diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b84b663..4f4d055 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,55 +12,81 @@ env:
 
 jobs:
   test:
-    name: Test (${{ matrix.crate }})
+    name: Test
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        crate: [incr-compute, incr-concurrent]
+        crate: [incr-core, incr-compute, incr-concurrent]
+    env:
+      CRATE: ${{ matrix.crate }}
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
-      - run: cargo test -p ${{ matrix.crate }}
+      - run: cargo test -p "$CRATE" --release
 
-  build-python:
-    name: Build Python (${{ matrix.crate }})
+  miri:
+    name: Miri
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        crate: [incr-python, incr-concurrent-python]
     steps:
       - uses: actions/checkout@v4
-      - uses: dtolnay/rust-toolchain@stable
+      - uses: dtolnay/rust-toolchain@nightly
+        with:
+          components: miri
       - uses: Swatinem/rust-cache@v2
-      - run: cargo build -p ${{ matrix.crate }}
+      - run: cargo +nightly miri test -p incr-core --lib -- --test-threads=1
 
   bench:
-    name: Benchmark (${{ matrix.crate }})
+    name: Benchmark
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - crate: incr-compute
-            bench: regression
-          - crate: incr-concurrent
-            bench: regression
-          - crate: incr-concurrent
-            bench: concurrent_throughput
+        bench: [chain, operators]
+    env:
+      BENCH: ${{ matrix.bench }}
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
-      - run: cargo bench -p ${{ matrix.crate }} --bench ${{ matrix.bench }} -- --output-format bencher | tee bench-output.txt
+      - run: cargo bench -p incr-core --bench "$BENCH" -- --output-format bencher | tee bench-output.txt
       - name: Upload benchmark results
         uses: actions/upload-artifact@v4
         with:
-          name: bench-${{ matrix.crate }}-${{ matrix.bench }}
+          name: bench-${{ matrix.bench }}
           path: bench-output.txt
 
+  examples:
+    name: Example apps
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+      - run: cargo build --release -p incr-concurrent-server -p incr-spreadsheet
+
+  python:
+    name: Python wheels
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        manifest:
+          - crates/incr-python/Cargo.toml
+          - crates/incr-concurrent-python/Cargo.toml
+    env:
+      MANIFEST: ${{ matrix.manifest }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - uses: Swatinem/rust-cache@v2
+      - run: pip install maturin
+      - run: maturin build --release --manifest-path "$MANIFEST"
+
   clippy:
     name: Clippy
     runs-on: ubuntu-latest
@@ -70,7 +96,7 @@ jobs:
         with:
           components: clippy
       - uses: Swatinem/rust-cache@v2
-      - run: cargo clippy -p incr-compute -p incr-concurrent -- -D warnings
+      - run: cargo clippy --workspace -- -D warnings
 
   fmt:
     name: Format
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 13fc360..71db7ac 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -15,7 +15,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
-      - run: cargo test -p incr-compute -p incr-concurrent
+      - run: cargo test --release -p incr-core -p incr-compute -p incr-concurrent
 
   publish-crates:
     name: Publish to crates.io
@@ -25,6 +25,12 @@ jobs:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
+      - name: Publish incr-core
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+        run: cargo publish -p incr-core --no-verify
+      - name: Wait for crates.io index
+        run: sleep 30
       - name: Publish incr-compute
         env:
           CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
@@ -37,25 +43,25 @@ jobs:
         run: cargo publish -p incr-concurrent --no-verify
 
   publish-pypi:
-    name: Publish to PyPI (${{ matrix.package }})
+    name: Publish to PyPI
     needs: test
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        include:
-          - package: incr-python
-            manifest: crates/incr-python/Cargo.toml
-          - package: incr-concurrent-python
-            manifest: crates/incr-concurrent-python/Cargo.toml
+        manifest:
+          - crates/incr-python/Cargo.toml
+          - crates/incr-concurrent-python/Cargo.toml
+    env:
+      MANIFEST: ${{ matrix.manifest }}
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
-      - name: Install maturin
-        run: pip install maturin
+      - run: pip install maturin
       - name: Build and publish
         env:
           MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-        run: maturin publish --manifest-path ${{ matrix.manifest }} --no-sdist
+        run: maturin publish --manifest-path "$MANIFEST" --no-sdist
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..70da80d
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,73 @@
+# Changelog
+
+All notable changes to this project are documented here. Format roughly follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.2.0-beta.1] — 2026-05-20
+
+### Architecture
+
+The big break: `incr-compute` and `incr-concurrent` are now thin re-export wrappers over a shared engine crate, `incr-core`. The engine is parameterized over a `Cells` strategy trait (`Local` for single-threaded, `Shared` for `Send + Sync`); the compiler monomorphizes each surface crate into the appropriate variant. The full algorithm — dependency tracking, ensure_clean's iterative post-order walker, red-green early cutoff, the segmented node store, the typed value arenas, all nine operators — lives in one place. v0.1's parallel implementations are deleted.
+
+### Breaking changes
+
+- **`Value` bound** is now `Clone + PartialEq + Send + Sync + 'static` in **both** crates (was `Any + Clone + PartialEq + 'static` in `incr-compute` v0.1). Most user types already meet the bound; types that don't will need adapting (e.g., `Arc<T>` instead of a bare `Rc<T>`; note that `Arc<Mutex<T>>` alone does not satisfy the bound, because `Mutex<T>` is not `PartialEq`).
+- **Single `Runtime` per crate** rather than the v0.1 split. `incr_compute::Runtime` is `Runtime<Local>`; `incr_concurrent::Runtime` is `Runtime<Shared>`. The public method names match v0.1.
+- **`NodeId::raw()` → `NodeId.0`**. The struct is `pub struct NodeId(pub u32)`; the field is accessed directly.
+- **`Incr<T>::node_id()` → `Incr<T>::slot()`**. The handle returns its u32 slot index.
+- **`IncrCollection::version_node_id()` removed**. Use `version_node()` which returns `Incr<u64>`.
+- **`count()` returns `Incr<u64>`** (was `Incr<usize>` in `incr-concurrent` v0.1). Sized to the network-portable type.
+- **`Runtime::set_label`** takes a `u32` slot directly (was `NodeId` in v0.1).
+- **`Runtime::set_tracing` removed**. `get_traced` now arms tracing internally for the duration of the call.
+- **`SortedCollection::entries()` → `snapshot()`**.
+- **`IncrCollection::delete` returns `bool`** indicating whether a delete was actually recorded (was: silently dropped the inner result in production v0.1).
+- **`Runtime::set` on a query node panics** with a clear message. This was undefined behavior in v0.1 (would overwrite the arena slot and corrupt the state machine).
+- **`Runtime` `!Send + !Sync` under Local**; `Send + Sync` under Shared (was: mixed in v0.1).
+
+### Added
+
+- **`incr-core` published crate** as the shared engine. Re-exported types include `Cells`, `Local`, `Shared`, `PtrCell`, `Lock`, `DepStack`, `LocalDepStack`, `SharedDepStack`, `LocalLock`. Users who want to build a custom concurrency strategy on top of the engine can do so.
+- **Overflow-dep storage**: queries with more than 7 dependencies are now supported (was: hard limit of 7 in v0.1's inline-only path). Overflow lists live in a heap-allocated `DepList`, reclaimed via the [`haphazard`](https://crates.io/crates/haphazard) global hazard-pointer domain. Concurrent readers hold a hazard pointer during traversal; writers retire displaced lists for deferred free.
+- **Real per-node tracing** in `get_traced`: every node visit during a get records a `NodeTrace` (`VerifiedClean` or `Recomputed { value_changed }`). Aggregates (`nodes_recomputed`, `nodes_cutoff`) populated from the trace. Hot-path cost: one Relaxed u8 load per compute when disarmed (~1 ns).
+- **Property tests under both strategies**: the same generator + verifier (`verify_incremental_matches_batch<C: Cells>`) runs against `Local` and `Shared`. 1000 random function graphs × 2 strategies + 500 random collection op sequences × 6 tests = ~5000 random scenarios per `cargo test` run.
+- **Concurrent stress test** for `incr-concurrent`: 4 reader threads + 1 writer thread × 1000 iterations with torn-read detection.
+- **Miri validation**: `cargo +nightly miri test -p incr-core --lib` covers all unsafe paths (segmented store, hazard-pointer reclamation, state machine CAS races). Zero undefined behavior reported across 79 unit tests.
+- **`Runtime::graph_snapshot`** returns real per-node `NodeInfo` with dependencies (read from inline-7 + overflow storage) and dependents (from inner state).
+
+### Performance
+
+Per-node propagation cost on this machine (criterion --quick):
+
+| Workload | `incr-compute` | `incr-concurrent` | Salsa |
+|---|---|---|---|
+| Diamond (4 nodes, propagate input through) | 647 ns | 764 ns | 1,066 ns |
+| Early cutoff (input changes, clamped output doesn't) | 314 ns | 404 ns | 469 ns |
+| Per-node propagation (chain) | ~135 ns | ~169 ns | ~387 ns |
+
+Collection insert through `filter → map → count`:
+
+| Size | `incr-compute` insert | From-scratch batch | Speedup |
+|---|---|---|---|
+| 1K | 673 ns | 102 µs | **152x** |
+| 10K | 657 ns | 67 µs | **102x** |
+| 100K | 661 ns | 156 µs | **236x** |
+
+The "incremental cost is constant in collection size" property holds. Production v0.1 README claimed 186x at 100K; v0.2 beats that by 27%. Lab notes in the wiki devlog.
+
+### Fixed
+
+- `count()` operator is now O(new deltas) per get rather than O(N) (was: summed over the entire multiset on every get in v0.1).
+- `publish_deps` static-dep fast path (was: O(N) churn on `dependents` lists in v0.1 due to a bug that grew the lists unbounded across iterations).
+
+### Changed
+
+- `incr-python` and `incr-concurrent-python` crates have been **re-implemented** against the v0.2 engine; their public Python API matches v0.1 but they internally use the new types. PyPI publish is gated on the next 0.2.x patch alongside the wheel-build job in CI.
+
+### Architecture decisions
+
+- See [`wiki/projects/incr/decisions/unification-into-incr-core.md`](https://github.com/Anyesh/incr/) for the architectural reset that motivated v0.2.
+- See [`wiki/projects/incr/plans/incr-core-consolidation.md`](https://github.com/Anyesh/incr/) for the migration plan.
+- 21 commits on the `v0.2-rewrite` branch (cut from main 2026-05-20).
+
+## [0.1.x]
+
+The v0.1 line shipped two independent crates (`incr-compute` and `incr-concurrent`) with shared API names but separate implementations. See git history for the per-release notes.
diff --git a/Cargo.toml b/Cargo.toml
index e722d0b..a037be0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,3 +1,3 @@
 [workspace]
 resolver = "2"
-members = ["crates/incr-compute", "crates/incr-concurrent", "crates/incr-python", "crates/incr-concurrent-python", "examples/concurrent-server", "examples/spreadsheet"]
+members = ["crates/incr-compute", "crates/incr-concurrent", "crates/incr-concurrent-python", "crates/incr-core", "crates/incr-python", "examples/concurrent-server", "examples/spreadsheet"]
diff --git a/README.md b/README.md
index b4a0310..2071a4b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 # incr
+
  [![crates.io badge for incr-compute](https://img.shields.io/crates/v/incr-compute?label=incr-compute&logo=rust&color=blue)](https://crates.io/crates/incr-compute)
  [![crates.io badge for incr-concurrent](https://img.shields.io/crates/v/incr-concurrent?label=incr-concurrent&logo=rust&color=orange)](https://crates.io/crates/incr-concurrent)
  [![PyPI badge for incr-compute](https://img.shields.io/pypi/v/incr-compute?label=incr-compute&logo=python&color=blue)](https://pypi.org/project/incr-compute/)
@@ -8,7 +9,7 @@
 
 Most software recomputes everything from scratch whenever anything changes. Your CI rebuilds the whole project when you edit one file, your dashboard re-queries the whole database when one row updates. There are domain-specific fixes for this (React diffs the DOM, Salsa caches compiler queries, Materialize does incremental SQL) but if you just want to make your own code incremental, theres nothing to reach for.
 
-incr is a crack at solving that. Its a Rust library (with Python bindings) that tracks dependencies between computations automatically and only reruns what's actually affected by a change. It ships as two crates: `incr-compute` for single-threaded, zero-overhead use, and `incr-concurrent` for multi-threaded programs where the runtime needs to be `Send + Sync`. Both are published on crates.io and PyPI, and they expose the same API surface, so switching between them is a one-line dependency swap.
+incr is a crack at solving that. It's a Rust library (with Python bindings on the roadmap) that tracks dependencies between computations automatically and only reruns what's actually affected by a change. The engine lives in `incr-core` and is parameterized over a concurrency strategy; two surface crates expose it: `incr-compute` (`Local` strategy, single-threaded, zero atomic-fence cost) and `incr-concurrent` (`Shared` strategy, `Send + Sync`, lock-free reads). Same public API, one-line dependency swap.
 
 ![Live spreadsheet demo showing formula cells updating incrementally as values change, powered by incr-concurrent with real-time WebSocket sync](examples/spreadsheet/demo.gif)
 
@@ -16,81 +17,84 @@ incr is a crack at solving that. Its a Rust library (with Python bindings) that
 
 You've got two ways to use it. Function graphs let you wire up computations that depend on each other:
 
-```python
-from incr import Runtime
+```rust
+use incr_compute::Runtime;
 
-rt = Runtime()
-width = rt.create_input(10.0)
-height = rt.create_input(5.0)
-area = rt.create_query(lambda rt: rt.get(width) * rt.get(height))
+let rt = Runtime::new();
+let width = rt.create_input(10.0_f64);
+let height = rt.create_input(5.0_f64);
+let area = rt.create_query(move |rt| rt.get(width) * rt.get(height));
 
-rt.get(area)  # 50.0
-rt.set(width, 12.0)
-rt.get(area)  # 60.0, height wasnt touched, only area reran
+rt.get(area);          // 50.0
+rt.set(width, 12.0);
+rt.get(area);          // 60.0 — height wasn't touched, only area reran
 ```
 
 And then theres incremental collections, which is where it gets more interesting. You set up a pipeline of operators, and when you insert or delete a row, only that row flows through. The engine doesnt re-examine existing data.
 
-```python
-# Travel premium calculation: sort visits by time, compute gaps
-# between consecutive visits, sum the premiums
-visits = rt.create_collection()
-sorted_visits = visits.sort_by_key(lambda v: v.time)
-pairs = sorted_visits.pairwise()
-gaps = pairs.map(lambda pair: distance(pair[0], pair[1]))
-total = gaps.reduce(lambda elements: sum(elements))
-
-visits.insert(visit_at_9am)
-visits.insert(visit_at_2pm)
-visits.insert(visit_at_11am)
-rt.get(total)  # computes all distances
-
-# Move one visit: only the two affected segments recompute
-visits.delete(visit_at_11am)
-visits.insert(visit_at_11am_moved_to_noon)
-rt.get(total)  # only recomputes 2 of 3 distances
+```rust
+use incr_compute::{Runtime, IncrCollection};
+
+let rt = Runtime::new();
+let visits: IncrCollection<Visit> = rt.create_collection();
+let sorted = visits.sort_by_key(&rt, |v| v.time);
+let pairs = sorted.pairwise(&rt);
+let gaps = pairs.map(&rt, |pair| distance(&pair.0, &pair.1));
+let total = gaps.reduce(&rt, |xs| xs.iter().sum::<f64>());
+
+visits.insert(&rt, visit_at_9am);
+visits.insert(&rt, visit_at_2pm);
+visits.insert(&rt, visit_at_11am);
+rt.get(total);   // computes all distances
+
+// Move one visit: only the two affected segments recompute.
+visits.delete(&rt, &visit_at_11am);
+visits.insert(&rt, visit_at_11am_moved_to_noon);
+rt.get(total);
 ```
 
-The pipeline supports filter, map, count, reduce, sort_by_key, pairwise, group_by, join, and window. The two APIs (function DAG and collections) share the same dependency graph under the hood so you can have a function query that reads from a collection's reduce and it all stays incremental.
+Nine operators ship: `filter`, `map`, `count`, `reduce`, `sort_by_key`, `pairwise`, `window`, `group_by`, `join`. The function-DAG API and the collection API share the same dependency graph, so a function query can read a collection's `reduce` and stay incremental end to end.
+
+## Three crates, one engine
 
-## Two crates, one API
+| | `incr-compute` | `incr-concurrent` | `incr-core` |
+|---|---|---|---|
+| Role | User-facing surface | User-facing surface | Shared engine |
+| Thread safety | Single-threaded (`!Send + !Sync`) | `Send + Sync`, shareable across threads | Strategy-parameterized |
+| Backing | `Cell`/`RefCell` (no atomics) | `Atomic*` + `RwLock` | `Cells` trait |
+| Rust | `cargo add incr-compute` | `cargo add incr-concurrent` | (used via the wrappers) |
 
-| | `incr-compute` | `incr-concurrent` |
-|---|---|---|
-| Thread safety | Single-threaded (`!Send`, `!Sync`) | `Send + Sync`, safe to share across threads |
-| Overhead | Zero runtime cost for thread safety | Atomic operations for node state transitions |
-| When to use | Scripts, CLI tools, single-threaded services | HTTP servers, background workers, anything multi-threaded |
-| Rust | `cargo add incr-compute` | `cargo add incr-concurrent` |
-| Python | `pip install incr-compute` | `pip install incr-concurrent` |
-| Python import | `from incr import Runtime` | `from incr_concurrent import Runtime` |
+The surface crates are thin re-exports: `incr-compute` re-exports `incr_core::Runtime<Local>` and `incr-concurrent` re-exports `incr_core::Runtime<Shared>`. The compiler monomorphizes both paths from the same source, so neither crate subsidizes the other: single-threaded users pay no atomic-fence cost; concurrent users pay no extra indirection for the lock-free read path. The asm of `Local`'s hot path is byte-identical to direct field access (validated on the spike branch and preserved through the type alias).
 
-The API is identical between the two. If you start with `incr-compute` and later need thread safety, swap the dependency and everything compiles without changes.
+If you start with `incr-compute` and later need thread safety, swap the dependency. The `Value` bound (`Clone + PartialEq + Send + Sync + 'static`) is identical between crates, so user types do not need a per-crate impl.
 
 ## Benchmarks
 
-We run these head-to-head against Salsa (the incremental engine in rust-analyzer) on the same machine, same workloads. Not cherry-picked.
+Measured on this branch with `criterion --quick` against `Salsa` (the incremental engine in rust-analyzer). Not cherry-picked.
 
-| Workload | incr | Salsa |
-|----------|------|-------|
-| Diamond graph, change input and propagate through 4 nodes | 752 ns | 1,066 ns |
-| Early cutoff (input changes but clamped output doesnt) | 445 ns | 469 ns |
-| Per-node propagation cost in a chain | ~175 ns/node | ~387 ns/query |
+| Workload | `incr-compute` | `incr-concurrent` | Salsa |
+|----------|----------------|---------------------|-------|
+| Diamond graph, propagate input through 4 nodes | 647 ns | 764 ns | 1,066 ns |
+| Early cutoff (input changes but clamped output doesn't) | 314 ns | 404 ns | 469 ns |
+| Per-node propagation cost (chain) | ~135 ns/node | ~169 ns/node | ~387 ns/query |
 
-Collection insert vs just recomputing the whole pipeline from scratch:
+Collection pipeline (`filter` → `map` → `count`) vs from-scratch batch:
 
-| Collection size | Incremental | From scratch | Speedup |
-|----------------|-------------|-------------|---------|
-| 1K elements | 798 ns | 2.5 us | 3x |
-| 10K elements | 1.0 us | 14.2 us | 14x |
-| 100K elements | 818 ns | 152 us | 186x |
+| Collection size | `incr-compute` insert | From scratch | Speedup |
+|----------------|----------------------|--------------|---------|
+| 1K elements    | 673 ns               | 102 µs       | 152x    |
+| 10K elements   | 657 ns               | 67 µs        | 102x    |
+| 100K elements  | 661 ns               | 156 µs       | 236x    |
 
-The interesting thing in that second table is the incremental column barely moves as the collection grows. 818 ns for 100K is almost the same as 798 ns for 1K because we're only touching the new row, not scanning the existing ones.
+The interesting thing in that second table is the `incr-compute` column barely moves as the collection grows. 661 ns at 100K is essentially the same as 673 ns at 1K because we're only touching the new row, not scanning the existing ones.
 
 ## How it works internally
 
 Calling `rt.set()` on an input eagerly marks downstream nodes as potentially dirty (just flipping bits, no recomputation). Then when you `rt.get()` a result, the engine walks backwards from what you asked for, checks if each dirty node's dependencies actually changed, and only reruns the ones that need it. If a node reruns but produces the same value it had before, propagation stops there, and thats the "early cutoff" you see in the benchmarks.
 
-For collections its a bit different. Each pipeline stage keeps a read offset into the upstream's change log. When triggered, it just reads entries past that offset, processes them, and advances the pointer. Inserting one row into a 100K collection means each stage does O(1) work regardless of collection size.
+For collections each pipeline stage keeps a read offset into the upstream's change log. When triggered, it just reads entries past that offset, processes them, and advances the pointer. Inserting one row into a 100K collection means each stage does O(1) work regardless of collection size.
+
+The engine itself lives in [`incr-core`](crates/incr-core/) under a `Cells` strategy trait. `Local` backs every cell with `std::cell::Cell` and gives you a `!Send + !Sync` runtime with no atomic ops. `Shared` backs every cell with the matching atomic type and uses Acquire/Release for state-visibility transitions. The trait inlines through every call site (`#[inline(always)]`), so the compiler emits the same code for `Runtime<Local>` operations as it would for direct `Cell::get()` calls. The 64-byte cache-line layout for `NodeData` is preserved under both strategies by compile-time (`const`) assertions.
 
 ## Getting started
 
@@ -98,52 +102,40 @@ For collections its a bit different. Each pipeline stage keeps a read offset int
 
 ```toml
 [dependencies]
-incr-compute = "0.1"      # single-threaded
+incr-compute = "0.2"      # single-threaded
 # or
-incr-concurrent = "0.1"   # multi-threaded (Send + Sync)
-```
-
-**Python:**
-
-```bash
-pip install incr-compute        # single-threaded
-# or
-pip install incr-concurrent     # multi-threaded
-```
-
-```python
-from incr import Runtime              # incr-compute
-# or
-from incr_concurrent import Runtime   # incr-concurrent
+incr-concurrent = "0.2"   # multi-threaded (Send + Sync)
 ```
 
 **Running the tests:**
 
 ```bash
-cargo test -p incr-compute         # single-threaded crate
-cargo test -p incr-concurrent      # concurrent crate
-pytest ./examples/tests/python/    # python bindings
-cargo bench -p incr-compute        # benchmarks (single-threaded)
-cargo bench -p incr-concurrent     # benchmarks (concurrent)
+cargo test -p incr-core             # full engine: 100+ tests with proptest
+cargo test -p incr-compute          # single-threaded wrapper integration
+cargo test -p incr-concurrent       # concurrent wrapper integration
+
+cargo bench -p incr-core            # full engine benches
+cargo bench -p incr-compute         # bench through the wrapper
 ```
 
 ## Testing
 
-300+ tests across both Rust crates (unit, property, and integration), plus a Python test suite for the bindings. We use proptest to generate thousands of random computation graphs, apply random mutations, and check that the incremental result matches what you'd get by recomputing everything from scratch. Thats the core correctness guarantee: if those two ever disagree on any random input, proptest shrinks it down to a minimal failing case.
+100+ unit/integration tests across the engine and wrappers, plus a proptest suite that runs **the same generator + verifier under both strategies**: 1000 random function graphs and 3000 random collection op sequences per `cargo test` run. The core correctness contract is that incremental evaluation produces the same final result as recomputing everything from scratch; if those two ever disagree on any random input, proptest shrinks to a minimal failing case.
+
+A concurrent stress test runs 4 reader threads against 1 writer thread for 1000 iterations and asserts no torn reads on derived values.
 
-The property test suites cover every operator (filter, map, count, reduce, sort_by_key, pairwise, group_by, join, window) in both crates, verifying that incremental evaluation produces the same result as full recomputation across thousands of randomly generated scenarios.
+The unsafe code (segmented store, hazard-pointer dep reclamation via `haphazard`, state machine CAS) is exercised under `cargo +nightly miri test -p incr-core --lib`. Tests pass under miri including 16-thread CAS races (3,200 concurrent attempts) and 50-iteration dynamic-dep-set churn through the overflow path with runtime drop. Zero undefined behavior detected.
 
 ## Demos
 
-Three demos show different aspects of the library:
+- [`examples/concurrent-server/`](examples/concurrent-server/) — multi-threaded HTTP server (Rust) where one writer thread feeds live market data into the graph while many HTTP handler threads read derived portfolio values concurrently without blocking.
+- [`examples/spreadsheet/`](examples/spreadsheet/) — live spreadsheet engine driving formula cells through the incremental graph with WebSocket sync.
 
-- **`examples/travel-premium/`** is a mobile worker scheduling demo (Python) that computes travel premiums incrementally using the full operator pipeline (sort, pairwise, map, reduce). It's backed by SQLite for persistence, with a distance cache that survives server restarts, and shows 5-8x speedup over batch recomputation when the map step involves expensive operations like distance lookups.
-- **`examples/dashboard/`** is a live API monitoring dashboard (Python) with dependency graph visualization and real-time tracing of which nodes recompute vs get skipped.
-- **`examples/concurrent-server/`** is a multi-threaded HTTP server (Rust) that proves the concurrent access model: one writer thread feeds live market data into an incr graph while multiple HTTP handler threads read derived portfolio values simultaneously without blocking.
+A Python `travel-premium` demo and a `dashboard` demo with real per-node tracing currently live only on the v0.1 line; they are scheduled to be ported to the v0.2 engine in 0.3, alongside the Python re-implementation.
 
 ## CI
 
-GitHub Actions runs on every push to main and on pull requests: tests for both crates, Python binding builds, benchmarks, clippy, and fmt. Tagging a release (`v*`) triggers automatic publishing to both crates.io and PyPI.
+GitHub Actions runs on every push to `main` and on pull requests: tests for all three Rust crates, benchmarks, clippy, and fmt. Tagging a release (`v*`) triggers automatic publishing to crates.io. Python wheels return in 0.3.
 
 ## Background and references
 
@@ -161,7 +153,7 @@ The systems we benchmark against and learned from:
 
 Y. Annie Liu's 2024 survey [Incremental Computation: What Is the Essence?](https://arxiv.org/abs/2312.07946) is probably the best current overview of the whole field if you want to understand where all these approaches fit relative to each other. One of her key findings is that fully general incrementalization is provably undecidable, which is why every practical system (including ours) picks a restricted but useful subset of computations to handle.
 
-None of the existing systems combine function DAGs with incremental collections in a single engine, which is what incr tries to do. Whether that actually works out as a general purpose tool is still an open question, but the early results are encouraging.
+None of the existing systems combine function DAGs with incremental collections in a single engine across single-threaded and concurrent topologies the way incr does. Early results across the function DAG and the operator pipeline both beat the published numbers for Salsa and the v0.1 line; the architecture is what made that possible.
 
 ## License
 
diff --git a/crates/incr-compute/Cargo.toml b/crates/incr-compute/Cargo.toml
index 5fe793a..09d3754 100644
--- a/crates/incr-compute/Cargo.toml
+++ b/crates/incr-compute/Cargo.toml
@@ -1,15 +1,15 @@
 [package]
 name = "incr-compute"
-version = "0.1.0"
+version = "0.2.0-beta.1"
 edition = "2021"
-description = "The fastest incremental computation engine — zero-overhead reactive DAG with incremental collections"
+description = "Single-threaded incremental computation engine. Zero-overhead reactive DAG built on incr-core."
 license = "Apache-2.0"
 repository = "https://github.com/Anyesh/incr"
 keywords = ["incremental", "computation", "reactive", "dataflow"]
 categories = ["algorithms", "data-structures"]
 
 [dependencies]
-rustc-hash = "2"
+incr-core = { version = "0.2.0-beta.1", path = "../incr-core" }
 
 [dev-dependencies]
 proptest = "1"
@@ -17,13 +17,5 @@ criterion = { version = "0.5", features = ["html_reports"] }
 rand = "0.8"
 
 [[bench]]
-name = "performance"
-harness = false
-
-[[bench]]
-name = "collection_operators"
-harness = false
-
-[[bench]]
-name = "regression"
+name = "chain"
 harness = false
diff --git a/crates/incr-compute/README.md b/crates/incr-compute/README.md
index 71be1c7..ab51a36 100644
--- a/crates/incr-compute/README.md
+++ b/crates/incr-compute/README.md
@@ -1,8 +1,8 @@
-# incr
+# incr-compute
 
-Single-threaded, zero-overhead incremental computation.
+Single-threaded, zero-overhead incremental computation. Since 0.2, this crate is a thin re-export of [`incr-core`](https://crates.io/crates/incr-core) with the `Local` strategy; the algorithm and operators live in the shared engine and monomorphize through this wrapper without adding any runtime cost.
 
-`incr` builds a reactive computation graph where derived values automatically recompute when their inputs change. It only recomputes what actually needs to change: if an intermediate result stays the same after an input mutation, everything downstream is skipped entirely (early cutoff). This makes it fast enough to sit in a hot loop without thinking about it.
+`incr-compute` builds a reactive computation graph where derived values automatically recompute when their inputs change. It only recomputes what actually needs to change: if an intermediate result stays the same after an input mutation, everything downstream is skipped entirely (early cutoff). The `Runtime` is `!Send + !Sync` and pays no atomic-fence cost on its hot path; under the hood every cell is `std::cell::Cell`.
 
 ## Install
 
@@ -31,13 +31,11 @@ Dependencies are tracked automatically. When you call `rt.get(width)` inside a q
 
 ## Collections
 
-Incremental collections let you build data pipelines that update incrementally as elements are inserted or removed.
-
 ```rust
-use incr_compute::{Runtime, IncrCollection};
+use incr_compute::{IncrCollection, Runtime};
 
 let rt = Runtime::new();
-let scores = rt.create_collection::<i64>();
+let scores: IncrCollection<i64> = rt.create_collection();
 
 scores.insert(&rt, 80);
 scores.insert(&rt, 95);
@@ -48,32 +46,34 @@ let passing = scores.filter(&rt, |s| *s >= 50);
 let curved = passing.map(&rt, |s| s + 10);
 let total = curved.reduce(&rt, |vals| vals.iter().sum::<i64>());
 
-assert_eq!(rt.get(total), 255); // (80+10) + (95+10) + (60+10)
+assert_eq!(rt.get(total), 265); // (80+10) + (95+10) + (60+10)
 
 scores.insert(&rt, 30); // filtered out, total unchanged
-assert_eq!(rt.get(total), 255);
+assert_eq!(rt.get(total), 265);
 ```
 
 ## All operators
 
 - **filter** keeps elements matching a predicate
 - **map** transforms each element
-- **count** tracks the number of elements
+- **count** tracks the number of elements (incremental, O(1) per insert/delete)
 - **reduce** folds all elements into a single value
 - **sort_by_key** produces a sorted view with positional deltas
 - **pairwise** emits consecutive pairs from a sorted collection
-- **group_by** partitions into keyed sub-collections
-- **join** pairs two collections on a shared key
 - **window** emits sliding windows of a given size from a sorted collection
+- **group_by** partitions into per-key sub-collections
+- **join** pairs two collections on a shared key
+
+## When to use incr-compute vs incr-concurrent
 
-## When to use incr vs incr-concurrent
+If your computation lives on a single thread, use `incr-compute`. It has zero synchronization overhead and is the fastest option.
 
-If your computation lives on a single thread, use `incr`. It has zero synchronization overhead and is the fastest option.
+If you need to share one computation graph across multiple threads (for example, a writer thread updating inputs while reader threads query derived values), use [`incr-concurrent`](https://crates.io/crates/incr-concurrent) instead. The API is identical: switching is a one-line import change.
 
-If you need to share one computation graph across multiple threads (for example, a writer thread updating inputs while reader threads query derived values), use [`incr-concurrent`](https://crates.io/crates/incr-concurrent) instead. The API is identical, so switching is a one-line import change.
+## Value bound
+
+User types stored in the runtime must implement `Value`, which is `Clone + PartialEq + Send + Sync + 'static`. A blanket impl provides `Value` for every qualifying type, so most user types need no explicit impl. The same bound applies in `incr-concurrent`, so types compile cleanly under both crates.
 
 ## Python
 
-```
-pip install incr-compute
-```
+The Python bindings will be re-implemented against the v0.2 engine in the 0.3 release.
diff --git a/crates/incr-compute/benches/chain.rs b/crates/incr-compute/benches/chain.rs
new file mode 100644
index 0000000..b5fdcfc
--- /dev/null
+++ b/crates/incr-compute/benches/chain.rs
@@ -0,0 +1,61 @@
+//! Chain- and diamond-propagation benches through the `incr-compute`
+//! v0.2 wrapper. Confirms the thin re-export adds no measurable cost
+//! beyond the `incr-core` bench numbers.
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use incr_compute::{Incr, Runtime};
+
+fn build_chain(n: usize) -> (Runtime, Incr<i64>, Incr<i64>) { // -> (runtime, input, tail)
+    let rt = Runtime::new();
+    let input = rt.create_input(1_i64);
+    let mut prev = input;
+    for _ in 0..n { // stack n queries, each reading the node created just before it
+        let dep = prev;
+        prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
+    }
+    let _ = rt.get(prev); // read the tail once up front; presumably a warm-up of the graph
+    (rt, input, prev)
+}
+
+fn bench_chain(c: &mut Criterion) { // set-input-then-read-tail on chains of 4, 10, 100
+    let mut group = c.benchmark_group("incr_compute_chain");
+    for size in [4_usize, 10, 100] {
+        group.bench_with_input(BenchmarkId::new("propagate", size), &size, |b, &size| {
+            let (rt, input, output) = build_chain(size); // setup runs outside b.iter
+            let mut val = 1_i64;
+            b.iter(|| {
+                val = val.wrapping_add(1); // each iteration writes a different value
+                rt.set(input, val);
+                black_box(rt.get(output)); // black_box keeps the read from being optimized out
+            });
+        });
+    }
+    group.finish();
+}
+
+fn bench_diamond(c: &mut Criterion) { // set-then-read on a diamond: input -> {a, b} -> out
+    let rt = Runtime::new();
+    let input = rt.create_input(1_i64);
+    let a = { // first branch: input + 10
+        let dep = input;
+        rt.create_query(move |rt| rt.get(dep).wrapping_add(10))
+    };
+    let b = { // second branch: input + 100
+        let dep = input;
+        rt.create_query(move |rt| rt.get(dep).wrapping_add(100))
+    };
+    let out = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b))); // a + b
+    let _ = rt.get(out); // read once before the benchmark closure runs
+
+    c.bench_function("incr_compute_diamond", |bencher| {
+        let mut val = 1_i64;
+        bencher.iter(|| {
+            val = val.wrapping_add(1); // each iteration writes a different value
+            rt.set(input, val);
+            black_box(rt.get(out)); // black_box keeps the read from being optimized out
+        });
+    });
+}
+
+criterion_group!(benches, bench_chain, bench_diamond);
+criterion_main!(benches);
diff --git a/crates/incr-compute/benches/collection_operators.rs b/crates/incr-compute/benches/collection_operators.rs
deleted file mode 100644
index d227ba9..0000000
--- a/crates/incr-compute/benches/collection_operators.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use incr_compute::Runtime;
-
-/// Batch: sort N timestamps, compute pairwise gaps, sum them.
-fn batch_travel_premium(timestamps: &[i64]) -> i64 {
-    let mut sorted = timestamps.to_vec();
-    sorted.sort();
-    sorted.windows(2).map(|w| w[1] - w[0]).sum()
-}
-
-/// Set up an incremental pipeline with N elements already inserted.
-/// Returns (runtime, collection, reduce_node) ready for mutation benchmarks.
-fn setup_incremental(
-    n: usize,
-) -> (
-    Runtime,
-    incr_compute::IncrCollection<i64>,
-    incr_compute::Incr<i64>,
-) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |t: &i64| *t);
-    let pairs = sorted.pairwise(&rt);
-    let gaps = pairs.map(&rt, |(a, b): &(i64, i64)| b - a);
-    let total = gaps.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-    for i in 0..n {
-        col.insert(&rt, (i as i64) * 10);
-    }
-    // Warmup: stabilize the graph
-    let _ = rt.get(total);
-
-    (rt, col, total)
-}
-
-fn bench_incremental_vs_batch(c: &mut Criterion) {
-    let mut group = c.benchmark_group("travel_premium");
-
-    for &n in &[10, 20, 40, 100, 500, 1000, 5000] {
-        // Batch benchmark
-        let timestamps: Vec<i64> = (0..n).map(|i| (i as i64) * 10).collect();
-        group.bench_with_input(BenchmarkId::new("batch", n), &timestamps, |b, ts| {
-            b.iter(|| black_box(batch_travel_premium(ts)));
-        });
-
-        // Incremental benchmark: measure cost of changing one element and reading result
-        group.bench_with_input(BenchmarkId::new("incremental_change", n), &n, |b, &n| {
-            let (rt, col, total) = setup_incremental(n);
-            // Change the middle element back and forth
-            let mid = (n / 2) as i64 * 10;
-            let mut toggle = true;
-            b.iter(|| {
-                if toggle {
-                    col.delete(&rt, &mid);
-                    col.insert(&rt, mid + 1); // shift by 1
-                } else {
-                    col.delete(&rt, &(mid + 1));
-                    col.insert(&rt, mid); // shift back
-                }
-                let result = rt.get(total);
-                toggle = !toggle;
-                black_box(result)
-            });
-        });
-    }
-
-    group.finish();
-}
-
-criterion_group!(benches, bench_incremental_vs_batch);
-criterion_main!(benches);
diff --git a/crates/incr-compute/benches/performance.rs b/crates/incr-compute/benches/performance.rs
deleted file mode 100644
index 2262313..0000000
--- a/crates/incr-compute/benches/performance.rs
+++ /dev/null
@@ -1,274 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use incr_compute::{Incr, Runtime};
-
-/// Build a linear chain: input -> n1 -> n2 -> ... -> output
-fn build_chain(size: usize) -> (Runtime, Incr<i64>, Incr<i64>) {
-    let rt = Runtime::new();
-    let input = rt.create_input(1_i64);
-    let mut prev: Incr<i64> = input;
-    for _ in 0..size {
-        let dep = prev;
-        prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
-    }
-    let _ = rt.get(prev);
-    (rt, input, prev)
-}
-
-/// Build a wide fan-out: input -> [n1, n2, ..., n_width] -> output
-fn build_fanout(width: usize) -> (Runtime, Incr<i64>, Incr<i64>) {
-    let rt = Runtime::new();
-    let input = rt.create_input(1_i64);
-    let mut intermediates: Vec<Incr<i64>> = Vec::new();
-    for i in 0..width {
-        let dep = input;
-        let offset = i as i64;
-        intermediates.push(rt.create_query(move |rt| rt.get(dep).wrapping_add(offset)));
-    }
-    // Sum all intermediates
-    let first = intermediates[0];
-    let output = if intermediates.len() == 1 {
-        first
-    } else {
-        let nodes = intermediates.clone();
-        rt.create_query(move |rt| nodes.iter().map(|n| rt.get(*n)).sum::<i64>())
-    };
-    let _ = rt.get(output);
-    (rt, input, output)
-}
-
-fn build_layered(
-    num_inputs: usize,
-    nodes_per_layer: usize,
-    num_layers: usize,
-) -> (Runtime, Vec<Incr<i64>>, Incr<i64>) {
-    let rt = Runtime::new();
-    let mut inputs = Vec::new();
-    let mut all_nodes: Vec<Incr<i64>> = Vec::new();
-
-    for i in 0..num_inputs {
-        let node = rt.create_input(i as i64);
-        inputs.push(node);
-        all_nodes.push(node);
-    }
-
-    for _ in 0..num_layers {
-        let available = all_nodes.len();
-        for j in 0..nodes_per_layer {
-            let a = all_nodes[j % available];
-            let b = all_nodes[(j + 1) % available];
-            let node = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            all_nodes.push(node);
-        }
-    }
-
-    let last = *all_nodes.last().unwrap();
-    let _ = rt.get(last);
-    (rt, inputs, last)
-}
-
-fn bench_propagate_single(c: &mut Criterion) {
-    let mut group = c.benchmark_group("propagate_single_change");
-
-    for size in [100, 1_000, 10_000] {
-        group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
-            let (rt, input, output) = build_chain(size);
-            let mut val = 1_i64;
-            b.iter(|| {
-                val += 1;
-                rt.set(input, val);
-                black_box(rt.get(output));
-            });
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_early_cutoff(c: &mut Criterion) {
-    c.bench_function("early_cutoff_chain_1000", |b| {
-        let rt = Runtime::new();
-        let input = rt.create_input(1_i64);
-        let clamped = {
-            let dep = input;
-            rt.create_query(move |rt| rt.get(dep).min(100))
-        };
-        let mut prev: Incr<i64> = clamped;
-        for _ in 0..999 {
-            let dep = prev;
-            prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
-        }
-        let output = prev;
-        let _ = rt.get(output);
-
-        // Set input to >100 so clamp activates
-        rt.set(input, 200);
-        let _ = rt.get(output);
-
-        let mut val = 200_i64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val); // Clamped to 100, same as before
-            black_box(rt.get(output));
-        });
-    });
-}
-
-fn bench_overhead_vs_batch(c: &mut Criterion) {
-    let mut group = c.benchmark_group("overhead_vs_batch");
-
-    for size in [100, 1_000, 10_000] {
-        group.bench_with_input(
-            BenchmarkId::new("incremental_initial", size),
-            &size,
-            |b, &size| {
-                b.iter(|| {
-                    let (rt, _, output) = build_chain(size);
-                    black_box(rt.get(output));
-                });
-            },
-        );
-
-        group.bench_with_input(BenchmarkId::new("batch_plain", size), &size, |b, &size| {
-            b.iter(|| {
-                let mut val = 1_i64;
-                for _ in 0..size {
-                    val = val.wrapping_add(1);
-                }
-                black_box(val);
-            });
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_scaling(c: &mut Criterion) {
-    let mut group = c.benchmark_group("scaling_with_graph_size");
-
-    for &(inputs, per_layer, layers) in &[
-        (10, 10, 1),    // ~20 nodes
-        (10, 10, 10),   // ~110 nodes
-        (10, 10, 100),  // ~1010 nodes
-        (50, 50, 20),   // ~1050 nodes
-        (100, 100, 10), // ~1100 nodes
-    ] {
-        let total = inputs + per_layer * layers;
-        group.bench_with_input(
-            BenchmarkId::from_parameter(format!("{}n", total)),
-            &(inputs, per_layer, layers),
-            |b, &(inputs, per_layer, layers)| {
-                let (rt, input_nodes, output) = build_layered(inputs, per_layer, layers);
-                let mut val = 100_i64;
-                b.iter(|| {
-                    val += 1;
-                    rt.set(input_nodes[0], val);
-                    black_box(rt.get(output));
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-fn bench_collection_insert(c: &mut Criterion) {
-    let mut group = c.benchmark_group("collection_insert_throughput");
-
-    for size in [1_000, 10_000, 100_000] {
-        group.bench_with_input(
-            BenchmarkId::from_parameter(format!("{}elem", size)),
-            &size,
-            |b, &size| {
-                let rt = Runtime::new();
-                let col = rt.create_collection::<i64>();
-                let filtered = col.filter(&rt, |x| x % 2 == 0);
-                let mapped = filtered.map(&rt, |x| x * 2);
-                let count = mapped.count(&rt);
-
-                for i in 0..size {
-                    col.insert(&rt, i);
-                }
-                let _ = rt.get(count);
-
-                let mut next = size;
-                b.iter(|| {
-                    col.insert(&rt, next);
-                    next += 1;
-                    black_box(rt.get(count));
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-fn bench_collection_delete(c: &mut Criterion) {
-    let mut group = c.benchmark_group("collection_delete_throughput");
-
-    for size in [1_000, 10_000, 100_000] {
-        group.bench_with_input(
-            BenchmarkId::from_parameter(format!("{}elem", size)),
-            &size,
-            |b, &size| {
-                let rt = Runtime::new();
-                let col = rt.create_collection::<i64>();
-                let filtered = col.filter(&rt, |x| x % 2 == 0);
-                let count = filtered.count(&rt);
-
-                for i in 0..size {
-                    col.insert(&rt, i);
-                }
-                let _ = rt.get(count);
-
-                let mut idx = 0_i64;
-                b.iter(|| {
-                    let val = idx % size;
-                    col.delete(&rt, &val);
-                    black_box(rt.get(count));
-                    col.insert(&rt, val);
-                    let _ = rt.get(count);
-                    idx += 1;
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-fn bench_collection_pipeline_depth(c: &mut Criterion) {
-    c.bench_function("5_stage_pipeline_insert", |b| {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let stage1 = col.filter(&rt, |x| *x > 0);
-        let stage2 = stage1.filter(&rt, |x| *x < 1_000_000);
-        let stage3 = stage2.map(&rt, |x| x * 2);
-        let stage4 = stage3.filter(&rt, |x| *x < 500_000);
-        let count = stage4.count(&rt);
-
-        for i in 1..10_001_i64 {
-            col.insert(&rt, i);
-        }
-        let _ = rt.get(count);
-
-        let mut next = 10_001_i64;
-        b.iter(|| {
-            col.insert(&rt, next);
-            next += 1;
-            black_box(rt.get(count));
-        });
-    });
-}
-
-criterion_group!(
-    benches,
-    bench_propagate_single,
-    bench_early_cutoff,
-    bench_overhead_vs_batch,
-    bench_scaling,
-    bench_collection_insert,
-    bench_collection_delete,
-    bench_collection_pipeline_depth,
-);
-criterion_main!(benches);
diff --git a/crates/incr-compute/benches/regression.rs b/crates/incr-compute/benches/regression.rs
deleted file mode 100644
index 427bdbe..0000000
--- a/crates/incr-compute/benches/regression.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use incr_compute::Runtime;
-
-fn hot_read_input(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(42_u64);
-    let _ = rt.get(input);
-
-    c.bench_function("hot_read_input", |b| b.iter(|| black_box(rt.get(input))));
-}
-
-fn hot_read_query(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(42_u64);
-    let query = rt.create_query(move |rt| rt.get(input) * 2);
-    let _ = rt.get(query);
-
-    c.bench_function("hot_read_query", |b| b.iter(|| black_box(rt.get(query))));
-}
-
-fn set_input_no_deps(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(0_u64);
-
-    c.bench_function("set_input_no_deps", |b| {
-        let mut val = 0u64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val);
-        })
-    });
-}
-
-fn propagate_chain_100(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(0_u64);
-    let mut prev = input;
-    for _ in 0..100 {
-        let dep = prev;
-        prev = rt.create_query(move |rt| rt.get(dep) + 1);
-    }
-    let tail = prev;
-    let _ = rt.get(tail);
-
-    c.bench_function("propagate_chain_100", |b| {
-        let mut val = 0u64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val);
-            black_box(rt.get(tail))
-        })
-    });
-}
-
-fn collection_pipeline(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let evens = col.filter(&rt, |x| x % 2 == 0);
-    let doubled = evens.map(&rt, |x| x * 2);
-    let sum = doubled.reduce(&rt, |elems| -> i64 { elems.iter().sum() });
-
-    for i in 0..50 {
-        col.insert(&rt, i);
-    }
-    let _ = rt.get(sum);
-
-    c.bench_function("collection_pipeline", |b| {
-        let mut next = 50i64;
-        b.iter(|| {
-            col.insert(&rt, next);
-            next += 1;
-            black_box(rt.get(sum))
-        })
-    });
-}
-
-criterion_group!(
-    benches,
-    hot_read_input,
-    hot_read_query,
-    set_input_no_deps,
-    propagate_chain_100,
-    collection_pipeline
-);
-criterion_main!(benches);
diff --git a/crates/incr-compute/src/collection.rs b/crates/incr-compute/src/collection.rs
deleted file mode 100644
index a95a6d4..0000000
--- a/crates/incr-compute/src/collection.rs
+++ /dev/null
@@ -1,1064 +0,0 @@
-use std::any::Any;
-use std::cell::{Cell, RefCell};
-use std::collections::{HashMap, HashSet};
-use std::hash::Hash;
-use std::rc::Rc;
-
-use crate::runtime::Runtime;
-use crate::sorted_collection::{SortDelta, SortedCollection};
-use crate::types::Incr;
-
-#[derive(Clone, Debug)]
-pub enum Delta<T> {
-    Insert(T),
-    Delete(T),
-}
-
-#[derive(Clone, Debug)]
-pub(crate) struct VersionedDelta<T> {
-    #[allow(dead_code)]
-    pub version: u64,
-    pub delta: Delta<T>,
-}
-
-pub(crate) struct CollectionLog<T: Clone + Hash + Eq> {
-    /// Counts each element. In set mode (multiset=false), counts are always 0 or 1.
-    /// In multiset mode (multiset=true), counts can exceed 1 for duplicate values.
-    pub elements: HashMap<T, usize>,
-    pub deltas: Vec<VersionedDelta<T>>,
-    pub version: u64,
-    /// When true, allows duplicate values (reference-counted). Used by pipeline
-    /// operators like `map` whose outputs may collide even when inputs are distinct.
-    multiset: bool,
-}
-
-impl<T: Clone + Hash + Eq> CollectionLog<T> {
-    /// Create a set-mode log: duplicate inserts are silently ignored.
-    pub fn new() -> Self {
-        CollectionLog {
-            elements: HashMap::new(),
-            deltas: Vec::new(),
-            version: 0,
-            multiset: false,
-        }
-    }
-
-    /// Create a multiset-mode log: duplicate inserts increment a reference count
-    /// and fire a delta each time; deletes decrement and fire a delta only when
-    /// the count reaches zero.
-    pub fn new_multiset() -> Self {
-        CollectionLog {
-            elements: HashMap::new(),
-            deltas: Vec::new(),
-            version: 0,
-            multiset: true,
-        }
-    }
-
-    pub fn insert(&mut self, value: T) -> bool {
-        if self.multiset {
-            let count = self.elements.entry(value.clone()).or_insert(0);
-            *count += 1;
-            self.version += 1;
-            self.deltas.push(VersionedDelta {
-                version: self.version,
-                delta: Delta::Insert(value),
-            });
-            true
-        } else {
-            let count = self.elements.entry(value.clone()).or_insert(0);
-            if *count == 0 {
-                *count = 1;
-                self.version += 1;
-                self.deltas.push(VersionedDelta {
-                    version: self.version,
-                    delta: Delta::Insert(value),
-                });
-                true
-            } else {
-                false
-            }
-        }
-    }
-
-    pub fn delete(&mut self, value: &T) -> bool {
-        if self.multiset {
-            if let Some(count) = self.elements.get_mut(value) {
-                *count -= 1;
-                self.version += 1;
-                self.deltas.push(VersionedDelta {
-                    version: self.version,
-                    delta: Delta::Delete(value.clone()),
-                });
-                if *count == 0 {
-                    self.elements.remove(value);
-                }
-                true
-            } else {
-                false
-            }
-        } else if self.elements.remove(value).is_some() {
-            self.version += 1;
-            self.deltas.push(VersionedDelta {
-                version: self.version,
-                delta: Delta::Delete(value.clone()),
-            });
-            true
-        } else {
-            false
-        }
-    }
-
-    /// Returns the set of distinct elements present (regardless of multiplicity).
-    pub fn distinct_elements(&self) -> HashSet<T> {
-        self.elements.keys().cloned().collect()
-    }
-
-    /// Returns all elements expanded by multiplicity as a Vec.
-    /// For set-mode logs (all counts 1), this is equivalent to iterating the set.
-    /// For multiset-mode logs, duplicate values appear multiple times.
-    pub fn elements_vec(&self) -> Vec<T> {
-        self.elements
-            .iter()
-            .flat_map(|(v, &count)| std::iter::repeat_n(v.clone(), count))
-            .collect()
-    }
-}
-
-pub struct GroupedCollection<K, T>
-where
-    K: Any + Clone + Hash + Eq + 'static,
-    T: Any + Clone + Hash + Eq + 'static,
-{
-    pub(crate) groups: Rc<RefCell<HashMap<K, IncrCollection<T>>>>,
-    pub(crate) version_node: Incr<u64>,
-}
-
-impl<K, T> GroupedCollection<K, T>
-where
-    K: Any + Clone + Hash + Eq + 'static,
-    T: Any + Clone + Hash + Eq + 'static,
-{
-    pub fn keys(&self) -> Vec<K> {
-        self.groups.borrow().keys().cloned().collect()
-    }
-
-    pub fn get_group(&self, key: &K) -> Option<IncrCollection<T>> {
-        self.groups.borrow().get(key).cloned()
-    }
-
-    pub fn version_node(&self) -> Incr<u64> {
-        self.version_node
-    }
-}
-
-pub struct IncrCollection<T: Any + Clone + Hash + Eq + 'static> {
-    pub(crate) log: Rc<RefCell<CollectionLog<T>>>,
-    pub(crate) version_node: Incr<u64>,
-}
-
-impl<T: Any + Clone + Hash + Eq + 'static> Clone for IncrCollection<T> {
-    fn clone(&self) -> Self {
-        IncrCollection {
-            log: self.log.clone(),
-            version_node: self.version_node,
-        }
-    }
-}
-
-impl<T: Any + Clone + Hash + Eq + 'static> IncrCollection<T> {
-    pub fn version_node_id(&self) -> crate::types::NodeId {
-        self.version_node.node_id()
-    }
-
-    pub fn insert(&self, rt: &Runtime, value: T) {
-        let changed = self.log.borrow_mut().insert(value);
-        if changed {
-            let ver = self.log.borrow().version;
-            rt.set(self.version_node, ver);
-        }
-    }
-
-    pub fn delete(&self, rt: &Runtime, value: &T) {
-        let changed = self.log.borrow_mut().delete(value);
-        if changed {
-            let ver = self.log.borrow().version;
-            rt.set(self.version_node, ver);
-        }
-    }
-
-    pub fn filter<F>(&self, rt: &Runtime, predicate: F) -> IncrCollection<T>
-    where
-        F: Fn(&T) -> bool + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let output_log = Rc::new(RefCell::new(CollectionLog::new()));
-        let output_log_ref = output_log.clone();
-        let last_idx = Rc::new(Cell::new(0_usize));
-        let upstream_ver = self.version_node;
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.borrow();
-            let start = last_idx.get();
-            if start >= upstream.deltas.len() {
-                return output_log_ref.borrow().version;
-            }
-
-            let mut output = output_log_ref.borrow_mut();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        if predicate(x) {
-                            output.insert(x.clone());
-                        }
-                    }
-                    Delta::Delete(x) => {
-                        if predicate(x) {
-                            output.delete(x);
-                        }
-                    }
-                }
-            }
-
-            last_idx.set(upstream.deltas.len());
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn map<U, F>(&self, rt: &Runtime, f: F) -> IncrCollection<U>
-    where
-        U: Any + Clone + Hash + Eq + 'static,
-        F: Fn(&T) -> U + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let output_log = Rc::new(RefCell::new(CollectionLog::new_multiset()));
-        let output_log_ref = output_log.clone();
-        let last_idx = Rc::new(Cell::new(0_usize));
-        let mapping: Rc<RefCell<HashMap<T, U>>> = Rc::new(RefCell::new(HashMap::new()));
-        let mapping_ref = mapping.clone();
-        let upstream_ver = self.version_node;
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.borrow();
-            let start = last_idx.get();
-            if start >= upstream.deltas.len() {
-                return output_log_ref.borrow().version;
-            }
-
-            let mut output = output_log_ref.borrow_mut();
-            let mut map_state = mapping_ref.borrow_mut();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let y = f(x);
-                        map_state.insert(x.clone(), y.clone());
-                        output.insert(y);
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(y) = map_state.remove(x) {
-                            output.delete(&y);
-                        }
-                    }
-                }
-            }
-
-            last_idx.set(upstream.deltas.len());
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn elements(&self) -> std::collections::HashSet<T> {
-        self.log.borrow().distinct_elements()
-    }
-
-    pub fn count(&self, rt: &Runtime) -> Incr<usize> {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let current_count = Rc::new(Cell::new(0_usize));
-        let count_ref = current_count.clone();
-        let last_idx = Rc::new(Cell::new(0_usize));
-
-        rt.create_query(move |rt| -> usize {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.borrow();
-            let start = last_idx.get();
-            if start >= upstream.deltas.len() {
-                return count_ref.get();
-            }
-
-            let mut count = count_ref.get();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(_) => count += 1,
-                    Delta::Delete(_) => count -= 1,
-                }
-            }
-
-            last_idx.set(upstream.deltas.len());
-            count_ref.set(count);
-            count
-        })
-    }
-
-    pub fn reduce<A, F>(&self, rt: &Runtime, fold_fn: F) -> Incr<A>
-    where
-        A: Any + Clone + PartialEq + 'static,
-        F: Fn(&Vec<T>) -> A + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let last_idx = Rc::new(Cell::new(0_usize));
-
-        rt.create_query(move |rt| -> A {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.borrow();
-            let start = last_idx.get();
-            if start >= upstream.deltas.len() {
-                // No new deltas, but we still need to return current value.
-                // On first call with empty collection, fold over current elements.
-                let elems = upstream.elements_vec();
-                return fold_fn(&elems);
-            }
-
-            last_idx.set(upstream.deltas.len());
-            let elems = upstream.elements_vec();
-            fold_fn(&elems)
-        })
-    }
-
-    pub fn group_by<K, F>(&self, rt: &Runtime, key_fn: F) -> GroupedCollection<K, T>
-    where
-        K: Any + Clone + Hash + Eq + 'static,
-        F: Fn(&T) -> K + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let last_idx = Rc::new(Cell::new(0_usize));
-        let groups: Rc<RefCell<HashMap<K, IncrCollection<T>>>> =
-            Rc::new(RefCell::new(HashMap::new()));
-        let groups_ref = groups.clone();
-        let key_cache: Rc<RefCell<HashMap<T, K>>> = Rc::new(RefCell::new(HashMap::new()));
-        let key_cache_ref = key_cache.clone();
-        let rt_ptr: *const Runtime = rt;
-
-        let version_counter: Rc<Cell<u64>> = Rc::new(Cell::new(0));
-        let version_counter_ref = version_counter.clone();
-
-        let version_node = rt.create_query(move |_rt| -> u64 {
-            // SAFETY: rt_ptr points to the Runtime that owns this compute graph.
-            // This closure is only ever called during rt.get(), while the Runtime
-            // is alive, and v1 is single-threaded so no concurrent access occurs.
-            let rt = unsafe { &*rt_ptr };
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.borrow();
-            let start = last_idx.get();
-            if start >= upstream.deltas.len() {
-                return version_counter_ref.get();
-            }
-
-            let mut grps = groups_ref.borrow_mut();
-            let mut kc = key_cache_ref.borrow_mut();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = key_fn(x);
-                        kc.insert(x.clone(), k.clone());
-                        let group = grps
-                            .entry(k)
-                            .or_insert_with(|| rt.create_collection_in_compute::<T>());
-                        let ver = {
-                            let mut log = group.log.borrow_mut();
-                            log.insert(x.clone());
-                            log.version
-                        };
-                        rt.set(group.version_node, ver);
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = kc.remove(x) {
-                            if let Some(group) = grps.get(&k) {
-                                let ver = {
-                                    let mut log = group.log.borrow_mut();
-                                    log.delete(x);
-                                    log.version
-                                };
-                                rt.set(group.version_node, ver);
-                            }
-                        }
-                    }
-                }
-            }
-
-            last_idx.set(upstream.deltas.len());
-            let v = version_counter_ref.get() + 1;
-            version_counter_ref.set(v);
-            v
-        });
-
-        GroupedCollection {
-            groups,
-            version_node,
-        }
-    }
-
-    pub fn join<U, K, FL, FR>(
-        &self,
-        rt: &Runtime,
-        right: &IncrCollection<U>,
-        left_key: FL,
-        right_key: FR,
-    ) -> IncrCollection<(T, U)>
-    where
-        U: Any + Clone + Hash + Eq + 'static,
-        K: Any + Clone + Hash + Eq + 'static,
-        FL: Fn(&T) -> K + 'static,
-        FR: Fn(&U) -> K + 'static,
-    {
-        let left_log = self.log.clone();
-        let right_log = right.log.clone();
-        let left_ver = self.version_node;
-        let right_ver = right.version_node;
-        let left_last = Rc::new(Cell::new(0_usize));
-        let right_last = Rc::new(Cell::new(0_usize));
-
-        let left_index: Rc<RefCell<HashMap<K, Vec<T>>>> = Rc::new(RefCell::new(HashMap::new()));
-        let right_index: Rc<RefCell<HashMap<K, Vec<U>>>> = Rc::new(RefCell::new(HashMap::new()));
-        let left_key_cache: Rc<RefCell<HashMap<T, K>>> = Rc::new(RefCell::new(HashMap::new()));
-        let right_key_cache: Rc<RefCell<HashMap<U, K>>> = Rc::new(RefCell::new(HashMap::new()));
-
-        let left_idx_ref = left_index.clone();
-        let right_idx_ref = right_index.clone();
-        let left_kc_ref = left_key_cache.clone();
-        let right_kc_ref = right_key_cache.clone();
-
-        let output_log = Rc::new(RefCell::new(CollectionLog::new_multiset()));
-        let output_log_ref = output_log.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _lv = rt.get(left_ver);
-            let _rv = rt.get(right_ver);
-
-            let left_up = left_log.borrow();
-            let right_up = right_log.borrow();
-            let l_start = left_last.get();
-            let r_start = right_last.get();
-
-            if l_start >= left_up.deltas.len() && r_start >= right_up.deltas.len() {
-                return output_log_ref.borrow().version;
-            }
-
-            let mut li = left_idx_ref.borrow_mut();
-            let mut ri = right_idx_ref.borrow_mut();
-            let mut lkc = left_kc_ref.borrow_mut();
-            let mut rkc = right_kc_ref.borrow_mut();
-            let mut output = output_log_ref.borrow_mut();
-
-            // Process left deltas
-            for vd in &left_up.deltas[l_start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = left_key(x);
-                        lkc.insert(x.clone(), k.clone());
-                        li.entry(k.clone()).or_default().push(x.clone());
-                        if let Some(rights) = ri.get(&k) {
-                            for r in rights {
-                                output.insert((x.clone(), r.clone()));
-                            }
-                        }
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = lkc.remove(x) {
-                            if let Some(lefts) = li.get_mut(&k) {
-                                lefts.retain(|l| l != x);
-                            }
-                            if let Some(rights) = ri.get(&k) {
-                                for r in rights {
-                                    output.delete(&(x.clone(), r.clone()));
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Process right deltas
-            for vd in &right_up.deltas[r_start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = right_key(x);
-                        rkc.insert(x.clone(), k.clone());
-                        ri.entry(k.clone()).or_default().push(x.clone());
-                        if let Some(lefts) = li.get(&k) {
-                            for l in lefts {
-                                output.insert((l.clone(), x.clone()));
-                            }
-                        }
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = rkc.remove(x) {
-                            if let Some(rights) = ri.get_mut(&k) {
-                                rights.retain(|r| r != x);
-                            }
-                            if let Some(lefts) = li.get(&k) {
-                                for l in lefts {
-                                    output.delete(&(l.clone(), x.clone()));
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            left_last.set(left_up.deltas.len());
-            right_last.set(right_up.deltas.len());
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn sort_by_key<K, F>(&self, rt: &Runtime, key_fn: F) -> SortedCollection<T>
-    where
-        K: Ord + Clone + 'static,
-        F: Fn(&T) -> K + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let last_idx = Rc::new(Cell::new(0_usize));
-
-        // Internal state: keys vec lives inside the closure
-        let keys: Rc<RefCell<Vec<K>>> = Rc::new(RefCell::new(Vec::new()));
-        // Reverse lookup: value -> cached key (for delete)
-        let key_cache: Rc<RefCell<HashMap<T, K>>> = Rc::new(RefCell::new(HashMap::new()));
-
-        // Shared state: exposed to SortedCollection
-        let ordered_values: Rc<RefCell<Vec<T>>> = Rc::new(RefCell::new(Vec::new()));
-        let pending_deltas: Rc<RefCell<Vec<SortDelta<T>>>> = Rc::new(RefCell::new(Vec::new()));
-
-        let keys_ref = keys.clone();
-        let key_cache_ref = key_cache.clone();
-        let ordered_values_ref = ordered_values.clone();
-        let pending_deltas_ref = pending_deltas.clone();
-
-        let version_counter: Rc<Cell<u64>> = Rc::new(Cell::new(0));
-        let version_counter_ref = version_counter.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.borrow();
-            let start = last_idx.get();
-            if start >= upstream.deltas.len() {
-                return version_counter_ref.get();
-            }
-
-            let mut ks = keys_ref.borrow_mut();
-            let mut kc = key_cache_ref.borrow_mut();
-            let mut vals = ordered_values_ref.borrow_mut();
-            let mut deltas = pending_deltas_ref.borrow_mut();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = key_fn(x);
-                        let pos = ks
-                            .binary_search_by(|probe| probe.cmp(&k))
-                            .unwrap_or_else(|pos| pos);
-                        ks.insert(pos, k.clone());
-                        vals.insert(pos, x.clone());
-                        kc.insert(x.clone(), k);
-                        deltas.push(SortDelta::Inserted {
-                            index: pos,
-                            value: x.clone(),
-                        });
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = kc.remove(x) {
-                            // Find the position: binary search for the key, then linear scan
-                            // for the exact value in case of duplicate keys
-                            let start_pos = ks
-                                .binary_search_by(|probe| probe.cmp(&k))
-                                .unwrap_or_else(|pos| pos);
-                            let mut pos = start_pos;
-                            while pos < vals.len() && ks[pos] == k {
-                                if vals[pos] == *x {
-                                    break;
-                                }
-                                pos += 1;
-                            }
-                            if pos < vals.len() && vals[pos] == *x {
-                                ks.remove(pos);
-                                vals.remove(pos);
-                                deltas.push(SortDelta::Removed {
-                                    index: pos,
-                                    value: x.clone(),
-                                });
-                            }
-                        }
-                    }
-                }
-            }
-
-            last_idx.set(upstream.deltas.len());
-            let ver = version_counter_ref.get() + 1;
-            version_counter_ref.set(ver);
-            ver
-        });
-
-        SortedCollection {
-            ordered_values,
-            pending_deltas,
-            version_node,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::Runtime;
-
-    #[test]
-    fn log_insert() {
-        let mut log = CollectionLog::new();
-        assert!(log.insert(1_i64));
-        assert_eq!(log.elements.len(), 1);
-        assert_eq!(log.version, 1);
-        assert_eq!(log.deltas.len(), 1);
-    }
-
-    #[test]
-    fn log_insert_duplicate_is_noop() {
-        let mut log = CollectionLog::new();
-        assert!(log.insert(1_i64));
-        assert!(!log.insert(1_i64));
-        assert_eq!(log.elements.len(), 1);
-        assert_eq!(log.version, 1);
-    }
-
-    #[test]
-    fn log_delete() {
-        let mut log = CollectionLog::new();
-        log.insert(1_i64);
-        assert!(log.delete(&1));
-        assert_eq!(log.elements.len(), 0);
-        assert_eq!(log.version, 2);
-        assert_eq!(log.deltas.len(), 2);
-    }
-
-    #[test]
-    fn log_delete_missing_is_noop() {
-        let mut log: CollectionLog<i64> = CollectionLog::new();
-        assert!(!log.delete(&1));
-        assert_eq!(log.version, 0);
-    }
-
-    #[test]
-    fn log_deltas_are_versioned() {
-        let mut log = CollectionLog::new();
-        log.insert(10_i64);
-        log.insert(20);
-        log.delete(&10);
-
-        assert_eq!(log.deltas.len(), 3);
-        assert_eq!(log.deltas[0].version, 1);
-        assert_eq!(log.deltas[1].version, 2);
-        assert_eq!(log.deltas[2].version, 3);
-        assert!(matches!(log.deltas[0].delta, Delta::Insert(10)));
-        assert!(matches!(log.deltas[1].delta, Delta::Insert(20)));
-        assert!(matches!(log.deltas[2].delta, Delta::Delete(10)));
-    }
-
-    #[test]
-    fn create_and_insert() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        assert_eq!(col.log.borrow().elements.len(), 3);
-    }
-
-    #[test]
-    fn insert_bumps_graph_version() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        assert_eq!(rt.get(col.version_node), 0);
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(col.version_node), 1);
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(col.version_node), 2);
-    }
-
-    #[test]
-    fn delete_bumps_graph_version() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(col.version_node), 2);
-        col.delete(&rt, &1);
-        assert_eq!(rt.get(col.version_node), 3);
-    }
-
-    #[test]
-    fn duplicate_insert_no_version_bump() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(col.version_node), 1);
-        col.insert(&rt, 1); // duplicate
-        assert_eq!(rt.get(col.version_node), 1); // unchanged
-    }
-
-    #[test]
-    fn filter_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.borrow().elements.len(), 2);
-    }
-
-    #[test]
-    fn filter_incremental_insert() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        col.insert(&rt, 2);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.borrow().elements.len(), 1);
-
-        col.insert(&rt, 4);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.borrow().elements.len(), 2);
-
-        col.insert(&rt, 3);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.borrow().elements.len(), 2);
-    }
-
-    #[test]
-    fn filter_incremental_delete() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        col.insert(&rt, 2);
-        col.insert(&rt, 4);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.borrow().elements.len(), 2);
-
-        col.delete(&rt, &2);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.borrow().elements.len(), 1);
-    }
-
-    #[test]
-    fn filter_chained() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let positive = col.filter(&rt, |x| *x > 0);
-        let small = positive.filter(&rt, |x| *x < 10);
-
-        col.insert(&rt, -5);
-        col.insert(&rt, 3);
-        col.insert(&rt, 15);
-        col.insert(&rt, 7);
-
-        let _ = rt.get(small.version_node);
-        assert_eq!(small.log.borrow().elements.len(), 2);
-    }
-
-    #[test]
-    fn map_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let doubled = col.map(&rt, |x| x * 2);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-
-        let _ = rt.get(doubled.version_node);
-        let elements: Vec<i64> = doubled.log.borrow().elements_vec();
-        assert_eq!(elements.len(), 3);
-        assert!(elements.contains(&2));
-        assert!(elements.contains(&4));
-        assert!(elements.contains(&6));
-    }
-
-    #[test]
-    fn map_delete_propagates() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let doubled = col.map(&rt, |x| x * 2);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        let _ = rt.get(doubled.version_node);
-        assert_eq!(doubled.log.borrow().elements.len(), 2);
-
-        col.delete(&rt, &1);
-        let _ = rt.get(doubled.version_node);
-        assert_eq!(doubled.log.borrow().elements.len(), 1);
-        assert!(doubled.log.borrow().elements.contains_key(&4));
-    }
-
-    #[test]
-    fn filter_then_map() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let doubled = evens.map(&rt, |x| x * 2);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-
-        let _ = rt.get(doubled.version_node);
-        let elements: Vec<i64> = doubled.log.borrow().elements_vec();
-        assert_eq!(elements.len(), 2);
-        assert!(elements.contains(&4));
-        assert!(elements.contains(&8));
-    }
-
-    #[test]
-    fn count_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let count = col.count(&rt);
-
-        assert_eq!(rt.get(count), 0);
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(count), 1);
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(count), 2);
-        col.delete(&rt, &1);
-        assert_eq!(rt.get(count), 1);
-    }
-
-    #[test]
-    fn count_after_filter() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let count = evens.count(&rt);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-
-        assert_eq!(rt.get(count), 2);
-
-        col.insert(&rt, 6);
-        assert_eq!(rt.get(count), 3);
-
-        col.delete(&rt, &2);
-        assert_eq!(rt.get(count), 2);
-    }
-
-    #[test]
-    fn count_early_cutoff() {
-        use std::cell::Cell as StdCell;
-        use std::rc::Rc as StdRc;
-
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let count = evens.count(&rt);
-
-        let downstream_count = StdRc::new(StdCell::new(0_u32));
-        let dc = downstream_count.clone();
-        let label = rt.create_query(move |rt| {
-            dc.set(dc.get() + 1);
-            format!("{} evens", rt.get(count))
-        });
-
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(label), "1 evens");
-        assert_eq!(downstream_count.get(), 1);
-
-        col.insert(&rt, 3); // odd — count unchanged
-        assert_eq!(rt.get(label), "1 evens");
-        assert_eq!(downstream_count.get(), 1); // early cutoff!
-    }
-
-    #[test]
-    fn reduce_sum() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sum = col.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-        assert_eq!(rt.get(sum), 0); // empty collection
-        col.insert(&rt, 10);
-        assert_eq!(rt.get(sum), 10);
-        col.insert(&rt, 20);
-        assert_eq!(rt.get(sum), 30);
-        col.delete(&rt, &10);
-        assert_eq!(rt.get(sum), 20);
-    }
-
-    #[test]
-    fn reduce_max() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let max = col.reduce(&rt, |elements| -> Option<i64> {
-            elements.iter().copied().max()
-        });
-
-        assert_eq!(rt.get(max), None);
-        col.insert(&rt, 5);
-        assert_eq!(rt.get(max), Some(5));
-        col.insert(&rt, 3);
-        assert_eq!(rt.get(max), Some(5));
-        col.insert(&rt, 8);
-        assert_eq!(rt.get(max), Some(8));
-        col.delete(&rt, &8);
-        assert_eq!(rt.get(max), Some(5));
-    }
-
-    #[test]
-    fn reduce_after_filter() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let sum = evens.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-        assert_eq!(rt.get(sum), 6);
-
-        col.insert(&rt, 6);
-        assert_eq!(rt.get(sum), 12);
-
-        col.delete(&rt, &2);
-        assert_eq!(rt.get(sum), 10);
-    }
-
-    #[test]
-    fn reduce_early_cutoff() {
-        use std::cell::Cell as StdCell;
-        use std::rc::Rc as StdRc;
-
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let max = col.reduce(&rt, |elements| -> Option<i64> {
-            elements.iter().copied().max()
-        });
-
-        let downstream_count = StdRc::new(StdCell::new(0_u32));
-        let dc = downstream_count.clone();
-        let label = rt.create_query(move |rt| {
-            dc.set(dc.get() + 1);
-            format!("max={:?}", rt.get(max))
-        });
-
-        col.insert(&rt, 5);
-        assert_eq!(rt.get(label), "max=Some(5)");
-        assert_eq!(downstream_count.get(), 1);
-
-        col.insert(&rt, 3); // doesn't change max
-        assert_eq!(rt.get(label), "max=Some(5)");
-        assert_eq!(downstream_count.get(), 1); // early cutoff!
-    }
-
-    #[test]
-    fn group_by_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let grouped = col.group_by(&rt, |x: &(String, i64)| x.0.clone());
-        col.insert(&rt, ("a".to_string(), 1));
-        col.insert(&rt, ("b".to_string(), 2));
-        col.insert(&rt, ("a".to_string(), 3));
-        let _ = rt.get(grouped.version_node);
-        let groups = grouped.groups.borrow();
-        assert_eq!(groups.len(), 2);
-        assert_eq!(groups.get("a").unwrap().elements().len(), 2);
-        assert_eq!(groups.get("b").unwrap().elements().len(), 1);
-    }
-
-    #[test]
-    fn group_by_delete() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let grouped = col.group_by(&rt, |x: &(String, i64)| x.0.clone());
-        col.insert(&rt, ("a".to_string(), 1));
-        col.insert(&rt, ("a".to_string(), 2));
-        let _ = rt.get(grouped.version_node);
-        col.delete(&rt, &("a".to_string(), 1));
-        let _ = rt.get(grouped.version_node);
-        let groups = grouped.groups.borrow();
-        assert_eq!(groups.get("a").unwrap().elements().len(), 1);
-    }
-
-    #[test]
-    fn join_basic() {
-        let rt = Runtime::new();
-        let left = rt.create_collection::<(String, i64)>();
-        let right = rt.create_collection::<(String, String)>();
-        let joined = left.join(
-            &rt,
-            &right,
-            |l: &(String, i64)| l.0.clone(),
-            |r: &(String, String)| r.0.clone(),
-        );
-        left.insert(&rt, ("a".to_string(), 1));
-        left.insert(&rt, ("b".to_string(), 2));
-        right.insert(&rt, ("a".to_string(), "x".to_string()));
-        right.insert(&rt, ("c".to_string(), "y".to_string()));
-        let _ = rt.get(joined.version_node);
-        let elems = joined.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&(("a".to_string(), 1), ("a".to_string(), "x".to_string()))));
-    }
-
-    #[test]
-    fn join_delete_propagates() {
-        let rt = Runtime::new();
-        let left = rt.create_collection::<(i64, i64)>();
-        let right = rt.create_collection::<(i64, i64)>();
-        let joined = left.join(&rt, &right, |l: &(i64, i64)| l.0, |r: &(i64, i64)| r.0);
-        left.insert(&rt, (1, 10));
-        right.insert(&rt, (1, 100));
-        let _ = rt.get(joined.version_node);
-        assert_eq!(joined.elements().len(), 1);
-        left.delete(&rt, &(1, 10));
-        let _ = rt.get(joined.version_node);
-        assert_eq!(joined.elements().len(), 0);
-    }
-}
diff --git a/crates/incr-compute/src/graph.rs b/crates/incr-compute/src/graph.rs
deleted file mode 100644
index db2afd4..0000000
--- a/crates/incr-compute/src/graph.rs
+++ /dev/null
@@ -1,120 +0,0 @@
-use crate::types::{NodeId, Revision};
-use std::any::Any;
-
-#[derive(Clone, Copy, PartialEq, Eq, Debug)]
-pub(crate) enum NodeState {
-    Clean,
-    Dirty,
-    New,
-}
-
-#[derive(Debug)]
-pub(crate) enum NodeKind {
-    Input,
-    Compute(usize), // Index into Graph::funcs
-}
-
-pub(crate) struct NodeData {
-    pub state: NodeState,
-    pub value: Option<Box<dyn Any>>,
-    pub verified_at: Revision,
-    pub changed_at: Revision,
-    pub dependents: Vec<NodeId>,   // Forward edges: who depends on me
-    pub dependencies: Vec<NodeId>, // Backward edges: who do I depend on
-}
-
-#[allow(clippy::type_complexity)]
-pub(crate) struct ComputeEntry {
-    pub func: Box<dyn Fn(&crate::runtime::Runtime) -> Box<dyn Any>>,
-    pub eq_fn: Box<dyn Fn(&dyn Any, &dyn Any) -> bool>,
-}
-
-#[allow(dead_code)]
-pub(crate) struct Graph {
-    pub nodes: Vec<NodeData>,
-    pub kinds: Vec<NodeKind>,
-    pub funcs: Vec<ComputeEntry>,
-}
-
-#[allow(dead_code)]
-impl Graph {
-    pub fn new() -> Self {
-        Graph {
-            nodes: Vec::new(),
-            kinds: Vec::new(),
-            funcs: Vec::new(),
-        }
-    }
-
-    pub fn add_input(&mut self, value: Box<dyn Any>, revision: Revision) -> NodeId {
-        let id = NodeId(self.nodes.len() as u32);
-        self.nodes.push(NodeData {
-            state: NodeState::Clean,
-            value: Some(value),
-            verified_at: revision,
-            changed_at: revision,
-            dependents: Vec::new(),
-            dependencies: Vec::new(),
-        });
-        self.kinds.push(NodeKind::Input);
-        id
-    }
-
-    pub fn add_compute(&mut self, entry: ComputeEntry) -> NodeId {
-        let func_idx = self.funcs.len();
-        self.funcs.push(entry);
-        let id = NodeId(self.nodes.len() as u32);
-        self.nodes.push(NodeData {
-            state: NodeState::New,
-            value: None,
-            verified_at: Revision::default(),
-            changed_at: Revision::default(),
-            dependents: Vec::new(),
-            dependencies: Vec::new(),
-        });
-        self.kinds.push(NodeKind::Compute(func_idx));
-        id
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::types::Revision;
-
-    #[test]
-    fn create_input_node() {
-        let mut graph = Graph::new();
-        let id = graph.add_input(Box::new(42_i64), Revision(1));
-        assert_eq!(id, NodeId(0));
-        assert_eq!(graph.nodes[0].state, NodeState::Clean);
-        assert!(graph.nodes[0].value.as_ref().unwrap().downcast_ref::<i64>() == Some(&42));
-    }
-
-    #[test]
-    fn create_compute_node() {
-        let mut graph = Graph::new();
-        let entry = ComputeEntry {
-            func: Box::new(|_| Box::new(0_i64)),
-            eq_fn: Box::new(|a, b| a.downcast_ref::<i64>() == b.downcast_ref::<i64>()),
-        };
-        let id = graph.add_compute(entry);
-        assert_eq!(id, NodeId(0));
-        assert_eq!(graph.nodes[0].state, NodeState::New);
-        assert!(graph.nodes[0].value.is_none());
-    }
-
-    #[test]
-    fn nodes_get_sequential_ids() {
-        let mut graph = Graph::new();
-        let a = graph.add_input(Box::new(1_i64), Revision(1));
-        let b = graph.add_input(Box::new(2_i64), Revision(1));
-        let c = graph.add_compute(ComputeEntry {
-            func: Box::new(|_| Box::new(0_i64)),
-            eq_fn: Box::new(|a, b| a.downcast_ref::<i64>() == b.downcast_ref::<i64>()),
-        });
-        assert_eq!(a, NodeId(0));
-        assert_eq!(b, NodeId(1));
-        assert_eq!(c, NodeId(2));
-    }
-}
diff --git a/crates/incr-compute/src/lib.rs b/crates/incr-compute/src/lib.rs
index c7e99ef..6920202 100644
--- a/crates/incr-compute/src/lib.rs
+++ b/crates/incr-compute/src/lib.rs
@@ -1,12 +1,47 @@
-mod collection;
-mod graph;
-mod runtime;
-mod sorted_collection;
-mod types;
-
-pub use collection::{GroupedCollection, IncrCollection};
-pub use runtime::Runtime;
-pub use sorted_collection::{SortDelta, SortedCollection};
-pub use types::{
-    Incr, NodeId, NodeInfo, NodeKindInfo, NodeTrace, PropagationTrace, Revision, TraceAction,
+//! `incr-compute`: single-threaded incremental computation engine.
+//!
+//! Since 0.2, this crate is a thin re-export of [`incr_core`] with the
+//! [`Local`] strategy. The `Runtime` type is single-threaded (`!Sync`),
+//! pays no atomic-fence cost on its hot path, and has zero atomic ops in
+//! its uncontended access patterns. Same API surface as the concurrent
+//! sibling `incr-concurrent`: switching is a one-line dependency swap.
+//!
+//! ## API status
+//!
+//! - Function DAG: `Runtime`, `Incr<T>`, `create_input`, `create_query`,
+//!   `get`, `set`, `node_count`, `graph_snapshot`, `get_traced`. All
+//!   functional. `get_traced` returns timing data but not per-node
+//!   trace events; full tracing lands alongside the dashboard demo.
+//! - Operators: `filter`, `map`, `count`, `reduce`, `sort_by_key`,
+//!   `pairwise`, `window`, `group_by`, `join`. All functional under
+//!   `Local`.
+//! - Soundness: `set()` on a query node panics with a clear message
+//!   (was undefined behavior in v0.1).
+//!
+//! Migration from 0.1: mostly an import change. Closure bounds tightened to
+//! `Fn + Send + Sync + 'static` (as in `incr-concurrent`; most user types meet
+//! them); `SortedCollection` gained a `K` parameter; `Revision` is not re-exported.
+
+#![doc(html_no_source)]
+
+use incr_core::Local;
+
+pub use incr_core::{
+    Delta, GroupedCollection as GroupedCollectionInner, Incr,
+    IncrCollection as IncrCollectionInner, NodeId, NodeInfo, NodeKindInfo, NodeState, NodeTrace,
+    PropagationTrace, RuntimeId, SortDelta, SortedCollection as SortedCollectionInner, TraceAction,
+    Value,
 };
+
+/// Single-threaded runtime: `Runtime<Local>`. Not `Send`/`Sync`. Use the
+/// `incr-concurrent` crate for the multi-threaded equivalent.
+pub type Runtime = incr_core::Runtime<Local>;
+
+/// Single-threaded incremental collection: `IncrCollection<T, Local>`.
+pub type IncrCollection<T> = IncrCollectionInner<T, Local>;
+
+/// Single-threaded grouped collection: `GroupedCollection<K, T, Local>`.
+pub type GroupedCollection<K, T> = GroupedCollectionInner<K, T, Local>;
+
+/// Single-threaded sorted collection: `SortedCollection<T, K, Local>`.
+pub type SortedCollection<T, K> = SortedCollectionInner<T, K, Local>;
diff --git a/crates/incr-compute/src/runtime.rs b/crates/incr-compute/src/runtime.rs
deleted file mode 100644
index 39b9996..0000000
--- a/crates/incr-compute/src/runtime.rs
+++ /dev/null
@@ -1,815 +0,0 @@
-use crate::collection::{CollectionLog, IncrCollection};
-use crate::graph::{ComputeEntry, NodeKind, NodeState};
-use crate::types::{
-    Incr, NodeId, NodeInfo, NodeKindInfo, NodeTrace, PropagationTrace, Revision, TraceAction,
-};
-use std::any::Any;
-use std::cell::{Cell, RefCell};
-use std::hash::Hash;
-use std::marker::PhantomData;
-use std::rc::Rc;
-
-/// The incremental computation runtime.
-///
-/// Creation methods (`create_input`, `create_query`) take `&self`.
-/// Access methods (`get`, `set`) take `&self` using interior mutability.
-pub struct Runtime {
-    /// Node data (values, state, edges). Interior mutability for access during compute.
-    nodes: RefCell<Vec<crate::graph::NodeData>>,
-    /// Node kinds (Input or Compute). RefCell for &self creation methods.
-    kinds: RefCell<Vec<NodeKind>>,
-    /// Compute functions. RefCell for &self creation methods. Stored separately from nodes
-    /// to avoid borrow conflicts: we read a function while mutating node data.
-    funcs: RefCell<Vec<ComputeEntry>>,
-    /// Global revision counter. Incremented on every input mutation.
-    revision: Cell<Revision>,
-    /// Stack of dependency recordings. Each frame records which nodes are read
-    /// during a compute function's execution. Stack handles nested compute calls.
-    dep_stack: RefCell<Vec<Vec<NodeId>>>,
-    /// Set of nodes currently being computed. Used for cycle detection.
-    computing: RefCell<Vec<NodeId>>,
-    /// Optional display labels for nodes (for introspection/debugging).
-    labels: RefCell<Vec<Option<String>>>,
-    /// When true, compute_node records trace events into trace_log.
-    tracing_enabled: Cell<bool>,
-    /// Trace events collected during the current get_traced() call.
-    trace_log: RefCell<Vec<NodeTrace>>,
-}
-
-impl Default for Runtime {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl Runtime {
-    pub fn new() -> Self {
-        Runtime {
-            nodes: RefCell::new(Vec::new()),
-            kinds: RefCell::new(Vec::new()),
-            funcs: RefCell::new(Vec::new()),
-            revision: Cell::new(Revision(1)), // Start at 1; default 0 means "never verified"
-            dep_stack: RefCell::new(Vec::new()),
-            computing: RefCell::new(Vec::new()),
-            labels: RefCell::new(Vec::new()),
-            tracing_enabled: Cell::new(false),
-            trace_log: RefCell::new(Vec::new()),
-        }
-    }
-
-    /// Create an input node with an initial value.
-    pub fn create_input<T>(&self, value: T) -> Incr<T>
-    where
-        T: Any + Clone + PartialEq + 'static,
-    {
-        assert!(
-            self.dep_stack.borrow().is_empty(),
-            "cannot create nodes during computation"
-        );
-        let revision = self.revision.get();
-        let id = {
-            let mut nodes = self.nodes.borrow_mut();
-            let id = NodeId(nodes.len() as u32);
-            nodes.push(crate::graph::NodeData {
-                state: NodeState::Clean,
-                value: Some(Box::new(value)),
-                verified_at: revision,
-                changed_at: revision,
-                dependents: Vec::new(),
-                dependencies: Vec::new(),
-            });
-            id
-        };
-        self.kinds.borrow_mut().push(NodeKind::Input);
-        self.labels.borrow_mut().push(None);
-        Incr {
-            id,
-            _phantom: PhantomData,
-        }
-    }
-
-    /// Create a compute node defined by a pure function.
-    /// The function receives `&Runtime` and calls `rt.get()` to read dependencies.
-    /// Dependencies are automatically tracked — no manual wiring needed.
-    pub fn create_query<T, F>(&self, f: F) -> Incr<T>
-    where
-        T: Any + Clone + PartialEq + 'static,
-        F: Fn(&Runtime) -> T + 'static,
-    {
-        assert!(
-            self.dep_stack.borrow().is_empty(),
-            "cannot create nodes during computation"
-        );
-        let func = Box::new(move |rt: &Runtime| -> Box<dyn Any> { Box::new(f(rt)) });
-        let eq_fn = Box::new(|a: &dyn Any, b: &dyn Any| -> bool {
-            a.downcast_ref::<T>().unwrap() == b.downcast_ref::<T>().unwrap()
-        });
-        let entry = ComputeEntry { func, eq_fn };
-
-        let func_idx = self.funcs.borrow().len();
-        self.funcs.borrow_mut().push(entry);
-        let id = {
-            let mut nodes = self.nodes.borrow_mut();
-            let id = NodeId(nodes.len() as u32);
-            nodes.push(crate::graph::NodeData {
-                state: NodeState::New,
-                value: None,
-                verified_at: Revision::default(),
-                changed_at: Revision::default(),
-                dependents: Vec::new(),
-                dependencies: Vec::new(),
-            });
-            id
-        };
-        self.kinds.borrow_mut().push(NodeKind::Compute(func_idx));
-        self.labels.borrow_mut().push(None);
-
-        Incr {
-            id,
-            _phantom: PhantomData,
-        }
-    }
-
-    /// Read the current value of a node. If the node is dirty or new,
-    /// triggers recomputation of the minimum necessary subgraph.
-    pub fn get<T>(&self, node: Incr<T>) -> T
-    where
-        T: Any + Clone + 'static,
-    {
-        // Record dependency if we're inside a compute function
-        {
-            let mut stack = self.dep_stack.borrow_mut();
-            if let Some(frame) = stack.last_mut() {
-                frame.push(node.id);
-            }
-        }
-
-        // Ensure the node is up-to-date
-        self.ensure_clean(node.id);
-
-        // Read and clone the value
-        let nodes = self.nodes.borrow();
-        let node_data = &nodes[node.id.0 as usize];
-        node_data
-            .value
-            .as_ref()
-            .expect("node has no value after ensure_clean")
-            .downcast_ref::<T>()
-            .expect("type mismatch in get()")
-            .clone()
-    }
-
-    /// Set a new value for an input node. If the value differs from the current one,
-    /// increments the global revision and marks all transitive dependents as dirty.
-    pub fn set<T>(&self, node: Incr<T>, value: T)
-    where
-        T: Any + Clone + PartialEq + 'static,
-    {
-        {
-            let nodes = self.nodes.borrow();
-            let node_data = &nodes[node.id.0 as usize];
-            if let Some(old) = &node_data.value {
-                if let Some(old_val) = old.downcast_ref::<T>() {
-                    if *old_val == value {
-                        return; // Same value — no-op
-                    }
-                }
-            }
-        }
-
-        let mut rev = self.revision.get();
-        rev.increment();
-        self.revision.set(rev);
-
-        // Update the input's value and timestamps
-        let dependents = {
-            let mut nodes = self.nodes.borrow_mut();
-            let node_data = &mut nodes[node.id.0 as usize];
-            node_data.value = Some(Box::new(value));
-            node_data.changed_at = rev;
-            node_data.verified_at = rev;
-            node_data.dependents.clone()
-        };
-
-        // Mark all transitive dependents as dirty
-        self.mark_dirty_transitive(&dependents);
-    }
-
-    pub fn create_collection<T>(&self) -> IncrCollection<T>
-    where
-        T: Any + Clone + Hash + Eq + 'static,
-    {
-        assert!(
-            self.dep_stack.borrow().is_empty(),
-            "cannot create nodes during computation"
-        );
-        let log = Rc::new(RefCell::new(CollectionLog::new()));
-        let version_node = self.create_input(0_u64);
-        IncrCollection { log, version_node }
-    }
-
-    /// Like `create_collection` but skips the dep_stack assertion. Used internally
-    /// by operators (e.g. group_by) that lazily create sub-collections during
-    /// compute closures. The caller is responsible for not using the resulting
-    /// version_node as a tracked dependency of the current computation.
-    pub(crate) fn create_collection_in_compute<T>(&self) -> IncrCollection<T>
-    where
-        T: Any + Clone + Hash + Eq + 'static,
-    {
-        let log = Rc::new(RefCell::new(CollectionLog::new()));
-        let version_node = self.create_input_in_compute(0_u64);
-        IncrCollection { log, version_node }
-    }
-
-    /// Like `create_input` but skips the dep_stack assertion. Used by operators
-    /// that need to create input nodes during compute closures.
-    pub(crate) fn create_input_in_compute<T>(&self, value: T) -> Incr<T>
-    where
-        T: Any + Clone + PartialEq + 'static,
-    {
-        let revision = self.revision.get();
-        let id = {
-            let mut nodes = self.nodes.borrow_mut();
-            let id = NodeId(nodes.len() as u32);
-            nodes.push(crate::graph::NodeData {
-                state: NodeState::Clean,
-                value: Some(Box::new(value)),
-                verified_at: revision,
-                changed_at: revision,
-                dependents: Vec::new(),
-                dependencies: Vec::new(),
-            });
-            id
-        };
-        self.kinds.borrow_mut().push(NodeKind::Input);
-        self.labels.borrow_mut().push(None);
-        Incr {
-            id,
-            _phantom: PhantomData,
-        }
-    }
-
-    /// Assign a human-readable label to a node for visualization/debugging.
-    pub fn set_label(&self, id: NodeId, label: String) {
-        self.labels.borrow_mut()[id.0 as usize] = Some(label);
-    }
-
-    /// Enable or disable execution tracing. When enabled, compute_node records
-    /// which nodes were visited, recomputed, or cut off during get() calls.
-    pub fn set_tracing(&self, enabled: bool) {
-        self.tracing_enabled.set(enabled);
-    }
-
-    /// Like get(), but also returns a trace of which nodes were processed.
-    /// Clears the trace log before running, so the trace reflects only this call.
-    pub fn get_traced<T>(&self, node: Incr<T>) -> (T, PropagationTrace)
-    where
-        T: Any + Clone + 'static,
-    {
-        let was_enabled = self.tracing_enabled.get();
-        self.tracing_enabled.set(true);
-        self.trace_log.borrow_mut().clear();
-
-        let start = std::time::Instant::now();
-        let value = self.get(node);
-        let elapsed_ns = start.elapsed().as_nanos() as u64;
-
-        self.tracing_enabled.set(was_enabled);
-
-        let log = self.trace_log.borrow();
-        let total_nodes = self.nodes.borrow().len();
-        let nodes_recomputed = log
-            .iter()
-            .filter(|t| matches!(t.action, TraceAction::Recomputed { .. }))
-            .count();
-        let nodes_cutoff = log
-            .iter()
-            .filter(|t| {
-                matches!(
-                    t.action,
-                    TraceAction::Recomputed {
-                        value_changed: false
-                    }
-                )
-            })
-            .count();
-
-        let trace = PropagationTrace {
-            target: node.id,
-            node_traces: log.clone(),
-            total_nodes,
-            nodes_recomputed,
-            nodes_cutoff,
-            elapsed_ns,
-        };
-
-        (value, trace)
-    }
-
-    /// Return structural info about every node in the graph.
-    pub fn graph_snapshot(&self) -> Vec<NodeInfo> {
-        let nodes = self.nodes.borrow();
-        let kinds = self.kinds.borrow();
-        let labels = self.labels.borrow();
-
-        (0..nodes.len())
-            .map(|i| {
-                let id = NodeId(i as u32);
-                NodeInfo {
-                    id,
-                    kind: match &kinds[i] {
-                        NodeKind::Input => NodeKindInfo::Input,
-                        NodeKind::Compute(_) => NodeKindInfo::Compute,
-                    },
-                    label: labels[i].clone(),
-                    dependencies: nodes[i].dependencies.clone(),
-                    dependents: nodes[i].dependents.clone(),
-                }
-            })
-            .collect()
-    }
-
-    /// Return the number of nodes in the graph.
-    pub fn node_count(&self) -> usize {
-        self.nodes.borrow().len()
-    }
-
-    /// Walk forward from the given nodes, marking all reachable compute nodes as Dirty.
-    fn mark_dirty_transitive(&self, start: &[NodeId]) {
-        let mut queue: std::collections::VecDeque<NodeId> = start.iter().copied().collect();
-        let mut nodes = self.nodes.borrow_mut();
-        while let Some(id) = queue.pop_front() {
-            let node = &mut nodes[id.0 as usize];
-            if node.state == NodeState::Clean || node.state == NodeState::New {
-                if node.state == NodeState::Clean {
-                    node.state = NodeState::Dirty;
-                }
-                for i in 0..node.dependents.len() {
-                    queue.push_back(node.dependents[i]);
-                }
-            }
-        }
-    }
-
-    /// Ensure a node's value is up-to-date. For inputs, this is always true.
-    /// For compute nodes, iteratively ensures dependencies are clean in
-    /// post-order (dependencies before dependents), then recomputes if necessary.
-    fn ensure_clean(&self, id: NodeId) {
-        // Fast path: already clean
-        if self.nodes.borrow()[id.0 as usize].state == NodeState::Clean {
-            return;
-        }
-
-        // Collect the post-order traversal of nodes that need processing.
-        // Each stack entry is (node_id, visited) where visited=false means
-        // "push deps first", visited=true means "now process this node".
-        let mut work_stack: Vec<(NodeId, bool)> = vec![(id, false)];
-
-        while let Some((cur, visited)) = work_stack.pop() {
-            if visited {
-                // Second visit: all deps should now be clean; process this node
-                self.compute_node(cur);
-                continue;
-            }
-
-            // Single borrow to check state and gather dirty deps
-            let nodes = self.nodes.borrow();
-            let state = nodes[cur.0 as usize].state;
-
-            // Inputs and already-clean nodes need no work
-            if state == NodeState::Clean {
-                continue;
-            }
-            if matches!(self.kinds.borrow()[cur.0 as usize], NodeKind::Input) {
-                continue;
-            }
-
-            // First visit: push self again (to process after deps), then push dirty deps
-            work_stack.push((cur, true));
-            let deps = &nodes[cur.0 as usize].dependencies;
-            for &dep_id in deps {
-                if nodes[dep_id.0 as usize].state != NodeState::Clean {
-                    work_stack.push((dep_id, false));
-                }
-            }
-        }
-    }
-
-    /// Compute (or verify) a single node, assuming all its known dependencies are already clean.
-    fn compute_node(&self, id: NodeId) {
-        // Single borrow to gather state, kind, cycle check, and needs_recompute
-        let (func_idx, needs_recompute) = {
-            let nodes = self.nodes.borrow();
-            let node = &nodes[id.0 as usize];
-
-            // Re-check state (may have been cleaned by an earlier iteration)
-            if node.state == NodeState::Clean {
-                return;
-            }
-
-            let func_idx = match &self.kinds.borrow()[id.0 as usize] {
-                NodeKind::Input => return,
-                NodeKind::Compute(idx) => *idx,
-            };
-
-            // Cycle detection
-            {
-                let computing = self.computing.borrow();
-                if computing.contains(&id) {
-                    panic!("Cycle detected: node {:?} is already being computed", id);
-                }
-            }
-
-            let needs_recompute = match node.state {
-                NodeState::New => true,
-                NodeState::Dirty => {
-                    // Recompute only if a dependency actually changed since last verification
-                    node.dependencies
-                        .iter()
-                        .any(|dep_id| nodes[dep_id.0 as usize].changed_at > node.verified_at)
-                }
-                NodeState::Clean => false,
-            };
-
-            (func_idx, needs_recompute)
-        };
-
-        if !needs_recompute {
-            // Dependencies haven't changed — skip recomputation
-            let mut nodes = self.nodes.borrow_mut();
-            let node = &mut nodes[id.0 as usize];
-            node.state = NodeState::Clean;
-            node.verified_at = self.revision.get();
-            if self.tracing_enabled.get() {
-                self.trace_log.borrow_mut().push(NodeTrace {
-                    id,
-                    action: TraceAction::VerifiedClean,
-                });
-            }
-            return;
-        }
-
-        // Step 2: Execute the compute function
-        self.computing.borrow_mut().push(id);
-        self.dep_stack.borrow_mut().push(Vec::with_capacity(4));
-
-        let new_value = {
-            let funcs = self.funcs.borrow();
-            (funcs[func_idx].func)(self)
-        };
-
-        let new_deps = self.dep_stack.borrow_mut().pop().unwrap();
-        // LIFO pop instead of O(n) retain — computing is always used as a stack
-        self.computing.borrow_mut().pop();
-
-        // Step 3: Check equality BEFORE borrowing nodes mutably
-        // This avoids holding nodes borrow_mut and funcs borrow simultaneously
-        let value_changed = {
-            let nodes = self.nodes.borrow();
-            let node = &nodes[id.0 as usize];
-            match &node.value {
-                Some(old_value) => {
-                    let funcs = self.funcs.borrow();
-                    !(funcs[func_idx].eq_fn)(old_value.as_ref(), new_value.as_ref())
-                }
-                None => true, // First computation
-            }
-        };
-
-        if self.tracing_enabled.get() {
-            self.trace_log.borrow_mut().push(NodeTrace {
-                id,
-                action: TraceAction::Recomputed { value_changed },
-            });
-        }
-
-        // Step 4: Update node state and dependency edges in a single mutable borrow
-        let mut nodes = self.nodes.borrow_mut();
-        let revision = self.revision.get();
-
-        {
-            let node = &mut nodes[id.0 as usize];
-            if value_changed {
-                node.value = Some(new_value);
-                node.changed_at = revision;
-            }
-            node.verified_at = revision;
-            node.state = NodeState::Clean;
-        }
-
-        // Update dependency edges — move new_deps in, take old_deps out
-        let old_deps = std::mem::replace(&mut nodes[id.0 as usize].dependencies, new_deps);
-
-        // Diff edges using the stored new_deps (now at nodes[id].dependencies)
-        // Remove self from dependents of old deps no longer needed
-        for old_dep in &old_deps {
-            if !nodes[id.0 as usize].dependencies.contains(old_dep) {
-                nodes[old_dep.0 as usize].dependents.retain(|d| *d != id);
-            }
-        }
-        // Add self to dependents of new deps not previously present
-        // Must collect indices first since we need to read nodes[id] then mutate others
-        let new_dep_ids: Vec<NodeId> = nodes[id.0 as usize]
-            .dependencies
-            .iter()
-            .filter(|new_dep| !old_deps.contains(new_dep))
-            .copied()
-            .collect();
-        for dep in new_dep_ids {
-            nodes[dep.0 as usize].dependents.push(id);
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::cell::Cell;
-    use std::rc::Rc;
-
-    #[test]
-    fn create_and_get_input() {
-        let rt = Runtime::new();
-        let x = rt.create_input(42_i64);
-        assert_eq!(rt.get(x), 42);
-    }
-
-    #[test]
-    fn set_input_and_get_new_value() {
-        let rt = Runtime::new();
-        let x = rt.create_input(10_i64);
-        assert_eq!(rt.get(x), 10);
-        rt.set(x, 20);
-        assert_eq!(rt.get(x), 20);
-    }
-
-    #[test]
-    fn set_same_value_is_noop() {
-        let rt = Runtime::new();
-        let x = rt.create_input(5_i64);
-        let rev_before = rt.revision.get();
-        rt.set(x, 5);
-        let rev_after = rt.revision.get();
-        assert_eq!(rev_before, rev_after);
-    }
-
-    #[test]
-    fn multiple_inputs() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let b = rt.create_input(2_i64);
-        let c = rt.create_input(3_i64);
-        assert_eq!(rt.get(a), 1);
-        assert_eq!(rt.get(b), 2);
-        assert_eq!(rt.get(c), 3);
-    }
-
-    #[test]
-    fn simple_compute_node() {
-        let rt = Runtime::new();
-        let a = rt.create_input(10_i64);
-        let b = rt.create_query(move |rt| rt.get(a) * 2);
-        assert_eq!(rt.get(b), 20);
-    }
-
-    #[test]
-    fn compute_reads_multiple_inputs() {
-        let rt = Runtime::new();
-        let x = rt.create_input(3_i64);
-        let y = rt.create_input(4_i64);
-        let sum = rt.create_query(move |rt| rt.get(x) + rt.get(y));
-        assert_eq!(rt.get(sum), 7);
-    }
-
-    #[test]
-    fn chained_compute_nodes() {
-        let rt = Runtime::new();
-        let a = rt.create_input(5_i64);
-        let b = rt.create_query(move |rt| rt.get(a) + 1);
-        let c = rt.create_query(move |rt| rt.get(b) * 2);
-        assert_eq!(rt.get(c), 12); // (5 + 1) * 2
-    }
-
-    #[test]
-    fn diamond_dependency_first_computation() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let b = rt.create_query(move |rt| rt.get(a) + 10);
-        let c = rt.create_query(move |rt| rt.get(a) + 100);
-        let d = rt.create_query(move |rt| rt.get(b) + rt.get(c));
-        assert_eq!(rt.get(d), 112); // (1+10) + (1+100)
-    }
-
-    #[test]
-    fn input_change_triggers_recomputation() {
-        let rt = Runtime::new();
-        let a = rt.create_input(10_i64);
-        let b = rt.create_query(move |rt| rt.get(a) * 2);
-        assert_eq!(rt.get(b), 20);
-
-        rt.set(a, 15);
-        assert_eq!(rt.get(b), 30);
-    }
-
-    #[test]
-    fn chain_recomputation() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let b = rt.create_query(move |rt| rt.get(a) + 10);
-        let c = rt.create_query(move |rt| rt.get(b) * 2);
-        assert_eq!(rt.get(c), 22); // (1+10)*2
-
-        rt.set(a, 5);
-        assert_eq!(rt.get(c), 30); // (5+10)*2
-    }
-
-    #[test]
-    fn diamond_recomputation() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let b = rt.create_query(move |rt| rt.get(a) + 10);
-        let c = rt.create_query(move |rt| rt.get(a) + 100);
-        let d = rt.create_query(move |rt| rt.get(b) + rt.get(c));
-
-        assert_eq!(rt.get(d), 112); // (1+10) + (1+100)
-        rt.set(a, 2);
-        assert_eq!(rt.get(d), 114); // (2+10) + (2+100)
-    }
-
-    #[test]
-    fn only_affected_nodes_recompute() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let b = rt.create_input(2_i64);
-
-        let count_a = Rc::new(Cell::new(0_u32));
-        let count_b = Rc::new(Cell::new(0_u32));
-
-        let ca = count_a.clone();
-        let derived_a = rt.create_query(move |rt| {
-            ca.set(ca.get() + 1);
-            rt.get(a) * 10
-        });
-
-        let cb = count_b.clone();
-        let derived_b = rt.create_query(move |rt| {
-            cb.set(cb.get() + 1);
-            rt.get(b) * 10
-        });
-
-        // Initial computation
-        assert_eq!(rt.get(derived_a), 10);
-        assert_eq!(rt.get(derived_b), 20);
-        assert_eq!(count_a.get(), 1);
-        assert_eq!(count_b.get(), 1);
-
-        // Change only input a — derived_b should NOT recompute
-        rt.set(a, 5);
-        assert_eq!(rt.get(derived_a), 50);
-        assert_eq!(rt.get(derived_b), 20);
-        assert_eq!(count_a.get(), 2); // recomputed
-        assert_eq!(count_b.get(), 1); // NOT recomputed
-    }
-
-    #[test]
-    fn multiple_mutations_before_get() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let compute_count = Rc::new(Cell::new(0_u32));
-        let cc = compute_count.clone();
-        let b = rt.create_query(move |rt| {
-            cc.set(cc.get() + 1);
-            rt.get(a) + 100
-        });
-
-        assert_eq!(rt.get(b), 101);
-        assert_eq!(compute_count.get(), 1);
-
-        // Multiple sets before reading — only one recomputation on get
-        rt.set(a, 2);
-        rt.set(a, 3);
-        rt.set(a, 4);
-        assert_eq!(rt.get(b), 104);
-        assert_eq!(compute_count.get(), 2); // Only one recomputation, not three
-    }
-
-    #[test]
-    fn early_cutoff_stops_propagation() {
-        let rt = Runtime::new();
-        let a = rt.create_input(50_i64);
-
-        let b_count = Rc::new(Cell::new(0_u32));
-        let bc = b_count.clone();
-        let b = rt.create_query(move |rt| {
-            bc.set(bc.get() + 1);
-            rt.get(a).min(100) // Clamp to max 100
-        });
-
-        let c_count = Rc::new(Cell::new(0_u32));
-        let cc = c_count.clone();
-        let c = rt.create_query(move |rt| {
-            cc.set(cc.get() + 1);
-            rt.get(b) + 1
-        });
-
-        // Initial
-        assert_eq!(rt.get(c), 51); // min(50, 100) + 1
-        assert_eq!(b_count.get(), 1);
-        assert_eq!(c_count.get(), 1);
-
-        // Change A to 60 — B changes (60 != 50), C recomputes
-        rt.set(a, 60);
-        assert_eq!(rt.get(c), 61);
-        assert_eq!(b_count.get(), 2);
-        assert_eq!(c_count.get(), 2);
-
-        // Change A to 200 — B produces 100
-        rt.set(a, 200);
-        assert_eq!(rt.get(c), 101); // 100 + 1
-        assert_eq!(b_count.get(), 3);
-        assert_eq!(c_count.get(), 3);
-
-        // Change A to 300 — B still 100 (clamped), SAME as before! Early cutoff!
-        rt.set(a, 300);
-        assert_eq!(rt.get(c), 101); // Still 100 + 1
-        assert_eq!(b_count.get(), 4); // B recomputed (has to check)
-        assert_eq!(c_count.get(), 3); // C did NOT recompute — early cutoff!
-    }
-
-    #[test]
-    fn verification_skip_without_recomputation() {
-        let rt = Runtime::new();
-        let a = rt.create_input(5_i64);
-        let unrelated = rt.create_input(100_i64);
-
-        let b = rt.create_query(move |rt| rt.get(a).min(10)); // Clamped
-
-        let d_count = Rc::new(Cell::new(0_u32));
-        let dc = d_count.clone();
-        let d = rt.create_query(move |rt| {
-            dc.set(dc.get() + 1);
-            rt.get(unrelated) + rt.get(b)
-        });
-
-        assert_eq!(rt.get(d), 105); // 100 + 5
-        assert_eq!(d_count.get(), 1);
-
-        // Change A from 5 to 8 — B changes from 5 to 8
-        rt.set(a, 8);
-        assert_eq!(rt.get(d), 108);
-        assert_eq!(d_count.get(), 2);
-
-        // Change A from 8 to 15 — B clamped to 10
-        rt.set(a, 15);
-        assert_eq!(rt.get(d), 110);
-        assert_eq!(d_count.get(), 3);
-
-        // Change A from 15 to 20 — B still clamped to 10, SAME value
-        rt.set(a, 20);
-        assert_eq!(rt.get(d), 110);
-        assert_eq!(d_count.get(), 3); // D did not recompute
-    }
-
-    #[test]
-    fn dynamic_dependency_switch() {
-        let rt = Runtime::new();
-        let flag = rt.create_input(true);
-        let a = rt.create_input(10_i64);
-        let b = rt.create_input(20_i64);
-
-        let a_count = Rc::new(Cell::new(0_u32));
-        let b_count = Rc::new(Cell::new(0_u32));
-        let ac = a_count.clone();
-        let bc = b_count.clone();
-
-        let result = rt.create_query(move |rt| {
-            if rt.get(flag) {
-                ac.set(ac.get() + 1);
-                rt.get(a)
-            } else {
-                bc.set(bc.get() + 1);
-                rt.get(b)
-            }
-        });
-
-        // Flag is true — reads A
-        assert_eq!(rt.get(result), 10);
-
-        // Switch flag to false — now reads B
-        rt.set(flag, false);
-        assert_eq!(rt.get(result), 20);
-
-        // Change A — result should NOT recompute (no longer depends on A)
-        rt.set(a, 99);
-        assert_eq!(rt.get(result), 20);
-    }
-
-    #[test]
-    fn cycle_detection_no_false_positives() {
-        let rt = Runtime::new();
-        let a = rt.create_input(1_i64);
-        let b = rt.create_query(move |rt| rt.get(a) + 1);
-        let c = rt.create_query(move |rt| rt.get(b) + 1);
-        assert_eq!(rt.get(c), 3); // No cycle panic
-    }
-}
diff --git a/crates/incr-compute/src/sorted_collection.rs b/crates/incr-compute/src/sorted_collection.rs
deleted file mode 100644
index eb9a5fe..0000000
--- a/crates/incr-compute/src/sorted_collection.rs
+++ /dev/null
@@ -1,460 +0,0 @@
-use std::any::Any;
-use std::cell::{Cell, RefCell};
-use std::hash::Hash;
-use std::rc::Rc;
-
-use crate::collection::{CollectionLog, IncrCollection};
-use crate::runtime::Runtime;
-use crate::types::Incr;
-
-#[derive(Clone, Debug)]
-pub enum SortDelta<T> {
-    Inserted { index: usize, value: T },
-    Removed { index: usize, value: T },
-}
-
-pub struct SortedCollection<T: Clone + 'static> {
-    pub(crate) ordered_values: Rc<RefCell<Vec<T>>>,
-    pub(crate) pending_deltas: Rc<RefCell<Vec<SortDelta<T>>>>,
-    pub(crate) version_node: Incr<u64>,
-}
-
-impl<T: Clone + 'static> SortedCollection<T> {
-    /// Get a snapshot of the current sorted order.
-    pub fn entries(&self) -> Vec<T> {
-        self.ordered_values.borrow().clone()
-    }
-
-    pub fn version_node_id(&self) -> crate::types::NodeId {
-        self.version_node.node_id()
-    }
-
-    pub fn version_node(&self) -> Incr<u64> {
-        self.version_node
-    }
-}
-
-impl<T: Any + Clone + Hash + Eq + 'static> SortedCollection<T> {
-    pub fn window(&self, rt: &Runtime, size: usize) -> IncrCollection<Vec<T>>
-    where
-        T: Eq + Hash,
-    {
-        let ordered_values = self.ordered_values.clone();
-        let sorted_ver = self.version_node;
-        let output_log = Rc::new(RefCell::new(CollectionLog::<Vec<T>>::new()));
-        let output_log_ref = output_log.clone();
-        let prev_windows: Rc<RefCell<Vec<Vec<T>>>> = Rc::new(RefCell::new(Vec::new()));
-        let prev_ref = prev_windows.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _sv = rt.get(sorted_ver);
-
-            let vals = ordered_values.borrow();
-            let mut output = output_log_ref.borrow_mut();
-            let mut prev = prev_ref.borrow_mut();
-
-            for w in prev.drain(..) {
-                output.delete(&w);
-            }
-
-            if vals.len() >= size {
-                for i in 0..=(vals.len() - size) {
-                    let w: Vec<T> = vals[i..i + size].to_vec();
-                    output.insert(w.clone());
-                    prev.push(w);
-                }
-            }
-
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn pairwise(&self, rt: &Runtime) -> IncrCollection<(T, T)> {
-        let sorted_deltas = self.pending_deltas.clone();
-        let sorted_ver = self.version_node;
-        let last_delta_idx = Rc::new(Cell::new(0_usize));
-
-        // Shadow of the sorted values, maintained in lockstep by replaying SortDeltas
-        let shadow: Rc<RefCell<Vec<T>>> = Rc::new(RefCell::new(Vec::new()));
-        let shadow_ref = shadow.clone();
-
-        let output_log = Rc::new(RefCell::new(CollectionLog::new()));
-        let output_log_ref = output_log.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _sorted_v = rt.get(sorted_ver);
-
-            let deltas = sorted_deltas.borrow();
-            let start = last_delta_idx.get();
-            if start >= deltas.len() {
-                return output_log_ref.borrow().version;
-            }
-
-            let mut shadow = shadow_ref.borrow_mut();
-            let mut output = output_log_ref.borrow_mut();
-
-            for delta in &deltas[start..] {
-                match delta {
-                    SortDelta::Inserted { index, value } => {
-                        let i = *index;
-                        let n_before = shadow.len();
-
-                        if n_before == 0 {
-                            // First element, no pairs
-                        } else if i == 0 {
-                            // Inserting at front: new pair (new, old_first)
-                            output.insert((value.clone(), shadow[0].clone()));
-                        } else if i == n_before {
-                            // Inserting at end: new pair (old_last, new)
-                            output.insert((shadow[n_before - 1].clone(), value.clone()));
-                        } else {
-                            // Inserting in middle: remove old pair, add two new
-                            let left = shadow[i - 1].clone();
-                            let right = shadow[i].clone();
-                            output.delete(&(left.clone(), right.clone()));
-                            output.insert((left, value.clone()));
-                            output.insert((value.clone(), right));
-                        }
-
-                        shadow.insert(i, value.clone());
-                    }
-                    SortDelta::Removed { index, value } => {
-                        let i = *index;
-                        shadow.remove(i);
-                        let n_after = shadow.len();
-
-                        if n_after == 0 {
-                            // Was the only element; no pairs existed, nothing to remove
-                        } else if i == 0 {
-                            output.delete(&(value.clone(), shadow[0].clone()));
-                        } else if i == n_after {
-                            output.delete(&(shadow[n_after - 1].clone(), value.clone()));
-                        } else {
-                            let left = shadow[i - 1].clone();
-                            let right = shadow[i].clone();
-                            output.delete(&(left.clone(), value.clone()));
-                            output.delete(&(value.clone(), right.clone()));
-                            output.insert((left, right));
-                        }
-                    }
-                }
-            }
-
-            last_delta_idx.set(deltas.len());
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::Runtime;
-
-    #[test]
-    fn sort_basic_ordering() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        col.insert(&rt, 30);
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 20, 30]);
-    }
-
-    #[test]
-    fn sort_insert_maintains_order() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 30);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 30]);
-
-        col.insert(&rt, 20);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 20, 30]);
-    }
-
-    #[test]
-    fn sort_delete_maintains_order() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 20, 30]);
-
-        col.delete(&rt, &20);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 30]);
-    }
-
-    #[test]
-    fn sort_by_custom_key() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let sorted = col.sort_by_key(&rt, |x: &(String, i64)| x.1);
-
-        col.insert(&rt, ("bob".to_string(), 30));
-        col.insert(&rt, ("alice".to_string(), 10));
-        col.insert(&rt, ("carol".to_string(), 20));
-
-        let _ = rt.get(sorted.version_node);
-        let names: Vec<String> = sorted.entries().into_iter().map(|e| e.0).collect();
-        // Sorted by key (.1) ascending: alice=10, carol=20, bob=30
-        assert_eq!(names, vec!["alice", "carol", "bob"]);
-    }
-
-    #[test]
-    fn sort_duplicate_keys() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let sorted = col.sort_by_key(&rt, |x: &(String, i64)| x.1);
-
-        col.insert(&rt, ("a".to_string(), 10));
-        col.insert(&rt, ("b".to_string(), 10));
-        col.insert(&rt, ("c".to_string(), 20));
-
-        let _ = rt.get(sorted.version_node);
-        let entries = sorted.entries();
-        assert_eq!(entries[2].1, 20);
-        assert_eq!(entries[0].1, 10);
-        assert_eq!(entries[1].1, 10);
-    }
-
-    #[test]
-    fn sort_empty_collection() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), Vec::<i64>::new());
-    }
-
-    #[test]
-    fn pairwise_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 2);
-        assert!(elems.contains(&(10, 20)));
-        assert!(elems.contains(&(20, 30)));
-    }
-
-    #[test]
-    fn pairwise_single_element() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 0);
-    }
-
-    #[test]
-    fn pairwise_empty() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 0);
-    }
-
-    #[test]
-    fn pairwise_insert_middle() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-        assert!(pairs.elements().contains(&(10, 30)));
-
-        col.insert(&rt, 20);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 2);
-        assert!(elems.contains(&(10, 20)));
-        assert!(elems.contains(&(20, 30)));
-        assert!(!elems.contains(&(10, 30)));
-    }
-
-    #[test]
-    fn pairwise_delete_middle() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-
-        col.delete(&rt, &20);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&(10, 30)));
-    }
-
-    #[test]
-    fn pairwise_delete_first() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-
-        col.delete(&rt, &10);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&(20, 30)));
-    }
-
-    #[test]
-    fn pairwise_delete_last() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-
-        col.delete(&rt, &30);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&(10, 20)));
-    }
-
-    #[test]
-    fn pairwise_delete_to_empty() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 1);
-
-        col.delete(&rt, &10);
-        col.delete(&rt, &20);
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 0);
-    }
-
-    #[test]
-    fn pairwise_insert_at_front() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-        assert!(pairs.elements().contains(&(20, 30)));
-
-        col.insert(&rt, 10);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 2);
-        assert!(elems.contains(&(10, 20)));
-        assert!(elems.contains(&(20, 30)));
-    }
-
-    #[test]
-    fn pairwise_insert_at_end() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        let _ = rt.get(pairs.version_node);
-        assert!(pairs.elements().contains(&(10, 20)));
-
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 2);
-        assert!(elems.contains(&(10, 20)));
-        assert!(elems.contains(&(20, 30)));
-    }
-
-    #[test]
-    fn window_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let wins = sorted.window(&rt, 3);
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        col.insert(&rt, 40);
-        col.insert(&rt, 50);
-        let _ = rt.get(wins.version_node);
-        let elems = wins.elements();
-        assert_eq!(elems.len(), 3);
-        assert!(elems.contains(&vec![10, 20, 30]));
-        assert!(elems.contains(&vec![20, 30, 40]));
-        assert!(elems.contains(&vec![30, 40, 50]));
-    }
-
-    #[test]
-    fn window_smaller_than_size() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let wins = sorted.window(&rt, 3);
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        let _ = rt.get(wins.version_node);
-        assert_eq!(wins.elements().len(), 0);
-    }
-}
diff --git a/crates/incr-compute/src/types.rs b/crates/incr-compute/src/types.rs
deleted file mode 100644
index 46225c2..0000000
--- a/crates/incr-compute/src/types.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-use std::marker::PhantomData;
-
-/// Index into the node arena. Cheap to copy and compare.
-#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
-pub struct NodeId(pub(crate) u32);
-
-impl NodeId {
-    pub fn raw(self) -> u32 {
-        self.0
-    }
-
-    pub fn from_raw(id: u32) -> Self {
-        NodeId(id)
-    }
-}
-
-/// Monotonically increasing counter. Incremented on every input mutation.
-#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Default)]
-pub struct Revision(pub(crate) u64);
-
-impl Revision {
-    pub(crate) fn increment(&mut self) {
-        self.0 += 1;
-    }
-}
-
-/// Typed handle to a node in the incremental graph. `T` is the value type.
-/// Cheap to copy — it's just a u32 index + phantom type.
-#[derive(Debug)]
-pub struct Incr<T> {
-    pub(crate) id: NodeId,
-    pub(crate) _phantom: PhantomData<T>,
-}
-
-impl<T> Incr<T> {
-    pub fn node_id(self) -> NodeId {
-        self.id
-    }
-}
-
-// Manual impls because derive would add T: Copy/Clone bounds
-impl<T> Copy for Incr<T> {}
-impl<T> Clone for Incr<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-/// Whether a node is an input or a computed value.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum NodeKindInfo {
-    Input,
-    Compute,
-}
-
-/// Structural metadata about a single node, for visualization/debugging.
-#[derive(Clone, Debug)]
-pub struct NodeInfo {
-    pub id: NodeId,
-    pub kind: NodeKindInfo,
-    pub label: Option<String>,
-    pub dependencies: Vec<NodeId>,
-    pub dependents: Vec<NodeId>,
-}
-
-/// What happened to a node during a traced get() call.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum TraceAction {
-    /// Node was dirty but its dependencies hadn't actually changed.
-    VerifiedClean,
-    /// Node was recomputed. `value_changed` is false when early cutoff occurred.
-    Recomputed { value_changed: bool },
-}
-
-/// Trace entry for a single node during propagation.
-#[derive(Clone, Debug)]
-pub struct NodeTrace {
-    pub id: NodeId,
-    pub action: TraceAction,
-}
-
-/// Summary of what happened during a single get() call.
-#[derive(Clone, Debug)]
-pub struct PropagationTrace {
-    pub target: NodeId,
-    pub node_traces: Vec<NodeTrace>,
-    pub total_nodes: usize,
-    pub nodes_recomputed: usize,
-    pub nodes_cutoff: usize,
-    pub elapsed_ns: u64,
-}
diff --git a/crates/incr-compute/tests/collection_property.rs b/crates/incr-compute/tests/collection_property.rs
deleted file mode 100644
index 2b2b929..0000000
--- a/crates/incr-compute/tests/collection_property.rs
+++ /dev/null
@@ -1,439 +0,0 @@
-use incr_compute::Runtime;
-use proptest::prelude::*;
-
-#[derive(Clone, Debug)]
-enum Op {
-    Insert(i64),
-    Delete(i64),
-}
-
-fn verify_collection_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let evens = col.filter(&rt, |x| x % 2 == 0);
-    let doubled = evens.map(&rt, |x| x * 2);
-    let count = doubled.count(&rt);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let incr_count = rt.get(count);
-    let incr_elements: std::collections::HashSet<i64> = doubled.elements();
-
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let batch_elements: std::collections::HashSet<i64> = batch_set
-        .iter()
-        .filter(|x| *x % 2 == 0)
-        .map(|x| x * 2)
-        .collect();
-
-    assert_eq!(
-        incr_count,
-        batch_elements.len(),
-        "Count mismatch: incr={}, batch={}",
-        incr_count,
-        batch_elements.len()
-    );
-    assert_eq!(incr_elements, batch_elements, "Elements mismatch");
-}
-
-fn op_strategy() -> impl Strategy<Value = Op> {
-    prop_oneof![
-        (-100_i64..100).prop_map(Op::Insert),
-        (-100_i64..100).prop_map(Op::Delete),
-    ]
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn collection_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_collection_incremental_matches_batch(ops);
-    }
-}
-
-#[test]
-fn collection_property_specific_insert_delete_cycle() {
-    verify_collection_incremental_matches_batch(vec![
-        Op::Insert(2),
-        Op::Insert(4),
-        Op::Delete(2),
-        Op::Insert(6),
-        Op::Insert(3),
-        Op::Delete(4),
-    ]);
-}
-
-fn verify_reduce_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sum = col.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-    let max = col.reduce(&rt, |elements| -> Option<i64> {
-        elements.iter().copied().max()
-    });
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let incr_sum = rt.get(sum);
-    let incr_max = rt.get(max);
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let batch_sum: i64 = batch_set.iter().sum();
-    let batch_max: Option<i64> = batch_set.iter().copied().max();
-
-    assert_eq!(
-        incr_sum, batch_sum,
-        "Sum mismatch: incr={}, batch={}",
-        incr_sum, batch_sum
-    );
-    assert_eq!(
-        incr_max, batch_max,
-        "Max mismatch: incr={:?}, batch={:?}",
-        incr_max, batch_max
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn reduce_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_reduce_incremental_matches_batch(ops);
-    }
-}
-
-fn verify_sort_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let _ = rt.get(sorted.version_node());
-    let incr_sorted = sorted.entries();
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let mut batch_sorted: Vec<i64> = batch_set.into_iter().collect();
-    batch_sorted.sort();
-
-    assert_eq!(
-        incr_sorted, batch_sorted,
-        "Sort mismatch: incr={:?}, batch={:?}",
-        incr_sorted, batch_sorted
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn sort_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_sort_incremental_matches_batch(ops);
-    }
-}
-
-fn verify_group_by_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    // group by sign: negative -> -1, zero -> 0, positive -> 1
-    let grouped = col.group_by(&rt, |x: &i64| x.signum());
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    // Force the version node to stabilize
-    let _ = rt.get(grouped.version_node());
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-
-    for &key in &[-1_i64, 0, 1] {
-        let incr_group_elements: std::collections::HashSet<i64> = grouped
-            .get_group(&key)
-            .map(|g| g.elements())
-            .unwrap_or_default();
-
-        let batch_group: std::collections::HashSet<i64> = batch_set
-            .iter()
-            .filter(|x| x.signum() == key)
-            .copied()
-            .collect();
-
-        assert_eq!(
-            incr_group_elements, batch_group,
-            "group_by mismatch for key={}: incr={:?}, batch={:?}",
-            key, incr_group_elements, batch_group,
-        );
-    }
-}
-
-fn verify_join_incremental_matches_batch(left_ops: Vec<Op>, right_ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let left = rt.create_collection::<i64>();
-    let right = rt.create_collection::<i64>();
-    // Join on absolute value: pairs (l, r) where abs(l) == abs(r)
-    let joined = left.join(&rt, &right, |x: &i64| x.abs(), |x: &i64| x.abs());
-    let count = joined.count(&rt);
-
-    for op in &left_ops {
-        match op {
-            Op::Insert(v) => left.insert(&rt, *v),
-            Op::Delete(v) => left.delete(&rt, v),
-        }
-    }
-    for op in &right_ops {
-        match op {
-            Op::Insert(v) => right.insert(&rt, *v),
-            Op::Delete(v) => right.delete(&rt, v),
-        }
-    }
-
-    let _ = rt.get(count);
-    let incr_pairs: std::collections::HashSet<(i64, i64)> = joined.elements();
-
-    // Batch oracle
-    let mut left_set = std::collections::HashSet::new();
-    for op in &left_ops {
-        match op {
-            Op::Insert(v) => {
-                left_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                left_set.remove(v);
-            }
-        }
-    }
-    let mut right_set = std::collections::HashSet::new();
-    for op in &right_ops {
-        match op {
-            Op::Insert(v) => {
-                right_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                right_set.remove(v);
-            }
-        }
-    }
-
-    // The join output is a multiset (repeated pairs can appear), but we compare
-    // as a set since both sides contain distinct values from the HashSet oracle.
-    let mut batch_pairs: std::collections::HashSet<(i64, i64)> = std::collections::HashSet::new();
-    for &l in &left_set {
-        for &r in &right_set {
-            if l.abs() == r.abs() {
-                batch_pairs.insert((l, r));
-            }
-        }
-    }
-
-    assert_eq!(
-        incr_pairs, batch_pairs,
-        "join mismatch: incr={:?}, batch={:?}",
-        incr_pairs, batch_pairs,
-    );
-}
-
-fn verify_window_incremental_matches_batch(ops: Vec<Op>, window_size: usize) {
-    if window_size == 0 {
-        return;
-    }
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-    let windows = sorted.window(&rt, window_size);
-    let count = windows.count(&rt);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let _ = rt.get(count);
-    let incr_windows: std::collections::HashSet<Vec<i64>> = windows.elements();
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let mut batch_sorted: Vec<i64> = batch_set.into_iter().collect();
-    batch_sorted.sort();
-
-    let batch_windows: std::collections::HashSet<Vec<i64>> = if batch_sorted.len() >= window_size {
-        batch_sorted
-            .windows(window_size)
-            .map(|w| w.to_vec())
-            .collect()
-    } else {
-        std::collections::HashSet::new()
-    };
-
-    assert_eq!(
-        incr_windows, batch_windows,
-        "window (size={}) mismatch: incr={:?}, batch={:?}",
-        window_size, incr_windows, batch_windows,
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn group_by_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_group_by_incremental_matches_batch(ops);
-    }
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn join_incremental_matches_batch(
-        left_ops in prop::collection::vec(op_strategy(), 1..30),
-        right_ops in prop::collection::vec(op_strategy(), 1..30),
-    ) {
-        verify_join_incremental_matches_batch(left_ops, right_ops);
-    }
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn window_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-        window_size in 1_usize..6,
-    ) {
-        verify_window_incremental_matches_batch(ops, window_size);
-    }
-}
-
-fn verify_pairwise_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-    let pairs = sorted.pairwise(&rt);
-    let pair_count = pairs.count(&rt);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let _ = rt.get(pair_count); // forces stabilization of the full chain
-    let incr_pairs = pairs.elements();
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let mut batch_sorted: Vec<i64> = batch_set.into_iter().collect();
-    batch_sorted.sort();
-    let batch_pairs: std::collections::HashSet<(i64, i64)> =
-        batch_sorted.windows(2).map(|w| (w[0], w[1])).collect();
-
-    assert_eq!(
-        incr_pairs, batch_pairs,
-        "Pairwise mismatch: incr={:?}, batch={:?}",
-        incr_pairs, batch_pairs
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn pairwise_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_pairwise_incremental_matches_batch(ops);
-    }
-}
diff --git a/crates/incr-compute/tests/integration.rs b/crates/incr-compute/tests/integration.rs
index cf9db6b..e77387e 100644
--- a/crates/incr-compute/tests/integration.rs
+++ b/crates/incr-compute/tests/integration.rs
@@ -1,270 +1,93 @@
-use incr_compute::{IncrCollection, Runtime};
+//! Smoke tests for the `incr-compute` v0.2 wrapper. They check that
+//! the re-exports compile and that the basic API works end-to-end
+//! against `incr_core::Runtime<Local>`.
 
-#[test]
-fn spec_example_width_height_area() {
-    let rt = Runtime::new();
-
-    let width = rt.create_input(10.0_f64);
-    let height = rt.create_input(5.0_f64);
-
-    let area = rt.create_query(move |rt| rt.get(width) * rt.get(height));
-
-    let description = rt.create_query(move |rt| format!("Area is {}", rt.get(area)));
-
-    assert_eq!(rt.get(description), "Area is 50");
-
-    rt.set(width, 12.0);
-    assert_eq!(rt.get(description), "Area is 60");
-}
+use incr_compute::{IncrCollection, Runtime, SortedCollection};
 
 #[test]
-fn spec_example_incremental_updates() {
+fn function_dag_chain_propagates() {
     let rt = Runtime::new();
-
-    let x = rt.create_input(1_i64);
-    let y = rt.create_input(2_i64);
-
-    let sum = rt.create_query(move |rt| rt.get(x) + rt.get(y));
-    let doubled = rt.create_query(move |rt| rt.get(sum) * 2);
-    let label = rt.create_query(move |rt| format!("result: {}", rt.get(doubled)));
-
-    assert_eq!(rt.get(label), "result: 6"); // (1+2)*2 = 6
-
-    rt.set(x, 10);
-    assert_eq!(rt.get(label), "result: 24"); // (10+2)*2 = 24
-
-    rt.set(y, 5);
-    assert_eq!(rt.get(label), "result: 30"); // (10+5)*2 = 30
+    let a = rt.create_input(1_i64);
+    let b = rt.create_query(move |rt| rt.get(a) + 1);
+    let c = rt.create_query(move |rt| rt.get(b) * 2);
+    assert_eq!(rt.get(c), 4);
+    rt.set(a, 10);
+    assert_eq!(rt.get(c), 22);
 }
 
 #[test]
-fn complex_graph_with_early_cutoff() {
-    use std::cell::Cell;
-    use std::rc::Rc;
-
+fn diamond_with_early_cutoff() {
     let rt = Runtime::new();
-
-    let raw_score = rt.create_input(85_i64);
-
-    let normalize_count = Rc::new(Cell::new(0_u32));
-    let nc = normalize_count.clone();
-    let normalized = rt.create_query(move |rt| {
-        nc.set(nc.get() + 1);
-        rt.get(raw_score).clamp(0, 100)
-    });
-
-    let format_count = Rc::new(Cell::new(0_u32));
-    let fc = format_count.clone();
-    let display = rt.create_query(move |rt| {
-        fc.set(fc.get() + 1);
-        let score = rt.get(normalized);
-        if score >= 90 {
-            "A".to_string()
-        } else if score >= 80 {
-            "B".to_string()
-        } else {
-            "C".to_string()
-        }
-    });
-
-    assert_eq!(rt.get(display), "B");
-    assert_eq!(normalize_count.get(), 1);
-    assert_eq!(format_count.get(), 1);
-
-    rt.set(raw_score, 95);
-    assert_eq!(rt.get(display), "A");
-    assert_eq!(normalize_count.get(), 2);
-    assert_eq!(format_count.get(), 2);
-
-    rt.set(raw_score, 150);
-    assert_eq!(rt.get(display), "A");
-    assert_eq!(normalize_count.get(), 3);
-    assert_eq!(format_count.get(), 3);
-
-    // Early cutoff: 200 clamped to 100, same as 150 clamped to 100
-    rt.set(raw_score, 200);
-    assert_eq!(rt.get(display), "A");
-    assert_eq!(normalize_count.get(), 4);
-    assert_eq!(format_count.get(), 3); // NOT recomputed — early cutoff!
+    let a = rt.create_input(1_i64);
+    let b = rt.create_query(move |rt| rt.get(a) + 10);
+    let c = rt.create_query(move |rt| rt.get(a) + 100);
+    let d = rt.create_query(move |rt| rt.get(b) + rt.get(c));
+    assert_eq!(rt.get(d), 112);
+    rt.set(a, 2);
+    assert_eq!(rt.get(d), 114);
 }
 
 #[test]
-fn string_values_work() {
+fn collection_filter_map_reduce_pipeline() {
     let rt = Runtime::new();
-
-    let first = rt.create_input("Hello".to_string());
-    let last = rt.create_input("World".to_string());
-
-    let full = rt.create_query(move |rt| format!("{} {}", rt.get(first), rt.get(last)));
-
-    assert_eq!(rt.get(full), "Hello World");
-
-    rt.set(first, "Goodbye".to_string());
-    assert_eq!(rt.get(full), "Goodbye World");
+    let scores: IncrCollection<i64> = rt.create_collection();
+    let passing = scores.filter(&rt, |s| *s >= 50);
+    let curved = passing.map(&rt, |s| s + 10);
+    let total = curved.reduce(&rt, |xs| xs.iter().sum::<i64>());
+    scores.insert(&rt, 80);
+    scores.insert(&rt, 95);
+    scores.insert(&rt, 60);
+    scores.insert(&rt, 42);
+    assert_eq!(rt.get(total), 265);
 }
 
 #[test]
-fn collection_feeds_function_query() {
+fn sort_pairwise_count() {
     let rt = Runtime::new();
-    let scores = rt.create_collection::<i64>();
-    let high_scores = scores.filter(&rt, |s| *s >= 90);
-    let count = high_scores.count(&rt);
-
-    let summary = rt.create_query(move |rt| {
-        let n = rt.get(count);
-        format!("{} students scored 90+", n)
-    });
-
-    scores.insert(&rt, 85);
-    scores.insert(&rt, 92);
-    scores.insert(&rt, 78);
-    scores.insert(&rt, 95);
-
-    assert_eq!(rt.get(summary), "2 students scored 90+");
-
-    scores.insert(&rt, 91);
-    assert_eq!(rt.get(summary), "3 students scored 90+");
-
-    scores.delete(&rt, &92);
-    assert_eq!(rt.get(summary), "2 students scored 90+");
+    let c: IncrCollection<i64> = rt.create_collection();
+    let sorted: SortedCollection<i64, i64> = c.sort_by_key(&rt, |x| *x);
+    let pairs = sorted.pairwise(&rt);
+    c.insert(&rt, 5);
+    c.insert(&rt, 1);
+    c.insert(&rt, 3);
+    let n = pairs.count(&rt);
+    assert_eq!(rt.get(n), 2); // (1,3), (3,5)
 }
 
 #[test]
-fn full_pipeline_filter_map_count_query() {
-    #[derive(Clone, Hash, Eq, PartialEq, Debug)]
-    struct User {
-        name: String,
-        age: i32,
-        active: bool,
-    }
-
+fn group_by_two_buckets() {
     let rt = Runtime::new();
-    let users: IncrCollection<User> = rt.create_collection();
-
-    let active_adults = users
-        .filter(&rt, |u| u.active)
-        .filter(&rt, |u| u.age >= 18)
-        .map(&rt, |u| u.name.clone());
-
-    let count = active_adults.count(&rt);
-
-    let summary = rt.create_query(move |rt| format!("{} active adults", rt.get(count)));
-
-    users.insert(
-        &rt,
-        User {
-            name: "Alice".into(),
-            age: 30,
-            active: true,
-        },
-    );
-    users.insert(
-        &rt,
-        User {
-            name: "Bob".into(),
-            age: 16,
-            active: true,
-        },
-    );
-    users.insert(
-        &rt,
-        User {
-            name: "Carol".into(),
-            age: 25,
-            active: false,
-        },
-    );
-
-    assert_eq!(rt.get(summary), "1 active adults");
-
-    users.insert(
-        &rt,
-        User {
-            name: "Dave".into(),
-            age: 22,
-            active: true,
-        },
-    );
-    assert_eq!(rt.get(summary), "2 active adults");
-
-    users.delete(
-        &rt,
-        &User {
-            name: "Alice".into(),
-            age: 30,
-            active: true,
-        },
-    );
-    assert_eq!(rt.get(summary), "1 active adults");
+    let c: IncrCollection<i64> = rt.create_collection();
+    let groups = c.group_by(&rt, |x| x % 2);
+    for i in 1..=6_i64 {
+        c.insert(&rt, i);
+    }
+    let _ = rt.get(groups.version_node());
+    assert_eq!(groups.group_count(), 2);
 }
 
 #[test]
-fn sort_pairwise_map_reduce_pipeline() {
-    // Simulates: given a set of visit timestamps, compute total gaps between
-    // consecutive visits. This is the core pattern for travel time calculation.
+fn join_two_collections() {
     let rt = Runtime::new();
-    let visits = rt.create_collection::<i64>(); // timestamps
-
-    let sorted = visits.sort_by_key(&rt, |t: &i64| *t);
-    let pairs = sorted.pairwise(&rt);
-
-    let gaps = pairs.map(&rt, |(a, b): &(i64, i64)| b - a);
-
-    // Sum all gaps
-    let total_gap = gaps.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-    // Start with visits at times 10, 30, 50
-    visits.insert(&rt, 10);
-    visits.insert(&rt, 30);
-    visits.insert(&rt, 50);
-    assert_eq!(rt.get(total_gap), 40); // (30-10) + (50-30) = 40
-
-    // Insert visit at time 20: gaps become 10 + 10 + 20 = 40 (same total!)
-    visits.insert(&rt, 20);
-    assert_eq!(rt.get(total_gap), 40); // (20-10) + (30-20) + (50-30) = 40
-
-    // Delete visit at time 30: gaps become 10 + 30 = 40 (still same!)
-    visits.delete(&rt, &30);
-    assert_eq!(rt.get(total_gap), 40); // (20-10) + (50-20) = 40
-
-    // Insert visit at time 100: adds a big gap
-    visits.insert(&rt, 100);
-    assert_eq!(rt.get(total_gap), 90); // (20-10) + (50-20) + (100-50) = 90
-
-    visits.delete(&rt, &10);
-    assert_eq!(rt.get(total_gap), 80); // (50-20) + (100-50) = 80
+    let left: IncrCollection<(i64, &'static str)> = rt.create_collection();
+    let right: IncrCollection<(i64, i64)> = rt.create_collection();
+    let j = left.join(&rt, &right, |l| l.0, |r| r.0);
+    left.insert(&rt, (1, "alice"));
+    right.insert(&rt, (1, 100));
+    right.insert(&rt, (1, 200));
+    let n = j.count(&rt);
+    assert_eq!(rt.get(n), 2);
 }
 
 #[test]
-fn pipeline_early_cutoff() {
-    // Verify that early cutoff works through the full pipeline:
-    // if total doesn't change, downstream isn't recomputed
-    use std::cell::Cell;
-    use std::rc::Rc;
-
+fn graph_snapshot_returns_dependencies() {
     let rt = Runtime::new();
-    let visits = rt.create_collection::<i64>();
-    let sorted = visits.sort_by_key(&rt, |t: &i64| *t);
-    let pairs = sorted.pairwise(&rt);
-    let gaps = pairs.map(&rt, |(a, b): &(i64, i64)| b - a);
-    let total_gap = gaps.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-    let downstream_evals = Rc::new(Cell::new(0_u32));
-    let dc = downstream_evals.clone();
-    let label = rt.create_query(move |rt| {
-        dc.set(dc.get() + 1);
-        format!("total={}", rt.get(total_gap))
-    });
-
-    visits.insert(&rt, 10);
-    visits.insert(&rt, 30);
-    visits.insert(&rt, 50);
-    assert_eq!(rt.get(label), "total=40");
-    assert_eq!(downstream_evals.get(), 1);
-
-    // Insert 20 between 10 and 30: total gap is still 40
-    visits.insert(&rt, 20);
-    assert_eq!(rt.get(label), "total=40");
-    // Early cutoff: total_gap unchanged, so label shouldn't recompute
-    assert_eq!(downstream_evals.get(), 1);
+    let a = rt.create_input(1_i64);
+    let _b = rt.create_query(move |rt| rt.get(a) + 1);
+    // Force the query to run so its deps are recorded.
+    let _ = rt.get(_b);
+    let snap = rt.graph_snapshot();
+    assert_eq!(snap.len(), 2);
+    // The query (slot 1) should depend on the input (slot 0).
+    assert_eq!(snap[1].dependencies.len(), 1);
 }
diff --git a/crates/incr-compute/tests/property.rs b/crates/incr-compute/tests/property.rs
deleted file mode 100644
index 1b5bc20..0000000
--- a/crates/incr-compute/tests/property.rs
+++ /dev/null
@@ -1,153 +0,0 @@
-use incr_compute::{Incr, Runtime};
-use proptest::prelude::*;
-
-/// Build a layered graph of the given shape, run it incrementally,
-/// then rebuild from scratch and compare results.
-fn verify_incremental_matches_batch(
-    num_inputs: usize,
-    input_values: Vec<i64>,
-    layers: Vec<Vec<(usize, usize)>>, // Each layer: vec of (dep_a_idx, dep_b_idx) pairs
-    mutations: Vec<(usize, i64)>,     // (input_index, new_value) pairs
-) {
-    assert!(num_inputs >= 2);
-    assert_eq!(input_values.len(), num_inputs);
-
-    let rt = Runtime::new();
-    let mut all_nodes: Vec<Incr<i64>> = Vec::new();
-
-    for &val in &input_values {
-        let node = rt.create_input(val);
-        all_nodes.push(node);
-    }
-
-    for layer in &layers {
-        let mut layer_nodes = Vec::new();
-        for &(dep_a_rel, dep_b_rel) in layer {
-            let available = all_nodes.len();
-            if available < 2 {
-                continue;
-            }
-            let idx_a = dep_a_rel % available;
-            let idx_b = dep_b_rel % available;
-            let a = all_nodes[idx_a];
-            let b = all_nodes[idx_b];
-            let node = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            layer_nodes.push(node);
-        }
-        all_nodes.extend(layer_nodes);
-    }
-
-    if all_nodes.len() <= num_inputs {
-        return; // No compute nodes generated
-    }
-
-    // Read all compute nodes to initialize
-    let last = *all_nodes.last().unwrap();
-    let _ = rt.get(last);
-
-    // Apply mutations
-    for &(input_rel, new_val) in &mutations {
-        let idx = input_rel % num_inputs;
-        rt.set(all_nodes[idx], new_val);
-    }
-
-    let incremental_result = rt.get(last);
-
-    let mut final_values = input_values.clone();
-    for &(input_rel, new_val) in &mutations {
-        let idx = input_rel % num_inputs;
-        final_values[idx] = new_val;
-    }
-
-    let rt2 = Runtime::new();
-    let mut all_nodes2: Vec<Incr<i64>> = Vec::new();
-
-    for &val in &final_values {
-        let node = rt2.create_input(val);
-        all_nodes2.push(node);
-    }
-
-    for layer in &layers {
-        let mut layer_nodes = Vec::new();
-        for &(dep_a_rel, dep_b_rel) in layer {
-            let available = all_nodes2.len();
-            if available < 2 {
-                continue;
-            }
-            let idx_a = dep_a_rel % available;
-            let idx_b = dep_b_rel % available;
-            let a = all_nodes2[idx_a];
-            let b = all_nodes2[idx_b];
-            let node = rt2.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            layer_nodes.push(node);
-        }
-        all_nodes2.extend(layer_nodes);
-    }
-
-    let last2 = *all_nodes2.last().unwrap();
-    let batch_result = rt2.get(last2);
-
-    assert_eq!(
-        incremental_result,
-        batch_result,
-        "Incremental result {} != batch result {} with {} inputs, {} layers, {} mutations",
-        incremental_result,
-        batch_result,
-        num_inputs,
-        layers.len(),
-        mutations.len()
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn incremental_matches_batch(
-        num_inputs in 2_usize..20,
-        input_values in prop::collection::vec(-1000_i64..1000, 2..20),
-        layers in prop::collection::vec(
-            prop::collection::vec((0_usize..100, 0_usize..100), 1..5),
-            1..8
-        ),
-        mutations in prop::collection::vec((0_usize..100, -1000_i64..1000), 1..20),
-    ) {
-        let num_inputs = num_inputs.min(input_values.len()).max(2);
-        let input_values = input_values[..num_inputs].to_vec();
-        verify_incremental_matches_batch(num_inputs, input_values, layers, mutations);
-    }
-}
-
-#[test]
-fn property_specific_diamond_cutoff() {
-    verify_incremental_matches_batch(
-        3,
-        vec![10, 20, 30],
-        vec![
-            vec![(0, 1), (1, 2)], // Layer 1: node3=in0+in1, node4=in1+in2
-            vec![(0, 1)],         // Layer 2: node5=node3+node4
-        ],
-        vec![(0, 10), (1, 25)], // Change input 0 (same!), change input 1
-    );
-}
-
-#[test]
-fn property_deep_chain() {
-    verify_incremental_matches_batch(
-        5,
-        vec![1, 2, 3, 4, 5],
-        vec![
-            vec![(0, 1)],
-            vec![(2, 0)],
-            vec![(0, 1)],
-            vec![(1, 0)],
-            vec![(0, 1)],
-            vec![(2, 0)],
-            vec![(0, 1)],
-            vec![(1, 0)],
-            vec![(0, 1)],
-            vec![(2, 0)],
-        ],
-        vec![(0, 100), (2, 50), (4, 75)],
-    );
-}
diff --git a/crates/incr-concurrent-python/Cargo.toml b/crates/incr-concurrent-python/Cargo.toml
index fe5a439..68a73e3 100644
--- a/crates/incr-concurrent-python/Cargo.toml
+++ b/crates/incr-concurrent-python/Cargo.toml
@@ -1,14 +1,16 @@
 [package]
 name = "incr-concurrent-python"
-version = "0.1.0"
+version = "0.2.0-beta.1"
 edition = "2021"
-description = "Python bindings for the incr incremental computation engine"
+description = "Python bindings for the thread-safe incr incremental computation engine"
 license = "Apache-2.0"
+publish = false
 
 [lib]
 name = "incr_concurrent"
 crate-type = ["cdylib"]
+doc = false
 
 [dependencies]
+incr_concurrent = { package = "incr-concurrent", version = "0.2.0-beta.1", path = "../incr-concurrent" }
 pyo3 = { version = "0.23", features = ["extension-module"] }
-incr_conc = { package = "incr-concurrent", path = "../incr-concurrent" }
diff --git a/crates/incr-concurrent-python/pyproject.toml b/crates/incr-concurrent-python/pyproject.toml
index b8762f1..28a2970 100644
--- a/crates/incr-concurrent-python/pyproject.toml
+++ b/crates/incr-concurrent-python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "incr-concurrent"
-version = "0.1.0"
+version = "0.2.0b1"
 description = "Incremental computation engine with thread-safe runtime for Python"
 requires-python = ">=3.8"
 license = "Apache-2.0"
diff --git a/crates/incr-concurrent-python/src/lib.rs b/crates/incr-concurrent-python/src/lib.rs
index aafc1e9..06c14af 100644
--- a/crates/incr-concurrent-python/src/lib.rs
+++ b/crates/incr-concurrent-python/src/lib.rs
@@ -1,18 +1,27 @@
+//! Python bindings for `incr-concurrent` (thread-safe).
+//!
+//! The Python module is named `incr_concurrent`. After
+//! `from incr_concurrent import Runtime`, inputs and queries are created
+//! against the v0.2 engine with the Shared strategy. Otherwise the
+//! API mirrors the `incr` (single-threaded) binding exactly: same
+//! method names, same callback shapes, same value bounds. Migration
+//! between the two is a one-line import change.
+
 use pyo3::prelude::*;
 use std::hash::{Hash, Hasher};
 
-// PyObject (pyo3) is Send but not Sync. Closures passed to v2's
-// create_query / filter / map / etc. must be Send + Sync + 'static.
-// All PyObject access goes through Python::with_gil(), which serializes
-// reference-count manipulation and object access behind the GIL, so
-// sharing a PyObject across threads is safe as long as every touch
-// acquires the GIL first.
-
-struct SyncPyObject(PyObject);
-
-// SAFETY: every access to the inner PyObject goes through with_gil().
-unsafe impl Sync for SyncPyObject {}
-
+// Alias the engine crate locally so the pymodule fn can be named
+// `incr_concurrent` without shadowing the crate import.
+use ::incr_concurrent as engine;
+use engine::{
+    Incr, IncrCollection, NodeId, NodeKindInfo, PropagationTrace, Runtime, SortedCollection,
+    TraceAction,
+};
+
+/// Newtype around `PyObject` that satisfies the `Value` bound. All
+/// trait methods reacquire the GIL because Python objects are only
+/// usable while holding it; this is the conventional PyO3 pattern for
+/// embedding `PyObject` in trait-bounded Rust code.
 struct PyValue(PyObject);
 
 impl Clone for PyValue {
@@ -60,17 +69,11 @@ impl Ord for PyValue {
     }
 }
 
-// SAFETY: all PyObject access goes through Python::with_gil().
-// The GIL serializes reference count manipulation and object access.
-unsafe impl Send for PyValue {}
-unsafe impl Sync for PyValue {}
-
-incr_conc::impl_value!(PyValue);
-
+/// Typed node handle exposed to Python. Wraps `Incr<PyValue>`.
 #[pyclass(name = "NodeId")]
 #[derive(Clone)]
 struct PyNodeId {
-    inner: incr_conc::Incr<PyValue>,
+    inner: Incr<PyValue>,
 }
 
 #[pymethods]
@@ -79,11 +82,18 @@ impl PyNodeId {
     fn id(&self) -> u32 {
         self.inner.slot()
     }
+
+    fn __repr__(&self) -> String {
+        format!("NodeId(slot={})", self.inner.slot())
+    }
 }
 
+/// Read-only runtime handle passed to query closures. The pointer is nulled
+/// after the callback returns normally (a raising callback panics before
+/// this step) so stale captures fail loudly instead of corrupting memory.
 #[pyclass(name = "RuntimeRef", unsendable)]
 struct PyRuntimeRef {
-    ptr: *const incr_conc::Runtime,
+    ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -94,6 +104,9 @@ impl PyRuntimeRef {
                 "RuntimeRef is no longer valid (used outside query callback)",
             ));
         }
+        // SAFETY: ptr is set from the `&Runtime` handed to the query
+        // closure and nulled when the callback returns normally, so the
+        // referent is live for any dereference made inside the callback.
         let rt = unsafe { &*self.ptr };
         let val: PyValue = rt.get(node.inner);
         Ok(val.0)
@@ -102,8 +115,8 @@ impl PyRuntimeRef {
 
 #[pyclass(name = "Collection", unsendable)]
 struct PyCollection {
-    inner: incr_conc::IncrCollection<PyValue>,
-    rt_ptr: *const incr_conc::Runtime,
+    inner: IncrCollection<PyValue>,
+    rt_ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -113,18 +126,20 @@ impl PyCollection {
         self.inner.insert(rt, PyValue(value));
     }
 
-    fn delete(&self, value: PyObject) {
+    fn delete(&self, value: PyObject) -> bool {
         let rt = unsafe { &*self.rt_ptr };
-        self.inner.delete(rt, &PyValue(value));
+        self.inner.delete(rt, &PyValue(value))
+    }
+
+    fn snapshot_len(&self) -> usize {
+        self.inner.snapshot_len()
     }
 
     fn filter(&self, predicate: PyObject) -> PyResult<PyCollection> {
         let rt = unsafe { &*self.rt_ptr };
-        let predicate = SyncPyObject(predicate);
         let filtered = self.inner.filter(rt, move |val: &PyValue| -> bool {
             Python::with_gil(|py| {
                 predicate
-                    .0
                     .call1(py, (val.0.clone_ref(py),))
                     .and_then(|r| r.is_truthy(py))
                     .unwrap_or(false)
@@ -138,11 +153,9 @@ impl PyCollection {
 
     fn map(&self, func: PyObject) -> PyResult<PyCollection> {
         let rt = unsafe { &*self.rt_ptr };
-        let func = SyncPyObject(func);
         let mapped = self.inner.map(rt, move |val: &PyValue| -> PyValue {
             Python::with_gil(|py| {
                 let result = func
-                    .0
                     .call1(py, (val.0.clone_ref(py),))
                     .expect("map function raised an exception");
                 PyValue(result)
@@ -156,8 +169,10 @@ impl PyCollection {
 
     fn count(&self) -> PyResult<PyNodeId> {
         let rt = unsafe { &*self.rt_ptr };
-        let count_node: incr_conc::Incr<u64> = self.inner.count(rt);
-        // Bridge u64 -> PyValue via a query
+        let count_node: Incr<u64> = self.inner.count(rt);
+        // Bridge u64 -> PyValue via a wrapper query so the Python side
+        // receives a node returning an int (PyValue), matching the
+        // single PyNodeId type the binding exposes.
         let node = rt.create_query(move |rt| -> PyValue {
             let c: u64 = rt.get(count_node);
             Python::with_gil(|py| PyValue(c.into_pyobject(py).unwrap().into_any().unbind()))
@@ -167,31 +182,26 @@ impl PyCollection {
 
     fn reduce(&self, fold_fn: PyObject) -> PyResult<PyNodeId> {
         let rt = unsafe { &*self.rt_ptr };
-        let fold_fn = SyncPyObject(fold_fn);
-        let reduce_node: incr_conc::Incr<PyValue> =
-            self.inner.reduce(rt, move |elements| -> PyValue {
-                Python::with_gil(|py| {
-                    let py_list = pyo3::types::PyList::empty(py);
-                    for elem in elements.iter() {
-                        py_list.append(elem.0.clone_ref(py)).unwrap();
-                    }
-                    let result = fold_fn
-                        .0
-                        .call1(py, (py_list,))
-                        .expect("reduce function raised an exception");
-                    PyValue(result)
-                })
-            });
+        let reduce_node: Incr<PyValue> = self.inner.reduce(rt, move |elements| -> PyValue {
+            Python::with_gil(|py| {
+                let py_list = pyo3::types::PyList::empty(py);
+                for elem in elements.iter() {
+                    py_list.append(elem.0.clone_ref(py)).unwrap();
+                }
+                let result = fold_fn
+                    .call1(py, (py_list,))
+                    .expect("reduce function raised an exception");
+                PyValue(result)
+            })
+        });
         Ok(PyNodeId { inner: reduce_node })
     }
 
     fn sort_by_key(&self, key_fn: PyObject) -> PyResult<PySortedCollection> {
         let rt = unsafe { &*self.rt_ptr };
-        let key_fn = SyncPyObject(key_fn);
         let sorted = self.inner.sort_by_key(rt, move |val: &PyValue| -> PyValue {
             Python::with_gil(|py| {
                 let result = key_fn
-                    .0
                     .call1(py, (val.0.clone_ref(py),))
                     .expect("sort key function raised an exception");
                 PyValue(result)
@@ -205,11 +215,9 @@ impl PyCollection {
 
     fn group_by(&self, key_fn: PyObject) -> PyResult<PyGroupedCollection> {
         let rt = unsafe { &*self.rt_ptr };
-        let key_fn = SyncPyObject(key_fn);
         let grouped = self.inner.group_by(rt, move |val: &PyValue| -> PyValue {
             Python::with_gil(|py| {
                 let result = key_fn
-                    .0
                     .call1(py, (val.0.clone_ref(py),))
                     .expect("group_by key function raised an exception");
                 PyValue(result)
@@ -228,15 +236,12 @@ impl PyCollection {
         right_key: PyObject,
     ) -> PyResult<PyCollection> {
         let rt = unsafe { &*self.rt_ptr };
-        let left_key = SyncPyObject(left_key);
-        let right_key = SyncPyObject(right_key);
         let joined = self.inner.join(
             rt,
             &right.inner,
             move |val: &PyValue| -> PyValue {
                 Python::with_gil(|py| {
                     let result = left_key
-                        .0
                         .call1(py, (val.0.clone_ref(py),))
                         .expect("left key function raised an exception");
                     PyValue(result)
@@ -245,16 +250,14 @@ impl PyCollection {
             move |val: &PyValue| -> PyValue {
                 Python::with_gil(|py| {
                     let result = right_key
-                        .0
                         .call1(py, (val.0.clone_ref(py),))
                         .expect("right key function raised an exception");
                     PyValue(result)
                 })
             },
         );
-        // join returns IncrCollection<(PyValue, PyValue)>, but we need
-        // IncrCollection<PyValue> for the Python side. Map the tuples
-        // into PyValue-wrapped Python tuples.
+        // join returns IncrCollection<(PyValue, PyValue)>; map pairs to
+        // Python tuples wrapped in PyValue for the unified element type.
         let mapped = joined.map(rt, |pair: &(PyValue, PyValue)| -> PyValue {
             Python::with_gil(|py| {
                 let tuple = pyo3::types::PyTuple::new(
@@ -272,15 +275,23 @@ impl PyCollection {
     }
 
     #[getter]
-    fn version_node_id(&self) -> u32 {
-        self.inner.version_node().slot()
+    fn version_node(&self) -> PyResult<PyNodeId> {
+        let rt = unsafe { &*self.rt_ptr };
+        let v: Incr<u64> = self.inner.version_node();
+        // Wrap the u64 version node in a PyValue-returning bridge (a new
+        // query per access) so rt.get / set_label can take it uniformly.
+        let bridge = rt.create_query(move |rt| -> PyValue {
+            let n: u64 = rt.get(v);
+            Python::with_gil(|py| PyValue(n.into_pyobject(py).unwrap().into_any().unbind()))
+        });
+        Ok(PyNodeId { inner: bridge })
     }
 }
 
 #[pyclass(name = "SortedCollection", unsendable)]
 struct PySortedCollection {
-    inner: incr_conc::SortedCollection<PyValue>,
-    rt_ptr: *const incr_conc::Runtime,
+    inner: SortedCollection<PyValue, PyValue>,
+    rt_ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -307,8 +318,6 @@ impl PySortedCollection {
     fn window(&self, size: usize) -> PyResult<PyCollection> {
         let rt = unsafe { &*self.rt_ptr };
         let win_collection = self.inner.window(rt, size);
-        // window returns IncrCollection<Vec<PyValue>>; map into PyValue
-        // wrapping a Python list for each window.
         let mapped = win_collection.map(rt, |window: &Vec<PyValue>| -> PyValue {
             Python::with_gil(|py| {
                 let py_list = pyo3::types::PyList::empty(py);
@@ -324,8 +333,8 @@ impl PySortedCollection {
         })
     }
 
-    fn entries(&self) -> PyResult<PyObject> {
-        let entries = self.inner.entries();
+    fn snapshot(&self) -> PyResult<PyObject> {
+        let entries = self.inner.snapshot();
         Python::with_gil(|py| {
             let list = pyo3::types::PyList::empty(py);
             for entry in entries {
@@ -335,22 +344,26 @@ impl PySortedCollection {
         })
     }
 
+    fn snapshot_len(&self) -> usize {
+        self.inner.snapshot_len()
+    }
+
     #[getter]
     fn version_node(&self) -> PyResult<PyNodeId> {
         let rt = unsafe { &*self.rt_ptr };
-        let ver_node = self.inner.version_node();
-        let node = rt.create_query(move |rt| -> PyValue {
+        let ver_node: Incr<u64> = self.inner.version_node();
+        let bridge = rt.create_query(move |rt| -> PyValue {
             let v: u64 = rt.get(ver_node);
             Python::with_gil(|py| PyValue(v.into_pyobject(py).unwrap().into_any().unbind()))
         });
-        Ok(PyNodeId { inner: node })
+        Ok(PyNodeId { inner: bridge })
     }
 }
 
 #[pyclass(name = "GroupedCollection", unsendable)]
 struct PyGroupedCollection {
-    inner: incr_conc::GroupedCollection<PyValue, PyValue>,
-    rt_ptr: *const incr_conc::Runtime,
+    inner: engine::GroupedCollection<PyValue, PyValue>,
+    rt_ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -377,15 +390,25 @@ impl PyGroupedCollection {
         }
     }
 
+    fn group_count(&self) -> usize {
+        self.inner.group_count()
+    }
+
     #[getter]
-    fn version_node_id(&self) -> u32 {
-        self.inner.version_node().slot()
+    fn version_node(&self) -> PyResult<PyNodeId> {
+        let rt = unsafe { &*self.rt_ptr };
+        let ver_node: Incr<u64> = self.inner.version_node();
+        let bridge = rt.create_query(move |rt| -> PyValue {
+            let v: u64 = rt.get(ver_node);
+            Python::with_gil(|py| PyValue(v.into_pyobject(py).unwrap().into_any().unbind()))
+        });
+        Ok(PyNodeId { inner: bridge })
     }
 }
 
 #[pyclass(name = "Runtime", unsendable)]
 struct PyRuntime {
-    inner: incr_conc::Runtime,
+    inner: Runtime,
 }
 
 #[pymethods]
@@ -393,7 +416,7 @@ impl PyRuntime {
     #[new]
     fn new() -> Self {
         PyRuntime {
-            inner: incr_conc::Runtime::new(),
+            inner: Runtime::new(),
         }
     }
 
@@ -412,33 +435,29 @@ impl PyRuntime {
     }
 
     fn create_query(&self, py_func: PyObject) -> PyNodeId {
-        let py_func = SyncPyObject(py_func);
-        let node = self
-            .inner
-            .create_query(move |rt: &incr_conc::Runtime| -> PyValue {
-                Python::with_gil(|py| {
-                    let rt_ref = Py::new(
-                        py,
-                        PyRuntimeRef {
-                            ptr: rt as *const _,
-                        },
-                    )
-                    .unwrap();
-                    let result = py_func
-                        .0
-                        .call1(py, (rt_ref.clone_ref(py),))
-                        .expect("query function raised an exception");
-                    // Invalidate the ref so it can't be used after callback returns
-                    rt_ref.bind(py).borrow_mut().ptr = std::ptr::null();
-                    PyValue(result)
-                })
-            });
+        let node = self.inner.create_query(move |rt: &Runtime| -> PyValue {
+            Python::with_gil(|py| {
+                let rt_ref = Py::new(
+                    py,
+                    PyRuntimeRef {
+                        ptr: rt as *const _,
+                    },
+                )
+                .unwrap();
+                let result = py_func
+                    .call1(py, (rt_ref.clone_ref(py),))
+                    .expect("query function raised an exception");
+                // Invalidate the ref so it can't be used after callback returns.
+                rt_ref.bind(py).borrow_mut().ptr = std::ptr::null();
+                PyValue(result)
+            })
+        });
         PyNodeId { inner: node }
     }
 
     fn create_collection(&self) -> PyCollection {
         let col = self.inner.create_collection::<PyValue>();
-        let rt_ptr: *const incr_conc::Runtime = &self.inner;
+        let rt_ptr: *const Runtime = &self.inner;
         PyCollection { inner: col, rt_ptr }
     }
 
@@ -450,16 +469,11 @@ impl PyRuntime {
         self.inner.set_label(id, label);
     }
 
-    fn set_tracing(&self, enabled: bool) {
-        self.inner.set_tracing(enabled);
-    }
-
     fn get_traced(&self, node: PyNodeId) -> PyResult<(PyObject, PyObject)> {
-        let (val, trace): (PyValue, incr_conc::PropagationTrace) =
-            self.inner.get_traced(node.inner);
+        let (val, trace): (PyValue, PropagationTrace) = self.inner.get_traced(node.inner);
         Python::with_gil(|py| {
             let trace_dict = pyo3::types::PyDict::new(py);
-            trace_dict.set_item("target", trace.target)?;
+            trace_dict.set_item("target", trace.target.0)?;
             trace_dict.set_item("total_nodes", trace.total_nodes)?;
             trace_dict.set_item("nodes_recomputed", trace.nodes_recomputed)?;
             trace_dict.set_item("nodes_cutoff", trace.nodes_cutoff)?;
@@ -468,15 +482,15 @@ impl PyRuntime {
             let node_traces = pyo3::types::PyList::empty(py);
             for nt in &trace.node_traces {
                 let d = pyo3::types::PyDict::new(py);
-                d.set_item("id", nt.slot)?;
+                d.set_item("id", nt.id.0)?;
                 d.set_item(
                     "action",
                     match &nt.action {
-                        incr_conc::TraceAction::VerifiedClean => "verified_clean",
-                        incr_conc::TraceAction::Recomputed {
+                        TraceAction::VerifiedClean => "verified_clean",
+                        TraceAction::Recomputed {
                             value_changed: true,
                         } => "recomputed_changed",
-                        incr_conc::TraceAction::Recomputed {
+                        TraceAction::Recomputed {
                             value_changed: false,
                         } => "recomputed_cutoff",
                     },
@@ -495,17 +509,19 @@ impl PyRuntime {
             let result = pyo3::types::PyList::empty(py);
             for info in &infos {
                 let d = pyo3::types::PyDict::new(py);
-                d.set_item("id", info.slot)?;
+                d.set_item("id", info.id.0)?;
                 d.set_item(
                     "kind",
                     match info.kind {
-                        incr_conc::NodeKindInfo::Input => "input",
-                        incr_conc::NodeKindInfo::Compute => "compute",
+                        NodeKindInfo::Input => "input",
+                        NodeKindInfo::Compute => "compute",
                     },
                 )?;
                 d.set_item("label", &info.label)?;
-                d.set_item("dependencies", &info.dependencies)?;
-                d.set_item("dependents", &info.dependents)?;
+                let deps: Vec<u32> = info.dependencies.iter().map(|n: &NodeId| n.0).collect();
+                let depts: Vec<u32> = info.dependents.iter().map(|n: &NodeId| n.0).collect();
+                d.set_item("dependencies", deps)?;
+                d.set_item("dependents", depts)?;
                 result.append(d)?;
             }
             Ok(result.into_any().unbind())
diff --git a/crates/incr-concurrent/Cargo.toml b/crates/incr-concurrent/Cargo.toml
index fd5f983..1129d1c 100644
--- a/crates/incr-concurrent/Cargo.toml
+++ b/crates/incr-concurrent/Cargo.toml
@@ -1,47 +1,17 @@
 [package]
 name = "incr-concurrent"
-version = "0.1.0"
+version = "0.2.0-beta.1"
 edition = "2021"
-description = "Incremental computation engine with Send+Sync runtime for multi-threaded services"
+description = "Thread-safe incremental computation engine. Send+Sync runtime built on incr-core."
 license = "Apache-2.0"
 repository = "https://github.com/Anyesh/incr"
 keywords = ["incremental", "computation", "reactive", "concurrent", "dataflow"]
 categories = ["algorithms", "data-structures", "concurrency"]
 
 [dependencies]
-rustc-hash = "2"
+incr-core = { version = "0.2.0-beta.1", path = "../incr-core" }
 
 [dev-dependencies]
 proptest = "1"
 criterion = { version = "0.5", features = ["html_reports"] }
-salsa = "0.26"
-crossbeam-epoch = "0.9"
 rand = "0.8"
-
-[[bench]]
-name = "performance"
-harness = false
-
-[[bench]]
-name = "comparison"
-harness = false
-
-[[bench]]
-name = "collection_operators"
-harness = false
-
-[[bench]]
-name = "concurrency_primitives"
-harness = false
-
-[[bench]]
-name = "contended_concurrency"
-harness = false
-
-[[bench]]
-name = "regression"
-harness = false
-
-[[bench]]
-name = "concurrent_throughput"
-harness = false
diff --git a/crates/incr-concurrent/README.md b/crates/incr-concurrent/README.md
index 0114dee..c545f19 100644
--- a/crates/incr-concurrent/README.md
+++ b/crates/incr-concurrent/README.md
@@ -1,8 +1,8 @@
 # incr-concurrent
 
-Thread-safe incremental computation with `Send + Sync` runtime.
+Thread-safe incremental computation with `Send + Sync` runtime. Since 0.2, this crate is a thin re-export of [`incr-core`](https://crates.io/crates/incr-core) with the `Shared` strategy; the algorithm and operators live in the shared engine.
 
-`incr-concurrent` builds a reactive computation graph that can be shared across threads. One thread mutates inputs while any number of reader threads query derived values concurrently, with no contention on the reader path. Like `incr`, it only recomputes what actually changed and applies early cutoff to skip unnecessary downstream work. The tradeoff is roughly 1.6x slower single-threaded throughput in exchange for safe concurrent access.
+`incr-concurrent` builds a reactive computation graph that can be shared across threads. One thread mutates inputs while any number of reader threads query derived values concurrently. Under the hood, every cell is the matching atomic type, and state transitions use explicit Acquire/Release ordering for visibility. On x86 (TSO), an Acquire load compiles to a plain `mov` with no fences, so the lock-free read path costs essentially nothing over the single-threaded variant. ARM/Apple Silicon pays one `dmb ld` per Acquire load, which is the unavoidable cost of cross-thread synchronization on a weak memory model.
 
 ## Install
 
@@ -27,7 +27,7 @@ rt.set(width, 10);
 assert_eq!(rt.get(area), 70);
 ```
 
-The API is identical to `incr`. Dependencies are tracked automatically when your query closure calls `rt.get`.
+The API is identical to `incr-compute`. Dependencies are tracked automatically when your query closure calls `rt.get`.
 
 ## Concurrent access
 
@@ -68,13 +68,13 @@ reader.join().unwrap();
 
 ## Collections
 
-Incremental collections work the same way as in `incr`, and the entire pipeline is `Send + Sync`.
+Incremental collections work the same way as in `incr-compute`, and the entire pipeline is `Send + Sync`.
 
 ```rust
-use incr_concurrent::{Runtime, IncrCollection};
+use incr_concurrent::{IncrCollection, Runtime};
 
 let rt = Runtime::new();
-let scores = rt.create_collection::<i64>();
+let scores: IncrCollection<i64> = rt.create_collection();
 
 scores.insert(&rt, 80);
 scores.insert(&rt, 95);
@@ -85,27 +85,17 @@ let passing = scores.filter(&rt, |s| *s >= 50);
 let curved = passing.map(&rt, |s| s + 10);
 let total = curved.reduce(&rt, |vals| vals.iter().sum::<i64>());
 
-assert_eq!(rt.get(total), 255); // (80+10) + (95+10) + (60+10)
+assert_eq!(rt.get(total), 265);
 ```
 
 ## All operators
 
-- **filter** keeps elements matching a predicate
-- **map** transforms each element
-- **count** tracks the number of elements
-- **reduce** folds all elements into a single value
-- **sort_by_key** produces a sorted view with positional deltas
-- **pairwise** emits consecutive pairs from a sorted collection
-- **group_by** partitions into keyed sub-collections
-- **join** pairs two collections on a shared key
-- **window** emits sliding windows of a given size from a sorted collection
+Same nine as `incr-compute`: filter, map, count, reduce, sort_by_key, pairwise, window, group_by, join. The `count` operator is incremental (O(1) per delta); `reduce` is snapshot-based; everything else is incremental on the delta log.
 
 ## When to use
 
-Use `incr-concurrent` when you need to share a computation graph across threads. If everything runs on a single thread, use [`incr`](https://crates.io/crates/incr) instead for better raw throughput.
+Use `incr-concurrent` when you need to share a computation graph across threads. If everything runs on a single thread, use [`incr-compute`](https://crates.io/crates/incr-compute) instead for the slightly faster uncontended path.
 
 ## Python
 
-```
-pip install incr-concurrent
-```
+Python bindings will be re-implemented against the v0.2 engine in 0.3.
diff --git a/crates/incr-concurrent/benches/collection_operators.rs b/crates/incr-concurrent/benches/collection_operators.rs
deleted file mode 100644
index ebb4b8f..0000000
--- a/crates/incr-concurrent/benches/collection_operators.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use incr_concurrent::Runtime;
-
-/// Batch: sort N timestamps, compute pairwise gaps, sum them.
-fn batch_travel_premium(timestamps: &[i64]) -> i64 {
-    let mut sorted = timestamps.to_vec();
-    sorted.sort();
-    sorted.windows(2).map(|w| w[1] - w[0]).sum()
-}
-
-/// Set up an incremental pipeline with N elements already inserted.
-/// Returns (runtime, collection, reduce_node) ready for mutation benchmarks.
-fn setup_incremental(
-    n: usize,
-) -> (
-    Runtime,
-    incr_concurrent::IncrCollection<i64>,
-    incr_concurrent::Incr<i64>,
-) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |t: &i64| *t);
-    let pairs = sorted.pairwise(&rt);
-    let gaps = pairs.map(&rt, |(a, b): &(i64, i64)| b - a);
-    let total = gaps.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-    for i in 0..n {
-        col.insert(&rt, (i as i64) * 10);
-    }
-    // Warmup: stabilize the graph
-    let _ = rt.get(total);
-
-    (rt, col, total)
-}
-
-fn bench_incremental_vs_batch(c: &mut Criterion) {
-    let mut group = c.benchmark_group("travel_premium");
-
-    for &n in &[10, 20, 40, 100, 500, 1000, 5000] {
-        // Batch benchmark
-        let timestamps: Vec<i64> = (0..n).map(|i| (i as i64) * 10).collect();
-        group.bench_with_input(BenchmarkId::new("batch", n), &timestamps, |b, ts| {
-            b.iter(|| black_box(batch_travel_premium(ts)));
-        });
-
-        // Incremental benchmark: measure cost of changing one element and reading result
-        group.bench_with_input(BenchmarkId::new("incremental_change", n), &n, |b, &n| {
-            let (rt, col, total) = setup_incremental(n);
-            // Change the middle element back and forth
-            let mid = (n / 2) as i64 * 10;
-            let mut toggle = true;
-            b.iter(|| {
-                if toggle {
-                    col.delete(&rt, &mid);
-                    col.insert(&rt, mid + 1); // shift by 1
-                } else {
-                    col.delete(&rt, &(mid + 1));
-                    col.insert(&rt, mid); // shift back
-                }
-                let result = rt.get(total);
-                toggle = !toggle;
-                black_box(result)
-            });
-        });
-    }
-
-    group.finish();
-}
-
-criterion_group!(benches, bench_incremental_vs_batch);
-criterion_main!(benches);
diff --git a/crates/incr-concurrent/benches/comparison.rs b/crates/incr-concurrent/benches/comparison.rs
deleted file mode 100644
index 40fd6cd..0000000
--- a/crates/incr-concurrent/benches/comparison.rs
+++ /dev/null
@@ -1,295 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use incr_concurrent::Runtime;
-use salsa::Setter;
-
-// Workload 1: Linear chain (input → f1 → f2 → ... → fn). Measures per-node
-// propagation cost.
-
-fn incr_chain_propagate(
-    size: usize,
-) -> (
-    Runtime,
-    incr_concurrent::Incr<i64>,
-    incr_concurrent::Incr<i64>,
-) {
-    let rt = Runtime::new();
-    let input = rt.create_input(1_i64);
-    let mut prev = input;
-    for _ in 0..size {
-        let dep = prev;
-        prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
-    }
-    let _ = rt.get(prev);
-    (rt, input, prev)
-}
-
-fn salsa_chain_propagate(size: usize) -> (salsa::DatabaseImpl, SalsaInput, usize) {
-    let db = salsa::DatabaseImpl::new();
-    let input = SalsaInput::new(&db, 1_i64);
-    // Salsa doesn't support dynamic chains via closures.
-    // We measure the overhead of a single tracked function call instead,
-    // and multiply conceptually. The real benchmark is the per-query cost.
-    (db, input, size)
-}
-
-// Salsa types for comparison
-#[salsa::input]
-struct SalsaInput {
-    value: i64,
-}
-
-#[salsa::tracked]
-fn salsa_add_one(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    input.value(db).wrapping_add(1)
-}
-
-#[salsa::tracked]
-fn salsa_chain_2(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    salsa_add_one(db, input).wrapping_add(1)
-}
-
-#[salsa::tracked]
-fn salsa_chain_4(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    let v = salsa_add_one(db, input);
-    let v = v.wrapping_add(1);
-    let v = v.wrapping_add(1);
-    v.wrapping_add(1)
-}
-
-// Workload 2: Diamond (input → [A, B] → output). Measures handling of
-// shared dependencies.
-
-fn incr_diamond_propagate() -> (
-    Runtime,
-    incr_concurrent::Incr<i64>,
-    incr_concurrent::Incr<i64>,
-) {
-    let rt = Runtime::new();
-    let input = rt.create_input(1_i64);
-    let a = {
-        let dep = input;
-        rt.create_query(move |rt| rt.get(dep).wrapping_add(10))
-    };
-    let b = {
-        let dep = input;
-        rt.create_query(move |rt| rt.get(dep).wrapping_add(100))
-    };
-    let output = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-    let _ = rt.get(output);
-    (rt, input, output)
-}
-
-#[salsa::tracked]
-fn salsa_diamond_a(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    input.value(db).wrapping_add(10)
-}
-
-#[salsa::tracked]
-fn salsa_diamond_b(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    input.value(db).wrapping_add(100)
-}
-
-#[salsa::tracked]
-fn salsa_diamond_out(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    salsa_diamond_a(db, input).wrapping_add(salsa_diamond_b(db, input))
-}
-
-// Workload 3: Early cutoff (input → clamp → downstream). Measures whether
-// early cutoff prevents unnecessary work.
-
-#[salsa::tracked]
-fn salsa_clamp(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    input.value(db).min(100)
-}
-
-#[salsa::tracked]
-fn salsa_after_clamp(db: &dyn salsa::Database, input: SalsaInput) -> i64 {
-    salsa_clamp(db, input).wrapping_add(1)
-}
-
-// Workload 4: Collection pipeline — insert into filter → map → count
-// Batch baseline: compute from scratch each time.
-
-fn batch_collection_insert(elements: &mut std::collections::HashSet<i64>, new_val: i64) -> usize {
-    elements.insert(new_val);
-    elements
-        .iter()
-        .filter(|x| *x % 2 == 0)
-        .map(|x| x * 2)
-        .count()
-}
-
-// Benchmarks
-
-fn bench_chain_comparison(c: &mut Criterion) {
-    let mut group = c.benchmark_group("chain_incr_vs_salsa");
-
-    // incr: propagate through chain
-    for size in [10, 100] {
-        group.bench_with_input(BenchmarkId::new("incr", size), &size, |b, &size| {
-            let (rt, input, output) = incr_chain_propagate(size);
-            let mut val = 1_i64;
-            b.iter(|| {
-                val += 1;
-                rt.set(input, val);
-                black_box(rt.get(output));
-            });
-        });
-    }
-
-    // salsa: single query re-evaluation (the comparable unit of work)
-    group.bench_function("salsa_single_query", |b| {
-        let mut db = salsa::DatabaseImpl::new();
-        let input = SalsaInput::new(&db, 1_i64);
-        let _ = salsa_add_one(&db, input);
-        let mut val = 1_i64;
-        b.iter(|| {
-            val += 1;
-            input.set_value(&mut db).to(val);
-            black_box(salsa_add_one(&db, input));
-        });
-    });
-
-    // salsa: 2-deep chain
-    group.bench_function("salsa_chain_2", |b| {
-        let mut db = salsa::DatabaseImpl::new();
-        let input = SalsaInput::new(&db, 1_i64);
-        let _ = salsa_chain_2(&db, input);
-        let mut val = 1_i64;
-        b.iter(|| {
-            val += 1;
-            input.set_value(&mut db).to(val);
-            black_box(salsa_chain_2(&db, input));
-        });
-    });
-
-    // salsa: 4-deep chain
-    group.bench_function("salsa_chain_4", |b| {
-        let mut db = salsa::DatabaseImpl::new();
-        let input = SalsaInput::new(&db, 1_i64);
-        let _ = salsa_chain_4(&db, input);
-        let mut val = 1_i64;
-        b.iter(|| {
-            val += 1;
-            input.set_value(&mut db).to(val);
-            black_box(salsa_chain_4(&db, input));
-        });
-    });
-
-    group.finish();
-}
-
-fn bench_diamond_comparison(c: &mut Criterion) {
-    let mut group = c.benchmark_group("diamond_incr_vs_salsa");
-
-    group.bench_function("incr", |b| {
-        let (rt, input, output) = incr_diamond_propagate();
-        let mut val = 1_i64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val);
-            black_box(rt.get(output));
-        });
-    });
-
-    group.bench_function("salsa", |b| {
-        let mut db = salsa::DatabaseImpl::new();
-        let input = SalsaInput::new(&db, 1_i64);
-        let _ = salsa_diamond_out(&db, input);
-        let mut val = 1_i64;
-        b.iter(|| {
-            val += 1;
-            input.set_value(&mut db).to(val);
-            black_box(salsa_diamond_out(&db, input));
-        });
-    });
-
-    group.finish();
-}
-
-fn bench_early_cutoff_comparison(c: &mut Criterion) {
-    let mut group = c.benchmark_group("early_cutoff_incr_vs_salsa");
-
-    group.bench_function("incr", |b| {
-        let rt = Runtime::new();
-        let input = rt.create_input(200_i64);
-        let clamped = rt.create_query(move |rt| rt.get(input).min(100));
-        let after = rt.create_query(move |rt| rt.get(clamped).wrapping_add(1));
-        let _ = rt.get(after);
-
-        let mut val = 200_i64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val); // Always > 100, clamp produces 100, early cutoff
-            black_box(rt.get(after));
-        });
-    });
-
-    group.bench_function("salsa", |b| {
-        let mut db = salsa::DatabaseImpl::new();
-        let input = SalsaInput::new(&db, 200_i64);
-        let _ = salsa_after_clamp(&db, input);
-
-        let mut val = 200_i64;
-        b.iter(|| {
-            val += 1;
-            input.set_value(&mut db).to(val);
-            black_box(salsa_after_clamp(&db, input));
-        });
-    });
-
-    group.finish();
-}
-
-fn bench_collection_vs_batch(c: &mut Criterion) {
-    let mut group = c.benchmark_group("collection_incr_vs_batch");
-
-    for size in [1_000, 10_000, 100_000] {
-        // incr: delta-based pipeline
-        group.bench_with_input(BenchmarkId::new("incr", size), &size, |b, &size| {
-            let rt = Runtime::new();
-            let col = rt.create_collection::<i64>();
-            let filtered = col.filter(&rt, |x| x % 2 == 0);
-            let mapped = filtered.map(&rt, |x| x * 2);
-            let count = mapped.count(&rt);
-
-            for i in 0..size {
-                col.insert(&rt, i);
-            }
-            let _ = rt.get(count);
-
-            let mut next = size;
-            b.iter(|| {
-                col.insert(&rt, next);
-                next += 1;
-                black_box(rt.get(count));
-            });
-        });
-
-        // batch: recompute from scratch
-        group.bench_with_input(BenchmarkId::new("batch", size), &size, |b, &size| {
-            let mut elements = std::collections::HashSet::new();
-            for i in 0..size {
-                elements.insert(i);
-            }
-
-            let mut next = size;
-            b.iter(|| {
-                let result = batch_collection_insert(&mut elements, next);
-                next += 1;
-                black_box(result);
-            });
-        });
-    }
-
-    group.finish();
-}
-
-criterion_group!(
-    comparison_benches,
-    bench_chain_comparison,
-    bench_diamond_comparison,
-    bench_early_cutoff_comparison,
-    bench_collection_vs_batch,
-);
-criterion_main!(comparison_benches);
diff --git a/crates/incr-concurrent/benches/concurrency_primitives.rs b/crates/incr-concurrent/benches/concurrency_primitives.rs
deleted file mode 100644
index e392a72..0000000
--- a/crates/incr-concurrent/benches/concurrency_primitives.rs
+++ /dev/null
@@ -1,649 +0,0 @@
-// crates/incr-concurrent/benches/concurrency_primitives.rs
-//
-// Microbenchmark: the cost of node access under different concurrency primitives.
-//
-// This benchmark is load-bearing for the architectural decision described in
-// devlogs/2026-04-05-core-design-values.md. The central question is whether a
-// unified Runtime whose uncontended single-threaded path pays near-zero overhead
-// for its concurrency primitives is feasible. If it is, incr gets one API for
-// both single-threaded and concurrent use, and the 175ns per-node budget survives.
-// If it is not, we split the runtime into local and shared variants, sharing
-// internals via generics over a concurrency strategy trait, so that neither side
-// subsidizes the other.
-//
-// The benchmark isolates the primitive cost by using a fixed u64 value field
-// (no Box<dyn Any>) and a representative 64-byte node layout, so the numbers
-// reflect concurrency-primitive cost rather than value-storage cost. The
-// Box<dyn Any> issue is a separate atom-level perf gap tracked in memory.
-//
-// Primitives compared (single-threaded, uncontended):
-//   1. Baseline      - Vec<Node>, direct field access, no sync. Theoretical floor.
-//   2. RefCell       - RefCell<Vec<Node>>, matches current engine.
-//   3. Atomic fields - each scalar is AtomicU64 with Relaxed ordering.
-//   4. Seqlock       - per-node version counter + relaxed atomic payload.
-//   5. Epoch         - crossbeam_epoch Atomic<Box<Node>>, pin-and-load reads.
-//
-// Workloads:
-//   - sequential_read : walk in order, read (state, value, verified, changed)
-//   - random_read     : read in a precomputed shuffled order
-//   - traversal       : follow a precomputed next-index chain, simulating ensure_clean
-//   - write_burst     : update state + verified_at for every node, simulating mark_dirty
-//
-// Sizes: 64 (L1), 1024 (L1 edge), 16384 (L2), 262144 (L3). Four sizes times four
-// workloads times five primitives is eighty measurements.
-
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use crossbeam_epoch::{self as epoch, Atomic, Owned};
-use rand::prelude::SliceRandom;
-use rand::SeedableRng;
-use std::cell::RefCell;
-use std::sync::atomic::{fence, AtomicU64, Ordering};
-
-// All node variants target ~64 bytes (one cache line) so that cache behavior
-// is comparable across variants.
-
-#[repr(C)]
-#[derive(Clone, Copy)]
-struct BaselineNode {
-    state: u64, // u8 widened to u64 to match atomic variant word size
-    value: u64,
-    verified_at: u64,
-    changed_at: u64,
-    _pad: [u64; 4], // 32 bytes of padding to simulate deps storage, total 64 bytes
-}
-
-impl BaselineNode {
-    fn new(i: u32) -> Self {
-        Self {
-            state: 0,
-            value: i as u64,
-            verified_at: i as u64,
-            changed_at: i as u64,
-            _pad: [0; 4],
-        }
-    }
-}
-
-#[repr(C)]
-struct AtomicNode {
-    state: AtomicU64,
-    value: AtomicU64,
-    verified_at: AtomicU64,
-    changed_at: AtomicU64,
-    _pad: [AtomicU64; 4],
-}
-
-impl AtomicNode {
-    fn new(i: u32) -> Self {
-        Self {
-            state: AtomicU64::new(0),
-            value: AtomicU64::new(i as u64),
-            verified_at: AtomicU64::new(i as u64),
-            changed_at: AtomicU64::new(i as u64),
-            _pad: [
-                AtomicU64::new(0),
-                AtomicU64::new(0),
-                AtomicU64::new(0),
-                AtomicU64::new(0),
-            ],
-        }
-    }
-}
-
-#[repr(C)]
-struct SeqlockNode {
-    version: AtomicU64,
-    state: AtomicU64,
-    value: AtomicU64,
-    verified_at: AtomicU64,
-    changed_at: AtomicU64,
-    _pad: [AtomicU64; 3],
-}
-
-impl SeqlockNode {
-    fn new(i: u32) -> Self {
-        Self {
-            version: AtomicU64::new(0),
-            state: AtomicU64::new(0),
-            value: AtomicU64::new(i as u64),
-            verified_at: AtomicU64::new(i as u64),
-            changed_at: AtomicU64::new(i as u64),
-            _pad: [AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0)],
-        }
-    }
-
-    /// Seqlock read: returns (state, value, verified_at, changed_at) consistent
-    /// as-of a single version. Retries if a concurrent writer is mid-update.
-    /// On an uncontended single-threaded path, the retry never fires.
-    #[inline(always)]
-    fn read_all(&self) -> (u64, u64, u64, u64) {
-        loop {
-            let v1 = self.version.load(Ordering::Acquire);
-            if v1 & 1 != 0 {
-                // Writer in progress
-                std::hint::spin_loop();
-                continue;
-            }
-            let state = self.state.load(Ordering::Relaxed);
-            let value = self.value.load(Ordering::Relaxed);
-            let verified_at = self.verified_at.load(Ordering::Relaxed);
-            let changed_at = self.changed_at.load(Ordering::Relaxed);
-            fence(Ordering::Acquire);
-            let v2 = self.version.load(Ordering::Relaxed);
-            if v1 == v2 {
-                return (state, value, verified_at, changed_at);
-            }
-        }
-    }
-
-    /// Seqlock write: bumps version odd, writes fields, bumps version even.
-    #[inline(always)]
-    fn write_state_verified(&self, state: u64, verified_at: u64) {
-        let v = self.version.load(Ordering::Relaxed);
-        self.version.store(v.wrapping_add(1), Ordering::Release);
-        self.state.store(state, Ordering::Relaxed);
-        self.verified_at.store(verified_at, Ordering::Relaxed);
-        self.version.store(v.wrapping_add(2), Ordering::Release);
-    }
-}
-
-/// Epoch variant. The payload lives in a heap allocation behind an Atomic<T>.
-/// Readers pin an epoch and load the pointer. Writers allocate a new payload
-/// and CAS it in, deferring destruction of the old one until all readers have
-/// advanced past the current epoch. This is the crossbeam_epoch pattern.
-#[repr(C)]
-struct EpochSlot {
-    payload: Atomic<BaselineNode>,
-}
-
-impl EpochSlot {
-    fn new(i: u32) -> Self {
-        Self {
-            payload: Atomic::new(BaselineNode::new(i)),
-        }
-    }
-}
-
-fn build_baseline(n: usize) -> Vec<BaselineNode> {
-    (0..n as u32).map(BaselineNode::new).collect()
-}
-
-fn build_refcell(n: usize) -> RefCell<Vec<BaselineNode>> {
-    RefCell::new(build_baseline(n))
-}
-
-fn build_atomic(n: usize) -> Vec<AtomicNode> {
-    (0..n as u32).map(AtomicNode::new).collect()
-}
-
-fn build_seqlock(n: usize) -> Vec<SeqlockNode> {
-    (0..n as u32).map(SeqlockNode::new).collect()
-}
-
-fn build_epoch(n: usize) -> Vec<EpochSlot> {
-    (0..n as u32).map(EpochSlot::new).collect()
-}
-
-// Precomputed access orders, built once outside the bench loop so that RNG
-// cost and chain-building cost do not pollute the measurement.
-
-fn shuffled_indices(n: usize, seed: u64) -> Vec<usize> {
-    let mut idx: Vec<usize> = (0..n).collect();
-    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
-    idx.shuffle(&mut rng);
-    idx
-}
-
-/// Build a "next index" chain: traversal_chain[i] gives the index to visit after i.
-/// Forms a single cycle touching every element exactly once. This simulates
-/// the linked traversal pattern of ensure_clean walking dependencies.
-fn traversal_chain(n: usize, seed: u64) -> Vec<usize> {
-    let shuffled = shuffled_indices(n, seed);
-    let mut chain = vec![0usize; n];
-    for i in 0..n {
-        chain[shuffled[i]] = shuffled[(i + 1) % n];
-    }
-    chain
-}
-
-#[inline(always)]
-fn seq_read_baseline(nodes: &[BaselineNode]) -> u64 {
-    let mut acc: u64 = 0;
-    for n in nodes {
-        acc = acc
-            .wrapping_add(n.state)
-            .wrapping_add(n.value)
-            .wrapping_add(n.verified_at)
-            .wrapping_add(n.changed_at);
-    }
-    acc
-}
-
-#[inline(always)]
-fn seq_read_refcell(nodes: &RefCell<Vec<BaselineNode>>) -> u64 {
-    let borrowed = nodes.borrow();
-    let mut acc: u64 = 0;
-    for n in borrowed.iter() {
-        acc = acc
-            .wrapping_add(n.state)
-            .wrapping_add(n.value)
-            .wrapping_add(n.verified_at)
-            .wrapping_add(n.changed_at);
-    }
-    acc
-}
-
-#[inline(always)]
-fn seq_read_atomic(nodes: &[AtomicNode]) -> u64 {
-    let mut acc: u64 = 0;
-    for n in nodes {
-        acc = acc
-            .wrapping_add(n.state.load(Ordering::Relaxed))
-            .wrapping_add(n.value.load(Ordering::Relaxed))
-            .wrapping_add(n.verified_at.load(Ordering::Relaxed))
-            .wrapping_add(n.changed_at.load(Ordering::Relaxed));
-    }
-    acc
-}
-
-#[inline(always)]
-fn seq_read_seqlock(nodes: &[SeqlockNode]) -> u64 {
-    let mut acc: u64 = 0;
-    for n in nodes {
-        let (s, v, ver, ch) = n.read_all();
-        acc = acc
-            .wrapping_add(s)
-            .wrapping_add(v)
-            .wrapping_add(ver)
-            .wrapping_add(ch);
-    }
-    acc
-}
-
-#[inline(always)]
-fn seq_read_epoch(nodes: &[EpochSlot]) -> u64 {
-    let guard = &epoch::pin();
-    let mut acc: u64 = 0;
-    for slot in nodes {
-        // Safety: the epoch pin guarantees the pointee is valid for the read
-        let shared = slot.payload.load(Ordering::Acquire, guard);
-        let node = unsafe { shared.deref() };
-        acc = acc
-            .wrapping_add(node.state)
-            .wrapping_add(node.value)
-            .wrapping_add(node.verified_at)
-            .wrapping_add(node.changed_at);
-    }
-    acc
-}
-
-#[inline(always)]
-fn rand_read_baseline(nodes: &[BaselineNode], order: &[usize]) -> u64 {
-    let mut acc: u64 = 0;
-    for &i in order {
-        let n = &nodes[i];
-        acc = acc
-            .wrapping_add(n.state)
-            .wrapping_add(n.value)
-            .wrapping_add(n.verified_at)
-            .wrapping_add(n.changed_at);
-    }
-    acc
-}
-
-#[inline(always)]
-fn rand_read_refcell(nodes: &RefCell<Vec<BaselineNode>>, order: &[usize]) -> u64 {
-    let borrowed = nodes.borrow();
-    let mut acc: u64 = 0;
-    for &i in order {
-        let n = &borrowed[i];
-        acc = acc
-            .wrapping_add(n.state)
-            .wrapping_add(n.value)
-            .wrapping_add(n.verified_at)
-            .wrapping_add(n.changed_at);
-    }
-    acc
-}
-
-#[inline(always)]
-fn rand_read_atomic(nodes: &[AtomicNode], order: &[usize]) -> u64 {
-    let mut acc: u64 = 0;
-    for &i in order {
-        let n = &nodes[i];
-        acc = acc
-            .wrapping_add(n.state.load(Ordering::Relaxed))
-            .wrapping_add(n.value.load(Ordering::Relaxed))
-            .wrapping_add(n.verified_at.load(Ordering::Relaxed))
-            .wrapping_add(n.changed_at.load(Ordering::Relaxed));
-    }
-    acc
-}
-
-#[inline(always)]
-fn rand_read_seqlock(nodes: &[SeqlockNode], order: &[usize]) -> u64 {
-    let mut acc: u64 = 0;
-    for &i in order {
-        let (s, v, ver, ch) = nodes[i].read_all();
-        acc = acc
-            .wrapping_add(s)
-            .wrapping_add(v)
-            .wrapping_add(ver)
-            .wrapping_add(ch);
-    }
-    acc
-}
-
-#[inline(always)]
-fn rand_read_epoch(nodes: &[EpochSlot], order: &[usize]) -> u64 {
-    let guard = &epoch::pin();
-    let mut acc: u64 = 0;
-    for &i in order {
-        let shared = nodes[i].payload.load(Ordering::Acquire, guard);
-        let node = unsafe { shared.deref() };
-        acc = acc
-            .wrapping_add(node.state)
-            .wrapping_add(node.value)
-            .wrapping_add(node.verified_at)
-            .wrapping_add(node.changed_at);
-    }
-    acc
-}
-
-// Traversal walks a precomputed chain of next-indices, simulating the
-// ensure_clean pattern where each node read leads to another. The chain
-// visits every node exactly once.
-
-#[inline(always)]
-fn traversal_baseline(nodes: &[BaselineNode], chain: &[usize], start: usize) -> u64 {
-    let mut i = start;
-    let mut acc: u64 = 0;
-    for _ in 0..nodes.len() {
-        let n = &nodes[i];
-        acc = acc.wrapping_add(n.state).wrapping_add(n.value);
-        i = chain[i];
-    }
-    acc
-}
-
-#[inline(always)]
-fn traversal_refcell(nodes: &RefCell<Vec<BaselineNode>>, chain: &[usize], start: usize) -> u64 {
-    let borrowed = nodes.borrow();
-    let mut i = start;
-    let mut acc: u64 = 0;
-    for _ in 0..borrowed.len() {
-        let n = &borrowed[i];
-        acc = acc.wrapping_add(n.state).wrapping_add(n.value);
-        i = chain[i];
-    }
-    acc
-}
-
-#[inline(always)]
-fn traversal_atomic(nodes: &[AtomicNode], chain: &[usize], start: usize) -> u64 {
-    let mut i = start;
-    let mut acc: u64 = 0;
-    for _ in 0..nodes.len() {
-        let n = &nodes[i];
-        acc = acc
-            .wrapping_add(n.state.load(Ordering::Relaxed))
-            .wrapping_add(n.value.load(Ordering::Relaxed));
-        i = chain[i];
-    }
-    acc
-}
-
-#[inline(always)]
-fn traversal_seqlock(nodes: &[SeqlockNode], chain: &[usize], start: usize) -> u64 {
-    let mut i = start;
-    let mut acc: u64 = 0;
-    for _ in 0..nodes.len() {
-        let (s, v, _, _) = nodes[i].read_all();
-        acc = acc.wrapping_add(s).wrapping_add(v);
-        i = chain[i];
-    }
-    acc
-}
-
-#[inline(always)]
-fn traversal_epoch(nodes: &[EpochSlot], chain: &[usize], start: usize) -> u64 {
-    let guard = &epoch::pin();
-    let mut i = start;
-    let mut acc: u64 = 0;
-    for _ in 0..nodes.len() {
-        let shared = nodes[i].payload.load(Ordering::Acquire, guard);
-        let node = unsafe { shared.deref() };
-        acc = acc.wrapping_add(node.state).wrapping_add(node.value);
-        i = chain[i];
-    }
-    acc
-}
-
-// Write Burst simulates mark_dirty: iterate and update the state and
-// verified_at fields of every node.
-
-#[inline(always)]
-fn write_burst_baseline(nodes: &mut [BaselineNode], rev: u64) {
-    for n in nodes {
-        n.state = 1;
-        n.verified_at = rev;
-    }
-}
-
-#[inline(always)]
-fn write_burst_refcell(nodes: &RefCell<Vec<BaselineNode>>, rev: u64) {
-    let mut borrowed = nodes.borrow_mut();
-    for n in borrowed.iter_mut() {
-        n.state = 1;
-        n.verified_at = rev;
-    }
-}
-
-#[inline(always)]
-fn write_burst_atomic(nodes: &[AtomicNode], rev: u64) {
-    for n in nodes {
-        n.state.store(1, Ordering::Relaxed);
-        n.verified_at.store(rev, Ordering::Relaxed);
-    }
-}
-
-#[inline(always)]
-fn write_burst_seqlock(nodes: &[SeqlockNode], rev: u64) {
-    for n in nodes {
-        n.write_state_verified(1, rev);
-    }
-}
-
-#[inline(always)]
-fn write_burst_epoch(nodes: &[EpochSlot], rev: u64) {
-    // Pin per-node rather than per-call. A single long-held pin accumulates
-    // deferred destroys in the thread-local garbage bag without allowing
-    // reclamation, which causes runaway memory growth under sustained writes.
-    // Per-node pinning lets the global epoch advance between operations so
-    // reclamation keeps up, and it reflects the realistic cost of using
-    // crossbeam_epoch for a write-heavy node store.
-    for slot in nodes {
-        let guard = &epoch::pin();
-        let current = slot.payload.load(Ordering::Acquire, guard);
-        let current_ref = unsafe { current.deref() };
-        let mut new_node = *current_ref;
-        new_node.state = 1;
-        new_node.verified_at = rev;
-        let new_owned = Owned::new(new_node);
-        match slot.payload.compare_exchange(
-            current,
-            new_owned,
-            Ordering::AcqRel,
-            Ordering::Acquire,
-            guard,
-        ) {
-            Ok(_) => unsafe {
-                guard.defer_destroy(current);
-            },
-            Err(_) => {
-                // Under contention the CAS would fail; uncontended it will not.
-                // If it does fail, we skip rather than retry for benchmark
-                // consistency (we are measuring uncontended cost).
-            }
-        }
-    }
-}
-
-// Sizes chosen to span cache hierarchy: 64 nodes (~4KB, L1), 1024 (~64KB, L1 edge),
-// 16384 (~1MB, L2), 65536 (~4MB, L3). The top size was originally 262144 (~16MB)
-// but the epoch write variant produced runaway garbage at that scale even with
-// per-node pinning, so 65536 is the honest "beyond L2" signal without distorting
-// other measurements through memory pressure.
-const SIZES: &[usize] = &[64, 1024, 16384, 65536];
-
-fn bench_sequential_read(c: &mut Criterion) {
-    let mut group = c.benchmark_group("sequential_read");
-
-    for &size in SIZES {
-        let baseline = build_baseline(size);
-        let refcell = build_refcell(size);
-        let atomic = build_atomic(size);
-        let seqlock = build_seqlock(size);
-        let epoch_nodes = build_epoch(size);
-
-        group.bench_with_input(BenchmarkId::new("baseline", size), &size, |b, _| {
-            b.iter(|| black_box(seq_read_baseline(&baseline)));
-        });
-        group.bench_with_input(BenchmarkId::new("refcell", size), &size, |b, _| {
-            b.iter(|| black_box(seq_read_refcell(&refcell)));
-        });
-        group.bench_with_input(BenchmarkId::new("atomic", size), &size, |b, _| {
-            b.iter(|| black_box(seq_read_atomic(&atomic)));
-        });
-        group.bench_with_input(BenchmarkId::new("seqlock", size), &size, |b, _| {
-            b.iter(|| black_box(seq_read_seqlock(&seqlock)));
-        });
-        group.bench_with_input(BenchmarkId::new("epoch", size), &size, |b, _| {
-            b.iter(|| black_box(seq_read_epoch(&epoch_nodes)));
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_random_read(c: &mut Criterion) {
-    let mut group = c.benchmark_group("random_read");
-
-    for &size in SIZES {
-        let baseline = build_baseline(size);
-        let refcell = build_refcell(size);
-        let atomic = build_atomic(size);
-        let seqlock = build_seqlock(size);
-        let epoch_nodes = build_epoch(size);
-        let order = shuffled_indices(size, 0xdeadbeef);
-
-        group.bench_with_input(BenchmarkId::new("baseline", size), &size, |b, _| {
-            b.iter(|| black_box(rand_read_baseline(&baseline, &order)));
-        });
-        group.bench_with_input(BenchmarkId::new("refcell", size), &size, |b, _| {
-            b.iter(|| black_box(rand_read_refcell(&refcell, &order)));
-        });
-        group.bench_with_input(BenchmarkId::new("atomic", size), &size, |b, _| {
-            b.iter(|| black_box(rand_read_atomic(&atomic, &order)));
-        });
-        group.bench_with_input(BenchmarkId::new("seqlock", size), &size, |b, _| {
-            b.iter(|| black_box(rand_read_seqlock(&seqlock, &order)));
-        });
-        group.bench_with_input(BenchmarkId::new("epoch", size), &size, |b, _| {
-            b.iter(|| black_box(rand_read_epoch(&epoch_nodes, &order)));
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_traversal(c: &mut Criterion) {
-    let mut group = c.benchmark_group("traversal");
-
-    for &size in SIZES {
-        let baseline = build_baseline(size);
-        let refcell = build_refcell(size);
-        let atomic = build_atomic(size);
-        let seqlock = build_seqlock(size);
-        let epoch_nodes = build_epoch(size);
-        let chain = traversal_chain(size, 0xcafef00d);
-
-        group.bench_with_input(BenchmarkId::new("baseline", size), &size, |b, _| {
-            b.iter(|| black_box(traversal_baseline(&baseline, &chain, 0)));
-        });
-        group.bench_with_input(BenchmarkId::new("refcell", size), &size, |b, _| {
-            b.iter(|| black_box(traversal_refcell(&refcell, &chain, 0)));
-        });
-        group.bench_with_input(BenchmarkId::new("atomic", size), &size, |b, _| {
-            b.iter(|| black_box(traversal_atomic(&atomic, &chain, 0)));
-        });
-        group.bench_with_input(BenchmarkId::new("seqlock", size), &size, |b, _| {
-            b.iter(|| black_box(traversal_seqlock(&seqlock, &chain, 0)));
-        });
-        group.bench_with_input(BenchmarkId::new("epoch", size), &size, |b, _| {
-            b.iter(|| black_box(traversal_epoch(&epoch_nodes, &chain, 0)));
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_write_burst(c: &mut Criterion) {
-    let mut group = c.benchmark_group("write_burst");
-
-    for &size in SIZES {
-        let mut baseline = build_baseline(size);
-        let refcell = build_refcell(size);
-        let atomic = build_atomic(size);
-        let seqlock = build_seqlock(size);
-        let epoch_nodes = build_epoch(size);
-        let mut rev: u64 = 1;
-
-        group.bench_with_input(BenchmarkId::new("baseline", size), &size, |b, _| {
-            b.iter(|| {
-                rev = rev.wrapping_add(1);
-                write_burst_baseline(&mut baseline, rev);
-                black_box(&baseline);
-            });
-        });
-        group.bench_with_input(BenchmarkId::new("refcell", size), &size, |b, _| {
-            b.iter(|| {
-                rev = rev.wrapping_add(1);
-                write_burst_refcell(&refcell, rev);
-                black_box(&refcell);
-            });
-        });
-        group.bench_with_input(BenchmarkId::new("atomic", size), &size, |b, _| {
-            b.iter(|| {
-                rev = rev.wrapping_add(1);
-                write_burst_atomic(&atomic, rev);
-                black_box(&atomic);
-            });
-        });
-        group.bench_with_input(BenchmarkId::new("seqlock", size), &size, |b, _| {
-            b.iter(|| {
-                rev = rev.wrapping_add(1);
-                write_burst_seqlock(&seqlock, rev);
-                black_box(&seqlock);
-            });
-        });
-        group.bench_with_input(BenchmarkId::new("epoch", size), &size, |b, _| {
-            b.iter(|| {
-                rev = rev.wrapping_add(1);
-                write_burst_epoch(&epoch_nodes, rev);
-                black_box(&epoch_nodes);
-            });
-        });
-    }
-
-    group.finish();
-}
-
-criterion_group!(
-    benches,
-    bench_sequential_read,
-    bench_random_read,
-    bench_traversal,
-    bench_write_burst
-);
-criterion_main!(benches);
diff --git a/crates/incr-concurrent/benches/concurrent_throughput.rs b/crates/incr-concurrent/benches/concurrent_throughput.rs
deleted file mode 100644
index 8ea46f6..0000000
--- a/crates/incr-concurrent/benches/concurrent_throughput.rs
+++ /dev/null
@@ -1,66 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use incr_concurrent::Runtime;
-use std::sync::Arc;
-use std::thread;
-
-fn concurrent_reads(c: &mut Criterion) {
-    let rt = Arc::new(Runtime::new());
-    let input = rt.create_input(42_u64);
-    let query = rt.create_query(move |rt| rt.get(input) * 2);
-    let _ = rt.get(query);
-
-    for n_readers in [1, 2, 4, 8] {
-        c.bench_function(&format!("concurrent_reads_{n_readers}"), |b| {
-            b.iter(|| {
-                let handles: Vec<_> = (0..n_readers)
-                    .map(|_| {
-                        let rt = rt.clone();
-                        thread::spawn(move || {
-                            for _ in 0..1000 {
-                                let _ = rt.get(query);
-                            }
-                        })
-                    })
-                    .collect();
-                for h in handles {
-                    h.join().unwrap();
-                }
-            })
-        });
-    }
-}
-
-fn concurrent_read_write(c: &mut Criterion) {
-    let rt = Arc::new(Runtime::new());
-    let input = rt.create_input(0_u64);
-    let query = rt.create_query(move |rt| rt.get(input) + 1);
-    let _ = rt.get(query);
-
-    c.bench_function("concurrent_read_write_4readers", |b| {
-        b.iter(|| {
-            let writer_rt = rt.clone();
-            let writer = thread::spawn(move || {
-                for i in 0..100u64 {
-                    writer_rt.set(input, i);
-                }
-            });
-            let readers: Vec<_> = (0..4)
-                .map(|_| {
-                    let rt = rt.clone();
-                    thread::spawn(move || {
-                        for _ in 0..1000 {
-                            let _ = rt.get(query);
-                        }
-                    })
-                })
-                .collect();
-            writer.join().unwrap();
-            for r in readers {
-                r.join().unwrap();
-            }
-        })
-    });
-}
-
-criterion_group!(benches, concurrent_reads, concurrent_read_write);
-criterion_main!(benches);
diff --git a/crates/incr-concurrent/benches/contended_concurrency.rs b/crates/incr-concurrent/benches/contended_concurrency.rs
deleted file mode 100644
index 2572a00..0000000
--- a/crates/incr-concurrent/benches/contended_concurrency.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-// crates/incr-concurrent/benches/contended_concurrency.rs
-//
-// Multi-threaded contended benchmark for atomic-field node access.
-//
-// The single-threaded benchmark (concurrency_primitives.rs) established that
-// atomic fields with Relaxed ordering match baseline direct access within 1%
-// on the uncontended path. This benchmark tests whether that design holds up
-// under real contention: whether readers scale linearly, whether partitioned
-// writers stay cheap, and whether the Release/Acquire publish pattern used
-// for completing a compute and publishing a value stays cheap as readers and
-// writers multiply.
-//
-// Workloads:
-//   read_scaling       - N threads reading all nodes. Should be near-linear.
-//   partitioned_write  - N threads writing disjoint partitions. Should also be
-//                        near-linear because no cache line is shared.
-//   overlapping_write  - N threads writing the same nodes. Pessimistic case;
-//                        measures cache-coherence cost under contention.
-//   acquire_release    - All threads read via Acquire loads and occasionally
-//                        publish via Release stores. Realistic pattern for
-//                        completing a compute and publishing new value.
-//
-// Timing approach: criterion iter_custom with std::thread::scope. The scope
-// blocks until all spawned threads complete, which means we time the full
-// "spawn + barrier + iters of work + join" sequence. For iter counts large
-// enough to give criterion its ~50ms sample window, the thread-spawn overhead
-// (~50 microseconds for 8 threads) is under 0.2% of sample time and does not
-// distort the measurement.
-//
-// Node shape: AtomicNode (64 bytes, AtomicU64 fields). Size: 16384 nodes
-// (~1 MB, L2 resident on the dev machine i7-9750H). Thread counts: 1, 2, 4, 8.
-
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::{Arc, Barrier};
-use std::thread;
-use std::time::{Duration, Instant};
-
-#[repr(C)]
-struct AtomicNode {
-    state: AtomicU64,
-    value: AtomicU64,
-    verified_at: AtomicU64,
-    changed_at: AtomicU64,
-    _pad: [AtomicU64; 4],
-}
-
-impl AtomicNode {
-    fn new(i: u32) -> Self {
-        Self {
-            state: AtomicU64::new(0),
-            value: AtomicU64::new(i as u64),
-            verified_at: AtomicU64::new(i as u64),
-            changed_at: AtomicU64::new(i as u64),
-            _pad: [
-                AtomicU64::new(0),
-                AtomicU64::new(0),
-                AtomicU64::new(0),
-                AtomicU64::new(0),
-            ],
-        }
-    }
-}
-
-fn build_nodes(n: usize) -> Arc<Vec<AtomicNode>> {
-    Arc::new((0..n as u32).map(AtomicNode::new).collect())
-}
-
-// Workload functions all operate on a shared &[AtomicNode] and return an
-// aggregated u64 (or nothing for writes) to prevent the compiler from
-// eliminating the work.
-
-#[inline(always)]
-fn read_all_relaxed(nodes: &[AtomicNode]) -> u64 {
-    let mut acc: u64 = 0;
-    for n in nodes {
-        acc = acc
-            .wrapping_add(n.state.load(Ordering::Relaxed))
-            .wrapping_add(n.value.load(Ordering::Relaxed))
-            .wrapping_add(n.verified_at.load(Ordering::Relaxed))
-            .wrapping_add(n.changed_at.load(Ordering::Relaxed));
-    }
-    acc
-}
-
-#[inline(always)]
-fn write_partition_relaxed(
-    nodes: &[AtomicNode],
-    partition: usize,
-    num_partitions: usize,
-    rev: u64,
-) {
-    let start = (nodes.len() * partition) / num_partitions;
-    let end = (nodes.len() * (partition + 1)) / num_partitions;
-    for n in &nodes[start..end] {
-        n.state.store(1, Ordering::Relaxed);
-        n.verified_at.store(rev, Ordering::Relaxed);
-    }
-}
-
-#[inline(always)]
-fn write_all_relaxed(nodes: &[AtomicNode], rev: u64) {
-    for n in nodes {
-        n.state.store(1, Ordering::Relaxed);
-        n.verified_at.store(rev, Ordering::Relaxed);
-    }
-}
-
-#[inline(always)]
-fn consume_value_acquire(nodes: &[AtomicNode]) -> u64 {
-    let mut acc: u64 = 0;
-    for n in nodes {
-        let state = n.state.load(Ordering::Acquire);
-        if state != 0 {
-            acc = acc.wrapping_add(n.value.load(Ordering::Relaxed));
-        }
-    }
-    acc
-}
-
-#[inline(always)]
-fn publish_value_release(nodes: &[AtomicNode], rev: u64) {
-    for n in nodes {
-        n.value.store(rev, Ordering::Relaxed);
-        n.state.store(1, Ordering::Release);
-    }
-}
-
-// Generic parallel-timing harness: takes a closure that receives the thread
-// index and does one iteration of work. Returns the total wall-clock duration
-// for `iters` iterations across `threads` threads, all starting at a barrier.
-
-fn run_parallel<F>(iters: u64, threads: usize, work: F) -> Duration
-where
-    F: Fn(usize) + Sync,
-{
-    let barrier = Barrier::new(threads);
-    let work_ref = &work;
-    let barrier_ref = &barrier;
-    let start = Instant::now();
-    thread::scope(|s| {
-        for t in 0..threads {
-            s.spawn(move || {
-                barrier_ref.wait();
-                for _ in 0..iters {
-                    work_ref(t);
-                }
-            });
-        }
-    });
-    start.elapsed()
-}
-
-const SIZE: usize = 16384;
-const THREAD_COUNTS: &[usize] = &[1, 2, 4, 8];
-
-fn bench_read_scaling(c: &mut Criterion) {
-    let mut group = c.benchmark_group("read_scaling");
-    group.sample_size(30);
-
-    for &threads in THREAD_COUNTS {
-        let nodes = build_nodes(SIZE);
-        group.bench_with_input(
-            BenchmarkId::from_parameter(threads),
-            &threads,
-            |b, &threads| {
-                b.iter_custom(|iters| {
-                    let nodes_ref: &Vec<AtomicNode> = &nodes;
-                    run_parallel(iters, threads, |_t| {
-                        black_box(read_all_relaxed(nodes_ref));
-                    })
-                });
-            },
-        );
-    }
-    group.finish();
-}
-
-fn bench_partitioned_write(c: &mut Criterion) {
-    let mut group = c.benchmark_group("partitioned_write");
-    group.sample_size(30);
-
-    for &threads in THREAD_COUNTS {
-        let nodes = build_nodes(SIZE);
-        group.bench_with_input(
-            BenchmarkId::from_parameter(threads),
-            &threads,
-            |b, &threads| {
-                let mut rev: u64 = 1;
-                b.iter_custom(|iters| {
-                    let nodes_ref: &Vec<AtomicNode> = &nodes;
-                    rev = rev.wrapping_add(1);
-                    let current_rev = rev;
-                    run_parallel(iters, threads, move |t| {
-                        write_partition_relaxed(nodes_ref, t, threads, current_rev);
-                    })
-                });
-            },
-        );
-    }
-    group.finish();
-}
-
-fn bench_overlapping_write(c: &mut Criterion) {
-    let mut group = c.benchmark_group("overlapping_write");
-    group.sample_size(30);
-
-    for &threads in THREAD_COUNTS {
-        let nodes = build_nodes(SIZE);
-        group.bench_with_input(
-            BenchmarkId::from_parameter(threads),
-            &threads,
-            |b, &threads| {
-                let mut rev: u64 = 1;
-                b.iter_custom(|iters| {
-                    let nodes_ref: &Vec<AtomicNode> = &nodes;
-                    rev = rev.wrapping_add(1);
-                    let current_rev = rev;
-                    run_parallel(iters, threads, move |_t| {
-                        write_all_relaxed(nodes_ref, current_rev);
-                    })
-                });
-            },
-        );
-    }
-    group.finish();
-}
-
-/// Acquire/Release publish pattern. One thread (index 0) acts as the publisher,
-/// calling publish_value_release on every iteration. The remaining N-1 threads
-/// act as consumers, calling consume_value_acquire. This measures the realistic
-/// cost of the pattern incr will use for completing a compute and publishing
-/// the new value to downstream readers. When threads==1 we measure the writer
-/// alone; when threads>1 we measure the writer plus N-1 readers.
-fn bench_acquire_release(c: &mut Criterion) {
-    let mut group = c.benchmark_group("acquire_release");
-    group.sample_size(30);
-
-    for &threads in THREAD_COUNTS {
-        let nodes = build_nodes(SIZE);
-        group.bench_with_input(
-            BenchmarkId::from_parameter(threads),
-            &threads,
-            |b, &threads| {
-                let mut rev: u64 = 1;
-                b.iter_custom(|iters| {
-                    let nodes_ref: &Vec<AtomicNode> = &nodes;
-                    rev = rev.wrapping_add(1);
-                    let current_rev = rev;
-                    run_parallel(iters, threads, move |t| {
-                        if t == 0 {
-                            publish_value_release(nodes_ref, current_rev);
-                        } else {
-                            black_box(consume_value_acquire(nodes_ref));
-                        }
-                    })
-                });
-            },
-        );
-    }
-    group.finish();
-}
-
-criterion_group!(
-    benches,
-    bench_read_scaling,
-    bench_partitioned_write,
-    bench_overlapping_write,
-    bench_acquire_release
-);
-criterion_main!(benches);
diff --git a/crates/incr-concurrent/benches/performance.rs b/crates/incr-concurrent/benches/performance.rs
deleted file mode 100644
index 195e3d1..0000000
--- a/crates/incr-concurrent/benches/performance.rs
+++ /dev/null
@@ -1,275 +0,0 @@
-// crates/incr-concurrent/benches/performance.rs
-use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use incr_concurrent::{Incr, Runtime};
-
-/// Build a linear chain: input -> n1 -> n2 -> ... -> output
-fn build_chain(size: usize) -> (Runtime, Incr<i64>, Incr<i64>) {
-    let rt = Runtime::new();
-    let input = rt.create_input(1_i64);
-    let mut prev: Incr<i64> = input;
-    for _ in 0..size {
-        let dep = prev;
-        prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
-    }
-    let _ = rt.get(prev);
-    (rt, input, prev)
-}
-
-/// Build a wide fan-out: input -> [n1, n2, ..., n_width] -> output
-fn build_fanout(width: usize) -> (Runtime, Incr<i64>, Incr<i64>) {
-    let rt = Runtime::new();
-    let input = rt.create_input(1_i64);
-    let mut intermediates: Vec<Incr<i64>> = Vec::new();
-    for i in 0..width {
-        let dep = input;
-        let offset = i as i64;
-        intermediates.push(rt.create_query(move |rt| rt.get(dep).wrapping_add(offset)));
-    }
-    // Sum all intermediates
-    let first = intermediates[0];
-    let output = if intermediates.len() == 1 {
-        first
-    } else {
-        let nodes = intermediates.clone();
-        rt.create_query(move |rt| nodes.iter().map(|n| rt.get(*n)).sum::<i64>())
-    };
-    let _ = rt.get(output);
-    (rt, input, output)
-}
-
-fn build_layered(
-    num_inputs: usize,
-    nodes_per_layer: usize,
-    num_layers: usize,
-) -> (Runtime, Vec<Incr<i64>>, Incr<i64>) {
-    let rt = Runtime::new();
-    let mut inputs = Vec::new();
-    let mut all_nodes: Vec<Incr<i64>> = Vec::new();
-
-    for i in 0..num_inputs {
-        let node = rt.create_input(i as i64);
-        inputs.push(node);
-        all_nodes.push(node);
-    }
-
-    for _ in 0..num_layers {
-        let available = all_nodes.len();
-        for j in 0..nodes_per_layer {
-            let a = all_nodes[j % available];
-            let b = all_nodes[(j + 1) % available];
-            let node = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            all_nodes.push(node);
-        }
-    }
-
-    let last = *all_nodes.last().unwrap();
-    let _ = rt.get(last);
-    (rt, inputs, last)
-}
-
-fn bench_propagate_single(c: &mut Criterion) {
-    let mut group = c.benchmark_group("propagate_single_change");
-
-    for size in [100, 1_000, 10_000] {
-        group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
-            let (rt, input, output) = build_chain(size);
-            let mut val = 1_i64;
-            b.iter(|| {
-                val += 1;
-                rt.set(input, val);
-                black_box(rt.get(output));
-            });
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_early_cutoff(c: &mut Criterion) {
-    c.bench_function("early_cutoff_chain_1000", |b| {
-        let rt = Runtime::new();
-        let input = rt.create_input(1_i64);
-        let clamped = {
-            let dep = input;
-            rt.create_query(move |rt| rt.get(dep).min(100))
-        };
-        let mut prev: Incr<i64> = clamped;
-        for _ in 0..999 {
-            let dep = prev;
-            prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
-        }
-        let output = prev;
-        let _ = rt.get(output);
-
-        // Set input to >100 so clamp activates
-        rt.set(input, 200);
-        let _ = rt.get(output);
-
-        let mut val = 200_i64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val); // Clamped to 100, same as before
-            black_box(rt.get(output));
-        });
-    });
-}
-
-fn bench_overhead_vs_batch(c: &mut Criterion) {
-    let mut group = c.benchmark_group("overhead_vs_batch");
-
-    for size in [100, 1_000, 10_000] {
-        group.bench_with_input(
-            BenchmarkId::new("incremental_initial", size),
-            &size,
-            |b, &size| {
-                b.iter(|| {
-                    let (rt, _, output) = build_chain(size);
-                    black_box(rt.get(output));
-                });
-            },
-        );
-
-        group.bench_with_input(BenchmarkId::new("batch_plain", size), &size, |b, &size| {
-            b.iter(|| {
-                let mut val = 1_i64;
-                for _ in 0..size {
-                    val = val.wrapping_add(1);
-                }
-                black_box(val);
-            });
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_scaling(c: &mut Criterion) {
-    let mut group = c.benchmark_group("scaling_with_graph_size");
-
-    for &(inputs, per_layer, layers) in &[
-        (10, 10, 1),    // ~20 nodes
-        (10, 10, 10),   // ~110 nodes
-        (10, 10, 100),  // ~1010 nodes
-        (50, 50, 20),   // ~1050 nodes
-        (100, 100, 10), // ~1100 nodes
-    ] {
-        let total = inputs + per_layer * layers;
-        group.bench_with_input(
-            BenchmarkId::from_parameter(format!("{}n", total)),
-            &(inputs, per_layer, layers),
-            |b, &(inputs, per_layer, layers)| {
-                let (rt, input_nodes, output) = build_layered(inputs, per_layer, layers);
-                let mut val = 100_i64;
-                b.iter(|| {
-                    val += 1;
-                    rt.set(input_nodes[0], val);
-                    black_box(rt.get(output));
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-fn bench_collection_insert(c: &mut Criterion) {
-    let mut group = c.benchmark_group("collection_insert_throughput");
-
-    for size in [1_000, 10_000, 100_000] {
-        group.bench_with_input(
-            BenchmarkId::from_parameter(format!("{}elem", size)),
-            &size,
-            |b, &size| {
-                let rt = Runtime::new();
-                let col = rt.create_collection::<i64>();
-                let filtered = col.filter(&rt, |x| x % 2 == 0);
-                let mapped = filtered.map(&rt, |x| x * 2);
-                let count = mapped.count(&rt);
-
-                for i in 0..size {
-                    col.insert(&rt, i);
-                }
-                let _ = rt.get(count);
-
-                let mut next = size;
-                b.iter(|| {
-                    col.insert(&rt, next);
-                    next += 1;
-                    black_box(rt.get(count));
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-fn bench_collection_delete(c: &mut Criterion) {
-    let mut group = c.benchmark_group("collection_delete_throughput");
-
-    for size in [1_000, 10_000, 100_000] {
-        group.bench_with_input(
-            BenchmarkId::from_parameter(format!("{}elem", size)),
-            &size,
-            |b, &size| {
-                let rt = Runtime::new();
-                let col = rt.create_collection::<i64>();
-                let filtered = col.filter(&rt, |x| x % 2 == 0);
-                let count = filtered.count(&rt);
-
-                for i in 0..size {
-                    col.insert(&rt, i);
-                }
-                let _ = rt.get(count);
-
-                let mut idx = 0_i64;
-                b.iter(|| {
-                    let val = idx % size;
-                    col.delete(&rt, &val);
-                    black_box(rt.get(count));
-                    col.insert(&rt, val);
-                    let _ = rt.get(count);
-                    idx += 1;
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-fn bench_collection_pipeline_depth(c: &mut Criterion) {
-    c.bench_function("5_stage_pipeline_insert", |b| {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let stage1 = col.filter(&rt, |x| *x > 0);
-        let stage2 = stage1.filter(&rt, |x| *x < 1_000_000);
-        let stage3 = stage2.map(&rt, |x| x * 2);
-        let stage4 = stage3.filter(&rt, |x| *x < 500_000);
-        let count = stage4.count(&rt);
-
-        for i in 1..10_001_i64 {
-            col.insert(&rt, i);
-        }
-        let _ = rt.get(count);
-
-        let mut next = 10_001_i64;
-        b.iter(|| {
-            col.insert(&rt, next);
-            next += 1;
-            black_box(rt.get(count));
-        });
-    });
-}
-
-criterion_group!(
-    benches,
-    bench_propagate_single,
-    bench_early_cutoff,
-    bench_overhead_vs_batch,
-    bench_scaling,
-    bench_collection_insert,
-    bench_collection_delete,
-    bench_collection_pipeline_depth,
-);
-criterion_main!(benches);
diff --git a/crates/incr-concurrent/benches/regression.rs b/crates/incr-concurrent/benches/regression.rs
deleted file mode 100644
index 5dab0b7..0000000
--- a/crates/incr-concurrent/benches/regression.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use incr_concurrent::Runtime;
-
-fn hot_read_input(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(42_u64);
-    let _ = rt.get(input);
-
-    c.bench_function("hot_read_input", |b| b.iter(|| black_box(rt.get(input))));
-}
-
-fn hot_read_query(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(42_u64);
-    let query = rt.create_query(move |rt| rt.get(input) * 2);
-    let _ = rt.get(query);
-
-    c.bench_function("hot_read_query", |b| b.iter(|| black_box(rt.get(query))));
-}
-
-fn set_input_no_deps(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(0_u64);
-
-    c.bench_function("set_input_no_deps", |b| {
-        let mut val = 0u64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val);
-        })
-    });
-}
-
-fn propagate_chain_100(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let input = rt.create_input(0_u64);
-    let mut prev = input;
-    for _ in 0..100 {
-        let dep = prev;
-        prev = rt.create_query(move |rt| rt.get(dep) + 1);
-    }
-    let tail = prev;
-    let _ = rt.get(tail);
-
-    c.bench_function("propagate_chain_100", |b| {
-        let mut val = 0u64;
-        b.iter(|| {
-            val += 1;
-            rt.set(input, val);
-            black_box(rt.get(tail))
-        })
-    });
-}
-
-fn collection_pipeline(c: &mut Criterion) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let evens = col.filter(&rt, |x| x % 2 == 0);
-    let doubled = evens.map(&rt, |x| x * 2);
-    let sum = doubled.reduce(&rt, |elems| -> i64 { elems.iter().sum() });
-
-    for i in 0..50 {
-        col.insert(&rt, i);
-    }
-    let _ = rt.get(sum);
-
-    c.bench_function("collection_pipeline", |b| {
-        let mut next = 50i64;
-        b.iter(|| {
-            col.insert(&rt, next);
-            next += 1;
-            black_box(rt.get(sum))
-        })
-    });
-}
-
-criterion_group!(
-    benches,
-    hot_read_input,
-    hot_read_query,
-    set_input_no_deps,
-    propagate_chain_100,
-    collection_pipeline
-);
-criterion_main!(benches);
diff --git a/crates/incr-concurrent/src/arena.rs b/crates/incr-concurrent/src/arena.rs
deleted file mode 100644
index c33becb..0000000
--- a/crates/incr-concurrent/src/arena.rs
+++ /dev/null
@@ -1,1112 +0,0 @@
-//! Typed value arenas.
-//!
-//! Per section 5.2 of the concurrent core rewrite spec, node values live in
-//! per-type arenas indexed by a type tag stored in `NodeData`. This module
-//! defines the type-erased arena trait stored in the runtime's arena
-//! registry, and the concrete arena implementations the runtime uses to
-//! store values without paying the `Box<dyn Any>` tax.
-//!
-//! ## Arena implementations
-//!
-//! - [`AtomicPrimitiveArena`] holds `Copy` primitive values (`u32`, `i32`,
-//!   `u64`, `i64`, `f32`, `f64`, `bool`) inline as atomic cells. Reads and
-//!   writes are tear-free by construction, so the arena does not coordinate
-//!   with any state machine for the value slot itself. The node state
-//!   machine is still the authority on whether the value is *meaningful*
-//!   (Clean vs Dirty), but the raw bytes can be loaded any time without
-//!   undefined behavior.
-//! - [`GenericArena`] holds everything else (`Clone + Send + Sync` types)
-//!   in per-slot `Mutex<Option<T>>` cells. Every read and write locks the
-//!   slot's mutex, so the arena itself prevents data races on the value.
-//!   Whether the value is *meaningful* is still the node state machine's
-//!   call: readers are expected to have observed `Clean` and writers to
-//!   own `Computing`. The arena does not enforce that protocol; the
-//!   runtime is responsible for it.
-//!
-//! ## Why `Option<T>` instead of the spec's `MaybeUninit<T>`
-//!
-//! Section 5.2 of the spec presents `UnsafeCell<MaybeUninit<T>>` as a
-//! sketch. `Option<T>` is a cleaner equivalent that preserves the spec's
-//! intent while avoiding two costs that `MaybeUninit` would impose:
-//!
-//! 1. **Drop correctness comes for free.** `Option::drop` destructs the
-//!    contained `T` if `Some` and does nothing if `None`. With
-//!    `MaybeUninit` the arena would have to track per-slot initialization
-//!    (via an extra `AtomicBool` or a bitmap) just to know what to drop
-//!    when the arena itself is dropped, because the node state machine
-//!    is not available at `Drop` time.
-//! 2. **Query nodes can defer initialization.** A query slot is allocated
-//!    before the first compute runs and starts as `None`. With
-//!    `MaybeUninit` the caller would have to provide a placeholder `T`
-//!    at reserve time, which would force `T: Default` on the user-facing
-//!    query API.
-//!
-//! The cost is one discriminant byte per slot (often niche-optimized
-//! away for `Box`, `Vec`, `String`, `&T`, etc.) and one predictable
-//! branch on the read path. Negligible.
-//!
-//! ## Segmented storage and lock-free growth
-//!
-//! Both arena kinds use a segmented storage layout so that slots can be
-//! located without a lock while the arena still grows dynamically. A
-//! fixed-size top-level array of `AtomicPtr<Segment>` indexes segments,
-//! which are allocated on demand and never deallocated for the arena's
-//! lifetime. Readers Acquire-load the segment pointer, then access the slot
-//! (a Relaxed atomic load for primitives, a mutex lock for generic values).
-//! Growth never moves existing slots, so readers never see dangling references.
-//!
-//! Per-arena capacity is [`MAX_SLOTS`] = `MAX_SEGMENTS * SEGMENT_SIZE` (1M
-//! slots at the current sizing). Exhaustion panics loudly at the reserve
-//! call so we find out early. The numbers can be tuned after real workloads.
-//!
-//! ## Who calls what
-//!
-//! Arenas do not own the concurrency policy. The runtime is the authority
-//! on when a slot may be written and when a value is safe to read. Concretely:
-//!
-//! - `reserve` may be called concurrently by multiple threads; the
-//!   implementation handles the segment-allocation race with a CAS. In
-//!   practice the runtime serializes node creation through its write
-//!   mutex, but the arena does not rely on that.
-//! - `write` for a query node's value is called by the thread that owns
-//!   the node's `Computing` state (guaranteed by the state machine CAS).
-//!   It uses `Relaxed` ordering because the accompanying state transition
-//!   to `Clean` is the `Release` publish point.
-//! - `write` for an input node's value is called under the runtime's write
-//!   mutex. The runtime is responsible for issuing a `Release` store on
-//!   the input node's state after calling `arena.write`, so that subsequent
-//!   readers that Acquire-load the input's state observe the updated slot.
-//!   See spec section 6.4; this publish step is implied by the memory
-//!   ordering contract even though the spec's pseudocode does not spell
-//!   it out for the input node itself (only for its dependents).
-//! - `read` uses `Relaxed` ordering. The caller must have established
-//!   happens-before with the writer via an Acquire load on the node's state
-//!   (or an equivalent synchronization point) before calling `read`.
-
-use std::any::TypeId;
-use std::sync::atomic::{
-    AtomicBool, AtomicI32, AtomicI64, AtomicPtr, AtomicU32, AtomicU64, Ordering,
-};
-
-/// Top-level segment count. Combined with [`SEGMENT_SIZE`] this fixes the
-/// maximum number of slots per arena instance.
-const MAX_SEGMENTS: usize = 1024;
-
-/// Slots per segment. Power of two so that slot-to-segment math is a shift
-/// and a mask.
-const SEGMENT_SIZE: usize = 1024;
-const SEGMENT_SHIFT: u32 = 10;
-const SEGMENT_MASK: u32 = (SEGMENT_SIZE as u32) - 1;
-
-/// Maximum number of slots that a single arena can hold. Reserving beyond
-/// this panics. At the current sizing this is one million slots per value
-/// type, which covers realistic workloads by a wide margin.
-pub(crate) const MAX_SLOTS: u32 = (MAX_SEGMENTS * SEGMENT_SIZE) as u32;
-
-const _: () = assert!(SEGMENT_SIZE.is_power_of_two());
-const _: () = assert!(1 << SEGMENT_SHIFT == SEGMENT_SIZE);
-
-/// Type-erased arena trait stored in the runtime's arena registry.
-///
-/// The runtime holds `Box<dyn ErasedArena>` keyed by `TypeId`, and downcasts
-/// to the concrete arena type at each `get::<T>` call site, which carries
-/// the type parameter statically. The trait surface is intentionally
-/// minimal: type identification only. Concrete operations (reserve, read,
-/// write) live on the concrete arena types and are reached via downcast.
-pub trait ErasedArena: Send + Sync {
-    /// Returns the `TypeId` of the value type this arena holds.
-    fn erased_type_id(&self) -> TypeId;
-
-    /// Upcast helper so the registry can downcast through `Any`-like
-    /// machinery without pulling in `std::any::Any` directly on every
-    /// concrete arena.
-    fn as_any(&self) -> &dyn std::any::Any;
-}
-
-/// A `Copy` primitive type that can be stored tear-free in an atomic cell.
-///
-/// Implemented for the fixed set of primitive types below. The trait is
-/// crate-private so adding a variant is a deliberate act requiring choice
-/// of backing atomic and a tear-free read justification.
-///
-/// Floats are stored in their bit-pattern-equivalent integer atomic
-/// (`AtomicU32` for `f32`, `AtomicU64` for `f64`) via `to_bits` / `from_bits`.
-/// This is sound because `f32::to_bits` and `f64::to_bits` are pure
-/// reinterpret-casts and `from_bits` accepts every bit pattern (including
-/// NaN payloads).
-pub(crate) trait AtomicPrimitive:
-    Copy + PartialEq + std::fmt::Debug + Send + Sync + 'static
-{
-    /// The atomic cell type used to store a value of this primitive.
-    type Atomic: Send + Sync;
-
-    /// A well-defined zero value used to initialize fresh segment slots
-    /// before the first real reservation touches them.
-    fn zero() -> Self;
-
-    /// Construct a new atomic cell holding `value`.
-    fn new_atomic(value: Self) -> Self::Atomic;
-
-    /// Relaxed load of the current value. Tear-free by construction.
-    fn load(atomic: &Self::Atomic) -> Self;
-
-    /// Relaxed store. Caller is responsible for the surrounding Release
-    /// publish (on state or on another field) that makes the new value
-    /// visible to readers who need a happens-before guarantee.
-    fn store(atomic: &Self::Atomic, value: Self);
-}
-
-macro_rules! impl_atomic_primitive_int {
-    ($t:ty, $atomic:ty, $zero:expr) => {
-        impl AtomicPrimitive for $t {
-            type Atomic = $atomic;
-            #[inline]
-            fn zero() -> Self {
-                $zero
-            }
-            #[inline]
-            fn new_atomic(value: Self) -> Self::Atomic {
-                <$atomic>::new(value)
-            }
-            #[inline]
-            fn load(atomic: &Self::Atomic) -> Self {
-                atomic.load(Ordering::Relaxed)
-            }
-            #[inline]
-            fn store(atomic: &Self::Atomic, value: Self) {
-                atomic.store(value, Ordering::Relaxed);
-            }
-        }
-    };
-}
-
-impl_atomic_primitive_int!(u32, AtomicU32, 0);
-impl_atomic_primitive_int!(i32, AtomicI32, 0);
-impl_atomic_primitive_int!(u64, AtomicU64, 0);
-impl_atomic_primitive_int!(i64, AtomicI64, 0);
-
-impl AtomicPrimitive for bool {
-    type Atomic = AtomicBool;
-    #[inline]
-    fn zero() -> Self {
-        false
-    }
-    #[inline]
-    fn new_atomic(value: Self) -> Self::Atomic {
-        AtomicBool::new(value)
-    }
-    #[inline]
-    fn load(atomic: &Self::Atomic) -> Self {
-        atomic.load(Ordering::Relaxed)
-    }
-    #[inline]
-    fn store(atomic: &Self::Atomic, value: Self) {
-        atomic.store(value, Ordering::Relaxed);
-    }
-}
-
-impl AtomicPrimitive for f32 {
-    type Atomic = AtomicU32;
-    #[inline]
-    fn zero() -> Self {
-        0.0
-    }
-    #[inline]
-    fn new_atomic(value: Self) -> Self::Atomic {
-        AtomicU32::new(value.to_bits())
-    }
-    #[inline]
-    fn load(atomic: &Self::Atomic) -> Self {
-        f32::from_bits(atomic.load(Ordering::Relaxed))
-    }
-    #[inline]
-    fn store(atomic: &Self::Atomic, value: Self) {
-        atomic.store(value.to_bits(), Ordering::Relaxed);
-    }
-}
-
-impl AtomicPrimitive for f64 {
-    type Atomic = AtomicU64;
-    #[inline]
-    fn zero() -> Self {
-        0.0
-    }
-    #[inline]
-    fn new_atomic(value: Self) -> Self::Atomic {
-        AtomicU64::new(value.to_bits())
-    }
-    #[inline]
-    fn load(atomic: &Self::Atomic) -> Self {
-        f64::from_bits(atomic.load(Ordering::Relaxed))
-    }
-    #[inline]
-    fn store(atomic: &Self::Atomic, value: Self) {
-        atomic.store(value.to_bits(), Ordering::Relaxed);
-    }
-}
-
-/// One contiguous block of atomic slots. Segments are heap-allocated once
-/// and never moved or freed until the arena is dropped.
-struct Segment<A> {
-    slots: Box<[A]>,
-}
-
-/// Arena for `Copy` primitive values stored inline as atomic cells.
-///
-/// See the module-level docs for the overall design. Tear-free reads are
-/// guaranteed by the atomic cell choice; staleness is the state machine's
-/// concern and is not enforced here.
-pub(crate) struct AtomicPrimitiveArena<T: AtomicPrimitive> {
-    /// Top-level segment directory. All entries start null; segments are
-    /// allocated on demand by `reserve`. Size is fixed at construction
-    /// and never reallocated, so readers may index safely without locking.
-    segments: Box<[AtomicPtr<Segment<T::Atomic>>]>,
-
-    /// Number of logically-reserved slots. Monotonically increasing via
-    /// `fetch_add`, used to hand out slot indices and to bound-check
-    /// against [`MAX_SLOTS`].
-    len: AtomicU32,
-}
-
-impl<T: AtomicPrimitive> AtomicPrimitiveArena<T> {
-    /// Construct an empty arena. Segments are not allocated until the
-    /// first reservation lands in each one.
-    pub(crate) fn new() -> Self {
-        let segments = (0..MAX_SEGMENTS)
-            .map(|_| AtomicPtr::new(std::ptr::null_mut()))
-            .collect::<Vec<_>>()
-            .into_boxed_slice();
-        Self {
-            segments,
-            len: AtomicU32::new(0),
-        }
-    }
-
-    /// Reserve a new slot holding `initial`, returning its index.
-    ///
-    /// Atomically increments `len` to claim an index, allocates the
-    /// containing segment if it is not yet present (racing safely with
-    /// any other concurrent reservation into the same segment), and
-    /// stores `initial` into the claimed slot with `Relaxed` ordering.
-    ///
-    /// Panics if the arena has already exhausted [`MAX_SLOTS`].
-    pub(crate) fn reserve(&self, initial: T) -> u32 {
-        let idx = self.len.fetch_add(1, Ordering::Relaxed);
-        if idx >= MAX_SLOTS {
-            // Undo the over-increment so repeated panics don't leave
-            // `len` unboundedly large in case the caller catches the panic.
-            self.len.fetch_sub(1, Ordering::Relaxed);
-            panic!(
-                "AtomicPrimitiveArena<{}> exhausted at {} slots",
-                std::any::type_name::<T>(),
-                MAX_SLOTS
-            );
-        }
-        let seg_idx = (idx >> SEGMENT_SHIFT) as usize;
-        let within = (idx & SEGMENT_MASK) as usize;
-        let segment = self.get_or_allocate_segment(seg_idx);
-        // SAFETY: `segment` is a non-null pointer to a Segment owned by
-        // this arena. Segments are never freed until `Drop`, and the
-        // slot index `within` is in `0..SEGMENT_SIZE` by construction.
-        unsafe {
-            T::store(&(*segment).slots[within], initial);
-        }
-        idx
-    }
-
-    /// Read the value at `slot` with `Relaxed` ordering.
-    ///
-    /// Tear-free. The caller must establish happens-before with the writer
-    /// via an Acquire load on the owning node's state (or equivalent).
-    /// Reading an unreserved slot is a logic error that only debug builds
-    /// catch. In release builds it yields the zero-initialized value if the
-    /// segment is allocated; otherwise it dereferences a null pointer (UB).
-    #[inline]
-    pub(crate) fn read(&self, slot: u32) -> T {
-        debug_assert!(
-            slot < self.len.load(Ordering::Relaxed),
-            "read of unreserved slot {} (len {})",
-            slot,
-            self.len.load(Ordering::Relaxed)
-        );
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-        let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-        debug_assert!(
-            !seg_ptr.is_null(),
-            "read of slot {} in unallocated segment {}",
-            slot,
-            seg_idx
-        );
-        // SAFETY: `seg_ptr` was published via `Release` by `reserve`, is
-        // non-null for any reserved slot, points to a Segment owned by
-        // this arena that lives until Drop, and `within` is in range.
-        unsafe { T::load(&(*seg_ptr).slots[within]) }
-    }
-
-    /// Write `value` to `slot` with `Relaxed` ordering.
-    ///
-    /// Used during compute completion (under Computing-state ownership)
-    /// or for input updates (under the runtime's write mutex, with a
-    /// Release publish on the node's state afterwards). The arena does
-    /// not enforce exclusivity; it is the caller's responsibility to
-    /// avoid conflicting concurrent writes to the same slot.
-    #[inline]
-    pub(crate) fn write(&self, slot: u32, value: T) {
-        debug_assert!(
-            slot < self.len.load(Ordering::Relaxed),
-            "write to unreserved slot {} (len {})",
-            slot,
-            self.len.load(Ordering::Relaxed)
-        );
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-        let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-        debug_assert!(
-            !seg_ptr.is_null(),
-            "write to slot {} in unallocated segment {}",
-            slot,
-            seg_idx
-        );
-        // SAFETY: same as `read`.
-        unsafe {
-            T::store(&(*seg_ptr).slots[within], value);
-        }
-    }
-
-    /// Current number of reserved slots.
-    #[cfg(test)]
-    pub(crate) fn len(&self) -> u32 {
-        self.len.load(Ordering::Relaxed)
-    }
-
-    /// Return a pointer to the segment at `seg_idx`, allocating it if
-    /// necessary. Safe to call concurrently: at most one allocation wins
-    /// the compare-exchange, and losers drop their speculative allocation
-    /// and use the winner's pointer.
-    fn get_or_allocate_segment(&self, seg_idx: usize) -> *const Segment<T::Atomic> {
-        let existing = self.segments[seg_idx].load(Ordering::Acquire);
-        if !existing.is_null() {
-            return existing;
-        }
-        let slots: Vec<T::Atomic> = (0..SEGMENT_SIZE)
-            .map(|_| T::new_atomic(T::zero()))
-            .collect();
-        let segment = Box::new(Segment {
-            slots: slots.into_boxed_slice(),
-        });
-        let ptr = Box::into_raw(segment);
-        match self.segments[seg_idx].compare_exchange(
-            std::ptr::null_mut(),
-            ptr,
-            Ordering::Release,
-            Ordering::Acquire,
-        ) {
-            Ok(_) => ptr,
-            Err(winner) => {
-                // Another thread already published a segment here.
-                // Drop our speculative allocation and use theirs.
-                // SAFETY: `ptr` came from `Box::into_raw` in this call
-                // and was never published anywhere else; we are the sole
-                // owner and can reclaim it.
-                unsafe {
-                    drop(Box::from_raw(ptr));
-                }
-                winner
-            }
-        }
-    }
-}
-
-impl<T: AtomicPrimitive> Drop for AtomicPrimitiveArena<T> {
-    fn drop(&mut self) {
-        // Reclaim every segment that was allocated via `Box::into_raw`.
-        for entry in self.segments.iter() {
-            let ptr = entry.load(Ordering::Acquire);
-            if !ptr.is_null() {
-                // SAFETY: the pointer came from `Box::into_raw` in
-                // `get_or_allocate_segment` and is uniquely owned by this
-                // arena. `&mut self` guarantees no concurrent access.
-                unsafe {
-                    drop(Box::from_raw(ptr));
-                }
-            }
-        }
-    }
-}
-
-impl<T: AtomicPrimitive> ErasedArena for AtomicPrimitiveArena<T> {
-    fn erased_type_id(&self) -> TypeId {
-        TypeId::of::<T>()
-    }
-
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-}
-
-/// One slot in a `GenericArena`. The inner `Option<T>` starts `None` at
-/// segment allocation and becomes `Some(value)` once the arena's user
-/// writes a value into it.
-///
-/// Commit U changed the slot from `UnsafeCell<Option<T>>` to
-/// `Mutex<Option<T>>` as part of removing the nodes `RwLock`. The
-/// nodes store is now lock-free (SegmentedNodes), so the RwLock that
-/// previously gated reader/writer exclusion on generic arena slots
-/// no longer exists. Per-slot mutex is the finest-grained replacement
-/// that preserves correctness: readers take the mutex briefly to
-/// clone the value, writers take the mutex to replace it. Uncontended
-/// std::sync::Mutex is ~5 ns on Linux — more expensive than the old
-/// UnsafeCell path, but correct under concurrent rt.get and rt.set
-/// on non-primitive inputs.
-///
-/// Primitive types do not pay this cost because they use
-/// `AtomicPrimitiveArena` via the Value trait dispatch from commit T.
-/// Only non-primitive types route through `GenericArena` and incur
-/// the per-slot mutex overhead.
-struct GenericSlot<T> {
-    cell: std::sync::Mutex<Option<T>>,
-}
-
-impl<T> GenericSlot<T> {
-    fn none() -> Self {
-        Self {
-            cell: std::sync::Mutex::new(None),
-        }
-    }
-}
-
-// `Mutex<Option<T>>` is `Sync` when `Option<T>: Send`, which is when
-// `T: Send`. So `GenericSlot<T: Send>` is automatically `Sync` with
-// no unsafe impl needed. The old unsafe Sync/Send impls that
-// justified the `UnsafeCell` approach under state-machine
-// coordination are no longer needed.
-
-/// Arena for `Clone + Send + Sync` values, stored behind a per-slot
-/// mutex.
-///
-/// Layout and growth mirror [`AtomicPrimitiveArena`]: a fixed-size
-/// top-level directory of `AtomicPtr<Segment<GenericSlot<T>>>` with lazy
-/// segment allocation via CAS. Only the slot body differs: generic values
-/// live in `Mutex<Option<T>>` cells, and access safety comes from locking
-/// the slot rather than from atomic loads and stores.
-pub struct GenericArena<T: Clone + Send + Sync + 'static> {
-    segments: Box<[AtomicPtr<Segment<GenericSlot<T>>>]>,
-    len: AtomicU32,
-}
-
-impl<T: Clone + Send + Sync + 'static> Default for GenericArena<T> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl<T: Clone + Send + Sync + 'static> GenericArena<T> {
-    /// Construct an empty arena.
-    pub fn new() -> Self {
-        let segments = (0..MAX_SEGMENTS)
-            .map(|_| AtomicPtr::new(std::ptr::null_mut()))
-            .collect::<Vec<_>>()
-            .into_boxed_slice();
-        Self {
-            segments,
-            len: AtomicU32::new(0),
-        }
-    }
-
-    /// Reserve a new slot, leaving it `None` (uninitialized).
-    ///
-    /// The caller is expected to follow up with [`GenericArena::write`]
-    /// once they have a value to store. For query nodes the first write
-    /// happens at the end of the first compute; for input nodes it
-    /// happens immediately after reservation (and both happen under the
-    /// runtime's write mutex, so there is no window where another thread
-    /// could observe the handle before the slot is populated).
-    ///
-    /// Panics on exhaustion, same contract as `AtomicPrimitiveArena`.
-    pub fn reserve(&self) -> u32 {
-        let idx = self.len.fetch_add(1, Ordering::Relaxed);
-        if idx >= MAX_SLOTS {
-            self.len.fetch_sub(1, Ordering::Relaxed);
-            panic!(
-                "GenericArena<{}> exhausted at {} slots",
-                std::any::type_name::<T>(),
-                MAX_SLOTS
-            );
-        }
-        let seg_idx = (idx >> SEGMENT_SHIFT) as usize;
-        // Ensure segment exists. We don't need to touch the slot here;
-        // segments are allocated with all slots `None`.
-        let _ = self.get_or_allocate_segment(seg_idx);
-        idx
-    }
-
-    /// Convenience: reserve and immediately initialize to `Some(initial)`.
-    ///
-    /// Used by the runtime's `create_input` path where the initial value
-    /// is known at node creation time. Implemented as `reserve()` followed
-    /// by `write(slot, initial)`, so it is a shorthand, not a fast path.
-    pub fn reserve_with(&self, initial: T) -> u32 {
-        let idx = self.reserve();
-        self.write(idx, initial);
-        idx
-    }
-
-    /// Clone the value at `slot`.
-    ///
-    /// Takes the per-slot `Mutex<Option<T>>` briefly, clones the
-    /// contained value, and releases. Commit U changed this from
-    /// unsafe UnsafeCell access to mutex-guarded access; see the
-    /// `GenericSlot` doc comment for the rationale.
-    pub fn read(&self, slot: u32) -> T {
-        debug_assert!(
-            slot < self.len.load(Ordering::Relaxed),
-            "read of unreserved slot {} (len {})",
-            slot,
-            self.len.load(Ordering::Relaxed)
-        );
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-        let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-        debug_assert!(
-            !seg_ptr.is_null(),
-            "read of slot {} in unallocated segment {}",
-            slot,
-            seg_idx
-        );
-        // SAFETY: `seg_ptr` was published via `Release` by a prior
-        // `reserve`; segments are never moved or freed until Drop;
-        // `within` is in range. The slot's `Mutex<Option<T>>` is the
-        // synchronization primitive; `seg_ptr` dereference is the
-        // only unsafe, and it's sound per the segment invariants.
-        let slot_ref: &GenericSlot<T> = unsafe { &(*seg_ptr).slots[within] };
-        let guard = slot_ref
-            .cell
-            .lock()
-            .expect("GenericArena slot mutex poisoned");
-        guard
-            .as_ref()
-            .expect("GenericArena::read on uninitialized slot; caller must check state first")
-            .clone()
-    }
-
-    /// Clone the value at `slot` if it exists, returning `None` if
-    /// the slot has never been written. Used by the Runtime's
-    /// early-cutoff path for Failed-retry recomputes.
-    pub fn try_read(&self, slot: u32) -> Option<T> {
-        if slot >= self.len.load(Ordering::Relaxed) {
-            return None;
-        }
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-        let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-        if seg_ptr.is_null() {
-            return None;
-        }
-        let slot_ref: &GenericSlot<T> = unsafe { &(*seg_ptr).slots[within] };
-        let guard = slot_ref
-            .cell
-            .lock()
-            .expect("GenericArena slot mutex poisoned");
-        guard.as_ref().cloned()
-    }
-
-    /// Overwrite the value at `slot` with `Some(value)`.
-    ///
-    /// Takes the per-slot mutex, replaces the contained Option with
-    /// `Some(value)` (dropping any previous value in place), and
-    /// releases. Concurrent readers block briefly on the mutex.
-    pub fn write(&self, slot: u32, value: T) {
-        debug_assert!(
-            slot < self.len.load(Ordering::Relaxed),
-            "write to unreserved slot {} (len {})",
-            slot,
-            self.len.load(Ordering::Relaxed)
-        );
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-        let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-        debug_assert!(
-            !seg_ptr.is_null(),
-            "write to slot {} in unallocated segment {}",
-            slot,
-            seg_idx
-        );
-        let slot_ref: &GenericSlot<T> = unsafe { &(*seg_ptr).slots[within] };
-        let mut guard = slot_ref
-            .cell
-            .lock()
-            .expect("GenericArena slot mutex poisoned");
-        *guard = Some(value);
-    }
-
-    /// Current number of reserved slots.
-    #[cfg(test)]
-    pub(crate) fn len(&self) -> u32 {
-        self.len.load(Ordering::Relaxed)
-    }
-
-    fn get_or_allocate_segment(&self, seg_idx: usize) -> *const Segment<GenericSlot<T>> {
-        let existing = self.segments[seg_idx].load(Ordering::Acquire);
-        if !existing.is_null() {
-            return existing;
-        }
-        let slots: Vec<GenericSlot<T>> = (0..SEGMENT_SIZE).map(|_| GenericSlot::none()).collect();
-        let segment = Box::new(Segment {
-            slots: slots.into_boxed_slice(),
-        });
-        let ptr = Box::into_raw(segment);
-        match self.segments[seg_idx].compare_exchange(
-            std::ptr::null_mut(),
-            ptr,
-            Ordering::Release,
-            Ordering::Acquire,
-        ) {
-            Ok(_) => ptr,
-            Err(winner) => {
-                // SAFETY: `ptr` came from `Box::into_raw` above and was
-                // never published anywhere else; we own it.
-                unsafe {
-                    drop(Box::from_raw(ptr));
-                }
-                winner
-            }
-        }
-    }
-}
-
-impl<T: Clone + Send + Sync + 'static> Drop for GenericArena<T> {
-    fn drop(&mut self) {
-        // Reclaim every allocated segment. Each segment's `Drop` in
-        // turn drops its `Box<[GenericSlot<T>]>`, which drops each
-        // `GenericSlot<T>`, which drops the inner `Mutex<Option<T>>`,
-        // which drops the `Option<T>` (calling `T::drop` if `Some`).
-        // This is why we do not need a separate "which slots are
-        // initialized" tracker: the `Option` discriminant is the tracker.
-        for entry in self.segments.iter() {
-            let ptr = entry.load(Ordering::Acquire);
-            if !ptr.is_null() {
-                // SAFETY: pointer came from `Box::into_raw` in
-                // `get_or_allocate_segment`; uniquely owned by this
-                // arena; `&mut self` guarantees no concurrent access.
-                unsafe {
-                    drop(Box::from_raw(ptr));
-                }
-            }
-        }
-    }
-}
-
-impl<T: Clone + Send + Sync + 'static> ErasedArena for GenericArena<T> {
-    fn erased_type_id(&self) -> TypeId {
-        TypeId::of::<T>()
-    }
-
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::sync::Arc;
-    use std::thread;
-
-    #[test]
-    fn reserve_then_read_u64() {
-        let arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        let slot = arena.reserve(42);
-        assert_eq!(slot, 0);
-        assert_eq!(arena.read(slot), 42);
-        assert_eq!(arena.len(), 1);
-    }
-
-    #[test]
-    fn reserve_hands_out_sequential_indices() {
-        let arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        for i in 0..100 {
-            let slot = arena.reserve(i);
-            assert_eq!(slot, i as u32);
-            assert_eq!(arena.read(slot), i);
-        }
-        assert_eq!(arena.len(), 100);
-    }
-
-    #[test]
-    fn write_overwrites_existing_value() {
-        let arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        let slot = arena.reserve(10);
-        arena.write(slot, 20);
-        assert_eq!(arena.read(slot), 20);
-        arena.write(slot, 30);
-        assert_eq!(arena.read(slot), 30);
-    }
-
-    #[test]
-    fn reservations_span_segment_boundary() {
-        // Force crossing from segment 0 into segment 1 and back into
-        // segment 1 several times. Verifies both segment-allocation and
-        // slot addressing across the boundary.
-        let arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        let count = (SEGMENT_SIZE as u64) + 50;
-        let mut slots = Vec::with_capacity(count as usize);
-        for i in 0..count {
-            slots.push(arena.reserve(i * 7 + 1));
-        }
-        for (i, slot) in slots.into_iter().enumerate() {
-            assert_eq!(arena.read(slot), (i as u64) * 7 + 1);
-        }
-        assert_eq!(arena.len(), count as u32);
-    }
-
-    #[test]
-    fn bool_arena_read_write() {
-        let arena: AtomicPrimitiveArena<bool> = AtomicPrimitiveArena::new();
-        let a = arena.reserve(true);
-        let b = arena.reserve(false);
-        assert!(arena.read(a));
-        assert!(!arena.read(b));
-        arena.write(a, false);
-        arena.write(b, true);
-        assert!(!arena.read(a));
-        assert!(arena.read(b));
-    }
-
-    #[test]
-    fn f64_arena_round_trips_bits() {
-        let arena: AtomicPrimitiveArena<f64> = AtomicPrimitiveArena::new();
-        let values = [
-            0.0,
-            -0.0,
-            1.5,
-            -1.5,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            f64::MIN,
-            f64::MAX,
-            std::f64::consts::PI,
-        ];
-        let slots: Vec<u32> = values.iter().map(|&v| arena.reserve(v)).collect();
-        for (slot, expected) in slots.iter().zip(values.iter()) {
-            assert_eq!(arena.read(*slot).to_bits(), expected.to_bits());
-        }
-        // NaN payloads round trip too.
-        let nan = f64::from_bits(0x7ff8_dead_beef_cafe);
-        let slot = arena.reserve(nan);
-        assert_eq!(arena.read(slot).to_bits(), 0x7ff8_dead_beef_cafe);
-    }
-
-    #[test]
-    fn f32_arena_round_trips_bits() {
-        let arena: AtomicPrimitiveArena<f32> = AtomicPrimitiveArena::new();
-        let slot = arena.reserve(std::f32::consts::E);
-        assert_eq!(arena.read(slot).to_bits(), std::f32::consts::E.to_bits());
-    }
-
-    #[test]
-    fn erased_type_id_matches() {
-        let u64_arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        let bool_arena: AtomicPrimitiveArena<bool> = AtomicPrimitiveArena::new();
-        assert_eq!(u64_arena.erased_type_id(), TypeId::of::<u64>());
-        assert_eq!(bool_arena.erased_type_id(), TypeId::of::<bool>());
-        assert_ne!(u64_arena.erased_type_id(), bool_arena.erased_type_id());
-    }
-
-    #[test]
-    fn erased_as_any_downcasts_to_concrete_type() {
-        let arena: Box<dyn ErasedArena> = Box::new(AtomicPrimitiveArena::<u64>::new());
-        let concrete = arena
-            .as_any()
-            .downcast_ref::<AtomicPrimitiveArena<u64>>()
-            .expect("downcast to concrete type");
-        let slot = concrete.reserve(99);
-        assert_eq!(concrete.read(slot), 99);
-    }
-
-    #[test]
-    fn concurrent_reservers_never_produce_duplicate_indices() {
-        // Many threads race to reserve. Each thread records its returned
-        // indices. At the end, every index in [0..total) must appear
-        // exactly once across all threads. This exercises the segment
-        // allocation CAS.
-        const THREADS: usize = 16;
-        const PER_THREAD: usize = 2000;
-        let arena: Arc<AtomicPrimitiveArena<u64>> = Arc::new(AtomicPrimitiveArena::new());
-        let handles: Vec<_> = (0..THREADS)
-            .map(|tid| {
-                let arena = arena.clone();
-                thread::spawn(move || {
-                    let mut mine = Vec::with_capacity(PER_THREAD);
-                    for i in 0..PER_THREAD {
-                        // Value encodes (tid, i) so we can verify later.
-                        let v = (tid as u64) * 1_000_000 + i as u64;
-                        let slot = arena.reserve(v);
-                        mine.push((slot, v));
-                    }
-                    mine
-                })
-            })
-            .collect();
-
-        let mut all: Vec<(u32, u64)> = handles
-            .into_iter()
-            .flat_map(|h| h.join().unwrap())
-            .collect();
-        all.sort_by_key(|(slot, _)| *slot);
-
-        assert_eq!(all.len(), THREADS * PER_THREAD);
-        for (i, (slot, v)) in all.iter().enumerate() {
-            assert_eq!(*slot as usize, i, "slot indices must be a dense range");
-            assert_eq!(
-                arena.read(*slot),
-                *v,
-                "every slot must hold the value its reserver wrote"
-            );
-        }
-    }
-
-    #[test]
-    fn concurrent_reader_sees_tear_free_values() {
-        // One writer rewrites a single slot with alternating bit patterns.
-        // Many readers spin-read the slot and assert they always observe
-        // one of the two known patterns (never a torn combination).
-        // This is a smoke test for the u64 atomic-store path; atomicity
-        // is guaranteed by `AtomicU64` on all supported targets.
-        const READERS: usize = 8;
-        const ITERS: usize = 50_000;
-        const PATTERN_A: u64 = 0x0000_0000_DEAD_BEEF;
-        const PATTERN_B: u64 = 0xCAFE_BABE_0000_0000;
-
-        let arena: Arc<AtomicPrimitiveArena<u64>> = Arc::new(AtomicPrimitiveArena::new());
-        let slot = arena.reserve(PATTERN_A);
-        let stop = Arc::new(std::sync::atomic::AtomicBool::new(false));
-
-        let reader_handles: Vec<_> = (0..READERS)
-            .map(|_| {
-                let arena = arena.clone();
-                let stop = stop.clone();
-                thread::spawn(move || {
-                    while !stop.load(Ordering::Relaxed) {
-                        let v = arena.read(slot);
-                        assert!(
-                            v == PATTERN_A || v == PATTERN_B,
-                            "observed torn value 0x{:016x}",
-                            v
-                        );
-                    }
-                })
-            })
-            .collect();
-
-        // Writer toggles the value ITERS times.
-        for i in 0..ITERS {
-            let v = if i & 1 == 0 { PATTERN_A } else { PATTERN_B };
-            arena.write(slot, v);
-        }
-        stop.store(true, Ordering::Relaxed);
-        for h in reader_handles {
-            h.join().unwrap();
-        }
-    }
-
-    #[test]
-    fn segments_are_reclaimed_on_drop() {
-        // Exercise Drop by allocating across several segments and letting
-        // the arena fall out of scope. Without Drop, the segment boxes
-        // would leak; we rely on Miri (or valgrind) to catch that.
-        // Here we at least exercise the code path.
-        let arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        for i in 0..(SEGMENT_SIZE * 3 + 7) {
-            arena.reserve(i as u64);
-        }
-        drop(arena);
-    }
-
-    #[test]
-    fn generic_reserve_starts_none_and_write_populates() {
-        let arena: GenericArena<String> = GenericArena::new();
-        let slot = arena.reserve();
-        assert_eq!(slot, 0);
-        arena.write(slot, "hello".to_string());
-        assert_eq!(arena.read(slot), "hello");
-    }
-
-    #[test]
-    fn generic_reserve_with_initializes_immediately() {
-        let arena: GenericArena<String> = GenericArena::new();
-        let slot = arena.reserve_with("world".to_string());
-        assert_eq!(arena.read(slot), "world");
-        assert_eq!(arena.len(), 1);
-    }
-
-    #[test]
-    fn generic_write_overwrites_and_drops_old_value() {
-        // Use a struct that tracks its drops, so we can verify that
-        // overwriting a Some(old) with Some(new) runs old's destructor.
-        use std::sync::atomic::AtomicUsize;
-        static DROPS: AtomicUsize = AtomicUsize::new(0);
-        #[derive(Clone)]
-        struct DropCounter(#[allow(dead_code)] u64);
-        impl Drop for DropCounter {
-            fn drop(&mut self) {
-                DROPS.fetch_add(1, Ordering::SeqCst);
-            }
-        }
-
-        DROPS.store(0, Ordering::SeqCst);
-        let arena: GenericArena<DropCounter> = GenericArena::new();
-        let slot = arena.reserve_with(DropCounter(1));
-        // One DropCounter briefly existed as the argument to reserve_with,
-        // but the value is moved into the slot so its destructor is not
-        // called yet.
-        assert_eq!(DROPS.load(Ordering::SeqCst), 0);
-
-        arena.write(slot, DropCounter(2));
-        // The write replaced Some(DropCounter(1)) with Some(DropCounter(2));
-        // the old DropCounter(1) was dropped as part of the Option
-        // assignment.
-        assert_eq!(DROPS.load(Ordering::SeqCst), 1);
-
-        arena.write(slot, DropCounter(3));
-        assert_eq!(DROPS.load(Ordering::SeqCst), 2);
-
-        drop(arena);
-        // The arena's Drop runs DropCounter(3).
-        assert_eq!(DROPS.load(Ordering::SeqCst), 3);
-    }
-
-    #[test]
-    fn generic_reservations_span_segment_boundary() {
-        let arena: GenericArena<Vec<u32>> = GenericArena::new();
-        let count = SEGMENT_SIZE + 25;
-        let mut slots = Vec::with_capacity(count);
-        for i in 0..count {
-            slots.push(arena.reserve_with(vec![i as u32, (i as u32) * 2]));
-        }
-        for (i, slot) in slots.into_iter().enumerate() {
-            let v = arena.read(slot);
-            assert_eq!(v, vec![i as u32, (i as u32) * 2]);
-        }
-    }
-
-    #[test]
-    fn generic_read_clones_independent_copies() {
-        // Mutating the cloned value must not affect the arena's copy.
-        let arena: GenericArena<Vec<u32>> = GenericArena::new();
-        let slot = arena.reserve_with(vec![1, 2, 3]);
-        let mut clone_a = arena.read(slot);
-        clone_a.push(999);
-        let clone_b = arena.read(slot);
-        assert_eq!(clone_b, vec![1, 2, 3]);
-        assert_eq!(clone_a, vec![1, 2, 3, 999]);
-    }
-
-    #[test]
-    fn generic_arena_drops_all_initialized_slots() {
-        // Cross several segments with values that track Drop, then let
-        // the arena fall out of scope and verify every value was dropped
-        // exactly once.
-        use std::sync::atomic::AtomicUsize;
-        static DROPS: AtomicUsize = AtomicUsize::new(0);
-        #[derive(Clone)]
-        struct Tracked;
-        impl Drop for Tracked {
-            fn drop(&mut self) {
-                DROPS.fetch_add(1, Ordering::SeqCst);
-            }
-        }
-
-        DROPS.store(0, Ordering::SeqCst);
-        let arena: GenericArena<Tracked> = GenericArena::new();
-        let populated = SEGMENT_SIZE + 10;
-        for _ in 0..populated {
-            arena.reserve_with(Tracked);
-        }
-        // Also reserve some slots that remain uninitialized (None).
-        // Their Drop should not count, because Option::None has no T.
-        for _ in 0..7 {
-            arena.reserve();
-        }
-        drop(arena);
-        assert_eq!(DROPS.load(Ordering::SeqCst), populated);
-    }
-
-    #[test]
-    fn generic_erased_type_id_and_downcast() {
-        let arena: Box<dyn ErasedArena> = Box::new(GenericArena::<String>::new());
-        assert_eq!(arena.erased_type_id(), TypeId::of::<String>());
-        let concrete = arena
-            .as_any()
-            .downcast_ref::<GenericArena<String>>()
-            .expect("downcast to GenericArena<String>");
-        let slot = concrete.reserve_with("downcast".to_string());
-        assert_eq!(concrete.read(slot), "downcast");
-    }
-
-    #[test]
-    fn generic_concurrent_readers_see_stable_value() {
-        // Many readers clone the same slot in parallel while we hold
-        // the value constant. This exercises the Sync impl and the
-        // read path's non-atomic access under a shared reference.
-        // Correctness for concurrent read-and-write is the state
-        // machine's job, not the arena's; here we only verify that
-        // concurrent reads of a stable value work.
-        const READERS: usize = 16;
-        const ITERS: usize = 10_000;
-
-        let arena: Arc<GenericArena<String>> = Arc::new(GenericArena::new());
-        let slot = arena.reserve_with("stable".to_string());
-
-        let handles: Vec<_> = (0..READERS)
-            .map(|_| {
-                let arena = arena.clone();
-                thread::spawn(move || {
-                    for _ in 0..ITERS {
-                        assert_eq!(arena.read(slot), "stable");
-                    }
-                })
-            })
-            .collect();
-        for h in handles {
-            h.join().unwrap();
-        }
-    }
-
-    #[test]
-    fn generic_concurrent_reservers_get_unique_indices() {
-        // Same invariant as the primitive arena: concurrent reserve()
-        // hands out a dense range of unique indices. This exercises
-        // the segment-allocation CAS on the GenericArena path.
-        const THREADS: usize = 16;
-        const PER_THREAD: usize = 1500;
-
-        let arena: Arc<GenericArena<u64>> = Arc::new(GenericArena::new());
-        let handles: Vec<_> = (0..THREADS)
-            .map(|tid| {
-                let arena = arena.clone();
-                thread::spawn(move || {
-                    let mut mine = Vec::with_capacity(PER_THREAD);
-                    for i in 0..PER_THREAD {
-                        let v = (tid as u64) * 1_000_000 + i as u64;
-                        let slot = arena.reserve_with(v);
-                        mine.push((slot, v));
-                    }
-                    mine
-                })
-            })
-            .collect();
-
-        let mut all: Vec<(u32, u64)> = handles
-            .into_iter()
-            .flat_map(|h| h.join().unwrap())
-            .collect();
-        all.sort_by_key(|(slot, _)| *slot);
-
-        assert_eq!(all.len(), THREADS * PER_THREAD);
-        for (i, (slot, v)) in all.iter().enumerate() {
-            assert_eq!(*slot as usize, i);
-            assert_eq!(arena.read(*slot), *v);
-        }
-    }
-}
diff --git a/crates/incr-concurrent/src/collection.rs b/crates/incr-concurrent/src/collection.rs
deleted file mode 100644
index c8055c2..0000000
--- a/crates/incr-concurrent/src/collection.rs
+++ /dev/null
@@ -1,1012 +0,0 @@
-use std::collections::{HashMap, HashSet};
-use std::hash::Hash;
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-use std::sync::{Arc, RwLock};
-
-use super::handle::Incr;
-use super::runtime::Runtime;
-
-#[derive(Clone, Debug)]
-pub enum Delta<T> {
-    Insert(T),
-    Delete(T),
-}
-
-#[derive(Clone, Debug)]
-pub(crate) struct VersionedDelta<T> {
-    #[allow(dead_code)]
-    pub version: u64,
-    pub delta: Delta<T>,
-}
-
-pub(crate) struct CollectionLog<T: Clone + Hash + Eq> {
-    pub elements: HashMap<T, usize>,
-    pub deltas: Vec<VersionedDelta<T>>,
-    pub version: u64,
-    multiset: bool,
-}
-
-impl<T: Clone + Hash + Eq> CollectionLog<T> {
-    pub fn new() -> Self {
-        CollectionLog {
-            elements: HashMap::new(),
-            deltas: Vec::new(),
-            version: 0,
-            multiset: false,
-        }
-    }
-
-    pub fn new_multiset() -> Self {
-        CollectionLog {
-            elements: HashMap::new(),
-            deltas: Vec::new(),
-            version: 0,
-            multiset: true,
-        }
-    }
-
-    pub fn insert(&mut self, value: T) -> bool {
-        if self.multiset {
-            let count = self.elements.entry(value.clone()).or_insert(0);
-            *count += 1;
-            self.version += 1;
-            self.deltas.push(VersionedDelta {
-                version: self.version,
-                delta: Delta::Insert(value),
-            });
-            true
-        } else {
-            let count = self.elements.entry(value.clone()).or_insert(0);
-            if *count == 0 {
-                *count = 1;
-                self.version += 1;
-                self.deltas.push(VersionedDelta {
-                    version: self.version,
-                    delta: Delta::Insert(value),
-                });
-                true
-            } else {
-                false
-            }
-        }
-    }
-
-    pub fn delete(&mut self, value: &T) -> bool {
-        if self.multiset {
-            if let Some(count) = self.elements.get_mut(value) {
-                *count -= 1;
-                self.version += 1;
-                self.deltas.push(VersionedDelta {
-                    version: self.version,
-                    delta: Delta::Delete(value.clone()),
-                });
-                if *count == 0 {
-                    self.elements.remove(value);
-                }
-                true
-            } else {
-                false
-            }
-        } else if self.elements.remove(value).is_some() {
-            self.version += 1;
-            self.deltas.push(VersionedDelta {
-                version: self.version,
-                delta: Delta::Delete(value.clone()),
-            });
-            true
-        } else {
-            false
-        }
-    }
-
-    pub fn distinct_elements(&self) -> HashSet<T> {
-        self.elements.keys().cloned().collect()
-    }
-
-    pub fn elements_vec(&self) -> Vec<T> {
-        self.elements
-            .iter()
-            .flat_map(|(v, &count)| std::iter::repeat_n(v.clone(), count))
-            .collect()
-    }
-}
-
-pub struct IncrCollection<T>
-where
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    pub(crate) log: Arc<RwLock<CollectionLog<T>>>,
-    pub(crate) version_node: Incr<u64>,
-}
-
-impl<T> Clone for IncrCollection<T>
-where
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    fn clone(&self) -> Self {
-        IncrCollection {
-            log: self.log.clone(),
-            version_node: self.version_node,
-        }
-    }
-}
-
-/// A raw pointer wrapper that is `Send + Sync`.
-///
-/// # Safety
-/// The caller must ensure the pointer is only dereferenced during
-/// stabilization, where the Runtime is alive and node execution is
-/// single-threaded per-node (guaranteed by the Computing CAS).
-struct SendSyncPtr<T>(*const T);
-// Manual Copy/Clone impls to avoid the implicit `T: Copy` bound that #[derive] generates.
-impl<T> Copy for SendSyncPtr<T> {}
-impl<T> Clone for SendSyncPtr<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-unsafe impl<T> Send for SendSyncPtr<T> {}
-unsafe impl<T> Sync for SendSyncPtr<T> {}
-impl<T> SendSyncPtr<T> {
-    /// # Safety
-    /// Caller must ensure the pointee is alive and no mutable alias exists.
-    unsafe fn as_ref(&self) -> &T {
-        &*self.0
-    }
-}
-
-pub struct GroupedCollection<K, T>
-where
-    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    pub(crate) groups: Arc<RwLock<HashMap<K, IncrCollection<T>>>>,
-    pub(crate) version_node: Incr<u64>,
-    #[allow(dead_code)]
-    rt_ptr: *const Runtime,
-}
-
-// SAFETY: All fields except rt_ptr are Send+Sync. rt_ptr is only
-// dereferenced inside compute closures during stabilization (which
-// is single-threaded per-node by the state machine's Computing CAS).
-unsafe impl<K, T> Send for GroupedCollection<K, T>
-where
-    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-}
-unsafe impl<K, T> Sync for GroupedCollection<K, T>
-where
-    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-}
-
-impl<K, T> GroupedCollection<K, T>
-where
-    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    pub fn keys(&self) -> Vec<K> {
-        self.groups.read().unwrap().keys().cloned().collect()
-    }
-
-    pub fn get_group(&self, key: &K) -> Option<IncrCollection<T>> {
-        self.groups.read().unwrap().get(key).cloned()
-    }
-
-    pub fn version_node(&self) -> Incr<u64> {
-        self.version_node
-    }
-}
-
-impl<T> IncrCollection<T>
-where
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    pub fn insert(&self, rt: &Runtime, value: T) {
-        let ver = {
-            let mut log = self.log.write().unwrap();
-            if log.insert(value) {
-                Some(log.version)
-            } else {
-                None
-            }
-        };
-        if let Some(v) = ver {
-            rt.set(self.version_node, v);
-        }
-    }
-
-    pub fn delete(&self, rt: &Runtime, value: &T) {
-        let ver = {
-            let mut log = self.log.write().unwrap();
-            if log.delete(value) {
-                Some(log.version)
-            } else {
-                None
-            }
-        };
-        if let Some(v) = ver {
-            rt.set(self.version_node, v);
-        }
-    }
-
-    pub fn elements(&self) -> HashSet<T> {
-        self.log.read().unwrap().distinct_elements()
-    }
-
-    pub fn version_node(&self) -> Incr<u64> {
-        self.version_node
-    }
-
-    pub fn filter<F>(&self, rt: &Runtime, predicate: F) -> IncrCollection<T>
-    where
-        F: Fn(&T) -> bool + Send + Sync + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let output_log = Arc::new(RwLock::new(CollectionLog::new()));
-        let output_log_ref = output_log.clone();
-        let last_idx = Arc::new(AtomicUsize::new(0));
-        let upstream_ver = self.version_node;
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.read().unwrap();
-            let start = last_idx.load(Ordering::Relaxed);
-            if start >= upstream.deltas.len() {
-                return output_log_ref.read().unwrap().version;
-            }
-
-            let mut output = output_log_ref.write().unwrap();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        if predicate(x) {
-                            output.insert(x.clone());
-                        }
-                    }
-                    Delta::Delete(x) => {
-                        if predicate(x) {
-                            output.delete(x);
-                        }
-                    }
-                }
-            }
-
-            last_idx.store(upstream.deltas.len(), Ordering::Relaxed);
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn map<U, F>(&self, rt: &Runtime, f: F) -> IncrCollection<U>
-    where
-        U: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-        F: Fn(&T) -> U + Send + Sync + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let output_log = Arc::new(RwLock::new(CollectionLog::new_multiset()));
-        let output_log_ref = output_log.clone();
-        let last_idx = Arc::new(AtomicUsize::new(0));
-        let mapping: Arc<RwLock<HashMap<T, U>>> = Arc::new(RwLock::new(HashMap::new()));
-        let mapping_ref = mapping.clone();
-        let upstream_ver = self.version_node;
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.read().unwrap();
-            let start = last_idx.load(Ordering::Relaxed);
-            if start >= upstream.deltas.len() {
-                return output_log_ref.read().unwrap().version;
-            }
-
-            let mut output = output_log_ref.write().unwrap();
-            let mut map_state = mapping_ref.write().unwrap();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let y = f(x);
-                        map_state.insert(x.clone(), y.clone());
-                        output.insert(y);
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(y) = map_state.remove(x) {
-                            output.delete(&y);
-                        }
-                    }
-                }
-            }
-
-            last_idx.store(upstream.deltas.len(), Ordering::Relaxed);
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn count(&self, rt: &Runtime) -> Incr<u64> {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let current_count = Arc::new(AtomicUsize::new(0));
-        let count_ref = current_count.clone();
-        let last_idx = Arc::new(AtomicUsize::new(0));
-
-        rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.read().unwrap();
-            let start = last_idx.load(Ordering::Relaxed);
-            if start >= upstream.deltas.len() {
-                return count_ref.load(Ordering::Relaxed) as u64;
-            }
-
-            let mut count = count_ref.load(Ordering::Relaxed);
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(_) => count += 1,
-                    Delta::Delete(_) => count -= 1,
-                }
-            }
-
-            last_idx.store(upstream.deltas.len(), Ordering::Relaxed);
-            count_ref.store(count, Ordering::Relaxed);
-            count as u64
-        })
-    }
-
-    pub fn reduce<A, F>(&self, rt: &Runtime, fold_fn: F) -> Incr<A>
-    where
-        A: super::value::Value,
-        F: Fn(&Vec<T>) -> A + Send + Sync + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let last_idx = Arc::new(AtomicUsize::new(0));
-
-        rt.create_query(move |rt| -> A {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.read().unwrap();
-            let start = last_idx.load(Ordering::Relaxed);
-            if start >= upstream.deltas.len() {
-                let elems = upstream.elements_vec();
-                return fold_fn(&elems);
-            }
-
-            last_idx.store(upstream.deltas.len(), Ordering::Relaxed);
-            let elems = upstream.elements_vec();
-            fold_fn(&elems)
-        })
-    }
-
-    pub fn group_by<K, F>(&self, rt: &Runtime, key_fn: F) -> GroupedCollection<K, T>
-    where
-        K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-        F: Fn(&T) -> K + Send + Sync + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let last_idx = Arc::new(AtomicUsize::new(0));
-        let groups: Arc<RwLock<HashMap<K, IncrCollection<T>>>> =
-            Arc::new(RwLock::new(HashMap::new()));
-        let groups_ref = groups.clone();
-        let key_cache: Arc<RwLock<HashMap<T, K>>> = Arc::new(RwLock::new(HashMap::new()));
-        let key_cache_ref = key_cache.clone();
-        let rt_ptr = SendSyncPtr(rt as *const Runtime);
-
-        let version_counter = Arc::new(AtomicU64::new(0));
-        let version_counter_ref = version_counter.clone();
-
-        let version_node = rt.create_query(move |_rt| -> u64 {
-            let rt = unsafe { rt_ptr.as_ref() };
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.read().unwrap();
-            let start = last_idx.load(Ordering::Relaxed);
-            if start >= upstream.deltas.len() {
-                return version_counter_ref.load(Ordering::Relaxed);
-            }
-
-            let mut grps = groups_ref.write().unwrap();
-            let mut kc = key_cache_ref.write().unwrap();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = key_fn(x);
-                        kc.insert(x.clone(), k.clone());
-                        let group = grps.entry(k).or_insert_with(|| rt.create_collection::<T>());
-                        let ver = {
-                            let mut log = group.log.write().unwrap();
-                            log.insert(x.clone());
-                            log.version
-                        };
-                        rt.set(group.version_node, ver);
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = kc.remove(x) {
-                            if let Some(group) = grps.get(&k) {
-                                let ver = {
-                                    let mut log = group.log.write().unwrap();
-                                    log.delete(x);
-                                    log.version
-                                };
-                                rt.set(group.version_node, ver);
-                            }
-                        }
-                    }
-                }
-            }
-
-            last_idx.store(upstream.deltas.len(), Ordering::Relaxed);
-            version_counter_ref.fetch_add(1, Ordering::Relaxed) + 1
-        });
-
-        GroupedCollection {
-            groups,
-            version_node,
-            rt_ptr: rt_ptr.0,
-        }
-    }
-
-    pub fn join<U, K, FL, FR>(
-        &self,
-        rt: &Runtime,
-        right: &IncrCollection<U>,
-        left_key: FL,
-        right_key: FR,
-    ) -> IncrCollection<(T, U)>
-    where
-        U: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-        K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-        FL: Fn(&T) -> K + Send + Sync + 'static,
-        FR: Fn(&U) -> K + Send + Sync + 'static,
-    {
-        let left_log = self.log.clone();
-        let right_log = right.log.clone();
-        let left_ver = self.version_node;
-        let right_ver = right.version_node;
-        let left_last = Arc::new(AtomicUsize::new(0));
-        let right_last = Arc::new(AtomicUsize::new(0));
-
-        let left_index: Arc<RwLock<HashMap<K, Vec<T>>>> = Arc::new(RwLock::new(HashMap::new()));
-        let right_index: Arc<RwLock<HashMap<K, Vec<U>>>> = Arc::new(RwLock::new(HashMap::new()));
-        let left_key_cache: Arc<RwLock<HashMap<T, K>>> = Arc::new(RwLock::new(HashMap::new()));
-        let right_key_cache: Arc<RwLock<HashMap<U, K>>> = Arc::new(RwLock::new(HashMap::new()));
-
-        let left_idx_ref = left_index.clone();
-        let right_idx_ref = right_index.clone();
-        let left_kc_ref = left_key_cache.clone();
-        let right_kc_ref = right_key_cache.clone();
-
-        let output_log = Arc::new(RwLock::new(CollectionLog::new_multiset()));
-        let output_log_ref = output_log.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _lv = rt.get(left_ver);
-            let _rv = rt.get(right_ver);
-
-            let left_up = left_log.read().unwrap();
-            let right_up = right_log.read().unwrap();
-            let l_start = left_last.load(Ordering::Relaxed);
-            let r_start = right_last.load(Ordering::Relaxed);
-
-            if l_start >= left_up.deltas.len() && r_start >= right_up.deltas.len() {
-                return output_log_ref.read().unwrap().version;
-            }
-
-            let mut li = left_idx_ref.write().unwrap();
-            let mut ri = right_idx_ref.write().unwrap();
-            let mut lkc = left_kc_ref.write().unwrap();
-            let mut rkc = right_kc_ref.write().unwrap();
-            let mut output = output_log_ref.write().unwrap();
-
-            // Process left deltas
-            for vd in &left_up.deltas[l_start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = left_key(x);
-                        lkc.insert(x.clone(), k.clone());
-                        li.entry(k.clone()).or_default().push(x.clone());
-                        if let Some(rights) = ri.get(&k) {
-                            for r in rights {
-                                output.insert((x.clone(), r.clone()));
-                            }
-                        }
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = lkc.remove(x) {
-                            if let Some(lefts) = li.get_mut(&k) {
-                                lefts.retain(|l| l != x);
-                            }
-                            if let Some(rights) = ri.get(&k) {
-                                for r in rights {
-                                    output.delete(&(x.clone(), r.clone()));
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Process right deltas
-            for vd in &right_up.deltas[r_start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = right_key(x);
-                        rkc.insert(x.clone(), k.clone());
-                        ri.entry(k.clone()).or_default().push(x.clone());
-                        if let Some(lefts) = li.get(&k) {
-                            for l in lefts {
-                                output.insert((l.clone(), x.clone()));
-                            }
-                        }
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = rkc.remove(x) {
-                            if let Some(rights) = ri.get_mut(&k) {
-                                rights.retain(|r| r != x);
-                            }
-                            if let Some(lefts) = li.get(&k) {
-                                for l in lefts {
-                                    output.delete(&(l.clone(), x.clone()));
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            left_last.store(left_up.deltas.len(), Ordering::Relaxed);
-            right_last.store(right_up.deltas.len(), Ordering::Relaxed);
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-}
-
-impl Runtime {
-    pub fn create_collection<T>(&self) -> IncrCollection<T>
-    where
-        T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-    {
-        let log = Arc::new(RwLock::new(CollectionLog::new()));
-        let version_node = self.create_input::<u64>(0);
-        IncrCollection { log, version_node }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn log_insert() {
-        let mut log = CollectionLog::new();
-        assert!(log.insert(1_i64));
-        assert_eq!(log.elements.len(), 1);
-        assert_eq!(log.version, 1);
-        assert_eq!(log.deltas.len(), 1);
-    }
-
-    #[test]
-    fn log_insert_duplicate_is_noop() {
-        let mut log = CollectionLog::new();
-        assert!(log.insert(1_i64));
-        assert!(!log.insert(1_i64));
-        assert_eq!(log.version, 1);
-    }
-
-    #[test]
-    fn log_delete() {
-        let mut log = CollectionLog::new();
-        log.insert(1_i64);
-        assert!(log.delete(&1));
-        assert_eq!(log.elements.len(), 0);
-        assert_eq!(log.version, 2);
-    }
-
-    #[test]
-    fn log_delete_missing_is_noop() {
-        let mut log: CollectionLog<i64> = CollectionLog::new();
-        assert!(!log.delete(&1));
-    }
-
-    #[test]
-    fn log_multiset_allows_duplicates() {
-        let mut log = CollectionLog::new_multiset();
-        assert!(log.insert(1_i64));
-        assert!(log.insert(1_i64));
-        assert_eq!(*log.elements.get(&1).unwrap(), 2);
-        assert_eq!(log.version, 2);
-    }
-
-    #[test]
-    fn create_and_insert() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        assert_eq!(col.log.read().unwrap().elements.len(), 3);
-    }
-
-    #[test]
-    fn insert_bumps_version_node() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        assert_eq!(rt.get(col.version_node), 0);
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(col.version_node), 1);
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(col.version_node), 2);
-    }
-
-    #[test]
-    fn delete_bumps_version_node() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.delete(&rt, &1);
-        assert_eq!(rt.get(col.version_node), 3);
-    }
-
-    #[test]
-    fn duplicate_insert_no_version_bump() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(col.version_node), 1);
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(col.version_node), 1);
-    }
-
-    #[test]
-    fn elements_returns_distinct_set() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        let elems = col.elements();
-        assert_eq!(elems.len(), 3);
-        assert!(elems.contains(&1));
-        assert!(elems.contains(&2));
-        assert!(elems.contains(&3));
-    }
-
-    #[test]
-    fn filter_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.read().unwrap().elements.len(), 2);
-    }
-
-    #[test]
-    fn filter_incremental_insert() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        col.insert(&rt, 2);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.read().unwrap().elements.len(), 1);
-
-        col.insert(&rt, 4);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.read().unwrap().elements.len(), 2);
-
-        col.insert(&rt, 3);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.read().unwrap().elements.len(), 2);
-    }
-
-    #[test]
-    fn filter_incremental_delete() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        col.insert(&rt, 2);
-        col.insert(&rt, 4);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.read().unwrap().elements.len(), 2);
-
-        col.delete(&rt, &2);
-        let _ = rt.get(evens.version_node);
-        assert_eq!(evens.log.read().unwrap().elements.len(), 1);
-    }
-
-    #[test]
-    fn filter_chained() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let positive = col.filter(&rt, |x| *x > 0);
-        let small = positive.filter(&rt, |x| *x < 10);
-
-        col.insert(&rt, -5);
-        col.insert(&rt, 3);
-        col.insert(&rt, 15);
-        col.insert(&rt, 7);
-
-        let _ = rt.get(small.version_node);
-        assert_eq!(small.log.read().unwrap().elements.len(), 2);
-    }
-
-    #[test]
-    fn map_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let doubled = col.map(&rt, |x| x * 2);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-
-        let _ = rt.get(doubled.version_node);
-        let elements = doubled.log.read().unwrap().elements_vec();
-        assert_eq!(elements.len(), 3);
-        assert!(elements.contains(&2));
-        assert!(elements.contains(&4));
-        assert!(elements.contains(&6));
-    }
-
-    #[test]
-    fn map_delete_propagates() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let doubled = col.map(&rt, |x| x * 2);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        let _ = rt.get(doubled.version_node);
-        assert_eq!(doubled.log.read().unwrap().elements.len(), 2);
-
-        col.delete(&rt, &1);
-        let _ = rt.get(doubled.version_node);
-        assert_eq!(doubled.log.read().unwrap().elements.len(), 1);
-        assert!(doubled.log.read().unwrap().elements.contains_key(&4));
-    }
-
-    #[test]
-    fn filter_then_map() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let doubled = evens.map(&rt, |x| x * 2);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-
-        let _ = rt.get(doubled.version_node);
-        let elements = doubled.log.read().unwrap().elements_vec();
-        assert_eq!(elements.len(), 2);
-        assert!(elements.contains(&4));
-        assert!(elements.contains(&8));
-    }
-
-    #[test]
-    fn count_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let count = col.count(&rt);
-
-        assert_eq!(rt.get(count), 0);
-        col.insert(&rt, 1);
-        assert_eq!(rt.get(count), 1);
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(count), 2);
-        col.delete(&rt, &1);
-        assert_eq!(rt.get(count), 1);
-    }
-
-    #[test]
-    fn count_after_filter() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let count = evens.count(&rt);
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-        assert_eq!(rt.get(count), 2);
-
-        col.insert(&rt, 6);
-        assert_eq!(rt.get(count), 3);
-    }
-
-    #[test]
-    fn count_early_cutoff() {
-        use std::sync::atomic::AtomicU32;
-
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let count = evens.count(&rt);
-
-        let downstream_count = Arc::new(AtomicU32::new(0));
-        let dc = downstream_count.clone();
-        let label = rt.create_query(move |rt| {
-            dc.fetch_add(1, Ordering::Relaxed);
-            format!("{} evens", rt.get(count))
-        });
-
-        col.insert(&rt, 2);
-        assert_eq!(rt.get(label), "1 evens");
-        assert_eq!(downstream_count.load(Ordering::Relaxed), 1);
-
-        col.insert(&rt, 3); // odd, count unchanged
-        assert_eq!(rt.get(label), "1 evens");
-        assert_eq!(downstream_count.load(Ordering::Relaxed), 1); // early cutoff
-    }
-
-    #[test]
-    fn reduce_sum() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sum = col.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-        assert_eq!(rt.get(sum), 0);
-        col.insert(&rt, 10);
-        assert_eq!(rt.get(sum), 10);
-        col.insert(&rt, 20);
-        assert_eq!(rt.get(sum), 30);
-        col.delete(&rt, &10);
-        assert_eq!(rt.get(sum), 20);
-    }
-
-    #[test]
-    fn reduce_max() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let max = col.reduce(&rt, |elements| -> Option<i64> {
-            elements.iter().copied().max()
-        });
-
-        assert_eq!(rt.get(max), None);
-        col.insert(&rt, 5);
-        assert_eq!(rt.get(max), Some(5));
-        col.insert(&rt, 3);
-        assert_eq!(rt.get(max), Some(5));
-        col.insert(&rt, 8);
-        assert_eq!(rt.get(max), Some(8));
-        col.delete(&rt, &8);
-        assert_eq!(rt.get(max), Some(5));
-    }
-
-    #[test]
-    fn reduce_after_filter() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-        let sum = evens.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-        col.insert(&rt, 1);
-        col.insert(&rt, 2);
-        col.insert(&rt, 3);
-        col.insert(&rt, 4);
-        assert_eq!(rt.get(sum), 6);
-    }
-
-    #[test]
-    fn group_by_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let grouped = col.group_by(&rt, |x: &(String, i64)| x.0.clone());
-
-        col.insert(&rt, ("a".to_string(), 1));
-        col.insert(&rt, ("b".to_string(), 2));
-        col.insert(&rt, ("a".to_string(), 3));
-
-        let _ = rt.get(grouped.version_node);
-        let groups = grouped.groups.read().unwrap();
-        assert_eq!(groups.len(), 2);
-        assert_eq!(groups.get("a").unwrap().elements().len(), 2);
-        assert_eq!(groups.get("b").unwrap().elements().len(), 1);
-    }
-
-    #[test]
-    fn group_by_delete() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let grouped = col.group_by(&rt, |x: &(String, i64)| x.0.clone());
-
-        col.insert(&rt, ("a".to_string(), 1));
-        col.insert(&rt, ("a".to_string(), 2));
-        let _ = rt.get(grouped.version_node);
-
-        col.delete(&rt, &("a".to_string(), 1));
-        let _ = rt.get(grouped.version_node);
-        let groups = grouped.groups.read().unwrap();
-        assert_eq!(groups.get("a").unwrap().elements().len(), 1);
-    }
-
-    #[test]
-    fn join_basic() {
-        let rt = Runtime::new();
-        let left = rt.create_collection::<(String, i64)>();
-        let right = rt.create_collection::<(String, String)>();
-
-        let joined = left.join(
-            &rt,
-            &right,
-            |l: &(String, i64)| l.0.clone(),
-            |r: &(String, String)| r.0.clone(),
-        );
-
-        left.insert(&rt, ("a".to_string(), 1));
-        left.insert(&rt, ("b".to_string(), 2));
-        right.insert(&rt, ("a".to_string(), "x".to_string()));
-        right.insert(&rt, ("c".to_string(), "y".to_string()));
-
-        let _ = rt.get(joined.version_node);
-        let elems = joined.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&(("a".to_string(), 1), ("a".to_string(), "x".to_string()))));
-    }
-
-    #[test]
-    fn join_multiple_matches() {
-        let rt = Runtime::new();
-        let left = rt.create_collection::<(i64, i64)>();
-        let right = rt.create_collection::<(i64, i64)>();
-
-        let joined = left.join(&rt, &right, |l: &(i64, i64)| l.0, |r: &(i64, i64)| r.0);
-
-        left.insert(&rt, (1, 10));
-        left.insert(&rt, (1, 20));
-        right.insert(&rt, (1, 100));
-
-        let _ = rt.get(joined.version_node);
-        let elems = joined.elements();
-        assert_eq!(elems.len(), 2);
-    }
-
-    #[test]
-    fn join_delete_propagates() {
-        let rt = Runtime::new();
-        let left = rt.create_collection::<(i64, i64)>();
-        let right = rt.create_collection::<(i64, i64)>();
-
-        let joined = left.join(&rt, &right, |l: &(i64, i64)| l.0, |r: &(i64, i64)| r.0);
-
-        left.insert(&rt, (1, 10));
-        right.insert(&rt, (1, 100));
-        let _ = rt.get(joined.version_node);
-        assert_eq!(joined.elements().len(), 1);
-
-        left.delete(&rt, &(1, 10));
-        let _ = rt.get(joined.version_node);
-        assert_eq!(joined.elements().len(), 0);
-    }
-}
diff --git a/crates/incr-concurrent/src/collection_proptest.rs b/crates/incr-concurrent/src/collection_proptest.rs
deleted file mode 100644
index a7db66c..0000000
--- a/crates/incr-concurrent/src/collection_proptest.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-use proptest::prelude::*;
-use std::collections::HashSet;
-
-use super::runtime::Runtime;
-
-fn oracle_elements(ops: &[(bool, i64)]) -> HashSet<i64> {
-    let mut set = HashSet::new();
-    for &(is_insert, val) in ops {
-        if is_insert {
-            set.insert(val);
-        } else {
-            set.remove(&val);
-        }
-    }
-    set
-}
-
-proptest! {
-    #[test]
-    fn collection_matches_oracle(
-        ops in proptest::collection::vec((proptest::bool::ANY, -100i64..100), 0..50)
-    ) {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-
-        for &(is_insert, val) in &ops {
-            if is_insert {
-                col.insert(&rt, val);
-            } else {
-                col.delete(&rt, &val);
-            }
-        }
-
-        let expected = oracle_elements(&ops);
-        let actual = col.elements();
-        prop_assert_eq!(actual, expected);
-    }
-
-    #[test]
-    fn filter_matches_oracle(
-        ops in proptest::collection::vec((proptest::bool::ANY, -100i64..100), 0..50)
-    ) {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let evens = col.filter(&rt, |x| x % 2 == 0);
-
-        for &(is_insert, val) in &ops {
-            if is_insert {
-                col.insert(&rt, val);
-            } else {
-                col.delete(&rt, &val);
-            }
-        }
-
-        let _ = rt.get(evens.version_node());
-        let expected: HashSet<i64> = oracle_elements(&ops)
-            .into_iter()
-            .filter(|x| x % 2 == 0)
-            .collect();
-        let actual = evens.elements();
-        prop_assert_eq!(actual, expected);
-    }
-
-    #[test]
-    fn sort_matches_oracle(
-        ops in proptest::collection::vec((proptest::bool::ANY, -100i64..100), 0..50)
-    ) {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        for &(is_insert, val) in &ops {
-            if is_insert {
-                col.insert(&rt, val);
-            } else {
-                col.delete(&rt, &val);
-            }
-        }
-
-        let _ = rt.get(sorted.version_node());
-        let mut expected: Vec<i64> = oracle_elements(&ops).into_iter().collect();
-        expected.sort();
-        let actual = sorted.entries();
-        prop_assert_eq!(actual, expected);
-    }
-
-    #[test]
-    fn pairwise_matches_oracle(
-        ops in proptest::collection::vec((proptest::bool::ANY, -100i64..100), 0..30)
-    ) {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        for &(is_insert, val) in &ops {
-            if is_insert {
-                col.insert(&rt, val);
-            } else {
-                col.delete(&rt, &val);
-            }
-        }
-
-        let _ = rt.get(pairs.version_node());
-        let mut sorted_vals: Vec<i64> = oracle_elements(&ops).into_iter().collect();
-        sorted_vals.sort();
-        let expected: HashSet<(i64, i64)> = sorted_vals
-            .windows(2)
-            .map(|w| (w[0], w[1]))
-            .collect();
-        let actual = pairs.elements();
-        prop_assert_eq!(actual, expected);
-    }
-
-    #[test]
-    fn count_matches_oracle(
-        ops in proptest::collection::vec((proptest::bool::ANY, -100i64..100), 0..50)
-    ) {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let count = col.count(&rt);
-
-        for &(is_insert, val) in &ops {
-            if is_insert {
-                col.insert(&rt, val);
-            } else {
-                col.delete(&rt, &val);
-            }
-        }
-
-        let expected = oracle_elements(&ops).len() as u64;
-        let actual = rt.get(count);
-        prop_assert_eq!(actual, expected);
-    }
-
-    #[test]
-    fn reduce_sum_matches_oracle(
-        ops in proptest::collection::vec((proptest::bool::ANY, -100i64..100), 0..50)
-    ) {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sum = col.reduce(&rt, |elems| -> i64 { elems.iter().sum() });
-
-        for &(is_insert, val) in &ops {
-            if is_insert {
-                col.insert(&rt, val);
-            } else {
-                col.delete(&rt, &val);
-            }
-        }
-
-        let expected: i64 = oracle_elements(&ops).into_iter().sum();
-        let actual = rt.get(sum);
-        prop_assert_eq!(actual, expected);
-    }
-}
diff --git a/crates/incr-concurrent/src/handle.rs b/crates/incr-concurrent/src/handle.rs
deleted file mode 100644
index 3412a37..0000000
--- a/crates/incr-concurrent/src/handle.rs
+++ /dev/null
@@ -1,330 +0,0 @@
-//! Runtime and node handle types.
-//!
-//! This module defines two identity types that together ensure handles
-//! cannot be used in unsafe ways:
-//!
-//! - [`RuntimeId`] uniquely identifies a `Runtime` instance for the
-//!   lifetime of a process. It is drawn from the same monotonic counter
-//!   that the arena registry uses, so a runtime's id is the id of its
-//!   registry.
-//! - [`Incr<T>`] is a typed handle to a node inside a runtime. It
-//!   carries enough information to detect three classes of misuse
-//!   without undefined behavior:
-//!     1. Using a handle with the wrong runtime. Caught via the
-//!        `runtime_id` field.
-//!     2. Using a handle after the underlying slot has been recycled
-//!        (in a future version of incr that supports node deletion).
-//!        Caught via the `generation` field.
-//!     3. Using a handle with the wrong value type. Caught statically
-//!        via the `PhantomData<fn() -> T>` parameter: once the runtime
-//!        returns an `Incr<u64>` from `create_input::<u64>`, the type
-//!        is locked in at compile time.
-//!
-//! ## Handle layout
-//!
-//! ```text
-//! offset  size   field
-//! ------  ----   -----
-//!    0     4     slot         u32
-//!    4     4     generation   u32
-//!    8     8     runtime_id   RuntimeId (u64)
-//!   16     0     _phantom     PhantomData<fn() -> T>
-//! ```
-//!
-//! Total: 16 bytes on 64-bit platforms. The handle is `Copy` and cheap
-//! to pass around by value. The decision to widen from the v1 4-byte
-//! NodeId to v2's 16-byte Incr is covered in spec section 13, questions
-//! Q3 and Q4; both recommendations ("add runtime identity", "add
-//! generation counters") are applied here.
-//!
-//! ## Why `PhantomData<fn() -> T>`
-//!
-//! `PhantomData<T>` would tie `Incr<T>`'s auto traits to `T`: an
-//! `Incr<RefCell<...>>` would not be `Sync` because `RefCell` is not
-//! `Sync`. That is the wrong contract for a handle, because a handle
-//! does not own a `T` and does not expose `&T` to shared callers; it
-//! is just an opaque token. `PhantomData<fn() -> T>` covariantly
-//! references `T` without inheriting its auto traits, so `Incr<T>` is
-//! `Send + Sync + Copy + Unpin` for every `T: 'static`.
-
-use std::marker::PhantomData;
-
-/// Unique identifier for a `Runtime` (equivalently, its arena registry).
-/// Assigned monotonically at construction; never reused within a process
-/// lifetime because the underlying counter is `u64` and does not wrap
-/// within any realistic program run.
-///
-/// The value zero is reserved as a sentinel for "not a real runtime" and
-/// is used by the TLS arena pointer cache to mark empty slots.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
-#[repr(transparent)]
-pub struct RuntimeId(u64);
-
-impl RuntimeId {
-    /// The sentinel runtime id. Never assigned to a real runtime.
-    #[allow(dead_code)]
-    pub(crate) const SENTINEL: RuntimeId = RuntimeId(0);
-
-    /// Wrap a raw counter value. Called by the arena registry when a new
-    /// runtime is constructed.
-    pub(crate) const fn from_raw(raw: u64) -> Self {
-        Self(raw)
-    }
-
-    /// Get the raw counter value. Used by the TLS arena pointer cache
-    /// which keeps its storage as a bare `u64` to avoid churn on the
-    /// hot path.
-    #[inline]
-    pub(crate) const fn get(self) -> u64 {
-        self.0
-    }
-}
-
-/// A typed handle to a node in a `Runtime`.
-///
-/// Handles are `Copy` and freely shareable across threads. Their validity
-/// is checked at access time by the runtime, which verifies the handle's
-/// `runtime_id` matches its own and the `generation` matches the node's
-/// current generation counter. Both checks panic with a clear message on
-/// failure via [`HandleError`] propagation in the runtime.
-///
-/// The `T` parameter is carried via `PhantomData<fn() -> T>` so that
-/// auto-trait propagation is not affected by `T`. A handle is always
-/// `Send + Sync + Copy` regardless of `T`.
-#[repr(C)]
-pub struct Incr<T: 'static> {
-    slot: u32,
-    generation: u32,
-    runtime_id: RuntimeId,
-    _phantom: PhantomData<fn() -> T>,
-}
-
-// Manual implementations of the standard derives so they do not require
-// `T: Copy + Clone + Debug + PartialEq + Eq + Hash`. A handle is these
-// things regardless of what `T` is.
-
-impl<T: 'static> Copy for Incr<T> {}
-impl<T: 'static> Clone for Incr<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T: 'static> std::fmt::Debug for Incr<T> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("Incr")
-            .field("slot", &self.slot)
-            .field("generation", &self.generation)
-            .field("runtime_id", &self.runtime_id)
-            .field("type", &std::any::type_name::<T>())
-            .finish()
-    }
-}
-
-impl<T: 'static> PartialEq for Incr<T> {
-    fn eq(&self, other: &Self) -> bool {
-        self.slot == other.slot
-            && self.generation == other.generation
-            && self.runtime_id == other.runtime_id
-    }
-}
-
-impl<T: 'static> Eq for Incr<T> {}
-
-impl<T: 'static> std::hash::Hash for Incr<T> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.slot.hash(state);
-        self.generation.hash(state);
-        self.runtime_id.hash(state);
-    }
-}
-
-impl<T: 'static> Incr<T> {
-    /// Construct a handle. Crate-private so user code cannot forge
-    /// handles; only the runtime's node creation paths return an
-    /// `Incr<T>`, which binds `T` at the creation call site.
-    pub(crate) fn new(slot: u32, generation: u32, runtime_id: RuntimeId) -> Self {
-        Self {
-            slot,
-            generation,
-            runtime_id,
-            _phantom: PhantomData,
-        }
-    }
-
-    /// The slot index this handle refers to.
-    #[inline]
-    pub fn slot(self) -> u32 {
-        self.slot
-    }
-
-    /// The expected generation counter for the slot.
-    #[inline]
-    pub(crate) fn generation(self) -> u32 {
-        self.generation
-    }
-
-    /// The owning runtime's id.
-    #[inline]
-    pub(crate) fn runtime_id(self) -> RuntimeId {
-        self.runtime_id
-    }
-}
-
-/// Error returned by handle verification when a check fails.
-///
-/// The runtime's public `get` / `set` methods convert these into
-/// panics with a clear message. Tests and internal diagnostics use the
-/// `Result`-returning verifier so failures can be observed without
-/// tearing down the process.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub(crate) enum HandleError {
-    /// The handle was created by a different runtime than the one it
-    /// is being used with. Carries both ids for diagnostics.
-    WrongRuntime {
-        handle_runtime: RuntimeId,
-        current_runtime: RuntimeId,
-    },
-    /// The slot the handle points at has been recycled since the handle
-    /// was created. Carries both generations for diagnostics.
-    StaleGeneration {
-        handle_generation: u32,
-        current_generation: u32,
-    },
-}
-
-impl std::fmt::Display for HandleError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            HandleError::WrongRuntime {
-                handle_runtime,
-                current_runtime,
-            } => write!(
-                f,
-                "Incr handle from runtime {:?} used with runtime {:?}",
-                handle_runtime, current_runtime
-            ),
-            HandleError::StaleGeneration {
-                handle_generation,
-                current_generation,
-            } => write!(
-                f,
-                "Incr handle with generation {} used after slot recycled to generation {}",
-                handle_generation, current_generation
-            ),
-        }
-    }
-}
-
-impl std::error::Error for HandleError {}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn incr_is_16_bytes_and_8_aligned() {
-        assert_eq!(std::mem::size_of::<Incr<u64>>(), 16);
-        assert_eq!(std::mem::align_of::<Incr<u64>>(), 8);
-        // Same size regardless of T.
-        assert_eq!(std::mem::size_of::<Incr<String>>(), 16);
-        assert_eq!(std::mem::size_of::<Incr<Vec<u8>>>(), 16);
-    }
-
-    #[test]
-    fn incr_is_copy_and_send_and_sync_regardless_of_t() {
-        fn assert_copy<T: Copy>() {}
-        fn assert_send_sync<T: Send + Sync>() {}
-        // u64: Copy + Send + Sync (obvious)
-        assert_copy::<Incr<u64>>();
-        assert_send_sync::<Incr<u64>>();
-        // String: !Copy, but Incr<String> is still Copy because Incr
-        // does not store T.
-        assert_copy::<Incr<String>>();
-        assert_send_sync::<Incr<String>>();
-        // A !Sync type: RefCell<T>. Incr<RefCell<_>> must still be Sync.
-        assert_send_sync::<Incr<std::cell::RefCell<u64>>>();
-        // A !Send type: Rc<T>. Incr<Rc<_>> must still be Send.
-        assert_send_sync::<Incr<std::rc::Rc<u64>>>();
-    }
-
-    #[test]
-    fn different_types_with_same_fields_are_distinct_at_the_type_level() {
-        // Not a runtime check; this is a compile-gate that ensures the
-        // phantom-T parameter actually participates in type identity.
-        // If someone deletes the PhantomData this test will compile
-        // just fine and the bug slips past, so we also check runtime
-        // behavior: Incr<u64> and Incr<i64> with the same fields are
-        // distinct types and cannot be passed to each other's slots.
-        let _u: Incr<u64> = Incr::new(0, 0, RuntimeId::from_raw(1));
-        let _i: Incr<i64> = Incr::new(0, 0, RuntimeId::from_raw(1));
-        // If we uncommented the next line the compiler would reject it:
-        //   let _: Incr<u64> = _i;
-    }
-
-    #[test]
-    fn incr_equality_compares_all_three_fields() {
-        let rid = RuntimeId::from_raw(1);
-        let a: Incr<u64> = Incr::new(7, 3, rid);
-        let b: Incr<u64> = Incr::new(7, 3, rid);
-        assert_eq!(a, b);
-
-        let different_slot: Incr<u64> = Incr::new(8, 3, rid);
-        assert_ne!(a, different_slot);
-
-        let different_gen: Incr<u64> = Incr::new(7, 4, rid);
-        assert_ne!(a, different_gen);
-
-        let different_rt: Incr<u64> = Incr::new(7, 3, RuntimeId::from_raw(2));
-        assert_ne!(a, different_rt);
-    }
-
-    #[test]
-    fn incr_hash_is_stable() {
-        use std::collections::HashSet;
-        let rid = RuntimeId::from_raw(42);
-        let a: Incr<u64> = Incr::new(1, 0, rid);
-        let b: Incr<u64> = Incr::new(1, 0, rid);
-        let c: Incr<u64> = Incr::new(2, 0, rid);
-
-        let mut set: HashSet<Incr<u64>> = HashSet::new();
-        set.insert(a);
-        assert!(set.contains(&b)); // same fields → same hash → hit
-        assert!(!set.contains(&c));
-    }
-
-    #[test]
-    fn runtime_id_sentinel_is_zero_and_never_equals_real_ids() {
-        assert_eq!(RuntimeId::SENTINEL.get(), 0);
-        let real = RuntimeId::from_raw(1);
-        assert_ne!(RuntimeId::SENTINEL, real);
-    }
-
-    #[test]
-    fn handle_error_display_mentions_ids_and_generations() {
-        let err = HandleError::WrongRuntime {
-            handle_runtime: RuntimeId::from_raw(1),
-            current_runtime: RuntimeId::from_raw(2),
-        };
-        let msg = format!("{}", err);
-        assert!(msg.contains("RuntimeId(1)"));
-        assert!(msg.contains("RuntimeId(2)"));
-
-        let err = HandleError::StaleGeneration {
-            handle_generation: 3,
-            current_generation: 7,
-        };
-        let msg = format!("{}", err);
-        assert!(msg.contains("3"));
-        assert!(msg.contains("7"));
-    }
-
-    #[test]
-    fn incr_debug_shows_type_name() {
-        let h: Incr<u64> = Incr::new(1, 2, RuntimeId::from_raw(3));
-        let s = format!("{:?}", h);
-        assert!(s.contains("slot: 1"));
-        assert!(s.contains("generation: 2"));
-        assert!(s.contains("u64")); // type_name<u64>() is "u64"
-    }
-}
diff --git a/crates/incr-concurrent/src/lib.rs b/crates/incr-concurrent/src/lib.rs
index 879f06a..48d75c7 100644
--- a/crates/incr-concurrent/src/lib.rs
+++ b/crates/incr-concurrent/src/lib.rs
@@ -1,24 +1,47 @@
-pub mod arena;
-pub mod collection;
-pub mod handle;
-pub mod runtime;
-pub mod sorted_collection;
-pub mod value;
+//! `incr-concurrent`: thread-safe incremental computation engine.
+//!
+//! Since 0.2, this crate is a thin re-export of [`incr_core`] with the
+//! [`Shared`] strategy. The `Runtime` type is `Send + Sync`: wrap it in
+//! `Arc`, share it across threads, have one writer thread call `set`
+//! while many reader threads call `get` on derived nodes. Same API
+//! surface as the single-threaded sibling `incr-compute`: switching
+//! is a one-line dependency swap.
+//!
+//! ## API status
+//!
+//! - Function DAG: `Runtime`, `Incr<T>`, `create_input`, `create_query`,
+//!   `get`, `set`, `node_count`, `graph_snapshot`, `get_traced`. All
+//!   functional. `get_traced` returns timing data but not per-node
+//!   trace events; full tracing lands alongside the dashboard demo.
+//! - Operators: `filter`, `map`, `count`, `reduce`, `sort_by_key`,
+//!   `pairwise`, `window`, `group_by`, `join`. All functional under
+//!   `Shared`.
+//! - Soundness: `set()` on a query node panics with a clear message.
+//!
+//! Migration from 0.1: the `Value` trait surface is now shared with
+//! `incr-compute`. Most user types (primitives, String, Vec, Option,
+//! tuples) implement it automatically.
 
-pub(crate) mod node;
-pub(crate) mod nodes_store;
-pub(crate) mod registry;
-pub(crate) mod state;
+#![doc(html_no_source)]
 
-#[cfg(test)]
-mod collection_proptest;
-#[cfg(test)]
-mod runtime_concurrent_test;
-#[cfg(test)]
-mod runtime_proptest;
+use incr_core::Shared;
 
-pub use collection::{Delta, GroupedCollection, IncrCollection};
-pub use handle::{Incr, RuntimeId};
-pub use runtime::{NodeInfo, NodeKindInfo, NodeTrace, PropagationTrace, Runtime, TraceAction};
-pub use sorted_collection::{SortDelta, SortedCollection};
-pub use value::Value;
+pub use incr_core::{
+    Delta, GroupedCollection as GroupedCollectionInner, Incr,
+    IncrCollection as IncrCollectionInner, NodeId, NodeInfo, NodeKindInfo, NodeState, NodeTrace,
+    PropagationTrace, RuntimeId, SortDelta, SortedCollection as SortedCollectionInner, TraceAction,
+    Value,
+};
+
+/// Multi-threaded runtime: `Runtime<Shared>`. `Send + Sync`; wrap in
+/// `Arc` to share across threads.
+pub type Runtime = incr_core::Runtime<Shared>;
+
+/// Thread-safe incremental collection: `IncrCollection<T, Shared>`.
+pub type IncrCollection<T> = IncrCollectionInner<T, Shared>;
+
+/// Thread-safe grouped collection: `GroupedCollection<K, T, Shared>`.
+pub type GroupedCollection<K, T> = GroupedCollectionInner<K, T, Shared>;
+
+/// Thread-safe sorted collection: `SortedCollection<T, K, Shared>`.
+pub type SortedCollection<T, K> = SortedCollectionInner<T, K, Shared>;
diff --git a/crates/incr-concurrent/src/node.rs b/crates/incr-concurrent/src/node.rs
deleted file mode 100644
index d0aa093..0000000
--- a/crates/incr-concurrent/src/node.rs
+++ /dev/null
@@ -1,844 +0,0 @@
-//! Node data layout.
-//!
-//! Per section 5.1 of the concurrent core rewrite spec, a node's read-hot
-//! fields live in a single 64-byte cache-line-aligned `NodeData` struct.
-//! Write-hot fields (dependents, per-node labels) live in parallel vectors
-//! on the `Runtime` so that reader traversal touches exactly one cache line
-//! per visited node.
-//!
-//! ## Layout
-//!
-//! Fields are ordered in alignment-descending order so that the struct is
-//! exactly 64 bytes with no internal padding:
-//!
-//! ```text
-//! offset  size   field
-//! ------  ----   -----
-//!    0     8     verified_at   AtomicU64
-//!    8     8     changed_at    AtomicU64
-//!   16     8     overflow_deps AtomicPtr<DepList>
-//!   24    28     inline_deps   [AtomicU32; 7]
-//!   52     4     arena_slot    u32           (write-once)
-//!   56     2     type_tag      u16           (write-once)
-//!   58     1     state         AtomicNodeState
-//!   59     1     dep_count     AtomicU8
-//!   60     4     generation    AtomicU32     (bumped on slot recycle)
-//! ```
-//!
-//! `#[repr(C, align(64))]` forces both the layout (C-style, no field
-//! reordering) and the 64-byte alignment. The `const _: () = assert!(...)`
-//! at the bottom of this module is load-bearing and will trip the build if
-//! a future edit perturbs the size or alignment.
-//!
-//! ## Resolved spec ambiguity
-//!
-//! Section 5.1 first sketches a `NodeData` with `dependents` fields
-//! inline in the struct, then three paragraphs later says "the final
-//! layout separates read-hot fields from write-hot fields ... this is
-//! better and is what the implementation will do." We implement the
-//! parallel-vector version: this `NodeData` carries dependencies (the
-//! reader traversal path) but not dependents (the writer's dirty walk
-//! path). The `Runtime` holds a `Vec<DependentsSlice>` indexed by node id.
-//! The label, similarly, lives on the `Runtime` side rather than inline,
-//! since the spec's sketch did not account for its 8 bytes in the 64-byte
-//! budget.
-//!
-//! ## Dependency list
-//!
-//! Dependencies use an inline-7 + overflow-pointer layout. Up to seven
-//! deps live directly in `inline_deps`; beyond that, `overflow_deps`
-//! points at a heap-allocated `DepList` containing *all* the deps.
-//! The `inline_deps` array is ignored when `dep_count > 7`.
-//!
-//! Inline-7 is chosen because most function queries have 1-3 deps and
-//! almost all have under 8. The occasional wide fan-in node pays one
-//! pointer dereference via the overflow path, which is acceptable.
-//!
-//! ## Dependency mutation (deferred)
-//!
-//! This commit establishes the NodeData struct, construction, and
-//! read-only dep access. Mutation of deps (during a recompute that
-//! discovers a different set of dependencies than the previous run)
-//! requires coordination with the state machine and an epoch-based
-//! reclamation story for the old overflow list. Both land in commit F
-//! alongside the Runtime's compute path. In the meantime, a node's
-//! deps are write-once at construction.
-//!
-//! ## Memory ordering in this module
-//!
-//! Constructors use `Relaxed` stores for every field. Visibility to
-//! other threads is established later by the caller (the Runtime)
-//! when it Release-stores the final state on the node, or when it
-//! publishes the node's segment pointer to readers. This module does
-//! not attempt to be self-synchronizing; it only provides the right
-//! atomic primitives and correct load orderings on the read side.
-
-use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicU64, AtomicU8, Ordering};
-
-use super::handle::{HandleError, Incr, RuntimeId};
-use super::state::{AtomicNodeState, NodeState};
-
-/// Stable identifier for a node within a `Runtime`. The `u32` is an index
-/// into the runtime's segmented nodes store.
-///
-/// `NodeId` is a newtype rather than a bare `u32` so that mixing up node
-/// ids with arena slot indices (also `u32`) produces a type error.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub(crate) struct NodeId(pub u32);
-
-impl NodeId {
-    /// Sentinel value used for uninitialized inline dep slots. Real nodes
-    /// start at index 0; using `u32::MAX` as the sentinel keeps the common
-    /// case (small index) fitting in a smaller integer for debug output.
-    pub(crate) const SENTINEL: NodeId = NodeId(u32::MAX);
-}
-
-/// Heap-allocated overflow dependency list. Used when a node has more than
-/// seven dependencies. Immutable once published.
-///
-/// The `Box<[NodeId]>` layout gives us a length prefix for free (fat
-/// pointer) without a separate length field. Readers iterate via
-/// `.deps.iter()`.
-pub(crate) struct DepList {
-    pub(crate) deps: Box<[NodeId]>,
-}
-
-/// Read-hot per-node state. Exactly one 64-byte cache line.
-///
-/// See the module docs for the field layout rationale and the
-/// coordination contract with the state machine and the runtime.
-#[repr(C, align(64))]
-pub(crate) struct NodeData {
-    /// Revision at which this node was last verified against its
-    /// dependencies. In the red-green algorithm, a node is known clean
-    /// at `verified_at` even if `changed_at < verified_at`.
-    verified_at: AtomicU64,
-
-    /// Revision at which this node's value last changed. Set once per
-    /// successful compute to the current runtime revision.
-    changed_at: AtomicU64,
-
-    /// Pointer to overflow dep list, null when `dep_count <= 7`.
-    /// Allocated via `Box::into_raw` in the constructor; reclaimed by
-    /// this struct's `Drop`. Mutation (via compute) is out of scope for
-    /// commit D; the pointer is effectively write-once here.
-    overflow_deps: AtomicPtr<DepList>,
-
-    /// Inline dependency storage, valid for slots `0..dep_count.min(7)`.
-    /// Slots at or beyond `dep_count` are stale; treat them as
-    /// uninitialized padding. When `dep_count > 7`, the entire inline
-    /// array is ignored and `overflow_deps` holds the authoritative list.
-    inline_deps: [AtomicU32; 7],
-
-    /// Index into the typed arena for this node's value. Immutable after
-    /// construction. Not atomic; synchronized with readers via the state
-    /// machine's initial Release store.
-    arena_slot: u32,
-
-    /// Tag identifying which typed arena holds this node's value. The
-    /// runtime maps `type_tag` to a concrete `AtomicPrimitiveArena<T>` or
-    /// `GenericArena<T>` via the arena registry. Immutable after
-    /// construction.
-    type_tag: u16,
-
-    /// The node's lifecycle state. See `v2::state` for the transition
-    /// table and memory ordering contract.
-    state: AtomicNodeState,
-
-    /// Current number of dependencies. Used to decide whether to read
-    /// from `inline_deps` or `overflow_deps`.
-    dep_count: AtomicU8,
-
-    /// Generation counter for detecting use-after-recycle of this slot.
-    /// Matched against `Incr<T>::generation` on every handle access.
-    /// Bumped by `Runtime::delete_node` (a future capability; commit E
-    /// reserves the field but does not yet recycle slots). Lives in the
-    /// four bytes that would otherwise be trailing padding, so adding
-    /// this field does not change the struct size. The const assertion
-    /// at the bottom of this file enforces size == 64.
-    generation: AtomicU32,
-}
-
-// These assertions are load-bearing. A mismatch means a later edit
-// perturbed the layout in a way that breaks the one-cache-line-per-node
-// invariant. If an edit intentionally grows the struct, the fix is not
-// to relax the assertion; it is to revisit the spec's 64-byte budget.
-const _: () = assert!(
-    std::mem::size_of::<NodeData>() == 64,
-    "NodeData must be exactly one 64-byte cache line"
-);
-const _: () = assert!(
-    std::mem::align_of::<NodeData>() == 64,
-    "NodeData must be 64-byte aligned"
-);
-
-impl NodeData {
-    /// Construct a new input node. Input nodes start in `Clean` state
-    /// because their value is provided directly at creation time by the
-    /// runtime's `create_input`. They have no dependencies.
-    ///
-    /// `revision` is the runtime's current revision at the time of input
-    /// creation, used for both `verified_at` and `changed_at`.
-    pub(crate) fn new_input(type_tag: u16, arena_slot: u32, revision: u64) -> Self {
-        Self {
-            verified_at: AtomicU64::new(revision),
-            changed_at: AtomicU64::new(revision),
-            overflow_deps: AtomicPtr::new(std::ptr::null_mut()),
-            inline_deps: Self::empty_inline_deps(),
-            arena_slot,
-            type_tag,
-            state: AtomicNodeState::new(NodeState::Clean),
-            dep_count: AtomicU8::new(0),
-            generation: AtomicU32::new(0),
-        }
-    }
-
-    /// Construct a new query node. Query nodes start in `New` state
-    /// because their value has not yet been computed; the first reader
-    /// will CAS to `Computing` and run the compute closure.
-    ///
-    /// Query nodes have no known dependencies at construction time:
-    /// dependencies are discovered during compute by the dep tracker.
-    pub(crate) fn new_query(type_tag: u16, arena_slot: u32) -> Self {
-        Self {
-            verified_at: AtomicU64::new(0),
-            changed_at: AtomicU64::new(0),
-            overflow_deps: AtomicPtr::new(std::ptr::null_mut()),
-            inline_deps: Self::empty_inline_deps(),
-            arena_slot,
-            type_tag,
-            state: AtomicNodeState::new(NodeState::New),
-            dep_count: AtomicU8::new(0),
-            generation: AtomicU32::new(0),
-        }
-    }
-
-    /// Internal: construct an inline-deps array with every slot set to
-    /// the sentinel (u32::MAX). Used by the constructors; real deps
-    /// overwrite these sentinels.
-    fn empty_inline_deps() -> [AtomicU32; 7] {
-        [
-            AtomicU32::new(NodeId::SENTINEL.0),
-            AtomicU32::new(NodeId::SENTINEL.0),
-            AtomicU32::new(NodeId::SENTINEL.0),
-            AtomicU32::new(NodeId::SENTINEL.0),
-            AtomicU32::new(NodeId::SENTINEL.0),
-            AtomicU32::new(NodeId::SENTINEL.0),
-            AtomicU32::new(NodeId::SENTINEL.0),
-        ]
-    }
-
-    /// Replace this node's dependency list with a new set.
-    ///
-    /// Called by the runtime on recompute when the compute closure
-    /// recorded a different set of dependencies than the previous run
-    /// (dynamic dependencies). Handles all four inline/overflow
-    /// transitions:
-    ///
-    /// - inline → inline: overwrite the inline slots in place, clear
-    ///   the overflow pointer if it was somehow non-null (shouldn't
-    ///   be, but harmless).
-    /// - inline → overflow: allocate a new DepList, install it in the
-    ///   overflow pointer.
-    /// - overflow → inline: copy into inline slots, free the old
-    ///   overflow box.
-    /// - overflow → overflow: allocate a new DepList, swap it into
-    ///   the overflow pointer, free the old one.
-    ///
-    /// # Safety of overflow reclamation
-    ///
-    /// The caller must guarantee that no concurrent reader holds a
-    /// pointer to the old overflow list at the time of this call.
-    /// The runtime enforces this by taking `nodes.write()` for the
-    /// duration of the call, which is mutually exclusive with any
-    /// reader's `nodes.read()` guard. A later commit replaces this
-    /// with epoch reclamation so recompute can run without the write
-    /// lock; for now the write lock is the correctness mechanism.
-    ///
-    /// Stores use `Relaxed` ordering because the caller will issue a
-    /// `Release` store on the node's state after this call (the
-    /// Computing → Clean transition at the end of `run_compute`),
-    /// which publishes these writes to subsequent readers via the
-    /// standard Release-Acquire chain on state.
-    #[allow(dead_code)]
-    pub(crate) fn replace_deps(&self, new_deps: &[NodeId]) {
-        let count = new_deps.len();
-        assert!(
-            count <= u8::MAX as usize,
-            "deps overflow u8 count: {}",
-            count
-        );
-
-        // Snapshot the old overflow pointer so we can reclaim it
-        // after installing the new dep list. Load is Relaxed because
-        // the caller already owns exclusive access via nodes.write()
-        // and no other thread can be racing on this slot.
-        let old_overflow = self.overflow_deps.load(Ordering::Relaxed);
-
-        if count <= 7 {
-            // New deps fit inline. Overwrite inline slots and clear
-            // the overflow pointer. Slots beyond `count` are stale
-            // but ignored by for_each_dep (which loops to count).
-            for (i, dep) in new_deps.iter().enumerate() {
-                self.inline_deps[i].store(dep.0, Ordering::Relaxed);
-            }
-            self.overflow_deps
-                .store(std::ptr::null_mut(), Ordering::Relaxed);
-        } else {
-            // New deps spill to overflow. Allocate a fresh DepList
-            // and install it. The inline slots are ignored when
-            // dep_count > 7 so we do not touch them.
-            let list = Box::new(DepList {
-                deps: new_deps.to_vec().into_boxed_slice(),
-            });
-            let new_ptr = Box::into_raw(list);
-            self.overflow_deps.store(new_ptr, Ordering::Relaxed);
-        }
-
-        self.dep_count.store(count as u8, Ordering::Relaxed);
-
-        // Reclaim the old overflow box if present. The caller's
-        // nodes.write() guarantees no concurrent reader can be
-        // dereferencing this pointer right now.
-        if !old_overflow.is_null() {
-            // SAFETY: the pointer came from `Box::into_raw` in a
-            // previous call to `publish_initial_deps` or
-            // `replace_deps`; the caller holds nodes.write() so no
-            // concurrent reader can still be using it.
-            unsafe {
-                drop(Box::from_raw(old_overflow));
-            }
-        }
-    }
-
-    /// Variant of `replace_deps` that leaks the old overflow list
-    /// instead of reclaiming it.
-    ///
-    /// Required because SegmentedNodes has no reader/writer
-    /// exclusion on node state, so freeing the old overflow pointer
-    /// while a walker is mid-traversal would UAF. The leak is
-    /// bounded: `NodeData::Drop` reclaims the currently-installed
-    /// overflow list, and this method is only called on the rare
-    /// dynamic-dep path where the dep set changes AND the node has
-    /// more than 7 deps. Static-dep workloads never call it.
-    ///
-    /// Spec section 5.3 calls for epoch-based reclamation here. The
-    /// planned fix (commit X of Gate 4) used `crossbeam-epoch 0.9`,
-    /// which turns out not to be miri-clean due to integer-to-pointer
-    /// casts in its internal thread-local list init. Rather than
-    /// regress the miri-clean invariant for a bounded leak on a
-    /// rarely-hit path, X was dropped from Gate 4 and the leak is
-    /// kept as the permanent post-Gate-4 state. Proper reclamation
-    /// is queued as a dedicated later chunk that will evaluate
-    /// `seize`, `haphazard`, or a custom strict-provenance
-    /// implementation.
-    pub(crate) fn replace_deps_leaking_old_overflow(&self, new_deps: &[NodeId]) {
-        let count = new_deps.len();
-        assert!(
-            count <= u8::MAX as usize,
-            "deps overflow u8 count: {}",
-            count
-        );
-
-        if count <= 7 {
-            for (i, dep) in new_deps.iter().enumerate() {
-                self.inline_deps[i].store(dep.0, Ordering::Relaxed);
-            }
-            // We do NOT clear overflow_deps here. If we did and the
-            // previous deps were in overflow, we would lose the
-            // pointer without freeing it AND Drop would fail to
-            // reclaim the final overflow allocation. Leaving the
-            // old overflow pointer in place means Drop still has
-            // something to free; it just frees a list that is not
-            // the current dep list. Accepted tradeoff for the
-            // commit U stopgap.
-            //
-            // A reader that loads overflow_deps and sees the old
-            // pointer will dereference the old list, but since we
-            // also updated dep_count to `count` <= 7, the reader's
-            // for_each_dep takes the inline branch (`if count <= 7`)
-            // and never touches overflow_deps. So the stale pointer
-            // is never read as deps; it is only used at Drop time.
-        } else {
-            let list = Box::new(DepList {
-                deps: new_deps.to_vec().into_boxed_slice(),
-            });
-            let new_ptr = Box::into_raw(list);
-            // Swap the pointer, leaking the old one (intentional
-            // per the method contract). Drop will reclaim `new_ptr`
-            // when the node itself drops, but any prior overflow
-            // lists this slot held are leaked for the node's
-            // lifetime.
-            self.overflow_deps.store(new_ptr, Ordering::Relaxed);
-        }
-
-        self.dep_count.store(count as u8, Ordering::Relaxed);
-    }
-
-    /// Publish an initial dependency list on a `New` query node.
-    ///
-    /// This is the commit-D placeholder for the full compute-path dep
-    /// publish that lands in commit F. It may be called exactly once,
-    /// by the runtime, before the node has been read by any other
-    /// thread. It does not coordinate with the state machine or with
-    /// epoch reclamation; it assumes the caller owns the node's
-    /// `Computing` state (or has equivalent exclusive access).
-    ///
-    /// Takes `&self` rather than `&mut self` because all underlying
-    /// stores are on atomic fields that do not require exclusive
-    /// reference. The caller accesses the node through a shared
-    /// `nodes.read()` guard. The exclusivity guarantee needed for
-    /// correctness comes from the state machine (Computing state is
-    /// owned by exactly one thread at a time), not from Rust's
-    /// aliasing rules.
-    ///
-    /// # Panics
-    /// Panics in debug builds if called on a node that already has
-    /// dependencies, or on a node whose state has already transitioned
-    /// out of `New`. Safe but meaningless in release in those cases.
-    pub(crate) fn publish_initial_deps(&self, deps: &[NodeId]) {
-        debug_assert_eq!(
-            self.dep_count.load(Ordering::Relaxed),
-            0,
-            "publish_initial_deps on a node with existing deps"
-        );
-        let count = deps.len();
-        assert!(
-            count <= u8::MAX as usize,
-            "deps overflow u8 count: {}",
-            count
-        );
-        if count <= 7 {
-            for (i, dep) in deps.iter().enumerate() {
-                self.inline_deps[i].store(dep.0, Ordering::Relaxed);
-            }
-        } else {
-            let list = Box::new(DepList {
-                deps: deps.to_vec().into_boxed_slice(),
-            });
-            let ptr = Box::into_raw(list);
-            // Store uses Relaxed because the caller owns exclusive
-            // access via the state machine (Computing state) and the
-            // eventual publish step (state transition to Clean) will
-            // Release-synchronize this write with any future reader.
-            self.overflow_deps.store(ptr, Ordering::Relaxed);
-        }
-        self.dep_count.store(count as u8, Ordering::Relaxed);
-    }
-
-    /// Current state. Acquire-loaded: synchronizes with the Release
-    /// store that published this state.
-    #[inline]
-    pub(crate) fn state(&self) -> NodeState {
-        self.state.load_acquire()
-    }
-
-    /// The node's state cell. Exposes the AtomicNodeState helpers
-    /// (CAS, try_claim_compute, etc.) to the runtime.
-    #[inline]
-    pub(crate) fn state_cell(&self) -> &AtomicNodeState {
-        &self.state
-    }
-
-    /// Type tag for the arena holding this node's value. Immutable.
-    #[allow(dead_code)]
-    #[inline]
-    pub(crate) fn type_tag(&self) -> u16 {
-        self.type_tag
-    }
-
-    /// Arena slot index for this node's value. Immutable.
-    #[inline]
-    pub(crate) fn arena_slot(&self) -> u32 {
-        self.arena_slot
-    }
-
-    /// Last-verified revision.
-    #[inline]
-    pub(crate) fn verified_at(&self) -> u64 {
-        self.verified_at.load(Ordering::Relaxed)
-    }
-
-    /// Last-changed revision.
-    #[inline]
-    pub(crate) fn changed_at(&self) -> u64 {
-        self.changed_at.load(Ordering::Relaxed)
-    }
-
-    /// Update this node's last-verified revision. Relaxed store;
-    /// visibility to other threads is established by the subsequent
-    /// Release on the node's state cell.
-    #[inline]
-    pub(crate) fn set_verified_at(&self, revision: u64) {
-        self.verified_at.store(revision, Ordering::Relaxed);
-    }
-
-    /// Update this node's last-changed revision. Same ordering
-    /// argument as `set_verified_at`.
-    #[inline]
-    pub(crate) fn set_changed_at(&self, revision: u64) {
-        self.changed_at.store(revision, Ordering::Relaxed);
-    }
-
-    /// Current dependency count.
-    #[inline]
-    pub(crate) fn dep_count(&self) -> u8 {
-        self.dep_count.load(Ordering::Relaxed)
-    }
-
-    /// Current generation counter. Handles carry an expected generation
-    /// and verify it against this value on every access.
-    ///
-    /// Uses `Acquire` ordering so that a bump from another thread via
-    /// `bump_generation` (which uses `Release`) establishes the required
-    /// happens-before edge: a reader verifying a handle after a slot
-    /// recycle observes the bumped counter and rejects the stale
-    /// handle. Without `Acquire` here, the Release on the bump side
-    /// pairs with nothing and a reader might see the pre-bump value
-    /// indefinitely (addressed review finding C3).
-    #[inline]
-    pub(crate) fn generation(&self) -> u32 {
-        self.generation.load(Ordering::Acquire)
-    }
-
-    /// Verify that a handle is valid for this node in a given runtime.
-    ///
-    /// Returns `Ok(())` if the handle's runtime id matches `runtime_id`
-    /// AND the handle's generation matches this node's current
-    /// generation. Returns a descriptive error otherwise. The runtime's
-    /// public `get` / `set` methods turn these errors into panics; tests
-    /// observe the `Result` directly.
-    ///
-    /// The caller is responsible for establishing happens-before with
-    /// the most recent writer of this node (typically via an Acquire
-    /// load on state before calling `verify_handle`).
-    pub(crate) fn verify_handle<T: 'static>(
-        &self,
-        handle: Incr<T>,
-        runtime_id: RuntimeId,
-    ) -> Result<(), HandleError> {
-        if handle.runtime_id() != runtime_id {
-            return Err(HandleError::WrongRuntime {
-                handle_runtime: handle.runtime_id(),
-                current_runtime: runtime_id,
-            });
-        }
-        let current = self.generation();
-        if handle.generation() != current {
-            return Err(HandleError::StaleGeneration {
-                handle_generation: handle.generation(),
-                current_generation: current,
-            });
-        }
-        Ok(())
-    }
-
-    /// Bump this node's generation counter, invalidating all outstanding
-    /// handles to the slot. Reserved for future use by `Runtime::delete_node`;
-    /// not exercised in commit E because node deletion is not yet a
-    /// capability of the runtime. Exposed now to lock in the Release
-    /// ordering contract: any subsequent reader that verifies a handle
-    /// with an Acquire-adjacent load sees the bumped generation and
-    /// rejects the handle.
-    #[cfg(test)]
-    pub(crate) fn bump_generation(&self) {
-        self.generation.fetch_add(1, Ordering::Release);
-    }
-
-    /// Iterate over this node's dependencies.
-    ///
-    /// Reads the count, then reads the appropriate source (inline or
-    /// overflow). The caller must have established happens-before with
-    /// any writer of the deps via an Acquire load on the node state
-    /// first; see the module docs.
-    pub(crate) fn for_each_dep(&self, mut f: impl FnMut(NodeId)) {
-        let count = self.dep_count.load(Ordering::Relaxed);
-        if count <= 7 {
-            for i in 0..(count as usize) {
-                let raw = self.inline_deps[i].load(Ordering::Relaxed);
-                f(NodeId(raw));
-            }
-        } else {
-            let overflow = self.overflow_deps.load(Ordering::Relaxed);
-            debug_assert!(
-                !overflow.is_null(),
-                "dep_count > 7 but overflow_deps is null"
-            );
-            // SAFETY: `overflow` is non-null when `count > 7` by the
-            // invariant maintained in `publish_initial_deps`, and points
-            // at a `DepList` allocated via `Box::into_raw` that lives
-            // until this node's `Drop`. No mutation path in commit D
-            // can swap or free this pointer between load and use.
-            let list = unsafe { &*overflow };
-            for &id in list.deps.iter() {
-                f(id);
-            }
-        }
-    }
-
-    /// Collect dependencies into a `Vec<NodeId>`. Convenience for tests
-    /// and diagnostics; production code uses `for_each_dep` to avoid
-    /// the allocation where possible. The runtime's dep-diff path on
-    /// recompute calls `collect_deps` to materialize the previous
-    /// dep list for comparison against the newly-recorded set.
-    pub(crate) fn collect_deps(&self) -> Vec<NodeId> {
-        let mut out = Vec::with_capacity(self.dep_count() as usize);
-        self.for_each_dep(|id| out.push(id));
-        out
-    }
-}
-
-impl Drop for NodeData {
-    fn drop(&mut self) {
-        // Reclaim the overflow dep list if one was allocated.
-        let overflow = *self.overflow_deps.get_mut();
-        if !overflow.is_null() {
-            // SAFETY: `overflow` came from `Box::into_raw` in
-            // `publish_initial_deps`; this Drop holds `&mut self`, so
-            // no other thread can observe or mutate the pointer.
-            unsafe {
-                drop(Box::from_raw(overflow));
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn nodedata_is_one_cache_line() {
-        // Redundant with the const assertion but lets a quick
-        // `cargo test` surface the invariant with a readable failure.
-        assert_eq!(std::mem::size_of::<NodeData>(), 64);
-        assert_eq!(std::mem::align_of::<NodeData>(), 64);
-    }
-
-    #[test]
-    fn new_input_starts_clean_with_no_deps() {
-        let node = NodeData::new_input(3, 42, 17);
-        assert_eq!(node.state(), NodeState::Clean);
-        assert_eq!(node.type_tag(), 3);
-        assert_eq!(node.arena_slot(), 42);
-        assert_eq!(node.verified_at(), 17);
-        assert_eq!(node.changed_at(), 17);
-        assert_eq!(node.dep_count(), 0);
-        assert!(node.collect_deps().is_empty());
-    }
-
-    #[test]
-    fn new_query_starts_new_with_no_deps() {
-        let node = NodeData::new_query(5, 99);
-        assert_eq!(node.state(), NodeState::New);
-        assert_eq!(node.type_tag(), 5);
-        assert_eq!(node.arena_slot(), 99);
-        assert_eq!(node.verified_at(), 0);
-        assert_eq!(node.changed_at(), 0);
-        assert_eq!(node.dep_count(), 0);
-        assert!(node.collect_deps().is_empty());
-    }
-
-    #[test]
-    fn publish_zero_inline_deps() {
-        let node = NodeData::new_query(0, 0);
-        node.publish_initial_deps(&[]);
-        assert_eq!(node.dep_count(), 0);
-        assert!(node.collect_deps().is_empty());
-    }
-
-    #[test]
-    fn publish_one_inline_dep() {
-        let node = NodeData::new_query(0, 0);
-        node.publish_initial_deps(&[NodeId(42)]);
-        assert_eq!(node.dep_count(), 1);
-        assert_eq!(node.collect_deps(), vec![NodeId(42)]);
-    }
-
-    #[test]
-    fn publish_seven_inline_deps_exactly() {
-        let node = NodeData::new_query(0, 0);
-        let deps: Vec<NodeId> = (0..7u32).map(NodeId).collect();
-        node.publish_initial_deps(&deps);
-        assert_eq!(node.dep_count(), 7);
-        assert_eq!(node.collect_deps(), deps);
-        // Overflow must still be null when we're inside the inline limit.
-        assert!(node.overflow_deps.load(Ordering::Relaxed).is_null());
-    }
-
-    #[test]
-    fn publish_eight_deps_spills_to_overflow() {
-        let node = NodeData::new_query(0, 0);
-        let deps: Vec<NodeId> = (100..108u32).map(NodeId).collect();
-        node.publish_initial_deps(&deps);
-        assert_eq!(node.dep_count(), 8);
-        assert_eq!(node.collect_deps(), deps);
-        // Overflow must be non-null when we exceed the inline limit.
-        assert!(!node.overflow_deps.load(Ordering::Relaxed).is_null());
-    }
-
-    #[test]
-    fn publish_many_deps_via_overflow() {
-        let node = NodeData::new_query(0, 0);
-        let deps: Vec<NodeId> = (1000..1100u32).map(NodeId).collect();
-        node.publish_initial_deps(&deps);
-        assert_eq!(node.dep_count(), 100);
-        assert_eq!(node.collect_deps(), deps);
-    }
-
-    #[test]
-    fn for_each_dep_visits_inline_deps_in_order() {
-        let node = NodeData::new_query(0, 0);
-        let expected = [NodeId(3), NodeId(1), NodeId(4), NodeId(1), NodeId(5)];
-        node.publish_initial_deps(&expected);
-        let mut visited = Vec::new();
-        node.for_each_dep(|id| visited.push(id));
-        assert_eq!(visited, expected);
-    }
-
-    #[test]
-    fn for_each_dep_visits_overflow_deps_in_order() {
-        let node = NodeData::new_query(0, 0);
-        let expected: Vec<NodeId> = (0..12u32).rev().map(NodeId).collect();
-        node.publish_initial_deps(&expected);
-        let mut visited = Vec::new();
-        node.for_each_dep(|id| visited.push(id));
-        assert_eq!(visited, expected);
-    }
-
-    #[test]
-    fn dropping_node_with_overflow_deps_is_leak_free() {
-        // Use Miri to really verify; here we just exercise the Drop path.
-        let node = NodeData::new_query(0, 0);
-        let deps: Vec<NodeId> = (0..50u32).map(NodeId).collect();
-        node.publish_initial_deps(&deps);
-        drop(node);
-    }
-
-    #[test]
-    fn dropping_node_without_overflow_is_trivial() {
-        let node = NodeData::new_input(0, 0, 0);
-        drop(node);
-    }
-
-    #[test]
-    fn field_offsets_match_design() {
-        // Cross-check the commented layout against the actual offsets
-        // the compiler chose. If this test ever fails, the module's
-        // layout comment is wrong and needs updating.
-        let node = NodeData::new_input(0, 0, 0);
-        let base = &node as *const NodeData as usize;
-        let verified_at = &node.verified_at as *const _ as usize - base;
-        let changed_at = &node.changed_at as *const _ as usize - base;
-        let overflow_deps = &node.overflow_deps as *const _ as usize - base;
-        let inline_deps = &node.inline_deps as *const _ as usize - base;
-        let arena_slot = &node.arena_slot as *const _ as usize - base;
-        let type_tag = &node.type_tag as *const _ as usize - base;
-        let state = &node.state as *const _ as usize - base;
-        let dep_count = &node.dep_count as *const _ as usize - base;
-        let generation = &node.generation as *const _ as usize - base;
-
-        assert_eq!(verified_at, 0, "verified_at at offset 0");
-        assert_eq!(changed_at, 8, "changed_at at offset 8");
-        assert_eq!(overflow_deps, 16, "overflow_deps at offset 16");
-        assert_eq!(inline_deps, 24, "inline_deps at offset 24");
-        assert_eq!(arena_slot, 52, "arena_slot at offset 52");
-        assert_eq!(type_tag, 56, "type_tag at offset 56");
-        assert_eq!(state, 58, "state at offset 58");
-        assert_eq!(dep_count, 59, "dep_count at offset 59");
-        assert_eq!(generation, 60, "generation at offset 60");
-    }
-
-    #[test]
-    fn nodedata_implements_send_and_sync() {
-        // Compile-time check: if NodeData accidentally loses Send/Sync
-        // (e.g., someone adds a raw pointer field without wrapping it),
-        // this fn will fail to compile.
-        fn assert_send_sync<T: Send + Sync>() {}
-        assert_send_sync::<NodeData>();
-    }
-
-    #[test]
-    fn new_node_starts_at_generation_zero() {
-        let input = NodeData::new_input(0, 0, 0);
-        assert_eq!(input.generation(), 0);
-        let query = NodeData::new_query(0, 0);
-        assert_eq!(query.generation(), 0);
-    }
-
-    #[test]
-    fn verify_handle_succeeds_on_match() {
-        let node = NodeData::new_input(0, 0, 0);
-        let rid = RuntimeId::from_raw(42);
-        let h: Incr<u64> = Incr::new(7, 0, rid);
-        assert!(node.verify_handle(h, rid).is_ok());
-    }
-
-    #[test]
-    fn verify_handle_rejects_wrong_runtime() {
-        let node = NodeData::new_input(0, 0, 0);
-        let rid_a = RuntimeId::from_raw(42);
-        let rid_b = RuntimeId::from_raw(43);
-        let h: Incr<u64> = Incr::new(7, 0, rid_a);
-        let err = node.verify_handle(h, rid_b).unwrap_err();
-        assert_eq!(
-            err,
-            HandleError::WrongRuntime {
-                handle_runtime: rid_a,
-                current_runtime: rid_b,
-            }
-        );
-    }
-
-    #[test]
-    fn verify_handle_rejects_stale_generation() {
-        let node = NodeData::new_input(0, 0, 0);
-        let rid = RuntimeId::from_raw(42);
-        // Handle with an out-of-date generation.
-        let h: Incr<u64> = Incr::new(7, 5, rid);
-        let err = node.verify_handle(h, rid).unwrap_err();
-        assert_eq!(
-            err,
-            HandleError::StaleGeneration {
-                handle_generation: 5,
-                current_generation: 0,
-            }
-        );
-    }
-
-    #[test]
-    fn bumping_generation_invalidates_outstanding_handles() {
-        let node = NodeData::new_input(0, 0, 0);
-        let rid = RuntimeId::from_raw(42);
-        let h: Incr<u64> = Incr::new(7, 0, rid);
-        // Fresh handle works.
-        assert!(node.verify_handle(h, rid).is_ok());
-        // Bump the generation (simulating a slot recycle).
-        node.bump_generation();
-        assert_eq!(node.generation(), 1);
-        // Old handle no longer works.
-        let err = node.verify_handle(h, rid).unwrap_err();
-        assert!(matches!(err, HandleError::StaleGeneration { .. }));
-        // A handle with the new generation would work.
-        let h2: Incr<u64> = Incr::new(7, 1, rid);
-        assert!(node.verify_handle(h2, rid).is_ok());
-    }
-
-    #[test]
-    fn verify_handle_checks_runtime_before_generation() {
-        // If both runtime and generation are wrong, the runtime error
-        // should win because it is the more specific failure (cross-
-        // runtime handles are a hard bug; stale generations are a
-        // legitimate state after a node has been recycled).
-        let node = NodeData::new_input(0, 0, 0);
-        node.bump_generation(); // current generation = 1
-        let rid_other = RuntimeId::from_raw(99);
-        let rid_this = RuntimeId::from_raw(42);
-        let h: Incr<u64> = Incr::new(7, 0, rid_other);
-        let err = node.verify_handle(h, rid_this).unwrap_err();
-        assert!(
-            matches!(err, HandleError::WrongRuntime { .. }),
-            "runtime mismatch should be reported before generation mismatch"
-        );
-    }
-}
diff --git a/crates/incr-concurrent/src/nodes_store.rs b/crates/incr-concurrent/src/nodes_store.rs
deleted file mode 100644
index f79b201..0000000
--- a/crates/incr-concurrent/src/nodes_store.rs
+++ /dev/null
@@ -1,282 +0,0 @@
-//! Segmented lock-free storage for `NodeData`.
-//!
-//! Replaces the commit F scaffolding's `RwLock<Vec<Box<NodeData>>>`
-//! with a segmented store modeled on the arenas from commits A and B.
-//! Reads are lock-free: a handler computes `(seg_idx, within_idx)` from
-//! the slot, does an Acquire load on the segment pointer, and returns
-//! a direct `&NodeData` reference. No lock acquire, no allocation, no
-//! indirection beyond the two atomic loads.
-//!
-//! Writes (append-only, via `create_input` / `create_query`) run under
-//! the Runtime's `write_mutex`, so there is no concurrent writer race
-//! on the `len` counter or on segment allocation.
-//!
-//! ## Layout
-//!
-//! - `segments`: fixed-size array of `AtomicPtr<NodesSegment>`. Size
-//!   is `MAX_SEGMENTS`. Entries start null; non-null entries point at
-//!   a heap-allocated `NodesSegment`. Lazy allocation.
-//! - `len`: `AtomicU32`. Number of initialized slots. Monotonically
-//!   increasing via write-mutex-guarded increments. Readers
-//!   Acquire-load to check that their slot is valid.
-//!
-//! - `NodesSegment::slots`: boxed fixed-size slice of
-//!   `UnsafeCell<MaybeUninit<NodeData>>`. Slots are uninitialized at
-//!   segment creation; the first write to a slot initializes it via
-//!   `MaybeUninit::write`. Slots at or beyond `len` are still
-//!   uninitialized and must not be dereferenced.
-//!
-//! ## Safety invariants
-//!
-//! 1. Only the write-mutex-holding thread can mutate `len` and
-//!    initialize slots.
-//! 2. A slot at index `i` is initialized iff `i < len`. The ordering
-//!    is: (a) initialize the slot, (b) Release-store the new `len`.
-//!    Readers Acquire-load `len` and then dereference the slot; the
-//!    Acquire pairs with the Release so the reader sees the
-//!    initialized slot.
-//! 3. Segments are never deallocated until the store is dropped, so
-//!    a `&NodeData` obtained from a slot remains valid for the
-//!    store's lifetime.
-//! 4. On Drop, the store iterates slots `0..len` and drops them
-//!    in place via `assume_init_drop`. Slots beyond `len` are
-//!    untouched (they were never initialized).
-
-use std::cell::UnsafeCell;
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicPtr, AtomicU32, Ordering};
-
-use super::node::NodeData;
-
-const SEGMENT_SHIFT: u32 = 10;
-const SEGMENT_SIZE: usize = 1 << SEGMENT_SHIFT;
-const SEGMENT_MASK: u32 = (SEGMENT_SIZE as u32) - 1;
-const MAX_SEGMENTS: usize = 1024;
-
-/// Total node capacity per runtime. Matches the arena capacity so a
-/// single Runtime can hold at most the same number of nodes as its
-/// arenas can hold values.
-pub(crate) const MAX_NODES: u32 = (MAX_SEGMENTS * SEGMENT_SIZE) as u32;
-
-/// One segment of up to `SEGMENT_SIZE` `NodeData` slots. Heap
-/// allocated and never moved; a `*const NodesSegment` obtained
-/// during the store's lifetime stays valid until the store drops.
-struct NodesSegment {
-    slots: Box<[UnsafeCell<MaybeUninit<NodeData>>]>,
-}
-
-impl NodesSegment {
-    fn new() -> Box<Self> {
-        let slots: Vec<UnsafeCell<MaybeUninit<NodeData>>> = (0..SEGMENT_SIZE)
-            .map(|_| UnsafeCell::new(MaybeUninit::uninit()))
-            .collect();
-        Box::new(Self {
-            slots: slots.into_boxed_slice(),
-        })
-    }
-}
-
-/// Segmented lock-free store for `NodeData`. Owned by `Runtime`.
-pub(crate) struct SegmentedNodes {
-    segments: Box<[AtomicPtr<NodesSegment>]>,
-    len: AtomicU32,
-}
-
-impl SegmentedNodes {
-    /// Construct an empty store. No segments are allocated until the
-    /// first `push`.
-    pub(crate) fn new() -> Self {
-        let segments: Vec<AtomicPtr<NodesSegment>> = (0..MAX_SEGMENTS)
-            .map(|_| AtomicPtr::new(std::ptr::null_mut()))
-            .collect();
-        Self {
-            segments: segments.into_boxed_slice(),
-            len: AtomicU32::new(0),
-        }
-    }
-
-    /// Append a new `NodeData` to the store, returning its slot
-    /// index. Caller must hold the Runtime's `write_mutex` so no
-    /// other writer is racing on `len` or segment allocation.
-    ///
-    /// Publishes the new slot via a Release store on `len`, which
-    /// synchronizes with the reader's Acquire load in `get`.
-    pub(crate) fn push(&self, node: NodeData) -> u32 {
-        let slot = self.len.load(Ordering::Relaxed);
-        if slot >= MAX_NODES {
-            panic!("SegmentedNodes exhausted at {} slots", MAX_NODES);
-        }
-
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-
-        // Ensure the target segment exists. Under write_mutex this
-        // is race-free; we just check and allocate if null.
-        let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-        let seg_ptr = if seg_ptr.is_null() {
-            let new_seg = Box::into_raw(NodesSegment::new());
-            // Release-store so a concurrent reader's Acquire load
-            // observes the fresh segment with all slots still
-            // uninitialized (none are in the caller's reachable
-            // range until `len` is bumped below).
-            self.segments[seg_idx].store(new_seg, Ordering::Release);
-            new_seg
-        } else {
-            seg_ptr
-        };
-
-        // Initialize the slot. SAFETY: `seg_ptr` is non-null and
-        // points at a NodesSegment owned by this store. `within` is
-        // in-range because `slot < MAX_NODES` and the segment has
-        // `SEGMENT_SIZE` slots. The caller holds write_mutex so no
-        // other thread is initializing this or nearby slots.
-        // Readers cannot observe this slot yet because `len` has
-        // not been bumped.
-        unsafe {
-            let cell: &UnsafeCell<MaybeUninit<NodeData>> = &(*seg_ptr).slots[within];
-            (*cell.get()).write(node);
-        }
-
-        // Release-store the new len. Synchronizes with reader Acquire
-        // loads in `get` to publish the initialized slot.
-        self.len.store(slot + 1, Ordering::Release);
-        slot
-    }
-
-    /// Read the node at `slot`. Returns a reference valid for the
-    /// store's lifetime (tied to `&self`).
-    ///
-    /// Caller must have obtained `slot` from a handle returned by
-    /// `push` on the same store (so `slot < len`). Debug builds
-    /// assert this; release builds dereference unchecked and will
-    /// hit undefined behavior for out-of-range slots.
-    pub(crate) fn get(&self, slot: u32) -> &NodeData {
-        debug_assert!(
-            slot < self.len.load(Ordering::Acquire),
-            "SegmentedNodes::get slot {} out of range (len {})",
-            slot,
-            self.len.load(Ordering::Acquire)
-        );
-
-        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-        let within = (slot & SEGMENT_MASK) as usize;
-
-        // SAFETY: `slot < len` (debug asserted above) implies the
-        // slot has been initialized and the segment has been
-        // allocated. The Acquire load pairs with the Release store
-        // in `push` to establish happens-before with the
-        // initialization. Segments are never freed until `Drop`, so
-        // the returned reference is valid for `&self`'s lifetime.
-        unsafe {
-            let seg_ptr = self.segments[seg_idx].load(Ordering::Acquire);
-            debug_assert!(!seg_ptr.is_null(), "segment {} not allocated", seg_idx);
-            let cell: &UnsafeCell<MaybeUninit<NodeData>> = &(*seg_ptr).slots[within];
-            (*cell.get()).assume_init_ref()
-        }
-    }
-
-    /// Current number of initialized slots. Used by tests and by
-    /// debug assertions elsewhere in the runtime.
-    pub(crate) fn len(&self) -> u32 {
-        self.len.load(Ordering::Acquire)
-    }
-}
-
-impl Drop for SegmentedNodes {
-    fn drop(&mut self) {
-        // Drop every initialized slot in place, then drop the
-        // segments themselves. Slots beyond `len` are still
-        // uninitialized and must not be dropped.
-        let final_len = *self.len.get_mut();
-        for slot in 0..final_len {
-            let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
-            let within = (slot & SEGMENT_MASK) as usize;
-            let seg_ptr = *self.segments[seg_idx].get_mut();
-            if !seg_ptr.is_null() {
-                // SAFETY: `slot < final_len` so this slot was
-                // initialized via `MaybeUninit::write` in `push`.
-                // `&mut self` guarantees no concurrent access.
-                unsafe {
-                    let cell: &UnsafeCell<MaybeUninit<NodeData>> = &(*seg_ptr).slots[within];
-                    (*cell.get()).assume_init_drop();
-                }
-            }
-        }
-
-        // Reclaim the segment boxes themselves.
-        for entry in self.segments.iter_mut() {
-            let ptr = *entry.get_mut();
-            if !ptr.is_null() {
-                // SAFETY: pointer came from `Box::into_raw` in
-                // `push`; uniquely owned at this point because
-                // `&mut self`.
-                unsafe {
-                    drop(Box::from_raw(ptr));
-                }
-            }
-        }
-    }
-}
-
-// SAFETY: `SegmentedNodes` holds `NodeData` values which are themselves
-// `Send + Sync` (atomics). The raw pointers in `segments` point at
-// `NodesSegment`s which are heap-allocated and owned by this store.
-// Concurrent access is coordinated by the Runtime's write_mutex for
-// writers and by the `len` Release/Acquire pair for reader visibility.
-unsafe impl Send for SegmentedNodes {}
-unsafe impl Sync for SegmentedNodes {}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::node::NodeData;
-
-    #[test]
-    fn push_then_get_returns_stable_reference() {
-        let store = SegmentedNodes::new();
-        let slot = store.push(NodeData::new_input(0, 42, 0));
-        let node = store.get(slot);
-        assert_eq!(node.arena_slot(), 42);
-    }
-
-    #[test]
-    fn many_pushes_cross_segment_boundary() {
-        let store = SegmentedNodes::new();
-        let count = SEGMENT_SIZE + 100;
-        let mut slots = Vec::with_capacity(count);
-        for i in 0..count {
-            slots.push(store.push(NodeData::new_input(0, i as u32, 0)));
-        }
-        for (i, slot) in slots.into_iter().enumerate() {
-            assert_eq!(store.get(slot).arena_slot(), i as u32);
-        }
-        assert_eq!(store.len(), count as u32);
-    }
-
-    #[test]
-    fn drop_frees_overflow_deps_from_every_node() {
-        use crate::node::NodeId;
-        // Push nodes, give some of them overflow deps, drop the
-        // store, and rely on miri / valgrind to confirm no leaks.
-        let store = SegmentedNodes::new();
-        for i in 0..20 {
-            let slot = store.push(NodeData::new_query(0, i));
-            let deps: Vec<NodeId> = (0..(i + 5)).map(NodeId).collect();
-            store.get(slot).publish_initial_deps(&deps);
-        }
-        drop(store);
-    }
-
-    #[test]
-    fn references_from_get_remain_valid_across_more_pushes() {
-        let store = SegmentedNodes::new();
-        let slot_a = store.push(NodeData::new_input(0, 111, 0));
-        let ref_a = store.get(slot_a);
-        // Trigger segment growth by pushing enough nodes to cross
-        // a segment boundary. ref_a must still be valid.
-        for i in 0..(SEGMENT_SIZE as u32 + 10) {
-            store.push(NodeData::new_input(0, 1000 + i, 0));
-        }
-        assert_eq!(ref_a.arena_slot(), 111);
-    }
-}
diff --git a/crates/incr-concurrent/src/registry.rs b/crates/incr-concurrent/src/registry.rs
deleted file mode 100644
index 76b3679..0000000
--- a/crates/incr-concurrent/src/registry.rs
+++ /dev/null
@@ -1,775 +0,0 @@
-//! Arena registry: the runtime's `TypeId → arena` lookup table.
-//!
-//! Per section 5.2 of the concurrent core rewrite spec, the runtime holds
-//! one arena per value type, indexed by [`TypeId`]. This module is that
-//! index.
-//!
-//! ## Design
-//!
-//! The registry is a `HashMap<TypeId, Box<dyn ErasedArena>>` behind a
-//! `RwLock`. Readers take a short-lived read guard, look up their type's
-//! arena, extract a raw pointer to the arena, and drop the guard. The
-//! pointer is stable for the registry's lifetime because:
-//!
-//! 1. Arenas are never removed from the registry. The registry is
-//!    append-only; adding a new value type inserts a new entry but no
-//!    entry is ever deleted, even when nodes holding that type are
-//!    destroyed. (Nodes are recycled via generation counters at the
-//!    slot level; the arena itself lives on.)
-//! 2. Each arena lives behind a `Box`, which pins it at a stable heap
-//!    address. `HashMap` resizing moves the `Box` (two words) but not
-//!    the arena interior the `Box` points to.
-//!
-//! Therefore `*const dyn ErasedArena` extracted from
-//! `box.as_ref() as *const _` remains valid as long as the registry is
-//! alive. Callers who cache pointers across operations get the same
-//! correctness guarantee.
-//!
-//! ## Concurrency and the TLS pointer cache
-//!
-//! Benchmarks (see `mod bench` in this file) measured the naive
-//! `RwLock<HashMap>` lookup at approximately 26 ns per call in release
-//! mode. The spec's budget for a single-threaded Clean `get` is 5 ns,
-//! and the registry is only one step of a full get. The naive path
-//! blows the budget on its own, so the thread-local pointer cache
-//! described in spec section 5.2 is load-bearing rather than optional.
-//!
-//! The cache lives in a `thread_local!` `RefCell<ArenaCache>` with four
-//! slots. Each slot holds `(registry_id, type_id, arena_ptr)`. On a
-//! lookup the cache does a linear scan over its entries; a hit avoids
-//! both the `RwLock::read` and the `HashMap` lookup. The hit path is
-//! read-only (no move-to-front) so that repeated hits do not dirty the
-//! cache line. Misses fall through to the lock-backed lookup and
-//! populate the cache via round-robin eviction.
-//!
-//! ## Registry identity and the ABA question
-//!
-//! The cache is keyed by `(registry_id, type_id)` where `registry_id`
-//! is a monotonic `u64` drawn from a static counter at `ArenaRegistry`
-//! construction. The counter never wraps in any realistic program
-//! lifetime, so a cached entry from a dropped registry can never be
-//! confused with a different registry that happens to be allocated at
-//! the same address. Stale entries are inert: their `registry_id` will
-//! never match a live registry's id, so lookups miss and eventually
-//! overwrite the stale slot via round-robin eviction.
-//!
-//! When `RuntimeId` lands in commit E, the registry's id becomes the
-//! runtime's id (they are the same quantity), and the `(RuntimeId,
-//! TypeId)` pair the spec describes is exactly what the cache is
-//! already keyed by.
-
-use std::any::TypeId;
-use std::cell::RefCell;
-use std::collections::HashMap;
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::RwLock;
-
-use super::arena::ErasedArena;
-use super::handle::RuntimeId;
-
-/// Source of monotonic registry ids. Starts at 1; 0 is reserved as the
-/// "never assigned" sentinel used by the TLS cache to mean "empty slot
-/// that can never match a live registry."
-static NEXT_REGISTRY_ID: AtomicU64 = AtomicU64::new(1);
-
-/// Append-only registry of arenas keyed by value type.
-pub(crate) struct ArenaRegistry {
-    /// Monotonic id that uniquely identifies this registry for its
-    /// lifetime and across all registries ever constructed in this
-    /// process. Also serves as the owning runtime's `RuntimeId`: a
-    /// runtime adopts its registry's id rather than tracking its own,
-    /// so the TLS cache and `Incr<T>` handles can share a single
-    /// identity concept. Used by the TLS cache to distinguish entries
-    /// belonging to this registry from entries belonging to
-    /// dropped-or-other registries; see the module docs.
-    id: RuntimeId,
-    arenas: RwLock<HashMap<TypeId, Box<dyn ErasedArena>>>,
-}
-
-impl ArenaRegistry {
-    /// Create an empty registry with a fresh monotonic id.
-    pub(crate) fn new() -> Self {
-        Self {
-            id: RuntimeId::from_raw(NEXT_REGISTRY_ID.fetch_add(1, Ordering::Relaxed)),
-            arenas: RwLock::new(HashMap::new()),
-        }
-    }
-
-    /// Return this registry's unique id. Exposed so the Runtime can
-    /// adopt it as its own `RuntimeId` and so `Incr<T>` handles can
-    /// carry it for cross-runtime detection.
-    pub(crate) fn id(&self) -> RuntimeId {
-        self.id
-    }
-
-    /// Ensure an arena exists for type `T`, constructing it via `factory`
-    /// if this is the first time the registry has seen `T`.
-    ///
-    /// Returns a raw pointer to the arena, valid for the registry's
-    /// lifetime. Callers downcast to the concrete arena type via
-    /// [`ErasedArena::as_any`].
-    ///
-    /// This is the canonical entry point for node creation paths that
-    /// need an arena but do not know whether one has been created for
-    /// their type yet.
-    pub(crate) fn ensure_arena<T: 'static, F>(&self, factory: F) -> *const dyn ErasedArena
-    where
-        F: FnOnce() -> Box<dyn ErasedArena>,
-    {
-        let tid = TypeId::of::<T>();
-
-        // Hottest path: TLS cache hit.
-        if let Some(ptr) = cache_lookup(self.id.get(), tid) {
-            return ptr;
-        }
-
-        // Cache miss. Fall through to the read-locked registry lookup.
-        {
-            let guard = self
-                .arenas
-                .read()
-                .expect("arena registry read lock poisoned");
-            if let Some(entry) = guard.get(&tid) {
-                let ptr = entry.as_ref() as *const dyn ErasedArena;
-                cache_insert(self.id.get(), tid, ptr);
-                return ptr;
-            }
-        }
-
-        // Not in the registry either: write-locked insertion. Double-
-        // checked under the write lock in case another thread inserted
-        // between our read and write. `or_insert_with` does the check.
-        let mut guard = self
-            .arenas
-            .write()
-            .expect("arena registry write lock poisoned");
-        let entry = guard.entry(tid).or_insert_with(factory);
-        let ptr = entry.as_ref() as *const dyn ErasedArena;
-        cache_insert(self.id.get(), tid, ptr);
-        ptr
-    }
-
-    /// Look up an existing arena for `T`. Returns `None` if no arena
-    /// for `T` has been created yet. Intended for read-only paths that
-    /// should not trigger lazy creation (e.g., diagnostics, sanity
-    /// checks); production get/set paths should use
-    /// [`ArenaRegistry::ensure_arena`] so a missing arena is a bug,
-    /// not a silent `None`.
-    #[allow(dead_code)]
-    pub(crate) fn lookup<T: 'static>(&self) -> Option<*const dyn ErasedArena> {
-        let tid = TypeId::of::<T>();
-
-        // Hottest path: TLS cache hit.
-        if let Some(ptr) = cache_lookup(self.id.get(), tid) {
-            return Some(ptr);
-        }
-
-        // Cache miss: read-locked registry lookup.
-        let ptr = {
-            let guard = self
-                .arenas
-                .read()
-                .expect("arena registry read lock poisoned");
-            guard
-                .get(&tid)
-                .map(|entry| entry.as_ref() as *const dyn ErasedArena)
-        }?;
-        cache_insert(self.id.get(), tid, ptr);
-        Some(ptr)
-    }
-
-    /// Number of distinct value types the registry currently holds.
-    /// Used by tests and potential diagnostics.
-    #[cfg(test)]
-    pub(crate) fn len(&self) -> usize {
-        self.arenas
-            .read()
-            .expect("arena registry read lock poisoned")
-            .len()
-    }
-}
-
-impl Default for ArenaRegistry {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-// SAFETY: `ArenaRegistry` owns its `RwLock<HashMap<...>>` directly and
-// does not expose internal state across thread boundaries in ways that
-// would violate `Send`/`Sync`. `Box<dyn ErasedArena>` is `Send + Sync`
-// because `ErasedArena: Send + Sync`. `RwLock` provides the necessary
-// synchronization for the HashMap.
-//
-// The raw pointers returned by `ensure_arena` and `lookup` are not tied
-// to the lock guard; callers observe arena contents directly. Concurrent
-// readers of an arena coordinate via the node state machine, which is
-// orthogonal to the registry-level locking.
-
-/// Number of slots in the TLS cache. Four is a sweet spot: almost all
-/// workloads touch one to four value types on the hot path, and a linear
-/// scan over four entries is ~1 ns even with the array-of-Option branch
-/// overhead. Going wider (8, 16) pays more on every scan for a rare
-/// benefit; going narrower (2) thrashes for workloads with three or four
-/// hot types.
-const CACHE_SLOTS: usize = 4;
-
-/// One TLS cache entry. A registry id of zero means "empty slot" and
-/// never matches any live registry (live registry ids start at 1).
-#[derive(Copy, Clone)]
-struct CacheEntry {
-    registry_id: u64,
-    type_id: TypeId,
-    arena_ptr: *const dyn ErasedArena,
-}
-
-impl CacheEntry {
-    const fn empty() -> Self {
-        // SAFETY note: this sentinel pointer is never dereferenced. The
-        // `registry_id: 0` acts as a guard: any real lookup compares
-        // against a live registry id (which is nonzero), so this entry
-        // cannot match and its pointer cannot be used.
-        Self {
-            registry_id: 0,
-            type_id: TypeId::of::<()>(),
-            arena_ptr: std::ptr::null::<EmptyErasedArena>() as *const dyn ErasedArena,
-        }
-    }
-}
-
-/// Placeholder type used only to give the empty cache entry's pointer a
-/// concrete non-generic form for the `null::<_> as *const dyn Trait`
-/// coercion. This type is never instantiated.
-struct EmptyErasedArena;
-impl ErasedArena for EmptyErasedArena {
-    fn erased_type_id(&self) -> TypeId {
-        TypeId::of::<()>()
-    }
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-}
-
-/// The TLS cache itself: four slots plus a round-robin eviction cursor.
-/// The hit path is read-only (no move-to-front) so repeated lookups of
-/// the same type do not dirty the cache line.
-struct ArenaCache {
-    entries: [CacheEntry; CACHE_SLOTS],
-    next_eviction: u32,
-}
-
-impl ArenaCache {
-    const fn new() -> Self {
-        Self {
-            entries: [CacheEntry::empty(); CACHE_SLOTS],
-            next_eviction: 0,
-        }
-    }
-}
-
-thread_local! {
-    /// Per-thread arena pointer cache. `RefCell` lets the lookup path
-    /// take a short `borrow_mut` only on cache insertion; the hit path
-    /// uses `borrow` (counter increment only, no work on the hot path
-    /// beyond the linear scan). In release builds the borrow counters
-    /// are the dominant cost after the scan itself.
-    static ARENA_CACHE: RefCell<ArenaCache> = const { RefCell::new(ArenaCache::new()) };
-}
-
-/// Look up `(registry_id, type_id)` in this thread's cache. Returns the
-/// cached pointer on hit, `None` on miss. The hit path does not mutate
-/// the cache (no move-to-front), so repeated hits stay cheap.
-fn cache_lookup(registry_id: u64, type_id: TypeId) -> Option<*const dyn ErasedArena> {
-    ARENA_CACHE.with(|cache| {
-        let cache = cache.borrow();
-        for entry in &cache.entries {
-            if entry.registry_id == registry_id && entry.type_id == type_id {
-                return Some(entry.arena_ptr);
-            }
-        }
-        None
-    })
-}
-
-/// Insert a `(registry_id, type_id) -> arena_ptr` mapping into this
-/// thread's cache. Uses round-robin eviction: each insertion overwrites
-/// the slot at `next_eviction` and advances the cursor.
-fn cache_insert(registry_id: u64, type_id: TypeId, arena_ptr: *const dyn ErasedArena) {
-    ARENA_CACHE.with(|cache| {
-        let mut cache = cache.borrow_mut();
-        // If an entry for this (registry, type) already exists, update
-        // it in place rather than duplicating. Two copies would not
-        // violate correctness but would waste a slot.
-        for entry in cache.entries.iter_mut() {
-            if entry.registry_id == registry_id && entry.type_id == type_id {
-                entry.arena_ptr = arena_ptr;
-                return;
-            }
-        }
-        let idx = (cache.next_eviction as usize) % CACHE_SLOTS;
-        cache.entries[idx] = CacheEntry {
-            registry_id,
-            type_id,
-            arena_ptr,
-        };
-        cache.next_eviction = cache.next_eviction.wrapping_add(1);
-    });
-}
-
-/// Clear the current thread's cache. Used only by tests that want to
-/// observe the uncached fallback path in isolation.
-#[cfg(test)]
-fn cache_clear() {
-    ARENA_CACHE.with(|cache| {
-        *cache.borrow_mut() = ArenaCache::new();
-    });
-}
-
-#[cfg(test)]
-mod tests {
-    use super::super::arena::{AtomicPrimitiveArena, GenericArena};
-    use super::*;
-    use std::sync::Arc;
-    use std::thread;
-
-    /// Downcast helper for tests: turn a `*const dyn ErasedArena` into
-    /// a typed reference to a concrete arena.
-    unsafe fn as_primitive<'a, T: super::super::arena::AtomicPrimitive>(
-        ptr: *const dyn ErasedArena,
-    ) -> &'a AtomicPrimitiveArena<T> {
-        (*ptr)
-            .as_any()
-            .downcast_ref::<AtomicPrimitiveArena<T>>()
-            .expect("arena type mismatch")
-    }
-
-    unsafe fn as_generic<'a, T: Clone + Send + Sync + 'static>(
-        ptr: *const dyn ErasedArena,
-    ) -> &'a GenericArena<T> {
-        (*ptr)
-            .as_any()
-            .downcast_ref::<GenericArena<T>>()
-            .expect("arena type mismatch")
-    }
-
-    #[test]
-    fn new_registry_is_empty() {
-        let registry = ArenaRegistry::new();
-        assert_eq!(registry.len(), 0);
-        assert!(registry.lookup::<u64>().is_none());
-    }
-
-    #[test]
-    fn ensure_arena_creates_once_per_type() {
-        let registry = ArenaRegistry::new();
-        let p1 = registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        let p2 = registry.ensure_arena::<u64, _>(|| {
-            panic!("factory should not run on second call for same type")
-        });
-        // Fat pointer equality via pointer comparison: both should point
-        // to the same arena instance.
-        assert_eq!(
-            p1 as *const (), p2 as *const (),
-            "ensure_arena must return the same pointer for the same type"
-        );
-        assert_eq!(registry.len(), 1);
-    }
-
-    #[test]
-    fn ensure_arena_creates_distinct_arenas_per_type() {
-        let registry = ArenaRegistry::new();
-        let p_u64 =
-            registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        let p_i32 =
-            registry.ensure_arena::<i32, _>(|| Box::new(AtomicPrimitiveArena::<i32>::new()));
-        let p_string =
-            registry.ensure_arena::<String, _>(|| Box::new(GenericArena::<String>::new()));
-        assert_ne!(p_u64 as *const (), p_i32 as *const ());
-        assert_ne!(p_u64 as *const (), p_string as *const ());
-        assert_ne!(p_i32 as *const (), p_string as *const ());
-        assert_eq!(registry.len(), 3);
-    }
-
-    #[test]
-    fn lookup_returns_none_before_creation_and_some_after() {
-        let registry = ArenaRegistry::new();
-        assert!(registry.lookup::<u64>().is_none());
-        registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        assert!(registry.lookup::<u64>().is_some());
-    }
-
-    #[test]
-    fn downcast_through_registry_returns_usable_arena() {
-        let registry = ArenaRegistry::new();
-        let ptr = registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        // SAFETY: we just created this arena as AtomicPrimitiveArena<u64>.
-        let arena = unsafe { as_primitive::<u64>(ptr) };
-        let slot = arena.reserve(777);
-        assert_eq!(arena.read(slot), 777);
-
-        // Confirm a second ensure_arena call returns a pointer to the
-        // same arena and the slot is still there.
-        let ptr2 = registry.ensure_arena::<u64, _>(|| panic!("must not recreate"));
-        let arena2 = unsafe { as_primitive::<u64>(ptr2) };
-        assert_eq!(arena2.read(slot), 777);
-    }
-
-    #[test]
-    fn downcast_through_registry_for_generic_type() {
-        let registry = ArenaRegistry::new();
-        let ptr = registry.ensure_arena::<Vec<u8>, _>(|| Box::new(GenericArena::<Vec<u8>>::new()));
-        // SAFETY: we just created this arena as GenericArena<Vec<u8>>.
-        let arena = unsafe { as_generic::<Vec<u8>>(ptr) };
-        let slot = arena.reserve_with(vec![1, 2, 3]);
-        assert_eq!(arena.read(slot), vec![1, 2, 3]);
-    }
-
-    #[test]
-    fn pointer_remains_valid_across_later_insertions() {
-        // After inserting u64, we hold its pointer. Inserting more types
-        // may cause HashMap resizing, but the u64 arena's Box lives on
-        // the heap and does not move.
-        let registry = ArenaRegistry::new();
-        let p_u64 =
-            registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        // SAFETY: valid because we just inserted and hold registry.
-        let arena_u64 = unsafe { as_primitive::<u64>(p_u64) };
-        let slot = arena_u64.reserve(42);
-        assert_eq!(arena_u64.read(slot), 42);
-
-        // Trigger many insertions to force HashMap rehashes. Each
-        // "unique type" here uses a fresh marker struct via monomorphic
-        // instantiation in a helper; here we use distinct primitive
-        // arena types plus a generic one per iteration.
-        registry.ensure_arena::<i32, _>(|| Box::new(AtomicPrimitiveArena::<i32>::new()));
-        registry.ensure_arena::<u32, _>(|| Box::new(AtomicPrimitiveArena::<u32>::new()));
-        registry.ensure_arena::<i64, _>(|| Box::new(AtomicPrimitiveArena::<i64>::new()));
-        registry.ensure_arena::<f32, _>(|| Box::new(AtomicPrimitiveArena::<f32>::new()));
-        registry.ensure_arena::<f64, _>(|| Box::new(AtomicPrimitiveArena::<f64>::new()));
-        registry.ensure_arena::<bool, _>(|| Box::new(AtomicPrimitiveArena::<bool>::new()));
-        registry.ensure_arena::<String, _>(|| Box::new(GenericArena::<String>::new()));
-        registry.ensure_arena::<Vec<u8>, _>(|| Box::new(GenericArena::<Vec<u8>>::new()));
-
-        // The original u64 pointer must still resolve to the same arena
-        // holding the same slot value.
-        // SAFETY: arenas are never removed; pointer is stable.
-        let arena_u64_again = unsafe { as_primitive::<u64>(p_u64) };
-        assert_eq!(arena_u64_again.read(slot), 42);
-    }
-
-    /// Collapse a fat `*const dyn ErasedArena` to its data-pointer
-    /// address. We use this only for identity comparisons in tests; the
-    /// vtable half is discarded so the result is a plain `usize` that
-    /// can cross thread boundaries. Never dereferenced.
-    fn data_addr(ptr: *const dyn ErasedArena) -> usize {
-        ptr as *const () as usize
-    }
-
-    #[test]
-    fn concurrent_ensure_arena_returns_a_single_arena() {
-        // Many threads race to ensure an arena for the same type. Only
-        // one factory invocation should succeed, and all threads should
-        // receive pointers with the same data address.
-        const THREADS: usize = 16;
-
-        let registry = Arc::new(ArenaRegistry::new());
-        let factory_invocations = Arc::new(std::sync::atomic::AtomicUsize::new(0));
-
-        let handles: Vec<_> = (0..THREADS)
-            .map(|_| {
-                let registry = registry.clone();
-                let counter = factory_invocations.clone();
-                thread::spawn(move || {
-                    let ptr = registry.ensure_arena::<u64, _>(|| {
-                        counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
-                        Box::new(AtomicPrimitiveArena::<u64>::new())
-                    });
-                    data_addr(ptr)
-                })
-            })
-            .collect();
-
-        let addrs: Vec<usize> = handles.into_iter().map(|h| h.join().unwrap()).collect();
-        let first = addrs[0];
-        for a in &addrs {
-            assert_eq!(*a, first, "all threads must see the same arena pointer");
-        }
-        assert_eq!(
-            factory_invocations.load(std::sync::atomic::Ordering::SeqCst),
-            1,
-            "factory should run exactly once under concurrent ensure_arena"
-        );
-        assert_eq!(registry.len(), 1);
-    }
-
-    #[test]
-    fn concurrent_ensure_arena_for_distinct_types_is_independent() {
-        // Multiple threads each inserting a different type should all
-        // succeed without interfering.
-        let registry = Arc::new(ArenaRegistry::new());
-
-        let h1 = {
-            let registry = registry.clone();
-            thread::spawn(move || {
-                data_addr(
-                    registry
-                        .ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new())),
-                )
-            })
-        };
-        let h2 = {
-            let registry = registry.clone();
-            thread::spawn(move || {
-                data_addr(
-                    registry
-                        .ensure_arena::<i32, _>(|| Box::new(AtomicPrimitiveArena::<i32>::new())),
-                )
-            })
-        };
-        let h3 = {
-            let registry = registry.clone();
-            thread::spawn(move || {
-                data_addr(
-                    registry.ensure_arena::<String, _>(|| Box::new(GenericArena::<String>::new())),
-                )
-            })
-        };
-
-        let a1 = h1.join().unwrap();
-        let a2 = h2.join().unwrap();
-        let a3 = h3.join().unwrap();
-        assert_ne!(a1, a2);
-        assert_ne!(a1, a3);
-        assert_ne!(a2, a3);
-        assert_eq!(registry.len(), 3);
-    }
-
-    #[test]
-    fn tls_cache_hit_returns_same_pointer_as_uncached() {
-        // The first lookup populates the cache; the second lookup must
-        // return the same pointer. This is the happy-path correctness
-        // check for the cache.
-        super::cache_clear();
-        let registry = ArenaRegistry::new();
-        let p1 = registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        // Second call hits the cache (factory must not run).
-        let p2 = registry.ensure_arena::<u64, _>(|| panic!("factory ran on cache hit"));
-        assert_eq!(data_addr(p1), data_addr(p2));
-        // And via lookup().
-        let p3 = registry.lookup::<u64>().expect("arena exists");
-        assert_eq!(data_addr(p1), data_addr(p3));
-    }
-
-    #[test]
-    fn tls_cache_distinguishes_between_registries() {
-        // Two registries each have their own u64 arena. After touching
-        // both from the same thread, the cache should hold entries for
-        // both and each lookup should return its own registry's arena.
-        // This is the correctness check that keys the cache by
-        // (registry_id, type_id), not just type_id.
-        super::cache_clear();
-        let reg_a = ArenaRegistry::new();
-        let reg_b = ArenaRegistry::new();
-        assert_ne!(reg_a.id(), reg_b.id());
-
-        let a = reg_a.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        let b = reg_b.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        assert_ne!(data_addr(a), data_addr(b));
-
-        // Populate the u64 arenas with distinct values via their typed
-        // references, so subsequent lookups can be verified to return
-        // the right arena and not the wrong one.
-        let arena_a = unsafe { as_primitive::<u64>(a) };
-        let arena_b = unsafe { as_primitive::<u64>(b) };
-        let slot_a = arena_a.reserve(111);
-        let slot_b = arena_b.reserve(222);
-
-        // Interleaved lookups: both should still go to the right place.
-        for _ in 0..10 {
-            let got_a = reg_a.ensure_arena::<u64, _>(|| panic!("factory ran"));
-            let got_b = reg_b.ensure_arena::<u64, _>(|| panic!("factory ran"));
-            assert_eq!(data_addr(got_a), data_addr(a));
-            assert_eq!(data_addr(got_b), data_addr(b));
-            let arena_a_again = unsafe { as_primitive::<u64>(got_a) };
-            let arena_b_again = unsafe { as_primitive::<u64>(got_b) };
-            assert_eq!(arena_a_again.read(slot_a), 111);
-            assert_eq!(arena_b_again.read(slot_b), 222);
-        }
-    }
-
-    #[test]
-    fn tls_cache_round_robin_eviction_over_five_types() {
-        // With CACHE_SLOTS = 4, touching five distinct types evicts the
-        // oldest on the fifth insertion. Subsequent lookups on all five
-        // still succeed via the lock-backed fallback and re-enter the
-        // cache. The test asserts correctness, not the specific
-        // eviction victim, since round-robin order is an implementation
-        // detail.
-        super::cache_clear();
-        let registry = ArenaRegistry::new();
-        let a = registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        let b = registry.ensure_arena::<i32, _>(|| Box::new(AtomicPrimitiveArena::<i32>::new()));
-        let c = registry.ensure_arena::<u32, _>(|| Box::new(AtomicPrimitiveArena::<u32>::new()));
-        let d = registry.ensure_arena::<i64, _>(|| Box::new(AtomicPrimitiveArena::<i64>::new()));
-        let e = registry.ensure_arena::<f64, _>(|| Box::new(AtomicPrimitiveArena::<f64>::new()));
-
-        // All five should still lookup cleanly. None should trigger the
-        // factory (they are all in the registry), and all pointers
-        // should match the originals.
-        assert_eq!(
-            data_addr(registry.ensure_arena::<u64, _>(|| panic!("re-created u64"))),
-            data_addr(a)
-        );
-        assert_eq!(
-            data_addr(registry.ensure_arena::<i32, _>(|| panic!("re-created i32"))),
-            data_addr(b)
-        );
-        assert_eq!(
-            data_addr(registry.ensure_arena::<u32, _>(|| panic!("re-created u32"))),
-            data_addr(c)
-        );
-        assert_eq!(
-            data_addr(registry.ensure_arena::<i64, _>(|| panic!("re-created i64"))),
-            data_addr(d)
-        );
-        assert_eq!(
-            data_addr(registry.ensure_arena::<f64, _>(|| panic!("re-created f64"))),
-            data_addr(e)
-        );
-    }
-}
-
-/// Microbenchmarks for the registry lookup cost.
-///
-/// These are `#[ignore]` tests rather than criterion benches because the
-/// concrete arena types and the registry API are `pub(crate)` during the
-/// v2 rewrite. External benches in `benches/` cannot see them. This
-/// tradeoff keeps the v2 module visibility honest (nothing is exposed
-/// publicly before Gate 5) while still letting us measure the lookup
-/// cost against the spec's budget.
-///
-/// Run with:
-///
-/// ```text
-/// cargo test --release -p incr-concurrent v2::registry::bench -- --ignored --nocapture
-/// ```
-///
-/// The output reports nanoseconds per call for each variant.
-#[cfg(test)]
-mod bench {
-    use super::super::arena::AtomicPrimitiveArena;
-    use super::ArenaRegistry;
-    use std::hint::black_box;
-    use std::time::Instant;
-
-    /// How many iterations per measurement. Large enough to drive noise
-    /// below a nanosecond when compiled with `--release`.
-    const ITERS: usize = 20_000_000;
-
-    #[test]
-    #[ignore = "microbenchmark, run with --release"]
-    fn bench_ensure_arena_hot_path() {
-        // Hot path: `ensure_arena::<T>` after the first insertion. Takes
-        // the read lock and hits the HashMap. This is the cost paid by
-        // every `rt.get` for a type whose arena has been seen before.
-        let registry = ArenaRegistry::new();
-        registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-
-        let start = Instant::now();
-        let mut acc: usize = 0;
-        for _ in 0..ITERS {
-            let ptr = registry.ensure_arena::<u64, _>(|| unreachable!());
-            acc = acc.wrapping_add(ptr as *const () as usize);
-        }
-        let elapsed = start.elapsed();
-        black_box(acc);
-        let per_call = elapsed.as_nanos() as f64 / ITERS as f64;
-        eprintln!(
-            "ensure_arena hot path (read lock + hashmap lookup): {:.2} ns/call",
-            per_call
-        );
-    }
-
-    #[test]
-    #[ignore = "microbenchmark, run with --release"]
-    fn bench_lookup_hot_path() {
-        // Hot path via the simpler `lookup` method. Should match
-        // `ensure_arena` hot path because both take the read lock and
-        // both do a single hashmap lookup.
-        let registry = ArenaRegistry::new();
-        registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-
-        let start = Instant::now();
-        let mut acc: usize = 0;
-        for _ in 0..ITERS {
-            let ptr = registry.lookup::<u64>().expect("arena exists");
-            acc = acc.wrapping_add(ptr as *const () as usize);
-        }
-        let elapsed = start.elapsed();
-        black_box(acc);
-        let per_call = elapsed.as_nanos() as f64 / ITERS as f64;
-        eprintln!("lookup hot path: {:.2} ns/call", per_call);
-    }
-
-    #[test]
-    #[ignore = "microbenchmark, run with --release"]
-    fn bench_end_to_end_read() {
-        // End-to-end: `ensure_arena::<u64>` + downcast + `arena.read`.
-        // This is the full cost of a single `rt.get<u64>` at the
-        // arena-access level. Excludes NodeData load, state.load_acquire,
-        // and the fast-fail `state == Clean` check, which live in the
-        // Runtime and will be measured separately when that code lands.
-        let registry = ArenaRegistry::new();
-        let ptr = registry.ensure_arena::<u64, _>(|| Box::new(AtomicPrimitiveArena::<u64>::new()));
-        // SAFETY: pointer is valid for the registry's lifetime.
-        let arena = unsafe {
-            (*ptr)
-                .as_any()
-                .downcast_ref::<AtomicPrimitiveArena<u64>>()
-                .expect("downcast")
-        };
-        let slot = arena.reserve(0xDEAD_BEEF);
-
-        let start = Instant::now();
-        let mut acc: u64 = 0;
-        for _ in 0..ITERS {
-            let ptr = registry.ensure_arena::<u64, _>(|| unreachable!());
-            // SAFETY: valid for the registry's lifetime.
-            let arena = unsafe {
-                (*ptr)
-                    .as_any()
-                    .downcast_ref::<AtomicPrimitiveArena<u64>>()
-                    .expect("downcast")
-            };
-            acc = acc.wrapping_add(arena.read(slot));
-        }
-        let elapsed = start.elapsed();
-        black_box(acc);
-        let per_call = elapsed.as_nanos() as f64 / ITERS as f64;
-        eprintln!(
-            "end-to-end (ensure_arena + downcast + read): {:.2} ns/call",
-            per_call
-        );
-    }
-
-    #[test]
-    #[ignore = "microbenchmark, run with --release"]
-    fn bench_direct_arena_read_baseline() {
-        // Baseline: read from an arena we hold directly, bypassing the
-        // registry entirely. Tells us the arena read cost in isolation,
-        // so we can subtract it from the end-to-end number and see what
-        // the registry itself is costing.
-        let arena: AtomicPrimitiveArena<u64> = AtomicPrimitiveArena::new();
-        let slot = arena.reserve(0xDEAD_BEEF);
-
-        let start = Instant::now();
-        let mut acc: u64 = 0;
-        for _ in 0..ITERS {
-            acc = acc.wrapping_add(arena.read(slot));
-        }
-        let elapsed = start.elapsed();
-        black_box(acc);
-        let per_call = elapsed.as_nanos() as f64 / ITERS as f64;
-        eprintln!("arena.read direct (no registry): {:.2} ns/call", per_call);
-    }
-}
diff --git a/crates/incr-concurrent/src/runtime.rs b/crates/incr-concurrent/src/runtime.rs
deleted file mode 100644
index c1bf552..0000000
--- a/crates/incr-concurrent/src/runtime.rs
+++ /dev/null
@@ -1,2416 +0,0 @@
-//! Skeletal v2 Runtime.
-//!
-//! This is the commit-F scaffolding: it ties the state machine, typed
-//! arenas, arena registry, NodeData, and Incr<T> handle together into
-//! a runnable `Runtime` type with minimal behavior. The goal is to
-//! prove the architecture holds together end-to-end, not to reach
-//! feature parity with v1.
-//!
-//! # What is implemented
-//!
-//! - `Runtime::new()` constructs a fresh runtime with a unique
-//!   `RuntimeId`.
-//! - `create_input::<T>(initial)` registers an input node whose value
-//!   is immediately available via `get`.
-//! - `create_query::<T, F>(compute)` registers a lazy query node whose
-//!   compute closure runs on first `get` and whose result is memoized.
-//! - `get::<T>(handle)` reads an input or forces a compute on a query.
-//!   Handle verification rejects cross-runtime and stale-generation
-//!   handles with clear panics.
-//! - `set::<T>(handle, value)` updates an input. Taking `set` on a
-//!   query node panics with a diagnostic message.
-//!
-//! # What is deliberately deferred
-//!
-//! - **Dependency tracking.** A compute closure can call `rt.get(dep)`
-//!   but the runtime does not record which deps the compute touched.
-//!   Queries are memoized forever after their first compute.
-//! - **Dirty propagation.** `set` updates an input's arena slot and
-//!   bumps the revision but does NOT mark dependent queries as dirty.
-//!   No dependent queries exist in the runtime's bookkeeping anyway.
-//! - **Early cutoff.** No value comparison on recompute (there is no
-//!   recompute).
-//! - **Cycle detection.** No `COMPUTE_STACK` thread-local yet. A query
-//!   whose compute closure transitively calls its own handle will
-//!   deadlock-spin on the Computing state indefinitely; this is
-//!   temporary.
-//! - **Panic catching.** A panic inside a compute closure leaves the
-//!   node in `Computing` state permanently. A future commit adds
-//!   `catch_unwind` and the `Failed` transition per spec section 8.
-//! - **AtomicPrimitiveArena dispatch.** Every value type uses
-//!   `GenericArena<T>`, even primitives. The primitive arena from
-//!   commit A is unused here because specialization is unstable and
-//!   a sealed `Value` trait with manual primitive impls adds complexity
-//!   this commit does not need. A future commit adds the Value trait
-//!   and wires primitives to their faster arena.
-//!
-//! # Node storage
-//!
-//! Nodes live in a `RwLock<Vec<Box<NodeData>>>` indexed by slot. The
-//! `Box` keeps each NodeData at a stable heap address across Vec
-//! resizes, so pointers into NodeData remain valid after the vec
-//! grows. This is simpler than a segmented store and suffices for the
-//! scaffolding. A later commit upgrades nodes to a segmented store
-//! (sharing the segment machinery with the arenas via an extracted
-//! helper) so that reader traversal is fully lock-free.
-//!
-//! Compute closures live in a parallel
-//! `RwLock<Vec<Option<Arc<ComputeFn>>>>` indexed the same way.
-//! Input nodes have `None`; query nodes have `Some`. `Arc` lets
-//! `run_compute` take a cheap clone under the read lock and release
-//! the lock before invoking the closure, so nested `get` calls inside
-//! compute do not reenter the same lock.
-
-use std::any::Any;
-use std::cell::RefCell;
-use std::collections::HashMap;
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::{Arc, Mutex, RwLock};
-
-use super::arena::ErasedArena;
-use super::handle::{Incr, RuntimeId};
-use super::node::{NodeData, NodeId};
-use super::nodes_store::SegmentedNodes;
-use super::registry::ArenaRegistry;
-use super::state::NodeState;
-use super::value::Value;
-
-/// Whether a node is an input or a computed value.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum NodeKindInfo {
-    Input,
-    Compute,
-}
-
-/// Structural metadata about a single node, for visualization/debugging.
-#[derive(Clone, Debug)]
-pub struct NodeInfo {
-    pub slot: u32,
-    pub kind: NodeKindInfo,
-    pub label: String,
-    pub dependencies: Vec<u32>,
-    pub dependents: Vec<u32>,
-}
-
-/// What happened to a node during a traced get() call.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum TraceAction {
-    /// Node was verified clean without recomputing.
-    VerifiedClean,
-    /// Node was recomputed; `value_changed` is false when early cutoff applied.
-    Recomputed { value_changed: bool },
-}
-
-/// Trace entry for a single node during propagation.
-#[derive(Clone, Debug)]
-pub struct NodeTrace {
-    pub slot: u32,
-    pub action: TraceAction,
-}
-
-/// Summary of what happened during a single get() call.
-#[derive(Clone, Debug)]
-pub struct PropagationTrace {
-    pub target: u32,
-    pub total_nodes: usize,
-    pub nodes_recomputed: usize,
-    pub nodes_cutoff: usize,
-    pub elapsed_ns: u64,
-    pub node_traces: Vec<NodeTrace>,
-}
-
-// COMPUTE_STACK: per-thread stack of active compute frames.
-//
-// When the Runtime enters `run_compute` for a node, it pushes a frame onto
-// this thread's compute stack. Every `rt.get` call checks the stack top:
-// if there is an active frame for the same runtime, the handle being read
-// is recorded as a dependency of the frame's node. On compute exit, the
-// frame is popped and its recorded deps are published to the node.
-//
-// The stack is per-thread, not per-runtime, so a compute closure running
-// on thread T recording deps sees only the frames that T is responsible
-// for. Cross-thread dep tracking is a non-concept: a compute closure runs
-// on exactly one thread from start to finish. Cross-runtime dep tracking
-// is explicitly skipped: a compute closure for runtime A that happens to
-// call into runtime B does not record B's nodes as deps of A's node. The
-// frame's `runtime_id` field is how we make that distinction.
-//
-// Cycle detection will plug into this stack in a later commit (L per the
-// rewrite sequencing): before pushing a frame for node X, walk the stack
-// to see whether X is already present on the current thread. If yes,
-// panic with CycleError. This commit does not implement that check; a
-// cyclic query will simply self-spin on its own Computing state and the
-// test suite stays away from that case.
-
-/// A single frame in the compute stack. Created when `run_compute` begins
-/// for a node and destroyed when the compute completes (or panics, once
-/// panic catching lands).
-struct ComputeFrame {
-    /// Identity of the runtime that pushed this frame. A cross-runtime
-    /// `rt.get` call whose runtime id does not match this field skips
-    /// dep recording. This keeps cross-runtime compute closures honest:
-    /// they do not contaminate each other's dep graphs.
-    runtime_id: RuntimeId,
-    /// Slot index of the node whose compute this frame is tracking.
-    /// Recorded deps become this node's dependencies on compute exit.
-    node_slot: u32,
-    /// Dependencies recorded so far. Appended to on every `rt.get` inside
-    /// the compute closure that matches this frame's runtime. Deduplicated
-    /// on compute exit before publishing to the node.
-    deps: Vec<NodeId>,
-}
-
-thread_local! {
-    /// Per-thread stack of active compute frames. Nested computes push
-    /// and pop in LIFO order. `RefCell` suffices because a single thread
-    /// cannot have two overlapping borrows on its own stack (nested
-    /// operations are strictly sequential), and the stack is never
-    /// shared across threads.
-    static COMPUTE_STACK: RefCell<Vec<ComputeFrame>> = const { RefCell::new(Vec::new()) };
-}
-
-/// Type-erased compute closure for a query node. Returns `true` if the
-/// new value differs from the previous arena value (so the runtime
-/// should bump `changed_at`), `false` if local early cutoff applied.
-/// The closure owns all T-specific work (user compute, early-cutoff
-/// comparison, arena write) so `run_compute` can be non-generic.
-type ComputeFn = dyn Fn(&Runtime, u32, bool) -> bool + Send + Sync;
-
-/// Runtime state behind a single `RwLock`. Three parallel Vecs
-/// indexed by node slot, grown together via `append_node`.
-struct RuntimeInner {
-    /// `None` for input nodes; `Some(arc)` for query nodes.
-    compute_fns: Vec<Option<Arc<ComputeFn>>>,
-    /// Forward edges: `dependents[slot]` lists nodes that depend on
-    /// `slot`. Spec section 5.1's parallel dependents vec.
-    dependents: Vec<Vec<NodeId>>,
-    /// Stashed panic message for nodes in the Failed state, else
-    /// None. Cleared by the dirty walk on Failed → Dirty.
-    failure_messages: Vec<Option<String>>,
-    /// Optional display labels for nodes, keyed by slot.
-    labels: HashMap<u32, String>,
-    /// When true, get_traced() can record trace events (stub for now).
-    tracing: bool,
-}
-
-/// The v2 incremental computation runtime.
-pub struct Runtime {
-    id: RuntimeId,
-    /// Lock-free segmented node store. Readers get direct `&NodeData`
-    /// via Acquire on `len`; writers serialize through `write_mutex`.
-    nodes: SegmentedNodes,
-    inner: RwLock<RuntimeInner>,
-    registry: ArenaRegistry,
-    /// Monotonic revision counter. Bumped on every `set`; used by
-    /// the post-compute revision check to detect writer races.
-    revision: AtomicU64,
-    /// Serializes all writers (`create_*`, `set`).
-    write_mutex: Mutex<()>,
-}
-
-impl Runtime {
-    /// Construct a new runtime with a fresh identity.
-    pub fn new() -> Self {
-        let registry = ArenaRegistry::new();
-        Self {
-            id: registry.id(),
-            nodes: SegmentedNodes::new(),
-            inner: RwLock::new(RuntimeInner {
-                compute_fns: Vec::new(),
-                dependents: Vec::new(),
-                failure_messages: Vec::new(),
-                labels: HashMap::new(),
-                tracing: false,
-            }),
-            registry,
-            revision: AtomicU64::new(0),
-            write_mutex: Mutex::new(()),
-        }
-    }
-
-    /// Return this runtime's identity.
-    pub fn id(&self) -> RuntimeId {
-        self.id
-    }
-
-    /// Current revision counter. Bumped on every `set`. Not user-facing
-    /// yet; exposed for tests.
-    #[cfg(test)]
-    pub(crate) fn revision(&self) -> u64 {
-        self.revision.load(Ordering::Relaxed)
-    }
-
-    /// Create an input node holding the given initial value.
-    ///
-    /// The `T: PartialEq` bound enables early cutoff: a `set` with the
-    /// same value as the current one is a no-op (no revision bump, no
-    /// dirty walk), and a recompute that produces a value equal to the
-    /// previous one skips the arena write. The bound is uniform across
-    /// all v2 value types so the API surface stays consistent.
-    pub fn create_input<T>(&self, initial: T) -> Incr<T>
-    where
-        T: Value,
-    {
-        let _guard = self
-            .write_mutex
-            .lock()
-            .expect("runtime write mutex poisoned");
-        let revision = self.revision.load(Ordering::Relaxed);
-
-        let arena_slot = T::reserve_with(self.arena_for::<T>(), initial);
-
-        let node = NodeData::new_input(0, arena_slot, revision);
-
-        let slot = self.append_node(node, None);
-        Incr::new(slot, 0, self.id)
-    }
-
-    /// Create a query node whose value is produced by running `compute`.
-    /// The value is memoized until an upstream input changes.
-    pub fn create_query<T, F>(&self, compute: F) -> Incr<T>
-    where
-        T: Value,
-        F: Fn(&Runtime) -> T + Send + Sync + 'static,
-    {
-        let _guard = self
-            .write_mutex
-            .lock()
-            .expect("runtime write mutex poisoned");
-
-        let arena_slot = T::reserve_empty(self.arena_for::<T>());
-        let node = NodeData::new_query(0, arena_slot);
-
-        // Wrap the user closure in a type-erased adapter that owns all
-        // T-specific post-compute work so `run_compute` can be
-        // non-generic. `try_read` handles the Failed→Dirty retry case
-        // where the previous compute panicked before writing.
-        let erased: Arc<ComputeFn> = Arc::new(
-            move |rt: &Runtime, _slot: u32, is_recompute: bool| -> bool {
-                let new_value: T = compute(rt);
-                let arena = rt.arena_for::<T>();
-                let value_changed = if is_recompute {
-                    match T::try_read(arena, arena_slot) {
-                        Some(old_value) => old_value != new_value,
-                        None => true,
-                    }
-                } else {
-                    true
-                };
-                if value_changed {
-                    T::write(arena, arena_slot, new_value);
-                }
-                value_changed
-            },
-        );
-
-        let slot = self.append_node(node, Some(erased));
-        Incr::new(slot, 0, self.id)
-    }
-
-    /// Read the value of a node. Fast path is a lock-free Clean check;
-    /// slow path delegates to the type-erased `ensure_clean` walker.
-    pub fn get<T>(&self, handle: Incr<T>) -> T
-    where
-        T: Value,
-    {
-        self.check_runtime(handle);
-        self.check_cycle_and_record_dep(handle.slot());
-
-        let node = self.nodes.get(handle.slot());
-        node.verify_handle(handle, self.id)
-            .unwrap_or_else(|e| panic!("{}", e));
-        if node.state() == NodeState::Clean {
-            let arena_slot = node.arena_slot();
-            return T::read(self.arena_for::<T>(), arena_slot);
-        }
-
-        self.ensure_clean(handle.slot());
-        let arena_slot = self.nodes.get(handle.slot()).arena_slot();
-        T::read(self.arena_for::<T>(), arena_slot)
-    }
-
-    /// Iterative post-order walker: drive the node at `target` to
-    /// Clean. Each stack entry is `(slot, visited)`. Unvisited: push
-    /// self back as visited, then push not-yet-Clean deps. Visited:
-    /// deps are Clean, run the compute. Cycle detection in user dep
-    /// graphs still flows through `COMPUTE_STACK` via
-    /// `check_cycle_and_record_dep`; this walker can't hit a cycle
-    /// because the dep graph is a DAG by construction.
-    fn ensure_clean(&self, target: u32) {
-        if self.nodes.get(target).state() == NodeState::Clean {
-            return;
-        }
-
-        let mut stack: Vec<(u32, bool)> = Vec::with_capacity(16);
-        stack.push((target, false));
-
-        while let Some((slot, visited)) = stack.pop() {
-            if visited {
-                self.compute_slot_via_walker(slot);
-                continue;
-            }
-
-            let node = self.nodes.get(slot);
-            match node.state() {
-                NodeState::Clean => {}
-                NodeState::Failed => self.panic_with_failure(slot),
-                NodeState::Computing => {
-                    std::hint::spin_loop();
-                    stack.push((slot, false));
-                }
-                NodeState::New | NodeState::Dirty => {
-                    stack.push((slot, true));
-                    node.for_each_dep(|dep| {
-                        if self.nodes.get(dep.0).state() != NodeState::Clean {
-                            stack.push((dep.0, false));
-                        }
-                    });
-                }
-            }
-        }
-    }
-
-    /// Post-order-visit handler for `ensure_clean`. Loops until the
-    /// slot is observably Clean: `run_compute` may transition back
-    /// to Dirty when the commit-P revision check detects a
-    /// concurrent writer race, in which case we retry.
-    fn compute_slot_via_walker(&self, slot: u32) {
-        loop {
-            let state = self.nodes.get(slot).state();
-            match state {
-                NodeState::Clean => return,
-                NodeState::Failed => self.panic_with_failure(slot),
-                NodeState::Computing => std::hint::spin_loop(),
-                NodeState::New | NodeState::Dirty => {
-                    let claimed = self
-                        .nodes
-                        .get(slot)
-                        .state_cell()
-                        .try_claim_compute()
-                        .is_ok();
-                    if claimed {
-                        self.run_compute(slot, state == NodeState::Dirty);
-                        continue;
-                    }
-                    std::hint::spin_loop();
-                }
-            }
-        }
-    }
-
-    /// Panic with a Failed node's stashed compute-closure message.
-    #[cold]
-    fn panic_with_failure(&self, slot: u32) -> ! {
-        let msg = {
-            let inner = self.inner.read().expect("runtime inner lock poisoned");
-            inner.failure_messages[slot as usize].clone()
-        };
-        panic!(
-            "v2 runtime: node at slot {} is Failed: {}",
-            slot,
-            msg.as_deref().unwrap_or("unknown failure")
-        );
-    }
-
-    /// Update an input node's value. Panics if the handle refers to a
-    /// query node. Writer/reader exclusion on the arena slot is
-    /// handled by the Value trait (atomic for primitives, per-slot
-    /// mutex for generics); `write_mutex` only serializes writers.
-    pub fn set<T>(&self, handle: Incr<T>, value: T)
-    where
-        T: Value,
-    {
-        self.check_runtime(handle);
-        let _guard = self
-            .write_mutex
-            .lock()
-            .expect("runtime write mutex poisoned");
-
-        let node = self.nodes.get(handle.slot());
-        node.verify_handle(handle, self.id)
-            .unwrap_or_else(|e| panic!("{}", e));
-        assert!(
-            self.inner
-                .read()
-                .expect("runtime inner lock poisoned")
-                .compute_fns[handle.slot() as usize]
-                .is_none(),
-            "set() called on a query node; only input nodes may be set"
-        );
-        let arena_slot = node.arena_slot();
-
-        // Early cutoff: same-value set is a no-op.
-        let current: T = T::read(self.arena_for::<T>(), arena_slot);
-        if current == value {
-            return;
-        }
-
-        T::write(self.arena_for::<T>(), arena_slot, value);
-        let new_revision = self.revision.fetch_add(1, Ordering::Relaxed) + 1;
-        node.set_changed_at(new_revision);
-        node.set_verified_at(new_revision);
-        // State was and remains Clean; the Release store anchors the
-        // arena write and revision bumps for readers that Acquire
-        // Clean.
-        node.state_cell().store_release(NodeState::Clean);
-        self.mark_dependents_dirty(handle.slot());
-    }
-
-    /// Append a new node to the store and the parallel vecs in
-    /// `inner`, returning the slot. Caller must hold `write_mutex`.
-    fn append_node(&self, node: NodeData, compute: Option<Arc<ComputeFn>>) -> u32 {
-        let slot = self.nodes.push(node);
-
-        let mut inner = self.inner.write().expect("runtime inner lock poisoned");
-        debug_assert_eq!(slot as usize, inner.compute_fns.len());
-        debug_assert_eq!(slot as usize, inner.dependents.len());
-        debug_assert_eq!(slot as usize, inner.failure_messages.len());
-        inner.compute_fns.push(compute);
-        inner.dependents.push(Vec::new());
-        inner.failure_messages.push(None);
-        slot
-    }
-
-    /// Check that a handle's runtime id matches this runtime. Must be
-    /// called before any code that dereferences `handle.slot()`, since
-    /// a cross-runtime handle's slot may be out of bounds in this
-    /// runtime's nodes vec. Runs before any index operation so the
-    /// user sees the actual cross-runtime diagnostic rather than an
-    /// opaque index-out-of-bounds panic.
-    #[inline]
-    fn check_runtime<T: 'static>(&self, handle: Incr<T>) {
-        if handle.runtime_id() != self.id {
-            panic!(
-                "Incr handle from runtime {:?} used with runtime {:?}",
-                handle.runtime_id(),
-                self.id
-            );
-        }
-    }
-
-    /// Combined cycle detection and dep recording for the `get` hot
-    /// path. Hot path is an empty-stack early return (one RefCell
-    /// borrow). On a non-empty stack, walks all frames for cycles
-    /// (spec section 9), then pushes `slot` onto the top frame's
-    /// deps if it belongs to this runtime and isn't a self-read.
-    #[inline]
-    fn check_cycle_and_record_dep(&self, slot: u32) {
-        COMPUTE_STACK.with(|stack| {
-            let mut stack = stack.borrow_mut();
-            if stack.is_empty() {
-                return;
-            }
-            for frame in stack.iter() {
-                if frame.runtime_id == self.id && frame.node_slot == slot {
-                    panic!(
-                        "CycleError: dependency cycle detected: node at slot {} \
-                         is already computing on this thread",
-                        slot
-                    );
-                }
-            }
-            if let Some(frame) = stack.last_mut() {
-                if frame.runtime_id == self.id && frame.node_slot != slot {
-                    frame.deps.push(NodeId(slot));
-                }
-            }
-        });
-    }
-
-    /// Push a new compute frame onto this thread's stack. Called at
-    /// the start of `run_compute`.
-    fn push_compute_frame(&self, node_slot: u32) {
-        COMPUTE_STACK.with(|stack| {
-            stack.borrow_mut().push(ComputeFrame {
-                runtime_id: self.id,
-                node_slot,
-                deps: Vec::new(),
-            });
-        });
-    }
-
-    /// Pop the top compute frame from this thread's stack and return
-    /// its recorded (and deduplicated, order-preserving) deps. Called
-    /// at the end of `run_compute`. Panics if the stack is empty or
-    /// the top frame does not match the expected node slot, either of
-    /// which would indicate a bug in the push/pop pairing.
-    fn pop_compute_frame(&self, expected_node_slot: u32) -> Vec<NodeId> {
-        /// Threshold below which linear dedup beats HashSet. Nearly
-        /// every compute in realistic workloads has 1-4 deps, and
-        /// linear scan over a small list (~1-2 ns per probe) is
-        /// dramatically cheaper than building a HashSet (~15-20 ns
-        /// hash + insert per element plus allocation). 8 is a
-        /// conservative cutoff: at 8 elements linear dedup does 28
-        /// compares worst case, well under the constant cost of a
-        /// single HashSet operation.
-        const LINEAR_DEDUP_THRESHOLD: usize = 8;
-
-        COMPUTE_STACK.with(|stack| {
-            let mut stack = stack.borrow_mut();
-            let frame = stack.pop().expect("compute stack underflow");
-            debug_assert_eq!(
-                frame.runtime_id, self.id,
-                "compute frame belongs to a different runtime"
-            );
-            debug_assert_eq!(
-                frame.node_slot, expected_node_slot,
-                "compute frame node_slot mismatch (expected {}, got {})",
-                expected_node_slot, frame.node_slot
-            );
-            // Deduplicate while preserving the order of first occurrence.
-            // Linear scan for small lists (the common case by far);
-            // HashSet for wide fan-in nodes.
-            if frame.deps.len() <= LINEAR_DEDUP_THRESHOLD {
-                let mut out: Vec<NodeId> = Vec::with_capacity(frame.deps.len());
-                for id in frame.deps {
-                    if !out.contains(&id) {
-                        out.push(id);
-                    }
-                }
-                out
-            } else {
-                let mut seen: std::collections::HashSet<NodeId> =
-                    std::collections::HashSet::with_capacity(frame.deps.len());
-                frame
-                    .deps
-                    .into_iter()
-                    .filter(|id| seen.insert(*id))
-                    .collect()
-            }
-        })
-    }
-
-    /// Run the compute closure for a query node and transition its
-    /// state to Clean (or Dirty if a concurrent writer raced). The
-    /// caller must have already CAS'd the state from New or Dirty to
-    /// Computing via `try_claim_compute`, and `ensure_clean`'s
-    /// iterative walker guarantees every dep is already Clean by the
-    /// time we get here.
-    ///
-    /// On Dirty recomputes, red-green runs before anything else:
-    /// load each dep's `changed_at` and compare against our
-    /// `verified_at`. If nothing moved, skip the closure entirely,
-    /// bump `verified_at`, Release Clean. This is the spec's section
-    /// 14 transitive early cutoff. Red-green must precede the
-    /// `inner.read()` Arc clone and frame push so short-circuits pay
-    /// neither cost.
-    fn run_compute(&self, slot: u32, is_recompute: bool) {
-        let revision_at_start = self.revision.load(Ordering::Relaxed);
-
-        // Red-green short-circuit: only on recompute, only when no
-        // dep has moved since our last verification.
-        if is_recompute {
-            let node = self.nodes.get(slot);
-            let my_verified = node.verified_at();
-            let mut any_dep_changed = false;
-            node.for_each_dep(|dep| {
-                if any_dep_changed {
-                    return;
-                }
-                if self.nodes.get(dep.0).changed_at() > my_verified {
-                    any_dep_changed = true;
-                }
-            });
-
-            if !any_dep_changed {
-                let revision_at_end = self.revision.load(Ordering::Relaxed);
-                if revision_at_end != revision_at_start {
-                    // Writer raced during our red-green walk; go
-                    // Dirty and let the next reader retry.
-                    node.state_cell().store_release(NodeState::Dirty);
-                    return;
-                }
-                // Do NOT touch `changed_at`: downstream red-green
-                // checks need to see it unchanged so they too can
-                // short-circuit.
-                node.set_verified_at(revision_at_end);
-                node.state_cell().store_release(NodeState::Clean);
-                return;
-            }
-        }
-
-        // Full compute path.
-        let compute = {
-            let inner = self.inner.read().expect("runtime inner lock poisoned");
-            inner.compute_fns[slot as usize]
-                .as_ref()
-                .expect("query node has no compute closure")
-                .clone()
-        };
-        self.push_compute_frame(slot);
-
-        let value_changed_result: std::thread::Result<bool> =
-            std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
-                (compute)(self, slot, is_recompute)
-            }));
-        let recorded_deps = self.pop_compute_frame(slot);
-
-        if !is_recompute {
-            self.nodes.get(slot).publish_initial_deps(&recorded_deps);
-            if !recorded_deps.is_empty() {
-                let mut inner = self.inner.write().expect("runtime inner lock poisoned");
-                for dep in &recorded_deps {
-                    inner.dependents[dep.0 as usize].push(NodeId(slot));
-                }
-            }
-        } else {
-            self.update_deps_on_recompute(slot, &recorded_deps);
-        }
-
-        let value_changed = match value_changed_result {
-            Ok(c) => c,
-            Err(panic_payload) => {
-                // Stash the message and transition Failed before
-                // re-raising. Dep bookkeeping above is preserved so
-                // the next upstream change can Failed → Dirty retry.
-                let msg = extract_panic_message(&panic_payload);
-                {
-                    let mut inner = self.inner.write().expect("runtime inner lock poisoned");
-                    inner.failure_messages[slot as usize] = Some(msg);
-                }
-                self.nodes
-                    .get(slot)
-                    .state_cell()
-                    .store_release(NodeState::Failed);
-                std::panic::resume_unwind(panic_payload);
-            }
-        };
-
-        // Post-compute revision check: if a writer raced during the
-        // compute, our result may be based on stale inputs. Go Dirty
-        // and let the next reader retry; skip the verified_at/
-        // changed_at updates.
-        let revision_at_end = self.revision.load(Ordering::Relaxed);
-        if revision_at_end != revision_at_start {
-            self.nodes
-                .get(slot)
-                .state_cell()
-                .store_release(NodeState::Dirty);
-            return;
-        }
-
-        // Update red-green revisions. `changed_at` only on actual
-        // value change so local early cutoff propagates transitively.
-        let node = self.nodes.get(slot);
-        if value_changed {
-            node.set_changed_at(revision_at_end);
-        }
-        node.set_verified_at(revision_at_end);
-        node.state_cell().store_release(NodeState::Clean);
-    }
-
-    /// Diff the new recorded deps against the node's previous dep
-    /// list and update both the node's forward dep list and the
-    /// runtime's reverse-edge (dependents) vec to reflect the
-    /// difference.
-    ///
-    /// Called from `run_compute` on recompute (when `is_recompute`
-    /// is true). The common case is that the dep set is unchanged
-    /// and this function is a fast compare-and-return; the
-    /// interesting case is dynamic dependencies where a conditional
-    /// inside the compute closure caused it to read a different
-    /// set of deps than the previous run.
-    ///
-    /// After commit U (SegmentedNodes), mutual exclusion for
-    /// `NodeData::replace_deps`'s overflow-pointer swap comes from
-    /// the Runtime's `write_mutex` being held by the caller
-    /// (run_compute is only invoked from get, which does not hold
-    /// any node lock, BUT a concurrent writer calling set would
-    /// take write_mutex and be blocked from running another
-    /// recompute). Since run_compute runs outside write_mutex,
-    /// there can be concurrent recompute on different nodes but
-    /// not on the same node (state machine's Computing CAS
-    /// guarantees at most one). The same-node exclusion is what
-    /// replace_deps actually needs.
-    fn update_deps_on_recompute(&self, slot: u32, new_deps: &[NodeId]) {
-        use std::collections::HashSet;
-
-        // Fast path: check for exact match without allocating a
-        // snapshot Vec. The vast majority of recomputes have
-        // unchanged dep sets (static deps), and allocating a Vec
-        // on every recompute just to compare and throw away is a
-        // measurable chunk of propagate_chain_1000's cost. Walk
-        // the existing deps via for_each_dep and compare
-        // element-by-element against new_deps; short-circuit on
-        // the first mismatch.
-        let node = self.nodes.get(slot);
-        let matches: bool = if node.dep_count() as usize != new_deps.len() {
-            false
-        } else {
-            let mut iter = new_deps.iter();
-            let mut all_matched = true;
-            node.for_each_dep(|existing| {
-                if all_matched {
-                    match iter.next() {
-                        Some(expected) if *expected == existing => {}
-                        _ => all_matched = false,
-                    }
-                }
-            });
-            all_matched
-        };
-        if matches {
-            return;
-        }
-
-        // Slow path: dep set changed. Snapshot the old deps into
-        // a Vec so we can compute diff sets against new_deps.
-        let old_deps: Vec<NodeId> = node.collect_deps();
-
-        // Compute the added and removed sets for reverse-edge
-        // bookkeeping.
-        let old_set: HashSet<NodeId> = old_deps.iter().copied().collect();
-        let new_set: HashSet<NodeId> = new_deps.iter().copied().collect();
-        let added: Vec<NodeId> = new_deps
-            .iter()
-            .filter(|d| !old_set.contains(*d))
-            .copied()
-            .collect();
-        let removed: Vec<NodeId> = old_deps
-            .iter()
-            .filter(|d| !new_set.contains(*d))
-            .copied()
-            .collect();
-
-        // Replace the node's forward dep list. The state machine's
-        // Computing ownership guarantees no other thread is
-        // computing this same node, but concurrent readers may be
-        // observing `overflow_deps` if they already loaded it. For
-        // commit U we leak the old overflow list (skip the
-        // Box::from_raw in replace_deps) and defer proper
-        // reclamation to a follow-up epoch-based commit. Leaking is
-        // a memory cost only, not a correctness cost.
-        node.replace_deps_leaking_old_overflow(new_deps);
-
-        // Update reverse edges. Added deps gain an incoming edge
-        // from `slot`; removed deps lose theirs.
-        if !added.is_empty() || !removed.is_empty() {
-            let mut inner = self.inner.write().expect("runtime inner lock poisoned");
-            for dep in &added {
-                inner.dependents[dep.0 as usize].push(NodeId(slot));
-            }
-            for dep in &removed {
-                inner.dependents[dep.0 as usize].retain(|d| d.0 != slot);
-            }
-        }
-    }
-
-    /// BFS from `changed_slot`'s dependents, transitioning each
-    /// reachable query to Dirty (or Failed → Dirty, for retry on
-    /// upstream change). Holds one `inner` read guard across the
-    /// entire walk to avoid per-node lock acquires. Clean→Dirty is
-    /// the common case; other source states (New, Dirty, Computing)
-    /// are skipped — Computing races are handled by the
-    /// post-compute revision check in `run_compute`. Failed nodes
-    /// that transition back to Dirty have their stashed messages
-    /// cleared at the end in one batched write.
-    fn mark_dependents_dirty(&self, changed_slot: u32) {
-        use std::collections::HashSet;
-        let mut visited: HashSet<u32> = HashSet::new();
-        let mut queue: Vec<u32> = Vec::new();
-        let mut cleared_failures: Vec<u32> = Vec::new();
-
-        {
-            let inner = self.inner.read().expect("runtime inner lock poisoned");
-            for dep in &inner.dependents[changed_slot as usize] {
-                if visited.insert(dep.0) {
-                    queue.push(dep.0);
-                }
-            }
-
-            while let Some(slot) = queue.pop() {
-                let cell = self.nodes.get(slot).state_cell();
-                if cell
-                    .try_transition(NodeState::Clean, NodeState::Dirty)
-                    .is_err()
-                    && cell
-                        .try_transition(NodeState::Failed, NodeState::Dirty)
-                        .is_ok()
-                {
-                    cleared_failures.push(slot);
-                }
-
-                // Walk forward regardless of whether we transitioned
-                // this node: dependents below may still be Clean and
-                // need marking.
-                for child in &inner.dependents[slot as usize] {
-                    if visited.insert(child.0) {
-                        queue.push(child.0);
-                    }
-                }
-            }
-        }
-
-        if !cleared_failures.is_empty() {
-            let mut inner = self.inner.write().expect("runtime inner lock poisoned");
-            for slot in cleared_failures {
-                inner.failure_messages[slot as usize] = None;
-            }
-        }
-    }
-
-    /// Look up (creating if necessary) the arena for value type `T`
-    /// via the Value trait. Returns a `&dyn ErasedArena` that the
-    /// Value trait's methods downcast to the concrete arena type.
-    ///
-    /// Per commit T: T's Value impl decides whether to route to
-    /// `AtomicPrimitiveArena<T>` (for primitives — tear-free reads)
-    /// or `GenericArena<T>` (for non-primitives — Option-gated).
-    /// The registry caches the arena per type so the factory runs
-    /// at most once per T per runtime.
-    fn arena_for<T: Value>(&self) -> &dyn ErasedArena {
-        let arena_ptr = self.registry.ensure_arena::<T, _>(|| T::create_arena());
-        // SAFETY: `arena_ptr` was returned by the registry and is
-        // stable for the registry's lifetime (arenas are never
-        // removed and each arena lives at a fixed heap address via
-        // Box). The returned reference's lifetime is tied to &self,
-        // which outlives the registry.
-        unsafe { &*arena_ptr }
-    }
-
-    /// Return the number of nodes currently registered in this runtime.
-    pub fn node_count(&self) -> usize {
-        self.nodes.len() as usize
-    }
-
-    /// Assign a human-readable label to a node slot for visualization/debugging.
-    pub fn set_label(&self, slot: u32, label: String) {
-        self.inner
-            .write()
-            .expect("runtime inner lock poisoned")
-            .labels
-            .insert(slot, label);
-    }
-
-    /// Enable or disable execution tracing. When enabled, `get_traced`
-    /// can in principle record which nodes were visited; the current
-    /// implementation is a stub that stores the flag but does not yet
-    /// record per-node trace events.
-    pub fn set_tracing(&self, enabled: bool) {
-        self.inner
-            .write()
-            .expect("runtime inner lock poisoned")
-            .tracing = enabled;
-    }
-
-    /// Like `get`, but also returns a `PropagationTrace` describing the
-    /// propagation. The trace fields `nodes_recomputed`, `nodes_cutoff`,
-    /// and `node_traces` are currently stubs (zero/empty); full trace
-    /// recording is deferred until the dashboard demo requires it.
-    pub fn get_traced<T: Value>(&self, handle: Incr<T>) -> (T, PropagationTrace) {
-        let value = self.get(handle);
-        let trace = PropagationTrace {
-            target: handle.slot(),
-            total_nodes: self.node_count(),
-            nodes_recomputed: 0,
-            nodes_cutoff: 0,
-            elapsed_ns: 0,
-            node_traces: Vec::new(),
-        };
-        (value, trace)
-    }
-
-    /// Return structural metadata about every node in the graph. Useful
-    /// for visualizing the dependency graph in the dashboard demo.
-    pub fn graph_snapshot(&self) -> Vec<NodeInfo> {
-        let inner = self.inner.read().expect("runtime inner lock poisoned");
-        let count = inner.compute_fns.len();
-        let mut infos = Vec::with_capacity(count);
-        for slot in 0..count {
-            let is_compute = inner.compute_fns[slot].is_some();
-            let label = inner
-                .labels
-                .get(&(slot as u32))
-                .cloned()
-                .unwrap_or_default();
-            let node = self.nodes.get(slot as u32);
-            let deps: Vec<u32> = node.collect_deps().iter().map(|d| d.0).collect();
-            let dependents: Vec<u32> = inner.dependents[slot].iter().map(|d| d.0).collect();
-            infos.push(NodeInfo {
-                slot: slot as u32,
-                kind: if is_compute {
-                    NodeKindInfo::Compute
-                } else {
-                    NodeKindInfo::Input
-                },
-                label,
-                dependencies: deps,
-                dependents,
-            });
-        }
-        infos
-    }
-}
-
-impl Default for Runtime {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Extract a readable message from a caught panic payload.
-///
-/// Rust panic payloads are `Box<dyn Any + Send>` with no enforced
-/// type; the common producers are `panic!("literal")` which yields a
-/// `&'static str` and `panic!("fmt {}", x)` which yields a `String`.
-/// Other types (user-constructed panics via `panic_any`) fall back to
-/// a generic message so failures are never silently swallowed.
-fn extract_panic_message(payload: &Box<dyn Any + Send>) -> String {
-    if let Some(s) = payload.downcast_ref::<&'static str>() {
-        (*s).to_string()
-    } else if let Some(s) = payload.downcast_ref::<String>() {
-        s.clone()
-    } else {
-        "compute function panicked with a non-string payload".to_string()
-    }
-}
-
-// Runtime is Send + Sync via its fields: compute closures are bound
-// to `Fn + Send + Sync + 'static`, and everything else is either
-// atomic or wrapped in RwLock/Mutex.
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn new_runtime_has_unique_id_and_empty_store() {
-        let rt_a = Runtime::new();
-        let rt_b = Runtime::new();
-        assert_ne!(rt_a.id(), rt_b.id());
-        assert_eq!(rt_a.revision(), 0);
-    }
-
-    #[test]
-    fn create_input_and_get_returns_initial_value() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(42);
-        assert_eq!(rt.get(input), 42);
-    }
-
-    #[test]
-    fn set_updates_the_input_value() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        assert_eq!(rt.get(input), 1);
-        rt.set(input, 99);
-        assert_eq!(rt.get(input), 99);
-        rt.set(input, 7);
-        assert_eq!(rt.get(input), 7);
-    }
-
-    #[test]
-    fn set_bumps_revision() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(0);
-        assert_eq!(rt.revision(), 0);
-        rt.set(input, 1);
-        assert_eq!(rt.revision(), 1);
-        rt.set(input, 2);
-        assert_eq!(rt.revision(), 2);
-    }
-
-    #[test]
-    fn multiple_inputs_of_same_type_are_independent() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let c = rt.create_input::<u64>(3);
-        assert_eq!(rt.get(a), 1);
-        assert_eq!(rt.get(b), 2);
-        assert_eq!(rt.get(c), 3);
-        rt.set(b, 20);
-        assert_eq!(rt.get(a), 1);
-        assert_eq!(rt.get(b), 20);
-        assert_eq!(rt.get(c), 3);
-    }
-
-    #[test]
-    fn inputs_of_different_types_coexist() {
-        let rt = Runtime::new();
-        let int_in = rt.create_input::<u64>(10);
-        let str_in = rt.create_input::<String>("hello".to_string());
-        let vec_in = rt.create_input::<Vec<i32>>(vec![1, 2, 3]);
-        assert_eq!(rt.get(int_in), 10);
-        assert_eq!(rt.get(str_in), "hello");
-        assert_eq!(rt.get(vec_in), vec![1, 2, 3]);
-        rt.set(str_in, "world".to_string());
-        rt.set(vec_in, vec![4, 5]);
-        assert_eq!(rt.get(int_in), 10);
-        assert_eq!(rt.get(str_in), "world");
-        assert_eq!(rt.get(vec_in), vec![4, 5]);
-    }
-
-    #[test]
-    fn query_computes_on_first_get_and_memoizes() {
-        use std::sync::atomic::{AtomicUsize, Ordering};
-        let rt = Runtime::new();
-        let counter = Arc::new(AtomicUsize::new(0));
-
-        let q = {
-            let counter = counter.clone();
-            rt.create_query::<u64, _>(move |_rt| {
-                counter.fetch_add(1, Ordering::SeqCst);
-                42
-            })
-        };
-
-        assert_eq!(counter.load(Ordering::SeqCst), 0, "compute should be lazy");
-        assert_eq!(rt.get(q), 42);
-        assert_eq!(counter.load(Ordering::SeqCst), 1);
-        // Subsequent gets return the memoized value without re-running.
-        assert_eq!(rt.get(q), 42);
-        assert_eq!(rt.get(q), 42);
-        assert_eq!(counter.load(Ordering::SeqCst), 1);
-    }
-
-    #[test]
-    fn query_compute_can_call_get_on_another_node() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(10);
-        let b = rt.create_input::<u64>(32);
-        let sum = rt.create_query::<u64, _>(move |rt| rt.get(a) + rt.get(b));
-        assert_eq!(rt.get(sum), 42);
-    }
-
-    #[test]
-    fn query_with_string_value() {
-        let rt = Runtime::new();
-        let name = rt.create_input::<String>("Anish".to_string());
-        let greeting = rt.create_query::<String, _>(move |rt| format!("hi, {}", rt.get(name)));
-        assert_eq!(rt.get(greeting), "hi, Anish");
-    }
-
-    #[test]
-    fn query_recomputes_when_its_input_changes() {
-        // Commit J makes v2 reactive. Previous commits shipped a
-        // failing-intent test named
-        // query_memoization_is_NOT_reactive_in_this_commit that
-        // asserted the opposite of this behavior. That test was
-        // renamed and its assertion flipped here. A future commit
-        // touching reactivity that breaks this test is breaking
-        // something load-bearing.
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(a) * 10);
-        assert_eq!(rt.get(q), 10);
-        rt.set(a, 7);
-        assert_eq!(
-            rt.get(q),
-            70,
-            "query should reflect the new input value after set + get"
-        );
-        rt.set(a, 100);
-        assert_eq!(rt.get(q), 1000);
-    }
-
-    #[test]
-    #[should_panic(expected = "Incr handle from runtime")]
-    fn cross_runtime_handle_panics() {
-        let rt_a = Runtime::new();
-        let rt_b = Runtime::new();
-        let h = rt_a.create_input::<u64>(1);
-        let _ = rt_b.get(h);
-    }
-
-    #[test]
-    #[should_panic(expected = "set() called on a query node")]
-    fn set_on_query_node_panics() {
-        let rt = Runtime::new();
-        let q = rt.create_query::<u64, _>(|_| 42);
-        rt.set(q, 99);
-    }
-
-    #[test]
-    fn runtime_is_send_and_sync() {
-        fn assert_send_sync<T: Send + Sync>() {}
-        assert_send_sync::<Runtime>();
-    }
-
-    #[test]
-    fn get_is_callable_from_multiple_threads_after_set_completes() {
-        use std::thread;
-        let rt = Arc::new(Runtime::new());
-        let input = rt.create_input::<u64>(100);
-        rt.set(input, 200);
-
-        // Spawn several readers, each of which observes the final value.
-        // This only tests that the runtime's Send+Sync contract holds
-        // and that a single-writer-many-readers handoff works. Real
-        // concurrent correctness is verified by a future commit's
-        // loom/property tests.
-        let handles: Vec<_> = (0..4)
-            .map(|_| {
-                let rt = rt.clone();
-                thread::spawn(move || rt.get(input))
-            })
-            .collect();
-        for h in handles {
-            assert_eq!(h.join().unwrap(), 200);
-        }
-    }
-
-    #[test]
-    fn concurrent_get_and_set_on_non_copy_input_is_race_free() {
-        // Regression test for the reader-writer data race on GenericArena
-        // slots identified in review finding C1. Before the fix, concurrent
-        // rt.get and rt.set on the same String input would race on the
-        // UnsafeCell<Option<String>> slot: the writer's plain-non-atomic
-        // `*slot = Some(new)` drops the old String and installs a new one
-        // while the reader is mid-clone, yielding torn data or a segfault.
-        //
-        // The fix uses the nodes RwLock as a synchronization gate: readers
-        // hold nodes.read() across the arena read, writers hold nodes.write()
-        // across the arena write, so reader and writer never touch the slot
-        // simultaneously.
-        //
-        // This test exists to prove the fix works. Under miri, the broken
-        // version of the code flags a data race; the fixed version passes.
-        // Under regular cargo test, the broken version may corrupt strings
-        // or segfault; the fixed version returns valid strings reliably.
-
-        use std::thread;
-
-        // A small set of valid values. Every observation must match one
-        // of these exactly; anything else indicates a torn read.
-        let valid_values: Vec<String> = (0..4)
-            .map(|i| format!("value-{}-with-padding-to-force-heap-allocation", i))
-            .collect();
-
-        let rt = Arc::new(Runtime::new());
-        let input = rt.create_input::<String>(valid_values[0].clone());
-
-        // Spawn readers that loop on get and verify each observed value
-        // is in the valid set. Any torn string will mismatch.
-        const READER_ITERS: usize = 2_000;
-        const READERS: usize = 4;
-        const WRITER_ITERS: usize = 2_000;
-
-        let stop = Arc::new(std::sync::atomic::AtomicBool::new(false));
-
-        let reader_handles: Vec<_> = (0..READERS)
-            .map(|_| {
-                let rt = rt.clone();
-                let valid = valid_values.clone();
-                let stop = stop.clone();
-                thread::spawn(move || {
-                    let mut seen = 0usize;
-                    while !stop.load(Ordering::Relaxed) && seen < READER_ITERS {
-                        let v = rt.get(input);
-                        assert!(
-                            valid.contains(&v),
-                            "observed torn or corrupt value: {:?}",
-                            v
-                        );
-                        seen += 1;
-                    }
-                })
-            })
-            .collect();
-
-        // Writer loop: rotate through the valid values.
-        for i in 0..WRITER_ITERS {
-            let v = &valid_values[i % valid_values.len()];
-            rt.set(input, v.clone());
-        }
-        stop.store(true, Ordering::Relaxed);
-
-        for h in reader_handles {
-            h.join()
-                .expect("reader thread panicked; data race detected");
-        }
-    }
-
-    #[test]
-    fn concurrent_get_and_set_on_vec_input_is_race_free() {
-        // Second shape of the C1 regression: Vec<u64> has both a length
-        // and a pointer in its slot, so a torn read can observe a
-        // length from one Vec and a data pointer from another, leading
-        // to an out-of-bounds read when the cloned Vec is used.
-
-        use std::thread;
-
-        let values: Vec<Vec<u64>> = vec![
-            vec![1, 2, 3, 4, 5, 6, 7, 8],
-            vec![100, 200, 300, 400, 500, 600, 700, 800],
-            vec![9999; 16],
-        ];
-
-        let rt = Arc::new(Runtime::new());
-        let input = rt.create_input::<Vec<u64>>(values[0].clone());
-
-        const ITERS: usize = 2_000;
-        let stop = Arc::new(std::sync::atomic::AtomicBool::new(false));
-
-        let handles: Vec<_> = (0..3)
-            .map(|_| {
-                let rt = rt.clone();
-                let valid = values.clone();
-                let stop = stop.clone();
-                thread::spawn(move || {
-                    while !stop.load(Ordering::Relaxed) {
-                        let v = rt.get(input);
-                        // Every observed vec must match one of the valids
-                        // exactly. A torn vec would fail this check OR
-                        // would have faulted in the clone itself.
-                        assert!(
-                            valid.iter().any(|expected| expected == &v),
-                            "observed torn vec: {:?}",
-                            v
-                        );
-                    }
-                })
-            })
-            .collect();
-
-        for i in 0..ITERS {
-            rt.set(input, values[i % values.len()].clone());
-        }
-        stop.store(true, Ordering::Relaxed);
-
-        for h in handles {
-            h.join()
-                .expect("reader thread panicked; data race detected");
-        }
-    }
-
-    // Dependency tracking tests (commit H).
-    //
-    // These tests verify that `rt.get` calls inside a compute closure
-    // record their handles as dependencies of the currently-computing
-    // node, and that the recorded deps are correctly deduplicated and
-    // published to the node. They do NOT test reactivity: commit H
-    // records deps but does not yet propagate changes through them.
-    // Reactivity is commit J.
-
-    /// Read a node's published dependencies. Test-only helper.
-    /// After commit U the nodes store is lock-free; this helper
-    /// just calls through to the direct accessor.
-    fn collect_deps_for_slot(rt: &Runtime, slot: u32) -> Vec<super::super::node::NodeId> {
-        rt.nodes.get(slot).collect_deps()
-    }
-
-    /// Test-only: read a node's published dependents (forward edges).
-    fn collect_dependents_for_slot(rt: &Runtime, slot: u32) -> Vec<super::super::node::NodeId> {
-        let inner = rt.inner.read().unwrap();
-        inner.dependents[slot as usize].clone()
-    }
-
-    #[test]
-    fn query_records_its_input_dependency() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(7);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) + 1);
-        assert_eq!(rt.get(q), 8);
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        assert_eq!(deps.len(), 1);
-        assert_eq!(deps[0].0, input.slot());
-    }
-
-    #[test]
-    fn query_records_multiple_input_dependencies_in_get_order() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let c = rt.create_input::<u64>(3);
-        let sum = rt.create_query::<u64, _>(move |rt| rt.get(a) + rt.get(b) + rt.get(c));
-        assert_eq!(rt.get(sum), 6);
-        let deps = collect_deps_for_slot(&rt, sum.slot());
-        assert_eq!(deps.len(), 3);
-        assert_eq!(deps[0].0, a.slot());
-        assert_eq!(deps[1].0, b.slot());
-        assert_eq!(deps[2].0, c.slot());
-    }
-
-    #[test]
-    fn duplicate_reads_dedup_to_single_dep_in_first_occurrence_order() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(10);
-        let b = rt.create_input::<u64>(20);
-        // Compute reads a, b, a, b, a. The dedup should preserve the
-        // order of first occurrence: [a, b], not [b, a] or duplicates.
-        let q = rt.create_query::<u64, _>(move |rt| {
-            let _ = rt.get(a);
-            let _ = rt.get(b);
-            let _ = rt.get(a);
-            let _ = rt.get(b);
-            rt.get(a)
-        });
-        let _ = rt.get(q);
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        assert_eq!(deps.len(), 2, "expected 2 unique deps, got {:?}", deps);
-        assert_eq!(deps[0].0, a.slot(), "first unique dep should be a");
-        assert_eq!(deps[1].0, b.slot(), "second unique dep should be b");
-    }
-
-    #[test]
-    fn nested_queries_each_get_their_own_dep_list() {
-        // Q1 reads input I.
-        // Q2 reads Q1 only.
-        // Verify Q1's deps are [I] and Q2's deps are [Q1], not that
-        // Q2 transitively inherits I. Each compute frame has its own
-        // recorded deps, and reads to Q1 from inside Q2's compute
-        // run in their own (newly pushed) frame rather than appending
-        // to Q2's.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(5);
-        let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) * 2);
-        let q2 = rt.create_query::<u64, _>(move |rt| rt.get(q1) + 3);
-        assert_eq!(rt.get(q2), 13);
-
-        let q1_deps = collect_deps_for_slot(&rt, q1.slot());
-        assert_eq!(q1_deps.len(), 1);
-        assert_eq!(q1_deps[0].0, input.slot());
-
-        let q2_deps = collect_deps_for_slot(&rt, q2.slot());
-        assert_eq!(q2_deps.len(), 1);
-        assert_eq!(
-            q2_deps[0].0,
-            q1.slot(),
-            "q2's deps should contain q1, not input transitively"
-        );
-    }
-
-    #[test]
-    fn top_level_get_outside_compute_records_nothing() {
-        // A plain `rt.get(input)` from the test body (no active
-        // compute frame on this thread) must not panic and must not
-        // leave stale state in the thread-local stack. After the
-        // call, any subsequent compute should still be able to push
-        // a fresh frame cleanly.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(42);
-        // Top-level read: no compute is running, no frame to record on.
-        assert_eq!(rt.get(input), 42);
-        // Now create a query and verify its first compute works
-        // normally (i.e., the stack was left in a clean state after
-        // the top-level read).
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) * 10);
-        assert_eq!(rt.get(q), 420);
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        assert_eq!(deps.len(), 1);
-        assert_eq!(deps[0].0, input.slot());
-    }
-
-    #[test]
-    fn compute_stack_is_clean_between_queries() {
-        // After one query's compute runs, the stack should be empty
-        // again. If it isn't, the next query would see the previous
-        // query's frame as its "parent" and misattribute deps.
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let q_a = rt.create_query::<u64, _>(move |rt| rt.get(a));
-        let q_b = rt.create_query::<u64, _>(move |rt| rt.get(b));
-        // Trigger q_a first, then q_b, and verify each has only its
-        // own dep, not the union. If the stack leaked between
-        // compute invocations, q_b's deps would include `a`.
-        let _ = rt.get(q_a);
-        let _ = rt.get(q_b);
-        let q_a_deps = collect_deps_for_slot(&rt, q_a.slot());
-        let q_b_deps = collect_deps_for_slot(&rt, q_b.slot());
-        assert_eq!(q_a_deps.len(), 1);
-        assert_eq!(q_a_deps[0].0, a.slot());
-        assert_eq!(q_b_deps.len(), 1);
-        assert_eq!(q_b_deps[0].0, b.slot());
-    }
-
-    #[test]
-    fn cross_runtime_get_inside_compute_does_not_record_on_current_frame() {
-        // A compute closure for runtime A that captures a handle from
-        // runtime B and reads it should not record B's slot on A's
-        // frame. Runtime identity is the gate.
-        let rt_a = Arc::new(Runtime::new());
-        let rt_b = Arc::new(Runtime::new());
-        let b_input = rt_b.create_input::<u64>(99);
-        let a_input = rt_a.create_input::<u64>(1);
-
-        // Build a compute closure that captures rt_b and b_input by
-        // Arc + Copy and reads both a_input (own runtime) and b_input
-        // (other runtime).
-        let q = {
-            let rt_b_inner = rt_b.clone();
-            rt_a.create_query::<u64, _>(move |rt| {
-                let other = rt_b_inner.get(b_input); // cross-runtime, must not record on rt_a's frame
-                rt.get(a_input) + other
-            })
-        };
-        assert_eq!(rt_a.get(q), 100);
-        let deps = collect_deps_for_slot(&rt_a, q.slot());
-        assert_eq!(
-            deps.len(),
-            1,
-            "cross-runtime reads should not record; expected only a_input dep, got {:?}",
-            deps
-        );
-        assert_eq!(deps[0].0, a_input.slot());
-    }
-
-    #[test]
-    fn query_reading_nothing_records_empty_deps() {
-        let rt = Runtime::new();
-        let q = rt.create_query::<u64, _>(|_rt| 42);
-        assert_eq!(rt.get(q), 42);
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        assert!(deps.is_empty(), "got unexpected deps: {:?}", deps);
-    }
-
-    // Forward-edge (dependents) tests (commit I).
-
-    #[test]
-    fn fresh_input_has_no_dependents() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let dependents = collect_dependents_for_slot(&rt, input.slot());
-        assert!(dependents.is_empty());
-    }
-
-    #[test]
-    fn fresh_query_has_no_dependents() {
-        let rt = Runtime::new();
-        let q = rt.create_query::<u64, _>(|_| 1);
-        // Dependents are populated for a node when OTHER queries
-        // depend on it, not when this query runs its own compute.
-        let dependents = collect_dependents_for_slot(&rt, q.slot());
-        assert!(dependents.is_empty());
-    }
-
-    #[test]
-    fn input_gains_dependent_after_query_first_reads_it() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(10);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) + 1);
-
-        // Before the query is ever run, input has no dependents.
-        assert!(collect_dependents_for_slot(&rt, input.slot()).is_empty());
-
-        // Running the query triggers its first compute, which
-        // records input as a dep and writes the reverse edge.
-        let _ = rt.get(q);
-
-        let dependents = collect_dependents_for_slot(&rt, input.slot());
-        assert_eq!(dependents.len(), 1);
-        assert_eq!(dependents[0].0, q.slot());
-    }
-
-    #[test]
-    fn input_with_multiple_dependents_collects_all_of_them() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(5);
-        let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) * 2);
-        let q2 = rt.create_query::<u64, _>(move |rt| rt.get(input) * 3);
-        let q3 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 100);
-
-        let _ = rt.get(q1);
-        let _ = rt.get(q2);
-        let _ = rt.get(q3);
-
-        let dependents = collect_dependents_for_slot(&rt, input.slot());
-        assert_eq!(dependents.len(), 3);
-        // Order reflects the order in which queries were first computed.
-        assert_eq!(dependents[0].0, q1.slot());
-        assert_eq!(dependents[1].0, q2.slot());
-        assert_eq!(dependents[2].0, q3.slot());
-    }
-
-    #[test]
-    fn intermediate_query_has_its_downstream_as_dependent() {
-        // input → q1 → q2
-        // After running q2, q1's dependents should be [q2] and
-        // input's dependents should be [q1]. This exercises the
-        // multi-level dep chain.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(3);
-        let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 10);
-        let q2 = rt.create_query::<u64, _>(move |rt| rt.get(q1) * 2);
-
-        let _ = rt.get(q2); // triggers q1's compute as a side effect
-
-        let input_deps = collect_dependents_for_slot(&rt, input.slot());
-        assert_eq!(input_deps.len(), 1);
-        assert_eq!(input_deps[0].0, q1.slot());
-
-        let q1_deps = collect_dependents_for_slot(&rt, q1.slot());
-        assert_eq!(q1_deps.len(), 1);
-        assert_eq!(q1_deps[0].0, q2.slot());
-
-        // q2 has no dependents yet; nothing reads it.
-        let q2_deps = collect_dependents_for_slot(&rt, q2.slot());
-        assert!(q2_deps.is_empty());
-    }
-
-    #[test]
-    fn query_with_multiple_distinct_deps_writes_reverse_edge_to_each() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let c = rt.create_input::<u64>(3);
-        let sum = rt.create_query::<u64, _>(move |rt| rt.get(a) + rt.get(b) + rt.get(c));
-
-        let _ = rt.get(sum);
-
-        let a_deps = collect_dependents_for_slot(&rt, a.slot());
-        let b_deps = collect_dependents_for_slot(&rt, b.slot());
-        let c_deps = collect_dependents_for_slot(&rt, c.slot());
-
-        assert_eq!(a_deps.len(), 1);
-        assert_eq!(a_deps[0].0, sum.slot());
-        assert_eq!(b_deps.len(), 1);
-        assert_eq!(b_deps[0].0, sum.slot());
-        assert_eq!(c_deps.len(), 1);
-        assert_eq!(c_deps[0].0, sum.slot());
-    }
-
-    #[test]
-    fn reverse_edges_are_written_once_per_dep_not_once_per_read() {
-        // A query that reads the same input three times should still
-        // add exactly one reverse edge. The dep recording dedup
-        // happens inside the compute frame, so publish_initial_deps
-        // sees a single entry, and the reverse-edge loop runs once
-        // per unique dep.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(7);
-        let q = rt.create_query::<u64, _>(move |rt| {
-            let a = rt.get(input);
-            let b = rt.get(input);
-            let c = rt.get(input);
-            a + b + c
-        });
-
-        let _ = rt.get(q);
-
-        let dependents = collect_dependents_for_slot(&rt, input.slot());
-        assert_eq!(
-            dependents.len(),
-            1,
-            "expected exactly one reverse edge, got {:?}",
-            dependents
-        );
-        assert_eq!(dependents[0].0, q.slot());
-    }
-
-    // Reactivity tests (commit J).
-
-    fn state_of(rt: &Runtime, slot: u32) -> NodeState {
-        rt.nodes.get(slot).state()
-    }
-
-    #[test]
-    fn set_marks_single_direct_dependent_dirty() {
-        // After set, the dependent query should be in Dirty state
-        // (before it has been re-read). This proves the dirty walk
-        // visited the query and transitioned it.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) + 100);
-
-        // First compute leaves q in Clean.
-        let _ = rt.get(q);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-
-        // Set the input; the dirty walk should mark q Dirty.
-        rt.set(input, 50);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-
-        // Reading q triggers the recompute and observes the new value.
-        assert_eq!(rt.get(q), 150);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-    }
-
-    #[test]
-    fn set_marks_transitive_dependents_dirty() {
-        // input -> q1 -> q2 -> q3. Setting input should mark all
-        // three queries Dirty via the transitive walk.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 1);
-        let q2 = rt.create_query::<u64, _>(move |rt| rt.get(q1) * 2);
-        let q3 = rt.create_query::<u64, _>(move |rt| rt.get(q2) + 100);
-
-        assert_eq!(rt.get(q3), ((1 + 1) * 2) + 100); // 104
-        assert_eq!(state_of(&rt, q1.slot()), NodeState::Clean);
-        assert_eq!(state_of(&rt, q2.slot()), NodeState::Clean);
-        assert_eq!(state_of(&rt, q3.slot()), NodeState::Clean);
-
-        rt.set(input, 10);
-        // All three should be Dirty after the walk.
-        assert_eq!(state_of(&rt, q1.slot()), NodeState::Dirty);
-        assert_eq!(state_of(&rt, q2.slot()), NodeState::Dirty);
-        assert_eq!(state_of(&rt, q3.slot()), NodeState::Dirty);
-
-        // Reading q3 cascades recomputes through q2 and q1.
-        assert_eq!(rt.get(q3), ((10 + 1) * 2) + 100); // 122
-    }
-
-    #[test]
-    fn set_leaves_unrelated_queries_clean() {
-        // Two inputs, two queries. Setting one input should only
-        // dirty the query that reads it, not the other.
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(10);
-        let q_a = rt.create_query::<u64, _>(move |rt| rt.get(a) * 100);
-        let q_b = rt.create_query::<u64, _>(move |rt| rt.get(b) * 100);
-
-        assert_eq!(rt.get(q_a), 100);
-        assert_eq!(rt.get(q_b), 1000);
-
-        rt.set(a, 5);
-        assert_eq!(state_of(&rt, q_a.slot()), NodeState::Dirty);
-        assert_eq!(
-            state_of(&rt, q_b.slot()),
-            NodeState::Clean,
-            "q_b reads b, not a; should not be invalidated"
-        );
-
-        assert_eq!(rt.get(q_a), 500);
-        assert_eq!(rt.get(q_b), 1000); // unchanged
-    }
-
-    #[test]
-    fn multiple_dependents_of_one_input_all_dirtied() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 1);
-        let q2 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 2);
-        let q3 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 3);
-
-        let _ = rt.get(q1);
-        let _ = rt.get(q2);
-        let _ = rt.get(q3);
-
-        rt.set(input, 100);
-        assert_eq!(state_of(&rt, q1.slot()), NodeState::Dirty);
-        assert_eq!(state_of(&rt, q2.slot()), NodeState::Dirty);
-        assert_eq!(state_of(&rt, q3.slot()), NodeState::Dirty);
-
-        assert_eq!(rt.get(q1), 101);
-        assert_eq!(rt.get(q2), 102);
-        assert_eq!(rt.get(q3), 103);
-    }
-
-    #[test]
-    fn diamond_dependency_each_node_visited_once() {
-        // q1 depends on input; q2a and q2b both depend on q1; q3
-        // depends on both q2a and q2b. This is a diamond: q1 is
-        // reached via two paths in the dirty walk. The visited set
-        // ensures it's only processed once.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) * 2);
-        let q2a = rt.create_query::<u64, _>(move |rt| rt.get(q1) + 10);
-        let q2b = rt.create_query::<u64, _>(move |rt| rt.get(q1) + 20);
-        let q3 = rt.create_query::<u64, _>(move |rt| rt.get(q2a) + rt.get(q2b));
-
-        // Initial: input=1 → q1=2 → q2a=12, q2b=22 → q3=34.
-        assert_eq!(rt.get(q3), 34);
-
-        rt.set(input, 5);
-        // input=5 → q1=10 → q2a=20, q2b=30 → q3=50.
-        assert_eq!(rt.get(q3), 50);
-    }
-
-    #[test]
-    fn multiple_sets_in_sequence_propagate_correctly() {
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) * 1000);
-
-        assert_eq!(rt.get(q), 1000);
-        rt.set(input, 2);
-        assert_eq!(rt.get(q), 2000);
-        rt.set(input, 3);
-        assert_eq!(rt.get(q), 3000);
-        rt.set(input, 4);
-        assert_eq!(rt.get(q), 4000);
-        rt.set(input, 5);
-        assert_eq!(rt.get(q), 5000);
-    }
-
-    #[test]
-    fn set_on_input_with_no_dependents_is_a_noop_walk() {
-        // An input that no query depends on still has its value
-        // updated correctly on set, and the (empty) dirty walk
-        // should not do anything observable.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(10);
-        assert_eq!(rt.get(input), 10);
-        rt.set(input, 99);
-        assert_eq!(rt.get(input), 99);
-    }
-
-    #[test]
-    fn query_never_computed_is_unaffected_by_set() {
-        // A query in New state (never computed) should not be
-        // transitioned to Dirty by a set. The dirty walk's
-        // try_transition(Clean, Dirty) should fail silently on New.
-        // The dependents edge doesn't exist yet either (it's written
-        // on first compute), so the walk simply doesn't reach it.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) * 10);
-
-        // Do NOT read q. Its state is New, and input has no
-        // dependents recorded yet.
-        assert_eq!(state_of(&rt, q.slot()), NodeState::New);
-        assert!(collect_dependents_for_slot(&rt, input.slot()).is_empty());
-
-        rt.set(input, 5);
-
-        // q is still New; the first get after this set will compute
-        // with the latest value (5), not 1.
-        assert_eq!(state_of(&rt, q.slot()), NodeState::New);
-        assert_eq!(rt.get(q), 50);
-    }
-
-    #[test]
-    fn query_reading_two_inputs_invalidated_by_either() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(a) * 10 + rt.get(b));
-
-        assert_eq!(rt.get(q), 12);
-
-        rt.set(a, 5);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-        assert_eq!(rt.get(q), 52);
-
-        rt.set(b, 99);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-        assert_eq!(rt.get(q), 149);
-    }
-
-    #[test]
-    fn string_query_reactivity() {
-        // Reactivity with a non-primitive value type. Exercises the
-        // same code path as the u64 tests but through GenericArena's
-        // UnsafeCell<Option<String>> storage.
-        let rt = Runtime::new();
-        let name = rt.create_input::<String>("Anish".to_string());
-        let greeting = rt.create_query::<String, _>(move |rt| format!("hi, {}", rt.get(name)));
-
-        assert_eq!(rt.get(greeting), "hi, Anish");
-        rt.set(name, "world".to_string());
-        assert_eq!(rt.get(greeting), "hi, world");
-    }
-
-    // Early cutoff tests (commit K).
-
-    #[test]
-    fn set_with_same_value_is_a_noop() {
-        // Setting an input to its current value should not bump the
-        // revision counter, because the early cutoff short-circuits
-        // before the arena write. Verifies the no-op path at the input
-        // level, which is the cheapest and most common case.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(42);
-        assert_eq!(rt.revision(), 0);
-
-        rt.set(input, 42); // same value
-        assert_eq!(rt.revision(), 0, "no-op set must not bump revision");
-
-        rt.set(input, 100); // different value
-        assert_eq!(rt.revision(), 1, "real set should bump revision");
-
-        rt.set(input, 100); // same as current
-        assert_eq!(rt.revision(), 1, "second no-op set must not bump");
-    }
-
-    #[test]
-    fn set_with_same_value_does_not_dirty_dependents() {
-        // The dirty walk should be skipped on a no-op set. Dependents
-        // stay Clean because the walk never runs.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) + 100);
-
-        assert_eq!(rt.get(q), 101);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-
-        rt.set(input, 1); // no-op
-        assert_eq!(
-            state_of(&rt, q.slot()),
-            NodeState::Clean,
-            "no-op set must not mark dependent Dirty"
-        );
-        // Reading q returns the cached value without recomputing.
-        assert_eq!(rt.get(q), 101);
-    }
-
-    #[test]
-    fn set_with_same_string_is_a_noop() {
-        // Early cutoff for non-primitive types. PartialEq on String
-        // drives the check; the saved work is the String clone inside
-        // arena.write, which is the whole point of cutoff for large
-        // value types.
-        let rt = Runtime::new();
-        let s = rt.create_input::<String>("hello".to_string());
-        assert_eq!(rt.revision(), 0);
-        rt.set(s, "hello".to_string());
-        assert_eq!(rt.revision(), 0, "no-op String set must not bump revision");
-        rt.set(s, "world".to_string());
-        assert_eq!(rt.revision(), 1);
-    }
-
-    #[test]
-    fn recompute_returning_same_value_transitions_clean_without_panic() {
-        // When a recompute produces the same value as before, the
-        // arena write is skipped (saves a clone for large types) and
-        // the state transitions back to Clean. This test verifies
-        // the path runs without panicking or leaving the node in a
-        // weird state; the value-level cutoff's effect is hard to
-        // observe directly without red-green verification, which is
-        // a follow-up commit.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(5);
-        // Query whose value depends on the input but happens to be
-        // constant over the range of inputs we use: sign of input.
-        let q = rt.create_query::<u64, _>(move |rt| if rt.get(input) > 0 { 1 } else { 0 });
-
-        assert_eq!(rt.get(q), 1);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-
-        // Change input to another positive value. q is marked Dirty.
-        rt.set(input, 100);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-
-        // Recompute produces 1 again (same value). The code path
-        // skips the arena write. q transitions to Clean.
-        assert_eq!(rt.get(q), 1);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-
-        // Changing to a value that would flip the output actually
-        // does flip it.
-        rt.set(input, 0);
-        assert_eq!(rt.get(q), 0);
-    }
-
-    #[test]
-    fn input_with_dependents_noop_set_does_not_trigger_recompute() {
-        // Count compute invocations via a shared counter. A no-op
-        // set must not cause the dependent query to recompute, which
-        // the counter directly proves.
-        use std::sync::atomic::{AtomicUsize, Ordering};
-        let rt = Runtime::new();
-        let counter = Arc::new(AtomicUsize::new(0));
-        let input = rt.create_input::<u64>(10);
-        let q = {
-            let counter = counter.clone();
-            rt.create_query::<u64, _>(move |rt| {
-                counter.fetch_add(1, Ordering::SeqCst);
-                rt.get(input) * 2
-            })
-        };
-
-        // First read triggers the initial compute (count = 1).
-        assert_eq!(rt.get(q), 20);
-        assert_eq!(counter.load(Ordering::SeqCst), 1);
-
-        // No-op set: no dirty walk, no recompute.
-        rt.set(input, 10);
-        assert_eq!(rt.get(q), 20);
-        assert_eq!(
-            counter.load(Ordering::SeqCst),
-            1,
-            "no-op set should not recompute"
-        );
-
-        // Real set: dirty walk runs, recompute happens.
-        rt.set(input, 20);
-        assert_eq!(rt.get(q), 40);
-        assert_eq!(
-            counter.load(Ordering::SeqCst),
-            2,
-            "real set should recompute"
-        );
-
-        // Another no-op set: no recompute.
-        rt.set(input, 20);
-        assert_eq!(rt.get(q), 40);
-        assert_eq!(counter.load(Ordering::SeqCst), 2);
-    }
-
-    // Cycle detection and panic catching tests (commit L).
-
-    /// Read a node's stashed failure message for test assertions.
-    fn failure_message_for(rt: &Runtime, slot: u32) -> Option<String> {
-        let inner = rt.inner.read().unwrap();
-        inner.failure_messages[slot as usize].clone()
-    }
-
-    #[test]
-    fn self_cycle_panics_and_leaves_node_failed() {
-        // A query whose compute reads its own handle. Build the
-        // handle first (via a Mutex<Option<_>>) so the closure can
-        // reach back to it after create_query returns.
-        use std::sync::Mutex;
-        let rt = Runtime::new();
-        let me: Arc<Mutex<Option<Incr<u64>>>> = Arc::new(Mutex::new(None));
-        let q = {
-            let me = me.clone();
-            rt.create_query::<u64, _>(move |rt| {
-                let h = me.lock().unwrap().expect("self handle not set");
-                rt.get(h) // cycles
-            })
-        };
-        *me.lock().unwrap() = Some(q);
-
-        // Reading q triggers its compute, which attempts to read q,
-        // which trips the cycle detector. The panic unwinds through
-        // the compute closure, is caught by run_compute, the node is
-        // transitioned to Failed, and the panic is re-raised to our
-        // caller frame here.
-        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q)));
-        assert!(result.is_err(), "expected cycle to panic");
-
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Failed);
-        let msg = failure_message_for(&rt, q.slot()).expect("failure stashed");
-        assert!(
-            msg.contains("CycleError"),
-            "expected CycleError in failure message, got: {}",
-            msg
-        );
-    }
-
-    #[test]
-    fn mutual_cycle_between_two_queries_panics() {
-        use std::sync::Mutex;
-        let rt = Runtime::new();
-        let q1_handle: Arc<Mutex<Option<Incr<u64>>>> = Arc::new(Mutex::new(None));
-        let q2_handle: Arc<Mutex<Option<Incr<u64>>>> = Arc::new(Mutex::new(None));
-
-        let q1 = {
-            let q2h = q2_handle.clone();
-            rt.create_query::<u64, _>(move |rt| {
-                let h = q2h.lock().unwrap().expect("q2 handle not set");
-                rt.get(h)
-            })
-        };
-        let q2 = {
-            let q1h = q1_handle.clone();
-            rt.create_query::<u64, _>(move |rt| {
-                let h = q1h.lock().unwrap().expect("q1 handle not set");
-                rt.get(h)
-            })
-        };
-        *q1_handle.lock().unwrap() = Some(q1);
-        *q2_handle.lock().unwrap() = Some(q2);
-
-        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q1)));
-        assert!(result.is_err(), "expected mutual cycle to panic");
-    }
-
-    #[test]
-    fn compute_panic_is_caught_and_node_transitions_to_failed() {
-        let rt = Runtime::new();
-        let q = rt.create_query::<u64, _>(|_| panic!("oops, compute failed"));
-
-        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q)));
-        assert!(result.is_err());
-
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Failed);
-        let msg = failure_message_for(&rt, q.slot()).expect("failure stashed");
-        assert!(
-            msg.contains("oops, compute failed"),
-            "expected panic message in failure, got: {}",
-            msg
-        );
-    }
-
-    #[test]
-    fn subsequent_get_on_failed_node_panics_with_stored_message() {
-        let rt = Runtime::new();
-        let q = rt.create_query::<u64, _>(|_| panic!("original failure text"));
-
-        // First get triggers the panic path.
-        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q)));
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Failed);
-
-        // Second get on the Failed node should panic again with the
-        // stored message (not re-run the compute).
-        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q)));
-        let err = result.unwrap_err();
-        let msg = err
-            .downcast_ref::<String>()
-            .cloned()
-            .or_else(|| err.downcast_ref::<&'static str>().map(|s| s.to_string()))
-            .unwrap_or_default();
-        assert!(
-            msg.contains("Failed") && msg.contains("original failure text"),
-            "expected Failed + original message, got: {}",
-            msg
-        );
-    }
-
-    #[test]
-    fn panic_preserves_compute_stack_for_subsequent_operations() {
-        // After a panicking compute, the thread's COMPUTE_STACK must
-        // be empty again so subsequent computes can push fresh
-        // frames. If the stack leaked, the next query's deps would
-        // be misattributed to the dead frame.
-        let rt = Runtime::new();
-        let panicking = rt.create_query::<u64, _>(|_| panic!("this compute fails"));
-        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(panicking)));
-
-        // Now create and run a query that should work fine.
-        let input = rt.create_input::<u64>(5);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) * 2);
-        assert_eq!(rt.get(q), 10);
-
-        // And its deps should be recorded correctly.
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        assert_eq!(deps.len(), 1);
-        assert_eq!(deps[0].0, input.slot());
-    }
-
-    #[test]
-    fn failed_node_retries_after_upstream_set() {
-        // A query that panics only if its input is below some
-        // threshold. Set the input to a value that panics, observe
-        // Failed, set the input to a safe value, observe the node
-        // transitions Failed → Dirty → Clean on next read.
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(0);
-        let q = rt.create_query::<u64, _>(move |rt| {
-            let v = rt.get(input);
-            if v == 0 {
-                panic!("input is zero");
-            }
-            v * 10
-        });
-
-        let r1 = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q)));
-        assert!(r1.is_err());
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Failed);
-        assert!(failure_message_for(&rt, q.slot()).is_some());
-
-        // Set input to a non-panicking value. The dirty walk should
-        // transition Failed → Dirty and clear the stashed message.
-        rt.set(input, 7);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-        assert!(failure_message_for(&rt, q.slot()).is_none());
-
-        // Next get retries the compute, which now succeeds.
-        assert_eq!(rt.get(q), 70);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-    }
-
-    #[test]
-    fn panic_inside_nested_compute_propagates_to_outer() {
-        // q_outer reads q_inner; q_inner panics. The panic should
-        // leave both nodes in Failed state (q_inner directly from
-        // its own compute, q_outer because its compute panicked
-        // while propagating q_inner's panic).
-        let rt = Runtime::new();
-        let q_inner = rt.create_query::<u64, _>(|_| panic!("inner failure"));
-        let q_outer = rt.create_query::<u64, _>(move |rt| rt.get(q_inner) + 1);
-
-        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| rt.get(q_outer)));
-        assert!(result.is_err());
-
-        assert_eq!(state_of(&rt, q_inner.slot()), NodeState::Failed);
-        assert_eq!(state_of(&rt, q_outer.slot()), NodeState::Failed);
-    }
-
-    #[test]
-    fn non_cycling_read_does_not_trigger_cycle_check() {
-        // Sanity: a compute that reads an unrelated node should NOT
-        // trip the cycle check, even if its slot index happens to be
-        // low or near the computing node's slot.
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(a) + rt.get(b));
-        assert_eq!(rt.get(q), 3); // no panic
-    }
-
-    // Dynamic dependency tests (commit M).
-
-    #[test]
-    fn query_with_conditional_deps_tracks_only_read_branch() {
-        // A classic dynamic-dep query: read `flag`, then read one of
-        // two inputs depending on the flag. Initial flag true, reads
-        // `a`. Query's deps should be [flag, a].
-        let rt = Runtime::new();
-        let flag = rt.create_input::<bool>(true);
-        let a = rt.create_input::<u64>(10);
-        let b = rt.create_input::<u64>(20);
-
-        let q =
-            rt.create_query::<u64, _>(move |rt| if rt.get(flag) { rt.get(a) } else { rt.get(b) });
-
-        assert_eq!(rt.get(q), 10);
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        // Deps should be [flag, a] — only the branch that was
-        // actually taken.
-        assert_eq!(deps.len(), 2);
-        assert!(deps.iter().any(|d| d.0 == flag.slot()));
-        assert!(deps.iter().any(|d| d.0 == a.slot()));
-        assert!(!deps.iter().any(|d| d.0 == b.slot()));
-    }
-
-    #[test]
-    fn flipping_conditional_dep_updates_dep_list_and_reverse_edges() {
-        // Start with flag=true (reads a). Change flag to false, read
-        // q again (triggers recompute via dirty walk from flag). The
-        // new recompute reads b, not a. Assert that:
-        //   1. q's new deps are [flag, b]
-        //   2. a's dependents list no longer contains q (stale
-        //      reverse edge removed)
-        //   3. b's dependents list now contains q (new reverse edge
-        //      added)
-        let rt = Runtime::new();
-        let flag = rt.create_input::<bool>(true);
-        let a = rt.create_input::<u64>(100);
-        let b = rt.create_input::<u64>(200);
-
-        let q =
-            rt.create_query::<u64, _>(move |rt| if rt.get(flag) { rt.get(a) } else { rt.get(b) });
-
-        // First compute: reads flag + a.
-        assert_eq!(rt.get(q), 100);
-        assert!(collect_dependents_for_slot(&rt, a.slot())
-            .iter()
-            .any(|d| d.0 == q.slot()));
-        assert!(!collect_dependents_for_slot(&rt, b.slot())
-            .iter()
-            .any(|d| d.0 == q.slot()));
-
-        // Flip the flag. Dirty walk marks q Dirty via flag's
-        // dependents list.
-        rt.set(flag, false);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-
-        // Recompute. Now reads flag + b instead of flag + a.
-        assert_eq!(rt.get(q), 200);
-        let deps = collect_deps_for_slot(&rt, q.slot());
-        assert_eq!(deps.len(), 2);
-        assert!(deps.iter().any(|d| d.0 == flag.slot()));
-        assert!(deps.iter().any(|d| d.0 == b.slot()));
-        assert!(!deps.iter().any(|d| d.0 == a.slot()));
-
-        // Reverse edges: a should no longer point to q, b should.
-        let a_deps = collect_dependents_for_slot(&rt, a.slot());
-        let b_deps = collect_dependents_for_slot(&rt, b.slot());
-        assert!(
-            !a_deps.iter().any(|d| d.0 == q.slot()),
-            "stale reverse edge a -> q not removed: {:?}",
-            a_deps
-        );
-        assert!(
-            b_deps.iter().any(|d| d.0 == q.slot()),
-            "new reverse edge b -> q not added: {:?}",
-            b_deps
-        );
-    }
-
-    #[test]
-    fn removed_dep_no_longer_invalidates_query() {
-        // After a recompute changes the dep set to drop `a`, setting
-        // `a` should NOT mark q dirty (because q no longer reads a).
-        // Setting the dep that's now in the set (`b`) SHOULD mark q
-        // dirty.
-        let rt = Runtime::new();
-        let flag = rt.create_input::<bool>(true);
-        let a = rt.create_input::<u64>(1);
-        let b = rt.create_input::<u64>(2);
-        let q =
-            rt.create_query::<u64, _>(move |rt| if rt.get(flag) { rt.get(a) } else { rt.get(b) });
-
-        // First compute reads a. Second compute (after flag flip)
-        // reads b.
-        assert_eq!(rt.get(q), 1);
-        rt.set(flag, false);
-        assert_eq!(rt.get(q), 2);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Clean);
-
-        // Now setting a should NOT dirty q.
-        rt.set(a, 999);
-        assert_eq!(
-            state_of(&rt, q.slot()),
-            NodeState::Clean,
-            "q should not be dirtied by changes to a (no longer a dep)"
-        );
-
-        // But setting b should.
-        rt.set(b, 888);
-        assert_eq!(state_of(&rt, q.slot()), NodeState::Dirty);
-        assert_eq!(rt.get(q), 888);
-    }
-
-    #[test]
-    fn static_recompute_dep_list_does_not_trigger_reverse_edge_rewrite() {
-        // A recompute whose dep set is identical to the previous
-        // run's should take the fast path and not touch the
-        // dependents vec. Hard to observe directly, but we can
-        // verify the dependents list stays exactly as it was (no
-        // duplicates introduced by a redundant push).
-        let rt = Runtime::new();
-        let input = rt.create_input::<u64>(1);
-        let q = rt.create_query::<u64, _>(move |rt| rt.get(input) + 10);
-
-        assert_eq!(rt.get(q), 11);
-        let dependents_before = collect_dependents_for_slot(&rt, input.slot());
-        assert_eq!(dependents_before.len(), 1);
-        assert_eq!(dependents_before[0].0, q.slot());
-
-        // Change the input to a different value. Same deps on
-        // recompute, but this exercises the dep-diff fast path.
-        rt.set(input, 5);
-        assert_eq!(rt.get(q), 15);
-
-        let dependents_after = collect_dependents_for_slot(&rt, input.slot());
-        assert_eq!(
-            dependents_after.len(),
-            1,
-            "static recompute must not duplicate reverse edges: {:?}",
-            dependents_after
-        );
-        assert_eq!(dependents_after[0].0, q.slot());
-    }
-
-    #[test]
-    fn adding_a_dep_on_recompute_creates_new_reverse_edge() {
-        // Query starts reading just `a`. After flag flip it reads
-        // both `a` and `b`. Verify b's dependents gains q.
-        let rt = Runtime::new();
-        let flag = rt.create_input::<bool>(false);
-        let a = rt.create_input::<u64>(10);
-        let b = rt.create_input::<u64>(20);
-        let q = rt.create_query::<u64, _>(move |rt| {
-            let av = rt.get(a);
-            if rt.get(flag) {
-                av + rt.get(b)
-            } else {
-                av
-            }
-        });
-
-        assert_eq!(rt.get(q), 10);
-        // Only a (and flag) are deps; b is not yet.
-        assert!(!collect_dependents_for_slot(&rt, b.slot())
-            .iter()
-            .any(|d| d.0 == q.slot()));
-
-        rt.set(flag, true);
-        assert_eq!(rt.get(q), 30);
-        // Now b should be a dep.
-        assert!(collect_dependents_for_slot(&rt, b.slot())
-            .iter()
-            .any(|d| d.0 == q.slot()));
-    }
-
-    #[test]
-    fn dep_list_shrinking_across_inline_overflow_boundary() {
-        // Exercise the inline→overflow and overflow→inline
-        // transitions in replace_deps. First compute reads 10
-        // inputs (spills to overflow). Recompute after a conditional
-        // flip reads only 3 inputs (fits in inline). The old
-        // overflow box must be freed.
-        let rt = Runtime::new();
-        let flag = rt.create_input::<bool>(true);
-        let mut inputs: Vec<Incr<u64>> = Vec::new();
-        for i in 0..10 {
-            inputs.push(rt.create_input::<u64>(i));
-        }
-        let inputs_for_closure = inputs.clone();
-        let q = rt.create_query::<u64, _>(move |rt| {
-            if rt.get(flag) {
-                // Wide read: 10 inputs spill to overflow dep list.
-                let mut sum = 0;
-                for inp in &inputs_for_closure {
-                    sum += rt.get(*inp);
-                }
-                sum
-            } else {
-                // Narrow read: 3 inputs fit inline.
-                rt.get(inputs_for_closure[0])
-                    + rt.get(inputs_for_closure[1])
-                    + rt.get(inputs_for_closure[2])
-            }
-        });
-
-        // First compute: 10 deps + flag = 11 deps, overflow.
-        let expected_sum: u64 = (0..10u64).sum();
-        assert_eq!(rt.get(q), expected_sum);
-        assert_eq!(collect_deps_for_slot(&rt, q.slot()).len(), 11);
-
-        // Flip to narrow. Recompute reads flag + 3 deps = 4 deps,
-        // fits inline. The old overflow DepList is reclaimed.
-        rt.set(flag, false);
-        assert_eq!(rt.get(q), 1 + 2);
-        assert_eq!(collect_deps_for_slot(&rt, q.slot()).len(), 4);
-
-        // Go back to wide. Reallocate overflow.
-        rt.set(flag, true);
-        assert_eq!(rt.get(q), expected_sum);
-        assert_eq!(collect_deps_for_slot(&rt, q.slot()).len(), 11);
-    }
-
-    #[test]
-    fn node_count_tracks_created_nodes() {
-        let rt = Runtime::new();
-        assert_eq!(rt.node_count(), 0);
-        let _a = rt.create_input::<u64>(1);
-        assert_eq!(rt.node_count(), 1);
-        let _b = rt.create_input::<u64>(2);
-        assert_eq!(rt.node_count(), 2);
-        let _q = rt.create_query::<u64, _>(move |rt_inner| rt_inner.get(_a) + rt_inner.get(_b));
-        assert_eq!(rt.node_count(), 3);
-    }
-
-    #[test]
-    fn set_label_and_graph_snapshot_reflect_labels() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(10);
-        let b = rt.create_query::<u64, _>(move |r| r.get(a) * 2);
-        rt.set_label(a.slot(), "input_a".to_string());
-        rt.set_label(b.slot(), "double_a".to_string());
-
-        let snapshot = rt.graph_snapshot();
-        assert_eq!(snapshot.len(), 2);
-
-        let info_a = snapshot.iter().find(|n| n.slot == a.slot()).unwrap();
-        assert_eq!(info_a.label, "input_a");
-        assert_eq!(info_a.kind, NodeKindInfo::Input);
-
-        let info_b = snapshot.iter().find(|n| n.slot == b.slot()).unwrap();
-        assert_eq!(info_b.label, "double_a");
-        assert_eq!(info_b.kind, NodeKindInfo::Compute);
-    }
-
-    #[test]
-    fn graph_snapshot_includes_edges_after_compute() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(5);
-        let q = rt.create_query::<u64, _>(move |r| r.get(a) + 1);
-        // Force compute so dep edges are recorded.
-        assert_eq!(rt.get(q), 6);
-
-        let snapshot = rt.graph_snapshot();
-        let info_q = snapshot.iter().find(|n| n.slot == q.slot()).unwrap();
-        assert!(info_q.dependencies.contains(&a.slot()));
-
-        let info_a = snapshot.iter().find(|n| n.slot == a.slot()).unwrap();
-        assert!(info_a.dependents.contains(&q.slot()));
-    }
-
-    #[test]
-    fn get_traced_returns_correct_value_and_stub_trace() {
-        let rt = Runtime::new();
-        let a = rt.create_input::<u64>(7);
-        let (val, trace) = rt.get_traced(a);
-        assert_eq!(val, 7);
-        assert_eq!(trace.target, a.slot());
-        assert_eq!(trace.total_nodes, 1);
-        assert!(trace.node_traces.is_empty());
-    }
-
-    #[test]
-    fn set_tracing_does_not_panic() {
-        let rt = Runtime::new();
-        rt.set_tracing(true);
-        rt.set_tracing(false);
-    }
-}
diff --git a/crates/incr-concurrent/src/runtime_concurrent_test.rs b/crates/incr-concurrent/src/runtime_concurrent_test.rs
deleted file mode 100644
index 4eb12f5..0000000
--- a/crates/incr-concurrent/src/runtime_concurrent_test.rs
+++ /dev/null
@@ -1,716 +0,0 @@
-//! Concurrent correctness tests for the v2 Runtime.
-//!
-//! The v2 architecture's core claim is single-writer-many-readers:
-//! one writer at a time serializes through the runtime's write
-//! mutex while any number of readers call `rt.get` concurrently
-//! without contention on the reader-reader path. The existing
-//! commit H-N tests cover this architecturally (each piece in
-//! isolation) and the proptest in `runtime_proptest.rs` covers it
-//! functionally (incremental equals batch), but neither exercises
-//! the full concurrent path with real OS threads doing real
-//! simultaneous reads against a live writer.
-//!
-//! This module does that. Each test spawns N reader threads
-//! against a shared `Arc<Runtime>` while a writer thread perturbs
-//! inputs on a schedule, and asserts that every value a reader
-//! observes is legitimate (comes from the set of values the writer
-//! has ever written, possibly through a deterministic compute).
-//! A stronger property (full linearizability via happens-before
-//! reasoning across every observation) is left to commit P if
-//! this coarser property passes cleanly.
-//!
-//! Placed in-crate because v2 is `pub(crate)` until Gate 5; tests
-//! need crate-private access to construct handles and check state
-//! that the public API will eventually expose differently.
-
-use std::collections::HashSet;
-use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
-use std::sync::Arc;
-use std::thread;
-
-use super::runtime::Runtime;
-
-/// Stress duration per test. Short enough that the whole suite
-/// runs in under a second in normal mode, long enough that any
-/// data race or ordering bug has thousands of opportunities to
-/// trip.
-const DEFAULT_WRITER_ITERS: usize = 5_000;
-
-#[test]
-fn many_readers_observe_only_written_input_values() {
-    // One String input, 8 readers, writer cycles through a known
-    // set of strings. Any observation not in the set is either a
-    // torn read (UB we would have hit before commit G's fix, and
-    // should still pass now that the RwLock gate on nodes serves
-    // as the correctness barrier) or a stale value from a
-    // different writer generation, neither of which can happen
-    // under the current design.
-    const READERS: usize = 8;
-
-    let rt = Arc::new(Runtime::new());
-    let values: Vec<String> = (0..16)
-        .map(|i| format!("value-{}-padding-to-force-heap-allocation", i))
-        .collect();
-    let valid: HashSet<String> = values.iter().cloned().collect();
-    let input = rt.create_input::<String>(values[0].clone());
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let reader_handles: Vec<_> = (0..READERS)
-        .map(|_| {
-            let rt = rt.clone();
-            let valid = valid.clone();
-            let stop = stop.clone();
-            thread::spawn(move || {
-                let mut observations = 0usize;
-                while !stop.load(Ordering::Relaxed) {
-                    let v = rt.get(input);
-                    assert!(
-                        valid.contains(&v),
-                        "reader observed non-written value: {:?}",
-                        v
-                    );
-                    observations += 1;
-                }
-                observations
-            })
-        })
-        .collect();
-
-    for i in 0..DEFAULT_WRITER_ITERS {
-        let v = &values[i % values.len()];
-        rt.set(input, v.clone());
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    let total: usize = reader_handles
-        .into_iter()
-        .map(|h| h.join().expect("reader panicked"))
-        .sum();
-    assert!(
-        total > 0,
-        "readers should have completed at least one read each"
-    );
-}
-
-#[test]
-fn many_readers_observe_only_valid_query_values() {
-    // Input plus a pure function query. Every query observation
-    // must be `input + 100` for some input value the writer has
-    // set. This exercises the full reactivity path (set → dirty
-    // walk → reader observes Dirty → reader CASes to Computing →
-    // reader recomputes → reader observes Clean) under concurrency.
-    const READERS: usize = 8;
-
-    let rt = Arc::new(Runtime::new());
-    let inputs: Vec<u64> = (0..20).collect();
-    let valid_queries: HashSet<u64> = inputs.iter().map(|i| i + 100).collect();
-
-    let input = rt.create_input::<u64>(inputs[0]);
-    let query = rt.create_query::<u64, _>(move |rt| rt.get(input) + 100);
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let reader_handles: Vec<_> = (0..READERS)
-        .map(|_| {
-            let rt = rt.clone();
-            let valid = valid_queries.clone();
-            let stop = stop.clone();
-            thread::spawn(move || {
-                while !stop.load(Ordering::Relaxed) {
-                    let v = rt.get(query);
-                    assert!(
-                        valid.contains(&v),
-                        "reader observed query value {} not in valid set (expected input+100 for some written input)",
-                        v
-                    );
-                }
-            })
-        })
-        .collect();
-
-    for i in 0..DEFAULT_WRITER_ITERS {
-        let v = inputs[i % inputs.len()];
-        rt.set(input, v);
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    for h in reader_handles {
-        h.join().expect("reader panicked");
-    }
-}
-
-#[test]
-fn many_readers_on_chain_of_queries_observe_valid_values() {
-    // input → q1 (+1) → q2 (*2) → q3 (+100). Compute function for
-    // q3(input) = ((input + 1) * 2) + 100. Readers spin on q3;
-    // writer sets input. Every observation must be valid for some
-    // written input.
-    const READERS: usize = 6;
-
-    let rt = Arc::new(Runtime::new());
-    let inputs: Vec<u64> = (0..50).collect();
-    let valid: HashSet<u64> = inputs.iter().map(|i| ((i + 1) * 2) + 100).collect();
-
-    let input = rt.create_input::<u64>(inputs[0]);
-    let q1 = rt.create_query::<u64, _>(move |rt| rt.get(input) + 1);
-    let q2 = rt.create_query::<u64, _>(move |rt| rt.get(q1) * 2);
-    let q3 = rt.create_query::<u64, _>(move |rt| rt.get(q2) + 100);
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let reader_handles: Vec<_> = (0..READERS)
-        .map(|_| {
-            let rt = rt.clone();
-            let valid = valid.clone();
-            let stop = stop.clone();
-            thread::spawn(move || {
-                while !stop.load(Ordering::Relaxed) {
-                    let v = rt.get(q3);
-                    assert!(
-                        valid.contains(&v),
-                        "reader observed q3 value {} not valid for any input in {:?}",
-                        v,
-                        (0..5u64)
-                    );
-                }
-            })
-        })
-        .collect();
-
-    for i in 0..DEFAULT_WRITER_ITERS {
-        rt.set(input, inputs[i % inputs.len()]);
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    for h in reader_handles {
-        h.join().expect("reader panicked");
-    }
-}
-
-#[test]
-fn concurrent_reads_of_multiple_unrelated_chains_do_not_cross_contaminate() {
-    // Two independent chains sharing a runtime. Readers on chain A
-    // should never observe values from chain B's inputs, even if
-    // the writer is updating both. This catches any cross-slot
-    // contamination in the state machine or dep graph.
-    const READERS_PER_CHAIN: usize = 3;
-
-    let rt = Arc::new(Runtime::new());
-
-    let a_inputs: Vec<u64> = (0..20).collect();
-    let b_inputs: Vec<u64> = (100..120).collect();
-    let a_valid: HashSet<u64> = a_inputs.iter().map(|i| i * 10).collect();
-    let b_valid: HashSet<u64> = b_inputs.iter().map(|i| i * 10).collect();
-
-    let a_in = rt.create_input::<u64>(a_inputs[0]);
-    let b_in = rt.create_input::<u64>(b_inputs[0]);
-    let a_q = rt.create_query::<u64, _>(move |rt| rt.get(a_in) * 10);
-    let b_q = rt.create_query::<u64, _>(move |rt| rt.get(b_in) * 10);
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let mut handles = Vec::new();
-    for _ in 0..READERS_PER_CHAIN {
-        let rt = rt.clone();
-        let valid = a_valid.clone();
-        let stop = stop.clone();
-        handles.push(thread::spawn(move || {
-            while !stop.load(Ordering::Relaxed) {
-                let v = rt.get(a_q);
-                assert!(
-                    valid.contains(&v),
-                    "chain A reader observed cross-contaminated value: {}",
-                    v
-                );
-            }
-        }));
-    }
-    for _ in 0..READERS_PER_CHAIN {
-        let rt = rt.clone();
-        let valid = b_valid.clone();
-        let stop = stop.clone();
-        handles.push(thread::spawn(move || {
-            while !stop.load(Ordering::Relaxed) {
-                let v = rt.get(b_q);
-                assert!(
-                    valid.contains(&v),
-                    "chain B reader observed cross-contaminated value: {}",
-                    v
-                );
-            }
-        }));
-    }
-
-    for i in 0..DEFAULT_WRITER_ITERS {
-        rt.set(a_in, a_inputs[i % a_inputs.len()]);
-        rt.set(b_in, b_inputs[i % b_inputs.len()]);
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    for h in handles {
-        h.join().expect("reader panicked");
-    }
-}
-
-#[test]
-fn compute_function_runs_at_most_once_per_dirty_cycle() {
-    // When multiple readers race to recompute a Dirty query, only
-    // one should actually execute the compute closure per dirty
-    // cycle. The state machine's Dirty → Computing CAS enforces
-    // this. Count compute invocations; expected upper bound is
-    // roughly the number of times the writer called `set`.
-    //
-    // We allow a margin: the writer may set faster than readers
-    // can recompute, so several sets can coalesce into one
-    // recompute (good), and a set during Computing can miss the
-    // dirty mark on the current generation (the known race; its
-    // impact here is that the count may be slightly LOWER than
-    // the number of sets, not higher). The bounds we assert are
-    // loose enough that both directions are tolerated.
-    const READERS: usize = 8;
-    const WRITER_SETS: usize = 2_000;
-
-    let rt = Arc::new(Runtime::new());
-    let compute_invocations = Arc::new(AtomicUsize::new(0));
-
-    let input = rt.create_input::<u64>(0);
-    let query = {
-        let counter = compute_invocations.clone();
-        rt.create_query::<u64, _>(move |rt| {
-            counter.fetch_add(1, Ordering::SeqCst);
-            rt.get(input) * 2
-        })
-    };
-
-    // Force the initial compute synchronously so the `total >= 1` assertion
-    // is deterministic. Without this, a fast writer can finish all sets
-    // and flip `stop` before any reader thread is scheduled, leaving the
-    // counter at 0.
-    rt.get(query);
-
-    let stop = Arc::new(AtomicBool::new(false));
-    let reader_handles: Vec<_> = (0..READERS)
-        .map(|_| {
-            let rt = rt.clone();
-            let stop = stop.clone();
-            thread::spawn(move || {
-                while !stop.load(Ordering::Relaxed) {
-                    let _ = rt.get(query);
-                }
-            })
-        })
-        .collect();
-
-    for i in 0..WRITER_SETS {
-        rt.set(input, i as u64);
-    }
-    stop.store(true, Ordering::Relaxed);
-    for h in reader_handles {
-        h.join().expect("reader panicked");
-    }
-
-    let total = compute_invocations.load(Ordering::SeqCst);
-    // Each set() triggers at most one recompute. Readers may
-    // observe stale Clean between sets without triggering a
-    // recompute (coalescing). We expect total <= WRITER_SETS + 1
-    // (the +1 for the initial compute). We also expect total >= 1
-    // (at least the initial compute ran).
-    assert!(
-        total >= 1,
-        "expected at least one compute invocation, got {}",
-        total
-    );
-    assert!(
-        total <= WRITER_SETS + READERS + 10,
-        "compute ran too many times ({}); expected <= {} (writer_sets + readers + slack)",
-        total,
-        WRITER_SETS + READERS + 10
-    );
-}
-
-#[test]
-fn computing_during_dirty_walk_does_not_leak_stale_value() {
-    // Deterministic reproduction of the Computing-during-dirty-walk
-    // race via two barriers. This race existed in commits J through
-    // N and was fixed in commit P by a post-compute revision check
-    // inside run_compute: before transitioning from Computing to
-    // Clean, compare the current revision counter to the one
-    // recorded at compute start. If they differ, a writer landed a
-    // set during the compute, so the result is potentially stale
-    // and we transition to Dirty instead of Clean, forcing the
-    // next reader to retry the compute against the fresh inputs.
-    //
-    // The test threads are:
-    //   - compute thread: triggers rt.get(q), blocks inside the
-    //     compute closure on barrier A, then reads input, then
-    //     blocks on barrier B.
-    //   - writer thread: waits on barrier A so compute has started,
-    //     then calls rt.set(input, new_value) which bumps revision
-    //     and runs the dirty walk (which sees q in Computing state
-    //     and fails to mark it Dirty), then releases barrier B.
-    //   - main thread: after both join, reads q and asserts it
-    //     reflects the new input value, not the stale one.
-    //
-    // Without the fix, the compute finishes with the old input
-    // value, Release-stores Clean with the stale result, and the
-    // next reader sees Clean + stale. With the fix, the compute
-    // detects the revision bump, transitions to Dirty, and the
-    // next reader's get() triggers a retry that uses the new input.
-    use std::sync::Barrier;
-
-    let rt = Arc::new(Runtime::new());
-    let input = rt.create_input::<u64>(10);
-
-    // Barriers coordinate the two compute-side threads. Each is
-    // sized to 2 (compute thread plus writer thread).
-    let started_barrier = Arc::new(Barrier::new(2));
-    let set_complete_barrier = Arc::new(Barrier::new(2));
-    // Only the FIRST invocation of the compute closure blocks on
-    // the barriers. When commit P's fix detects the revision bump
-    // and marks the node Dirty, the next reader retries the
-    // closure; the retry must NOT block on the barriers because
-    // the writer thread has already finished and would never
-    // arrive. This counter tracks "is this the first call" with
-    // AtomicUsize so the closure stays Fn (not FnOnce).
-    let first_call = Arc::new(AtomicUsize::new(0));
-
-    let q = {
-        let started = started_barrier.clone();
-        let set_complete = set_complete_barrier.clone();
-        let first_call = first_call.clone();
-        rt.create_query::<u64, _>(move |rt| {
-            // Read the input first so this becomes a recorded dep.
-            let v = rt.get(input);
-            // Only the first invocation participates in the
-            // barrier dance. Retries (from the revision-bump
-            // detection in run_compute) just return the current
-            // input value without waiting.
-            if first_call.fetch_add(1, Ordering::SeqCst) == 0 {
-                // First call: signal the writer that our compute
-                // has started and we have already read the old
-                // input value.
-                started.wait();
-                // Block until the writer has called set and
-                // finished its (failed-to-mark-us-dirty) walk.
-                set_complete.wait();
-            }
-            // Produce the result based on whatever input value we
-            // read. On the first call v=10 (stale), on the retry
-            // v=20 (fresh).
-            v * 100
-        })
-    };
-
-    // Compute thread: runs the read that triggers the compute.
-    let compute_rt = rt.clone();
-    let compute_thread = thread::spawn(move || compute_rt.get(q));
-
-    // Writer thread: waits for compute to start, sets the input,
-    // then releases the compute.
-    let writer_rt = rt.clone();
-    let writer_started = started_barrier.clone();
-    let writer_set_complete = set_complete_barrier.clone();
-    let writer_thread = thread::spawn(move || {
-        writer_started.wait();
-        writer_rt.set(input, 20);
-        writer_set_complete.wait();
-    });
-
-    // Wait for the race to play out.
-    let first_result = compute_thread.join().expect("compute thread panicked");
-    writer_thread.join().expect("writer thread panicked");
-
-    // With commit P's fix: the stale first compute (v=10, produces
-    // 1000) is detected by the post-compute revision check inside
-    // run_compute, which transitions the node to Dirty instead of
-    // Clean. The outer `rt.get(q)` loop in the compute thread
-    // observes Dirty on its next iteration, retries the compute
-    // (this time with first_call >= 1 so the closure skips the
-    // barriers), reads the fresh input value 20, and produces
-    // 2000. The compute thread's `rt.get(q)` therefore returns the
-    // correct post-set value 2000, not the stale 1000.
-    //
-    // Without the fix: run_compute would have Release-stored Clean
-    // with the stale 1000 value. The compute thread's `rt.get(q)`
-    // would return 1000. The next reader would also see 1000. The
-    // assertion below would fail because first_result would be
-    // 1000, not 2000.
-    assert_eq!(
-        first_result, 2000,
-        "expected compute thread to observe the post-set value (2000) via \
-         the internal retry triggered by commit P's revision check; got {}. \
-         This failure indicates the Computing-during-dirty-walk race is \
-         NOT closed: the stale compute leaked into Clean state.",
-        first_result
-    );
-
-    // The closure should have run exactly twice: once with the
-    // stale input (v=10, barriers taken), once with the fresh
-    // input (v=20, barriers skipped via the first_call counter).
-    // Without the fix, it would have run exactly once.
-    let invocations = first_call.load(Ordering::SeqCst);
-    assert_eq!(
-        invocations, 2,
-        "closure should run twice (stale then retry); got {} invocations",
-        invocations
-    );
-
-    // Reading q again returns the (now-Clean) cached fresh value,
-    // no additional compute invocations.
-    let second_result = rt.get(q);
-    assert_eq!(second_result, 2000);
-    assert_eq!(
-        first_call.load(Ordering::SeqCst),
-        2,
-        "second read should not have invoked the closure"
-    );
-}
-
-#[test]
-fn observations_are_monotonic_in_writer_logical_time() {
-    // Stronger than "observation in valid set": every reader's
-    // sequence of observations must be non-decreasing in the
-    // writer's logical time. The writer increments the input
-    // monotonically from 0 upward; any reader that observes value
-    // N and then observes M < N is witnessing a stale read after
-    // a fresh read, which would be a linearizability violation.
-    //
-    // This is a real-time monotonic ordering check at the
-    // single-reader granularity. It catches reordering bugs the
-    // "valid set" check cannot: a stale value that happens to be
-    // in the valid set is not a valid set violation, but it is a
-    // monotonicity violation if it follows a fresher read.
-    //
-    // Cross-reader ordering is NOT checked here: two readers may
-    // observe the same value at different real times, or observe
-    // different monotonic chains, depending on their interleaving
-    // with the writer. Linearizability proper requires a global
-    // total order; this test checks the weaker per-reader variant
-    // that is both meaningful and cheap to verify.
-    const READERS: usize = 8;
-    const WRITER_ITERS: u64 = 5_000;
-
-    let rt = Arc::new(Runtime::new());
-    let input = rt.create_input::<u64>(0);
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let reader_handles: Vec<_> = (0..READERS)
-        .map(|i| {
-            let rt = rt.clone();
-            let stop = stop.clone();
-            thread::spawn(move || {
-                let mut highest_seen: u64 = 0;
-                let mut observation_count: usize = 0;
-                while !stop.load(Ordering::Relaxed) {
-                    let v = rt.get(input);
-                    assert!(
-                        v >= highest_seen,
-                        "reader {} observed {} after having already observed {} \
-                         — monotonicity violation implies a stale read after a \
-                         fresh read (real-time linearizability broken)",
-                        i,
-                        v,
-                        highest_seen
-                    );
-                    highest_seen = v;
-                    observation_count += 1;
-                }
-                (highest_seen, observation_count)
-            })
-        })
-        .collect();
-
-    for i in 0..WRITER_ITERS {
-        rt.set(input, i);
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    let results: Vec<_> = reader_handles
-        .into_iter()
-        .map(|h| h.join().expect("reader panicked"))
-        .collect();
-
-    // Sanity: every reader made progress and eventually saw a
-    // value close to the final writer value. We don't assert the
-    // exact final value because readers may stop reading before
-    // the very last set lands, but we do assert that the average
-    // highest-seen is in a sensible range.
-    let total_observations: usize = results.iter().map(|(_, c)| c).sum();
-    assert!(
-        total_observations > 0,
-        "expected readers to make at least some progress"
-    );
-    let max_observed = results.iter().map(|(h, _)| h).max().copied().unwrap_or(0);
-    assert!(
-        max_observed > 0,
-        "expected at least one reader to observe a non-initial value"
-    );
-}
-
-#[test]
-fn query_observations_are_monotonic_in_writer_logical_time() {
-    // Same invariant as above but through a query node, so the
-    // observation path goes through the reactive dirty walk and
-    // recompute machinery. The query returns `input * 1000 + 7`
-    // which is strictly monotonic in the input, so the reader
-    // can decode the input value from the query result and check
-    // monotonicity on that.
-    const READERS: usize = 6;
-    const WRITER_ITERS: u64 = 3_000;
-
-    let rt = Arc::new(Runtime::new());
-    let input = rt.create_input::<u64>(0);
-    let query = rt.create_query::<u64, _>(move |rt| rt.get(input) * 1000 + 7);
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let reader_handles: Vec<_> = (0..READERS)
-        .map(|i| {
-            let rt = rt.clone();
-            let stop = stop.clone();
-            thread::spawn(move || {
-                let mut highest_seen: u64 = 7; // initial query value = 0*1000+7
-                while !stop.load(Ordering::Relaxed) {
-                    let v = rt.get(query);
-                    // Decode: v = input * 1000 + 7
-                    assert_eq!(
-                        v % 1000,
-                        7,
-                        "reader {} observed query value {} which does not match \
-                         the compute formula (input * 1000 + 7); torn read or \
-                         corrupted value",
-                        i,
-                        v
-                    );
-                    assert!(
-                        v >= highest_seen,
-                        "reader {} observed query value {} after having seen {} \
-                         — stale read after fresh read through the query path",
-                        i,
-                        v,
-                        highest_seen
-                    );
-                    highest_seen = v;
-                }
-                highest_seen
-            })
-        })
-        .collect();
-
-    for i in 0..WRITER_ITERS {
-        rt.set(input, i);
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    for h in reader_handles {
-        h.join().expect("reader panicked");
-    }
-}
-
-#[test]
-fn multi_chain_observations_are_each_internally_monotonic() {
-    // Two independent chains, each with its own monotonic input
-    // and query. Verify per-chain monotonicity: a reader on chain
-    // A should never observe an A value go backward, and same for
-    // B. This extends the earlier "no cross-contamination" test
-    // from commit O with a real-time ordering check on top of
-    // the valid-set check.
-    const READERS_PER_CHAIN: usize = 3;
-    const WRITER_ITERS: u64 = 3_000;
-
-    let rt = Arc::new(Runtime::new());
-    let a_input = rt.create_input::<u64>(0);
-    let b_input = rt.create_input::<u64>(0);
-    let a_query = rt.create_query::<u64, _>(move |rt| rt.get(a_input) * 10);
-    let b_query = rt.create_query::<u64, _>(move |rt| rt.get(b_input) * 10 + 500_000_000);
-
-    let stop = Arc::new(AtomicBool::new(false));
-
-    let mut handles = Vec::new();
-    for i in 0..READERS_PER_CHAIN {
-        let rt = rt.clone();
-        let stop = stop.clone();
-        handles.push(thread::spawn(move || {
-            let mut highest: u64 = 0;
-            while !stop.load(Ordering::Relaxed) {
-                let v = rt.get(a_query);
-                // A values are input * 10, so < 500_000_000 for
-                // our input range. Catches cross-contamination.
-                assert!(
-                    v < 500_000_000,
-                    "A reader {} observed B-like value {}",
-                    i,
-                    v
-                );
-                assert!(
-                    v >= highest,
-                    "A reader {} monotonicity: {} < {}",
-                    i,
-                    v,
-                    highest
-                );
-                highest = v;
-            }
-        }));
-    }
-    for i in 0..READERS_PER_CHAIN {
-        let rt = rt.clone();
-        let stop = stop.clone();
-        handles.push(thread::spawn(move || {
-            let mut highest: u64 = 500_000_000; // b_query initial = 0*10 + 500M
-            while !stop.load(Ordering::Relaxed) {
-                let v = rt.get(b_query);
-                assert!(
-                    v >= 500_000_000,
-                    "B reader {} observed A-like value {}",
-                    i,
-                    v
-                );
-                assert!(
-                    v >= highest,
-                    "B reader {} monotonicity: {} < {}",
-                    i,
-                    v,
-                    highest
-                );
-                highest = v;
-            }
-        }));
-    }
-
-    for i in 0..WRITER_ITERS {
-        rt.set(a_input, i);
-        rt.set(b_input, i);
-    }
-    stop.store(true, Ordering::Relaxed);
-
-    for h in handles {
-        h.join().expect("reader panicked");
-    }
-}
-
-#[test]
-fn reader_threads_can_be_spawned_and_joined_repeatedly_on_same_runtime() {
-    // Correctness under repeated reader-thread lifetimes. Spawn a
-    // batch of readers, join them, set the input, spawn again.
-    // Ensures the TLS compute cache and COMPUTE_STACK are clean
-    // between reader lifetimes.
-    let rt = Arc::new(Runtime::new());
-    let input = rt.create_input::<u64>(1);
-    let query = rt.create_query::<u64, _>(move |rt| rt.get(input) + 1000);
-
-    for round in 0..5u64 {
-        rt.set(input, round);
-        let handles: Vec<_> = (0..4)
-            .map(|_| {
-                let rt = rt.clone();
-                thread::spawn(move || rt.get(query))
-            })
-            .collect();
-        for h in handles {
-            assert_eq!(h.join().unwrap(), round + 1000);
-        }
-    }
-}
diff --git a/crates/incr-concurrent/src/runtime_proptest.rs b/crates/incr-concurrent/src/runtime_proptest.rs
deleted file mode 100644
index 6a7eced..0000000
--- a/crates/incr-concurrent/src/runtime_proptest.rs
+++ /dev/null
@@ -1,210 +0,0 @@
-//! Proptest suite for the v2 Runtime.
-//!
-//! Mirrors `crates/incr-concurrent/tests/property.rs` (which targets v1)
-//! but runs against `v2::Runtime` through crate-private access. The
-//! goal is spec Gate 2: v2 passes the same property tests as v1 in
-//! single-threaded mode, establishing correctness equivalence
-//! between the two engines for every case the proptest suite can
-//! generate.
-//!
-//! Why in-crate rather than in `tests/`: the v2 module is
-//! `pub(crate)` until spec Gate 5, so external integration tests
-//! cannot see `v2::Runtime` or `v2::Incr`. Putting this proptest in
-//! a `#[cfg(test)]` submodule of `v2/` gives it direct crate-
-//! private access without exposing the v2 API publicly before it
-//! is ready.
-//!
-//! Collection operator proptests (`tests/collection_property.rs`)
-//! are deliberately not ported here. The collection API
-//! (`IncrCollection`, filter/map/count/reduce/sort/pairwise) is a
-//! separate piece of work that will be rewritten against v2 in its
-//! own chunk per the spec's section 3 scope notes.
-
-use super::handle::Incr;
-use super::runtime::Runtime;
-use proptest::prelude::*;
-
-/// Build a layered graph of the given shape, run it incrementally,
-/// then rebuild from scratch and compare results.
-///
-/// This is the same function body as `tests/property.rs::verify_
-/// incremental_matches_batch`, mechanically ported to use
-/// `v2::Runtime` and `v2::Incr<i64>`. The core correctness
-/// contract (incremental result equals batch recomputation result
-/// for every mutation sequence) is the property the proptest
-/// proves across thousands of generated graph shapes.
-fn verify_incremental_matches_batch(
-    num_inputs: usize,
-    input_values: Vec<i64>,
-    layers: Vec<Vec<(usize, usize)>>, // Each layer: vec of (dep_a_idx, dep_b_idx) pairs
-    mutations: Vec<(usize, i64)>,     // (input_index, new_value) pairs
-) {
-    assert!(num_inputs >= 2);
-    assert_eq!(input_values.len(), num_inputs);
-
-    let rt = Runtime::new();
-    let mut all_nodes: Vec<Incr<i64>> = Vec::new();
-
-    // Create inputs.
-    for &val in &input_values {
-        let node = rt.create_input::<i64>(val);
-        all_nodes.push(node);
-    }
-
-    // Create compute layers. Each query sums two existing nodes via
-    // indices into the running `all_nodes` list, with modular
-    // indexing so proptest-generated offsets always pick valid
-    // predecessors.
-    for layer in &layers {
-        let mut layer_nodes = Vec::new();
-        for &(dep_a_rel, dep_b_rel) in layer {
-            let available = all_nodes.len();
-            if available < 2 {
-                continue;
-            }
-            let idx_a = dep_a_rel % available;
-            let idx_b = dep_b_rel % available;
-            let a = all_nodes[idx_a];
-            let b = all_nodes[idx_b];
-            let node = rt.create_query::<i64, _>(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            layer_nodes.push(node);
-        }
-        all_nodes.extend(layer_nodes);
-    }
-
-    if all_nodes.len() <= num_inputs {
-        return; // No compute nodes generated
-    }
-
-    // Read all compute nodes once to force the initial compute.
-    let last = *all_nodes.last().unwrap();
-    let _ = rt.get(last);
-
-    // Apply mutations. Each mutation flips an input value; the
-    // dirty walk propagates and subsequent reads trigger recomputes
-    // with early cutoff on any node whose value happens to be
-    // unchanged.
-    for &(input_rel, new_val) in &mutations {
-        let idx = input_rel % num_inputs;
-        rt.set(all_nodes[idx], new_val);
-    }
-
-    // Get incremental result.
-    let incremental_result = rt.get(last);
-
-    let mut final_values = input_values.clone();
-    for &(input_rel, new_val) in &mutations {
-        let idx = input_rel % num_inputs;
-        final_values[idx] = new_val;
-    }
-
-    let rt2 = Runtime::new();
-    let mut all_nodes2: Vec<Incr<i64>> = Vec::new();
-
-    for &val in &final_values {
-        let node = rt2.create_input::<i64>(val);
-        all_nodes2.push(node);
-    }
-
-    for layer in &layers {
-        let mut layer_nodes = Vec::new();
-        for &(dep_a_rel, dep_b_rel) in layer {
-            let available = all_nodes2.len();
-            if available < 2 {
-                continue;
-            }
-            let idx_a = dep_a_rel % available;
-            let idx_b = dep_b_rel % available;
-            let a = all_nodes2[idx_a];
-            let b = all_nodes2[idx_b];
-            let node = rt2.create_query::<i64, _>(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            layer_nodes.push(node);
-        }
-        all_nodes2.extend(layer_nodes);
-    }
-
-    let last2 = *all_nodes2.last().unwrap();
-    let batch_result = rt2.get(last2);
-
-    assert_eq!(
-        incremental_result,
-        batch_result,
-        "v2: Incremental result {} != batch result {} with {} inputs, {} layers, {} mutations",
-        incremental_result,
-        batch_result,
-        num_inputs,
-        layers.len(),
-        mutations.len()
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    /// The main correctness property: for every generated graph
-    /// shape and mutation sequence, v2's incremental result must
-    /// equal a from-scratch rebuild with the final input values.
-    /// This covers thousands of random dep topologies (diamonds,
-    /// chains, wide fan-out, wide fan-in, mixed) and checks the
-    /// full commit H-M stack: dep tracking, dirty walk, early
-    /// cutoff, and dynamic dep updates.
-    #[test]
-    fn v2_incremental_matches_batch(
-        num_inputs in 2_usize..20,
-        input_values in prop::collection::vec(-1000_i64..1000, 2..20),
-        layers in prop::collection::vec(
-            prop::collection::vec((0_usize..100, 0_usize..100), 1..5),
-            1..8
-        ),
-        mutations in prop::collection::vec((0_usize..100, -1000_i64..1000), 1..20),
-    ) {
-        let num_inputs = num_inputs.min(input_values.len()).max(2);
-        let input_values = input_values[..num_inputs].to_vec();
-        verify_incremental_matches_batch(num_inputs, input_values, layers, mutations);
-    }
-}
-
-/// Specific regression case from the v1 proptest suite: a shallow
-/// diamond where one mutation is a no-op (same value) and another
-/// actually changes. Exercises the early cutoff fast path combined
-/// with a live dirty walk. This case was originally a shrunk
-/// failure from the v1 proptest; keeping the concrete case as a
-/// named test catches regressions without waiting for proptest to
-/// re-discover the shape.
-#[test]
-fn v2_property_specific_diamond_cutoff() {
-    verify_incremental_matches_batch(
-        3,
-        vec![10, 20, 30],
-        vec![
-            vec![(0, 1), (1, 2)], // Layer 1: node3=in0+in1, node4=in1+in2
-            vec![(0, 1)],         // Layer 2: node5=node3+node4
-        ],
-        vec![(0, 10), (1, 25)], // Change input 0 to same value (no-op), change input 1
-    );
-}
-
-/// Deeper chain regression case from the v1 proptest suite. Ten
-/// layers, each a single query that sums two earlier nodes. Three
-/// mutations at different depths exercise the dirty walk's
-/// transitive reach.
-#[test]
-fn v2_property_deep_chain() {
-    verify_incremental_matches_batch(
-        5,
-        vec![1, 2, 3, 4, 5],
-        vec![
-            vec![(0, 1)],
-            vec![(2, 0)],
-            vec![(0, 1)],
-            vec![(1, 0)],
-            vec![(0, 1)],
-            vec![(2, 0)],
-            vec![(0, 1)],
-            vec![(1, 0)],
-            vec![(0, 1)],
-            vec![(2, 0)],
-        ],
-        vec![(0, 100), (2, 50), (4, 75)],
-    );
-}
diff --git a/crates/incr-concurrent/src/sorted_collection.rs b/crates/incr-concurrent/src/sorted_collection.rs
deleted file mode 100644
index 56b4c9b..0000000
--- a/crates/incr-concurrent/src/sorted_collection.rs
+++ /dev/null
@@ -1,460 +0,0 @@
-use std::collections::HashMap;
-use std::hash::Hash;
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-use std::sync::{Arc, RwLock};
-
-use super::collection::{CollectionLog, Delta, IncrCollection};
-use super::handle::Incr;
-use super::runtime::Runtime;
-
-#[derive(Clone, Debug)]
-pub enum SortDelta<T> {
-    Inserted { index: usize, value: T },
-    Removed { index: usize, value: T },
-}
-
-pub struct SortedCollection<T: Clone + Send + Sync + 'static> {
-    pub(crate) ordered_values: Arc<RwLock<Vec<T>>>,
-    pub(crate) pending_deltas: Arc<RwLock<Vec<SortDelta<T>>>>,
-    pub(crate) version_node: Incr<u64>,
-}
-
-impl<T: Clone + Send + Sync + 'static> SortedCollection<T> {
-    pub fn entries(&self) -> Vec<T> {
-        self.ordered_values.read().unwrap().clone()
-    }
-
-    pub fn version_node(&self) -> Incr<u64> {
-        self.version_node
-    }
-}
-
-impl<T> IncrCollection<T>
-where
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    pub fn sort_by_key<K, F>(&self, rt: &Runtime, key_fn: F) -> SortedCollection<T>
-    where
-        K: Ord + Clone + Send + Sync + 'static,
-        F: Fn(&T) -> K + Send + Sync + 'static,
-    {
-        let upstream_log = self.log.clone();
-        let upstream_ver = self.version_node;
-        let last_idx = Arc::new(AtomicUsize::new(0));
-
-        let keys: Arc<RwLock<Vec<K>>> = Arc::new(RwLock::new(Vec::new()));
-        let key_cache: Arc<RwLock<HashMap<T, K>>> = Arc::new(RwLock::new(HashMap::new()));
-
-        let ordered_values: Arc<RwLock<Vec<T>>> = Arc::new(RwLock::new(Vec::new()));
-        let pending_deltas: Arc<RwLock<Vec<SortDelta<T>>>> = Arc::new(RwLock::new(Vec::new()));
-
-        let keys_ref = keys.clone();
-        let key_cache_ref = key_cache.clone();
-        let ordered_values_ref = ordered_values.clone();
-        let pending_deltas_ref = pending_deltas.clone();
-
-        let version_counter = Arc::new(AtomicU64::new(0));
-        let version_counter_ref = version_counter.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _upstream_v = rt.get(upstream_ver);
-
-            let upstream = upstream_log.read().unwrap();
-            let start = last_idx.load(Ordering::Relaxed);
-            if start >= upstream.deltas.len() {
-                return version_counter_ref.load(Ordering::Relaxed);
-            }
-
-            let mut ks = keys_ref.write().unwrap();
-            let mut kc = key_cache_ref.write().unwrap();
-            let mut vals = ordered_values_ref.write().unwrap();
-            let mut deltas = pending_deltas_ref.write().unwrap();
-
-            for vd in &upstream.deltas[start..] {
-                match &vd.delta {
-                    Delta::Insert(x) => {
-                        let k = key_fn(x);
-                        let pos = ks
-                            .binary_search_by(|probe| probe.cmp(&k))
-                            .unwrap_or_else(|pos| pos);
-                        ks.insert(pos, k.clone());
-                        vals.insert(pos, x.clone());
-                        kc.insert(x.clone(), k);
-                        deltas.push(SortDelta::Inserted {
-                            index: pos,
-                            value: x.clone(),
-                        });
-                    }
-                    Delta::Delete(x) => {
-                        if let Some(k) = kc.remove(x) {
-                            let start_pos = ks
-                                .binary_search_by(|probe| probe.cmp(&k))
-                                .unwrap_or_else(|pos| pos);
-                            let mut pos = start_pos;
-                            while pos < vals.len() && ks[pos] == k {
-                                if vals[pos] == *x {
-                                    break;
-                                }
-                                pos += 1;
-                            }
-                            if pos < vals.len() && vals[pos] == *x {
-                                ks.remove(pos);
-                                vals.remove(pos);
-                                deltas.push(SortDelta::Removed {
-                                    index: pos,
-                                    value: x.clone(),
-                                });
-                            }
-                        }
-                    }
-                }
-            }
-
-            last_idx.store(upstream.deltas.len(), Ordering::Relaxed);
-            version_counter_ref.fetch_add(1, Ordering::Relaxed) + 1
-        });
-
-        SortedCollection {
-            ordered_values,
-            pending_deltas,
-            version_node,
-        }
-    }
-}
-
-impl<T> SortedCollection<T>
-where
-    T: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
-{
-    pub fn window(&self, rt: &Runtime, size: usize) -> IncrCollection<Vec<T>>
-    where
-        T: Eq + Hash,
-    {
-        let ordered_values = self.ordered_values.clone();
-        let sorted_ver = self.version_node;
-        let output_log = Arc::new(RwLock::new(CollectionLog::<Vec<T>>::new()));
-        let output_log_ref = output_log.clone();
-        let prev_windows: Arc<RwLock<Vec<Vec<T>>>> = Arc::new(RwLock::new(Vec::new()));
-        let prev_ref = prev_windows.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _sv = rt.get(sorted_ver);
-
-            let vals = ordered_values.read().unwrap();
-            let mut output = output_log_ref.write().unwrap();
-            let mut prev = prev_ref.write().unwrap();
-
-            for w in prev.drain(..) {
-                output.delete(&w);
-            }
-
-            if vals.len() >= size {
-                for i in 0..=(vals.len() - size) {
-                    let w: Vec<T> = vals[i..i + size].to_vec();
-                    output.insert(w.clone());
-                    prev.push(w);
-                }
-            }
-
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-
-    pub fn pairwise(&self, rt: &Runtime) -> IncrCollection<(T, T)> {
-        let sorted_deltas = self.pending_deltas.clone();
-        let sorted_ver = self.version_node;
-        let last_delta_idx = Arc::new(AtomicUsize::new(0));
-
-        let shadow: Arc<RwLock<Vec<T>>> = Arc::new(RwLock::new(Vec::new()));
-        let shadow_ref = shadow.clone();
-
-        let output_log = Arc::new(RwLock::new(CollectionLog::new()));
-        let output_log_ref = output_log.clone();
-
-        let version_node = rt.create_query(move |rt| -> u64 {
-            let _sorted_v = rt.get(sorted_ver);
-
-            let deltas = sorted_deltas.read().unwrap();
-            let start = last_delta_idx.load(Ordering::Relaxed);
-            if start >= deltas.len() {
-                return output_log_ref.read().unwrap().version;
-            }
-
-            let mut shadow = shadow_ref.write().unwrap();
-            let mut output = output_log_ref.write().unwrap();
-
-            for delta in &deltas[start..] {
-                match delta {
-                    SortDelta::Inserted { index, value } => {
-                        let i = *index;
-                        let n_before = shadow.len();
-
-                        if n_before == 0 {
-                            // first element, no pairs
-                        } else if i == 0 {
-                            output.insert((value.clone(), shadow[0].clone()));
-                        } else if i == n_before {
-                            output.insert((shadow[n_before - 1].clone(), value.clone()));
-                        } else {
-                            let left = shadow[i - 1].clone();
-                            let right = shadow[i].clone();
-                            output.delete(&(left.clone(), right.clone()));
-                            output.insert((left, value.clone()));
-                            output.insert((value.clone(), right));
-                        }
-
-                        shadow.insert(i, value.clone());
-                    }
-                    SortDelta::Removed { index, value } => {
-                        let i = *index;
-                        shadow.remove(i);
-                        let n_after = shadow.len();
-
-                        if n_after == 0 {
-                            // was the only element
-                        } else if i == 0 {
-                            output.delete(&(value.clone(), shadow[0].clone()));
-                        } else if i == n_after {
-                            output.delete(&(shadow[n_after - 1].clone(), value.clone()));
-                        } else {
-                            let left = shadow[i - 1].clone();
-                            let right = shadow[i].clone();
-                            output.delete(&(left.clone(), value.clone()));
-                            output.delete(&(value.clone(), right.clone()));
-                            output.insert((left, right));
-                        }
-                    }
-                }
-            }
-
-            last_delta_idx.store(deltas.len(), Ordering::Relaxed);
-            output.version
-        });
-
-        IncrCollection {
-            log: output_log,
-            version_node,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sort_basic_ordering() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        col.insert(&rt, 30);
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 20, 30]);
-    }
-
-    #[test]
-    fn sort_insert_maintains_order() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 30);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 30]);
-
-        col.insert(&rt, 20);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 20, 30]);
-    }
-
-    #[test]
-    fn sort_delete_maintains_order() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(sorted.version_node);
-
-        col.delete(&rt, &20);
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), vec![10, 30]);
-    }
-
-    #[test]
-    fn sort_by_custom_key() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<(String, i64)>();
-        let sorted = col.sort_by_key(&rt, |x: &(String, i64)| x.1);
-
-        col.insert(&rt, ("bob".to_string(), 30));
-        col.insert(&rt, ("alice".to_string(), 10));
-        col.insert(&rt, ("carol".to_string(), 20));
-
-        let _ = rt.get(sorted.version_node);
-        let names: Vec<String> = sorted.entries().into_iter().map(|e| e.0).collect();
-        assert_eq!(names, vec!["alice", "carol", "bob"]);
-    }
-
-    #[test]
-    fn sort_empty_collection() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-        let _ = rt.get(sorted.version_node);
-        assert_eq!(sorted.entries(), Vec::<i64>::new());
-    }
-
-    #[test]
-    fn window_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let wins = sorted.window(&rt, 3);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        col.insert(&rt, 40);
-        col.insert(&rt, 50);
-
-        let _ = rt.get(wins.version_node);
-        let elems = wins.elements();
-        assert_eq!(elems.len(), 3);
-        assert!(elems.contains(&vec![10, 20, 30]));
-        assert!(elems.contains(&vec![20, 30, 40]));
-        assert!(elems.contains(&vec![30, 40, 50]));
-    }
-
-    #[test]
-    fn window_smaller_than_size() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let wins = sorted.window(&rt, 3);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-
-        let _ = rt.get(wins.version_node);
-        assert_eq!(wins.elements().len(), 0);
-    }
-
-    #[test]
-    fn window_exact_size() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let wins = sorted.window(&rt, 3);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-
-        let _ = rt.get(wins.version_node);
-        let elems = wins.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&vec![10, 20, 30]));
-    }
-
-    #[test]
-    fn pairwise_basic() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 2);
-        assert!(elems.contains(&(10, 20)));
-        assert!(elems.contains(&(20, 30)));
-    }
-
-    #[test]
-    fn pairwise_single_element() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 0);
-    }
-
-    #[test]
-    fn pairwise_insert_middle() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-        assert!(pairs.elements().contains(&(10, 30)));
-
-        col.insert(&rt, 20);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 2);
-        assert!(elems.contains(&(10, 20)));
-        assert!(elems.contains(&(20, 30)));
-        assert!(!elems.contains(&(10, 30)));
-    }
-
-    #[test]
-    fn pairwise_delete_middle() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        col.insert(&rt, 30);
-        let _ = rt.get(pairs.version_node);
-
-        col.delete(&rt, &20);
-        let _ = rt.get(pairs.version_node);
-        let elems = pairs.elements();
-        assert_eq!(elems.len(), 1);
-        assert!(elems.contains(&(10, 30)));
-    }
-
-    #[test]
-    fn pairwise_delete_to_empty() {
-        let rt = Runtime::new();
-        let col = rt.create_collection::<i64>();
-        let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-        let pairs = sorted.pairwise(&rt);
-
-        col.insert(&rt, 10);
-        col.insert(&rt, 20);
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 1);
-
-        col.delete(&rt, &10);
-        col.delete(&rt, &20);
-        let _ = rt.get(pairs.version_node);
-        assert_eq!(pairs.elements().len(), 0);
-    }
-}
diff --git a/crates/incr-concurrent/src/state.rs b/crates/incr-concurrent/src/state.rs
deleted file mode 100644
index f3ae0ed..0000000
--- a/crates/incr-concurrent/src/state.rs
+++ /dev/null
@@ -1,333 +0,0 @@
-//! Node state machine.
-//!
-//! A node's lifecycle is governed by a small atomic state machine. The states
-//! and their transitions are specified in section 7 of the concurrent core
-//! rewrite spec. This file implements the state enum, the atomic state cell,
-//! and helpers for the transition patterns used by the runtime.
-//!
-//! ## States
-//!
-//! - [`NodeState::New`] — node exists but has never been computed.
-//! - [`NodeState::Dirty`] — needs recomputation because a dependency changed.
-//! - [`NodeState::Computing`] — a thread is actively running the compute function.
-//! - [`NodeState::Clean`] — value is current and readable.
-//! - [`NodeState::Failed`] — last compute returned an error or panicked.
-//!
-//! ## Transitions and ordering
-//!
-//! Transitions into `Computing` happen only via CAS, guaranteeing at most one
-//! thread computes a given node at a time. Transitions out of `Computing`
-//! (to `Clean` or `Failed`) use `Release` ordering to publish the compute's
-//! writes (value, deps, timestamps) to readers who Acquire-load the state.
-//! Transitions from `Clean` to `Dirty` (by the writer's dirty walk) also use
-//! `Release` ordering so that readers observing `Dirty` see the revision bump
-//! that caused the transition.
-
-use std::sync::atomic::{AtomicU8, Ordering};
-
-/// The lifecycle state of a node.
-///
-/// Stored as a `u8` so it fits in a single byte and can be represented
-/// compactly inside an atomic cell. The numeric values are load-bearing
-/// for the `AtomicNodeState` compare-and-swap helpers; do not reorder.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-#[repr(u8)]
-pub(crate) enum NodeState {
-    /// Created but never computed. First reader will CAS to Computing.
-    New = 0,
-    /// A dependency has changed; the value is stale. Next reader recomputes.
-    Dirty = 1,
-    /// A thread is currently running this node's compute function. Other
-    /// readers must wait for it to transition to Clean or Failed.
-    Computing = 2,
-    /// The value matches the current dependencies and is safe to read.
-    Clean = 3,
-    /// The last compute panicked or returned an error. The node has a
-    /// failure payload stored separately. Readers of a Failed node see
-    /// the error. Failed transitions to Dirty if a dependency changes.
-    Failed = 4,
-}
-
-impl NodeState {
-    /// Decode a raw `u8` into a `NodeState`. Panics on unknown values to
-    /// catch memory corruption early.
-    #[inline]
-    fn from_u8(v: u8) -> Self {
-        match v {
-            0 => Self::New,
-            1 => Self::Dirty,
-            2 => Self::Computing,
-            3 => Self::Clean,
-            4 => Self::Failed,
-            _ => panic!("invalid NodeState value: {}", v),
-        }
-    }
-}
-
-/// Atomic cell holding a [`NodeState`].
-///
-/// This is the single source of truth for a node's lifecycle. All transitions
-/// happen through the methods on this type, which encode the correct memory
-/// ordering for each case. Direct access to the underlying `AtomicU8` is
-/// intentionally not exposed; the transition patterns are the API.
-#[derive(Debug)]
-pub(crate) struct AtomicNodeState {
-    cell: AtomicU8,
-}
-
-impl AtomicNodeState {
-    /// Create a new state cell initialized to `state`.
-    pub(crate) fn new(state: NodeState) -> Self {
-        Self {
-            cell: AtomicU8::new(state as u8),
-        }
-    }
-
-    /// Load the current state with `Acquire` ordering.
-    ///
-    /// This is the correct load for readers on the hot path: if the returned
-    /// state is `Clean`, the Acquire synchronizes with the Release store
-    /// that transitioned the node to Clean, so subsequent Relaxed reads of
-    /// the node's value, deps, and timestamps are guaranteed to observe
-    /// the writes that happened before that transition.
-    #[inline]
-    pub(crate) fn load_acquire(&self) -> NodeState {
-        NodeState::from_u8(self.cell.load(Ordering::Acquire))
-    }
-
-    /// Load the current state with `Relaxed` ordering.
-    ///
-    /// Use this only when no synchronization with other fields is required,
-    /// for example for debug assertions, diagnostics, or when the caller
-    /// has already established happens-before via another Acquire load.
-    /// Do not use it on the hot path before reading a node's value.
-    #[inline]
-    #[allow(dead_code)]
-    pub(crate) fn load_relaxed(&self) -> NodeState {
-        NodeState::from_u8(self.cell.load(Ordering::Relaxed))
-    }
-
-    /// Store a new state with `Release` ordering.
-    ///
-    /// Use this when transitioning out of `Computing` (to `Clean` or `Failed`)
-    /// after writing the node's value, deps, and timestamps. The Release
-    /// publishes those Relaxed writes to readers who Acquire-load the state.
-    ///
-    /// Also used for transitioning from `Clean` to `Dirty` during the
-    /// writer's dirty propagation walk, so readers observing `Dirty` see
-    /// the revision bump.
-    #[inline]
-    pub(crate) fn store_release(&self, state: NodeState) {
-        self.cell.store(state as u8, Ordering::Release);
-    }
-
-    /// Attempt to transition from `expected` to `new` via compare-and-swap.
-    ///
-    /// Returns `Ok(())` if the transition succeeded (this thread now owns
-    /// whatever invariant `new` represents), or `Err(observed)` with the
-    /// state we actually observed if the CAS failed.
-    ///
-    /// Success uses `AcqRel` (Acquire to synchronize with the prior state's
-    /// Release, Release to publish this transition). Failure uses `Acquire`
-    /// so the caller sees the current state coherently with other fields.
-    #[inline]
-    pub(crate) fn try_transition(
-        &self,
-        expected: NodeState,
-        new: NodeState,
-    ) -> Result<(), NodeState> {
-        match self.cell.compare_exchange(
-            expected as u8,
-            new as u8,
-            Ordering::AcqRel,
-            Ordering::Acquire,
-        ) {
-            Ok(_) => Ok(()),
-            Err(observed) => Err(NodeState::from_u8(observed)),
-        }
-    }
-
-    /// Attempt to transition to `Computing` from any state that permits it.
-    ///
-    /// A reader encountering a node that needs recomputation uses this to
-    /// claim the right to run the compute function. Valid source states
-    /// are `New` (first computation) and `Dirty` (recomputation after a
-    /// dependency changed). `Failed` is NOT a valid source because a Failed
-    /// node stays Failed until the writer's dirty walk transitions it to
-    /// Dirty first.
-    ///
-    /// Returns `Ok(())` if this thread now owns compute, or `Err(observed)`
-    /// if the state was something else (Clean, Computing, or Failed).
-    #[inline]
-    pub(crate) fn try_claim_compute(&self) -> Result<(), NodeState> {
-        // Try the two valid source states in order of expected likelihood.
-        // Dirty is more common than New in steady state.
-        if self
-            .try_transition(NodeState::Dirty, NodeState::Computing)
-            .is_ok()
-        {
-            return Ok(());
-        }
-        self.try_transition(NodeState::New, NodeState::Computing)
-    }
-}
-
-// The type is `Send + Sync` automatically because `AtomicU8` is.
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::sync::Arc;
-    use std::thread;
-
-    #[test]
-    fn new_state_starts_at_new() {
-        let state = AtomicNodeState::new(NodeState::New);
-        assert_eq!(state.load_acquire(), NodeState::New);
-    }
-
-    #[test]
-    fn store_release_updates_state() {
-        let state = AtomicNodeState::new(NodeState::New);
-        state.store_release(NodeState::Clean);
-        assert_eq!(state.load_acquire(), NodeState::Clean);
-    }
-
-    #[test]
-    fn try_transition_from_expected_succeeds() {
-        let state = AtomicNodeState::new(NodeState::Dirty);
-        let result = state.try_transition(NodeState::Dirty, NodeState::Computing);
-        assert!(result.is_ok());
-        assert_eq!(state.load_acquire(), NodeState::Computing);
-    }
-
-    #[test]
-    fn try_transition_from_unexpected_fails() {
-        let state = AtomicNodeState::new(NodeState::Clean);
-        let result = state.try_transition(NodeState::Dirty, NodeState::Computing);
-        assert_eq!(result, Err(NodeState::Clean));
-        // State must be unchanged
-        assert_eq!(state.load_acquire(), NodeState::Clean);
-    }
-
-    #[test]
-    fn try_claim_compute_from_new() {
-        let state = AtomicNodeState::new(NodeState::New);
-        assert!(state.try_claim_compute().is_ok());
-        assert_eq!(state.load_acquire(), NodeState::Computing);
-    }
-
-    #[test]
-    fn try_claim_compute_from_dirty() {
-        let state = AtomicNodeState::new(NodeState::Dirty);
-        assert!(state.try_claim_compute().is_ok());
-        assert_eq!(state.load_acquire(), NodeState::Computing);
-    }
-
-    #[test]
-    fn try_claim_compute_from_clean_fails() {
-        let state = AtomicNodeState::new(NodeState::Clean);
-        let result = state.try_claim_compute();
-        // try_claim_compute tries Dirty first, then New. Both fail from Clean.
-        // The returned Err carries the observed state from the last attempt
-        // (the New→Computing CAS), which is Clean because that is what the
-        // CAS actually saw.
-        assert_eq!(result, Err(NodeState::Clean));
-        // State must be unchanged.
-        assert_eq!(state.load_acquire(), NodeState::Clean);
-    }
-
-    #[test]
-    fn try_claim_compute_from_computing_fails() {
-        let state = AtomicNodeState::new(NodeState::Computing);
-        let result = state.try_claim_compute();
-        assert!(result.is_err());
-        assert_eq!(state.load_acquire(), NodeState::Computing);
-    }
-
-    #[test]
-    fn try_claim_compute_from_failed_fails() {
-        let state = AtomicNodeState::new(NodeState::Failed);
-        let result = state.try_claim_compute();
-        assert!(result.is_err());
-        assert_eq!(state.load_acquire(), NodeState::Failed);
-    }
-
-    #[test]
-    fn concurrent_compute_claim_exactly_one_winner() {
-        // This is the critical concurrency invariant: when many threads race
-        // to claim compute on the same dirty node, exactly one succeeds.
-        const THREADS: usize = 16;
-        const ROUNDS: usize = 1000;
-
-        for _ in 0..ROUNDS {
-            let state = Arc::new(AtomicNodeState::new(NodeState::Dirty));
-            let winners = Arc::new(std::sync::atomic::AtomicUsize::new(0));
-
-            let handles: Vec<_> = (0..THREADS)
-                .map(|_| {
-                    let state = state.clone();
-                    let winners = winners.clone();
-                    thread::spawn(move || {
-                        if state.try_claim_compute().is_ok() {
-                            winners.fetch_add(1, Ordering::Relaxed);
-                        }
-                    })
-                })
-                .collect();
-
-            for h in handles {
-                h.join().unwrap();
-            }
-
-            assert_eq!(
-                winners.load(Ordering::Relaxed),
-                1,
-                "expected exactly one thread to claim compute, got {}",
-                winners.load(Ordering::Relaxed)
-            );
-            assert_eq!(state.load_acquire(), NodeState::Computing);
-        }
-    }
-
-    #[test]
-    fn release_acquire_synchronizes_with_sibling_data() {
-        // This test verifies the core ordering invariant the runtime depends on:
-        // when a writer transitions state to Clean with Release, a reader that
-        // observes Clean via Acquire load also sees the sibling data the writer
-        // wrote with Relaxed stores before the transition.
-        use std::sync::atomic::AtomicU64;
-
-        const ROUNDS: usize = 10_000;
-
-        for round in 0..ROUNDS {
-            let state = Arc::new(AtomicNodeState::new(NodeState::New));
-            let value = Arc::new(AtomicU64::new(0));
-
-            let writer_state = state.clone();
-            let writer_value = value.clone();
-            let writer = thread::spawn(move || {
-                // Simulate compute: write value Relaxed, then state Release.
-                writer_value.store(round as u64 + 1, Ordering::Relaxed);
-                writer_state.store_release(NodeState::Clean);
-            });
-
-            // Reader spins until it sees Clean, then checks value.
-            loop {
-                if state.load_acquire() == NodeState::Clean {
-                    let seen = value.load(Ordering::Relaxed);
-                    assert_eq!(
-                        seen,
-                        round as u64 + 1,
-                        "reader observed Clean state but stale value (round {})",
-                        round
-                    );
-                    break;
-                }
-                std::hint::spin_loop();
-            }
-
-            writer.join().unwrap();
-        }
-    }
-}
diff --git a/crates/incr-concurrent/src/value.rs b/crates/incr-concurrent/src/value.rs
deleted file mode 100644
index 69c8eb3..0000000
--- a/crates/incr-concurrent/src/value.rs
+++ /dev/null
@@ -1,356 +0,0 @@
-//! Type dispatch for the arena hierarchy.
-//!
-//! `Value` is the crate-private trait that decides which concrete
-//! arena a type lives in. Primitive types (u32, i32, u64, i64, f32,
-//! f64, bool) route to [`AtomicPrimitiveArena`] where reads are a
-//! single atomic load and writes are tear-free. Non-primitive types
-//! (String, Vec, user structs) route to [`GenericArena`] where
-//! values live in `UnsafeCell<Option<T>>` cells gated by the node
-//! state machine.
-//!
-//! This trait exists because Rust's stable surface does not support
-//! specialization: you cannot write a blanket impl plus overrides
-//! for specific concrete types. So instead the runtime's generic
-//! bounds use `Value`, and the trait's methods perform the concrete
-//! downcast at each call site. The downcast is `downcast_ref` which
-//! costs ~2 ns (a `TypeId` compare). For primitives this is
-//! outweighed by the atomic-load fast path being ~3-5 ns cheaper
-//! than GenericArena's `Option<T>::as_ref().unwrap().clone()`.
-//!
-//! ## Why not specialization
-//!
-//! Nightly `#[feature(min_specialization)]` would let us write
-//!
-//! ```ignore
-//! default impl<T: Clone + PartialEq + ...> Value for T { /* generic */ }
-//! impl Value for u64 { /* primitive override */ }
-//! ```
-//!
-//! which is strictly cleaner. v2 targets stable. We accept the
-//! small per-call downcast overhead as the price of stability and
-//! pay it back in primitive performance.
-//!
-//! ## User-facing implications
-//!
-//! Because there is no blanket impl, a user who wants to store a
-//! custom type `MyStruct` in an incr Runtime must provide an
-//! explicit `Value` impl for `MyStruct`. The [`impl_value_generic`]
-//! macro generates the boilerplate: `impl_value_generic!(MyStruct);`.
-//! Every `impl Value` for a generic type routes to
-//! `GenericArena<MyStruct>`; primitive dispatch is only for the
-//! sealed list of primitives in this module.
-
-use super::arena::{AtomicPrimitive, AtomicPrimitiveArena, ErasedArena, GenericArena};
-
-/// A type that can be stored in an incr Runtime. Dispatches between
-/// `AtomicPrimitiveArena` (for primitives) and `GenericArena` (for
-/// everything else).
-///
-/// All methods take `&dyn ErasedArena` and internally downcast to
-/// the concrete arena type. The downcast is guaranteed to succeed
-/// when the arena was originally constructed via `Self::create_arena`
-/// (the registry enforces this by keying on `TypeId::of::<T>()`).
-/// A panic from the downcast indicates a library bug, not a user
-/// error.
-pub trait Value: Clone + PartialEq + Send + Sync + 'static {
-    /// Construct the concrete arena for this value type. Called once
-    /// per type by `ArenaRegistry::ensure_arena` the first time the
-    /// runtime sees a node of this type.
-    fn create_arena() -> Box<dyn ErasedArena>;
-
-    /// Reserve a new slot populated with `initial`. Used by
-    /// `create_input` where the value is known at node creation.
-    fn reserve_with(arena: &dyn ErasedArena, initial: Self) -> u32;
-
-    /// Reserve an empty slot. Used by `create_query` where the slot
-    /// will be populated on the first compute. For primitive arenas
-    /// the slot is zero-initialized; for generic arenas it is `None`.
-    fn reserve_empty(arena: &dyn ErasedArena) -> u32;
-
-    /// Read the value at `slot`. Caller is responsible for
-    /// establishing happens-before with the most recent writer via
-    /// an Acquire load on the node's state before calling.
-    fn read(arena: &dyn ErasedArena, slot: u32) -> Self;
-
-    /// Try to read the value at `slot`, returning `None` if the slot
-    /// has not yet been populated. For primitive arenas this is
-    /// always `Some` (slots are initialized to zero on reserve).
-    /// For generic arenas this is `None` when the slot is an
-    /// uninitialized `Option::None` (the first-compute-panicked
-    /// case; see commit L's retry path).
-    fn try_read(arena: &dyn ErasedArena, slot: u32) -> Option<Self>;
-
-    /// Overwrite the value at `slot`. Caller must own exclusive
-    /// access to the slot (via Computing state ownership or the
-    /// runtime's write mutex).
-    fn write(arena: &dyn ErasedArena, slot: u32, value: Self);
-}
-
-/// Internal: unified implementation of the `Value` trait body for
-/// primitive types backed by `AtomicPrimitiveArena<Self>`. The
-/// macro expands to a full trait impl; see `impl_value_primitive`
-/// usages below for the concrete applications.
-macro_rules! impl_value_primitive {
-    ($t:ty) => {
-        impl Value for $t {
-            #[inline]
-            fn create_arena() -> Box<dyn ErasedArena> {
-                Box::new(AtomicPrimitiveArena::<$t>::new())
-            }
-
-            #[inline]
-            fn reserve_with(arena: &dyn ErasedArena, initial: Self) -> u32 {
-                downcast_primitive::<$t>(arena).reserve(initial)
-            }
-
-            #[inline]
-            fn reserve_empty(arena: &dyn ErasedArena) -> u32 {
-                downcast_primitive::<$t>(arena).reserve(<$t as AtomicPrimitive>::zero())
-            }
-
-            #[inline]
-            fn read(arena: &dyn ErasedArena, slot: u32) -> Self {
-                downcast_primitive::<$t>(arena).read(slot)
-            }
-
-            #[inline]
-            fn try_read(arena: &dyn ErasedArena, slot: u32) -> Option<Self> {
-                // Primitive slots are initialized to zero on reserve,
-                // so there is no "uninitialized" state to return None
-                // for. Always Some.
-                Some(downcast_primitive::<$t>(arena).read(slot))
-            }
-
-            #[inline]
-            fn write(arena: &dyn ErasedArena, slot: u32, value: Self) {
-                downcast_primitive::<$t>(arena).write(slot, value);
-            }
-        }
-    };
-}
-
-/// Internal: concrete downcast helper for the primitive Value impls.
-/// Factored out of the macro so the downcast site has one code path
-/// and one panic message across all primitive types.
-#[inline]
-fn downcast_primitive<T: AtomicPrimitive>(arena: &dyn ErasedArena) -> &AtomicPrimitiveArena<T> {
-    arena
-        .as_any()
-        .downcast_ref::<AtomicPrimitiveArena<T>>()
-        .expect("Value impl invariant violated: primitive arena type mismatch")
-}
-
-impl_value_primitive!(u32);
-impl_value_primitive!(i32);
-impl_value_primitive!(u64);
-impl_value_primitive!(i64);
-impl_value_primitive!(f32);
-impl_value_primitive!(f64);
-impl_value_primitive!(bool);
-
-/// Internal: unified implementation of the `Value` trait body for
-/// non-primitive types backed by `GenericArena<Self>`. Parallels
-/// `impl_value_primitive` but routes to the generic arena.
-macro_rules! impl_value_generic {
-    ($t:ty) => {
-        impl Value for $t {
-            #[inline]
-            fn create_arena() -> Box<dyn ErasedArena> {
-                Box::new(GenericArena::<$t>::new())
-            }
-
-            #[inline]
-            fn reserve_with(arena: &dyn ErasedArena, initial: Self) -> u32 {
-                downcast_generic::<$t>(arena).reserve_with(initial)
-            }
-
-            #[inline]
-            fn reserve_empty(arena: &dyn ErasedArena) -> u32 {
-                downcast_generic::<$t>(arena).reserve()
-            }
-
-            #[inline]
-            fn read(arena: &dyn ErasedArena, slot: u32) -> Self {
-                downcast_generic::<$t>(arena).read(slot)
-            }
-
-            #[inline]
-            fn try_read(arena: &dyn ErasedArena, slot: u32) -> Option<Self> {
-                downcast_generic::<$t>(arena).try_read(slot)
-            }
-
-            #[inline]
-            fn write(arena: &dyn ErasedArena, slot: u32, value: Self) {
-                downcast_generic::<$t>(arena).write(slot, value);
-            }
-        }
-    };
-}
-
-/// Concrete downcast helper for the generic Value impls. Public so that
-/// `impl_value!` expansions in downstream crates can call it.
-#[inline]
-pub fn downcast_generic<T: Clone + Send + Sync + 'static>(
-    arena: &dyn ErasedArena,
-) -> &GenericArena<T> {
-    arena
-        .as_any()
-        .downcast_ref::<GenericArena<T>>()
-        .expect("Value impl invariant violated: generic arena type mismatch")
-}
-
-impl_value_generic!(String);
-
-/// Blanket impl for `Vec<T>`. This is the one place where a generic
-/// impl is OK: `Vec<T>` is a distinct type from any primitive or
-/// other named type, so there is no conflict with the primitive
-/// impls. Note that this does NOT require `T: Value`; only that `T`
-/// has the underlying bounds the arena needs (`Clone + PartialEq +
-/// Send + Sync + 'static`). A nested Incr-aware Vec would be
-/// pathological and is not a supported pattern.
-impl<T> Value for Vec<T>
-where
-    T: Clone + PartialEq + Send + Sync + 'static,
-{
-    #[inline]
-    fn create_arena() -> Box<dyn ErasedArena> {
-        Box::new(GenericArena::<Vec<T>>::new())
-    }
-
-    #[inline]
-    fn reserve_with(arena: &dyn ErasedArena, initial: Self) -> u32 {
-        downcast_generic::<Vec<T>>(arena).reserve_with(initial)
-    }
-
-    #[inline]
-    fn reserve_empty(arena: &dyn ErasedArena) -> u32 {
-        downcast_generic::<Vec<T>>(arena).reserve()
-    }
-
-    #[inline]
-    fn read(arena: &dyn ErasedArena, slot: u32) -> Self {
-        downcast_generic::<Vec<T>>(arena).read(slot)
-    }
-
-    #[inline]
-    fn try_read(arena: &dyn ErasedArena, slot: u32) -> Option<Self> {
-        downcast_generic::<Vec<T>>(arena).try_read(slot)
-    }
-
-    #[inline]
-    fn write(arena: &dyn ErasedArena, slot: u32, value: Self) {
-        downcast_generic::<Vec<T>>(arena).write(slot, value);
-    }
-}
-
-impl<T> Value for Option<T>
-where
-    T: Clone + PartialEq + Send + Sync + 'static,
-{
-    #[inline]
-    fn create_arena() -> Box<dyn ErasedArena> {
-        Box::new(GenericArena::<Option<T>>::new())
-    }
-
-    #[inline]
-    fn reserve_with(arena: &dyn ErasedArena, initial: Self) -> u32 {
-        downcast_generic::<Option<T>>(arena).reserve_with(initial)
-    }
-
-    #[inline]
-    fn reserve_empty(arena: &dyn ErasedArena) -> u32 {
-        downcast_generic::<Option<T>>(arena).reserve()
-    }
-
-    #[inline]
-    fn read(arena: &dyn ErasedArena, slot: u32) -> Self {
-        downcast_generic::<Option<T>>(arena).read(slot)
-    }
-
-    #[inline]
-    fn try_read(arena: &dyn ErasedArena, slot: u32) -> Option<Self> {
-        downcast_generic::<Option<T>>(arena).try_read(slot)
-    }
-
-    #[inline]
-    fn write(arena: &dyn ErasedArena, slot: u32, value: Self) {
-        downcast_generic::<Option<T>>(arena).write(slot, value);
-    }
-}
-
-impl<A, B> Value for (A, B)
-where
-    A: Clone + PartialEq + Send + Sync + 'static,
-    B: Clone + PartialEq + Send + Sync + 'static,
-{
-    #[inline]
-    fn create_arena() -> Box<dyn ErasedArena> {
-        Box::new(GenericArena::<(A, B)>::new())
-    }
-
-    #[inline]
-    fn reserve_with(arena: &dyn ErasedArena, initial: Self) -> u32 {
-        downcast_generic::<(A, B)>(arena).reserve_with(initial)
-    }
-
-    #[inline]
-    fn reserve_empty(arena: &dyn ErasedArena) -> u32 {
-        downcast_generic::<(A, B)>(arena).reserve()
-    }
-
-    #[inline]
-    fn read(arena: &dyn ErasedArena, slot: u32) -> Self {
-        downcast_generic::<(A, B)>(arena).read(slot)
-    }
-
-    #[inline]
-    fn try_read(arena: &dyn ErasedArena, slot: u32) -> Option<Self> {
-        downcast_generic::<(A, B)>(arena).try_read(slot)
-    }
-
-    #[inline]
-    fn write(arena: &dyn ErasedArena, slot: u32, value: Self) {
-        downcast_generic::<(A, B)>(arena).write(slot, value);
-    }
-}
-
-/// Public macro for implementing Value for user-defined types.
-/// Routes the type to GenericArena.
-///
-/// Usage: `incr_concurrent::impl_value!(MyStruct);`
-#[macro_export]
-macro_rules! impl_value {
-    ($t:ty) => {
-        impl $crate::Value for $t {
-            #[inline]
-            fn create_arena() -> Box<dyn $crate::arena::ErasedArena> {
-                Box::new($crate::arena::GenericArena::<$t>::new())
-            }
-
-            #[inline]
-            fn reserve_with(arena: &dyn $crate::arena::ErasedArena, initial: Self) -> u32 {
-                $crate::value::downcast_generic::<$t>(arena).reserve_with(initial)
-            }
-
-            #[inline]
-            fn reserve_empty(arena: &dyn $crate::arena::ErasedArena) -> u32 {
-                $crate::value::downcast_generic::<$t>(arena).reserve()
-            }
-
-            #[inline]
-            fn read(arena: &dyn $crate::arena::ErasedArena, slot: u32) -> Self {
-                $crate::value::downcast_generic::<$t>(arena).read(slot)
-            }
-
-            #[inline]
-            fn try_read(arena: &dyn $crate::arena::ErasedArena, slot: u32) -> Option<Self> {
-                $crate::value::downcast_generic::<$t>(arena).try_read(slot)
-            }
-
-            #[inline]
-            fn write(arena: &dyn $crate::arena::ErasedArena, slot: u32, value: Self) {
-                $crate::value::downcast_generic::<$t>(arena).write(slot, value);
-            }
-        }
-    };
-}
diff --git a/crates/incr-concurrent/tests/collection_property.rs b/crates/incr-concurrent/tests/collection_property.rs
deleted file mode 100644
index a45f305..0000000
--- a/crates/incr-concurrent/tests/collection_property.rs
+++ /dev/null
@@ -1,238 +0,0 @@
-use incr_concurrent::Runtime;
-use proptest::prelude::*;
-
-#[derive(Clone, Debug)]
-enum Op {
-    Insert(i64),
-    Delete(i64),
-}
-
-fn verify_collection_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let evens = col.filter(&rt, |x| x % 2 == 0);
-    let doubled = evens.map(&rt, |x| x * 2);
-    let count = doubled.count(&rt);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let incr_count = rt.get(count);
-    let incr_elements: std::collections::HashSet<i64> = doubled.elements();
-
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let batch_elements: std::collections::HashSet<i64> = batch_set
-        .iter()
-        .filter(|x| *x % 2 == 0)
-        .map(|x| x * 2)
-        .collect();
-
-    assert_eq!(
-        incr_count as usize,
-        batch_elements.len(),
-        "Count mismatch: incr={}, batch={}",
-        incr_count,
-        batch_elements.len()
-    );
-    assert_eq!(incr_elements, batch_elements, "Elements mismatch");
-}
-
-fn op_strategy() -> impl Strategy<Value = Op> {
-    prop_oneof![
-        (-100_i64..100).prop_map(Op::Insert),
-        (-100_i64..100).prop_map(Op::Delete),
-    ]
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn collection_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_collection_incremental_matches_batch(ops);
-    }
-}
-
-#[test]
-fn collection_property_specific_insert_delete_cycle() {
-    verify_collection_incremental_matches_batch(vec![
-        Op::Insert(2),
-        Op::Insert(4),
-        Op::Delete(2),
-        Op::Insert(6),
-        Op::Insert(3),
-        Op::Delete(4),
-    ]);
-}
-
-fn verify_reduce_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sum = col.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-    let max = col.reduce(&rt, |elements| -> Option<i64> {
-        elements.iter().copied().max()
-    });
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let incr_sum = rt.get(sum);
-    let incr_max = rt.get(max);
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let batch_sum: i64 = batch_set.iter().sum();
-    let batch_max: Option<i64> = batch_set.iter().copied().max();
-
-    assert_eq!(
-        incr_sum, batch_sum,
-        "Sum mismatch: incr={}, batch={}",
-        incr_sum, batch_sum
-    );
-    assert_eq!(
-        incr_max, batch_max,
-        "Max mismatch: incr={:?}, batch={:?}",
-        incr_max, batch_max
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn reduce_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_reduce_incremental_matches_batch(ops);
-    }
-}
-
-fn verify_sort_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let _ = rt.get(sorted.version_node());
-    let incr_sorted = sorted.entries();
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let mut batch_sorted: Vec<i64> = batch_set.into_iter().collect();
-    batch_sorted.sort();
-
-    assert_eq!(
-        incr_sorted, batch_sorted,
-        "Sort mismatch: incr={:?}, batch={:?}",
-        incr_sorted, batch_sorted
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn sort_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_sort_incremental_matches_batch(ops);
-    }
-}
-
-fn verify_pairwise_incremental_matches_batch(ops: Vec<Op>) {
-    let rt = Runtime::new();
-    let col = rt.create_collection::<i64>();
-    let sorted = col.sort_by_key(&rt, |x: &i64| *x);
-    let pairs = sorted.pairwise(&rt);
-    let pair_count = pairs.count(&rt);
-
-    for op in &ops {
-        match op {
-            Op::Insert(v) => col.insert(&rt, *v),
-            Op::Delete(v) => col.delete(&rt, v),
-        }
-    }
-
-    let _ = rt.get(pair_count); // forces stabilization of the full chain
-    let incr_pairs = pairs.elements();
-
-    // Batch oracle
-    let mut batch_set = std::collections::HashSet::new();
-    for op in &ops {
-        match op {
-            Op::Insert(v) => {
-                batch_set.insert(*v);
-            }
-            Op::Delete(v) => {
-                batch_set.remove(v);
-            }
-        }
-    }
-    let mut batch_sorted: Vec<i64> = batch_set.into_iter().collect();
-    batch_sorted.sort();
-    let batch_pairs: std::collections::HashSet<(i64, i64)> =
-        batch_sorted.windows(2).map(|w| (w[0], w[1])).collect();
-
-    assert_eq!(
-        incr_pairs, batch_pairs,
-        "Pairwise mismatch: incr={:?}, batch={:?}",
-        incr_pairs, batch_pairs
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn pairwise_incremental_matches_batch(
-        ops in prop::collection::vec(op_strategy(), 1..50),
-    ) {
-        verify_pairwise_incremental_matches_batch(ops);
-    }
-}
diff --git a/crates/incr-concurrent/tests/integration.rs b/crates/incr-concurrent/tests/integration.rs
index 17f1c3f..d4f243b 100644
--- a/crates/incr-concurrent/tests/integration.rs
+++ b/crates/incr-concurrent/tests/integration.rs
@@ -1,271 +1,125 @@
-// crates/incr-concurrent/tests/integration.rs
-use incr_concurrent::{IncrCollection, Runtime};
+//! Smoke tests for the `incr-concurrent` v0.2 wrapper. They prove the
+//! re-exports compile, the API works end-to-end, and the runtime is
+//! actually `Send + Sync` (shared across threads with an `Arc`).
 
-#[test]
-fn spec_example_width_height_area() {
-    let rt = Runtime::new();
-
-    let width = rt.create_input(10.0_f64);
-    let height = rt.create_input(5.0_f64);
-
-    let area = rt.create_query(move |rt| rt.get(width) * rt.get(height));
+use incr_concurrent::{IncrCollection, Runtime, SortedCollection};
+use std::sync::Arc;
+use std::thread;
 
-    let description = rt.create_query(move |rt| format!("Area is {}", rt.get(area)));
-
-    assert_eq!(rt.get(description), "Area is 50");
-
-    rt.set(width, 12.0);
-    assert_eq!(rt.get(description), "Area is 60");
+#[test]
+fn runtime_is_send_sync() {
+    fn assert_send_sync<T: Send + Sync>() {}
+    assert_send_sync::<Runtime>();
+    assert_send_sync::<Arc<Runtime>>();
+    assert_send_sync::<incr_concurrent::Incr<u64>>();
 }
 
 #[test]
-fn spec_example_incremental_updates() {
+fn function_dag_chain_propagates() {
     let rt = Runtime::new();
-
-    let x = rt.create_input(1_i64);
-    let y = rt.create_input(2_i64);
-
-    let sum = rt.create_query(move |rt| rt.get(x) + rt.get(y));
-    let doubled = rt.create_query(move |rt| rt.get(sum) * 2);
-    let label = rt.create_query(move |rt| format!("result: {}", rt.get(doubled)));
-
-    assert_eq!(rt.get(label), "result: 6"); // (1+2)*2 = 6
-
-    rt.set(x, 10);
-    assert_eq!(rt.get(label), "result: 24"); // (10+2)*2 = 24
-
-    rt.set(y, 5);
-    assert_eq!(rt.get(label), "result: 30"); // (10+5)*2 = 30
+    let a = rt.create_input(1_i64);
+    let b = rt.create_query(move |rt| rt.get(a) + 1);
+    let c = rt.create_query(move |rt| rt.get(b) * 2);
+    assert_eq!(rt.get(c), 4);
+    rt.set(a, 10);
+    assert_eq!(rt.get(c), 22);
 }
 
 #[test]
-fn complex_graph_with_early_cutoff() {
-    use std::sync::atomic::{AtomicU32, Ordering};
-    use std::sync::Arc;
-
+fn early_cutoff_stops_propagation() {
     let rt = Runtime::new();
-
-    let raw_score = rt.create_input(85_i64);
-
-    let normalize_count = Arc::new(AtomicU32::new(0));
-    let nc = normalize_count.clone();
-    let normalized = rt.create_query(move |rt| {
-        nc.fetch_add(1, Ordering::Relaxed);
-        rt.get(raw_score).clamp(0, 100)
-    });
-
-    let format_count = Arc::new(AtomicU32::new(0));
-    let fc = format_count.clone();
-    let display = rt.create_query(move |rt| {
-        fc.fetch_add(1, Ordering::Relaxed);
-        let score = rt.get(normalized);
-        if score >= 90 {
-            "A".to_string()
-        } else if score >= 80 {
-            "B".to_string()
-        } else {
-            "C".to_string()
-        }
-    });
-
-    assert_eq!(rt.get(display), "B");
-    assert_eq!(normalize_count.load(Ordering::Relaxed), 1);
-    assert_eq!(format_count.load(Ordering::Relaxed), 1);
-
-    rt.set(raw_score, 95);
-    assert_eq!(rt.get(display), "A");
-    assert_eq!(normalize_count.load(Ordering::Relaxed), 2);
-    assert_eq!(format_count.load(Ordering::Relaxed), 2);
-
-    rt.set(raw_score, 150);
-    assert_eq!(rt.get(display), "A");
-    assert_eq!(normalize_count.load(Ordering::Relaxed), 3);
-    assert_eq!(format_count.load(Ordering::Relaxed), 3);
-
-    // Early cutoff: 200 clamped to 100, same as 150 clamped to 100
-    rt.set(raw_score, 200);
-    assert_eq!(rt.get(display), "A");
-    assert_eq!(normalize_count.load(Ordering::Relaxed), 4);
-    assert_eq!(format_count.load(Ordering::Relaxed), 3); // NOT recomputed — early cutoff!
+    let input = rt.create_input(200_i64);
+    let clamped = rt.create_query(move |rt| rt.get(input).min(100));
+    let after = rt.create_query(move |rt| rt.get(clamped) + 1);
+    assert_eq!(rt.get(after), 101);
+    rt.set(input, 300);
+    // `clamped` is still 100, so `after` should be cut off; only its value (101) is checked
+    assert_eq!(rt.get(after), 101);
 }
 
 #[test]
-fn string_values_work() {
-    let rt = Runtime::new();
-
-    let first = rt.create_input("Hello".to_string());
-    let last = rt.create_input("World".to_string());
-
-    let full = rt.create_query(move |rt| format!("{} {}", rt.get(first), rt.get(last)));
-
-    assert_eq!(rt.get(full), "Hello World");
+fn concurrent_writer_reader_no_torn_reads() {
+    // One writer thread mutates an input; many reader threads pull a
+    // derived doubling. Every valid result is even, so an odd value means
+    // a torn read; stale-but-consistent (even) values are not detected.
+    let rt = Arc::new(Runtime::new());
+    let counter = rt.create_input(0_i64);
+    let doubled = rt.create_query(move |rt| rt.get(counter) * 2);
+
+    let writer = {
+        let rt = Arc::clone(&rt);
+        thread::spawn(move || {
+            for i in 1..=1000 {
+                rt.set(counter, i);
+            }
+        })
+    };
+
+    let mut readers = Vec::new();
+    for _ in 0..4 {
+        let rt = Arc::clone(&rt);
+        readers.push(thread::spawn(move || {
+            for _ in 0..500 {
+                let v = rt.get(doubled);
+                assert!(v % 2 == 0, "torn read: got odd value {}", v);
+            }
+        }));
+    }
 
-    rt.set(first, "Goodbye".to_string());
-    assert_eq!(rt.get(full), "Goodbye World");
+    writer.join().unwrap();
+    for r in readers {
+        r.join().unwrap();
+    }
 }
 
 #[test]
-fn collection_feeds_function_query() {
+fn collection_filter_map_reduce_pipeline() {
     let rt = Runtime::new();
-    let scores = rt.create_collection::<i64>();
-    let high_scores = scores.filter(&rt, |s| *s >= 90);
-    let count = high_scores.count(&rt);
-
-    let summary = rt.create_query(move |rt| {
-        let n = rt.get(count);
-        format!("{} students scored 90+", n)
-    });
-
-    scores.insert(&rt, 85);
-    scores.insert(&rt, 92);
-    scores.insert(&rt, 78);
+    let scores: IncrCollection<i64> = rt.create_collection();
+    let passing = scores.filter(&rt, |s| *s >= 50);
+    let curved = passing.map(&rt, |s| s + 10);
+    let total = curved.reduce(&rt, |xs| xs.iter().sum::<i64>());
+    scores.insert(&rt, 80);
     scores.insert(&rt, 95);
-
-    assert_eq!(rt.get(summary), "2 students scored 90+");
-
-    scores.insert(&rt, 91);
-    assert_eq!(rt.get(summary), "3 students scored 90+");
-
-    scores.delete(&rt, &92);
-    assert_eq!(rt.get(summary), "2 students scored 90+");
+    scores.insert(&rt, 60);
+    scores.insert(&rt, 42);
+    assert_eq!(rt.get(total), 265);
 }
 
 #[test]
-fn full_pipeline_filter_map_count_query() {
-    #[derive(Clone, Hash, Eq, PartialEq, Debug)]
-    struct User {
-        name: String,
-        age: i32,
-        active: bool,
-    }
-
+fn sort_pairwise_count() {
     let rt = Runtime::new();
-    let users: IncrCollection<User> = rt.create_collection();
-
-    let active_adults = users
-        .filter(&rt, |u| u.active)
-        .filter(&rt, |u| u.age >= 18)
-        .map(&rt, |u| u.name.clone());
-
-    let count = active_adults.count(&rt);
-
-    let summary = rt.create_query(move |rt| format!("{} active adults", rt.get(count)));
-
-    users.insert(
-        &rt,
-        User {
-            name: "Alice".into(),
-            age: 30,
-            active: true,
-        },
-    );
-    users.insert(
-        &rt,
-        User {
-            name: "Bob".into(),
-            age: 16,
-            active: true,
-        },
-    );
-    users.insert(
-        &rt,
-        User {
-            name: "Carol".into(),
-            age: 25,
-            active: false,
-        },
-    );
-
-    assert_eq!(rt.get(summary), "1 active adults");
-
-    users.insert(
-        &rt,
-        User {
-            name: "Dave".into(),
-            age: 22,
-            active: true,
-        },
-    );
-    assert_eq!(rt.get(summary), "2 active adults");
-
-    users.delete(
-        &rt,
-        &User {
-            name: "Alice".into(),
-            age: 30,
-            active: true,
-        },
-    );
-    assert_eq!(rt.get(summary), "1 active adults");
+    let c: IncrCollection<i64> = rt.create_collection();
+    let sorted: SortedCollection<i64, i64> = c.sort_by_key(&rt, |x| *x);
+    let pairs = sorted.pairwise(&rt);
+    c.insert(&rt, 5);
+    c.insert(&rt, 1);
+    c.insert(&rt, 3);
+    let n = pairs.count(&rt);
+    assert_eq!(rt.get(n), 2);
 }
 
 #[test]
-fn sort_pairwise_map_reduce_pipeline() {
-    // Simulates: given a set of visit timestamps, compute total gaps between
-    // consecutive visits. This is the core pattern for travel time calculation.
+fn group_by_two_buckets() {
     let rt = Runtime::new();
-    let visits = rt.create_collection::<i64>(); // timestamps
-
-    let sorted = visits.sort_by_key(&rt, |t: &i64| *t);
-    let pairs = sorted.pairwise(&rt);
-
-    let gaps = pairs.map(&rt, |(a, b): &(i64, i64)| b - a);
-
-    // Sum all gaps
-    let total_gap = gaps.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-    // Start with visits at times 10, 30, 50
-    visits.insert(&rt, 10);
-    visits.insert(&rt, 30);
-    visits.insert(&rt, 50);
-    assert_eq!(rt.get(total_gap), 40); // (30-10) + (50-30) = 40
-
-    // Insert visit at time 20: gaps become 10 + 10 + 20 = 40 (same total!)
-    visits.insert(&rt, 20);
-    assert_eq!(rt.get(total_gap), 40); // (20-10) + (30-20) + (50-30) = 40
-
-    // Delete visit at time 30: gaps become 10 + 30 = 40 (still same!)
-    visits.delete(&rt, &30);
-    assert_eq!(rt.get(total_gap), 40); // (20-10) + (50-20) = 40
-
-    // Insert visit at time 100: adds a big gap
-    visits.insert(&rt, 100);
-    assert_eq!(rt.get(total_gap), 90); // (20-10) + (50-20) + (100-50) = 90
-
-    visits.delete(&rt, &10);
-    assert_eq!(rt.get(total_gap), 80); // (50-20) + (100-50) = 80
+    let c: IncrCollection<i64> = rt.create_collection();
+    let groups = c.group_by(&rt, |x| x % 2);
+    for i in 1..=6_i64 {
+        c.insert(&rt, i);
+    }
+    let _ = rt.get(groups.version_node());
+    assert_eq!(groups.group_count(), 2);
 }
 
 #[test]
-fn pipeline_early_cutoff() {
-    // Verify that early cutoff works through the full pipeline:
-    // if total doesn't change, downstream isn't recomputed
-    use std::sync::atomic::{AtomicU32, Ordering};
-    use std::sync::Arc;
-
+fn join_two_collections() {
     let rt = Runtime::new();
-    let visits = rt.create_collection::<i64>();
-    let sorted = visits.sort_by_key(&rt, |t: &i64| *t);
-    let pairs = sorted.pairwise(&rt);
-    let gaps = pairs.map(&rt, |(a, b): &(i64, i64)| b - a);
-    let total_gap = gaps.reduce(&rt, |elements| -> i64 { elements.iter().sum() });
-
-    let downstream_evals = Arc::new(AtomicU32::new(0));
-    let dc = downstream_evals.clone();
-    let label = rt.create_query(move |rt| {
-        dc.fetch_add(1, Ordering::Relaxed);
-        format!("total={}", rt.get(total_gap))
-    });
-
-    visits.insert(&rt, 10);
-    visits.insert(&rt, 30);
-    visits.insert(&rt, 50);
-    assert_eq!(rt.get(label), "total=40");
-    assert_eq!(downstream_evals.load(Ordering::Relaxed), 1);
-
-    // Insert 20 between 10 and 30: total gap is still 40
-    visits.insert(&rt, 20);
-    assert_eq!(rt.get(label), "total=40");
-    // Early cutoff: total_gap unchanged, so label shouldn't recompute
-    assert_eq!(downstream_evals.load(Ordering::Relaxed), 1);
+    let left: IncrCollection<(i64, &'static str)> = rt.create_collection();
+    let right: IncrCollection<(i64, i64)> = rt.create_collection();
+    let j = left.join(&rt, &right, |l| l.0, |r| r.0);
+    left.insert(&rt, (1, "alice"));
+    right.insert(&rt, (1, 100));
+    right.insert(&rt, (1, 200));
+    let n = j.count(&rt);
+    assert_eq!(rt.get(n), 2);
 }
diff --git a/crates/incr-concurrent/tests/property.rs b/crates/incr-concurrent/tests/property.rs
deleted file mode 100644
index 7fad5ec..0000000
--- a/crates/incr-concurrent/tests/property.rs
+++ /dev/null
@@ -1,154 +0,0 @@
-// crates/incr-concurrent/tests/property.rs
-use incr_concurrent::{Incr, Runtime};
-use proptest::prelude::*;
-
-/// Build a layered graph of the given shape, run it incrementally,
-/// then rebuild from scratch and compare results.
-fn verify_incremental_matches_batch(
-    num_inputs: usize,
-    input_values: Vec<i64>,
-    layers: Vec<Vec<(usize, usize)>>, // Each layer: vec of (dep_a_idx, dep_b_idx) pairs
-    mutations: Vec<(usize, i64)>,     // (input_index, new_value) pairs
-) {
-    assert!(num_inputs >= 2);
-    assert_eq!(input_values.len(), num_inputs);
-
-    let rt = Runtime::new();
-    let mut all_nodes: Vec<Incr<i64>> = Vec::new();
-
-    for &val in &input_values {
-        let node = rt.create_input(val);
-        all_nodes.push(node);
-    }
-
-    for layer in &layers {
-        let mut layer_nodes = Vec::new();
-        for &(dep_a_rel, dep_b_rel) in layer {
-            let available = all_nodes.len();
-            if available < 2 {
-                continue;
-            }
-            let idx_a = dep_a_rel % available;
-            let idx_b = dep_b_rel % available;
-            let a = all_nodes[idx_a];
-            let b = all_nodes[idx_b];
-            let node = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            layer_nodes.push(node);
-        }
-        all_nodes.extend(layer_nodes);
-    }
-
-    if all_nodes.len() <= num_inputs {
-        return; // No compute nodes generated
-    }
-
-    // Read all compute nodes to initialize
-    let last = *all_nodes.last().unwrap();
-    let _ = rt.get(last);
-
-    // Apply mutations
-    for &(input_rel, new_val) in &mutations {
-        let idx = input_rel % num_inputs;
-        rt.set(all_nodes[idx], new_val);
-    }
-
-    let incremental_result = rt.get(last);
-
-    let mut final_values = input_values.clone();
-    for &(input_rel, new_val) in &mutations {
-        let idx = input_rel % num_inputs;
-        final_values[idx] = new_val;
-    }
-
-    let rt2 = Runtime::new();
-    let mut all_nodes2: Vec<Incr<i64>> = Vec::new();
-
-    for &val in &final_values {
-        let node = rt2.create_input(val);
-        all_nodes2.push(node);
-    }
-
-    for layer in &layers {
-        let mut layer_nodes = Vec::new();
-        for &(dep_a_rel, dep_b_rel) in layer {
-            let available = all_nodes2.len();
-            if available < 2 {
-                continue;
-            }
-            let idx_a = dep_a_rel % available;
-            let idx_b = dep_b_rel % available;
-            let a = all_nodes2[idx_a];
-            let b = all_nodes2[idx_b];
-            let node = rt2.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
-            layer_nodes.push(node);
-        }
-        all_nodes2.extend(layer_nodes);
-    }
-
-    let last2 = *all_nodes2.last().unwrap();
-    let batch_result = rt2.get(last2);
-
-    assert_eq!(
-        incremental_result,
-        batch_result,
-        "Incremental result {} != batch result {} with {} inputs, {} layers, {} mutations",
-        incremental_result,
-        batch_result,
-        num_inputs,
-        layers.len(),
-        mutations.len()
-    );
-}
-
-proptest! {
-    #![proptest_config(ProptestConfig::with_cases(2000))]
-
-    #[test]
-    fn incremental_matches_batch(
-        num_inputs in 2_usize..20,
-        input_values in prop::collection::vec(-1000_i64..1000, 2..20),
-        layers in prop::collection::vec(
-            prop::collection::vec((0_usize..100, 0_usize..100), 1..5),
-            1..8
-        ),
-        mutations in prop::collection::vec((0_usize..100, -1000_i64..1000), 1..20),
-    ) {
-        let num_inputs = num_inputs.min(input_values.len()).max(2);
-        let input_values = input_values[..num_inputs].to_vec();
-        verify_incremental_matches_batch(num_inputs, input_values, layers, mutations);
-    }
-}
-
-#[test]
-fn property_specific_diamond_cutoff() {
-    verify_incremental_matches_batch(
-        3,
-        vec![10, 20, 30],
-        vec![
-            vec![(0, 1), (1, 2)], // Layer 1: node3=in0+in1, node4=in1+in2
-            vec![(0, 1)],         // Layer 2: node5=node3+node4
-        ],
-        vec![(0, 10), (1, 25)], // Change input 0 (same!), change input 1
-    );
-}
-
-#[test]
-fn property_deep_chain() {
-    verify_incremental_matches_batch(
-        5,
-        vec![1, 2, 3, 4, 5],
-        vec![
-            vec![(0, 1)],
-            vec![(2, 0)],
-            vec![(0, 1)],
-            vec![(1, 0)],
-            vec![(0, 1)],
-            vec![(2, 0)],
-            vec![(0, 1)],
-            vec![(1, 0)],
-            vec![(0, 1)],
-            vec![(2, 0)],
-        ],
-        vec![(0, 100), (2, 50), (4, 75)],
-    );
-}
diff --git a/crates/incr-core/Cargo.toml b/crates/incr-core/Cargo.toml
new file mode 100644
index 0000000..8ea2971
--- /dev/null
+++ b/crates/incr-core/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "incr-core"
+version = "0.2.0-beta.1"
+edition = "2021"
+description = "Shared core for the incr family. Strategy-parameterized incremental computation engine. Used via incr-compute (single-threaded) or incr-concurrent (Send + Sync)."
+license = "Apache-2.0"
+repository = "https://github.com/Anyesh/incr"
+keywords = ["incremental", "computation", "reactive", "dataflow"]
+categories = ["algorithms", "data-structures"]
+
+[dependencies]
+haphazard = "0.1"
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+proptest = "1"
+
+[[bench]]
+name = "chain"
+harness = false
+
+[[bench]]
+name = "operators"
+harness = false
diff --git a/crates/incr-core/README.md b/crates/incr-core/README.md
new file mode 100644
index 0000000..4ee6c8e
--- /dev/null
+++ b/crates/incr-core/README.md
@@ -0,0 +1,73 @@
+# incr-core
+
+Shared engine behind [`incr-compute`](https://crates.io/crates/incr-compute) and [`incr-concurrent`](https://crates.io/crates/incr-concurrent). Strategy-parameterized: the same `Runtime<C: Cells>` monomorphizes into the single-threaded variant (`Cell`-backed) when `C = Local` and the concurrent variant (atomic-backed) when `C = Shared`.
+
+Most users should depend on one of the surface crates, not this one. Use `incr-core` directly only if:
+
+- You want to build your own concurrency strategy on top of the `Cells` trait, or
+- You're embedding the engine in a place where the wrapper crates' default choices don't fit (e.g., a custom `no_std` strategy).
+
+## Architecture
+
+The engine is built around the `Cells` strategy trait:
+
+```rust
+pub trait Cells: 'static + Sized {
+    type U8;
+    type U32;
+    type U64;
+    type State;
+    type Ptr<T: 'static>: PtrCell<T>;
+    type Lock<T: 'static>: Lock<T>;
+    type DepStack: DepStack;
+
+    // ... constructors and inline-only load/store/CAS helpers
+}
+```
+
+All trait methods are `#[inline(always)]` and take references to the strategy's cell types (for example `&Self::U64`), so the compiler can see through every call site. The validation that this carries zero overhead on the single-threaded path lives in the spike branch's RESULTS.md (`spike/incr-core-monomorphization`): `walk_local` and a hand-written non-trait baseline produce **byte-identical assembly**.
+
+Two strategy impls ship in this crate:
+
+- `Local`: `Cell<u8>`, `Cell<u32>`, `Cell<u64>`, `Cell<*mut T>`, `RefCell<T>`. `!Send + !Sync` (correct for the single-threaded variant).
+- `Shared`: `AtomicU8`, `AtomicU32`, `AtomicU64`, `AtomicPtr<T>`, `RwLock<T>`. `Send + Sync` with Acquire/Release ordering on state-machine transitions.
+
+## What's exposed
+
+- `Runtime<C: Cells>` with `create_input`, `create_query`, `get`, `set`, `node_count`, `graph_snapshot`, `get_traced`, `set_label` / `label`.
+- `Incr<T>`: 16-byte `Copy` handle with embedded `RuntimeId` for cross-runtime detection.
+- `IncrCollection<T, C>`, `GroupedCollection<K, T, C>`, `SortedCollection<T, K, C>` with the full operator suite (filter, map, count, reduce, sort_by_key, pairwise, window, group_by, join).
+- `Value` blanket trait (`Clone + PartialEq + Send + Sync + 'static`) — implemented automatically for every qualifying type.
+- Tracing types: `NodeInfo`, `NodeKindInfo`, `NodeTrace`, `TraceAction`, `PropagationTrace`.
+
+## Layout invariants
+
+`NodeData<C>` is exactly 64 bytes and 64-byte aligned under both strategies. `const _: () = assert!(...)` blocks enforce this at compile time; layout drift breaks the build immediately.
+
+The segmented node store supports up to 1M nodes per runtime (1024 segments × 1024 slots). Segments are lazily allocated, never moved, and live until the runtime drops.
+
+## Known limitations
+
+- **`get_traced` per-node trace**: records compute, verified-clean, and cutoff events for the current `get` call's compute path. Cross-thread events are not aggregated.
+
+## Memory reclamation
+
+Overflow-dep lists (the heap allocation a node holds when it has more than 7 dependencies) are retired through the [`haphazard`](https://crates.io/crates/haphazard) global hazard-pointer domain. Concurrent readers in `for_each_dep` hold a `HazardPointer` while dereferencing the slot, so an `install_deps` writer's retire is deferred until no protecting reader remains. Memory is reclaimed during normal operation, not just at runtime drop.
+
+## Soundness
+
+All unsafe code in this crate (segmented node store's `UnsafeCell + MaybeUninit` slots, `NodeData::Drop`'s `Box::from_raw` reclamation, `ArenaRegistry`'s `Arc` downcast via raw-pointer rewrap, the graveyard's deferred reclamation, the `SharedDepStack` thread-local) is exercised under `cargo +nightly miri test -p incr-core --lib`. 79 unit tests pass under miri including:
+
+- 50 dynamic dep-set transitions through the overflow path with runtime drop (`local_dynamic_overflow_deps_retirement`)
+- 16 threads × 200 rounds racing on the state machine's `try_claim_compute` CAS (`shared_concurrent_claim_one_winner`)
+- Cross-segment growth on the segmented node store with reference validity preserved across pushes
+
+No undefined behavior detected.
+
+## Stability
+
+`0.2.x` is the consolidation milestone. The `Runtime<C>` and `Cells` API is intentionally minimal but usable; user-facing API stability commitments live on the wrapper crates.
+
+## License
+
+Apache-2.0
diff --git a/crates/incr-core/benches/chain.rs b/crates/incr-core/benches/chain.rs
new file mode 100644
index 0000000..76bf92a
--- /dev/null
+++ b/crates/incr-core/benches/chain.rs
@@ -0,0 +1,165 @@
+//! Chain-propagation bench mirroring the comparison harness in
+//! `incr-concurrent/benches/comparison.rs`. The point is to confirm the
+//! consolidated `incr-core` runtime matches (or beats) the production
+//! crates' per-node propagation cost.
+//!
+//! Workload: build a chain `input → f_1 → f_2 → ... → f_n` where each
+//! `f_i` adds 1 to its predecessor. On each iteration, set a new input
+//! value, then read the chain head. Criterion reports the total time
+//! per iteration; dividing by `n` gives the per-node propagation cost.
+//!
+//! The production target was 175 ns per node propagation; the
+//! consolidated `incr-core` should land within noise of that under
+//! `Shared` and faster (no atomic-fence cost) under `Local`.
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use incr_core::{Incr, Local, Runtime, Shared};
+
+fn build_chain_local(n: usize) -> (Runtime<Local>, Incr<i64>, Incr<i64>) {
+    let rt: Runtime<Local> = Runtime::new();
+    let input = rt.create_input(1_i64);
+    let mut prev = input;
+    for _ in 0..n {
+        let dep = prev;
+        prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
+    }
+    let _ = rt.get(prev);
+    (rt, input, prev)
+}
+
+fn build_chain_shared(n: usize) -> (Runtime<Shared>, Incr<i64>, Incr<i64>) {
+    let rt: Runtime<Shared> = Runtime::new();
+    let input = rt.create_input(1_i64);
+    let mut prev = input;
+    for _ in 0..n {
+        let dep = prev;
+        prev = rt.create_query(move |rt| rt.get(dep).wrapping_add(1));
+    }
+    let _ = rt.get(prev);
+    (rt, input, prev)
+}
+
+fn bench_chain_local(c: &mut Criterion) {
+    let mut group = c.benchmark_group("chain_local");
+    for size in [4_usize, 10, 100] {
+        group.bench_with_input(BenchmarkId::new("propagate", size), &size, |b, &size| {
+            let (rt, input, output) = build_chain_local(size);
+            let mut val = 1_i64;
+            b.iter(|| {
+                val = val.wrapping_add(1);
+                rt.set(input, val);
+                black_box(rt.get(output));
+            });
+        });
+    }
+    group.finish();
+}
+
+fn bench_chain_shared(c: &mut Criterion) {
+    let mut group = c.benchmark_group("chain_shared");
+    for size in [4_usize, 10, 100] {
+        group.bench_with_input(BenchmarkId::new("propagate", size), &size, |b, &size| {
+            let (rt, input, output) = build_chain_shared(size);
+            let mut val = 1_i64;
+            b.iter(|| {
+                val = val.wrapping_add(1);
+                rt.set(input, val);
+                black_box(rt.get(output));
+            });
+        });
+    }
+    group.finish();
+}
+
+fn bench_diamond_local(c: &mut Criterion) {
+    let rt: Runtime<Local> = Runtime::new();
+    let input = rt.create_input(1_i64);
+    let a = {
+        let dep = input;
+        rt.create_query(move |rt| rt.get(dep).wrapping_add(10))
+    };
+    let b = {
+        let dep = input;
+        rt.create_query(move |rt| rt.get(dep).wrapping_add(100))
+    };
+    let out = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
+    let _ = rt.get(out);
+
+    c.bench_function("diamond_local", |bencher| {
+        let mut val = 1_i64;
+        bencher.iter(|| {
+            val = val.wrapping_add(1);
+            rt.set(input, val);
+            black_box(rt.get(out));
+        });
+    });
+}
+
+fn bench_diamond_shared(c: &mut Criterion) {
+    let rt: Runtime<Shared> = Runtime::new();
+    let input = rt.create_input(1_i64);
+    let a = {
+        let dep = input;
+        rt.create_query(move |rt| rt.get(dep).wrapping_add(10))
+    };
+    let b = {
+        let dep = input;
+        rt.create_query(move |rt| rt.get(dep).wrapping_add(100))
+    };
+    let out = rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b)));
+    let _ = rt.get(out);
+
+    c.bench_function("diamond_shared", |bencher| {
+        let mut val = 1_i64;
+        bencher.iter(|| {
+            val = val.wrapping_add(1);
+            rt.set(input, val);
+            black_box(rt.get(out));
+        });
+    });
+}
+
+fn bench_early_cutoff_local(c: &mut Criterion) {
+    let rt: Runtime<Local> = Runtime::new();
+    let input = rt.create_input(200_i64);
+    let clamped = rt.create_query(move |rt| rt.get(input).min(100));
+    let after = rt.create_query(move |rt| rt.get(clamped).wrapping_add(1));
+    let _ = rt.get(after);
+
+    c.bench_function("early_cutoff_local", |bencher| {
+        let mut val = 200_i64;
+        bencher.iter(|| {
+            val = val.wrapping_add(1);
+            rt.set(input, val); // always > 100, clamp produces 100, early cutoff
+            black_box(rt.get(after));
+        });
+    });
+}
+
+fn bench_early_cutoff_shared(c: &mut Criterion) {
+    let rt: Runtime<Shared> = Runtime::new();
+    let input = rt.create_input(200_i64);
+    let clamped = rt.create_query(move |rt| rt.get(input).min(100));
+    let after = rt.create_query(move |rt| rt.get(clamped).wrapping_add(1));
+    let _ = rt.get(after);
+
+    c.bench_function("early_cutoff_shared", |bencher| {
+        let mut val = 200_i64;
+        bencher.iter(|| {
+            val = val.wrapping_add(1);
+            rt.set(input, val);
+            black_box(rt.get(after));
+        });
+    });
+}
+
+criterion_group!(
+    chain_benches,
+    bench_chain_local,
+    bench_chain_shared,
+    bench_diamond_local,
+    bench_diamond_shared,
+    bench_early_cutoff_local,
+    bench_early_cutoff_shared,
+);
+criterion_main!(chain_benches);
diff --git a/crates/incr-core/benches/operators.rs b/crates/incr-core/benches/operators.rs
new file mode 100644
index 0000000..7c7708f
--- /dev/null
+++ b/crates/incr-core/benches/operators.rs
@@ -0,0 +1,128 @@
+//! Collection operator benches. Measures the per-insert cost on a
+//! steady-state collection (size N pre-populated) for each operator
+//! pipeline, comparing against a from-scratch `HashSet` baseline.
+//!
+//! The README claims ~14x speedup at 10K elements and ~186x at 100K
+//! for incremental vs batch on a filter+map+count pipeline. This bench
+//! validates that the v0.2 incr-core matches those numbers through the
+//! type-aliased Runtime<Local> path.
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use incr_core::{Cells, IncrCollection, Local, Runtime, Shared};
+use std::collections::HashSet;
+
+fn build_pipeline<C: Cells>(
+    size: usize,
+) -> (Runtime<C>, IncrCollection<i64, C>, incr_core::Incr<u64>)
+where
+    Runtime<C>: Default,
+{
+    let rt: Runtime<C> = Runtime::new();
+    let col: IncrCollection<i64, C> = rt.create_collection();
+    let evens = col.filter(&rt, |x| x % 2 == 0);
+    let doubled = evens.map(&rt, |x| x * 2);
+    let count = doubled.count(&rt);
+    for i in 0..size as i64 {
+        col.insert(&rt, i);
+    }
+    let _ = rt.get(count);
+    (rt, col, count)
+}
+
+fn bench_collection_incremental(c: &mut Criterion) {
+    let mut group = c.benchmark_group("collection_insert_then_read");
+    for size in [1_000_usize, 10_000, 100_000] {
+        // incr-core Local: incremental delta-log path
+        group.bench_with_input(
+            BenchmarkId::new("local_incremental", size),
+            &size,
+            |b, &size| {
+                let (rt, col, count) = build_pipeline::<Local>(size);
+                let mut next = size as i64;
+                b.iter(|| {
+                    col.insert(&rt, next);
+                    next += 1;
+                    black_box(rt.get(count));
+                });
+            },
+        );
+
+        // incr-core Shared: same pipeline, atomic strategy
+        group.bench_with_input(
+            BenchmarkId::new("shared_incremental", size),
+            &size,
+            |b, &size| {
+                let (rt, col, count) = build_pipeline::<Shared>(size);
+                let mut next = size as i64;
+                b.iter(|| {
+                    col.insert(&rt, next);
+                    next += 1;
+                    black_box(rt.get(count));
+                });
+            },
+        );
+
+        // Batch baseline: full HashSet rebuild + filter + map + count per
+        // insert. The pessimistic comparison the README uses.
+        group.bench_with_input(BenchmarkId::new("batch", size), &size, |b, &size| {
+            let mut elements: HashSet<i64> = (0..size as i64).collect();
+            let mut next = size as i64;
+            b.iter(|| {
+                elements.insert(next);
+                next += 1;
+                let result: usize = elements
+                    .iter()
+                    .filter(|x| *x % 2 == 0)
+                    .map(|x| x * 2)
+                    .count();
+                black_box(result);
+            });
+        });
+    }
+    group.finish();
+}
+
+fn bench_simple_count(c: &mut Criterion) {
+    let mut group = c.benchmark_group("simple_count");
+    for size in [1_000_usize, 10_000] {
+        group.bench_with_input(BenchmarkId::new("local", size), &size, |b, &size| {
+            let rt: Runtime<Local> = Runtime::new();
+            let col: IncrCollection<i64, Local> = rt.create_collection();
+            let n = col.count(&rt);
+            for i in 0..size as i64 {
+                col.insert(&rt, i);
+            }
+            let _ = rt.get(n);
+            let mut next = size as i64;
+            b.iter(|| {
+                col.insert(&rt, next);
+                next += 1;
+                black_box(rt.get(n));
+            });
+        });
+
+        group.bench_with_input(BenchmarkId::new("shared", size), &size, |b, &size| {
+            let rt: Runtime<Shared> = Runtime::new();
+            let col: IncrCollection<i64, Shared> = rt.create_collection();
+            let n = col.count(&rt);
+            for i in 0..size as i64 {
+                col.insert(&rt, i);
+            }
+            let _ = rt.get(n);
+            let mut next = size as i64;
+            b.iter(|| {
+                col.insert(&rt, next);
+                next += 1;
+                black_box(rt.get(n));
+            });
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(
+    operator_benches,
+    bench_collection_incremental,
+    bench_simple_count
+);
+criterion_main!(operator_benches);
diff --git a/crates/incr-core/src/arena.rs b/crates/incr-core/src/arena.rs
new file mode 100644
index 0000000..3d30c4d
--- /dev/null
+++ b/crates/incr-core/src/arena.rs
@@ -0,0 +1,102 @@
+//! Primitive arena for u64 values, parameterized over the [`Cells`]
+//! strategy.
+//!
+//! The first incr-core slice ships a flat `Vec`-backed arena because the
+//! question that needed answering ("does the strategy abstraction add
+//! cost over direct access?") is answerable without segmenting. The
+//! next slice lifts the production segmented store from incr-concurrent
+//! and parameterizes it the same way. The Vec arena stays as the
+//! reference implementation for cross-checking.
+//!
+//! Slot indexing is `u32` because (a) it matches `NodeData::arena_slot`
+//! and (b) the production `MAX_NODES` cap is 1M which fits.
+
+use crate::cells::Cells;
+
+pub struct PrimitiveArena<C: Cells> {
+    slots: Vec<C::U64>,
+}
+
+impl<C: Cells> Default for PrimitiveArena<C> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<C: Cells> PrimitiveArena<C> {
+    pub fn new() -> Self {
+        Self { slots: Vec::new() }
+    }
+
+    pub fn with_capacity(cap: usize) -> Self {
+        Self {
+            slots: Vec::with_capacity(cap),
+        }
+    }
+
+    /// Append a new slot initialized to `initial` and return its index.
+    /// Caller must hold the write side of the runtime's lock (or be
+    /// single-threaded under `Local`). Panics if the index overflows `u32`.
+    pub fn reserve(&mut self, initial: u64) -> u32 {
+        let slot = u32::try_from(self.slots.len()).expect("slot index > u32");
+        self.slots.push(C::new_u64(initial));
+        slot
+    }
+
+    /// Read the value at `slot` with Acquire ordering on `Shared`. Caller
+    /// must have established happens-before with the most recent writer
+    /// through the node state machine.
+    #[inline(always)]
+    pub fn read(&self, slot: u32) -> u64 {
+        C::u64_load_acquire(&self.slots[slot as usize])
+    }
+
+    /// Write `value` to `slot` with Release ordering on `Shared`. Caller
+    /// must own exclusive access to the slot via the Computing state.
+    #[inline(always)]
+    pub fn write(&self, slot: u32, value: u64) {
+        C::u64_store_release(&self.slots[slot as usize], value);
+    }
+
+    pub fn len(&self) -> usize {
+        self.slots.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.slots.is_empty()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    #[test]
+    fn local_arena_roundtrip() {
+        let mut a: PrimitiveArena<Local> = PrimitiveArena::new();
+        let s = a.reserve(7);
+        assert_eq!(a.read(s), 7);
+        a.write(s, 11);
+        assert_eq!(a.read(s), 11);
+    }
+
+    #[test]
+    fn shared_arena_roundtrip() {
+        let mut a: PrimitiveArena<Shared> = PrimitiveArena::new();
+        let s = a.reserve(7);
+        assert_eq!(a.read(s), 7);
+        a.write(s, 11);
+        assert_eq!(a.read(s), 11);
+    }
+
+    #[test]
+    fn local_arena_grows() {
+        let mut a: PrimitiveArena<Local> = PrimitiveArena::new();
+        let slots: Vec<u32> = (0..100).map(|i| a.reserve(i as u64)).collect();
+        assert_eq!(a.len(), 100);
+        for (i, s) in slots.into_iter().enumerate() {
+            assert_eq!(a.read(s), i as u64);
+        }
+    }
+}
diff --git a/crates/incr-core/src/arena_registry.rs b/crates/incr-core/src/arena_registry.rs
new file mode 100644
index 0000000..5103d60
--- /dev/null
+++ b/crates/incr-core/src/arena_registry.rs
@@ -0,0 +1,150 @@
+//! Type-erased arena registry.
+//!
+//! The runtime needs to store arenas for arbitrary user types `T: Value`
+//! under one lookup structure. We use the production pattern: a
+//! `HashMap<TypeId, Arc<dyn ErasedArena<C>>>`. The trait `ErasedArena<C>`
+//! is object-safe and provides downcast access to the concrete
+//! `GenericArena<T, C>`.
+//!
+//! Per-T access pattern:
+//! 1. Compute `TypeId::of::<T>()`.
+//! 2. Look up or insert `Arc<GenericArena<T, C>>` in the registry.
+//! 3. Clone the `Arc` out (cheap atomic refcount) and release the
+//!    registry lock.
+//! 4. Operate on the typed arena directly via the `Arc<GenericArena<T, C>>`.
+//!
+//! `Arc<dyn ErasedArena<C>>` requires `ErasedArena<C>: Send + Sync` so
+//! the registry itself can be `Send + Sync` (under `Shared`'s `RwLock`).
+//! Under `Local`, the `Send + Sync` bound is a zero-cost marker; the
+//! actual arena access is single-threaded.
+
+use std::any::{Any, TypeId};
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use crate::cells::Cells;
+use crate::generic_arena::GenericArena;
+use crate::value::Value;
+
+/// Object-safe arena interface. The runtime stores arenas as
+/// `Arc<dyn ErasedArena<C>>` and downcasts via `as_any` to the concrete
+/// `GenericArena<T, C>`.
+pub trait ErasedArena<C: Cells>: Send + Sync + 'static {
+    fn as_any(&self) -> &dyn Any;
+}
+
+impl<T: Value, C: Cells> ErasedArena<C> for GenericArena<T, C> {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+/// `HashMap<TypeId, Arc<dyn ErasedArena<C>>>` wrapped in a struct for
+/// ergonomics. The runtime keeps one of these behind its inner lock.
+pub struct ArenaRegistry<C: Cells> {
+    arenas: HashMap<TypeId, Arc<dyn ErasedArena<C>>>,
+}
+
+impl<C: Cells> Default for ArenaRegistry<C> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<C: Cells> ArenaRegistry<C> {
+    pub fn new() -> Self {
+        Self {
+            arenas: HashMap::new(),
+        }
+    }
+
+    /// Look up the arena for `T`, inserting a fresh one if missing.
+    /// Returns a cloned `Arc` to the typed arena, so the caller can operate
+    /// on it directly without holding on to the registry borrow.
+    pub fn ensure_arena<T: Value>(&mut self) -> Arc<GenericArena<T, C>> {
+        let type_id = TypeId::of::<T>();
+        let erased = self
+            .arenas
+            .entry(type_id)
+            .or_insert_with(|| Arc::new(GenericArena::<T, C>::new()) as Arc<dyn ErasedArena<C>>)
+            .clone();
+        // Downcast from `Arc<dyn ErasedArena<C>>` to `Arc<GenericArena<T, C>>`.
+        // Invariant: this method is the only place entries are inserted, and
+        // each is keyed by `TypeId::of::<T>()`, so the downcast cannot fail.
+        downcast_arc::<T, C>(erased)
+            .expect("ArenaRegistry::ensure_arena downcast failed; TypeId/arena mismatch")
+    }
+
+    /// Look up the arena for `T` without inserting. Returns `None` if
+    /// no arena exists for this type yet.
+    pub fn try_arena<T: Value>(&self) -> Option<Arc<GenericArena<T, C>>> {
+        let type_id = TypeId::of::<T>();
+        let erased = self.arenas.get(&type_id)?.clone();
+        downcast_arc::<T, C>(erased)
+    }
+
+    pub fn arena_count(&self) -> usize {
+        self.arenas.len()
+    }
+}
+
+/// Downcast an `Arc<dyn ErasedArena<C>>` to `Arc<GenericArena<T, C>>`.
+/// Returns `None` if the runtime type does not match `T`.
+fn downcast_arc<T: Value, C: Cells>(
+    arena: Arc<dyn ErasedArena<C>>,
+) -> Option<Arc<GenericArena<T, C>>> {
+    if arena.as_any().is::<GenericArena<T, C>>() {
+        // SAFETY: we just checked the TypeId via `is::<GenericArena<T, C>>()`.
+        // The Arc's referent is exactly `GenericArena<T, C>`. We transmute
+        // the `Arc<dyn ErasedArena<C>>` into `Arc<GenericArena<T, C>>`
+        // via raw-pointer rewrap. This is the standard pattern for
+        // downcast-Arc; std::sync::Arc::downcast does the same.
+        let raw = Arc::into_raw(arena) as *const GenericArena<T, C>;
+        Some(unsafe { Arc::from_raw(raw) })
+    } else {
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    #[test]
+    fn local_registry_inserts_and_retrieves() {
+        let mut r: ArenaRegistry<Local> = ArenaRegistry::new();
+        assert_eq!(r.arena_count(), 0);
+        let a1 = r.ensure_arena::<u64>();
+        assert_eq!(r.arena_count(), 1);
+        let a2 = r.ensure_arena::<u64>();
+        // Same TypeId, so no new entry; both Arcs point at the same arena.
+        assert_eq!(r.arena_count(), 1);
+        assert!(Arc::ptr_eq(&a1, &a2));
+
+        let s = a1.reserve_with(7);
+        assert_eq!(a2.read(s), 7);
+    }
+
+    #[test]
+    fn shared_registry_inserts_and_retrieves() {
+        let mut r: ArenaRegistry<Shared> = ArenaRegistry::new();
+        let a = r.ensure_arena::<String>();
+        let s = a.reserve_with("hello".to_string());
+        assert_eq!(a.read(s), "hello");
+    }
+
+    #[test]
+    fn different_types_get_different_arenas() {
+        let mut r: ArenaRegistry<Shared> = ArenaRegistry::new();
+        let _ = r.ensure_arena::<u64>();
+        let _ = r.ensure_arena::<String>();
+        assert_eq!(r.arena_count(), 2);
+    }
+
+    #[test]
+    fn try_arena_returns_none_when_missing() {
+        let r: ArenaRegistry<Shared> = ArenaRegistry::new();
+        assert!(r.try_arena::<u64>().is_none());
+    }
+}
diff --git a/crates/incr-core/src/cells.rs b/crates/incr-core/src/cells.rs
new file mode 100644
index 0000000..a09f541
--- /dev/null
+++ b/crates/incr-core/src/cells.rs
@@ -0,0 +1,367 @@
+//! `Cells`: the strategy trait that abstracts how scalar cells are
+//! synchronized. [`Local`] backs every cell with `std::cell::Cell`;
+//! [`Shared`] backs every cell with the matching atomic type and uses
+//! Acquire/Release for visibility transitions.
+//!
+//! Every method implementation is `#[inline(always)]`, and the accessors
+//! take the cell by shared reference (`&Self::U8`, `&Self::U64`, ...), so
+//! the compiler can see through every call site and emit the same code it
+//! would for a direct field access. This monomorphization is the
+//! load-bearing invariant the spike validated: under `Local`, trait
+//! method calls produce byte-identical assembly to direct `Cell::get()`
+//! and `Cell::set()` operations; under `Shared` on x86, Acquire compiles
+//! to a plain `mov` with no fences.
+//!
+//! Compare-and-swap is exposed only on the state cell (as
+//! `state_try_transition`) because that is
+//! the only place CAS is meaningful (one thread races to claim a node's
+//! `Computing` slot). Other integer cells are written under exclusive
+//! ownership granted by the state machine.
+
+use crate::dep_stack::{DepStack, LocalDepStack, SharedDepStack};
+use crate::locks::{LocalLock, Lock};
+use std::cell::Cell;
+use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicU64, AtomicU8, Ordering};
+use std::sync::RwLock;
+
+/// Pointer cell trait. Implemented by [`LocalPtrCell<T>`], a wrapper
+/// around `Cell<*mut T>` (Local), and by `AtomicPtr<T>` (Shared). Used by
+/// the segmented node store and the overflow dep storage.
+pub trait PtrCell<T: 'static>: 'static {
+    /// Create a cell holding the null pointer.
+    fn new_null() -> Self;
+    /// Create a cell holding `p`.
+    fn new(p: *mut T) -> Self;
+    /// Read the pointer (`Ordering::Acquire` in the `AtomicPtr` impl).
+    fn load_acquire(&self) -> *mut T;
+    /// Write the pointer (`Ordering::Release` in the `AtomicPtr` impl).
+    fn store_release(&self, p: *mut T);
+    /// Read the pointer (`Ordering::Relaxed` in the `AtomicPtr` impl).
+    fn load_relaxed(&self) -> *mut T;
+    /// Write the pointer (`Ordering::Relaxed` in the `AtomicPtr` impl).
+    fn store_relaxed(&self, p: *mut T);
+}
+
+/// Strategy trait selecting the synchronization primitives used by every
+/// cell in the engine. Implemented by [`Local`] and [`Shared`].
+///
+/// `'static` so cell types can be embedded in trait-object closures
+/// without lifetime gymnastics. `Sized` to allow associated-type
+/// constructors.
+pub trait Cells: 'static + Sized {
+    /// 8-bit cell (`Cell<u8>` under `Local`, `AtomicU8` under `Shared`).
+    type U8;
+    /// 32-bit cell (`Cell<u32>` / `AtomicU32`).
+    type U32;
+    /// 64-bit cell (`Cell<u64>` / `AtomicU64`).
+    type U64;
+    /// Node-state cell; the only cell type with a compare-and-swap method.
+    type State;
+    /// Pointer cell for a pointee of type `T`.
+    type Ptr<T: 'static>: PtrCell<T>;
+    /// Lock wrapping a `T`.
+    type Lock<T: 'static>: Lock<T>;
+    /// Dependency-stack implementation paired with this strategy.
+    type DepStack: DepStack;
+
+    // Constructors: wrap an initial value in the strategy's cell type.
+    fn new_u8(v: u8) -> Self::U8;
+    fn new_u32(v: u32) -> Self::U32;
+    fn new_u64(v: u64) -> Self::U64;
+    fn new_state(v: u8) -> Self::State;
+
+    // 8-bit accessors, in acquire/release and relaxed flavours.
+    fn u8_load_acquire(c: &Self::U8) -> u8;
+    fn u8_store_release(c: &Self::U8, v: u8);
+    fn u8_load_relaxed(c: &Self::U8) -> u8;
+    fn u8_store_relaxed(c: &Self::U8, v: u8);
+
+    // 32-bit accessors; only relaxed variants are exposed.
+    fn u32_load_relaxed(c: &Self::U32) -> u32;
+    fn u32_store_relaxed(c: &Self::U32, v: u32);
+
+    // 64-bit accessors, in acquire/release and relaxed flavours.
+    fn u64_load_acquire(c: &Self::U64) -> u64;
+    fn u64_store_release(c: &Self::U64, v: u64);
+    fn u64_load_relaxed(c: &Self::U64) -> u64;
+    fn u64_store_relaxed(c: &Self::U64, v: u64);
+
+    // State-cell accessors.
+    fn state_load_acquire(c: &Self::State) -> u8;
+    fn state_store_release(c: &Self::State, v: u8);
+    /// Set the state to `new` if it currently equals `expected`. Returns
+    /// `Ok(())` on success, or `Err(observed)` with the value actually found.
+    fn state_try_transition(c: &Self::State, expected: u8, new: u8) -> Result<(), u8>;
+}
+
+/// Local pointer cell. Wraps `Cell<*mut T>`; not `Sync` (which is the
+/// correct property under the Local strategy).
+pub struct LocalPtrCell<T: 'static>(Cell<*mut T>);
+
+// The acquire/release and relaxed variants are identical here: each one is
+// a plain `Cell::get` or `Cell::set`, which takes no memory ordering.
+impl<T: 'static> PtrCell<T> for LocalPtrCell<T> {
+    /// Cell initialised to the null pointer.
+    #[inline(always)]
+    fn new_null() -> Self {
+        Self(Cell::new(std::ptr::null_mut()))
+    }
+    /// Cell initialised to `p`.
+    #[inline(always)]
+    fn new(p: *mut T) -> Self {
+        Self(Cell::new(p))
+    }
+    #[inline(always)]
+    fn load_acquire(&self) -> *mut T {
+        self.0.get()
+    }
+    #[inline(always)]
+    fn store_release(&self, p: *mut T) {
+        self.0.set(p);
+    }
+    #[inline(always)]
+    fn load_relaxed(&self) -> *mut T {
+        self.0.get()
+    }
+    #[inline(always)]
+    fn store_relaxed(&self, p: *mut T) {
+        self.0.set(p);
+    }
+}
+
+// Shared pointer cell: `AtomicPtr<T>` itself. Each method forwards to the
+// atomic load/store with the ordering named in the method.
+impl<T: 'static> PtrCell<T> for AtomicPtr<T> {
+    /// Cell initialised to the null pointer.
+    #[inline(always)]
+    fn new_null() -> Self {
+        AtomicPtr::new(std::ptr::null_mut())
+    }
+    /// Cell initialised to `p`.
+    #[inline(always)]
+    fn new(p: *mut T) -> Self {
+        AtomicPtr::new(p)
+    }
+    #[inline(always)]
+    fn load_acquire(&self) -> *mut T {
+        self.load(Ordering::Acquire)
+    }
+    #[inline(always)]
+    fn store_release(&self, p: *mut T) {
+        self.store(p, Ordering::Release);
+    }
+    #[inline(always)]
+    fn load_relaxed(&self) -> *mut T {
+        self.load(Ordering::Relaxed)
+    }
+    #[inline(always)]
+    fn store_relaxed(&self, p: *mut T) {
+        self.store(p, Ordering::Relaxed);
+    }
+}
+
+/// Single-threaded strategy. Backs every cell with `std::cell::Cell`.
+/// The resulting types are `!Sync` and the runtime built on top of
+/// `Local` is `!Send + !Sync` by composition.
+///
+/// Unit marker type: it holds no data; all behavior lives in its
+/// [`Cells`] impl.
+pub struct Local;
+
+/// Multi-threaded strategy. Backs every cell with the matching atomic
+/// type and uses Acquire/Release for state-visibility transitions.
+/// The resulting types are `Send + Sync`.
+///
+/// Unit marker type: it holds no data; all behavior lives in its
+/// [`Cells`] impl.
+pub struct Shared;
+
+// `Local` strategy: every cell is a `std::cell::Cell`. The ordering named in
+// each accessor has no effect here; all loads are `Cell::get` and all stores
+// are `Cell::set`.
+impl Cells for Local {
+    type U8 = Cell<u8>;
+    type U32 = Cell<u32>;
+    type U64 = Cell<u64>;
+    type State = Cell<u8>;
+    type Ptr<T: 'static> = LocalPtrCell<T>;
+    type Lock<T: 'static> = LocalLock<T>;
+    type DepStack = LocalDepStack;
+
+    // Constructors.
+    #[inline(always)]
+    fn new_u8(v: u8) -> Self::U8 {
+        Cell::new(v)
+    }
+    #[inline(always)]
+    fn new_u32(v: u32) -> Self::U32 {
+        Cell::new(v)
+    }
+    #[inline(always)]
+    fn new_u64(v: u64) -> Self::U64 {
+        Cell::new(v)
+    }
+    #[inline(always)]
+    fn new_state(v: u8) -> Self::State {
+        Cell::new(v)
+    }
+
+    // 8-bit accessors.
+    #[inline(always)]
+    fn u8_load_acquire(c: &Self::U8) -> u8 {
+        c.get()
+    }
+    #[inline(always)]
+    fn u8_store_release(c: &Self::U8, v: u8) {
+        c.set(v);
+    }
+    #[inline(always)]
+    fn u8_load_relaxed(c: &Self::U8) -> u8 {
+        c.get()
+    }
+    #[inline(always)]
+    fn u8_store_relaxed(c: &Self::U8, v: u8) {
+        c.set(v);
+    }
+
+    // 32-bit accessors.
+    #[inline(always)]
+    fn u32_load_relaxed(c: &Self::U32) -> u32 {
+        c.get()
+    }
+    #[inline(always)]
+    fn u32_store_relaxed(c: &Self::U32, v: u32) {
+        c.set(v);
+    }
+
+    // 64-bit accessors.
+    #[inline(always)]
+    fn u64_load_acquire(c: &Self::U64) -> u64 {
+        c.get()
+    }
+    #[inline(always)]
+    fn u64_store_release(c: &Self::U64, v: u64) {
+        c.set(v);
+    }
+    #[inline(always)]
+    fn u64_load_relaxed(c: &Self::U64) -> u64 {
+        c.get()
+    }
+    #[inline(always)]
+    fn u64_store_relaxed(c: &Self::U64, v: u64) {
+        c.set(v);
+    }
+
+    // State-cell accessors.
+    #[inline(always)]
+    fn state_load_acquire(c: &Self::State) -> u8 {
+        c.get()
+    }
+    #[inline(always)]
+    fn state_store_release(c: &Self::State, v: u8) {
+        c.set(v);
+    }
+    /// Compare-then-set on a plain `Cell`: writes `new` and returns `Ok(())`
+    /// when the current value equals `expected`; otherwise leaves the cell
+    /// untouched and returns `Err` with the value that was found.
+    #[inline(always)]
+    fn state_try_transition(c: &Self::State, expected: u8, new: u8) -> Result<(), u8> {
+        let cur = c.get();
+        if cur == expected {
+            c.set(new);
+            Ok(())
+        } else {
+            Err(cur)
+        }
+    }
+}
+
+// `Shared` strategy: every cell is the matching `std::sync::atomic` type and
+// each accessor uses exactly the ordering named in the method.
+impl Cells for Shared {
+    type U8 = AtomicU8;
+    type U32 = AtomicU32;
+    type U64 = AtomicU64;
+    type State = AtomicU8;
+    type Ptr<T: 'static> = AtomicPtr<T>;
+    type Lock<T: 'static> = RwLock<T>;
+    type DepStack = SharedDepStack;
+
+    // Constructors.
+    #[inline(always)]
+    fn new_u8(v: u8) -> Self::U8 {
+        AtomicU8::new(v)
+    }
+    #[inline(always)]
+    fn new_u32(v: u32) -> Self::U32 {
+        AtomicU32::new(v)
+    }
+    #[inline(always)]
+    fn new_u64(v: u64) -> Self::U64 {
+        AtomicU64::new(v)
+    }
+    #[inline(always)]
+    fn new_state(v: u8) -> Self::State {
+        AtomicU8::new(v)
+    }
+
+    // 8-bit accessors.
+    #[inline(always)]
+    fn u8_load_acquire(c: &Self::U8) -> u8 {
+        c.load(Ordering::Acquire)
+    }
+    #[inline(always)]
+    fn u8_store_release(c: &Self::U8, v: u8) {
+        c.store(v, Ordering::Release);
+    }
+    #[inline(always)]
+    fn u8_load_relaxed(c: &Self::U8) -> u8 {
+        c.load(Ordering::Relaxed)
+    }
+    #[inline(always)]
+    fn u8_store_relaxed(c: &Self::U8, v: u8) {
+        c.store(v, Ordering::Relaxed);
+    }
+
+    // 32-bit accessors.
+    #[inline(always)]
+    fn u32_load_relaxed(c: &Self::U32) -> u32 {
+        c.load(Ordering::Relaxed)
+    }
+    #[inline(always)]
+    fn u32_store_relaxed(c: &Self::U32, v: u32) {
+        c.store(v, Ordering::Relaxed);
+    }
+
+    // 64-bit accessors.
+    #[inline(always)]
+    fn u64_load_acquire(c: &Self::U64) -> u64 {
+        c.load(Ordering::Acquire)
+    }
+    #[inline(always)]
+    fn u64_store_release(c: &Self::U64, v: u64) {
+        c.store(v, Ordering::Release);
+    }
+    #[inline(always)]
+    fn u64_load_relaxed(c: &Self::U64) -> u64 {
+        c.load(Ordering::Relaxed)
+    }
+    #[inline(always)]
+    fn u64_store_relaxed(c: &Self::U64, v: u64) {
+        c.store(v, Ordering::Relaxed);
+    }
+
+    // State-cell accessors.
+    #[inline(always)]
+    fn state_load_acquire(c: &Self::State) -> u8 {
+        c.load(Ordering::Acquire)
+    }
+    #[inline(always)]
+    fn state_store_release(c: &Self::State, v: u8) {
+        c.store(v, Ordering::Release);
+    }
+    /// Atomic compare-and-swap: `AcqRel` on success, `Acquire` on failure.
+    /// Returns `Ok(())` when the swap happened, or `Err(observed)` carrying
+    /// the value found in the cell.
+    #[inline(always)]
+    fn state_try_transition(c: &Self::State, expected: u8, new: u8) -> Result<(), u8> {
+        match c.compare_exchange(expected, new, Ordering::AcqRel, Ordering::Acquire) {
+            Ok(_) => Ok(()),
+            Err(observed) => Err(observed),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A `Local` u64 cell returns what was stored in it.
+    #[test]
+    fn local_roundtrips() {
+        let c = Local::new_u64(7);
+        assert_eq!(Local::u64_load_acquire(&c), 7);
+        Local::u64_store_release(&c, 11);
+        assert_eq!(Local::u64_load_acquire(&c), 11);
+    }
+
+    /// A `Shared` u64 cell returns what was stored in it.
+    #[test]
+    fn shared_roundtrips() {
+        let c = Shared::new_u64(7);
+        assert_eq!(Shared::u64_load_acquire(&c), 7);
+        Shared::u64_store_release(&c, 11);
+        assert_eq!(Shared::u64_load_acquire(&c), 11);
+    }
+
+    /// `Local` transition: succeeds on a matching `expected`, then fails and
+    /// reports the current value once the state has moved on.
+    #[test]
+    fn local_state_cas() {
+        let s = Local::new_state(1);
+        assert_eq!(Local::state_try_transition(&s, 1, 2), Ok(()));
+        assert_eq!(Local::state_try_transition(&s, 1, 3), Err(2));
+    }
+
+    /// `Shared` transition: same contract as the `Local` case.
+    #[test]
+    fn shared_state_cas() {
+        let s = Shared::new_state(1);
+        assert_eq!(Shared::state_try_transition(&s, 1, 2), Ok(()));
+        assert_eq!(Shared::state_try_transition(&s, 1, 3), Err(2));
+    }
+
+    /// The integer cell types have the same size under both strategies.
+    #[test]
+    fn cell_sizes_match_atomic_sizes() {
+        assert_eq!(
+            std::mem::size_of::<<Local as Cells>::U64>(),
+            std::mem::size_of::<<Shared as Cells>::U64>(),
+        );
+        assert_eq!(
+            std::mem::size_of::<<Local as Cells>::U32>(),
+            std::mem::size_of::<<Shared as Cells>::U32>(),
+        );
+        assert_eq!(
+            std::mem::size_of::<<Local as Cells>::U8>(),
+            std::mem::size_of::<<Shared as Cells>::U8>(),
+        );
+    }
+}
diff --git a/crates/incr-core/src/collection.rs b/crates/incr-core/src/collection.rs
new file mode 100644
index 0000000..9e274f1
--- /dev/null
+++ b/crates/incr-core/src/collection.rs
@@ -0,0 +1,917 @@
+//! `IncrCollection<T, C>`: incremental collection with delta-log propagation.
+//!
+//! Each collection holds an append-only log of inserts and deletes plus an
+//! `Incr<u64>` version node. Operators (filter, map, count, reduce) are
+//! query closures that scan new deltas since their last evaluation index
+//! and update their own state incrementally.
+//!
+//! Storage layout per collection:
+//! - `log`: `Arc<RwLock<CollectionLog<T>>>`, shared across operator
+//!   closures that read from this collection.
+//! - `version_node`: `Incr<u64>` input node. Bumped on every successful
+//!   insert/delete; downstream queries depend on it through `rt.get`.
+//!
+//! Operator pattern:
+//! 1. Capture clones of `upstream_log`, `upstream_version_node`, and a
+//!    fresh `last_idx: AtomicUsize` (read-from-upstream cursor).
+//! 2. Inside the query, call `rt.get(upstream_version_node)` so the
+//!    runtime tracks the version dep.
+//! 3. Read the log, scan `deltas[last_idx..]`, process each, advance the
+//!    cursor.
+//! 4. For filter/map, also push into the operator's own collection log
+//!    and bump the output version. For count/reduce, return the
+//!    aggregated value directly.
+//!
+//! This first slice covers filter, map, count, reduce, join, and
+//! group_by. sort_by_key, pairwise, and window land in the next slice
+//! (they need additional sorted-collection machinery).
+
+use std::collections::HashMap;
+use std::hash::Hash;
+use std::sync::{Arc, RwLock};
+
+use crate::cells::Cells;
+use crate::handle::Incr;
+use crate::runtime::Runtime;
+use crate::value::Value;
+
+/// One delta event in a collection log.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Delta<T> {
+    /// One occurrence of the value was added.
+    Insert(T),
+    /// One occurrence of the value was removed.
+    Delete(T),
+}
+
+/// Append-only delta log + multiset bookkeeping for a single collection.
+///
+/// `deltas` is the source of truth that operators scan. `elements` is the
+/// multiset that lets us validate deletes (no-op if element not present)
+/// and supports the `elements_vec()` convenience. `version` is the
+/// monotonic counter bumped on every accepted insert/delete; it's the
+/// value the `version_node` carries to downstream queries.
+pub struct CollectionLog<T: Hash + Eq + Clone> {
+    /// Every accepted insert/delete, in the order it was applied.
+    pub(crate) deltas: Vec<Delta<T>>,
+    /// Live elements mapped to their occurrence count; entries are removed
+    /// when the count reaches zero.
+    pub(crate) elements: HashMap<T, usize>,
+    /// Number of accepted inserts and deletes so far (starts at 0).
+    pub(crate) version: u64,
+}
+
+impl<T: Hash + Eq + Clone> CollectionLog<T> {
+    /// Build an empty log: no deltas, no live elements, version 0.
+    pub fn new() -> Self {
+        CollectionLog {
+            deltas: Vec::new(),
+            elements: HashMap::new(),
+            version: 0,
+        }
+    }
+
+    /// Record one more occurrence of `value`. Inserts are never rejected.
+    ///
+    /// Returns the version after the bump.
+    ///
+    /// # Panics
+    /// Panics if the version counter would overflow `u64`.
+    pub fn insert(&mut self, value: T) -> u64 {
+        let occurrences = self.elements.entry(value.clone()).or_insert(0);
+        *occurrences += 1;
+        self.deltas.push(Delta::Insert(value));
+
+        let bumped = self
+            .version
+            .checked_add(1)
+            .expect("CollectionLog version overflow");
+        self.version = bumped;
+        bumped
+    }
+
+    /// Remove one occurrence of `value`.
+    ///
+    /// Returns `Some(new_version)` when the element was live and the delete
+    /// was logged, or `None` (nothing logged, version unchanged) when the
+    /// element is not in the collection.
+    ///
+    /// # Panics
+    /// Panics if the version counter would overflow `u64`.
+    pub fn delete(&mut self, value: &T) -> Option<u64> {
+        let exhausted = {
+            let remaining = self.elements.get_mut(value)?;
+            *remaining -= 1;
+            *remaining == 0
+        };
+        // Drop the entry once the last occurrence is gone so that a later
+        // delete of the same value is rejected.
+        if exhausted {
+            self.elements.remove(value);
+        }
+        self.deltas.push(Delta::Delete(value.clone()));
+
+        let bumped = self
+            .version
+            .checked_add(1)
+            .expect("CollectionLog version overflow");
+        self.version = bumped;
+        Some(bumped)
+    }
+
+    /// Copy out every live element, repeating each one as many times as it
+    /// occurs. Order follows the `HashMap` iteration order.
+    pub fn elements_vec(&self) -> Vec<T> {
+        let total: usize = self.elements.values().sum();
+        let mut expanded = Vec::with_capacity(total);
+        for (item, &copies) in &self.elements {
+            expanded.extend(std::iter::repeat(item).take(copies).cloned());
+        }
+        expanded
+    }
+}
+
+impl<T: Hash + Eq + Clone> Default for CollectionLog<T> {
+    /// Same as [`CollectionLog::new`]: an empty log at version 0.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Public collection handle. Cheap to clone (Arc + Copy handle).
+///
+/// The log uses `std::sync::RwLock` rather than `C::Lock` so the same
+/// type works under both strategies. Under `Local`, this costs one
+/// uncontended RwLock acquire per collection op (~5 ns); the alternative
+/// would be to thread an `unsafe impl Sync` through `LocalLock` to make
+/// it shareable inside Send+Sync compute closures, which would be a
+/// footgun for unrelated uses of `LocalLock`. Uniformity wins; the
+/// 5 ns per insert/delete is invisible against the rest of the runtime.
+pub struct IncrCollection<T: Value + Hash + Eq, C: Cells> {
+    /// Delta log and multiset, shared with every clone of this handle and
+    /// with the operator closures derived from it.
+    pub(crate) log: Arc<RwLock<CollectionLog<T>>>,
+    /// Runtime node carrying the log version; downstream queries read it to
+    /// register their dependency on this collection.
+    pub(crate) version_node: Incr<u64>,
+    /// Ties the handle to strategy `C` without storing a `C`. The
+    /// `fn() -> C` form keeps the marker `Send + Sync` whatever `C` is.
+    pub(crate) _phantom: std::marker::PhantomData<fn() -> C>,
+}
+
+// Written by hand rather than derived: `#[derive(Clone)]` would add
+// `T: Clone` and `C: Clone` bounds, and this impl needs neither.
+impl<T: Value + Hash + Eq, C: Cells> Clone for IncrCollection<T, C> {
+    /// Produce another handle to the same collection: the log `Arc` is
+    /// shared and the version node handle is copied.
+    fn clone(&self) -> Self {
+        let IncrCollection {
+            log, version_node, ..
+        } = self;
+        IncrCollection {
+            log: log.clone(),
+            version_node: *version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+}
+
+impl<T: Value + Hash + Eq, C: Cells> IncrCollection<T, C> {
+    /// Create an empty collection whose version node is a fresh input node
+    /// (initial value 0) registered through `rt.create_input`.
+    pub(crate) fn new(rt: &Runtime<C>) -> Self {
+        let log = Arc::new(RwLock::new(CollectionLog::new()));
+        let version_node = rt.create_input(0_u64);
+        IncrCollection {
+            log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+
+    /// Internal: like [`Self::new`], but usable from inside a compute
+    /// closure (`group_by` uses it to create sub-collections lazily). It
+    /// goes through `rt.create_input_unchecked`, skipping the
+    /// dep-stack-empty check, so the caller must make sure the new
+    /// version node does not implicitly become a dep of the running
+    /// compute.
+    pub(crate) fn new_in_compute(rt: &Runtime<C>) -> Self {
+        let log = Arc::new(RwLock::new(CollectionLog::new()));
+        let version_node = rt.create_input_unchecked(0_u64);
+        IncrCollection {
+            log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+
+    /// The node that carries this collection's version. Handy when a user
+    /// query wants to depend on the collection directly instead of going
+    /// through an operator.
+    pub fn version_node(&self) -> Incr<u64> {
+        self.version_node
+    }
+
+    /// Add `value` to the collection, then publish the new log version on
+    /// `version_node` so downstream queries are notified.
+    ///
+    /// # Panics
+    /// Panics if the log lock is poisoned.
+    pub fn insert(&self, rt: &Runtime<C>, value: T) {
+        let mut guard = self.log.write().expect("collection log poisoned");
+        let bumped = guard.insert(value);
+        // Release the write lock before touching the runtime.
+        drop(guard);
+        rt.set(self.version_node, bumped);
+    }
+
+    /// Remove one occurrence of `value`. Returns `true` when a delete was
+    /// recorded; returns `false`, with no log delta and no version bump,
+    /// when the value was not present.
+    ///
+    /// # Panics
+    /// Panics if the log lock is poisoned.
+    pub fn delete(&self, rt: &Runtime<C>, value: &T) -> bool {
+        let mut guard = self.log.write().expect("collection log poisoned");
+        let outcome = guard.delete(value);
+        // Release the write lock before touching the runtime.
+        drop(guard);
+
+        if let Some(bumped) = outcome {
+            rt.set(self.version_node, bumped);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Count of live elements, counting each multiset duplicate.
+    ///
+    /// # Panics
+    /// Panics if the log lock is poisoned.
+    pub fn snapshot_len(&self) -> usize {
+        let guard = self.log.read().expect("collection log poisoned");
+        let live: usize = guard.elements.values().sum();
+        live
+    }
+}
+
+// Collection constructor exposed on the runtime itself.
+impl<C: Cells> Runtime<C> {
+    /// Create a fresh empty collection in this runtime. Delegates to
+    /// `IncrCollection::new`, which registers the collection's version
+    /// node as an input with initial value 0.
+    pub fn create_collection<T: Value + Hash + Eq>(&self) -> IncrCollection<T, C> {
+        IncrCollection::new(self)
+    }
+}
+
+impl<T, C> IncrCollection<T, C>
+where
+    T: Value + Hash + Eq,
+    C: Cells,
+{
+    /// Filter: derive a collection holding only the elements for which
+    /// `pred(&t)` is true, kept up to date incrementally.
+    ///
+    /// The derived collection's `version_node` is a query node. When it is
+    /// observed it reads the upstream version, scans the upstream deltas
+    /// it has not consumed yet, applies `pred` to each, and mirrors the
+    /// accepted ones into the output log. Its value is the output log's
+    /// version.
+    ///
+    /// Calling `insert` or `delete` on a derived collection is not
+    /// supported (it would set a query node directly, bypassing the
+    /// operator and corrupting the state machine); this constraint is
+    /// documented and will be enforced by a runtime check in the
+    /// API-cleanup slice.
+    pub fn filter<F>(&self, rt: &Runtime<C>, pred: F) -> IncrCollection<T, C>
+    where
+        F: Fn(&T) -> bool + Send + Sync + 'static,
+    {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+
+        let source_log = Arc::clone(&self.log);
+        let source_version = self.version_node;
+        // Index of the first upstream delta this operator has not consumed.
+        let cursor = Arc::new(AtomicUsize::new(0));
+
+        let derived_log = Arc::new(RwLock::new(CollectionLog::<T>::new()));
+        let sink = Arc::clone(&derived_log);
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            // Read the upstream version so the runtime records the dep.
+            let _upstream_seen = rt.get(source_version);
+
+            let source = source_log.read().expect("collection log poisoned");
+            let consumed = cursor.load(Ordering::Relaxed);
+            let available = source.deltas.len();
+            if consumed >= available {
+                // Nothing new upstream: report the current output version.
+                let current = sink.read().expect("collection log poisoned");
+                return current.version;
+            }
+
+            let mut derived = sink.write().expect("collection log poisoned");
+            for delta in &source.deltas[consumed..] {
+                match delta {
+                    Delta::Insert(item) if pred(item) => {
+                        derived.insert(item.clone());
+                    }
+                    Delta::Delete(item) if pred(item) => {
+                        derived.delete(item);
+                    }
+                    // Rejected by the predicate: nothing to mirror.
+                    _ => {}
+                }
+            }
+            cursor.store(available, Ordering::Relaxed);
+            derived.version
+        });
+
+        IncrCollection {
+            log: derived_log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+
+    /// Map: derive a collection whose elements are `f` applied to every
+    /// upstream element.
+    ///
+    /// The output collection's `version_node` is a query node. When
+    /// observed it consumes the upstream deltas it has not seen yet: an
+    /// upstream insert of `v` inserts `f(v)`, an upstream delete of `v`
+    /// deletes `f(v)`. Calling `insert` or `delete` on the derived
+    /// collection is not supported, because that would set a query node
+    /// directly and bypass the operator.
+    pub fn map<U, F>(&self, rt: &Runtime<C>, f: F) -> IncrCollection<U, C>
+    where
+        U: Value + Hash + Eq,
+        F: Fn(&T) -> U + Send + Sync + 'static,
+    {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+
+        let source_log = Arc::clone(&self.log);
+        let source_version = self.version_node;
+        // Index of the first upstream delta this operator has not consumed.
+        let cursor = Arc::new(AtomicUsize::new(0));
+
+        let derived_log = Arc::new(RwLock::new(CollectionLog::<U>::new()));
+        let sink = Arc::clone(&derived_log);
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            // Read the upstream version so the runtime records the dep.
+            let _upstream_seen = rt.get(source_version);
+
+            let source = source_log.read().expect("collection log poisoned");
+            let consumed = cursor.load(Ordering::Relaxed);
+            let available = source.deltas.len();
+            if consumed >= available {
+                // Nothing new upstream: report the current output version.
+                let current = sink.read().expect("collection log poisoned");
+                return current.version;
+            }
+
+            let mut derived = sink.write().expect("collection log poisoned");
+            for delta in &source.deltas[consumed..] {
+                match delta {
+                    Delta::Insert(item) => {
+                        derived.insert(f(item));
+                    }
+                    Delta::Delete(item) => {
+                        derived.delete(&f(item));
+                    }
+                }
+            }
+            cursor.store(available, Ordering::Relaxed);
+            derived.version
+        });
+
+        IncrCollection {
+            log: derived_log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+
+    /// Count: the number of live elements, exposed as an `Incr<u64>`.
+    ///
+    /// The query keeps a running tally and only looks at the upstream
+    /// deltas added since its previous evaluation (+1 per insert, -1 per
+    /// delete), so each get costs O(new deltas) rather than an O(N) sum
+    /// over the multiset.
+    pub fn count(&self, rt: &Runtime<C>) -> Incr<u64> {
+        use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering as MemOrdering};
+
+        let source_log = Arc::clone(&self.log);
+        let source_version = self.version_node;
+        // Index of the first upstream delta not yet folded into the tally.
+        let cursor = Arc::new(AtomicUsize::new(0));
+        // Signed on purpose: a stray delete-of-absent that somehow leaks
+        // through must not underflow. Clamped and cast to u64 on read.
+        let tally = Arc::new(AtomicI64::new(0));
+
+        rt.create_query(move |rt| -> u64 {
+            // Read the upstream version so the runtime records the dep.
+            let _upstream_seen = rt.get(source_version);
+            let source = source_log.read().expect("collection log poisoned");
+            let consumed = cursor.load(MemOrdering::Relaxed);
+            let available = source.deltas.len();
+            if consumed < available {
+                let net: i64 = source.deltas[consumed..]
+                    .iter()
+                    .map(|d| match d {
+                        Delta::Insert(_) => 1_i64,
+                        Delta::Delete(_) => -1_i64,
+                    })
+                    .sum();
+                tally.fetch_add(net, MemOrdering::Relaxed);
+                cursor.store(available, MemOrdering::Relaxed);
+            }
+            let live = tally.load(MemOrdering::Relaxed);
+            live.max(0) as u64
+        })
+    }
+
+    /// Reduce: fold every live element through `fold_fn`.
+    ///
+    /// This is not truly incremental: on every upstream change the query
+    /// takes a fresh snapshot of the collection (duplicates expanded) and
+    /// hands the whole slice to `fold_fn`. The snapshot is produced by
+    /// iterating a `HashMap`, so `fold_fn` should not rely on element
+    /// order. A future incremental-reduce variant could maintain running
+    /// aggregates instead.
+    pub fn reduce<U, F>(&self, rt: &Runtime<C>, fold_fn: F) -> Incr<U>
+    where
+        U: Value,
+        F: Fn(&[T]) -> U + Send + Sync + 'static,
+    {
+        let source_log = Arc::clone(&self.log);
+        let source_version = self.version_node;
+
+        rt.create_query(move |rt| -> U {
+            // Read the upstream version so the runtime records the dep.
+            let _upstream_seen = rt.get(source_version);
+            // Copy the elements out so the read lock is released before
+            // the user's fold runs.
+            let snapshot = {
+                let guard = source_log.read().expect("collection log poisoned");
+                let copied = guard.elements_vec();
+                copied
+            };
+            fold_fn(&snapshot)
+        })
+    }
+
+    /// Join with another collection on a shared key. Emits the
+    /// cross-product of matching elements as `(T, U)` pairs. Pairs are
+    /// added and removed incrementally as upstream deltas arrive on
+    /// either side.
+    ///
+    /// Both sides maintain a `HashMap<K, Vec<...>>` index keyed by the
+    /// extracted key, plus a per-element key cache so deletes route to
+    /// the correct bucket. When a new element arrives on one side, we
+    /// look up the matching bucket on the other side and emit pairs.
+    /// When an element is deleted, we walk the same bucket and emit
+    /// corresponding pair removals.
+    ///
+    /// Collections are multisets, so the same value can sit in a bucket
+    /// more than once. The key-cache entry for a value is kept until its
+    /// last copy has been deleted; dropping it on the first delete would
+    /// make every later delete of that value a silent no-op and leave
+    /// stale pairs in the output.
+    ///
+    /// Each evaluation applies all pending left deltas (against the right
+    /// index as it stood) and then all pending right deltas (against the
+    /// updated left index).
+    ///
+    /// NOTE(review): joining a collection with itself takes two read
+    /// guards on the same `RwLock` from one thread, which `std` documents
+    /// as able to panic or deadlock. Confirm whether self-joins need to
+    /// be supported.
+    pub fn join<U, K, FL, FR>(
+        &self,
+        rt: &Runtime<C>,
+        right: &IncrCollection<U, C>,
+        left_key: FL,
+        right_key: FR,
+    ) -> IncrCollection<(T, U), C>
+    where
+        U: Value + Hash + Eq,
+        K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
+        FL: Fn(&T) -> K + Send + Sync + 'static,
+        FR: Fn(&U) -> K + Send + Sync + 'static,
+    {
+        use std::sync::atomic::{AtomicUsize, Ordering as MemOrdering};
+
+        let left_log = Arc::clone(&self.log);
+        let right_log = Arc::clone(&right.log);
+        let left_version = self.version_node;
+        let right_version = right.version_node;
+        // Per-side cursors: index of the first delta not yet consumed.
+        let left_last = Arc::new(AtomicUsize::new(0));
+        let right_last = Arc::new(AtomicUsize::new(0));
+
+        let left_index: Arc<RwLock<HashMap<K, Vec<T>>>> = Arc::new(RwLock::new(HashMap::new()));
+        let right_index: Arc<RwLock<HashMap<K, Vec<U>>>> = Arc::new(RwLock::new(HashMap::new()));
+        let left_key_cache: Arc<RwLock<HashMap<T, K>>> = Arc::new(RwLock::new(HashMap::new()));
+        let right_key_cache: Arc<RwLock<HashMap<U, K>>> = Arc::new(RwLock::new(HashMap::new()));
+
+        let li_for_query = Arc::clone(&left_index);
+        let ri_for_query = Arc::clone(&right_index);
+        let lkc_for_query = Arc::clone(&left_key_cache);
+        let rkc_for_query = Arc::clone(&right_key_cache);
+
+        let output_log: Arc<RwLock<CollectionLog<(T, U)>>> =
+            Arc::new(RwLock::new(CollectionLog::new()));
+        let output_log_for_query = Arc::clone(&output_log);
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            // Read both upstream versions so the runtime records the deps.
+            let _lv = rt.get(left_version);
+            let _rv = rt.get(right_version);
+
+            let left = left_log.read().expect("collection log poisoned");
+            let right = right_log.read().expect("collection log poisoned");
+            let l_start = left_last.load(MemOrdering::Relaxed);
+            let r_start = right_last.load(MemOrdering::Relaxed);
+
+            // Nothing new on either side: report the current output version.
+            if l_start >= left.deltas.len() && r_start >= right.deltas.len() {
+                return output_log_for_query
+                    .read()
+                    .expect("collection log poisoned")
+                    .version;
+            }
+
+            let mut li = li_for_query.write().expect("join index poisoned");
+            let mut ri = ri_for_query.write().expect("join index poisoned");
+            let mut lkc = lkc_for_query.write().expect("key cache poisoned");
+            let mut rkc = rkc_for_query.write().expect("key cache poisoned");
+            let mut out = output_log_for_query
+                .write()
+                .expect("collection log poisoned");
+
+            // Process left-side deltas: update left index + key cache,
+            // then emit pairs with all matching right-side elements.
+            for delta in &left.deltas[l_start..] {
+                match delta {
+                    Delta::Insert(v) => {
+                        let k = left_key(v);
+                        lkc.insert(v.clone(), k.clone());
+                        li.entry(k.clone()).or_default().push(v.clone());
+                        if let Some(matches) = ri.get(&k) {
+                            for r in matches {
+                                out.insert((v.clone(), r.clone()));
+                            }
+                        }
+                    }
+                    Delta::Delete(v) => {
+                        // Look the key up without removing it: duplicate
+                        // copies of `v` still need it for their own deletes.
+                        if let Some(k) = lkc.get(v).cloned() {
+                            let mut copies_left = false;
+                            if let Some(bucket) = li.get_mut(&k) {
+                                if let Some(pos) = bucket.iter().position(|x| x == v) {
+                                    bucket.remove(pos);
+                                }
+                                copies_left = bucket.iter().any(|x| x == v);
+                                if bucket.is_empty() {
+                                    li.remove(&k);
+                                }
+                            }
+                            // Forget the key only once the last copy is gone.
+                            if !copies_left {
+                                lkc.remove(v);
+                            }
+                            if let Some(matches) = ri.get(&k) {
+                                for r in matches {
+                                    out.delete(&(v.clone(), r.clone()));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            left_last.store(left.deltas.len(), MemOrdering::Relaxed);
+
+            // Right side, symmetric.
+            for delta in &right.deltas[r_start..] {
+                match delta {
+                    Delta::Insert(u) => {
+                        let k = right_key(u);
+                        rkc.insert(u.clone(), k.clone());
+                        ri.entry(k.clone()).or_default().push(u.clone());
+                        if let Some(matches) = li.get(&k) {
+                            for l in matches {
+                                out.insert((l.clone(), u.clone()));
+                            }
+                        }
+                    }
+                    Delta::Delete(u) => {
+                        // Same duplicate-aware handling as the left side.
+                        if let Some(k) = rkc.get(u).cloned() {
+                            let mut copies_left = false;
+                            if let Some(bucket) = ri.get_mut(&k) {
+                                if let Some(pos) = bucket.iter().position(|x| x == u) {
+                                    bucket.remove(pos);
+                                }
+                                copies_left = bucket.iter().any(|x| x == u);
+                                if bucket.is_empty() {
+                                    ri.remove(&k);
+                                }
+                            }
+                            if !copies_left {
+                                rkc.remove(u);
+                            }
+                            if let Some(matches) = li.get(&k) {
+                                for l in matches {
+                                    out.delete(&(l.clone(), u.clone()));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            right_last.store(right.deltas.len(), MemOrdering::Relaxed);
+
+            out.version
+        });
+
+        IncrCollection {
+            log: output_log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+
+    /// Group by an extracted key. Returns a `GroupedCollection<K, T, C>`
+    /// holding one [`IncrCollection<T, C>`] per encountered key. Routing
+    /// runs inside the returned `version_node` query, which replays the
+    /// upstream deltas it has not yet seen: an Insert goes to the group
+    /// keyed by `key_fn(&value)`, a Delete to the group the value was
+    /// inserted under. Read that node before inspecting the groups.
+    ///
+    /// Sub-collections are created lazily (via `new_in_compute`, since the
+    /// operator runs inside a compute closure) the first time a key is
+    /// seen; operators can still be composed on each per-group collection.
+    pub fn group_by<K, F>(&self, rt: &Runtime<C>, key_fn: F) -> GroupedCollection<K, T, C>
+    where
+        K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
+        F: Fn(&T) -> K + Send + Sync + 'static,
+    {
+        use std::sync::atomic::{AtomicUsize, Ordering as MemOrdering};
+
+        let upstream_log = Arc::clone(&self.log);
+        let upstream_version = self.version_node;
+        let last_idx = Arc::new(AtomicUsize::new(0));
+
+        let groups: Arc<RwLock<HashMap<K, IncrCollection<T, C>>>> =
+            Arc::new(RwLock::new(HashMap::new()));
+        let groups_for_query = Arc::clone(&groups);
+
+        // Maps each element to the key it was inserted under, so a Delete
+        // reaches the right group even if `key_fn` is expensive or
+        // non-deterministic. NOTE(review): one entry per value, so a value
+        // inserted twice loses its key after the first Delete — confirm.
+        let key_cache: Arc<RwLock<HashMap<T, K>>> = Arc::new(RwLock::new(HashMap::new()));
+
+        // Value of the returned query; moved into the closure, not cloned.
+        let output_version_counter = Arc::new(std::sync::atomic::AtomicU64::new(0));
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            let _uv = rt.get(upstream_version);
+
+            let upstream = upstream_log.read().expect("collection log poisoned");
+            let start = last_idx.load(MemOrdering::Relaxed);
+            if start >= upstream.deltas.len() {
+                return output_version_counter.load(MemOrdering::Relaxed);
+            }
+
+            let mut grps = groups_for_query.write().expect("grouped state poisoned");
+            let mut kc = key_cache.write().expect("key cache poisoned");
+
+            for delta in &upstream.deltas[start..] {
+                match delta {
+                    Delta::Insert(v) => {
+                        let k = key_fn(v);
+                        kc.insert(v.clone(), k.clone());
+                        let group = grps
+                            .entry(k)
+                            .or_insert_with(|| IncrCollection::<T, C>::new_in_compute(rt));
+                        let new_ver = group
+                            .log
+                            .write()
+                            .expect("collection log poisoned")
+                            .insert(v.clone());
+                        rt.set(group.version_node, new_ver);
+                    }
+                    Delta::Delete(v) => {
+                        if let Some(k) = kc.remove(v) {
+                            if let Some(group) = grps.get(&k) {
+                                let new_ver = group
+                                    .log
+                                    .write()
+                                    .expect("collection log poisoned")
+                                    .delete(v);
+                                if let Some(ver) = new_ver {
+                                    rt.set(group.version_node, ver);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            last_idx.store(upstream.deltas.len(), MemOrdering::Relaxed);
+            output_version_counter.fetch_add(1, MemOrdering::Relaxed) + 1
+        });
+
+        GroupedCollection {
+            groups,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+}
+
+/// Collection partitioned by key. Each key maps to an [`IncrCollection<T, C>`]
+/// containing only the elements routed to that key by `group_by`.
+///
+/// `version_node` is the `group_by` query; its value increases each time
+/// a batch of upstream deltas is routed, so downstream queries can depend
+/// on it to observe any group-level change. To depend on a single group,
+/// call `get_group(&k)` and depend on that sub-collection's version node.
+pub struct GroupedCollection<K, T, C>
+where
+    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
+    T: Value + Hash + Eq,
+    C: Cells,
+{
+    pub(crate) groups: Arc<RwLock<HashMap<K, IncrCollection<T, C>>>>,
+    pub(crate) version_node: Incr<u64>,
+    pub(crate) _phantom: std::marker::PhantomData<fn() -> C>,
+}
+
+impl<K, T, C> Clone for GroupedCollection<K, T, C>
+where
+    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
+    T: Value + Hash + Eq,
+    C: Cells,
+{
+    /// Cheap handle copy: the clone shares the same group map and node.
+    fn clone(&self) -> Self {
+        let groups = Arc::clone(&self.groups);
+        let version_node = self.version_node;
+        let _phantom = std::marker::PhantomData;
+        Self { groups, version_node, _phantom }
+    }
+}
+
+impl<K, T, C> GroupedCollection<K, T, C>
+where
+    K: Clone + PartialEq + Eq + Hash + Send + Sync + 'static,
+    T: Value + Hash + Eq,
+    C: Cells,
+{
+    /// Handle to the node stored in this collection's `version_node` field.
+    pub fn version_node(&self) -> Incr<u64> {
+        self.version_node
+    }
+
+    /// Snapshot of the keys that currently have a group. The order is
+    /// whatever the underlying `HashMap` iteration yields.
+    pub fn keys(&self) -> Vec<K> {
+        let map = self.groups.read().expect("grouped state poisoned");
+        map.keys().cloned().collect()
+    }
+
+    /// Clone of the sub-collection handle stored under `key`, or `None`
+    /// when the map holds no group for it.
+    pub fn get_group(&self, key: &K) -> Option<IncrCollection<T, C>> {
+        let map = self.groups.read().expect("grouped state poisoned");
+        map.get(key).cloned()
+    }
+
+    /// Number of groups currently in the map.
+    pub fn group_count(&self) -> usize {
+        let map = self.groups.read().expect("grouped state poisoned");
+        map.len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    #[test]
+    fn local_collection_basic_insert() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        c.insert(&rt, 10);
+        c.insert(&rt, 20);
+        c.insert(&rt, 30);
+        assert_eq!(c.snapshot_len(), 3);
+    }
+
+    #[test]
+    fn shared_collection_basic_insert() {
+        let rt: Runtime<Shared> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        c.insert(&rt, 10);
+        c.insert(&rt, 20);
+        c.insert(&rt, 30);
+        assert_eq!(c.snapshot_len(), 3);
+    }
+
+    #[test]
+    fn local_count_basic() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let n = c.count(&rt);
+        assert_eq!(rt.get(n), 0);
+        c.insert(&rt, 5);
+        c.insert(&rt, 7);
+        assert_eq!(rt.get(n), 2);
+        c.delete(&rt, &5);
+        assert_eq!(rt.get(n), 1);
+    }
+
+    #[test]
+    fn shared_count_basic() {
+        let rt: Runtime<Shared> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let n = c.count(&rt);
+        assert_eq!(rt.get(n), 0);
+        c.insert(&rt, 5);
+        c.insert(&rt, 7);
+        assert_eq!(rt.get(n), 2);
+        c.delete(&rt, &5);
+        assert_eq!(rt.get(n), 1);
+    }
+
+    #[test]
+    fn local_filter_count() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let evens = c.filter(&rt, |x| x % 2 == 0);
+        let n_evens = evens.count(&rt);
+        for i in 1..=10 {
+            c.insert(&rt, i);
+        }
+        assert_eq!(rt.get(n_evens), 5); // 2, 4, 6, 8, 10
+    }
+
+    #[test]
+    fn shared_filter_count() {
+        let rt: Runtime<Shared> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let evens = c.filter(&rt, |x| x % 2 == 0);
+        let n_evens = evens.count(&rt);
+        for i in 1..=10 {
+            c.insert(&rt, i);
+        }
+        assert_eq!(rt.get(n_evens), 5); // 2, 4, 6, 8, 10
+    }
+
+    #[test]
+    fn local_map_then_reduce_sum() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let doubled = c.map(&rt, |x| x * 2);
+        let total = doubled.reduce(&rt, |xs| xs.iter().sum::<i64>());
+        for i in 1..=5 {
+            c.insert(&rt, i);
+        }
+        // doubled = [2, 4, 6, 8, 10] → sum 30
+        assert_eq!(rt.get(total), 30);
+    }
+
+    #[test]
+    fn shared_filter_map_reduce_pipeline() {
+        let rt: Runtime<Shared> = Runtime::new();
+        let scores = rt.create_collection::<i64>();
+        let passing = scores.filter(&rt, |s| *s >= 50);
+        let curved = passing.map(&rt, |s| s + 10);
+        let total = curved.reduce(&rt, |xs| xs.iter().sum::<i64>());
+        scores.insert(&rt, 80);
+        scores.insert(&rt, 95);
+        scores.insert(&rt, 60);
+        scores.insert(&rt, 42);
+        // passing = [80, 95, 60] → curved = [90, 105, 70] → sum 265
+        // The production test expects 255 (90 + 105 + 60, i.e. 60 left
+        // uncurved); here every passing score gets +10, so 90 + 105 + 70 = 265.
+        assert_eq!(rt.get(total), 265);
+    }
+
+    #[test]
+    fn local_incremental_insert_only_changes_count() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let n = c.count(&rt);
+        for i in 0..100 {
+            c.insert(&rt, i);
+        }
+        assert_eq!(rt.get(n), 100);
+        c.insert(&rt, 999);
+        assert_eq!(rt.get(n), 101);
+    }
+
+    #[test]
+    fn local_group_by_partitions() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let groups = c.group_by(&rt, |x| x % 3);
+        c.insert(&rt, 1);
+        c.insert(&rt, 2);
+        c.insert(&rt, 3);
+        c.insert(&rt, 4);
+        c.insert(&rt, 5);
+        c.insert(&rt, 6);
+        let _ = rt.get(groups.version_node); // pull the group_by query so deltas get routed
+        assert_eq!(groups.group_count(), 3);
+        let mut ks = groups.keys();
+        ks.sort();
+        assert_eq!(ks, vec![0, 1, 2]);
+        let g0 = groups.get_group(&0).expect("group 0 missing");
+        let g1 = groups.get_group(&1).expect("group 1 missing");
+        let g2 = groups.get_group(&2).expect("group 2 missing");
+        assert_eq!(g0.snapshot_len(), 2); // 3, 6
+        assert_eq!(g1.snapshot_len(), 2); // 1, 4
+        assert_eq!(g2.snapshot_len(), 2); // 2, 5
+    }
+
+    #[test]
+    fn shared_group_by_per_group_count() {
+        let rt: Runtime<Shared> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let groups = c.group_by(&rt, |x| if *x >= 0 { "pos" } else { "neg" });
+        c.insert(&rt, 1);
+        c.insert(&rt, -1);
+        c.insert(&rt, 2);
+        c.insert(&rt, -2);
+        c.insert(&rt, 3);
+        let _ = rt.get(groups.version_node); // pull the group_by query so deltas get routed
+        let pos = groups.get_group(&"pos").expect("pos group missing");
+        let neg = groups.get_group(&"neg").expect("neg group missing");
+        let pos_count = pos.count(&rt);
+        let neg_count = neg.count(&rt);
+        assert_eq!(rt.get(pos_count), 3);
+        assert_eq!(rt.get(neg_count), 2);
+    }
+
+    #[test]
+    fn local_group_by_delete_removes_from_group() {
+        let rt: Runtime<Local> = Runtime::new();
+        let c = rt.create_collection::<i64>();
+        let groups = c.group_by(&rt, |x| x % 2);
+        c.insert(&rt, 2);
+        c.insert(&rt, 4);
+        c.insert(&rt, 6);
+        let _ = rt.get(groups.version_node); // route the three inserts
+        let evens = groups.get_group(&0).expect("group 0 missing");
+        assert_eq!(evens.snapshot_len(), 3);
+        c.delete(&rt, &4);
+        let _ = rt.get(groups.version_node); // route the delete
+        assert_eq!(evens.snapshot_len(), 2);
+    }
+
+    #[test]
+    fn local_join_simple() {
+        let rt: Runtime<Local> = Runtime::new();
+        let users = rt.create_collection::<(i64, String)>(); // (id, name)
+        let orders = rt.create_collection::<(i64, i64)>(); // (user_id, amount)
+        let joined = users.join(&rt, &orders, |u| u.0, |o| o.0);
+        users.insert(&rt, (1, "alice".to_string()));
+        users.insert(&rt, (2, "bob".to_string()));
+        orders.insert(&rt, (1, 100));
+        orders.insert(&rt, (1, 200));
+        orders.insert(&rt, (3, 50)); // no matching user
+        let n = joined.count(&rt);
+        // (alice, 100), (alice, 200) — 2 pairs
+        assert_eq!(rt.get(n), 2);
+    }
+
+    #[test]
+    fn shared_join_symmetric_order() {
+        let rt: Runtime<Shared> = Runtime::new();
+        let a = rt.create_collection::<(i32, &'static str)>();
+        let b = rt.create_collection::<(i32, i32)>();
+        let j = a.join(&rt, &b, |x| x.0, |y| y.0);
+        // Insert b first, then a; pairs should still emit.
+        b.insert(&rt, (1, 100));
+        a.insert(&rt, (1, "x"));
+        a.insert(&rt, (1, "y"));
+        b.insert(&rt, (1, 200));
+        let n = j.count(&rt);
+        // pairs: (x,100), (y,100), (x,200), (y,200) — 4
+        assert_eq!(rt.get(n), 4);
+    }
+
+    #[test]
+    fn local_join_delete_removes_pairs() {
+        let rt: Runtime<Local> = Runtime::new();
+        let a = rt.create_collection::<(i32, i32)>();
+        let b = rt.create_collection::<(i32, i32)>();
+        let j = a.join(&rt, &b, |x| x.0, |y| y.0);
+        a.insert(&rt, (1, 10));
+        b.insert(&rt, (1, 100));
+        b.insert(&rt, (1, 200));
+        let n = j.count(&rt);
+        assert_eq!(rt.get(n), 2);
+        b.delete(&rt, &(1, 100));
+        assert_eq!(rt.get(n), 1);
+    }
+}
diff --git a/crates/incr-core/src/dep_stack.rs b/crates/incr-core/src/dep_stack.rs
new file mode 100644
index 0000000..e3291e8
--- /dev/null
+++ b/crates/incr-core/src/dep_stack.rs
@@ -0,0 +1,177 @@
+//! `DepStack`: strategy-parameterized dependency tracking during compute.
+//!
+//! When a compute closure runs, every `rt.get(other)` call records `other`
+//! as a dependency of the currently-computing node. The recording happens
+//! through a per-thread (Shared) or per-runtime (Local) stack of frames:
+//! each frame holds the dep set for one nested compute. The stack handles
+//! nested computes that may happen during operator evaluation.
+//!
+//! Shared MUST use thread-local frames. Multiple reader threads can each
+//! drive a compute on different nodes simultaneously; if frames lived in
+//! a single shared lock, every `rt.get()` would contend on it, killing
+//! throughput. The thread-local design lets each thread maintain its
+//! own frame stack with no synchronization.
+//!
+//! Local uses a RefCell-backed stack because there's only one thread.
+//! RefCell's borrow counter is cheaper than a thread_local key lookup.
+//!
+//! The trade-off for Shared's thread_local: two `Runtime<Shared>`
+//! instances on the same thread share the same frame stack, so nesting
+//! computes across runtimes would mix them. This is the same constraint
+//! the production incr-concurrent already imposes. Tests use one
+//! runtime; production users should treat the runtime as a singleton
+//! per logical concern.
+
+use std::cell::RefCell;
+
+use crate::node::NodeId;
+
+/// Strategy-parameterized dep tracker used by the runtime during compute.
+pub trait DepStack: 'static {
+    fn new() -> Self;
+
+    /// Push a fresh, empty frame at the top of the stack. Called when
+    /// entering a compute closure.
+    fn push_frame(&self);
+
+    /// Pop the top frame and return its deps in recording order. Called when
+    /// exiting a compute closure; the impls in this file panic if none is open.
+    fn pop_frame(&self) -> Vec<NodeId>;
+
+    /// Record `dep` as a dependency of the currently-computing node. Called
+    /// by `rt.get()` whenever a frame is active. No-op when no frame is
+    /// active (e.g., a top-level `get` from user code). Duplicates are kept.
+    fn record_dep(&self, dep: NodeId);
+
+    /// True iff at least one frame is active (i.e., we're inside a
+    /// compute closure).
+    fn current_frame_active(&self) -> bool;
+}
+
+/// Local strategy: the frame stack lives in a `RefCell` owned by the value.
+pub struct LocalDepStack {
+    stack: RefCell<Vec<Vec<NodeId>>>,
+}
+
+impl DepStack for LocalDepStack {
+    fn new() -> Self {
+        let stack = RefCell::new(Vec::new());
+        Self { stack }
+    }
+
+    fn push_frame(&self) {
+        // Small initial capacity for the new frame's dep list.
+        let fresh = Vec::with_capacity(4);
+        self.stack.borrow_mut().push(fresh);
+    }
+
+    fn pop_frame(&self) -> Vec<NodeId> {
+        let top = self.stack.borrow_mut().pop();
+        top.expect("LocalDepStack::pop_frame on empty stack")
+    }
+
+    fn record_dep(&self, dep: NodeId) {
+        // With no open frame there is nothing to attach the dep to.
+        if let Some(top) = self.stack.borrow_mut().last_mut() {
+            top.push(dep);
+        }
+    }
+
+    fn current_frame_active(&self) -> bool {
+        let depth = self.stack.borrow().len();
+        depth > 0
+    }
+}
+
+/// Shared strategy: the frames live in a thread-local, so a
+/// `SharedDepStack` value holds no state and only forwards to it.
+///
+/// Limitation: every `Runtime<Shared>` on a thread sees the same
+/// stack. Don't nest one runtime's compute inside another's on the
+/// same thread.
+pub struct SharedDepStack;
+
+thread_local! {
+    static SHARED_FRAMES: RefCell<Vec<Vec<NodeId>>> = const { RefCell::new(Vec::new()) };
+}
+
+impl DepStack for SharedDepStack {
+    fn new() -> Self {
+        SharedDepStack
+    }
+
+    fn push_frame(&self) {
+        SHARED_FRAMES.with(|cell| cell.borrow_mut().push(Vec::with_capacity(4)));
+    }
+
+    fn pop_frame(&self) -> Vec<NodeId> {
+        let top = SHARED_FRAMES.with(|cell| cell.borrow_mut().pop());
+        // `None` here means pop was called with no frame on this thread.
+        top.expect("SharedDepStack::pop_frame on empty stack")
+    }
+
+    fn record_dep(&self, dep: NodeId) {
+        SHARED_FRAMES.with(|cell| {
+            // With no open frame there is nothing to attach the dep to.
+            if let Some(top) = cell.borrow_mut().last_mut() {
+                top.push(dep);
+            }
+        });
+    }
+
+    fn current_frame_active(&self) -> bool {
+        let depth = SHARED_FRAMES.with(|cell| cell.borrow().len());
+        // Non-zero depth: this thread has at least one frame pushed.
+        depth > 0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn local_push_pop_records() {
+        let s = LocalDepStack::new();
+        assert!(!s.current_frame_active()); // nothing pushed yet
+        s.push_frame();
+        assert!(s.current_frame_active());
+        s.record_dep(NodeId(0));
+        s.record_dep(NodeId(1));
+        s.record_dep(NodeId(2));
+        let frame = s.pop_frame();
+        assert_eq!(frame, vec![NodeId(0), NodeId(1), NodeId(2)]); // recording order
+        assert!(!s.current_frame_active());
+    }
+
+    #[test]
+    fn local_nested_frames_are_independent() {
+        let s = LocalDepStack::new();
+        s.push_frame();
+        s.record_dep(NodeId(1));
+        s.push_frame();
+        s.record_dep(NodeId(2));
+        s.record_dep(NodeId(3));
+        let inner = s.pop_frame();
+        assert_eq!(inner, vec![NodeId(2), NodeId(3)]);
+        let outer = s.pop_frame();
+        assert_eq!(outer, vec![NodeId(1)]); // inner deps did not leak outward
+    }
+
+    #[test]
+    fn local_record_outside_frame_is_noop() {
+        let s = LocalDepStack::new();
+        s.record_dep(NodeId(42)); // no open frame: must neither panic nor open one
+        assert!(!s.current_frame_active());
+    }
+
+    #[test]
+    fn shared_push_pop_records() {
+        let s = SharedDepStack::new();
+        s.push_frame();
+        s.record_dep(NodeId(7));
+        s.record_dep(NodeId(11));
+        let frame = s.pop_frame();
+        assert_eq!(frame, vec![NodeId(7), NodeId(11)]);
+    }
+}
diff --git a/crates/incr-core/src/generic_arena.rs b/crates/incr-core/src/generic_arena.rs
new file mode 100644
index 0000000..c0881ab
--- /dev/null
+++ b/crates/incr-core/src/generic_arena.rs
@@ -0,0 +1,182 @@
+//! `GenericArena<T, C>`: typed value storage parameterized over both the
+//! value type and the [`Cells`] strategy.
+//!
+//! Slot layout: `UnsafeCell<Option<T>>`. The `Option` allows two states:
+//! - `None`: slot reserved but never written (e.g., a query node whose
+//!   compute hasn't run yet).
+//! - `Some(value)`: slot holds the current value.
+//!
+//! Exclusive access to a slot is gated by the node state machine, NOT by
+//! Rust's borrow checker: the slot's `Computing` state is held by
+//! exactly one thread (CAS-claimed on Shared, single-threaded on Local).
+//! Readers reach a slot only when the corresponding node is `Clean`, so
+//! they observe the writer's data through the Acquire load on state.
+//!
+//! Reads clone `T` rather than returning a reference because the runtime
+//! may need to drop the slot (or recompute through it) after the read
+//! returns; tying a reference's lifetime to the read call would prevent
+//! that. Clone cost is part of the user's `T` impl.
+//!
+//! The segmented production primitive arenas (`AtomicPrimitiveArena<T>`
+//! for u64/f64/etc.) are deferred. Primitives go through the generic
+//! arena for now; the specialization that gives 5-10 ns per-get on
+//! primitives lands in a follow-up commit once the rest of the engine
+//! is in place.
+
+use crate::cells::Cells;
+use crate::value::Value;
+use std::cell::UnsafeCell;
+use std::marker::PhantomData;
+use std::sync::RwLock;
+
+/// Typed arena for `T` values, parameterized over the strategy.
+///
+/// The slot vector sits behind an `RwLock` for both strategies: growth
+/// takes the write side, slot reads and writes take the read side. Under
+/// `Local` the lock is morally a `RefCell`; it is used uniformly in this
+/// first cut to avoid duplicating arena code per strategy, at the cost of
+/// one uncontended lock acquire per arena op on Local's hot path. The
+/// follow-up commit replaces it with a `C`-parameterized inner-lock
+/// primitive (`Cells::RwLock<Vec<...>>`). Each slot is individually boxed
+/// (presumably so slot addresses survive vector growth — confirm).
+pub struct GenericArena<T: Value, C: Cells> {
+    slots: RwLock<Vec<Box<UnsafeCell<Option<T>>>>>,
+    _phantom: PhantomData<C>,
+}
+
+impl<T: Value, C: Cells> Default for GenericArena<T, C> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T: Value, C: Cells> GenericArena<T, C> {
+    /// Create an arena with no slots.
+    pub fn new() -> Self {
+        Self {
+            slots: RwLock::new(Vec::new()),
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Append a slot holding `contents`; panics if its index overflows `u32`.
+    fn push_slot(&self, contents: Option<T>) -> u32 {
+        let mut slots = self.slots.write().expect("arena slots lock poisoned");
+        let id = u32::try_from(slots.len()).expect("GenericArena slot index exceeds u32::MAX");
+        slots.push(Box::new(UnsafeCell::new(contents)));
+        id
+    }
+
+    /// Append a new slot initialized to `Some(initial)`. Caller holds
+    /// the runtime's write lock.
+    pub fn reserve_with(&self, initial: T) -> u32 {
+        self.push_slot(Some(initial))
+    }
+
+    /// Append an uninitialized slot (`None`). Used by query nodes whose
+    /// compute will populate the slot on first run.
+    pub fn reserve(&self) -> u32 {
+        self.push_slot(None)
+    }
+
+    /// Read the value at `slot`. Panics if the slot is `None` (use
+    /// [`try_read`](Self::try_read) to handle uninitialized slots).
+    pub fn read(&self, slot: u32) -> T {
+        self.try_read(slot)
+            .expect("GenericArena::read on uninitialized slot")
+    }
+
+    /// Clone the value at `slot`, or return `None` if the slot has never
+    /// been written. Panics if `slot` is out of bounds.
+    pub fn try_read(&self, slot: u32) -> Option<T> {
+        let slots = self.slots.read().expect("arena slots lock poisoned");
+        let cell = &slots[slot as usize];
+        // SAFETY: the node state machine governs access to this slot: a
+        // reader only gets here when the node is Clean (Acquire-synchronized
+        // with the writer's Release store), so no mutable alias is in flight.
+        unsafe { (*cell.get()).clone() }
+    }
+
+    /// Overwrite the value at `slot`, dropping the previous one. Caller
+    /// must own exclusive access via the Computing state.
+    pub fn write(&self, slot: u32, value: T) {
+        let slots = self.slots.read().expect("arena slots lock poisoned");
+        let cell = &slots[slot as usize];
+        // SAFETY: the caller holds this slot's Computing state, so nothing
+        // else reads or writes the cell; the read lock only keeps the slot
+        // vector from growing while the reference into it is alive.
+        unsafe { *cell.get() = Some(value) }
+    }
+
+    /// Number of slots reserved so far, initialized or not.
+    pub fn len(&self) -> usize {
+        self.slots.read().expect("arena slots lock poisoned").len()
+    }
+
+    /// True when no slot has been reserved.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+// SAFETY: `T: Send + Sync` (from the Value bound), so the boxed slots are
+// Send. Sync is the question: UnsafeCell is !Sync, but access to each cell
+// is governed by the runtime's state machine (exclusive access via the
+// Computing CAS) and by the RwLock around the vector (no push during
+// reads). The combination is sound when used as documented.
+// NOTE(review): both impls are unconditional in `C`, so they also apply to
+// `GenericArena<T, Local>` — confirm that is intended.
+unsafe impl<T: Value, C: Cells> Send for GenericArena<T, C> {}
+unsafe impl<T: Value, C: Cells> Sync for GenericArena<T, C> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    #[test]
+    fn local_roundtrip_string() {
+        let a: GenericArena<String, Local> = GenericArena::new();
+        let s = a.reserve_with("hello".to_string());
+        assert_eq!(a.read(s), "hello");
+        a.write(s, "world".to_string()); // overwrite replaces the old value
+        assert_eq!(a.read(s), "world");
+    }
+
+    #[test]
+    fn shared_roundtrip_string() {
+        let a: GenericArena<String, Shared> = GenericArena::new();
+        let s = a.reserve_with("hello".to_string());
+        assert_eq!(a.read(s), "hello");
+        a.write(s, "world".to_string()); // overwrite replaces the old value
+        assert_eq!(a.read(s), "world");
+    }
+
+    #[test]
+    fn local_uninitialized_try_read_is_none() {
+        let a: GenericArena<u64, Local> = GenericArena::new();
+        let s = a.reserve(); // slot starts out as `None`
+        assert_eq!(a.try_read(s), None);
+        a.write(s, 42);
+        assert_eq!(a.try_read(s), Some(42));
+        assert_eq!(a.read(s), 42);
+    }
+
+    #[test]
+    fn shared_uninitialized_try_read_is_none() {
+        let a: GenericArena<u64, Shared> = GenericArena::new();
+        let s = a.reserve(); // slot starts out as `None`
+        assert_eq!(a.try_read(s), None);
+        a.write(s, 42);
+        assert_eq!(a.try_read(s), Some(42));
+        assert_eq!(a.read(s), 42);
+    }
+
+    #[test]
+    fn shared_arena_is_send_sync() {
+        fn assert_send_sync<T: Send + Sync>() {} // compile-time check only
+        assert_send_sync::<GenericArena<u64, Shared>>();
+        assert_send_sync::<GenericArena<String, Shared>>();
+        assert_send_sync::<GenericArena<Vec<u8>, Shared>>();
+    }
+}
diff --git a/crates/incr-core/src/handle.rs b/crates/incr-core/src/handle.rs
new file mode 100644
index 0000000..e6eda36
--- /dev/null
+++ b/crates/incr-core/src/handle.rs
@@ -0,0 +1,153 @@
+//! Public handle type `Incr<T>` and runtime identity.
+//!
+//! `Incr<T>` is a 16-byte `Copy` token returned by `Runtime::create_input`
+//! and `Runtime::create_query`. It carries:
+//!
+//! - `slot: u32` — index into the runtime's segmented node store.
+//! - `generation: u32` — the slot's generation counter, for detecting
+//!   use-after-recycle (reserved; recycling lands with `delete_node` in a
+//!   follow-up).
+//! - `runtime_id: RuntimeId` (u64) — uniquely identifies the owning
+//!   `Runtime` for the process lifetime. Used to reject handles from
+//!   foreign runtimes with a clear error.
+//! - `_phantom: PhantomData<fn() -> T>` — locks `T` at the type level
+//!   without inheriting `T`'s auto traits. `Incr<T>` is always
+//!   `Send + Sync + Copy + Unpin` regardless of `T`.
+//!
+//! Total: 16 bytes, 8-byte aligned. Asserted by tests.
+
+use std::marker::PhantomData;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Identifier of one `Runtime` instance, taken from a process-wide
+/// monotonic counter and never reused within a process lifetime.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct RuntimeId(u64);
+
+impl RuntimeId {
+    #[allow(dead_code)]
+    pub(crate) const SENTINEL: Self = Self(0);
+
+    /// Hand out the next unused id. Called once per `Runtime::new`.
+    pub(crate) fn allocate() -> Self {
+        static NEXT_ID: AtomicU64 = AtomicU64::new(1);
+        RuntimeId(NEXT_ID.fetch_add(1, Ordering::Relaxed))
+    }
+
+    #[inline]
+    pub fn get(self) -> u64 {
+        self.0
+    }
+}
+
+/// Typed, `Copy` handle to a node in a `Runtime<C>`.
+#[repr(C)]
+pub struct Incr<T: 'static> {
+    slot: u32,
+    generation: u32,
+    runtime_id: RuntimeId,
+    _phantom: PhantomData<fn() -> T>,
+}
+
+// Implemented by hand: `derive` would require `T: Clone` / `T: Copy`.
+impl<T: 'static> Clone for Incr<T> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+impl<T: 'static> Copy for Incr<T> {}
+
+impl<T: 'static> std::fmt::Debug for Incr<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Incr { slot, generation, runtime_id, .. } = self;
+        let mut out = f.debug_struct("Incr");
+        out.field("slot", slot).field("generation", generation);
+        out.field("runtime_id", runtime_id).field("type", &std::any::type_name::<T>());
+        out.finish()
+    }
+}
+
+impl<T: 'static> PartialEq for Incr<T> {
+    fn eq(&self, other: &Self) -> bool {
+        let lhs = (self.slot, self.generation, self.runtime_id);
+        let rhs = (other.slot, other.generation, other.runtime_id);
+        lhs == rhs
+    }
+}
+
+impl<T: 'static> Eq for Incr<T> {}
+
+impl<T: 'static> std::hash::Hash for Incr<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        // Feed the same three fields that `eq` compares, in the same order.
+        let Incr { slot, generation, runtime_id, .. } = self;
+        (slot, generation, runtime_id).hash(state);
+    }
+}
+
+impl<T: 'static> Incr<T> {
+    /// Assemble a handle from its raw parts.
+    pub(crate) fn new(slot: u32, generation: u32, runtime_id: RuntimeId) -> Self {
+        let _phantom = PhantomData;
+        Self { slot, generation, runtime_id, _phantom }
+    }
+
+    /// Slot index stored in this handle.
+    #[inline]
+    pub fn slot(self) -> u32 {
+        self.slot
+    }
+
+    /// Generation value stored in this handle.
+    #[inline]
+    #[allow(dead_code)]
+    pub(crate) fn generation(self) -> u32 {
+        self.generation
+    }
+
+    /// Runtime id stored in this handle.
+    #[inline]
+    pub(crate) fn runtime_id(self) -> RuntimeId {
+        self.runtime_id
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn incr_is_16_bytes_8_aligned() {
+        assert_eq!(std::mem::size_of::<Incr<u64>>(), 16);
+        assert_eq!(std::mem::align_of::<Incr<u64>>(), 8);
+        assert_eq!(std::mem::size_of::<Incr<String>>(), 16); // size is independent of `T`
+        assert_eq!(std::mem::size_of::<Incr<Vec<u8>>>(), 16);
+    }
+
+    #[test]
+    fn incr_is_send_sync_regardless_of_t() {
+        fn assert_send_sync<T: Send + Sync>() {} // compile-time checks only
+        fn assert_copy<T: Copy>() {}
+        assert_send_sync::<Incr<u64>>();
+        assert_copy::<Incr<u64>>();
+        assert_send_sync::<Incr<String>>();
+        assert_send_sync::<Incr<std::cell::RefCell<u64>>>(); // `RefCell<u64>` is !Sync
+        assert_send_sync::<Incr<std::rc::Rc<u64>>>(); // `Rc<u64>` is !Send and !Sync
+    }
+
+    #[test]
+    fn runtime_id_sentinel_is_zero() {
+        assert_eq!(RuntimeId::SENTINEL.get(), 0);
+        let real = RuntimeId::allocate();
+        assert_ne!(real, RuntimeId::SENTINEL);
+        assert!(real.get() >= 1); // the counter starts at 1
+    }
+
+    #[test]
+    fn runtime_ids_are_unique() {
+        let a = RuntimeId::allocate();
+        let b = RuntimeId::allocate();
+        assert_ne!(a, b);
+    }
+}
diff --git a/crates/incr-core/src/lib.rs b/crates/incr-core/src/lib.rs
new file mode 100644
index 0000000..c1964e6
--- /dev/null
+++ b/crates/incr-core/src/lib.rs
@@ -0,0 +1,50 @@
+//! `incr-core`: the shared engine behind `incr-compute` and `incr-concurrent`.
+//!
+//! Both surface crates re-export the same `Runtime` parameterized over a
+//! [`Cells`] strategy:
+//! - `incr-compute` uses [`Local`], which backs every cell with
+//!   `std::cell::Cell`. The single-threaded variant is `!Send + !Sync` and
+//!   pays no atomic-fence cost.
+//! - `incr-concurrent` uses [`Shared`], which backs every cell with
+//!   `std::sync::atomic::Atomic*` types and explicit Acquire/Release
+//!   ordering. The concurrent variant is `Send + Sync` and supports a
+//!   writer thread plus arbitrary reader threads on the same graph.
+//!
+//! The validation that this parameterization carries zero overhead on the
+//! single-threaded path lives in the spike crate's RESULTS.md (preserved on
+//! the `spike/incr-core-monomorphization` branch). Short version: under
+//! `Local`, every trait method inlines to the same code a direct
+//! `Cell::get()` would emit; under `Shared`, every Acquire load compiles
+//! to a plain `mov` on x86 with no `lock` prefixes or fences.
+
+pub mod arena;
+pub mod arena_registry;
+pub mod cells;
+pub mod collection;
+pub mod dep_stack;
+pub mod generic_arena;
+pub mod handle;
+pub mod locks;
+pub mod node;
+pub mod runtime;
+pub mod segmented_nodes;
+pub mod sorted_collection;
+pub mod state;
+pub mod trace;
+pub mod value;
+
+pub use arena::PrimitiveArena;
+pub use arena_registry::{ArenaRegistry, ErasedArena};
+pub use cells::{Cells, Local, LocalPtrCell, PtrCell, Shared};
+pub use collection::{CollectionLog, Delta, GroupedCollection, IncrCollection};
+pub use dep_stack::{DepStack, LocalDepStack, SharedDepStack};
+pub use generic_arena::GenericArena;
+pub use handle::{Incr, RuntimeId};
+pub use locks::{LocalLock, Lock};
+pub use node::{NodeData, NodeId};
+pub use runtime::Runtime;
+pub use segmented_nodes::{SegmentedNodes, MAX_NODES};
+pub use sorted_collection::{SortDelta, SortedCollection};
+pub use state::NodeState;
+pub use trace::{NodeInfo, NodeKindInfo, NodeTrace, PropagationTrace, TraceAction};
+pub use value::Value;
diff --git a/crates/incr-core/src/locks.rs b/crates/incr-core/src/locks.rs
new file mode 100644
index 0000000..e5b13c8
--- /dev/null
+++ b/crates/incr-core/src/locks.rs
@@ -0,0 +1,112 @@
+//! `Lock<T>`: strategy-parameterized mutex-like primitive used for the
+//! runtime's inner-state fields.
+//!
+//! Local backs the lock with `RefCell` (single-threaded, no synchronization
+//! cost beyond a borrow-counter check). Shared backs it with `std::sync::RwLock`
+//! (read-write lock with reader parallelism). The trait abstracts the
+//! guard types via GATs so callers can write `let g = lock.read(); ... &*g ...`
+//! identically across both strategies.
+//!
+//! Poisoning: under Shared, if a thread panics while holding the write
+//! guard, the underlying RwLock becomes poisoned. We treat poisoning as a
+//! fatal runtime invariant violation and `.expect()` it. The user-facing
+//! API surfaces are panic-only after such a failure; no recovery path.
+
+use std::cell::{Ref, RefCell, RefMut};
+use std::ops::{Deref, DerefMut};
+use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
+
+/// Mutex-like primitive abstracting RefCell (Local) and RwLock (Shared).
+pub trait Lock<T: 'static>: 'static {
+    type ReadGuard<'a>: Deref<Target = T>
+    where
+        Self: 'a;
+    type WriteGuard<'a>: DerefMut<Target = T>
+    where
+        Self: 'a;
+
+    fn new(val: T) -> Self;
+    fn read(&self) -> Self::ReadGuard<'_>;
+    fn write(&self) -> Self::WriteGuard<'_>;
+}
+
+/// Local strategy: a newtype over `RefCell`. NOTE(review): the orphan rules do
+/// not require this wrapper (`Lock` is a local trait); rationale to be confirmed.
+pub struct LocalLock<T>(RefCell<T>);
+
+impl<T: 'static> Lock<T> for LocalLock<T> {
+    type ReadGuard<'a>
+        = Ref<'a, T>
+    where
+        T: 'a;
+    type WriteGuard<'a>
+        = RefMut<'a, T>
+    where
+        T: 'a;
+
+    #[inline(always)]
+    fn new(val: T) -> Self {
+        Self(RefCell::new(val))
+    }
+
+    #[inline(always)]
+    fn read(&self) -> Self::ReadGuard<'_> {
+        self.0.borrow()
+    }
+
+    #[inline(always)]
+    fn write(&self) -> Self::WriteGuard<'_> {
+        self.0.borrow_mut()
+    }
+}
+
+/// Shared strategy: `std::sync::RwLock`, used as-is. This is a direct impl
+/// (not a blanket impl) of our local `Lock` trait for the foreign `RwLock`
+/// type; the orphan rules only forbid implementing a foreign trait for a
+/// foreign type, so no newtype wrapper is needed here.
+impl<T: 'static> Lock<T> for RwLock<T> {
+    type ReadGuard<'a>
+        = RwLockReadGuard<'a, T>
+    where
+        T: 'a;
+    type WriteGuard<'a>
+        = RwLockWriteGuard<'a, T>
+    where
+        T: 'a;
+
+    #[inline(always)]
+    fn new(val: T) -> Self {
+        RwLock::new(val)
+    }
+
+    #[inline(always)]
+    fn read(&self) -> Self::ReadGuard<'_> {
+        RwLock::read(self).expect("incr-core inner lock poisoned (Shared)")
+    }
+
+    #[inline(always)]
+    fn write(&self) -> Self::WriteGuard<'_> {
+        RwLock::write(self).expect("incr-core inner lock poisoned (Shared)")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn local_lock_read_write() {
+        let l: LocalLock<u64> = LocalLock::new(7);
+        assert_eq!(*l.read(), 7);
+        *l.write() = 11;
+        assert_eq!(*l.read(), 11);
+    }
+
+    #[test]
+    fn shared_lock_read_write() {
+        let l: RwLock<u64> = <RwLock<u64> as Lock<u64>>::new(7);
+        assert_eq!(*Lock::read(&l), 7);
+        *Lock::write(&l) = 11;
+        assert_eq!(*Lock::read(&l), 11);
+    }
+}
diff --git a/crates/incr-core/src/node.rs b/crates/incr-core/src/node.rs
new file mode 100644
index 0000000..3a6e16a
--- /dev/null
+++ b/crates/incr-core/src/node.rs
@@ -0,0 +1,359 @@
+//! `NodeData<C: Cells>`: the per-node read-hot struct, parameterized over
+//! the [`Cells`] strategy. Production `incr-concurrent` uses a 64-byte
+//! cache-line-aligned layout to keep reader traversal at one cache line
+//! per node; this design carries forward unchanged into `incr-core`.
+//!
+//! The const-time size and alignment assertions are load-bearing under
+//! both strategies. The spike validated that field-by-field the `Local`
+//! cells (Cell-backed) and `Shared` cells (atomic-backed) produce the
+//! same layout, so the same 64-byte total holds.
+//!
+//! ## Layout (both strategies)
+//!
+//! ```text
+//! offset  size   field
+//! ------  ----   -----
+//!    0     8     verified_at   Cells::U64
+//!    8     8     changed_at    Cells::U64
+//!   16     8     overflow_deps haphazard::AtomicPtr<DepList>   (null while the node
+//!                                            has at most seven deps; otherwise points
+//!                                            at the heap-allocated overflow list)
+//!   24    28     inline_deps   [Cells::U32; 7]
+//!   52     4     arena_slot    u32
+//!   56     2     type_tag      u16
+//!   58     1     state         Cells::State
+//!   59     1     dep_count     Cells::U8
+//!   60     4     generation    Cells::U32
+//! ```
+//!
+//! Total: 64 bytes, 64-byte aligned. Asserted at compile time below.
+//!
+//! ## Dependency storage and reclamation
+//!
+//! Up to seven dependencies live inline in `inline_deps`; larger sets
+//! live in a heap-allocated [`DepList`] reached through `overflow_deps`.
+//! `install_deps` retires a displaced overflow list through the
+//! `haphazard` global domain rather than leaking it, and `for_each_dep`
+//! protects its overflow traversal with a hazard pointer. The segmented
+//! node store lives in the `segmented_nodes` module.
+
+use crate::cells::Cells;
+use crate::state::NodeState;
+use haphazard::{AtomicPtr as HzAtomicPtr, HazardPointer};
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct NodeId(pub u32);
+
+impl NodeId {
+    pub const SENTINEL: NodeId = NodeId(u32::MAX);
+}
+
+/// Heap-allocated overflow dependency list. Used when a node has more
+/// than seven dependencies.
+///
+/// Reclamation policy: when a node's dep set changes and the new list
+/// requires re-allocation, the OLD overflow list is retired through
+/// the `haphazard` global domain. Hazard-pointer protection in
+/// [`NodeData::for_each_dep`] guarantees concurrent readers can finish
+/// their traversal before the retired list is freed. Memory is
+/// reclaimed during normal operation (not just at runtime drop), so
+/// long-lived runtimes with churning dynamic deps no longer accumulate
+/// retired lists.
+pub struct DepList {
+    pub(crate) deps: Box<[NodeId]>,
+}
+
+#[repr(C, align(64))]
+pub struct NodeData<C: Cells> {
+    pub(crate) verified_at: C::U64,
+    pub(crate) changed_at: C::U64,
+    pub(crate) overflow_deps: HzAtomicPtr<DepList>,
+    pub(crate) inline_deps: [C::U32; 7],
+    pub(crate) arena_slot: u32,
+    pub(crate) type_tag: u16,
+    pub(crate) state: C::State,
+    pub(crate) dep_count: C::U8,
+    pub(crate) generation: C::U32,
+}
+
+impl<C: Cells> NodeData<C> {
+    /// Construct a new input node. Input nodes start `Clean` because their
+    /// value is provided at creation. `revision` seeds both `verified_at`
+    /// and `changed_at`.
+    pub fn new_input(type_tag: u16, arena_slot: u32, revision: u64) -> Self {
+        Self {
+            verified_at: C::new_u64(revision),
+            changed_at: C::new_u64(revision),
+            overflow_deps: unsafe { HzAtomicPtr::new(std::ptr::null_mut()) },
+            inline_deps: Self::empty_inline_deps(),
+            arena_slot,
+            type_tag,
+            state: C::new_state(NodeState::Clean.as_u8()),
+            dep_count: C::new_u8(0),
+            generation: C::new_u32(0),
+        }
+    }
+
+    /// Construct a new query node. Query nodes start `New` because their
+    /// value has not been computed; the first reader CASes to `Computing`
+    /// and runs the compute closure.
+    pub fn new_query(type_tag: u16, arena_slot: u32) -> Self {
+        Self {
+            verified_at: C::new_u64(0),
+            changed_at: C::new_u64(0),
+            overflow_deps: unsafe { HzAtomicPtr::new(std::ptr::null_mut()) },
+            inline_deps: Self::empty_inline_deps(),
+            arena_slot,
+            type_tag,
+            state: C::new_state(NodeState::New.as_u8()),
+            dep_count: C::new_u8(0),
+            generation: C::new_u32(0),
+        }
+    }
+
+    #[inline(always)]
+    pub fn arena_slot(&self) -> u32 {
+        self.arena_slot
+    }
+
+    #[inline(always)]
+    pub fn type_tag(&self) -> u16 {
+        self.type_tag
+    }
+
+    #[inline(always)]
+    pub fn state(&self) -> NodeState {
+        NodeState::from_u8(C::state_load_acquire(&self.state))
+    }
+
+    #[inline(always)]
+    pub fn state_cell(&self) -> &C::State {
+        &self.state
+    }
+
+    #[inline(always)]
+    pub fn verified_at(&self) -> u64 {
+        C::u64_load_acquire(&self.verified_at)
+    }
+
+    #[inline(always)]
+    pub fn changed_at(&self) -> u64 {
+        C::u64_load_acquire(&self.changed_at)
+    }
+
+    #[inline(always)]
+    pub fn set_verified_at(&self, v: u64) {
+        C::u64_store_release(&self.verified_at, v);
+    }
+
+    #[inline(always)]
+    pub fn set_changed_at(&self, v: u64) {
+        C::u64_store_release(&self.changed_at, v);
+    }
+
+    #[inline(always)]
+    pub fn dep_count(&self) -> u8 {
+        C::u8_load_relaxed(&self.dep_count)
+    }
+
+    #[inline(always)]
+    pub fn generation(&self) -> u32 {
+        C::u32_load_relaxed(&self.generation)
+    }
+
+    #[inline(always)]
+    pub fn set_state(&self, v: u8) {
+        C::state_store_release(&self.state, v);
+    }
+
+    /// Install a new dep list. Inline-7 path stores into the inline
+    /// array; overflow path heap-allocates a `DepList` and Release-stores
+    /// the pointer.
+    ///
+    /// Reclamation: any displaced overflow pointer is retired through
+    /// the `haphazard` global domain. Concurrent readers in
+    /// [`Self::for_each_dep`] hold a `HazardPointer` while
+    /// dereferencing the slot, so the retired list is not freed until
+    /// every protecting reader has finished. Free-during-runtime; no
+    /// graveyard build-up; no leak past process scope.
+    pub(crate) fn install_deps(&self, new_deps: &[NodeId]) {
+        let count = new_deps.len();
+        assert!(count <= u8::MAX as usize, "dep count exceeds u8");
+        if count <= 7 {
+            for (i, dep) in new_deps.iter().enumerate() {
+                C::u32_store_relaxed(&self.inline_deps[i], dep.0);
+            }
+            // Going inline: swap null in to displace any stale overflow
+            // pointer, then retire it. for_each_dep's count<=7 path
+            // never reads overflow_deps, but a subsequent overflow
+            // install would clobber the stale pointer without retiring
+            // it, leaking memory. Retiring here closes that gap.
+            // SAFETY: swap_ptr with a null target is safe.
+            let displaced = unsafe { self.overflow_deps.swap_ptr(std::ptr::null_mut()) };
+            if let Some(old) = displaced {
+                // SAFETY: `old` came from a Box::into_raw via a previous
+                // install_deps; it is not aliased for writes (the state
+                // machine guarantees single-writer-at-a-time on this
+                // node). Hazard pointers ensure the actual free is
+                // deferred until no reader still references this list.
+                unsafe { old.retire() };
+            }
+            C::u8_store_release(&self.dep_count, count as u8);
+        } else {
+            let list = Box::new(DepList {
+                deps: new_deps.to_vec().into_boxed_slice(),
+            });
+            // `swap` takes ownership of the box; the old box (if any)
+            // is wrapped in a `Replaced` whose `retire` defers free
+            // through the global hazard-pointer domain.
+            let replaced = self.overflow_deps.swap(list);
+            C::u8_store_release(&self.dep_count, count as u8);
+            if let Some(old) = replaced {
+                // SAFETY: same as the inline-path retire above.
+                unsafe { old.retire() };
+            }
+        }
+    }
+
+    /// Invoke `f` on each recorded dependency of this node. The caller
+    /// must already have observed the node's state through an Acquire
+    /// load (e.g., via the state machine) so that it synchronizes with
+    /// the writer that installed these deps.
+    ///
+    /// Nodes with at most 7 deps are served from `inline_deps`. Larger
+    /// sets are read from the heap-allocated `DepList` behind
+    /// `overflow_deps`; that load is protected by a `HazardPointer`, so
+    /// a concurrent retire from `install_deps` has its free deferred
+    /// until this reader's hazard is released.
+    ///
+    /// The overflow branch is delegated to an `#[inline(never)]` helper
+    /// on purpose: the inline fast path stays small enough to inline at
+    /// call sites, and `HazardPointer::new` (thread_local lookup plus a
+    /// potential allocation) is not duplicated into each of them.
+    #[inline]
+    pub fn for_each_dep(&self, mut f: impl FnMut(NodeId)) {
+        let count = usize::from(C::u8_load_relaxed(&self.dep_count));
+        if count > 7 {
+            // Cold path: hazard-protected walk of the overflow list.
+            return self.for_each_overflow_dep(&mut f);
+        }
+        // Hot path: the first `count` inline cells hold the deps.
+        for cell in &self.inline_deps[..count] {
+            f(NodeId(C::u32_load_relaxed(cell)));
+        }
+    }
+
+    #[inline(never)]
+    fn for_each_overflow_dep(&self, f: &mut dyn FnMut(NodeId)) {
+        let mut hazard = HazardPointer::new();
+        // SAFETY: the AtomicPtr is populated by install_deps with
+        // Box-allocated DepLists; retirements go through the global
+        // haphazard domain, so the reference returned by `load` stays
+        // valid for as long as `hazard` protects it.
+        let list_ref: Option<&DepList> = unsafe { self.overflow_deps.load(&mut hazard) };
+        let list = list_ref.expect("overflow_deps null with dep_count > 7");
+        for &id in list.deps.iter() {
+            f(id);
+        }
+    }
+
+    fn empty_inline_deps() -> [C::U32; 7] {
+        [
+            C::new_u32(NodeId::SENTINEL.0),
+            C::new_u32(NodeId::SENTINEL.0),
+            C::new_u32(NodeId::SENTINEL.0),
+            C::new_u32(NodeId::SENTINEL.0),
+            C::new_u32(NodeId::SENTINEL.0),
+            C::new_u32(NodeId::SENTINEL.0),
+            C::new_u32(NodeId::SENTINEL.0),
+        ]
+    }
+}
+
+impl<C: Cells> Drop for NodeData<C> {
+    fn drop(&mut self) {
+        // Swap null in and retire whatever was installed. The actual
+        // free is deferred through the haphazard global domain, which
+        // reclaims it the next time a domain pass detects no protecting
+        // hazard pointers. For the runtime-drop case all hazards are
+        // already gone, so reclamation is immediate.
+        // SAFETY: swap_ptr to null is safe; the displaced pointer (if
+        // any) came from install_deps's Box::into_raw and goes through
+        // haphazard's retire path.
+        let displaced = unsafe { self.overflow_deps.swap_ptr(std::ptr::null_mut()) };
+        if let Some(old) = displaced {
+            unsafe { old.retire() };
+        }
+    }
+}
+
+const _: () = assert!(
+    std::mem::size_of::<NodeData<crate::cells::Local>>() == 64,
+    "NodeData<Local> must be exactly one 64-byte cache line"
+);
+const _: () = assert!(
+    std::mem::align_of::<NodeData<crate::cells::Local>>() == 64,
+    "NodeData<Local> must be 64-byte aligned"
+);
+const _: () = assert!(
+    std::mem::size_of::<NodeData<crate::cells::Shared>>() == 64,
+    "NodeData<Shared> must be exactly one 64-byte cache line"
+);
+const _: () = assert!(
+    std::mem::align_of::<NodeData<crate::cells::Shared>>() == 64,
+    "NodeData<Shared> must be 64-byte aligned"
+);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    #[test]
+    fn local_node_is_64_bytes() {
+        assert_eq!(std::mem::size_of::<NodeData<Local>>(), 64);
+        assert_eq!(std::mem::align_of::<NodeData<Local>>(), 64);
+    }
+
+    #[test]
+    fn shared_node_is_64_bytes() {
+        assert_eq!(std::mem::size_of::<NodeData<Shared>>(), 64);
+        assert_eq!(std::mem::align_of::<NodeData<Shared>>(), 64);
+    }
+
+    #[test]
+    fn local_input_roundtrip() {
+        let n: NodeData<Local> = NodeData::new_input(0, 42, 7);
+        assert_eq!(n.arena_slot(), 42);
+        assert_eq!(n.verified_at(), 7);
+        assert_eq!(n.changed_at(), 7);
+        assert_eq!(n.state(), NodeState::Clean);
+        n.set_verified_at(11);
+        assert_eq!(n.verified_at(), 11);
+    }
+
+    #[test]
+    fn shared_input_roundtrip() {
+        let n: NodeData<Shared> = NodeData::new_input(0, 42, 7);
+        assert_eq!(n.arena_slot(), 42);
+        assert_eq!(n.verified_at(), 7);
+        assert_eq!(n.changed_at(), 7);
+        assert_eq!(n.state(), NodeState::Clean);
+        n.set_verified_at(11);
+        assert_eq!(n.verified_at(), 11);
+    }
+
+    #[test]
+    fn local_query_starts_new() {
+        let n: NodeData<Local> = NodeData::new_query(0, 99);
+        assert_eq!(n.state(), NodeState::New);
+        assert_eq!(n.dep_count(), 0);
+    }
+
+    #[test]
+    fn shared_query_starts_new() {
+        let n: NodeData<Shared> = NodeData::new_query(0, 99);
+        assert_eq!(n.state(), NodeState::New);
+        assert_eq!(n.dep_count(), 0);
+    }
+}
diff --git a/crates/incr-core/src/runtime.rs b/crates/incr-core/src/runtime.rs
new file mode 100644
index 0000000..447afb0
--- /dev/null
+++ b/crates/incr-core/src/runtime.rs
@@ -0,0 +1,930 @@
+//! `Runtime<C>`: the strategy-parameterized incremental computation engine.
+//!
+//! Single struct, single `impl` block, monomorphized at compile time into
+//! the single-threaded variant (`Runtime<Local>`) and the concurrent
+//! variant (`Runtime<Shared>`). The user-facing crates `incr-compute` and
+//! `incr-concurrent` re-export the appropriate alias.
+//!
+//! This first slice ships the core algorithm:
+//! - `create_input<T>` / `create_query<T, F>`: node construction.
+//! - `get<T>(handle)` / `set<T>(handle, value)`: the user-facing API.
+//! - `ensure_clean`: iterative post-order walker that recomputes dirty
+//!   nodes in topological order.
+//! - `run_compute`: claim Computing, run the closure, observe new deps,
+//!   update edges, Release Clean. Includes red/green early cutoff.
+//! - `mark_dependents_dirty`: BFS dirty walk from a mutated input.
+//!
+//! Also in this module: introspection (`graph_snapshot`, labels) and
+//! per-call tracing (`get_traced`). Deferred to follow-ups: full handle
+//! validation (generation checks; `runtime_id` is only debug-asserted)
+//! and race-detection ordering with AcqRel.
+
+use std::any::TypeId;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use crate::arena_registry::ArenaRegistry;
+use crate::cells::Cells;
+use crate::dep_stack::DepStack;
+use crate::generic_arena::GenericArena;
+use crate::handle::{Incr, RuntimeId};
+use crate::locks::Lock;
+use crate::node::{NodeData, NodeId};
+use crate::segmented_nodes::SegmentedNodes;
+use crate::state::{self, NodeState};
+use crate::value::Value;
+
+/// Compute closure: takes a borrow of the runtime, the node's slot, and
+/// whether this is a recompute (true) or first compute (false). Returns
+/// `true` if the value actually changed (for early-cutoff propagation),
+/// `false` if it was the same as before.
+type ComputeFn<C> = Arc<dyn Fn(&Runtime<C>, u32, bool) -> bool + Send + Sync + 'static>;
+
+/// Per-runtime mutable state guarded by the inner lock. Holds everything
+/// that's not on the per-node `NodeData` and not in an arena.
+pub(crate) struct Inner<C: Cells> {
+    pub(crate) compute_fns: Vec<Option<ComputeFn<C>>>,
+    pub(crate) dependents: Vec<Vec<NodeId>>,
+    pub(crate) arenas: ArenaRegistry<C>,
+    pub(crate) type_tags: HashMap<TypeId, u16>,
+    pub(crate) next_type_tag: u16,
+    pub(crate) labels: HashMap<u32, String>,
+    pub(crate) trace_log: Vec<crate::trace::NodeTrace>,
+}
+
+impl<C: Cells> Inner<C> {
+    fn new() -> Self {
+        Self {
+            compute_fns: Vec::new(),
+            dependents: Vec::new(),
+            arenas: ArenaRegistry::new(),
+            type_tags: HashMap::new(),
+            next_type_tag: 0,
+            labels: HashMap::new(),
+            trace_log: Vec::new(),
+        }
+    }
+
+    fn type_tag_for<T: Value>(&mut self) -> u16 {
+        let id = TypeId::of::<T>();
+        if let Some(&tag) = self.type_tags.get(&id) {
+            return tag;
+        }
+        let tag = self.next_type_tag;
+        self.next_type_tag = self
+            .next_type_tag
+            .checked_add(1)
+            .expect("incr-core: more than u16::MAX distinct value types in one runtime");
+        self.type_tags.insert(id, tag);
+        tag
+    }
+}
+
+/// The runtime.
+pub struct Runtime<C: Cells> {
+    pub(crate) nodes: SegmentedNodes<C>,
+    pub(crate) inner: <C as Cells>::Lock<Inner<C>>,
+    pub(crate) revision: <C as Cells>::U64,
+    pub(crate) dep_stack: <C as Cells>::DepStack,
+    pub(crate) runtime_id: RuntimeId,
+    /// `1` when `get_traced` is actively recording. Checked on every
+    /// `compute_one` via a Relaxed load (~1 ns when disarmed) so the
+    /// non-tracing hot path pays no measurable cost.
+    pub(crate) tracing_armed: <C as Cells>::U8,
+}
+
+impl<C: Cells> Default for Runtime<C> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<C: Cells> Runtime<C> {
+    pub fn new() -> Self {
+        Self {
+            nodes: SegmentedNodes::new(),
+            inner: <<C as Cells>::Lock<Inner<C>> as Lock<Inner<C>>>::new(Inner::new()),
+            revision: C::new_u64(1),
+            dep_stack: <C::DepStack as DepStack>::new(),
+            runtime_id: RuntimeId::allocate(),
+            tracing_armed: C::new_u8(0),
+        }
+    }
+
+    #[inline(always)]
+    fn tracing_is_armed(&self) -> bool {
+        C::u8_load_relaxed(&self.tracing_armed) == 1
+    }
+
+    fn record_trace(&self, id: NodeId, action: crate::trace::TraceAction) {
+        if self.tracing_is_armed() {
+            self.inner
+                .write()
+                .trace_log
+                .push(crate::trace::NodeTrace { id, action });
+        }
+    }
+
+    #[inline]
+    fn current_revision(&self) -> u64 {
+        C::u64_load_acquire(&self.revision)
+    }
+
+    fn bump_revision(&self) -> u64 {
+        let cur = C::u64_load_acquire(&self.revision);
+        let next = cur
+            .checked_add(1)
+            .expect("incr-core: revision counter overflow");
+        C::u64_store_release(&self.revision, next);
+        next
+    }
+
+    /// Create an input node with an initial value.
+    pub fn create_input<T: Value>(&self, value: T) -> Incr<T> {
+        assert!(
+            !self.dep_stack.current_frame_active(),
+            "create_input called during compute; not permitted",
+        );
+        self.create_input_unchecked(value)
+    }
+
+    /// Internal: create an input without the dep-stack-empty check.
+    /// Used by operators like `group_by` that need to allocate
+    /// sub-collection version nodes lazily from inside a compute closure.
+    /// The caller is responsible for ensuring the new node is not a dep
+    /// of the currently-computing node (i.e., the new node is downstream
+    /// of the operator, not upstream).
+    pub(crate) fn create_input_unchecked<T: Value>(&self, value: T) -> Incr<T> {
+        let revision = self.current_revision();
+        let (slot, generation) = {
+            let mut inner = self.inner.write();
+            let type_tag = inner.type_tag_for::<T>();
+            let arena = inner.arenas.ensure_arena::<T>();
+            let arena_slot = arena.reserve_with(value);
+            let node = NodeData::<C>::new_input(type_tag, arena_slot, revision);
+            let slot = self.nodes.push(node);
+            // Inputs have no compute closure; both side tables get one entry per node.
+            inner.compute_fns.push(None);
+            inner.dependents.push(Vec::new());
+            let generation = self.nodes.get(slot).generation();
+            (slot, generation)
+        };
+        Incr::new(slot, generation, self.runtime_id)
+    }
+
+    /// Create a compute (query) node. Dependencies are tracked
+    /// automatically: every `rt.get(other)` call inside `f` records
+    /// `other` as a dep.
+    pub fn create_query<T, F>(&self, f: F) -> Incr<T>
+    where
+        T: Value,
+        F: Fn(&Runtime<C>) -> T + Send + Sync + 'static,
+    {
+        assert!(
+            !self.dep_stack.current_frame_active(),
+            "create_query called during compute; not permitted",
+        );
+
+        let (slot, generation, _type_tag) = {
+            let mut inner = self.inner.write();
+            let type_tag = inner.type_tag_for::<T>();
+            let arena = inner.arenas.ensure_arena::<T>();
+            let arena_slot = arena.reserve();
+            let node = NodeData::<C>::new_query(type_tag, arena_slot);
+            let slot = self.nodes.push(node);
+
+            // Compute closure: invokes f, writes value, returns whether
+            // the value changed (for early cutoff). `is_recompute=false`
+            // means there's no prior value, so we always treat it as
+            // changed; `is_recompute=true` compares against the stored
+            // value via T's PartialEq.
+            let arena_inner = arena.clone();
+            let compute: ComputeFn<C> = Arc::new(
+                move |rt: &Runtime<C>, slot: u32, is_recompute: bool| -> bool {
+                    let new_value = f(rt);
+                    let node = rt.nodes.get(slot);
+                    if is_recompute {
+                        if let Some(old) = arena_inner.try_read(node.arena_slot()) {
+                            if old == new_value {
+                                return false; // early cutoff
+                            }
+                        }
+                    }
+                    arena_inner.write(node.arena_slot(), new_value);
+                    true
+                },
+            );
+
+            inner.compute_fns.push(Some(compute));
+            inner.dependents.push(Vec::new());
+            let generation = self.nodes.get(slot).generation();
+            (slot, generation, type_tag)
+        };
+        Incr::new(slot, generation, self.runtime_id)
+    }
+
+    /// Read the current value of a node. Triggers recomputation of the
+    /// minimum necessary subgraph if anything is dirty.
+    pub fn get<T: Value>(&self, handle: Incr<T>) -> T {
+        debug_assert_eq!(
+            handle.runtime_id(),
+            self.runtime_id,
+            "Incr<T> handle from a foreign runtime",
+        );
+        let slot = handle.slot();
+
+        // Record dep if we're inside a compute closure.
+        self.dep_stack.record_dep(NodeId(slot));
+
+        // Ensure clean, then read.
+        self.ensure_clean(NodeId(slot));
+
+        let arena = {
+            let inner = self.inner.read();
+            inner
+                .arenas
+                .try_arena::<T>()
+                .expect("incr-core: arena missing for handle's type; this should be impossible")
+        };
+        let node = self.nodes.get(slot);
+        arena.read(node.arena_slot())
+    }
+
+    /// Read the current value and return a propagation trace alongside.
+    /// Records per-node events (Recomputed { value_changed } or
+    /// VerifiedClean) for every compute or short-circuit that happens
+    /// during this `get`. The tracing gate is reset even if `get` panics.
+    pub fn get_traced<T: Value>(&self, handle: Incr<T>) -> (T, crate::trace::PropagationTrace) {
+        use crate::trace::TraceAction;
+
+        /// Stores 0 into the tracing gate on drop, so a panicking `get`
+        /// cannot leave the runtime armed and appending to `trace_log`.
+        struct DisarmOnDrop<'a, S: Cells>(&'a S::U8);
+        impl<S: Cells> Drop for DisarmOnDrop<'_, S> {
+            fn drop(&mut self) {
+                S::u8_store_release(self.0, 0);
+            }
+        }
+
+        // Arm tracing: clear any prior log, then flip the gate so
+        // compute_one starts appending events.
+        self.inner.write().trace_log.clear();
+        C::u8_store_release(&self.tracing_armed, 1);
+        // From here on every exit path, unwinding included, disarms.
+        let disarm: DisarmOnDrop<'_, C> = DisarmOnDrop(&self.tracing_armed);
+
+        let start = std::time::Instant::now();
+        let value = self.get(handle);
+        let elapsed_ns = start.elapsed().as_nanos() as u64;
+
+        // Disarm before draining so nothing is appended after the take.
+        drop(disarm);
+        let node_traces = std::mem::take(&mut self.inner.write().trace_log);
+
+        // Single pass: every `Recomputed` counts as recomputed; those
+        // whose value did not change also count as early cutoffs.
+        let mut nodes_recomputed = 0;
+        let mut nodes_cutoff = 0;
+        for t in &node_traces {
+            if let TraceAction::Recomputed { value_changed } = &t.action {
+                nodes_recomputed += 1;
+                nodes_cutoff += usize::from(!*value_changed);
+            }
+        }
+
+        let trace = crate::trace::PropagationTrace {
+            target: NodeId(handle.slot()),
+            node_traces,
+            total_nodes: self.node_count(),
+            nodes_recomputed,
+            nodes_cutoff,
+            elapsed_ns,
+        };
+        (value, trace)
+    }
+
+    /// Number of nodes in the runtime.
+    pub fn node_count(&self) -> usize {
+        self.nodes.len() as usize
+    }
+
+    /// Assign a human-readable label to a node slot. Surfaces in
+    /// `graph_snapshot()` and trace output. Re-assigning replaces.
+    pub fn set_label(&self, slot: u32, label: String) {
+        self.inner.write().labels.insert(slot, label);
+    }
+
+    /// Retrieve the label for a node slot, if any.
+    pub fn label(&self, slot: u32) -> Option<String> {
+        self.inner.read().labels.get(&slot).cloned()
+    }
+
+    /// Structural snapshot of every node, in slot order. Each `NodeInfo`
+    /// carries the node's dependencies (everything `for_each_dep` visits)
+    /// and its dependents (read from the inner state).
+    pub fn graph_snapshot(&self) -> Vec<crate::trace::NodeInfo> {
+        use crate::trace::{NodeInfo, NodeKindInfo};
+        let inner = self.inner.read();
+        (0..self.nodes.len())
+            .map(|slot| {
+                let index = slot as usize;
+                let node = self.nodes.get(slot);
+                // A slot holding a compute closure is a query node; a
+                // `None` entry (or a missing one) is reported as an input.
+                let kind = match inner.compute_fns.get(index) {
+                    Some(Some(_)) => NodeKindInfo::Compute,
+                    _ => NodeKindInfo::Input,
+                };
+
+                // Upstream edges come from the node itself...
+                let mut dependencies = Vec::new();
+                node.for_each_dep(|dep| dependencies.push(dep));
+                // ...downstream edges from the per-slot side table.
+                let dependents = match inner.dependents.get(index) {
+                    Some(list) => list.clone(),
+                    None => Vec::new(),
+                };
+
+                NodeInfo {
+                    id: NodeId(slot),
+                    kind,
+                    label: inner.labels.get(&slot).cloned(),
+                    dependencies,
+                    dependents,
+                }
+            })
+            .collect()
+    }
+
+    /// Write `value` into the input node behind `handle`.
+    ///
+    /// If the stored value already equals `value` this is a no-op.
+    /// Otherwise the revision is bumped, the value is written, the
+    /// node's `changed_at` and `verified_at` are stamped with the new
+    /// revision, and the node's dependents are marked dirty.
+    ///
+    /// # Panics
+    ///
+    /// - if no arena exists for `T`;
+    /// - if `handle` refers to a query (compute) node: setting one would
+    ///   overwrite its computed value and bypass the state machine, so
+    ///   only the compute closure itself may produce its value;
+    /// - in debug builds only, if `handle` came from another runtime.
+    pub fn set<T: Value>(&self, handle: Incr<T>, value: T) {
+        debug_assert_eq!(
+            handle.runtime_id(),
+            self.runtime_id,
+            "Incr<T> handle from a foreign runtime",
+        );
+        let slot = handle.slot();
+
+        // Resolve the arena and the node kind under one read guard,
+        // then release it before doing anything else.
+        let inner = self.inner.read();
+        let arena = inner
+            .arenas
+            .try_arena::<T>()
+            .expect("incr-core: arena missing for input handle's type");
+        let is_query = inner
+            .compute_fns
+            .get(slot as usize)
+            .is_some_and(|f| f.is_some());
+        drop(inner);
+
+        assert!(
+            !is_query,
+            "Runtime::set called on a query (compute) node at slot {}; only input nodes can be set",
+            slot,
+        );
+
+        // Writing an equal value must not bump the revision.
+        let node = self.nodes.get(slot);
+        let unchanged = arena
+            .try_read(node.arena_slot())
+            .is_some_and(|old| old == value);
+        if unchanged {
+            return;
+        }
+
+        let new_rev = self.bump_revision();
+        arena.write(node.arena_slot(), value);
+        node.set_changed_at(new_rev);
+        node.set_verified_at(new_rev);
+
+        self.mark_dependents_dirty(NodeId(slot));
+    }
+
+    /// Forward walk over the transitive dependents of `start`, moving
+    /// every `Clean` or `Failed` node it reaches to `Dirty`.
+    ///
+    /// The worklist is a `Vec` popped from the back, so nodes are
+    /// visited in LIFO order. Nodes found in `New`, `Dirty` or
+    /// `Computing` are left untouched and the walk does not continue
+    /// through them.
+    fn mark_dependents_dirty(&self, start: NodeId) {
+        // Seed with the direct dependents; the read guard is released
+        // at the end of this statement.
+        let mut worklist: Vec<NodeId> = self.inner.read().dependents[start.0 as usize].clone();
+
+        while let Some(current) = worklist.pop() {
+            let node = self.nodes.get(current.0);
+            match state::load::<C>(node.state_cell()) {
+                NodeState::Clean | NodeState::Failed => {
+                    // Flip to Dirty so the next reader recomputes, then
+                    // keep walking through this node's own dependents.
+                    state::store::<C>(node.state_cell(), NodeState::Dirty);
+                    let inner = self.inner.read();
+                    worklist.extend(inner.dependents[current.0 as usize].iter().copied());
+                }
+                // Already dirty, never computed, or being computed:
+                // nothing to mark and nothing to enqueue.
+                NodeState::New | NodeState::Dirty | NodeState::Computing => {}
+            }
+        }
+    }
+
+    /// Bring the node at `id` to `Clean`, visiting only the non-clean
+    /// part of its dependency subgraph.
+    ///
+    /// Implemented as an explicit-stack post-order walk: a node is
+    /// pushed once to expand its non-clean deps and once more (flagged)
+    /// so that `compute_one` runs on it after those deps were handled.
+    fn ensure_clean(&self, id: NodeId) {
+        let is_clean =
+            |n: NodeId| state::load::<C>(self.nodes.get(n.0).state_cell()) == NodeState::Clean;
+
+        // Fast path: nothing to do for an already clean target.
+        if is_clean(id) {
+            return;
+        }
+
+        // Stack entries are (node, deps_done). `deps_done == false`
+        // means the node still has to be expanded; `true` means its
+        // deps were pushed above it and have been processed by now.
+        let mut pending: Vec<(NodeId, bool)> = vec![(id, false)];
+
+        while let Some((current, deps_done)) = pending.pop() {
+            if deps_done {
+                self.compute_one(current);
+            } else if !is_clean(current) {
+                // Re-push the node itself first so it is handled after
+                // every non-clean dep pushed on top of it.
+                pending.push((current, true));
+                self.nodes.get(current.0).for_each_dep(|dep| {
+                    if !is_clean(dep) {
+                        pending.push((dep, false));
+                    }
+                });
+            }
+        }
+    }
+
+    /// Compute (or verify) a single node, assuming all its known deps
+    /// are already clean. Handles state-machine transitions and red/green
+    /// early cutoff.
+    ///
+    /// Outcomes, in the order they are checked:
+    /// - the node is already `Clean`: return immediately;
+    /// - no compute function is registered (input node): stamp
+    ///   `verified_at` with the current revision and mark it `Clean`;
+    /// - a recompute where no dep's `changed_at` exceeds this node's
+    ///   `verified_at`: stamp `verified_at`, mark it `Clean` and record
+    ///   `TraceAction::VerifiedClean` without running the closure;
+    /// - otherwise: claim the node via `try_claim_compute`, run the
+    ///   closure inside a dep-stack frame, publish the recorded deps,
+    ///   update the timestamps, mark it `Clean` and record
+    ///   `TraceAction::Recomputed`.
+    fn compute_one(&self, id: NodeId) {
+        let node = self.nodes.get(id.0);
+
+        // If something else cleaned us in the meantime, we're done.
+        if state::load::<C>(node.state_cell()) == NodeState::Clean {
+            return;
+        }
+
+        // Distinguish input vs query: inputs don't compute, they just
+        // need their state stamped clean. The compute fn is cloned out
+        // so the read guard is released before the closure runs.
+        let compute = {
+            let inner = self.inner.read();
+            inner.compute_fns.get(id.0 as usize).and_then(|f| f.clone())
+        };
+        let compute = match compute {
+            Some(f) => f,
+            None => {
+                // Input node: state machine bookkeeping only.
+                let rev = self.current_revision();
+                node.set_verified_at(rev);
+                state::store::<C>(node.state_cell(), NodeState::Clean);
+                return;
+            }
+        };
+
+        // Every state other than `New` counts as a recompute.
+        let is_recompute = !matches!(state::load::<C>(node.state_cell()), NodeState::New,);
+
+        // Red/green check: if no dep's changed_at exceeds our verified_at,
+        // we can skip the closure entirely.
+        if is_recompute {
+            let my_verified = node.verified_at();
+            let mut any_changed = false;
+            node.for_each_dep(|dep| {
+                // for_each_dep offers no early exit here, so once a
+                // changed dep was found the remaining calls do nothing.
+                if any_changed {
+                    return;
+                }
+                if self.nodes.get(dep.0).changed_at() > my_verified {
+                    any_changed = true;
+                }
+            });
+            if !any_changed {
+                // Verified clean: bump verified_at, leave changed_at alone
+                // so downstream cutoffs also work.
+                let rev = self.current_revision();
+                node.set_verified_at(rev);
+                state::store::<C>(node.state_cell(), NodeState::Clean);
+                self.record_trace(id, crate::trace::TraceAction::VerifiedClean);
+                return;
+            }
+        }
+
+        // Full compute path. Claim Computing first.
+        if state::try_claim_compute::<C>(node.state_cell()).is_err() {
+            // Lost the race (Shared) or already cleaned (Local). Re-check
+            // and bail; the caller's ensure_clean loop will see Clean and
+            // move on.
+            return;
+        }
+
+        // Track deps via the strategy's dep stack: the frame pushed here
+        // is popped right after the closure returns and yields the deps
+        // recorded while it ran.
+        self.dep_stack.push_frame();
+        let value_changed = (compute)(self, id.0, is_recompute);
+        let recorded_deps = self.dep_stack.pop_frame();
+
+        // Update dep edges. `publish_deps` compares the recorded deps
+        // with the deps currently stored on the node and only rewrites
+        // the dep list and the reverse edges when they differ.
+        self.publish_deps(id, &recorded_deps);
+
+        // Update timestamps and transition to Clean. `changed_at` only
+        // moves on a first compute or when the value changed, which is
+        // what lets dependents take the red/green shortcut above.
+        let rev = self.current_revision();
+        if value_changed || !is_recompute {
+            node.set_changed_at(rev);
+        }
+        node.set_verified_at(rev);
+        state::store::<C>(node.state_cell(), NodeState::Clean);
+        self.record_trace(id, crate::trace::TraceAction::Recomputed { value_changed });
+    }
+
+    /// Store `new_deps` as the dependency list of `id` and bring the
+    /// reverse (`dependents`) edges in the inner state in line with it.
+    ///
+    /// When the recorded deps are identical to the ones already stored
+    /// (same elements, same order) nothing is written and the inner
+    /// write lock is never taken; this is the expected case for queries
+    /// whose deps are static.
+    ///
+    /// NOTE(review): `install_deps` handles both inline and overflow
+    /// storage (presumably inline up to 7 deps; confirm in `NodeData`).
+    /// How a displaced overflow list is reclaimed is decided inside
+    /// `install_deps`, which is not visible here: earlier comments on
+    /// this function described it both as leaked under `Shared` and as
+    /// retired through the haphazard global domain. Confirm which holds.
+    fn publish_deps(&self, id: NodeId, new_deps: &[NodeId]) {
+        let node = self.nodes.get(id.0);
+
+        // Snapshot the current deps first: installing the new list
+        // overwrites the storage this snapshot is read from.
+        let mut previous: Vec<NodeId> = Vec::with_capacity(8);
+        node.for_each_dep(|dep| previous.push(dep));
+
+        // Fast path: identical dep list, skip every write.
+        if previous.as_slice() == new_deps {
+            return;
+        }
+
+        // Slow path: the dep list changed.
+        node.install_deps(new_deps);
+
+        // Diff the reverse edges under the inner write lock. Linear
+        // `contains` scans are used on the assumption that dep sets are
+        // small.
+        let mut inner = self.inner.write();
+        for removed in previous.iter().filter(|dep| !new_deps.contains(*dep)) {
+            inner.dependents[removed.0 as usize].retain(|&d| d != id);
+        }
+        for added in new_deps.iter().filter(|dep| !previous.contains(*dep)) {
+            inner.dependents[added.0 as usize].push(id);
+        }
+    }
+
+    /// Fetch the shared arena that stores values of type `T`.
+    ///
+    /// Panics when no arena exists for `T`.
+    #[allow(dead_code)]
+    pub(crate) fn arena<T: Value>(&self) -> Arc<GenericArena<T, C>> {
+        let inner = self.inner.read();
+        let found = inner.arenas.try_arena::<T>();
+        found.expect("incr-core: arena lookup failed for T")
+    }
+}
+
+// SAFETY: Runtime<Shared> is Send + Sync by composition (SegmentedNodes,
+// RwLock, AtomicU64, SharedDepStack, RuntimeId all are). Runtime<Local>
+// uses Cell/RefCell-backed cells through the Local strategy and is
+// !Send + !Sync by auto-derive. We rely on auto traits here; no manual
+// impls needed.
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    /// An input reads back the value it was created with (`Local`).
+    #[test]
+    fn local_create_and_get_input() {
+        let runtime = Runtime::<Local>::new();
+        let answer = runtime.create_input(42_u64);
+        assert_eq!(runtime.get(answer), 42);
+    }
+
+    /// An input reads back the value it was created with (`Shared`).
+    #[test]
+    fn shared_create_and_get_input() {
+        let runtime = Runtime::<Shared>::new();
+        let answer = runtime.create_input(42_u64);
+        assert_eq!(runtime.get(answer), 42);
+    }
+
+    /// A query derived from one input yields the derived value (`Local`).
+    #[test]
+    fn local_simple_query() {
+        let runtime = Runtime::<Local>::new();
+        let base = runtime.create_input(10_i64);
+        let doubled = runtime.create_query(move |r| r.get(base) * 2);
+        assert_eq!(runtime.get(doubled), 20);
+    }
+
+    /// A query derived from one input yields the derived value (`Shared`).
+    #[test]
+    fn shared_simple_query() {
+        let runtime = Runtime::<Shared>::new();
+        let base = runtime.create_input(10_i64);
+        let doubled = runtime.create_query(move |r| r.get(base) * 2);
+        assert_eq!(runtime.get(doubled), 20);
+    }
+
+    /// Setting an input is reflected by the next read of a query that
+    /// depends on it (`Local`).
+    #[test]
+    fn local_set_propagates() {
+        let runtime = Runtime::<Local>::new();
+        let base = runtime.create_input(10_i64);
+        let doubled = runtime.create_query(move |r| r.get(base) * 2);
+
+        let before = runtime.get(doubled);
+        assert_eq!(before, 20);
+
+        runtime.set(base, 15);
+        let after = runtime.get(doubled);
+        assert_eq!(after, 30);
+    }
+
+    /// Setting an input is reflected by the next read of a query that
+    /// depends on it (`Shared`).
+    #[test]
+    fn shared_set_propagates() {
+        let runtime = Runtime::<Shared>::new();
+        let base = runtime.create_input(10_i64);
+        let doubled = runtime.create_query(move |r| r.get(base) * 2);
+
+        let before = runtime.get(doubled);
+        assert_eq!(before, 20);
+
+        runtime.set(base, 15);
+        let after = runtime.get(doubled);
+        assert_eq!(after, 30);
+    }
+
+    /// A change to the input travels through a two-query chain (`Local`).
+    #[test]
+    fn local_chain() {
+        let runtime = Runtime::<Local>::new();
+        let source = runtime.create_input(5_i64);
+        let plus_one = runtime.create_query(move |r| r.get(source) + 1);
+        let times_two = runtime.create_query(move |r| r.get(plus_one) * 2);
+
+        // (5 + 1) * 2
+        assert_eq!(runtime.get(times_two), 12);
+
+        runtime.set(source, 10);
+        // (10 + 1) * 2
+        assert_eq!(runtime.get(times_two), 22);
+    }
+
+    /// A change to the input travels through a two-query chain (`Shared`).
+    #[test]
+    fn shared_chain() {
+        let runtime = Runtime::<Shared>::new();
+        let source = runtime.create_input(5_i64);
+        let plus_one = runtime.create_query(move |r| r.get(source) + 1);
+        let times_two = runtime.create_query(move |r| r.get(plus_one) * 2);
+
+        // (5 + 1) * 2
+        assert_eq!(runtime.get(times_two), 12);
+
+        runtime.set(source, 10);
+        // (10 + 1) * 2
+        assert_eq!(runtime.get(times_two), 22);
+    }
+
+    /// Diamond graph: two queries over one input, joined by a third
+    /// query (`Local`).
+    #[test]
+    fn local_diamond() {
+        let runtime = Runtime::<Local>::new();
+        let root = runtime.create_input(1_i64);
+        let left = runtime.create_query(move |r| r.get(root) + 10);
+        let right = runtime.create_query(move |r| r.get(root) + 100);
+        let join = runtime.create_query(move |r| r.get(left) + r.get(right));
+
+        // (1 + 10) + (1 + 100)
+        assert_eq!(runtime.get(join), 112);
+
+        runtime.set(root, 2);
+        // (2 + 10) + (2 + 100)
+        assert_eq!(runtime.get(join), 114);
+    }
+
+    /// Diamond graph: two queries over one input, joined by a third
+    /// query (`Shared`).
+    #[test]
+    fn shared_diamond() {
+        let runtime = Runtime::<Shared>::new();
+        let root = runtime.create_input(1_i64);
+        let left = runtime.create_query(move |r| r.get(root) + 10);
+        let right = runtime.create_query(move |r| r.get(root) + 100);
+        let join = runtime.create_query(move |r| r.get(left) + r.get(right));
+
+        // (1 + 10) + (1 + 100)
+        assert_eq!(runtime.get(join), 112);
+
+        runtime.set(root, 2);
+        // (2 + 10) + (2 + 100)
+        assert_eq!(runtime.get(join), 114);
+    }
+
+    /// Two independent input/query pairs: changing one input re-runs
+    /// only the query that reads it. Each query closure counts its own
+    /// invocations.
+    #[test]
+    fn local_only_affected_recompute() {
+        use std::sync::atomic::{AtomicU32, Ordering};
+        let runtime = Runtime::<Local>::new();
+        let first = runtime.create_input(1_i64);
+        let second = runtime.create_input(2_i64);
+
+        let first_runs = Arc::new(AtomicU32::new(0));
+        let second_runs = Arc::new(AtomicU32::new(0));
+
+        let counter = Arc::clone(&first_runs);
+        let first_derived = runtime.create_query(move |r| {
+            counter.fetch_add(1, Ordering::Relaxed);
+            r.get(first) * 10
+        });
+        let counter = Arc::clone(&second_runs);
+        let second_derived = runtime.create_query(move |r| {
+            counter.fetch_add(1, Ordering::Relaxed);
+            r.get(second) * 10
+        });
+
+        // Initial reads run each closure exactly once.
+        assert_eq!(runtime.get(first_derived), 10);
+        assert_eq!(runtime.get(second_derived), 20);
+        assert_eq!(first_runs.load(Ordering::Relaxed), 1);
+        assert_eq!(second_runs.load(Ordering::Relaxed), 1);
+
+        // Only the query over `first` runs again.
+        runtime.set(first, 5);
+        assert_eq!(runtime.get(first_derived), 50);
+        assert_eq!(runtime.get(second_derived), 20);
+        assert_eq!(first_runs.load(Ordering::Relaxed), 2);
+        assert_eq!(second_runs.load(Ordering::Relaxed), 1);
+    }
+
+    /// Early cutoff: when an intermediate query recomputes to the same
+    /// value, the query downstream of it is not re-run. The downstream
+    /// closure counts its own invocations.
+    #[test]
+    fn shared_early_cutoff_stops_propagation() {
+        use std::sync::atomic::{AtomicU32, Ordering};
+        let runtime = Runtime::<Shared>::new();
+        let source = runtime.create_input(50_i64);
+
+        let downstream_runs = Arc::new(AtomicU32::new(0));
+        let counter = Arc::clone(&downstream_runs);
+        let clamped = runtime.create_query(move |r| r.get(source).min(100));
+        let downstream = runtime.create_query(move |r| {
+            counter.fetch_add(1, Ordering::Relaxed);
+            r.get(clamped) + 1
+        });
+
+        assert_eq!(runtime.get(downstream), 51);
+        assert_eq!(downstream_runs.load(Ordering::Relaxed), 1);
+
+        // 50 -> 200 moves the clamp from 50 to 100: downstream re-runs.
+        runtime.set(source, 200);
+        assert_eq!(runtime.get(downstream), 101);
+        assert_eq!(downstream_runs.load(Ordering::Relaxed), 2);
+
+        // 200 -> 300 still clamps to 100: downstream is not re-run.
+        runtime.set(source, 300);
+        assert_eq!(runtime.get(downstream), 101);
+        assert_eq!(downstream_runs.load(Ordering::Relaxed), 2);
+    }
+
+    /// A query reading 12 inputs, i.e. more than seven deps, which per
+    /// the original test note exercises the overflow path of
+    /// `NodeData::install_deps` and `for_each_dep`.
+    #[test]
+    fn local_query_with_more_than_seven_deps() {
+        let runtime = Runtime::<Local>::new();
+        let inputs: Vec<_> = (0..12_i64).map(|v| runtime.create_input(v)).collect();
+        let captured = inputs.clone();
+        let sum = runtime.create_query(move |r| captured.iter().map(|&i| r.get(i)).sum::<i64>());
+
+        // 0 + 1 + ... + 11
+        assert_eq!(runtime.get(sum), 66);
+
+        // Replacing the 5 by 100 shifts the total by 95.
+        runtime.set(inputs[5], 100);
+        assert_eq!(runtime.get(sum), 161);
+    }
+
+    /// A query reading 15 inputs (more than seven deps) under `Shared`.
+    #[test]
+    fn shared_query_with_more_than_seven_deps() {
+        let runtime = Runtime::<Shared>::new();
+        let inputs: Vec<_> = (0..15_i64).map(|v| runtime.create_input(v)).collect();
+        let captured = inputs.clone();
+        let sum = runtime.create_query(move |r| captured.iter().map(|&i| r.get(i)).sum::<i64>());
+
+        // 0 + 1 + ... + 14
+        assert_eq!(runtime.get(sum), 105);
+
+        // Replacing the 10 by 1000 shifts the total by 990.
+        runtime.set(inputs[10], 1000);
+        assert_eq!(runtime.get(sum), 1095);
+    }
+
+    /// After an input change, a traced read of the end of a two-query
+    /// chain reports both queries as recomputed with a changed value.
+    #[test]
+    fn local_get_traced_records_recompute_events() {
+        use crate::trace::TraceAction;
+        let runtime = Runtime::<Local>::new();
+        let source = runtime.create_input(1_i64);
+        let shifted = runtime.create_query(move |r| r.get(source) + 10);
+        let scaled = runtime.create_query(move |r| r.get(shifted) * 2);
+        // Prime the chain so the traced read below is a recompute.
+        let _ = runtime.get(scaled);
+
+        runtime.set(source, 5);
+        let (value, trace) = runtime.get_traced(scaled);
+
+        // (5 + 10) * 2
+        assert_eq!(value, 30);
+        assert_eq!(trace.target, NodeId(scaled.slot()));
+        // Both queries were recomputed, none of them cut off.
+        assert_eq!(trace.nodes_recomputed, 2);
+        assert_eq!(trace.nodes_cutoff, 0);
+
+        // Both events must carry value_changed = true.
+        let changed_events = trace
+            .node_traces
+            .iter()
+            .filter(|event| {
+                matches!(
+                    event.action,
+                    TraceAction::Recomputed {
+                        value_changed: true
+                    }
+                )
+            })
+            .count();
+        assert_eq!(changed_events, 2);
+    }
+
+    /// A traced read reports an early cutoff: the clamping query is
+    /// recomputed but yields the same value, which shows up as a
+    /// `Recomputed { value_changed: false }` event and in `nodes_cutoff`.
+    #[test]
+    fn local_get_traced_records_early_cutoff() {
+        use crate::trace::TraceAction;
+        let runtime = Runtime::<Local>::new();
+        let source = runtime.create_input(200_i64);
+        let clamped = runtime.create_query(move |r| r.get(source).min(100));
+        let downstream = runtime.create_query(move |r| r.get(clamped) + 1);
+        let _ = runtime.get(downstream);
+
+        // The input stays above the clamp, so `clamped` still yields 100
+        // and `downstream` keeps its value.
+        runtime.set(source, 300);
+        let (value, trace) = runtime.get_traced(downstream);
+        assert_eq!(value, 101);
+
+        let unchanged_recomputes = trace
+            .node_traces
+            .iter()
+            .filter(|event| {
+                matches!(
+                    event.action,
+                    TraceAction::Recomputed {
+                        value_changed: false
+                    }
+                )
+            })
+            .count();
+        assert!(
+            unchanged_recomputes >= 1,
+            "expected at least one cutoff event, got trace {:?}",
+            trace.node_traces
+        );
+        assert!(trace.nodes_cutoff >= 1);
+    }
+
+    /// Stress test: many dynamic-dep transitions through the overflow
+    /// path. On every iteration the dynamic query reads a different
+    /// window of the inputs, so `publish_deps` sees a changed dep list
+    /// each time. Per the `publish_deps` comments this installs a fresh
+    /// overflow `DepList` and retires the previous one.
+    ///
+    /// The test has no assertions of its own: it passes when the loop
+    /// and the final drop complete. A use-after-free or leak on the
+    /// replaced lists is what a miri / ASan run of this test would flag.
+    #[test]
+    fn local_dynamic_overflow_deps_retirement() {
+        let rt: Runtime<Local> = Runtime::new();
+        let switch = rt.create_input(0_u8);
+        let inputs: Vec<_> = (0..16_i64).map(|v| rt.create_input(v)).collect();
+
+        let dynamic = rt.create_query(move |rt| -> i64 {
+            let s = rt.get(switch) as usize;
+            // Window of 8 to 11 inputs starting at `s % 8`, clamped to
+            // the 16 available inputs. Together with `switch` itself
+            // that is always more than seven deps.
+            let start = s % 8;
+            let end = (start + 8 + s % 4).min(inputs.len());
+            inputs[start..end].iter().map(|&input| rt.get(input)).sum()
+        });
+
+        for s in 1..=50_u8 {
+            rt.set(switch, s);
+            let _ = rt.get(dynamic);
+        }
+        drop(rt);
+    }
+}
diff --git a/crates/incr-core/src/segmented_nodes.rs b/crates/incr-core/src/segmented_nodes.rs
new file mode 100644
index 0000000..c53caed
--- /dev/null
+++ b/crates/incr-core/src/segmented_nodes.rs
@@ -0,0 +1,282 @@
+//! Segmented lock-free-on-read store for [`NodeData<C>`].
+//!
+//! Mirrors the production `SegmentedNodes` from `incr-concurrent`,
+//! parameterized over the strategy:
+//! - Under `Shared`, segment pointers are `AtomicPtr` and the length is
+//!   `AtomicU32`. Readers compute `(seg_idx, within)` from the slot, do
+//!   an Acquire load on the segment pointer, and return a
+//!   `&NodeData<Shared>` reference. `len` is only accessed with Relaxed
+//!   ordering, and `get` reads it only in debug assertions.
+//! - Under `Local`, the same shape uses `Cell<*mut NodesSegment<Local>>`
+//!   and `Cell<u32>` for len. The same indexing math, no actual
+//!   synchronization cost.
+//!
+//! Layout invariants:
+//! - `MAX_SEGMENTS * SEGMENT_SIZE` slots per store (1024 * 1024 = 1M nodes).
+//! - Segments are heap-allocated and never moved or freed until the
+//!   store drops. A `&NodeData<C>` obtained during the store's lifetime
+//!   stays valid until the store drops.
+//! - Append-only writes are serialized by the runtime's write-side lock
+//!   (RwLock::write under Shared, RefCell::borrow_mut under Local); the
+//!   store itself does not provide writer-vs-writer exclusion.
+
+use std::cell::UnsafeCell;
+use std::mem::MaybeUninit;
+
+use crate::cells::{Cells, PtrCell};
+use crate::node::NodeData;
+
+const SEGMENT_SHIFT: u32 = 10;
+const SEGMENT_SIZE: usize = 1 << SEGMENT_SHIFT;
+const SEGMENT_MASK: u32 = (SEGMENT_SIZE as u32) - 1;
+const MAX_SEGMENTS: usize = 1024;
+
+/// Maximum total nodes per runtime. Matches the production cap so the
+/// consolidation does not silently change capacity limits.
+pub const MAX_NODES: u32 = (MAX_SEGMENTS * SEGMENT_SIZE) as u32;
+
+/// A fixed block of `SEGMENT_SIZE` node slots. Segments are boxed, so
+/// the slots keep their address for as long as the segment is alive.
+pub(crate) struct NodesSegment<C: Cells> {
+    slots: Box<[UnsafeCell<MaybeUninit<NodeData<C>>>]>,
+}
+
+impl<C: Cells> NodesSegment<C> {
+    /// Allocate a segment whose `SEGMENT_SIZE` slots all start out
+    /// uninitialized.
+    fn new() -> Box<Self> {
+        let mut slots = Vec::with_capacity(SEGMENT_SIZE);
+        slots.resize_with(SEGMENT_SIZE, || UnsafeCell::new(MaybeUninit::uninit()));
+        Box::new(Self {
+            slots: slots.into_boxed_slice(),
+        })
+    }
+}
+
+/// Strategy-parameterized segmented node store.
+pub struct SegmentedNodes<C: Cells> {
+    /// Table of segment pointer cells, one per possible segment. `new`
+    /// fills it with `MAX_SEGMENTS` null entries; `push` installs a
+    /// segment the first time a slot inside it is needed.
+    segments: Box<[C::Ptr<NodesSegment<C>>]>,
+    /// Number of initialized slots; also the index `push` hands out
+    /// next.
+    len: C::U32,
+}
+
+impl<C: Cells> SegmentedNodes<C> {
+    /// Create a store with no nodes. The segment table is allocated up
+    /// front with every entry null; segments themselves are allocated
+    /// lazily by `push`.
+    pub fn new() -> Self {
+        let mut segments: Vec<C::Ptr<NodesSegment<C>>> = Vec::with_capacity(MAX_SEGMENTS);
+        for _ in 0..MAX_SEGMENTS {
+            segments.push(C::Ptr::new_null());
+        }
+        Self {
+            segments: segments.into_boxed_slice(),
+            len: C::new_u32(0),
+        }
+    }
+
+    /// Append `node` and return its slot index. Caller must hold the
+    /// runtime's write-side lock (or be single-threaded under Local) so
+    /// no concurrent writer races on `len` or segment allocation.
+    ///
+    /// Ordering as implemented: a newly allocated segment pointer is
+    /// published with a Release store, but `len` is written with a
+    /// Relaxed store, so `len` by itself does not publish the slot
+    /// contents to other threads (see the note above that store).
+    ///
+    /// Panics if the store already holds `MAX_NODES` slots.
+    pub fn push(&self, node: NodeData<C>) -> u32 {
+        // The current length is the index of the slot being filled.
+        let slot = C::u32_load_relaxed(&self.len);
+        assert!(
+            slot < MAX_NODES,
+            "SegmentedNodes exhausted at {} slots",
+            MAX_NODES
+        );
+
+        let seg_idx = (slot >> SEGMENT_SHIFT) as usize;
+        let within = (slot & SEGMENT_MASK) as usize;
+
+        // Allocate the segment on first use and publish its pointer
+        // with Release so an Acquire load of the pointer observes the
+        // allocation.
+        let seg_ptr = self.segments[seg_idx].load_acquire();
+        let seg_ptr = if seg_ptr.is_null() {
+            let new_seg = Box::into_raw(NodesSegment::<C>::new());
+            self.segments[seg_idx].store_release(new_seg);
+            new_seg
+        } else {
+            seg_ptr
+        };
+
+        // SAFETY: seg_ptr is non-null, points at a NodesSegment owned
+        // by this store. `within` < SEGMENT_SIZE by construction.
+        // Caller holds the write-side lock so no concurrent writer is
+        // initializing this slot. Readers that honour the `slot < len`
+        // contract cannot observe this slot because `len` has not yet
+        // been bumped.
+        unsafe {
+            let cell: &UnsafeCell<MaybeUninit<NodeData<C>>> = &(*seg_ptr).slots[within];
+            (*cell.get()).write(node);
+        }
+
+        // Bump `len` to cover the new slot.
+        let new_len = slot.checked_add(1).expect("SegmentedNodes len overflow");
+        // NOTE: this is a Relaxed store, not the Release store that a
+        // len-based publication protocol would need; per the original
+        // author's note the strategy only exposes Relaxed for U32.
+        // On Local this is a plain store. On Shared, cross-thread
+        // visibility of the slot contents is stated to rely on (a) the
+        // Release store of the segment pointer above, which only
+        // happens when a segment is freshly created, and (b) the
+        // runtime's state machine synchronizing per-slot data on first
+        // read. See README in this commit for the full ordering
+        // argument.
+        // TODO(review): expose a Release store for U32 on the strategy
+        // and use it here so a Release/Acquire pairing on `len` holds.
+        C::u32_store_relaxed(&self.len, new_len);
+
+        slot
+    }
+
+    /// Borrow the node stored at `slot`. The reference stays valid for
+    /// as long as the store itself.
+    ///
+    /// `slot` must be an index returned by `push` on this same store.
+    /// That is only checked by debug assertions; release builds trust
+    /// the caller.
+    pub fn get(&self, slot: u32) -> &NodeData<C> {
+        debug_assert!(
+            slot < C::u32_load_relaxed(&self.len),
+            "SegmentedNodes::get slot {} out of range (len {})",
+            slot,
+            C::u32_load_relaxed(&self.len),
+        );
+
+        let segment_index = (slot >> SEGMENT_SHIFT) as usize;
+        let offset = (slot & SEGMENT_MASK) as usize;
+
+        // Acquire pairs with the Release store `push` performs when it
+        // installs a segment.
+        let segment = self.segments[segment_index].load_acquire();
+        debug_assert!(!segment.is_null(), "segment {} not allocated", segment_index);
+
+        // SAFETY: by the caller's contract `slot < len`, so `push` has
+        // allocated this segment and initialized this slot. Segments
+        // are not freed before the store is dropped.
+        unsafe {
+            let cell = &(*segment).slots[offset];
+            (*cell.get()).assume_init_ref()
+        }
+    }
+
+    /// Number of initialized slots, read with a Relaxed load of `len`.
+    pub fn len(&self) -> u32 {
+        C::u32_load_relaxed(&self.len)
+    }
+
+    /// `true` while no node has been pushed into the store.
+    pub fn is_empty(&self) -> bool {
+        C::u32_load_relaxed(&self.len) == 0
+    }
+}
+
+/// The default store is the empty store produced by `new`.
+impl<C: Cells> Default for SegmentedNodes<C> {
+    fn default() -> Self {
+        SegmentedNodes::new()
+    }
+}
+
+impl<C: Cells> Drop for SegmentedNodes<C> {
+    /// Runs the destructor of every initialized node in place, then
+    /// frees every allocated segment.
+    fn drop(&mut self) {
+        // Pass 1: drop the nodes in slots `0..len`.
+        let initialized = C::u32_load_relaxed(&self.len);
+        for slot in 0..initialized {
+            let segment = self.segments[(slot >> SEGMENT_SHIFT) as usize].load_relaxed();
+            if segment.is_null() {
+                continue;
+            }
+            let offset = (slot & SEGMENT_MASK) as usize;
+            // SAFETY: `slot < len`, so `push` initialized this slot.
+            // `&mut self` rules out any concurrent access.
+            unsafe {
+                let cell = &(*segment).slots[offset];
+                (*cell.get()).assume_init_drop();
+            }
+        }
+
+        // Pass 2: release the segment allocations themselves.
+        for segment in self.segments.iter().map(|entry| entry.load_relaxed()) {
+            if !segment.is_null() {
+                // SAFETY: the pointer was produced by `Box::into_raw`
+                // in `push` and `&mut self` makes us its only owner.
+                unsafe {
+                    drop(Box::from_raw(segment));
+                }
+            }
+        }
+    }
+}
+
+// SAFETY (Shared only): `NodeData<Shared>` is `Send + Sync` because all
+// its fields are atomic. `AtomicPtr<NodesSegment<Shared>>` is `Send + Sync`.
+// Under Local, `LocalPtrCell` is `!Sync` (via `Cell`), so the resulting
+// `SegmentedNodes<Local>` is also `!Sync`, which is the correct property
+// for the single-threaded variant. We rely on auto-derived Send/Sync
+// here rather than manual unsafe impls; the per-strategy auto traits do
+// the right thing without our intervention.
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    /// A pushed node can be read back through its slot (`Local`).
+    #[test]
+    fn local_push_get() {
+        let nodes = SegmentedNodes::<Local>::new();
+        let first = nodes.push(NodeData::<Local>::new_input(0, 42, 0));
+        let stored = nodes.get(first);
+        assert_eq!(stored.arena_slot(), 42);
+        assert_eq!(nodes.len(), 1);
+    }
+
+    /// A pushed node can be read back through its slot (`Shared`).
+    #[test]
+    fn shared_push_get() {
+        let nodes = SegmentedNodes::<Shared>::new();
+        let first = nodes.push(NodeData::<Shared>::new_input(0, 42, 0));
+        let stored = nodes.get(first);
+        assert_eq!(stored.arena_slot(), 42);
+        assert_eq!(nodes.len(), 1);
+    }
+
+    /// Pushing more than one segment's worth of nodes keeps every slot
+    /// readable with the value it was pushed with (`Local`).
+    #[test]
+    fn local_many_pushes_cross_segment_boundary() {
+        let store = SegmentedNodes::<Local>::new();
+        let total = SEGMENT_SIZE + 100;
+        let slots: Vec<u32> = (0..total)
+            .map(|i| store.push(NodeData::<Local>::new_input(0, i as u32, 0)))
+            .collect();
+        for (expected, slot) in slots.iter().enumerate() {
+            assert_eq!(store.get(*slot).arena_slot(), expected as u32);
+        }
+        assert_eq!(store.len(), total as u32);
+    }
+
+    /// Pushing more than one segment's worth of nodes keeps every slot
+    /// readable with the value it was pushed with (`Shared`).
+    #[test]
+    fn shared_many_pushes_cross_segment_boundary() {
+        let store = SegmentedNodes::<Shared>::new();
+        let total = SEGMENT_SIZE + 100;
+        let slots: Vec<u32> = (0..total)
+            .map(|i| store.push(NodeData::<Shared>::new_input(0, i as u32, 0)))
+            .collect();
+        for (expected, slot) in slots.iter().enumerate() {
+            assert_eq!(store.get(*slot).arena_slot(), expected as u32);
+        }
+        assert_eq!(store.len(), total as u32);
+    }
+
+    /// A reference obtained before the store grows into a second
+    /// segment still reads the same node afterwards (`Local`).
+    #[test]
+    fn local_references_stay_valid_across_growth() {
+        let store = SegmentedNodes::<Local>::new();
+        let first = store.push(NodeData::<Local>::new_input(0, 111, 0));
+        let held = store.get(first);
+
+        let extra = SEGMENT_SIZE as u32 + 10;
+        (0..extra).for_each(|i| {
+            store.push(NodeData::<Local>::new_input(0, 1000 + i, 0));
+        });
+
+        assert_eq!(held.arena_slot(), 111);
+    }
+
+    /// A reference obtained before the store grows into a second
+    /// segment still reads the same node afterwards (`Shared`).
+    #[test]
+    fn shared_references_stay_valid_across_growth() {
+        let store = SegmentedNodes::<Shared>::new();
+        let first = store.push(NodeData::<Shared>::new_input(0, 111, 0));
+        let held = store.get(first);
+
+        let extra = SEGMENT_SIZE as u32 + 10;
+        (0..extra).for_each(|i| {
+            store.push(NodeData::<Shared>::new_input(0, 1000 + i, 0));
+        });
+
+        assert_eq!(held.arena_slot(), 111);
+    }
+}
diff --git a/crates/incr-core/src/sorted_collection.rs b/crates/incr-core/src/sorted_collection.rs
new file mode 100644
index 0000000..404faf3
--- /dev/null
+++ b/crates/incr-core/src/sorted_collection.rs
@@ -0,0 +1,402 @@
+//! `SortedCollection<T, K, C>`: a collection viewed in key-sorted order.
+//!
+//! Produced by `IncrCollection::sort_by_key`. The sorted view is what
+//! enables positional operators like `pairwise` and `window` (which need
+//! a stable order). Internally the sorted state is a `Vec<T>` maintained
+//! incrementally: each upstream Insert is binary-searched into the right
+//! position; each upstream Delete is binary-searched and removed.
+//!
+//! Storage:
+//! - `sorted: Vec<T>` of elements in key order.
+//! - `version_node: Incr<u64>` query that processes upstream deltas and
+//!   returns the current version.
+//! - `key_fn`: closure that extracts the sort key from each element.
+//!
+//! Positional deltas (`SortDelta`) are recorded in the crate-private state
+//! but not yet consumed or exposed. The production crate emits them so
+//! downstream operators can react to exactly the insert/remove positions;
+//! we ship the snapshot-vec semantics first and add positional deltas
+//! when we port `pairwise` and `window` past the first cut.
+
+use std::cmp::Ordering;
+use std::hash::Hash;
+use std::sync::{Arc, RwLock};
+
+use crate::cells::Cells;
+use crate::collection::{CollectionLog, Delta, IncrCollection};
+use crate::handle::Incr;
+use crate::runtime::Runtime;
+use crate::value::Value;
+
+/// Positional delta logged by `sort_by_key`; `pairwise`/`window` do not read it yet.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum SortDelta<T> {
+    /// `value` was inserted at sorted index `pos`.
+    Insert { pos: usize, value: T },
+    /// `value` was removed; `pos` is the index it held before removal.
+    Remove { pos: usize, value: T },
+}
+
+/// Sorted-view state shared between the sort operator and its downstream
+/// consumers. `sorted` is the source of truth for the current order;
+/// `deltas` is an append-only log that nothing in this file drains yet.
+pub(crate) struct SortedState<T, K> {
+    pub(crate) sorted: Vec<T>, // elements in key order
+    pub(crate) deltas: Vec<SortDelta<T>>, // one entry per applied insert/remove
+    pub(crate) version: u64, // bumped once per applied insert/remove
+    pub(crate) _phantom: std::marker::PhantomData<fn() -> K>, // K is a marker only
+}
+
+impl<T, K> SortedState<T, K> {
+    pub(crate) fn new() -> Self { // empty view, empty delta log, version 0
+        Self {
+            sorted: Vec::new(),
+            deltas: Vec::new(),
+            version: 0,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+}
+
+/// Handle to a sorted view; the state is filled in by the `version_node` query.
+pub struct SortedCollection<T, K, C>
+where
+    T: Value + Hash + Eq,
+    K: Ord + Clone + Send + Sync + 'static,
+    C: Cells,
+{
+    pub(crate) state: Arc<RwLock<SortedState<T, K>>>, // shared with the sort query
+    pub(crate) version_node: Incr<u64>, // query that applies upstream deltas
+    pub(crate) _phantom: std::marker::PhantomData<fn() -> C>, // C is a marker only
+}
+
+impl<T, K, C> Clone for SortedCollection<T, K, C>
+where
+    T: Value + Hash + Eq,
+    K: Ord + Clone + Send + Sync + 'static,
+    C: Cells,
+{
+    /// Cheap handle copy: shares the same state `Arc` and version node.
+    fn clone(&self) -> Self {
+        let state = self.state.clone();
+        let version_node = self.version_node;
+        let _phantom = std::marker::PhantomData;
+        Self { state, version_node, _phantom }
+    }
+}
+
+impl<T, K, C> SortedCollection<T, K, C>
+where
+    T: Value + Hash + Eq,
+    K: Ord + Clone + Send + Sync + 'static,
+    C: Cells,
+{
+    /// Handle of the query that applies upstream deltas and yields the version.
+    pub fn version_node(&self) -> Incr<u64> {
+        self.version_node
+    }
+
+    /// Clone of the sorted vec, taken under the read lock. The cost grows
+    /// with the size of the view, so keep this out of inner loops. The
+    /// vec only changes when the `version_node` query runs.
+    /// Panics with "sorted state poisoned" if the lock is poisoned.
+    pub fn snapshot(&self) -> Vec<T> {
+        let guard = self.state.read().expect("sorted state poisoned");
+        guard.sorted.clone()
+    }
+
+    /// Number of elements currently in the sorted vec, read under the
+    /// read lock without cloning anything. Like `snapshot`, it reflects
+    /// the state left by the last run of the `version_node` query.
+    /// Panics with "sorted state poisoned" if the lock is poisoned.
+    pub fn snapshot_len(&self) -> usize {
+        let guard = self.state.read().expect("sorted state poisoned");
+        guard.sorted.len()
+    }
+}
+
+impl<T, C> IncrCollection<T, C>
+where
+    T: Value + Hash + Eq,
+    C: Cells,
+{
+    /// Sort by an extracted key. Returns a [`SortedCollection`] whose
+    /// elements are kept in key order by a query that replays the
+    /// upstream delta log: each insert is placed by binary search, each
+    /// delete is located by binary search on the key and then removed.
+    ///
+    /// Ties are stable: an element whose key equals an existing one is
+    /// placed after it, and a delete removes the first equal element in
+    /// that key's run. A delete with no equal element is ignored.
+    ///
+    /// The sorted state changes only when the returned view's
+    /// `version_node` query runs; the version it yields goes up by one
+    /// for every insert or removal actually applied.
+    ///
+    /// The query panics if either lock is poisoned or if the version
+    /// counter would overflow `u64`. `key_fn` should return the same key
+    /// for the same element every time, or the binary searches misplace.
+    pub fn sort_by_key<K, F>(&self, rt: &Runtime<C>, key_fn: F) -> SortedCollection<T, K, C>
+    where
+        K: Ord + Clone + Send + Sync + 'static,
+        F: Fn(&T) -> K + Send + Sync + 'static,
+    {
+        use std::sync::atomic::{AtomicUsize, Ordering as MemOrdering};
+
+        let source_log = Arc::clone(&self.log);
+        let source_version = self.version_node;
+        // Index of the first upstream delta this operator has not applied.
+        let cursor = Arc::new(AtomicUsize::new(0));
+
+        let state: Arc<RwLock<SortedState<T, K>>> = Arc::new(RwLock::new(SortedState::new()));
+        let shared = Arc::clone(&state);
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            let _uv = rt.get(source_version);
+
+            let source = source_log.read().expect("collection log poisoned");
+            let first_pending = cursor.load(MemOrdering::Relaxed);
+            let total = source.deltas.len();
+            if first_pending >= total {
+                let current = shared.read().expect("sorted state poisoned");
+                return current.version;
+            }
+
+            let mut view = shared.write().expect("sorted state poisoned");
+            for delta in &source.deltas[first_pending..] {
+                let applied = match delta {
+                    Delta::Insert(item) => {
+                        let key = key_fn(item);
+                        // First index whose key is greater than ours, so
+                        // existing equal keys stay ahead of the newcomer.
+                        let pos = view.sorted.partition_point(|e| key_fn(e) <= key);
+                        view.sorted.insert(pos, item.clone());
+                        let value = item.clone();
+                        view.deltas.push(SortDelta::Insert { pos, value });
+                        true
+                    }
+                    Delta::Delete(item) => {
+                        let key = key_fn(item);
+                        // `lo..hi` is the run of elements sharing `key`.
+                        let lo = view.sorted.partition_point(|e| key_fn(e) < key);
+                        let hi = view.sorted.partition_point(|e| key_fn(e) <= key);
+                        let hit = (lo..hi).find(|&i| &view.sorted[i] == item);
+                        match hit {
+                            Some(pos) => {
+                                let value = view.sorted.remove(pos);
+                                view.deltas.push(SortDelta::Remove { pos, value });
+                                true
+                            }
+                            None => false,
+                        }
+                    }
+                };
+                if applied {
+                    view.version = view
+                        .version
+                        .checked_add(1)
+                        .expect("SortedState version overflow");
+                }
+            }
+            cursor.store(total, MemOrdering::Relaxed);
+            view.version
+        });
+
+        SortedCollection {
+            state,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+}
+
+impl<T, K, C> SortedCollection<T, K, C>
+where
+    T: Value + Hash + Eq,
+    K: Ord + Clone + Send + Sync + 'static,
+    C: Cells,
+{
+    /// Pairwise: emit `(prev, next)` for every consecutive pair in the
+    /// sorted view. The output is a regular [`IncrCollection`] of pairs;
+    /// a view with fewer than two elements produces no pairs.
+    ///
+    /// First-cut implementation: each run of the output query deletes
+    /// every pair currently in the output log and inserts the pairs of
+    /// the current sorted vec. Positional updates driven by `SortDelta`
+    /// are not wired in yet. The query panics if a lock is poisoned.
+    pub fn pairwise(&self, rt: &Runtime<C>) -> IncrCollection<(T, T), C> {
+        let state = Arc::clone(&self.state);
+        let upstream_version = self.version_node;
+
+        let output_log: Arc<RwLock<CollectionLog<(T, T)>>> =
+            Arc::new(RwLock::new(CollectionLog::new()));
+        let output_log_for_query = Arc::clone(&output_log);
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            let _uv = rt.get(upstream_version);
+
+            // Clone only the paired elements, straight from the locked
+            // vec; `windows(2)` is empty below two elements. The read
+            // guard is a temporary, released when this statement ends.
+            let new_pairs: Vec<(T, T)> = state
+                .read()
+                .expect("sorted state poisoned")
+                .sorted
+                .windows(2)
+                .map(|w| (w[0].clone(), w[1].clone()))
+                .collect();
+
+            let mut out = output_log_for_query
+                .write()
+                .expect("collection log poisoned");
+            let stale: Vec<(T, T)> = out
+                .elements
+                .iter()
+                .flat_map(|(p, &n)| std::iter::repeat_n(p.clone(), n))
+                .collect();
+            for p in stale {
+                out.delete(&p);
+            }
+            for p in new_pairs {
+                out.insert(p);
+            }
+            out.version
+        });
+
+        IncrCollection {
+            log: output_log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+
+    /// Window: emit sliding windows of `size` from the sorted view.
+    /// Output is a collection of `Vec<T>` snapshots, one per window
+    /// position; a view shorter than `size` produces none. Like
+    /// pairwise, every run rebuilds the whole output log.
+    /// Panics if `size` is 0.
+    pub fn window(&self, rt: &Runtime<C>, size: usize) -> IncrCollection<Vec<T>, C> {
+        assert!(size > 0, "window size must be positive");
+        let state = Arc::clone(&self.state);
+        let upstream_version = self.version_node;
+
+        let output_log: Arc<RwLock<CollectionLog<Vec<T>>>> =
+            Arc::new(RwLock::new(CollectionLog::new()));
+        let output_log_for_query = Arc::clone(&output_log);
+
+        let version_node = rt.create_query(move |rt| -> u64 {
+            let _uv = rt.get(upstream_version);
+
+            // `windows(size)` is empty when the vec is shorter than `size`.
+            let new_windows: Vec<Vec<T>> = state
+                .read()
+                .expect("sorted state poisoned")
+                .sorted
+                .windows(size)
+                .map(|w| w.to_vec())
+                .collect();
+
+            let mut out = output_log_for_query
+                .write()
+                .expect("collection log poisoned");
+            let stale: Vec<Vec<T>> = out
+                .elements
+                .iter()
+                .flat_map(|(p, &n)| std::iter::repeat_n(p.clone(), n))
+                .collect();
+            for w in stale {
+                out.delete(&w);
+            }
+            for w in new_windows {
+                out.insert(w);
+            }
+            out.version
+        });
+
+        IncrCollection {
+            log: output_log,
+            version_node,
+            _phantom: std::marker::PhantomData,
+        }
+    }
+}
+
+// Keeps the `std::cmp::Ordering` import referenced; despite the name it does not touch `SortDelta`.
+#[allow(dead_code)]
+fn _sort_delta_keep_used() -> Ordering {
+    Ordering::Equal
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    /// Duplicate keys are kept: two `1`s go in, two `1`s come out.
+    #[test]
+    fn local_sort_by_key_basic() {
+        let runtime: Runtime<Local> = Runtime::new();
+        let source = runtime.create_collection::<i64>();
+        let view = source.sort_by_key(&runtime, |x| *x);
+        for value in [3, 1, 4, 1, 5] {
+            source.insert(&runtime, value);
+        }
+        // The sorted vec is only updated when the sort query runs, so
+        // read its version node before taking the snapshot.
+        let _ = runtime.get(view.version_node);
+        assert_eq!(view.snapshot(), vec![1, 1, 3, 4, 5]);
+    }
+
+    #[test]
+    fn shared_sort_by_key_basic() {
+        let runtime: Runtime<Shared> = Runtime::new();
+        let source = runtime.create_collection::<i64>();
+        let view = source.sort_by_key(&runtime, |x| *x);
+        for value in [3, 1, 4] {
+            source.insert(&runtime, value);
+        }
+        let _ = runtime.get(view.version_node);
+        assert_eq!(view.snapshot(), vec![1, 3, 4]);
+    }
+
+    #[test]
+    fn local_sort_delete_removes_correct_element() {
+        let runtime: Runtime<Local> = Runtime::new();
+        let source = runtime.create_collection::<i64>();
+        let view = source.sort_by_key(&runtime, |x| *x);
+        for value in [3, 1, 5] {
+            source.insert(&runtime, value);
+        }
+        source.delete(&runtime, &3);
+        let _ = runtime.get(view.version_node);
+        assert_eq!(view.snapshot(), vec![1, 5]);
+    }
+
+    #[test]
+    fn shared_pairwise_consecutive() {
+        let runtime: Runtime<Shared> = Runtime::new();
+        let source = runtime.create_collection::<i64>();
+        let view = source.sort_by_key(&runtime, |x| *x);
+        let pairs = view.pairwise(&runtime);
+        for value in [10, 20, 30] {
+            source.insert(&runtime, value);
+        }
+        // Sorted view [10, 20, 30] has two adjacent pairs: (10,20), (20,30).
+        let pair_count = pairs.count(&runtime);
+        assert_eq!(runtime.get(pair_count), 2);
+    }
+
+    #[test]
+    fn local_window_size_3() {
+        let runtime: Runtime<Local> = Runtime::new();
+        let source = runtime.create_collection::<i64>();
+        let view = source.sort_by_key(&runtime, |x| *x);
+        let windows = view.window(&runtime, 3);
+        (1..=5).for_each(|value| source.insert(&runtime, value));
+        // Snapshot [1,2,3,4,5] → windows [1,2,3], [2,3,4], [3,4,5] = 3.
+        // Reading the count through the runtime is what drives the
+        // sort and window queries; nothing is forced explicitly here.
+        let window_count = windows.count(&runtime);
+        assert_eq!(runtime.get(window_count), 3);
+    }
+}
diff --git a/crates/incr-core/src/state.rs b/crates/incr-core/src/state.rs
new file mode 100644
index 0000000..bc71a4f
--- /dev/null
+++ b/crates/incr-core/src/state.rs
@@ -0,0 +1,180 @@
+//! Node state encoding and lifecycle.
+//!
+//! The state cell itself is provided by the active [`Cells`] strategy
+//! (`Cells::State`); this module only fixes the encoding and provides the
+//! transition helpers that operate on the cell through the strategy.
+//!
+//! States:
+//! - [`NodeState::New`]: created but never computed. First reader CASes to
+//!   `Computing`.
+//! - [`NodeState::Dirty`]: a dependency changed; the value is stale.
+//! - [`NodeState::Computing`]: a thread is currently running compute. Only
+//!   one thread holds this state at a time (enforced by CAS on `Shared`,
+//!   by the single-threaded execution model on `Local`).
+//! - [`NodeState::Clean`]: value is current and readable.
+//! - [`NodeState::Failed`]: last compute panicked. Transitions to `Dirty`
+//!   when a dependency changes.
+//!
+//! Transitions into `Computing` happen via [`Cells::state_try_transition`]
+//! (CAS on `Shared`, conditional check on `Local`). Transitions out of
+//! `Computing` (to `Clean` or `Failed`) use Release ordering on `Shared`
+//! to publish the writes to value / deps / timestamps that happened
+//! during compute.
+//!
+//! The transition helpers below take `&<C as Cells>::State` and the
+//! associated `Cells` impl as a generic parameter, so all calls inline
+//! through the strategy's `#[inline(always)]` methods.
+
+use crate::cells::Cells;
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum NodeState {
+    New = 0,
+    Dirty = 1,
+    Computing = 2,
+    Clean = 3,
+    Failed = 4,
+}
+
+impl NodeState {
+    /// Decodes a raw discriminant; panics on any value outside `0..=4`.
+    #[inline]
+    pub fn from_u8(v: u8) -> Self {
+        use NodeState::{Clean, Computing, Dirty, Failed, New};
+        const BY_DISCRIMINANT: [NodeState; 5] = [New, Dirty, Computing, Clean, Failed];
+        match BY_DISCRIMINANT.get(usize::from(v)) {
+            Some(&state) => state,
+            None => panic!("invalid NodeState value: {}", v),
+        }
+    }
+
+    /// Encodes the state as its `repr(u8)` discriminant.
+    #[inline]
+    pub fn as_u8(self) -> u8 {
+        self as u8
+    }
+}
+
+/// Load the current state with Acquire ordering through the strategy's
+/// state cell, pairing with the Release store that set it. Panics (via
+/// `NodeState::from_u8`) if the cell holds an invalid encoding.
+#[inline(always)]
+pub fn load<C: Cells>(cell: &C::State) -> NodeState {
+    NodeState::from_u8(C::state_load_acquire(cell))
+}
+
+/// Store `new` with Release ordering through the strategy's state cell.
+/// Use when transitioning to `Clean` or `Failed`, after the node's
+/// value, deps, and timestamps have been written.
+#[inline(always)]
+pub fn store<C: Cells>(cell: &C::State, new: NodeState) {
+    C::state_store_release(cell, new.as_u8());
+}
+
+/// Attempt to transition from `expected` to `new`; on failure the `Err`
+/// holds the decoded value the strategy reported. On `Shared` this is a
+/// CAS with AcqRel success ordering; on `Local` it is a conditional set.
+#[inline(always)]
+pub fn try_transition<C: Cells>(
+    cell: &C::State,
+    expected: NodeState,
+    new: NodeState,
+) -> Result<(), NodeState> {
+    C::state_try_transition(cell, expected.as_u8(), new.as_u8()).map_err(NodeState::from_u8)
+}
+
+/// Claim the right to compute this node by moving it to `Computing`.
+/// Only `Dirty` and `New` are accepted as source states; `Failed` is
+/// not, so a `Failed` node stays `Failed` until the writer's dirty walk
+/// has moved it to `Dirty`.
+///
+/// `Dirty` is attempted first (more common in steady state), then `New`.
+///
+/// Returns `Ok(())` if this caller now owns compute. Otherwise returns
+/// `Err(observed)`, where `observed` is the state reported by the
+/// second (`New`) attempt.
+/// Under `Shared`, exactly one of many racing threads succeeds.
+#[inline(always)]
+pub fn try_claim_compute<C: Cells>(cell: &C::State) -> Result<(), NodeState> {
+    try_transition::<C>(cell, NodeState::Dirty, NodeState::Computing)
+        .or_else(|_| try_transition::<C>(cell, NodeState::New, NodeState::Computing))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cells::{Local, Shared};
+
+    #[test]
+    fn local_claim_from_dirty() {
+        let cell = Local::new_state(NodeState::Dirty.as_u8());
+        assert_eq!(try_claim_compute::<Local>(&cell), Ok(()));
+        assert_eq!(load::<Local>(&cell), NodeState::Computing);
+    }
+
+    #[test]
+    fn local_claim_from_new() {
+        let cell = Local::new_state(NodeState::New.as_u8());
+        assert_eq!(try_claim_compute::<Local>(&cell), Ok(()));
+        assert_eq!(load::<Local>(&cell), NodeState::Computing);
+    }
+
+    #[test]
+    fn local_claim_from_clean_fails() {
+        let cell = Local::new_state(NodeState::Clean.as_u8());
+        assert!(matches!(try_claim_compute::<Local>(&cell), Err(_)));
+        assert_eq!(load::<Local>(&cell), NodeState::Clean);
+    }
+
+    #[test]
+    fn shared_claim_from_dirty() {
+        let cell = Shared::new_state(NodeState::Dirty.as_u8());
+        assert_eq!(try_claim_compute::<Shared>(&cell), Ok(()));
+        assert_eq!(load::<Shared>(&cell), NodeState::Computing);
+    }
+
+    #[test]
+    fn shared_claim_from_clean_fails() {
+        let cell = Shared::new_state(NodeState::Clean.as_u8());
+        assert!(matches!(try_claim_compute::<Shared>(&cell), Err(_)));
+        assert_eq!(load::<Shared>(&cell), NodeState::Clean);
+    }
+
+    #[test]
+    fn shared_concurrent_claim_one_winner() {
+        use std::sync::atomic::{AtomicUsize, Ordering as O};
+        use std::sync::Arc;
+        use std::thread;
+
+        const THREADS: usize = 16;
+        const ROUNDS: usize = 200;
+
+        for _ in 0..ROUNDS {
+            // Fresh Dirty cell per round; all threads race to claim it.
+            let cell = Arc::new(Shared::new_state(NodeState::Dirty.as_u8()));
+            let claimed = Arc::new(AtomicUsize::new(0));
+
+            let mut racers = Vec::with_capacity(THREADS);
+            for _ in 0..THREADS {
+                let cell = Arc::clone(&cell);
+                let claimed = Arc::clone(&claimed);
+                racers.push(thread::spawn(move || {
+                    if try_claim_compute::<Shared>(&cell).is_ok() {
+                        claimed.fetch_add(1, O::Relaxed);
+                    }
+                }));
+            }
+
+            for racer in racers {
+                racer.join().unwrap();
+            }
+
+            assert_eq!(
+                claimed.load(O::Relaxed),
+                1,
+                "expected exactly one thread to claim compute"
+            );
+        }
+    }
+}
diff --git a/crates/incr-core/src/trace.rs b/crates/incr-core/src/trace.rs
new file mode 100644
index 0000000..9d3e009
--- /dev/null
+++ b/crates/incr-core/src/trace.rs
@@ -0,0 +1,53 @@
+//! Tracing types: structural snapshots and per-node propagation traces.
+//!
+//! The wrappers re-export these types under the same names the original
+//! `incr-compute` and `incr-concurrent` crates use, so user code that
+//! constructed `NodeInfo`/`PropagationTrace` continues to compile.
+//!
+//! Full implementation status:
+//! - `graph_snapshot()` on `Runtime<C>` returns real per-node `NodeInfo`
+//!   data with dependencies and dependents.
+//! - `get_traced()` populates `PropagationTrace` with totals and the
+//!   per-node trace log when tracing is enabled. (Stub in this slice;
+//!   real implementation lands alongside the dashboard demo work.)
+
+use crate::node::NodeId;
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum NodeKindInfo {
+    Input, // NOTE(review): presumably a node made by `create_input` — confirm
+    Compute, // NOTE(review): presumably a node made by `create_query` — confirm
+}
+
+#[derive(Clone, Debug)]
+pub struct NodeInfo { // per-node entry of `graph_snapshot()`, per the module docs
+    pub id: NodeId,
+    pub kind: NodeKindInfo,
+    pub label: Option<String>, // NOTE(review): `None` presumably means unlabeled — confirm
+    pub dependencies: Vec<NodeId>, // NOTE(review): presumably the nodes this one reads — confirm
+    pub dependents: Vec<NodeId>, // NOTE(review): presumably the nodes that read this one — confirm
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum TraceAction { // carried by `NodeTrace::action`
+    /// Node was dirty, but none of its dependencies had actually changed.
+    VerifiedClean,
+    /// Node was recomputed; `value_changed` is false when early cutoff occurred.
+    Recomputed { value_changed: bool },
+}
+
+#[derive(Clone, Debug)]
+pub struct NodeTrace { // element type of `PropagationTrace::node_traces`
+    pub id: NodeId,
+    pub action: TraceAction,
+}
+
+#[derive(Clone, Debug)]
+pub struct PropagationTrace { // populated by `get_traced()` per the module docs (stub in this slice)
+    pub target: NodeId, // NOTE(review): presumably the node passed to `get_traced` — confirm
+    pub node_traces: Vec<NodeTrace>,
+    pub total_nodes: usize,
+    pub nodes_recomputed: usize,
+    pub nodes_cutoff: usize, // NOTE(review): presumably recomputes with `value_changed == false` — confirm
+    pub elapsed_ns: u64, // nanoseconds per the field name; the clock source is not shown here
+}
diff --git a/crates/incr-core/src/value.rs b/crates/incr-core/src/value.rs
new file mode 100644
index 0000000..2866ea9
--- /dev/null
+++ b/crates/incr-core/src/value.rs
@@ -0,0 +1,22 @@
+//! `Value`: the user-type bound for everything stored in a `Runtime<C>`.
+//!
+//! Single trait, single mental model: `Value` = `Clone + PartialEq + Send + Sync + 'static`.
+//! Local-strategy users pay no runtime cost for the `Send + Sync` bound
+//! (those are zero-cost markers), but they cannot store `Rc<...>` or
+//! other `!Send` types directly. This is a deliberate uniformity
+//! decision for the v0.2 API: one bound across both strategies, identical
+//! impl story, no per-strategy Value implementations.
+//!
+//! Wrapping does not help with a `!Send` type: `Arc<Mutex<T>>` is `Send`
+//! only when `T: Send`. Use a `Send` equivalent (`Arc` for `Rc`) or move
+//! the !Send state outside the graph and pass values through. The
+//! consolidation plan's decision page covers the tradeoff.
+//!
+//! The blanket impl auto-derives `Value` for every qualifying type, so
+//! no `impl Value for MyType` boilerplate is required. This matches
+//! production incr-compute's `T: Any + Clone + PartialEq + 'static` and
+//! tightens it with `Send + Sync`.
+
+pub trait Value: Clone + PartialEq + Send + Sync + 'static {}
+// Blanket impl: every type meeting the bounds is a `Value`; no manual impls needed.
+impl<T> Value for T where T: Clone + PartialEq + Send + Sync + 'static {}
diff --git a/crates/incr-core/tests/collection_property.rs b/crates/incr-core/tests/collection_property.rs
new file mode 100644
index 0000000..5706d7b
--- /dev/null
+++ b/crates/incr-core/tests/collection_property.rs
@@ -0,0 +1,164 @@
+//! Property tests for incremental collection operators. Each test
+//! generates a random sequence of insert/delete operations on a source
+//! collection, runs the incremental pipeline, and compares against a
+//! from-scratch batch computation over the same final element set.
+//!
+//! Both `Local` and `Shared` strategies run the same generator + verifier
+//! through separate proptest! cases so failures shrink in the correct
+//! type context.
+
+use incr_core::{Cells, IncrCollection, Local, Runtime, Shared};
+use proptest::prelude::*;
+use std::collections::HashMap;
+
+#[derive(Clone, Debug)]
+enum Op {
+    Insert(i64), // replayed as `insert` on the source collection
+    Delete(i64), // replayed as `delete`; the value may not be present
+}
+
+/// Reference model: replays `ops` into a multiset; absent deletes are no-ops.
+fn apply_to_baseline(ops: &[Op]) -> HashMap<i64, usize> {
+    use std::collections::hash_map::Entry;
+    ops.iter().fold(HashMap::new(), |mut bag, op| {
+        match *op {
+            Op::Insert(v) => *bag.entry(v).or_insert(0_usize) += 1,
+            Op::Delete(v) => match bag.entry(v) {
+                Entry::Occupied(slot) if *slot.get() == 1 => {
+                    slot.remove();
+                }
+                Entry::Occupied(mut slot) => *slot.get_mut() -= 1,
+                Entry::Vacant(_) => {}
+            },
+        }
+        bag
+    })
+}
+
+/// Builds `filter(even) -> count` on a fresh runtime, replays `ops` on
+/// the source collection, then reads the count through the runtime.
+fn run_filter_count<C: Cells>(ops: &[Op]) -> u64
+where
+    Runtime<C>: Default,
+{
+    let runtime: Runtime<C> = Runtime::new();
+    let source: IncrCollection<i64, C> = runtime.create_collection();
+    let evens = source.filter(&runtime, |x| x % 2 == 0);
+    let even_count = evens.count(&runtime);
+    ops.iter().for_each(|op| match op {
+        Op::Insert(v) => source.insert(&runtime, *v),
+        Op::Delete(v) => {
+            source.delete(&runtime, v);
+        }
+    });
+    runtime.get(even_count)
+}
+
+/// Batch reference: total multiplicity of the even values left after `ops`.
+fn batch_filter_count(ops: &[Op]) -> u64 {
+    apply_to_baseline(ops)
+        .into_iter()
+        .filter_map(|(value, copies)| (value % 2 == 0).then_some(copies as u64))
+        .sum()
+}
+
+/// Builds `map(x * 2) -> reduce(sum)` on a fresh runtime, replays `ops`
+/// on the source collection, then reads the total through the runtime.
+fn run_map_reduce_sum<C: Cells>(ops: &[Op]) -> i64
+where
+    Runtime<C>: Default,
+{
+    let runtime: Runtime<C> = Runtime::new();
+    let source: IncrCollection<i64, C> = runtime.create_collection();
+    let doubled = source.map(&runtime, |x| x * 2);
+    let grand_total = doubled.reduce(&runtime, |xs| xs.iter().sum::<i64>());
+    ops.iter().for_each(|op| match *op {
+        Op::Insert(value) => source.insert(&runtime, value),
+        Op::Delete(value) => {
+            source.delete(&runtime, &value);
+        }
+    });
+    runtime.get(grand_total)
+}
+
+fn batch_map_reduce_sum(ops: &[Op]) -> i64 {
+    let doubled_total = |(value, copies): (i64, usize)| (value * 2) * (copies as i64);
+    apply_to_baseline(ops).into_iter().map(doubled_total).sum()
+}
+
+fn run_sort_then_count<C: Cells>(ops: &[Op]) -> usize
+where
+    Runtime<C>: Default,
+{
+    let rt: Runtime<C> = Runtime::new();
+    let c: IncrCollection<i64, C> = rt.create_collection();
+    let sorted = c.sort_by_key(&rt, |x| *x);
+    ops.iter().for_each(|op| match op {
+        Op::Insert(v) => c.insert(&rt, *v),
+        Op::Delete(v) => {
+            c.delete(&rt, v);
+        }
+    });
+    let _ = rt.get(sorted.version_node()); // run the sort query before reading
+    let view = sorted.snapshot(); // length alone cannot catch a mis-ordered view
+    assert!(view.windows(2).all(|w| w[0] <= w[1]), "unsorted: {view:?}");
+    view.len()
+}
+
+fn batch_count(ops: &[Op]) -> usize {
+    // Sum of multiplicities = number of elements still present after `ops`.
+    apply_to_baseline(ops).into_values().sum()
+}
+
+fn op_strategy() -> impl Strategy<Value = Op> {
+    prop_oneof![
+        (-50_i64..50).prop_map(Op::Insert), // insert a value from -50..50
+        (-50_i64..50).prop_map(Op::Delete), // delete from the same range; may miss
+    ]
+}
+
+proptest! {
+    #![proptest_config(ProptestConfig::with_cases(500))]
+    // Each case draws a vec of 0 to 39 ops and compares incremental vs batch.
+    #[test]
+    fn local_filter_count_matches_batch(ops in prop::collection::vec(op_strategy(), 0..40)) {
+        let incremental = run_filter_count::<Local>(&ops);
+        let batch = batch_filter_count(&ops);
+        prop_assert_eq!(incremental, batch);
+    }
+
+    #[test]
+    fn shared_filter_count_matches_batch(ops in prop::collection::vec(op_strategy(), 0..40)) {
+        let incremental = run_filter_count::<Shared>(&ops);
+        let batch = batch_filter_count(&ops);
+        prop_assert_eq!(incremental, batch);
+    }
+
+    #[test]
+    fn local_map_reduce_matches_batch(ops in prop::collection::vec(op_strategy(), 0..40)) {
+        let incremental = run_map_reduce_sum::<Local>(&ops);
+        let batch = batch_map_reduce_sum(&ops);
+        prop_assert_eq!(incremental, batch);
+    }
+
+    #[test]
+    fn shared_map_reduce_matches_batch(ops in prop::collection::vec(op_strategy(), 0..40)) {
+        let incremental = run_map_reduce_sum::<Shared>(&ops);
+        let batch = batch_map_reduce_sum(&ops);
+        prop_assert_eq!(incremental, batch);
+    }
+
+    #[test]
+    fn local_sort_preserves_count(ops in prop::collection::vec(op_strategy(), 0..40)) {
+        let len = run_sort_then_count::<Local>(&ops);
+        let batch = batch_count(&ops);
+        prop_assert_eq!(len, batch);
+    }
+
+    #[test]
+    fn shared_sort_preserves_count(ops in prop::collection::vec(op_strategy(), 0..40)) {
+        let len = run_sort_then_count::<Shared>(&ops);
+        let batch = batch_count(&ops);
+        prop_assert_eq!(len, batch);
+    }
+}
diff --git a/crates/incr-core/tests/property.rs b/crates/incr-core/tests/property.rs
new file mode 100644
index 0000000..84732f4
--- /dev/null
+++ b/crates/incr-core/tests/property.rs
@@ -0,0 +1,172 @@
+//! Property-test suite for `incr-core`. Generates random function-DAG
+//! graphs, applies random mutations, and asserts that the incremental
+//! result matches the batch-recompute result.
+//!
+//! The same generators and verifier run under both `Local` and `Shared`
+//! strategies. Each strategy gets its own test function inside one
+//! `proptest!` block so failures shrink in the right type context.
+
+use incr_core::{Incr, Local, Runtime, Shared};
+use proptest::prelude::*;
+
+/// Verify that an incremental run on a randomly-shaped graph produces
+/// the same final values as a from-scratch rebuild with the mutated
+/// inputs in place.
+///
+/// Each `(input, value)` mutation targets input `input % num_inputs`.
+fn verify_incremental_matches_batch<C: incr_core::Cells>(
+    num_inputs: usize,
+    input_values: Vec<i64>,
+    layers: Vec<Vec<(usize, usize)>>,
+    mutations: Vec<(usize, i64)>,
+) where
+    Runtime<C>: Default,
+{
+    assert!(num_inputs >= 2);
+    assert_eq!(input_values.len(), num_inputs);
+
+    // Pass 1: incremental.
+    let rt: Runtime<C> = Runtime::new();
+    let all_nodes = build_graph(&rt, &input_values, &layers);
+    if all_nodes.len() <= num_inputs {
+        // No query node was created, so there is nothing to compare.
+        return;
+    }
+    let last = *all_nodes.last().unwrap();
+    // Evaluate once up front; the post-mutation read is what gets compared.
+    let _ = rt.get(last);
+
+    // Mirror every mutation into the plain inputs that seed the rebuild.
+    let mut final_values = input_values.clone();
+    for &(input_rel, new_val) in &mutations {
+        let idx = input_rel % num_inputs;
+        rt.set(all_nodes[idx], new_val);
+        final_values[idx] = new_val;
+    }
+    let incremental_result = rt.get(last);
+
+    // Pass 2: batch rebuild with the mutated input values baked in.
+    let rt2: Runtime<C> = Runtime::new();
+    let all_nodes2 = build_graph(&rt2, &final_values, &layers);
+    let last2 = *all_nodes2.last().unwrap();
+    let batch_result = rt2.get(last2);
+
+    assert_eq!(
+        incremental_result,
+        batch_result,
+        "Incremental {} != batch {} with {} inputs, {} layers, {} mutations (strategy = {})",
+        incremental_result,
+        batch_result,
+        num_inputs,
+        layers.len(),
+        mutations.len(),
+        std::any::type_name::<C>(),
+    );
+}
+
+/// Create one input node per value plus one query node per `(a, b)` pair,
+/// returning all nodes in creation order; shared by both verifier passes.
+fn build_graph<C: incr_core::Cells>(
+    rt: &Runtime<C>,
+    input_values: &[i64],
+    layers: &[Vec<(usize, usize)>],
+) -> Vec<Incr<i64>>
+where
+    Runtime<C>: Default,
+{
+    let mut all_nodes: Vec<Incr<i64>> = Vec::new();
+    for &v in input_values {
+        all_nodes.push(rt.create_input(v));
+    }
+    for layer in layers {
+        let mut layer_nodes = Vec::new();
+        for &(a_rel, b_rel) in layer {
+            // Operands are drawn from nodes built before this layer.
+            let avail = all_nodes.len();
+            if avail < 2 {
+                continue;
+            }
+            let a = all_nodes[a_rel % avail];
+            let b = all_nodes[b_rel % avail];
+            layer_nodes.push(rt.create_query(move |rt| rt.get(a).wrapping_add(rt.get(b))));
+        }
+        all_nodes.extend(layer_nodes);
+    }
+    all_nodes
+}
+
+proptest! {
+    #![proptest_config(ProptestConfig::with_cases(1000))]
+
+    #[test]
+    fn local_incremental_matches_batch(
+        num_inputs in 2_usize..16,
+        input_values in prop::collection::vec(-1000_i64..1000, 2..16),
+        layers in prop::collection::vec(
+            prop::collection::vec((0_usize..100, 0_usize..100), 1..5),
+            1..6,
+        ),
+        mutations in prop::collection::vec((0_usize..100, -1000_i64..1000), 1..15),
+    ) {
+        // The count and the values are generated independently: clamp
+        // the count to the values available and drop the surplus. The
+        // `.max(2)` keeps the verifier's `num_inputs >= 2` precondition.
+        let n = num_inputs.min(input_values.len()).max(2);
+        let inputs: Vec<i64> = input_values.iter().take(n).copied().collect();
+        verify_incremental_matches_batch::<Local>(
+            n, inputs, layers, mutations,
+        );
+    }
+
+    #[test]
+    fn shared_incremental_matches_batch(
+        num_inputs in 2_usize..16,
+        input_values in prop::collection::vec(-1000_i64..1000, 2..16),
+        layers in prop::collection::vec(
+            prop::collection::vec((0_usize..100, 0_usize..100), 1..5),
+            1..6,
+        ),
+        mutations in prop::collection::vec((0_usize..100, -1000_i64..1000), 1..15),
+    ) {
+        // The count and the values are generated independently: clamp
+        // the count to the values available and drop the surplus. The
+        // `.max(2)` keeps the verifier's `num_inputs >= 2` precondition.
+        let n = num_inputs.min(input_values.len()).max(2);
+        let inputs: Vec<i64> = input_values.iter().take(n).copied().collect();
+        verify_incremental_matches_batch::<Shared>(
+            n, inputs, layers, mutations,
+        );
+    }
+}
+
+#[test]
+fn regression_diamond_with_cutoff() {
+    // Input 0 is rewritten with its existing value (10) while input 1
+    // moves from 20 to 25; the same case runs under both strategies.
+    let inputs = vec![10, 20, 30];
+    let layers = vec![vec![(0, 1), (1, 2)], vec![(0, 1)]];
+    let mutations = vec![(0, 10), (1, 25)];
+    verify_incremental_matches_batch::<Local>(
+        3,
+        inputs.clone(),
+        layers.clone(),
+        mutations.clone(),
+    );
+    verify_incremental_matches_batch::<Shared>(3, inputs, layers, mutations);
+}
+
+#[test]
+fn regression_deep_chain() {
+    // Four single-query layers over five inputs, with inputs 0, 2 and
+    // 4 mutated; the same case runs under both strategies.
+    let inputs = vec![1, 2, 3, 4, 5];
+    let layers = vec![vec![(0, 1)], vec![(2, 0)], vec![(0, 1)], vec![(1, 0)]];
+    let mutations = vec![(0, 100), (2, 50), (4, 75)];
+    verify_incremental_matches_batch::<Local>(
+        5,
+        inputs.clone(),
+        layers.clone(),
+        mutations.clone(),
+    );
+    verify_incremental_matches_batch::<Shared>(5, inputs, layers, mutations);
+}
diff --git a/crates/incr-python/Cargo.toml b/crates/incr-python/Cargo.toml
index def9c70..c195966 100644
--- a/crates/incr-python/Cargo.toml
+++ b/crates/incr-python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "incr-python"
-version = "0.1.0"
+version = "0.2.0-beta.1"
 edition = "2021"
 description = "Python bindings for the incr single-threaded incremental computation engine"
 license = "Apache-2.0"
@@ -9,7 +9,8 @@ publish = false
 [lib]
 name = "incr"
 crate-type = ["cdylib"]
+doc = false
 
 [dependencies]
-incr_st = { package = "incr-compute", path = "../incr-compute" }
+incr_compute = { package = "incr-compute", version = "0.2.0-beta.1", path = "../incr-compute" }
 pyo3 = { version = "0.23", features = ["extension-module"] }
diff --git a/crates/incr-python/pyproject.toml b/crates/incr-python/pyproject.toml
index a8d5623..359cd7b 100644
--- a/crates/incr-python/pyproject.toml
+++ b/crates/incr-python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "incr-compute"
-version = "0.1.0"
+version = "0.2.0b1"
 description = "The fastest incremental computation engine for Python"
 requires-python = ">=3.8"
 license = "Apache-2.0"
diff --git a/crates/incr-python/src/lib.rs b/crates/incr-python/src/lib.rs
index 5c6ad6a..ea25ff7 100644
--- a/crates/incr-python/src/lib.rs
+++ b/crates/incr-python/src/lib.rs
@@ -1,6 +1,29 @@
+//! Python bindings for `incr-compute` (single-threaded).
+//!
+//! The Python module is named `incr`; `from incr import Runtime` opens
+//! the door to creating inputs and queries against the v0.2 engine.
+//! User values are wrapped in [`PyValue`] which provides `Clone`,
+//! `PartialEq`, `Eq`, `Hash`, and `Ord` over arbitrary `PyObject`s via
+//! the Python GIL. The runtime's `Value` bound (`Clone + PartialEq +
+//! Send + Sync + 'static`) is satisfied because `Py<PyAny>` is `Send`
+//! and `Sync` in PyO3 (you need the GIL to actually dereference).
+//!
+//! Runtime is `!Send + !Sync` under the Local strategy (the runtime's
+//! dep_stack uses a `RefCell`), so `PyRuntime` is `unsendable` and the
+//! GIL-bound nature of Python callbacks aligns nicely with that.
+
 use pyo3::prelude::*;
 use std::hash::{Hash, Hasher};
 
+use incr_compute::{
+    Incr, IncrCollection, NodeId, NodeKindInfo, PropagationTrace, Runtime, SortedCollection,
+    TraceAction,
+};
+
+/// Newtype around `PyObject` that satisfies the `Value` bound. All
+/// trait methods reacquire the GIL because Python objects are only
+/// usable while holding it; this is the conventional PyO3 pattern for
+/// embedding `PyObject` in trait-bounded Rust code.
 struct PyValue(PyObject);
 
 impl Clone for PyValue {
@@ -48,23 +71,31 @@ impl Ord for PyValue {
     }
 }
 
+/// Typed node handle exposed to Python. Wraps `Incr<PyValue>`.
 #[pyclass(name = "NodeId")]
 #[derive(Clone)]
 struct PyNodeId {
-    inner: incr_st::Incr<PyValue>,
+    inner: Incr<PyValue>,
 }
 
 #[pymethods]
 impl PyNodeId {
     #[getter]
     fn id(&self) -> u32 {
-        self.inner.node_id().raw()
+        self.inner.slot()
+    }
+
+    fn __repr__(&self) -> String {
+        format!("NodeId(slot={})", self.inner.slot())
     }
 }
 
+/// Read-only runtime handle passed to query closures. The pointer is
+/// nulled out after the callback returns to make stale captures fail
+/// loudly rather than silently corrupt memory.
 #[pyclass(name = "RuntimeRef", unsendable)]
 struct PyRuntimeRef {
-    ptr: *const incr_st::Runtime,
+    ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -75,6 +106,9 @@ impl PyRuntimeRef {
                 "RuntimeRef is no longer valid (used outside query callback)",
             ));
         }
+        // SAFETY: ptr is non-null only inside an active query callback;
+        // the Runtime is borrowed by the runtime's own closure dispatch,
+        // so the lifetime is guaranteed to outlive the callback.
         let rt = unsafe { &*self.ptr };
         let val: PyValue = rt.get(node.inner);
         Ok(val.0)
@@ -83,8 +117,8 @@ impl PyRuntimeRef {
 
 #[pyclass(name = "Collection", unsendable)]
 struct PyCollection {
-    inner: incr_st::IncrCollection<PyValue>,
-    rt_ptr: *const incr_st::Runtime,
+    inner: IncrCollection<PyValue>,
+    rt_ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -94,9 +128,13 @@ impl PyCollection {
         self.inner.insert(rt, PyValue(value));
     }
 
-    fn delete(&self, value: PyObject) {
+    fn delete(&self, value: PyObject) -> bool {
         let rt = unsafe { &*self.rt_ptr };
-        self.inner.delete(rt, &PyValue(value));
+        self.inner.delete(rt, &PyValue(value))
+    }
+
+    fn snapshot_len(&self) -> usize {
+        self.inner.snapshot_len()
     }
 
     fn filter(&self, predicate: PyObject) -> PyResult<PyCollection> {
@@ -133,10 +171,12 @@ impl PyCollection {
 
     fn count(&self) -> PyResult<PyNodeId> {
         let rt = unsafe { &*self.rt_ptr };
-        let count_node: incr_st::Incr<usize> = self.inner.count(rt);
-        // Bridge usize -> PyValue via a query
+        let count_node: Incr<u64> = self.inner.count(rt);
+        // Bridge u64 -> PyValue via a wrapper query so the Python side
+        // receives a node returning an int (PyValue), matching the
+        // single PyNodeId type the binding exposes.
         let node = rt.create_query(move |rt| -> PyValue {
-            let c: usize = rt.get(count_node);
+            let c: u64 = rt.get(count_node);
             Python::with_gil(|py| PyValue(c.into_pyobject(py).unwrap().into_any().unbind()))
         });
         Ok(PyNodeId { inner: node })
@@ -144,19 +184,18 @@ impl PyCollection {
 
     fn reduce(&self, fold_fn: PyObject) -> PyResult<PyNodeId> {
         let rt = unsafe { &*self.rt_ptr };
-        let reduce_node: incr_st::Incr<PyValue> =
-            self.inner.reduce(rt, move |elements| -> PyValue {
-                Python::with_gil(|py| {
-                    let py_list = pyo3::types::PyList::empty(py);
-                    for elem in elements.iter() {
-                        py_list.append(elem.0.clone_ref(py)).unwrap();
-                    }
-                    let result = fold_fn
-                        .call1(py, (py_list,))
-                        .expect("reduce function raised an exception");
-                    PyValue(result)
-                })
-            });
+        let reduce_node: Incr<PyValue> = self.inner.reduce(rt, move |elements| -> PyValue {
+            Python::with_gil(|py| {
+                let py_list = pyo3::types::PyList::empty(py);
+                for elem in elements.iter() {
+                    py_list.append(elem.0.clone_ref(py)).unwrap();
+                }
+                let result = fold_fn
+                    .call1(py, (py_list,))
+                    .expect("reduce function raised an exception");
+                PyValue(result)
+            })
+        });
         Ok(PyNodeId { inner: reduce_node })
     }
 
@@ -219,8 +258,8 @@ impl PyCollection {
                 })
             },
         );
-        // join returns IncrCollection<(PyValue, PyValue)>; map the pairs
-        // into PyValue-wrapped Python tuples for the Python side.
+        // join returns IncrCollection<(PyValue, PyValue)>; map pairs to
+        // Python tuples wrapped in PyValue for the unified element type.
         let mapped = joined.map(rt, |pair: &(PyValue, PyValue)| -> PyValue {
             Python::with_gil(|py| {
                 let tuple = pyo3::types::PyTuple::new(
@@ -238,15 +277,23 @@ impl PyCollection {
     }
 
     #[getter]
-    fn version_node_id(&self) -> u32 {
-        self.inner.version_node_id().raw()
+    fn version_node(&self) -> PyResult<PyNodeId> {
+        let rt = unsafe { &*self.rt_ptr };
+        let v: Incr<u64> = self.inner.version_node();
+        // Wrap the u64 version node in a PyValue-returning bridge so
+        // it can be passed to rt.get / set_label uniformly.
+        let bridge = rt.create_query(move |rt| -> PyValue {
+            let n: u64 = rt.get(v);
+            Python::with_gil(|py| PyValue(n.into_pyobject(py).unwrap().into_any().unbind()))
+        });
+        Ok(PyNodeId { inner: bridge })
     }
 }
 
 #[pyclass(name = "SortedCollection", unsendable)]
 struct PySortedCollection {
-    inner: incr_st::SortedCollection<PyValue>,
-    rt_ptr: *const incr_st::Runtime,
+    inner: SortedCollection<PyValue, PyValue>,
+    rt_ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -273,8 +320,6 @@ impl PySortedCollection {
     fn window(&self, size: usize) -> PyResult<PyCollection> {
         let rt = unsafe { &*self.rt_ptr };
         let win_collection = self.inner.window(rt, size);
-        // window returns IncrCollection<Vec<PyValue>>; map each window
-        // into a PyValue wrapping a Python list.
         let mapped = win_collection.map(rt, |window: &Vec<PyValue>| -> PyValue {
             Python::with_gil(|py| {
                 let py_list = pyo3::types::PyList::empty(py);
@@ -290,8 +335,8 @@ impl PySortedCollection {
         })
     }
 
-    fn entries(&self) -> PyResult<PyObject> {
-        let entries = self.inner.entries();
+    fn snapshot(&self) -> PyResult<PyObject> {
+        let entries = self.inner.snapshot();
         Python::with_gil(|py| {
             let list = pyo3::types::PyList::empty(py);
             for entry in entries {
@@ -301,22 +346,26 @@ impl PySortedCollection {
         })
     }
 
+    fn snapshot_len(&self) -> usize {
+        self.inner.snapshot_len()
+    }
+
     #[getter]
     fn version_node(&self) -> PyResult<PyNodeId> {
         let rt = unsafe { &*self.rt_ptr };
-        let ver_node = self.inner.version_node();
-        let node = rt.create_query(move |rt| -> PyValue {
+        let ver_node: Incr<u64> = self.inner.version_node();
+        let bridge = rt.create_query(move |rt| -> PyValue {
             let v: u64 = rt.get(ver_node);
             Python::with_gil(|py| PyValue(v.into_pyobject(py).unwrap().into_any().unbind()))
         });
-        Ok(PyNodeId { inner: node })
+        Ok(PyNodeId { inner: bridge })
     }
 }
 
 #[pyclass(name = "GroupedCollection", unsendable)]
 struct PyGroupedCollection {
-    inner: incr_st::GroupedCollection<PyValue, PyValue>,
-    rt_ptr: *const incr_st::Runtime,
+    inner: incr_compute::GroupedCollection<PyValue, PyValue>,
+    rt_ptr: *const Runtime,
 }
 
 #[pymethods]
@@ -343,15 +392,25 @@ impl PyGroupedCollection {
         }
     }
 
+    fn group_count(&self) -> usize {
+        self.inner.group_count()
+    }
+
     #[getter]
-    fn version_node_id(&self) -> u32 {
-        self.inner.version_node().node_id().raw()
+    fn version_node(&self) -> PyResult<PyNodeId> {
+        let rt = unsafe { &*self.rt_ptr };
+        let ver_node: Incr<u64> = self.inner.version_node();
+        let bridge = rt.create_query(move |rt| -> PyValue {
+            let v: u64 = rt.get(ver_node);
+            Python::with_gil(|py| PyValue(v.into_pyobject(py).unwrap().into_any().unbind()))
+        });
+        Ok(PyNodeId { inner: bridge })
     }
 }
 
 #[pyclass(name = "Runtime", unsendable)]
 struct PyRuntime {
-    inner: incr_st::Runtime,
+    inner: Runtime,
 }
 
 #[pymethods]
@@ -359,7 +418,7 @@ impl PyRuntime {
     #[new]
     fn new() -> Self {
         PyRuntime {
-            inner: incr_st::Runtime::new(),
+            inner: Runtime::new(),
         }
     }
 
@@ -378,51 +437,45 @@ impl PyRuntime {
     }
 
     fn create_query(&self, py_func: PyObject) -> PyNodeId {
-        let node = self
-            .inner
-            .create_query(move |rt: &incr_st::Runtime| -> PyValue {
-                Python::with_gil(|py| {
-                    let rt_ref = Py::new(
-                        py,
-                        PyRuntimeRef {
-                            ptr: rt as *const _,
-                        },
-                    )
-                    .unwrap();
-                    let result = py_func
-                        .call1(py, (rt_ref.clone_ref(py),))
-                        .expect("query function raised an exception");
-                    // Invalidate the ref so it can't be used after callback returns
-                    rt_ref.bind(py).borrow_mut().ptr = std::ptr::null();
-                    PyValue(result)
-                })
-            });
+        let node = self.inner.create_query(move |rt: &Runtime| -> PyValue {
+            Python::with_gil(|py| {
+                let rt_ref = Py::new(
+                    py,
+                    PyRuntimeRef {
+                        ptr: rt as *const _,
+                    },
+                )
+                .unwrap();
+                let result = py_func
+                    .call1(py, (rt_ref.clone_ref(py),))
+                    .expect("query function raised an exception");
+                // Invalidate the ref so it can't be used after callback returns.
+                rt_ref.bind(py).borrow_mut().ptr = std::ptr::null();
+                PyValue(result)
+            })
+        });
         PyNodeId { inner: node }
     }
 
     fn create_collection(&self) -> PyCollection {
         let col = self.inner.create_collection::<PyValue>();
-        let rt_ptr: *const incr_st::Runtime = &self.inner;
+        let rt_ptr: *const Runtime = &self.inner;
         PyCollection { inner: col, rt_ptr }
     }
 
     fn set_label(&self, node: PyNodeId, label: String) {
-        self.inner.set_label(node.inner.node_id(), label);
+        self.inner.set_label(node.inner.slot(), label);
     }
 
     fn set_label_by_id(&self, id: u32, label: String) {
-        self.inner.set_label(incr_st::NodeId::from_raw(id), label);
-    }
-
-    fn set_tracing(&self, enabled: bool) {
-        self.inner.set_tracing(enabled);
+        self.inner.set_label(id, label);
     }
 
     fn get_traced(&self, node: PyNodeId) -> PyResult<(PyObject, PyObject)> {
-        let (val, trace): (PyValue, incr_st::PropagationTrace) = self.inner.get_traced(node.inner);
+        let (val, trace): (PyValue, PropagationTrace) = self.inner.get_traced(node.inner);
         Python::with_gil(|py| {
             let trace_dict = pyo3::types::PyDict::new(py);
-            trace_dict.set_item("target", trace.target.raw())?;
+            trace_dict.set_item("target", trace.target.0)?;
             trace_dict.set_item("total_nodes", trace.total_nodes)?;
             trace_dict.set_item("nodes_recomputed", trace.nodes_recomputed)?;
             trace_dict.set_item("nodes_cutoff", trace.nodes_cutoff)?;
@@ -431,15 +484,15 @@ impl PyRuntime {
             let node_traces = pyo3::types::PyList::empty(py);
             for nt in &trace.node_traces {
                 let d = pyo3::types::PyDict::new(py);
-                d.set_item("id", nt.id.raw())?;
+                d.set_item("id", nt.id.0)?;
                 d.set_item(
                     "action",
                     match &nt.action {
-                        incr_st::TraceAction::VerifiedClean => "verified_clean",
-                        incr_st::TraceAction::Recomputed {
+                        TraceAction::VerifiedClean => "verified_clean",
+                        TraceAction::Recomputed {
                             value_changed: true,
                         } => "recomputed_changed",
-                        incr_st::TraceAction::Recomputed {
+                        TraceAction::Recomputed {
                             value_changed: false,
                         } => "recomputed_cutoff",
                     },
@@ -458,17 +511,17 @@ impl PyRuntime {
             let result = pyo3::types::PyList::empty(py);
             for info in &infos {
                 let d = pyo3::types::PyDict::new(py);
-                d.set_item("id", info.id.raw())?;
+                d.set_item("id", info.id.0)?;
                 d.set_item(
                     "kind",
                     match info.kind {
-                        incr_st::NodeKindInfo::Input => "input",
-                        incr_st::NodeKindInfo::Compute => "compute",
+                        NodeKindInfo::Input => "input",
+                        NodeKindInfo::Compute => "compute",
                     },
                 )?;
                 d.set_item("label", &info.label)?;
-                let deps: Vec<u32> = info.dependencies.iter().map(|n| n.raw()).collect();
-                let depts: Vec<u32> = info.dependents.iter().map(|n| n.raw()).collect();
+                let deps: Vec<u32> = info.dependencies.iter().map(|n: &NodeId| n.0).collect();
+                let depts: Vec<u32> = info.dependents.iter().map(|n: &NodeId| n.0).collect();
                 d.set_item("dependencies", deps)?;
                 d.set_item("dependents", depts)?;
                 result.append(d)?;
diff --git a/examples/spreadsheet/src/engine.rs b/examples/spreadsheet/src/engine.rs
index 6c4b150..5497b80 100644
--- a/examples/spreadsheet/src/engine.rs
+++ b/examples/spreadsheet/src/engine.rs
@@ -12,6 +12,7 @@ pub fn cell_name(col: u8, row: u32) -> String {
     format!("{}{}", (b'A' + col) as char, row)
 }
 
+#[allow(dead_code)]
 pub fn parse_cell_name(name: &str) -> Option<(u8, u32)> {
     let name = name.to_uppercase();
     let col_char = name.chars().next()?;
@@ -23,7 +24,7 @@ pub fn parse_cell_name(name: &str) -> Option<(u8, u32)> {
         return None;
     }
     let row: u32 = name[1..].parse().ok()?;
-    if row < 1 || row > ROWS {
+    if !(1..=ROWS).contains(&row) {
         return None;
     }
     Some((col, row))
@@ -38,6 +39,7 @@ pub struct SpreadsheetEngine {
     pub rt: Runtime,
     cells: HashMap<String, CellNodes>,
     /// Shared map of value nodes so query closures can look up references.
+    #[allow(dead_code)]
     value_nodes: Arc<RwLock<HashMap<String, Incr<f64>>>>,
     /// Cache of last-known cell values for diffing.
     prev_values: RwLock<HashMap<String, f64>>,
@@ -73,12 +75,11 @@ impl SpreadsheetEngine {
                             }
                             Err(_) => f64::NAN,
                         }
-                    } else if let Ok(n) = raw.parse::<f64>() {
-                        n
                     } else {
                         // Text content: display as NAN (the UI will show
                         // the raw text instead via the content field).
-                        f64::NAN
+                        // Numeric strings parse as their value.
+                        raw.parse::<f64>().unwrap_or(f64::NAN)
                     }
                 });
 
diff --git a/scripts/verify/spreadsheet.py b/scripts/verify/spreadsheet.py
new file mode 100644
index 0000000..75e46a8
--- /dev/null
+++ b/scripts/verify/spreadsheet.py
@@ -0,0 +1,122 @@
+"""End-to-end verification of incr-spreadsheet against the v0.2 wrappers.
+
+1. Connect to ws://localhost:3001/ws
+2. Receive the full_state on connect; assert seed cells have expected
+   computed values:
+   - C2 = A2 * B2 = 29.99 * 5 = 149.95
+   - C3 = A3 * B3 = 49.99 * 3 = 149.97
+   - C4 = A4 * B4 =  9.99 * 12 = 119.88
+   - C6 = SUM(C2:C4) = 419.80
+   - C7 = C6 * 0.08 = 33.584
+   - C8 = C6 + C7 = 453.384
+3. Send set_cell A2 := 100; receive update; assert C2 -> 500,
+   C6, C7, C8 update accordingly.
+4. Send set_cell B2 := 10; receive update; assert C2 -> 1000.
+5. Reset A2 -> 29.99; assert C2 -> 299.9 (B2 is still 10 from step 4).
+"""
+
+import asyncio
+import json
+import sys
+
+import websockets
+
+
+EXPECTED_SEED = {  # computed values expected in the initial full_state
+    "C2": 149.95,  # A2 * B2 = 29.99 * 5
+    "C3": 149.97,  # A3 * B3 = 49.99 * 3
+    "C4": 119.88,  # A4 * B4 = 9.99 * 12
+    "C6": 419.80,  # SUM(C2:C4)
+    "C7": 33.584,  # C6 * 0.08
+    "C8": 453.384,  # C6 + C7
+}
+
+
+def approx_eq(a, b, tol=1e-2):
+    return -tol < a - b < tol  # i.e. |a - b| strictly below the tolerance
+
+
+async def main():
+    """Run every check, print a report, and exit with status 1 on any mismatch."""
+    failures = []
+
+    async def recv_changed(ws):
+        """Receive one message, require type "update", index changes by cell."""
+        update = json.loads(await asyncio.wait_for(ws.recv(), timeout=5))
+        assert update["type"] == "update", update
+        return {c["cell"]: c for c in update["changed"]}
+
+    async with websockets.connect("ws://localhost:3001/ws") as ws:
+        # Step 1: full_state on connect.
+        raw = await asyncio.wait_for(ws.recv(), timeout=5)
+        full = json.loads(raw)
+        assert full["type"] == "full_state", full
+        cells = {c["cell"]: c for c in full["cells"]}
+        node_count = full["node_count"]
+        print(f"connected; node_count = {node_count}, cell_count = {len(cells)}")
+
+        # Step 2: validate seed cells; a missing cell or null value is a failure.
+        for cell, want in EXPECTED_SEED.items():
+            got = cells.get(cell, {}).get("value")
+            ok = got is not None and approx_eq(got, want)
+            status = "OK" if ok else "FAIL"
+            shown = "None" if got is None else f"{got:.4f}"
+            print(f"  seed {cell}: got {shown} want {want:.4f} [{status}]")
+            if not ok:
+                failures.append(f"seed {cell}: got {got}, want {want}")
+
+        # Step 3: set A2 = 100.
+        await ws.send(json.dumps({"cell": "A2", "content": "100"}))
+        changed = await recv_changed(ws)
+        print(f"\nset A2 = 100; changed cells: {sorted(changed.keys())}")
+        # After A2=100: C2 = 100 * 5 = 500
+        # C6 = 500 + 149.97 + 119.88 = 769.85
+        # C7 = 769.85 * 0.08 = 61.588
+        # C8 = 769.85 + 61.588 = 831.438
+        for cell, want in [
+            ("C2", 500.0),
+            ("C6", 769.85),
+            ("C7", 61.588),
+            ("C8", 831.438),
+        ]:
+            got = changed.get(cell, {}).get("value")
+            ok = got is not None and approx_eq(got, want)
+            status = "OK" if ok else "FAIL"
+            print(f"  after A2=100 -> {cell}: got {got} want {want:.4f} [{status}]")
+            if not ok:
+                failures.append(f"A2=100 then {cell}: got {got}, want {want}")
+
+        # Step 4: set B2 = 10.
+        await ws.send(json.dumps({"cell": "B2", "content": "10"}))
+        changed = await recv_changed(ws)
+        # C2 = 100 * 10 = 1000
+        for cell, want in [("C2", 1000.0)]:
+            got = changed.get(cell, {}).get("value")
+            ok = got is not None and approx_eq(got, want)
+            status = "OK" if ok else "FAIL"
+            print(f"  after B2=10  -> {cell}: got {got} want {want:.4f} [{status}]")
+            if not ok:
+                failures.append(f"B2=10 then {cell}: got {got}, want {want}")
+
+        # Step 5: reset A2 -> 29.99.
+        await ws.send(json.dumps({"cell": "A2", "content": "29.99"}))
+        changed = await recv_changed(ws)
+        # C2 = 29.99 * 10 = 299.9 (B2 is still 10 from step 4)
+        got = changed.get("C2", {}).get("value")
+        want = 299.9
+        ok = got is not None and approx_eq(got, want)
+        status = "OK" if ok else "FAIL"
+        print(f"  reset A2=29.99 -> C2: got {got} want {want:.4f} [{status}]")
+        if not ok:
+            failures.append(f"reset A2 then C2: got {got}, want {want}")
+
+    if failures:
+        print(f"\nFAILED: {len(failures)} assertion(s)")
+        for f in failures:
+            print(f"  - {f}")
+        sys.exit(1)
+    else:
+        print("\nALL CHECKS PASSED")
+
+
+asyncio.run(main())  # executes on import as well; there is no __main__ guard