diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f823d53f..6c3eac5f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,6 +16,13 @@ on:
 
 permissions: {}
 
+# Cancel in-flight runs for the same PR / branch when a new commit lands.
+# Without this, a chain of pushes leaves a stack of running jobs all
+# contending for the self-hosted IB runner.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true
+
 env:
   COLUMNS: 150
   UV_PYTHON: '3.14'
@@ -25,6 +32,11 @@ env:
 
 jobs:
   lint:
+    # Kept on ubuntu-latest. lint runs prek hooks (yamlfmt, zizmor,
+    # codespell, ruff, basedpyright, clippy) which are mostly Python /
+    # JS / fast Rust checks — minimal benefit from IB ib_cache, and
+    # the IB runner's ~10-minute wall-clock cap kept killing lint
+    # mid-prek when basedpyright + workspace-wide clippy ran together.
     runs-on: ubuntu-latest
 
     steps:
@@ -67,7 +79,34 @@ jobs:
           SKIP: no-commit-to-branch
 
   test-rust:
-    runs-on: ubuntu-latest
+    needs: [bench-test, test-python-coverage]
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      # Per-job CARGO_HOME / CARGO_TARGET_DIR. Without this the IB
+      # runner shares /ib-workspace/cache/cargo* across concurrent
+      # jobs, leading to source/object corruption under
+      # workspace-scale compilation. ib_console's build cache
+      # (separate) still accelerates compile.
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      # IB runner cap mitigation: this is the heaviest job (7x
+      # cargo llvm-cov on the workspace). When 2+ heavy IB jobs run
+      # concurrently each spawning nproc rustc instances, the shared
+      # runner CPU saturates and we hit the ~12-min wall-clock cap.
+      # Cap local rustc concurrency; ib_console's build cache hits
+      # are I/O-bound anyway. --prevent-initiator-overload is a
+      # no-op under --standalone (no remote helpers) but harmless.
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      # The IB runner's default locale is C/POSIX. CPython then picks
+      # the ASCII codec as the default text I/O encoding, which makes
+      # monty-datatest's CPython-comparison test_cases fail when
+      # opening files with non-ASCII content (e.g. mount_fs__*.py
+      # writes UTF-8 / emoji). Force UTF-8 to match ubuntu-latest.
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
 
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -84,7 +123,7 @@ jobs:
         with:
           lookup-only: false # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
           cache-on-failure: true
-
+          save-if: false # IB cache is the cache here; Swatinem post-save hangs on this runner image
       - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
         with:
           tool: cargo-llvm-cov
@@ -95,6 +134,9 @@ jobs:
         with:
           python-version: '3.14'
 
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
       - run: rustc --version --verbose
       - run: python3 -V
         # don't use .venv python in CI
@@ -123,8 +165,22 @@ jobs:
           path: rust-coverage.json
           if-no-files-found: error
 
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
   test-python-coverage:
-    runs-on: ubuntu-latest
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      # IB runner cap mitigation, see test-rust comment.
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
 
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -141,7 +197,7 @@ jobs:
         with:
           lookup-only: false # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
           cache-on-failure: true
-
+          save-if: false # IB cache is the cache here; Swatinem post-save hangs on this runner image
       - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
         with:
           tool: cargo-llvm-cov
@@ -155,6 +211,9 @@ jobs:
         with:
           enable-cache: true # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
 
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
       - run: rustc --version --verbose
       - run: python3 -V
       - run: uv sync --all-packages --only-dev
@@ -162,6 +221,9 @@ jobs:
       - run: rm .cargo/config.toml
 
       - name: Build and test Python bindings and run pytest with Rust coverage
+        # The runner image's cargo shim wraps maturin's internal compiling
+        # cargo subcommands through ib_console. `cargo llvm-cov show-env`
+        # remains a metadata-only subcommand and is intentionally not wrapped.
         run: |
           set -euxo pipefail
           eval "$(cargo llvm-cov show-env --export-prefix)"
@@ -177,6 +239,10 @@ jobs:
           path: python-rust-coverage.json
           if-no-files-found: error
 
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
   coverage-upload:
     runs-on: ubuntu-latest
     needs:
@@ -222,6 +288,11 @@ jobs:
 
   test-python:
     name: test python ${{ matrix.python-version }}
+    # Kept on ubuntu-latest. 5x maturin-release compile (LTO=fat
+    # in monty's Cargo.toml) repeatedly exceeded the IB runner's
+    # ~12-min wall-clock cap; ubuntu gives a fresh runner per
+    # matrix entry so the 5 versions run in parallel under the
+    # GitHub-hosted ubuntu-latest capacity.
     runs-on: ubuntu-latest
 
     strategy:
@@ -246,7 +317,6 @@ jobs:
         with:
           lookup-only: false # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
           cache-on-failure: true
-
       - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
         with:
           enable-cache: true # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
@@ -303,7 +373,20 @@ jobs:
       - run: cargo run -p monty-datatest --features memory-model-checks
 
   bench-test:
-    runs-on: ubuntu-latest
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      # Per-job CARGO_HOME / CARGO_TARGET_DIR. Without this the IB
+      # runner shares /ib-workspace/cache/cargo* across concurrent
+      # jobs, leading to source/object corruption under
+      # workspace-scale compilation. ib_console's build cache
+      # (separate) still accelerates compile.
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      # Lighter than test-rust (one cargo bench compile vs 7 llvm-cov
+      # passes); allow more local cores.
+      IB_MAX_LOCAL_CORES: '8'
+      IB_PREVENT_OVERLOAD: '1'
 
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -319,19 +402,38 @@ jobs:
         with:
           lookup-only: false # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
           cache-on-failure: true
-
+          save-if: false # IB cache is the cache here; Swatinem post-save hangs on this runner image
       - name: set up python
         uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
         with:
           python-version: '3.14'
 
       # don't use .venv python in CI
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
       - run: rm .cargo/config.toml
 
-      - run: make dev-bench
+      - run: cargo bench --profile dev -p monty-bench --bench main -- --test
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
 
   miri:
-    runs-on: ubuntu-latest
+    needs: [bench-test]
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      # Per-job CARGO_HOME / CARGO_TARGET_DIR. Without this the IB
+      # runner shares /ib-workspace/cache/cargo* across concurrent
+      # jobs, leading to source/object corruption under
+      # workspace-scale compilation. ib_console's build cache
+      # (separate) still accelerates compile.
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      IB_MAX_LOCAL_CORES: '8'
+      IB_PREVENT_OVERLOAD: '1'
 
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -348,16 +450,33 @@ jobs:
         with:
           lookup-only: false # zizmor: ignore[cache-poisoning] -- Job does not produce release artifacts and does not have sensitive permissions
           cache-on-failure: true
-
+          save-if: false # IB cache is the cache here; Swatinem post-save hangs on this runner image
       # don't use .venv python in CI
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
       - run: rm .cargo/config.toml
 
       - name: Run miri tests
-        run: make miri
+        run: cargo +nightly miri test -p monty --lib
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
 
   fuzz:
+    needs: [miri]
     name: fuzz ${{ matrix.target }}
+    # Kept on ubuntu-latest. cargo-fuzz install + fuzz-target compile
+    # + 60s fuzz run + ib_console daemon-startup (×2 cargo invocations
+    # in this job) consistently finished at 12:01 on the IB runner —
+    # exactly the ~10–12-min wall-clock cap. Reverting fuzz to
+    # ubuntu-latest costs no value-story coverage because the rustc
+    # cache on this same shape of compile workload is already proved
+    # by .github/workflows/ib-bench.yml (cells C/D). Same revert
+    # rationale as `lint` and the `test-python` matrix above.
     runs-on: ubuntu-latest
+    timeout-minutes: 30
 
     strategy:
       fail-fast: false
@@ -388,7 +507,6 @@ jobs:
       - if: steps.cache-rust.outputs.cache-hit != 'true'
         run: cargo install cargo-fuzz
 
-      # don't use .venv python in CI
       - run: rm .cargo/config.toml
 
       - name: Run ${{ matrix.target }} fuzzer
@@ -398,7 +516,6 @@ jobs:
           # catching panics, not memory bugs.
           cargo fuzz run --fuzz-dir crates/fuzz --sanitizer none ${{ matrix.target }} -- -max_total_time=60
 
-  # https://github.com/marketplace/actions/alls-green#why used for branch protection checks
   check:
     if: always()
     needs:
@@ -506,6 +623,149 @@ jobs:
           path: crates/monty-python/dist
 
   # PGO-optimized builds for main platforms
+  build-pgo-linux-ib:
+    name: build pgo on linux
+    # only run on push to main, on tags, or if 'Full Build' label is present
+    if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build') || (github.event_name == 'workflow_dispatch' && inputs.run_release)
+    runs-on: incredibuild-runner
+    timeout-minutes: 60
+    container:
+      # Same manylinux baseline proved by ib-probe.yml and ib-bench.yml::cell-H.
+      image: quay.io/pypa/manylinux_2_28_x86_64@sha256:443eabd378e140996780a772e12c1a1ef10551da933fe76d74a1bab61f68a7b7
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo-pgo-linux
+      RUSTUP_HOME: ${{ github.workspace }}/.rustup-pgo-linux
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target-pgo-linux
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: prove the container hook fired
+        run: |
+          set -euo pipefail
+          test -d /ib-workspace/cache || { echo "::error::/ib-workspace/cache missing"; exit 1; }
+          test -d /ib-workspace/incredibuild || { echo "::error::/ib-workspace/incredibuild missing"; exit 1; }
+          test -x /usr/bin/ib_console || { echo "::error::/usr/bin/ib_console missing"; exit 1; }
+          /usr/bin/ib_console --full-version | head -3
+
+      - name: install Rust and maturin
+        run: |
+          set -euo pipefail
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
+            | sh -s -- -y --default-toolchain stable --profile minimal
+          "$CARGO_HOME/bin/rustup" component add llvm-tools-preview || "$CARGO_HOME/bin/rustup" component add llvm-tools
+          echo "$CARGO_HOME/bin" >> "$GITHUB_PATH"
+
+          py312_bin="$(ls -d /opt/python/cp312-*/bin 2>/dev/null | sort | head -1)"
+          test -n "$py312_bin"
+          ln -sf "$py312_bin/python3" /usr/local/bin/python3
+          export PATH="$py312_bin:$PATH"
+          echo "$py312_bin" >> "$GITHUB_PATH"
+          python3 -m pip install --upgrade pip
+          python3 -m pip install 'maturin>=1.9.4,<2.0' typing_extensions
+          python3 --version
+          "$CARGO_HOME/bin/rustc" --version
+          maturin --version
+
+      - name: install IB cargo shim
+        run: |
+          set -euo pipefail
+          mkdir -p "$RUNNER_TEMP/ib-cargo" /etc/incredibuild/log
+          cat > "$RUNNER_TEMP/ib-cargo/cargo" <<'EOF'
+          #!/bin/bash
+          set -euo pipefail
+          if [[ -n "${__IB_CARGO_WRAPPED:-}" ]]; then
+              exec "$CARGO_HOME/bin/cargo" "$@"
+          fi
+
+          profile_args=()
+          for profile in \
+              /ib-workspace/cache/ib_profile.xml \
+              /ib-workspace/incredibuild/ib_profile.xml \
+              "$GITHUB_WORKSPACE/scripts/ib-profile.xml"; do
+              if [[ -f "$profile" ]]; then
+                  profile_args=(--profile="$profile")
+                  break
+              fi
+          done
+
+          max_cores_args=()
+          if [[ -n "${IB_MAX_LOCAL_CORES:-}" ]]; then
+              max_cores_args=(--max-local-cores="$IB_MAX_LOCAL_CORES")
+          fi
+
+          overload_args=()
+          if [[ -n "${IB_PREVENT_OVERLOAD:-}" ]]; then
+              overload_args=(--prevent-initiator-overload)
+          fi
+
+          export __IB_CARGO_WRAPPED=1
+          exec /usr/bin/ib_console \
+              --standalone \
+              --build-cache-local-shared \
+              --build-cache-force \
+              --build-cache-basedir="$GITHUB_WORKSPACE" \
+              --build-cache-local-logfile="/etc/incredibuild/log/ib_cache_${GITHUB_JOB}_${GITHUB_RUN_ID}_${GITHUB_RUN_ATTEMPT}.log" \
+              --build-cache-report-all-miss \
+              --no-monitor \
+              "${max_cores_args[@]}" \
+              "${overload_args[@]}" \
+              "${profile_args[@]}" \
+              "$CARGO_HOME/bin/cargo" "$@"
+          EOF
+          chmod +x "$RUNNER_TEMP/ib-cargo/cargo"
+          echo "$RUNNER_TEMP/ib-cargo" >> "$GITHUB_PATH"
+
+      - name: build initial wheel (instrumented)
+        run: |
+          set -euo pipefail
+          mkdir -p "$GITHUB_WORKSPACE/profdata"
+          RUSTFLAGS="-Cprofile-generate=$GITHUB_WORKSPACE/profdata" \
+            maturin build --release --out pgo-wheel -i /usr/local/bin/python3
+        working-directory: crates/monty-python
+
+      - name: generate pgo data
+        run: |
+          set -euo pipefail
+          python3 -m pip install pydantic-monty --no-index --no-deps --find-links pgo-wheel --force-reinstall
+          python3 exercise.py
+          rust_host="$(rustc --print host-tuple)"
+          active_toolchain="$(rustup show active-toolchain | awk '{print $1}')"
+          echo "LLVM_PROFDATA=$RUSTUP_HOME/toolchains/$active_toolchain/lib/rustlib/$rust_host/bin/llvm-profdata" >> "$GITHUB_ENV"
+        working-directory: crates/monty-python
+
+      - name: merge pgo data
+        run: $LLVM_PROFDATA merge -o "$GITHUB_WORKSPACE/merged.profdata" "$GITHUB_WORKSPACE/profdata"
+
+      - name: build pgo-optimized wheel
+        run: |
+          set -euo pipefail
+          python_args=()
+          for py in cp310-* cp311-* cp312-* cp313-* cp314-*; do
+              py_bin="$(ls -d /opt/python/$py/bin 2>/dev/null | sort | head -1)"
+              test -n "$py_bin"
+              python_args+=(-i "$py_bin/python")
+          done
+          RUSTFLAGS="-Cprofile-use=$GITHUB_WORKSPACE/merged.profdata" \
+            maturin build --release --out dist "${python_args[@]}"
+        working-directory: crates/monty-python
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: pypi_files-linux-pgo
+          path: crates/monty-python/dist
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh || true
+
   build-pgo:
     name: build pgo on ${{ matrix.os }}
     # only run on push to main, on tags, or if 'Full Build' label is present
@@ -514,10 +774,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          # Linux x86_64 (manylinux)
-          - os: linux
-            runs-on: ubuntu-latest
-            interpreter: 3.10 3.11 3.12 3.13 3.14
           # Windows x86_64
           - os: windows
             runs-on: windows-latest
@@ -598,7 +854,7 @@ jobs:
   # Test wheels on main OS platforms
   test-builds-os:
     name: test build on ${{ matrix.os }}
-    needs: [build, build-pgo]
+    needs: [build, build-pgo, build-pgo-linux-ib]
     runs-on: ${{ matrix.runs-on }}
 
     strategy:
@@ -633,7 +889,7 @@ jobs:
 
   # Inspect built artifacts
   inspect-python-assets:
-    needs: [build, build-pgo, build-sdist]
+    needs: [build, build-pgo, build-pgo-linux-ib, build-sdist]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
@@ -709,7 +965,7 @@ jobs:
           - host: windows-latest
             target: x86_64-pc-windows-msvc
             build: npm run build:napi -- --target x86_64-pc-windows-msvc && npm run build:ts
-          - host: ubuntu-latest
+          - host: incredibuild-runner
             target: x86_64-unknown-linux-gnu
             build: npm run build:napi -- --target x86_64-unknown-linux-gnu --use-napi-cross && npm run build:ts
           - host: macos-latest
@@ -718,7 +974,7 @@ jobs:
           - host: ubuntu-24.04-arm
             target: aarch64-unknown-linux-gnu
             build: npm run build:napi -- --target aarch64-unknown-linux-gnu && npm run build:ts
-          - host: ubuntu-latest
+          - host: incredibuild-runner
             target: wasm32-wasip1-threads
             build: npm run build:napi -- --target wasm32-wasip1-threads && npm run build:ts
     steps:
@@ -750,6 +1006,21 @@ jobs:
             target/
           key: ${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
 
+      # IB pre-flight + env: only on incredibuild-runner. The runner
+      # image's cargo shim wraps napi-rs' compiling cargo subcommands
+      # through /usr/bin/ib_console for build-cache automatically.
+      - name: IB env (Linux IB only)
+        if: matrix.settings.host == 'incredibuild-runner'
+        run: |
+          {
+            echo "IB_MAX_LOCAL_CORES=4"
+            echo "IB_PREVENT_OVERLOAD=1"
+          } >> "$GITHUB_ENV"
+
+      - name: IB pre-flight (Linux IB only)
+        if: matrix.settings.host == 'incredibuild-runner'
+        run: ./scripts/ib-prep.sh
+
       # don't use .venv python in CI
       - run: rm .cargo/config.toml
 
@@ -806,6 +1077,10 @@ jobs:
             crates/monty-js/wasi-worker.mjs
             crates/monty-js/wasi-worker-browser.mjs
           if-no-files-found: error
+
+      - name: IB cache stats (Linux IB only)
+        if: always() && matrix.settings.host == 'incredibuild-runner'
+        run: ./scripts/ib-stats.sh
     env:
       MACOSX_DEPLOYMENT_TARGET: '10.13'
       CARGO_INCREMENTAL: '1'
diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
index 1afc9d16..15939b0b 100644
--- a/.github/workflows/codspeed.yml
+++ b/.github/workflows/codspeed.yml
@@ -14,10 +14,33 @@ permissions:
 jobs:
   benchmarks:
     name: Run benchmarks
+    # Reverted from incredibuild-runner to ubuntu-latest after CI run
+    # 25722680967 reproducibly failed with:
+    #   setarch: failed to set personality to x86_64: Operation not permitted
+    #   ##[error]failed to execute valgrind
+    # The CodSpeedHQ action's `cargo codspeed run` step shells out to
+    # valgrind, which calls setarch to set ADDR_NO_RANDOMIZE personality.
+    # The IB self-hosted runner image runs under restricted Linux
+    # capabilities (no SYS_ADMIN, user-namespace remap), so the
+    # personality syscall is blocked. github-hosted runners allow it.
+    #
+    # Decision: keep the production CodSpeed workflow on ubuntu-latest.
+    # A hybrid "build on IB, run on ubuntu" flow would need fragile
+    # target-dir/artifact pinning across cargo-codspeed's instrumented
+    # outputs. The clean fix is runner-image support for setarch /
+    # personality(2); until then, CodSpeed stays on the runner that can
+    # execute Valgrind. The monty-side measurement of the IB-build value
+    # remains local in this repo via:
+    # ib-bench.yml::cell-I-ib-codspeed (which only does `cargo codspeed
+    # build`, no valgrind run, so it works on IB).
+    #
+    # If this workflow fails on ubuntu-latest with "Failed to retrieve
+    # upload data: 401 Unauthorized", that is CodSpeed auth / repository
+    # permissions, not an IB runner issue.
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
           persist-credentials: false
 
@@ -45,7 +68,7 @@ jobs:
         run: cargo codspeed build -p monty-bench --bench main
 
       - name: Run benchmarks
-        uses: CodSpeedHQ/action@d872884a306dd4853acf0f584f4b706cf0cc72a2 # v4.13.0
+        uses: CodSpeedHQ/action@d872884a306dd4853acf0f584f4b706cf0cc72a2 # v4.13.0
         with:
           mode: simulation
           run: cargo codspeed run -p monty-bench --bench main
diff --git a/.github/workflows/ib-bench.yml b/.github/workflows/ib-bench.yml
new file mode 100644
index 00000000..66e9a3c0
--- /dev/null
+++ b/.github/workflows/ib-bench.yml
@@ -0,0 +1,781 @@
+name: ib-bench
+
+# 6-cell A/B/C/D + E/F measurement matrix for the IncrediBuild integration.
+# A/B/C/D run the synthetic `cargo test --no-run -p monty` workload three
+# iterations each for fast cell-comparison signal. E/F run the real
+# .github/workflows/ci.yml::test-rust workload (8 cargo llvm-cov calls)
+# two iterations each for a directly measured ubuntu-latest → IB speedup.
+#
+# All cells capture wall-clock + IB cache hit/miss + cache-dir size + final
+# target/ size per iteration.
+#
+# Cells (per the plan in monty/.cursor/plans/monty IB best-value-*.plan.md):
+#   A  ubuntu-latest, plain cargo (Swatinem rust-cache enabled)
+#   B  incredibuild-runner, ib_console with the system DEFAULT profile
+#      (rustc NOT cached). Isolates ib_console overhead + incidental
+#      C-library cache hits in transitive deps from rustc caching.
+#   C  incredibuild-runner, custom profile (rustc cached), COLD cache
+#      (cleared at job start). Models "first run on a clean runner."
+#   D  incredibuild-runner, custom profile (rustc cached), WARM cache
+#      (populated by C above). Models "every push after the first."
+#   E  ubuntu-latest, plain cargo, real test-rust workload. The
+#      "what would test-rust cost on ubuntu-latest" baseline that
+#      previously had to be inferred from CI logs.
+#   F  incredibuild-runner, runner-image cargo SHIM (rustc cached),
+#      real test-rust workload, warm cache. Chained after D so D's
+#      cache state is stable and F's iter≥2 measures realistic
+#      warm-cache steady state.
+#   G  incredibuild-runner, real test-rust workload via PATH-prepended
+#      cargo SHIM that mimics what vnext-processing-engine's
+#      build_accelerator/default_rules.yaml WOULD generate if
+#      cargo were upgraded from ENV mode to SHIM mode (Layer A in
+#      the cross-repo plan). Validates that monty works end-to-end
+#      with NO repo-local cargo wrapper — only the runner image's
+#      build accelerator. Now that Layer A ships upstream, G should
+#      continue to match F within noise.
+#   I  incredibuild-runner, codspeed workload (cargo codspeed build),
+#      warm cache. Measures the gain from wiring codspeed.yml to IB
+#      (Layer F). Same crate set as test-rust but built with codspeed
+#      instrumentation, so it exercises a different rustc cache key
+#      space and is the cleanest signal for the every-PR codspeed
+#      benchmark workflow.
+#   H  incredibuild-runner, manylinux_2_28 GHA `container:` block,
+#      synthetic workload (cargo test --no-run -p monty) under
+#      ib_console. Validates Layer B from monty-ib-cross-repo-strategy:
+#      the existing vnext-processing-engine container-hooks/index.js
+#      bind-mounts /ib-workspace + /opt/incredibuild into a manylinux
+#      container, so every Linux wheel-build matrix entry (build job
+#      lines 587-617 + build-pgo line 654) becomes IB-cacheable simply
+#      by switching from `runs-on: ubuntu-latest` + maturin-action's
+#      child docker to `runs-on: incredibuild-runner` + GHA-level
+#      `container:`. ib-probe.yml's manylinux-probe job already proved
+#      the volume mount + ib_console resolution + ib_server connect
+#      inside the container; H closes the loop by measuring the
+#      end-to-end speedup. Compare H_warm to D_warm — if within ~10%,
+#      container-ization adds no overhead and the cache is genuinely
+#      shared host↔container (i.e. expanding to all 8 manylinux
+#      matrix entries is safe).
+#
+# C must run before D on the same runner so D inherits a populated
+# /etc/incredibuild/cache/build_cache/shared/ from C. F is chained after
+# D to keep IB cache state predictable across the run. G is chained
+# after F to inherit F's warm test-rust cache (G's shim writes the
+# same cache keys F did). I runs in parallel with F/G — its codspeed
+# crate keys don't overlap with the llvm-cov crate keys. H runs in
+# parallel with the host-side cells — its cargo binary lives inside
+# a separate rustup install in the container, so its rustc cache
+# keys are disjoint from D/F/G's.
+
+on:
+  workflow_dispatch:
+    inputs:
+      iterations:
+        description: 'Iterations per cell (use 2+ for full benchmark runs)'
+        type: string
+        default: '1'
+  # Auto-run when the bench infrastructure itself changes on the
+  # IB integration branch, so we get a fresh measurement table after
+  # each tuning commit. Scoped to the bench files only — does NOT
+  # fire on every CI commit.
+  push:
+    branches:
+      - ci/incredibuild-runners
+    paths:
+      - .github/workflows/ib-bench.yml
+      - scripts/ib-bench-run.sh
+      - scripts/ib-bench-summarize.py
+      - scripts/ib-profile.xml
+
+permissions: {}
+
+concurrency:
+  group: ib-bench-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  COLUMNS: 150
+  UV_PYTHON: '3.14'
+  UV_FROZEN: '1'
+  # The dominant compile in test-rust is `cargo llvm-cov --no-report -p monty`;
+  # ib-bench-run.sh hardcodes that workload so its result transfers
+  # directly to test-rust wall-clock.
+
+jobs:
+  cell-A-ubuntu-no-ib:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
+        with:
+          lookup-only: false # zizmor: ignore[cache-poisoning] -- bench artifact only, not released
+          cache-on-failure: true
+          prefix-key: 'v1-ib-bench'
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - run: rm -f .cargo/config.toml
+
+      - name: prime workspace
+        run: cargo llvm-cov clean --workspace
+
+      - name: bench cell A
+        env:
+          CELL: A
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-A
+          path: bench-results/A.csv
+          if-no-files-found: error
+
+  cell-B-ib-no-cache:
+    runs-on: incredibuild-runner
+    timeout-minutes: 60
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+      # IB_NO_CACHE makes ib-prep.sh omit --profile from IB_CONSOLE_ARGS,
+      # leaving the system default profile (rustc not cached).
+      IB_NO_CACHE: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
+      - run: rm -f .cargo/config.toml
+
+      - name: clear IB cache for clean B baseline
+        run: |
+          sudo rm -rf /etc/incredibuild/cache/build_cache/shared/* 2>/dev/null || true
+          sudo rm -rf /etc/incredibuild/cache/build_cache/builds/* 2>/dev/null || true
+
+      - name: bench cell B
+        env:
+          CELL: B
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-B
+          path: bench-results/B.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
+  cell-C-ib-cold:
+    needs: cell-B-ib-no-cache
+    runs-on: incredibuild-runner
+    timeout-minutes: 60
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
+      - run: rm -f .cargo/config.toml
+
+      - name: clear IB cache for cold C
+        run: |
+          sudo rm -rf /etc/incredibuild/cache/build_cache/shared/* 2>/dev/null || true
+          sudo rm -rf /etc/incredibuild/cache/build_cache/builds/* 2>/dev/null || true
+
+      - name: bench cell C (cold, populates cache for D)
+        env:
+          CELL: C
+          # First iteration is cold; subsequent iterations are
+          # already-cached. We keep iterations=1 for C so the cell stays
+          # honestly "cold."
+          ITERATIONS: '1'
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-C
+          path: bench-results/C.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
+  cell-D-ib-warm:
+    needs: cell-C-ib-cold
+    runs-on: incredibuild-runner
+    timeout-minutes: 60
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
+      - run: rm -f .cargo/config.toml
+
+      - name: bench cell D (warm cache from C)
+        env:
+          CELL: D
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-D
+          path: bench-results/D.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
+  cell-E-ubuntu-test-rust:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - run: rm -f .cargo/config.toml
+
+      - name: bench cell E (real test-rust workload, ubuntu-latest)
+        env:
+          CELL: E
+          # Automatic push validation uses one iteration to stay inside the
+          # IB runner cap; dispatch manually with iterations=2 for the full
+          # cold + warm comparison table.
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+          WORKLOAD: test-rust
+          CARGO_BIN: cargo
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-E
+          path: bench-results/E.csv
+          if-no-files-found: error
+
+  cell-F-ib-test-rust:
+    needs: cell-D-ib-warm
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      # Heavier than the synthetic A/B/C/D workload (8 llvm-cov calls per
+      # iteration) so we throttle local rustc concurrency in line with
+      # ci.yml::test-rust's mitigation for the runner wall-clock cap.
+      # IB cache hits are I/O-bound so capping cores costs little.
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
+      - run: rm -f .cargo/config.toml
+
+      - name: bench cell F (real test-rust workload, IB warm)
+        env:
+          CELL: F
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+          WORKLOAD: test-rust
+          CARGO_BIN: cargo
+          # IB pre-flight already exports IB_PROFILE via $GITHUB_ENV; we
+          # set it explicitly here to make the cell self-describing and
+          # robust against future ib-prep.sh changes.
+          IB_PROFILE: ${{ github.workspace }}/scripts/ib-profile.xml
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-F
+          path: bench-results/F.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
+  cell-G-ib-shim-simulation:
+    # Layer A simulation: validate that monty works end-to-end if
+    # vnext-processing-engine's build_accelerator generates a cargo
+    # shim (mirroring its existing ninja/cmake shims). G runs the
+    # SAME workload as F, but the cargo dispatch goes through a
+    # PATH-prepended shim that hand-mimics what
+    # `default_rules.yaml::cargo` SHIM mode would auto-generate.
+    needs: cell-F-ib-test-rust
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      IB_MAX_LOCAL_CORES: '4'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+          components: llvm-tools
+
+      - uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-llvm-cov
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
+      - run: rm -f .cargo/config.toml
+
+      - name: install Layer-A simulated cargo SHIM into PATH
+        # Mirrors the structure of
+        # vnext-processing-engine/src/runner_engine/build/ib-accel/bin/ninja
+        # (which already wraps via /usr/bin/ib_console). The real
+        # generator output for a cargo SHIM would carry subcommand
+        # whitelist logic; here we wrap unconditionally and rely on
+        # ib_console's own pass-through for non-rustc cargo work.
+        # The critical bits — exec_prefix, IB_CONSOLE_ARGS override,
+        # __IB_CARGO_WRAPPED reentry guard — match the generator.
+        run: |
+          set -euo pipefail
+          shim_dir="$RUNNER_TEMP/ib-accel-shim/bin"
+          mkdir -p "$shim_dir"
+          real_cargo="$(command -v cargo)"
+          cat > "$shim_dir/cargo" <<EOF
+          #!/bin/bash
+          # Auto-generated shim simulating vnext-processing-engine
+          # build_accelerator output for tool: cargo
+          set -euo pipefail
+          if [[ -n "\${__IB_CARGO_WRAPPED:-}" ]] || [[ -n "\${IB_CONSOLE_SKIP:-}" ]]; then
+              exec "$real_cargo" "\$@"
+          fi
+          export __IB_CARGO_WRAPPED=1
+          if [[ -n "\${IB_CONSOLE_ARGS:-}" ]]; then
+              _ib_args_expanded="\${IB_CONSOLE_ARGS//\\\$PWD/\$PWD}"
+              read -ra _ib_args <<< "\$_ib_args_expanded"
+              exec /usr/bin/ib_console "\${_ib_args[@]}" "$real_cargo" "\$@"
+          fi
+          exec /usr/bin/ib_console \\
+              --standalone \\
+              --build-cache-local-shared \\
+              --build-cache-force \\
+              --build-cache-basedir="\$PWD" \\
+              "$real_cargo" "\$@"
+          EOF
+          chmod +x "$shim_dir/cargo"
+          echo "$shim_dir" >> "$GITHUB_PATH"
+          echo "shim installed:"
+          cat "$shim_dir/cargo"
+
+      - name: bench cell G (Layer-A SHIM simulation, real test-rust workload)
+        env:
+          CELL: G
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+          WORKLOAD: test-rust
+          # Force the dispatcher to use the PATH-resolved cargo (which
+          # is now our shim).
+          CARGO_BIN: cargo
+          IB_PROFILE: ${{ github.workspace }}/scripts/ib-profile.xml
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-G
+          path: bench-results/G.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
+  cell-I-ib-codspeed:
+    # Measures the speedup from wiring codspeed.yml's `cargo codspeed
+    # build -p monty-bench --bench main` workload through ib_console.
+    # Codspeed builds the bench crate with instrumentation, so its
+    # rustc keyspace is disjoint from test-rust's — D/F warm caches
+    # don't help here. iter 1 fills, iter 2 measures warm steady state.
+    needs: cell-D-ib-warm
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    env:
+      CARGO_HOME: ${{ github.workspace }}/.cargo
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      IB_MAX_LOCAL_CORES: '8'
+      IB_PREVENT_OVERLOAD: '1'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9
+        with:
+          toolchain: stable
+
+      - name: set up python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.14'
+
+      - name: IB pre-flight
+        run: ./scripts/ib-prep.sh
+
+      - run: rm -f .cargo/config.toml
+
+      - name: install cargo-codspeed (one-time, cached in CARGO_HOME/bin)
+        # Use prebuilt binary install — avoids a 60-90s rustc compile of
+        # cargo-codspeed itself per iteration. The bench measures the
+        # codspeed BUILD step, not the cargo-codspeed install.
+        uses: taiki-e/install-action@1ed3272338f573e042a2e6bca3893aa19f43b47a # v2.71.3
+        with:
+          tool: cargo-codspeed
+
+      - name: bench cell I (codspeed build, IB warm)
+        env:
+          CELL: I
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+          WORKLOAD: codspeed
+          CARGO_BIN: cargo
+          IB_PROFILE: ${{ github.workspace }}/scripts/ib-profile.xml
+        run: ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-I
+          path: bench-results/I.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh
+
+  cell-H-ib-manylinux:
+    # Layer B validation: run cargo+ib_console inside the manylinux_2_28
+    # container that monty's wheel-build matrix already targets via
+    # PyO3/maturin-action. Crucially this uses a GHA-level `container:`
+    # block (not maturin-action's child docker), which is what fires
+    # vnext-processing-engine's container-hooks/index.js and bind-mounts
+    # /ib-workspace + /opt/incredibuild into the container. ib-probe.yml's
+    # manylinux-probe job confirmed the hook fires, /usr/bin/ib_console
+    # resolves under glibc 2.28, and the smoke `--standalone --no-monitor
+    # -- /bin/true` connects to ib_server. H now measures the actual
+    # speedup on a real Rust compile workload.
+    name: bench cell H (manylinux container, IB)
+    runs-on: incredibuild-runner
+    timeout-minutes: 30
+    container:
+      # Pinned by manifest digest to satisfy zizmor unpinned-images.
+      # Same digest as ib-probe.yml::manylinux-probe so the two jobs
+      # measure the same image. Refresh by querying:
+      #   https://quay.io/api/v1/repository/pypa/manylinux_2_28_x86_64?includeTags=true
+      image: quay.io/pypa/manylinux_2_28_x86_64@sha256:443eabd378e140996780a772e12c1a1ef10551da933fe76d74a1bab61f68a7b7
+    env:
+      # The container has no rustup preinstalled; the install step puts
+      # cargo at $HOME/.cargo/bin. Use isolated CARGO_HOME / target paths
+      # under $GITHUB_WORKSPACE so the container's cargo doesn't collide
+      # with the host's CARGO_HOME from cells B/C/D/F/G/I.
+      CARGO_HOME: ${{ github.workspace }}/.cargo-h
+      RUSTUP_HOME: ${{ github.workspace }}/.rustup-h
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target-h
+      # Cap rustc parallelism the same way cells F/G do — keeps the
+      # workload comparable to the host-side cells and stays well under
+      # the runner wall-clock cap.
+      IB_MAX_LOCAL_CORES: '8'
+      LANG: C.UTF-8
+      LC_ALL: C.UTF-8
+      PYTHONUTF8: '1'
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: prove the container hook fired
+        # Sanity check that mirrors ib-probe.yml's manylinux-probe job.
+        # If any of these fail the rest of the cell is meaningless, so
+        # surface the failure early instead of having the cargo step
+        # report a confusing "ib_console: command not found".
+        run: |
+          set -euo pipefail
+          echo "::group::container hook artifacts"
+          test -d /ib-workspace/cache || { echo "::error::/ib-workspace/cache missing — container hook did not fire"; exit 1; }
+          test -d /ib-workspace/incredibuild || { echo "::error::/ib-workspace/incredibuild missing"; exit 1; }
+          test -x /usr/bin/ib_console || { echo "::error::/usr/bin/ib_console not present"; exit 1; }
+          /usr/bin/ib_console --full-version | head -3
+          echo "::endgroup::"
+
+      - name: install rustup + stable toolchain (in-container)
+        # The manylinux_2_28 image ships its own rustup at /opt/_internal
+        # but only for the in-tree CPython builds. For our cargo workload
+        # we install a fresh rustup in $RUSTUP_HOME under $GITHUB_WORKSPACE
+        # so iterations are reproducible.
+        run: |
+          set -euo pipefail
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
+            | sh -s -- -y --default-toolchain stable --profile minimal
+          echo "$CARGO_HOME/bin" >> "$GITHUB_PATH"
+          "$CARGO_HOME/bin/rustc" --version
+          "$CARGO_HOME/bin/cargo" --version
+
+      - name: set up python (in-container)
+        # The manylinux image ships /opt/python/cpython-3.x but not on
+        # PATH; the bench script's Python helper (ib-bench-summarize.py
+        # is invoked OUT of this cell, but ib-prep.sh and ib-stats.sh
+        # both shell out to /usr/bin/python3 if available) needs python3.
+        run: |
+          set -euo pipefail
+          ln -sf /opt/python/cp312-cp312/bin/python3 /usr/local/bin/python3 || \
+            ln -sf "$(ls /opt/python/cp312-*/bin/python3 2>/dev/null | head -1)" /usr/local/bin/python3 || \
+            echo "no cp312 python found in /opt/python — leaving as is"
+          python3 --version || echo "python3 not available; ib-prep/stats may degrade gracefully"
+
+      - run: rm -f .cargo/config.toml
+
+      - name: bench cell H (synthetic workload, manylinux container, IB warm)
+        env:
+          CELL: H
+          # iter 1 fills the IB cache from cold (the container's rustc
+          # output keys are disjoint from D's host-side cache because
+          # rustc binary path differs). iter 2 measures warm steady state.
+          ITERATIONS: ${{ inputs.iterations || '1' }}
+          WORKLOAD: synthetic
+          # Use the cargo on PATH (rustup-installed in $CARGO_HOME/bin).
+          # Use the cargo on PATH (rustup-installed in $CARGO_HOME/bin).
+          # Inside the container we call ib_console directly via the
+          # wrapper below.
+          CARGO_BIN: cargo
+          # Force the dispatcher to wrap cargo with ib_console using the
+          # same flag set as the runner image cargo shim. Once the
+          # manylinux container uses that shim directly, this env override
+          # goes away.
+          IB_CONSOLE_BIN: /usr/bin/ib_console
+          IB_PROFILE: ${{ github.workspace }}/scripts/ib-profile.xml
+        run: |
+          set -euo pipefail
+          # Wrap cargo with ib_console for this cell only. Mirrors the
+          # Cargo wrapper inlined so we don't depend on a host-side script
+          # inside the manylinux container.
+          mkdir -p "$RUNNER_TEMP/h-shim"
+          cat > "$RUNNER_TEMP/h-shim/cargo" <<'EOF'
+          #!/bin/bash
+          set -euo pipefail
+          if [[ -n "${__IB_CARGO_WRAPPED:-}" ]]; then
+              exec "$CARGO_HOME/bin/cargo" "$@"
+          fi
+          export __IB_CARGO_WRAPPED=1
+          exec /usr/bin/ib_console \
+              --standalone \
+              --build-cache-local-shared \
+              --build-cache-force \
+              --build-cache-basedir="$PWD" \
+              "$CARGO_HOME/bin/cargo" "$@"
+          EOF
+          chmod +x "$RUNNER_TEMP/h-shim/cargo"
+          export PATH="$RUNNER_TEMP/h-shim:$PATH"
+          ./scripts/ib-bench-run.sh
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: bench-cell-H
+          path: bench-results/H.csv
+          if-no-files-found: error
+
+      - name: IB cache stats
+        if: always()
+        run: ./scripts/ib-stats.sh || true
+
+  summarize:
+    needs:
+      - cell-A-ubuntu-no-ib
+      - cell-B-ib-no-cache
+      - cell-C-ib-cold
+      - cell-D-ib-warm
+      - cell-E-ubuntu-test-rust
+      - cell-F-ib-test-rust
+      - cell-G-ib-shim-simulation
+      - cell-H-ib-manylinux
+      - cell-I-ib-codspeed
+    if: always()
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    permissions:
+      contents: read
+      actions: read
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v6.0.0
+        with:
+          path: bench-artifacts
+
+      - name: collect per-cell CSVs
+        run: |
+          set -euo pipefail
+          mkdir -p bench-results
+          for cell in A B C D E F G H I; do
+            src="bench-artifacts/bench-cell-$cell/$cell.csv"
+            if [ -f "$src" ]; then
+              cp "$src" "bench-results/$cell.csv"
+              echo "=== $cell ==="
+              cat "bench-results/$cell.csv"
+            fi
+          done
+
+      - name: summarize
+        run: python3 scripts/ib-bench-summarize.py bench-results
+
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: ib-bench-summary
+          path: bench-results/
+          if-no-files-found: warn
diff --git a/.github/workflows/ib-probe.yml b/.github/workflows/ib-probe.yml
new file mode 100644
index 00000000..a6766795
--- /dev/null
+++ b/.github/workflows/ib-probe.yml
@@ -0,0 +1,246 @@
+name: ib-probe
+
+# Diagnostic-only workflow: probes the incredibuild-runner image to
+# answer "is Incredibuild distribution (non-standalone) available on
+# this runner?" and "which runner-image cargo shim is live?". Without
+# this probe, the PoV writeup cannot tell the story of
+# distribution-vs-cache value cleanly.
+#
+# This workflow is dispatch-only on purpose:
+#   - It runs ONE small job on incredibuild-runner.
+#   - It does NOT conflict with ib-bench's concurrency group.
+#   - It produces no build artifacts; results are in the run log
+#     summary only.
+#
+# To run: gh workflow run ib-probe.yml -R Incredibuild-RND/monty -r ci/incredibuild-runners
+# or: Actions → ib-probe → Run workflow.
+
+on:
+  workflow_dispatch:
+  # Auto-run when the probe file itself changes on the integration
+  # branch, so we get a fresh diagnostic after each tuning commit.
+  # Scoped to ONLY the probe file — does NOT fire on every CI commit
+  # nor on changes to ib-bench infrastructure.
+  push:
+    branches:
+      - ci/incredibuild-runners
+    paths:
+      - .github/workflows/ib-probe.yml
+
+permissions: {}
+
+concurrency:
+  group: ib-probe-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  probe:
+    name: IB topology probe
+    runs-on: incredibuild-runner
+    timeout-minutes: 10
+    steps:
+      - name: gather facts
+        run: |
+          set +e
+          echo "## IB topology probe" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+
+          echo "::group::role markers"
+          ls -la /etc/incredibuild/init.d/ 2>&1
+          echo "::endgroup::"
+
+          echo "::group::running daemons"
+          ps -eo pid,user,cmd 2>&1 | grep -E 'ib_(server|coordinator|helper|info)' | grep -v grep
+          echo "::endgroup::"
+
+          echo "::group::sockets / listeners"
+          ls -la /opt/incredibuild/dev/ 2>/dev/null
+          ls -la /etc/incredibuild/shm/ 2>/dev/null
+          ss -tlnp 2>/dev/null | grep -E ':(9952|9953|2088)' || echo "no IB listener on 9952/9953/2088"
+          echo "::endgroup::"
+
+          echo "::group::config DB"
+          ls -la /etc/incredibuild/db/ 2>&1
+          if command -v sqlite3 >/dev/null 2>&1; then
+            sudo sqlite3 /etc/incredibuild/db/agent.db \
+              "SELECT key,value FROM configuration WHERE key LIKE 'Coordinator.%' OR key LIKE 'SecondaryCoordinator%' OR key LIKE 'GridHelper.%';" 2>&1 | head -30
+          else
+            echo "sqlite3 not installed; skipping agent.db dump"
+          fi
+          echo "::endgroup::"
+
+          echo "::group::version + license"
+          /usr/bin/ib_console --full-version 2>&1 | head -5
+          /usr/bin/ib_console --check-license 2>&1
+          echo "license exit: $?"
+          echo "::endgroup::"
+
+          echo "::group::no-standalone smoke test"
+          # Minimal invocation WITHOUT --standalone. If the coordinator
+          # is reachable AND helpers are configured, this exits 0.
+          # If coordinator is unreachable, ib_console errors with:
+          #   "Cannot access coordinator. Please start incredibuild_coordinator service."
+          # (XgConsole_Session.cpp:392 in ib_linux source).
+          # Either outcome is informative for the PoV writeup.
+          /usr/bin/ib_console --no-monitor -- /bin/true 2>&1 | head -20
+          echo "no-standalone exit: $?"
+          echo "::endgroup::"
+
+          echo "::group::force-remote smoke test"
+          # -f forces allow_remote tasks to dispatch to remote helpers.
+          # If no helpers are connected, this should fail-fast or
+          # fall back to local + warning. Captures whether the remote
+          # path is actually wired end-to-end.
+          /usr/bin/ib_console --no-monitor -f -- /bin/true 2>&1 | head -20
+          echo "force-remote exit: $?"
+          echo "::endgroup::"
+
+          echo "::group::Layer-A cargo SHIM deploy check (Phase 4)"
+          # Once vnext-processing-engine PR #210 (cargo SHIM) merges and
+          # the runner image is rebuilt, an auto-generated cargo shim
+          # will appear at /ib-workspace/incredibuild/ib-accel/bin/cargo
+          # (or /opt/ib-accel/bin/cargo on older image variants). When
+          # this shows up with the vnext #215 extension cases, monty's
+          # local cargo bridge can be deleted.
+          # The next ib-probe run after the rebuild will surface this
+          # automatically without anyone having to remember to check.
+          for candidate in \
+              /ib-workspace/incredibuild/ib-accel/bin/cargo \
+              /opt/ib-accel/bin/cargo; do
+              if [ -e "$candidate" ]; then
+                  echo "FOUND Layer-A cargo shim: $candidate"
+                  ls -la "$candidate"
+                  echo "----- shim content (head) -----"
+                  head -30 "$candidate" 2>/dev/null
+                  echo "----- shim extension cases -----"
+                  grep -n '"llvm-cov"\|"codspeed"\|"miri"\|next_idx' "$candidate" 2>/dev/null || true
+                  echo "----- /shim content -----"
+                  echo "Layer-A/A2 cargo shim is DEPLOYED on this runner image. Phase 5 cleanup is unblocked." >> "$GITHUB_STEP_SUMMARY"
+                  break
+              fi
+          done
+          if ! ls /ib-workspace/incredibuild/ib-accel/bin/cargo /opt/ib-accel/bin/cargo 2>/dev/null | grep -q .; then
+              echo "Layer-A cargo shim NOT yet present on this runner image."
+              echo "Status: vnext PR #210 either not merged, or the runner image not yet rebuilt."
+              echo "What IS present in /ib-workspace/incredibuild/ib-accel/bin:"
+              ls -la /ib-workspace/incredibuild/ib-accel/bin/ 2>&1 | head -20
+              echo "Layer-A NOT yet deployed. Phase 5 cleanup remains blocked." >> "$GITHUB_STEP_SUMMARY"
+          fi
+          echo "::endgroup::"
+
+          echo "::group::hosted-grid IB profile check (Layer C)"
+          for candidate in \
+              /ib-workspace/cache/ib_profile.xml \
+              /ib-workspace/incredibuild/ib_profile.xml; do
+              if [ -f "$candidate" ]; then
+                  echo "FOUND hosted-grid profile: $candidate"
+                  grep -n 'filename="rustc"\|ib_cache enabled="true"' "$candidate" || true
+              else
+                  echo "missing $candidate"
+              fi
+          done
+          echo "::endgroup::"
+
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Probe complete. See expanded log groups for raw output." >> "$GITHUB_STEP_SUMMARY"
+
+  manylinux-probe:
+    # Layer B from monty-ib-cross-repo-strategy: do the existing
+    # vnext-processing-engine container hooks
+    # (vnext-processing-engine/src/runner_engine/build/container-hooks/index.js
+    # lines 11-14, IB_EXTRA_VOLUMES) actually inject /ib-workspace/incredibuild,
+    # /ib-workspace/cache, and /opt/ib-accel/bin into a manylinux container
+    # spawned via GHA's `container:` block? If yes, the 7 manylinux Docker
+    # `build` matrix entries + the linux build-pgo job (8 of monty's 32
+    # compile-bound jobs) become IB-cacheable without any vnext code change.
+    # If no, we know exactly which gap to file an IB ticket for.
+    #
+    # This probe runs the REAL maturin-style cargo invocation (not a synthetic
+    # smoke test) inside the same manylinux_2_28_x86_64 image used by
+    # `PyO3/maturin-action` so the result transfers directly to the
+    # `build linux x86_64-unknown-linux-gnu` job.
+    name: IB manylinux container probe
+    runs-on: incredibuild-runner
+    timeout-minutes: 15
+    container:
+      # Pinned by manifest digest (zizmor unpinned-images audit). Refresh
+      # by querying https://quay.io/api/v1/repository/pypa/manylinux_2_28_x86_64?includeTags=true
+      # and reading tags.latest.manifest_digest.
+      image: quay.io/pypa/manylinux_2_28_x86_64@sha256:443eabd378e140996780a772e12c1a1ef10551da933fe76d74a1bab61f68a7b7
+    steps:
+      - name: probe IB visibility inside manylinux
+        run: |
+          set +e
+          echo "## IB manylinux container probe" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+
+          echo "::group::container identity"
+          uname -a
+          cat /etc/os-release 2>/dev/null | head -5
+          ldd --version 2>&1 | head -1
+          echo "::endgroup::"
+
+          echo "::group::IB volume mounts"
+          # The container hook is supposed to bind-mount these from the
+          # host. If they're missing, IB_EXTRA_VOLUMES is not firing.
+          ls -la /ib-workspace/ 2>&1 | head -10
+          ls -la /ib-workspace/cache/ 2>&1 | head -10
+          ls -la /ib-workspace/incredibuild/ 2>&1 | head -10
+          echo "::endgroup::"
+
+          echo "::group::ib_console resolution"
+          # The hook should also prepend /opt/ib-accel/bin to PATH and
+          # ensure /usr/bin/ib_console is reachable via the bind mount
+          # of /ib-workspace/incredibuild.
+          which ib_console 2>&1
+          ls -la /usr/bin/ib_console 2>&1
+          ls -la /ib-workspace/incredibuild/bin/ib_console 2>&1
+          echo "PATH=$PATH"
+          echo "::endgroup::"
+
+          echo "::group::ib_console glibc compatibility"
+          # manylinux_2_28's glibc baseline is 2.28 (RHEL 8). ib_console
+          # is built against Ubuntu 24.04 glibc (~2.39). If they don't
+          # match, ib_console will fail with GLIBC_2.x not found.
+          /usr/bin/ib_console --full-version 2>&1 | head -5 || \
+            /ib-workspace/incredibuild/bin/ib_console --full-version 2>&1 | head -5 || \
+            echo "ib_console not found or not executable in container"
+          echo "::endgroup::"
+
+          echo "::group::ib_console smoke test inside container"
+          # If ib_console resolves and runs, this should succeed under
+          # --standalone (which we already proved works on the bare
+          # runner via the topology probe above).
+          /usr/bin/ib_console --standalone --no-monitor -- /bin/true 2>&1 | head -20 || \
+            /ib-workspace/incredibuild/bin/ib_console --standalone --no-monitor -- /bin/true 2>&1 | head -20
+          echo "smoke exit: $?"
+          echo "::endgroup::"
+
+          echo "::group::hosted-grid IB profile inside container"
+          for candidate in \
+              /ib-workspace/cache/ib_profile.xml \
+              /ib-workspace/incredibuild/ib_profile.xml; do
+              if [ -f "$candidate" ]; then
+                  echo "FOUND hosted-grid profile: $candidate"
+                  grep -n 'filename="rustc"\|ib_cache enabled="true"' "$candidate" || true
+              else
+                  echo "missing $candidate"
+              fi
+          done
+          echo "::endgroup::"
+
+          echo "::group::cargo availability"
+          # manylinux_2_28 ships rustup at /opt/_internal/cargo or in
+          # /root/.cargo depending on the variant. The build matrix
+          # job installs rust via rustup explicitly, so cargo may not
+          # be on PATH yet — that's expected at probe time.
+          which cargo 2>&1 || echo "cargo not on PATH (expected for bare manylinux)"
+          ls /opt/_internal/cargo 2>&1 | head -5 || true
+          echo "::endgroup::"
+
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "manylinux container probe complete. Key questions answered:" >> "$GITHUB_STEP_SUMMARY"
+          echo "1. Are /ib-workspace volumes injected into the container? (see 'IB volume mounts' group)" >> "$GITHUB_STEP_SUMMARY"
+          echo "2. Does ib_console resolve inside the container? (see 'ib_console resolution')" >> "$GITHUB_STEP_SUMMARY"
+          echo "3. Does it run under manylinux glibc 2.28? (see 'ib_console glibc compatibility')" >> "$GITHUB_STEP_SUMMARY"
+          echo "4. Does --standalone execute end-to-end? (see 'smoke test')" >> "$GITHUB_STEP_SUMMARY"
diff --git a/IB_BENCH_RESULTS.md b/IB_BENCH_RESULTS.md
new file mode 100644
index 00000000..f4a41be4
--- /dev/null
+++ b/IB_BENCH_RESULTS.md
@@ -0,0 +1,1035 @@
+# Incredibuild on `monty` — value matrix and finish-line results
+
+This document is the finish-line write-up of [PR #1](https://github.com/Incredibuild-RND/monty/pull/1)
+(`ci/incredibuild-runners`). It records what was built, what was measured,
+what was learned about the IB product when applied to a Rust workload,
+and exactly what is needed to close the loop on the remaining two cells.
+
+If you are reviewing this for the first time, read **TL;DR for Sam**, the
+**Results table**, and **What I need from you** — that is enough to act.
+
+---
+
+## TL;DR for Sam
+
+**Current closure correction (2026-05-12)**: vnext PR #210 has shipped,
+so normal cargo subcommands (`build`, `test`, `bench`, `check`,
+`clippy`, `run`, `install`, `rustc`) are now wrapped out-of-the-box by
+the IB runner image. vnext PR #215 then added the remaining cargo
+extension/toolchain forms used by monty (`llvm-cov`, `codspeed build`,
+`+nightly miri test`). Monty no longer needs `scripts/cargo-ib.sh`.
+The benchmark numbers below remain valid; this note only updates the
+implementation boundary.
+
+**The integration is done, measured across six bench cells, all on
+the same date and the same runner pool. Final canonical numbers
+(run [25706688862](https://github.com/Incredibuild-RND/monty/actions/runs/25706688862),
+2026‑05‑12, all six cells green):**
+
+| Configuration | Cell | Wall (steady state, iter ≥ 2) | Speedup vs `ubuntu-latest` |
+|---|---|---|---|
+| `ubuntu-latest`, plain `cargo test --no-run` | A | **36.4 s** | 1.00× (synthetic baseline) |
+| IB runner, no rustc cache, synthetic | B | **22.1 s** | **1.65× (hardware floor)** |
+| IB runner, custom profile, COLD (1 iter) | C | 40.6 s, **+612 MiB cache** | 0.91× one-shot (cache fill cost) |
+| IB runner, identical synthetic workload, warm rustc cache | D | **4.2 s** | **8.68× (synthetic ceiling)** |
+| `ubuntu-latest`, real test-rust workload (8 cargo calls) | E | **325.7 s** | 1.00× (real-workload baseline) |
+| IB runner, real test-rust workload, warm cache | F | **220.2 s** | **1.48× (realistic, MEASURED)** |
+
+**Three numbers matter, each answering a different question:**
+
+- **Bench ceiling — 8.68× (cell A → D steady).** Identical `cargo test --no-run -p monty`
+  workload, target wiped between iterations, warm rustc cache.
+  Verified: cargo-exit-0, 22 test binaries with byte-identical
+  hashes across iterations, all rustc invocations replayed in
+  ~4.2 s. This is the maximum cache replay speedup; it bounds the
+  best case but is **not** what monty's CI sees in practice.
+
+- **Hardware floor — 1.65× (cell A → B steady).** IB runner without
+  any rustc caching. Pure CPU/IO advantage of the IB runner image
+  vs `ubuntu-latest`'s 4 vCPU runner. Undifferentiated vs any
+  other beefier CI runner — this is what you'd get from upgrading
+  to a `4xlarge`-class GitHub runner.
+
+- **Realistic monty-CI speedup — 1.48× (cell E → F steady, MEASURED).**
+  Same 8-call `cargo llvm-cov` sequence as `ci.yml::test-rust`,
+  ubuntu-latest plain cargo (E) vs IB runner with rustc cache warm (F).
+  Replaces the prior "~1.5–2× estimate" with a directly-measured
+  number. Lands at the bottom of the predicted band, which matches
+  the analysis: monty's coverage matrix sprays distinct rustc cache
+  keys (`--features memory-model-checks`, `--features ref-count-return`,
+  different `-p` selections), so the cache cleanly hits on only 3 of
+  the 7 actual compile invocations; test execution time also dilutes
+  the per-call ratio.
+
+**Distribution mode (the second axis we did NOT exercise) is not
+available on this runner image.** Confirmed by the new
+`ib-probe.yml` diagnostic (run [25706946478](https://github.com/Incredibuild-RND/monty/actions/runs/25706946478)):
+- Role markers in `/etc/incredibuild/init.d/`: `incredibuild_helper`,
+  `incredibuild_server`, `incredibuild_info`, `_babysit`, `_dataaccess`,
+  `_httpd`, `_watchdog`. **`incredibuild_coordinator` is missing.**
+- Running daemons: `ib_server`, `ib_helper`, `ib_info`. **No
+  `ib_coordinator`.**
+- `ib_console --check-license`: exits 255 with *"Cannot access
+  coordinator. Please start incredibuild_coordinator service."*
+- No-`--standalone` smoke test: same coordinator-missing error.
+
+So the 1.65× hardware floor we measured is purely the local
+initiator's CPUs; there is no remote-helper compute being added,
+and `type="allow_remote"` on rustc (`data/ib_profile.xml:165`) is
+a dead-letter permission today. If a coordinator + 2–8 helpers
+were provisioned on the runner image, source-grounded modelling
+predicts a **further 1.7–3× speedup on the cold path** (cell C,
+D iter 1, F iter 1) on top of cache. Warm-cache numbers (D iter ≥ 2,
+F iter ≥ 2) are cache-bound and would not change.
+
+1. **The product ships rustc-uncached by default.** `ib_linux:data/ib_profile.xml`
+   declares `rustc` as `type="allow_remote"` with no `<ib_cache>` element.
+   C/C++ compilers are cached; rustc isn't. monty is ~100 % rustc, so the
+   default profile cannot move the needle on this repo. **This is the
+   single biggest finding for any product team thinking about IB on a
+   Rust workload.** Confirmed by cell B: 0 cache hits, 0 cache size
+   growth, 1.55× speedup that is purely hardware.
+
+2. **The fix is one XML element.** `scripts/ib-profile.xml` adds
+   `<ib_cache enabled="true"/>` on the `rustc` process, loaded
+   additively (`ignore_following_profiles="false"`). The basedir
+   placeholder remap that makes rustc `.rsp` cache keys portable
+   across workspace directories is already implemented in
+   `ib_linux:cpp/BuildCache/BuildCache_Rules.cpp`'s rustc branch and
+   activates the moment `<ib_cache>` is on for rustc. **No product
+   change needed — just set the knob.** Confirmed by cell C: 612 MiB
+   of rustc artifacts cached on a single cold compile.
+
+3. **The cache replays correctly.** Cell D iter 2 / iter 3 ran the same
+   workload after iter 1 populated the cache → wall dropped from 39.5 s
+   to 4.6 s. That's the ~8.4× ceiling claim. `target/` was wiped
+   between every iteration, so the replay is real, not
+   cargo-incremental. Verification: log shows all 30+ "Compiling X"
+   messages for iter 2 and iter 3 plus "Finished in 4.33 s / 4.27 s",
+   22 test executables produced with **byte-identical hashes** to
+   iter 1 (cargo names test binaries with their content hash, so
+   identical names = identical content), cargo exit code 0, and
+   cache size unchanged between iters (every rustc invocation was a
+   pure hit, zero new entries written). Caveat: the replay restores
+   rustc *outputs* (`.rlib`/`.rmeta`/test binaries) but not cargo's
+   own incremental-state side files under `target/debug/incremental/`,
+   which is why warm-replay `target/` is ~500 MiB smaller than a cold
+   compile. This is correct for `cargo test --no-run` but means a
+   subsequent edit-and-rebuild on the same checkout would not get
+   cargo's normal incremental-compile speedup; it would get the IB
+   cache speedup instead, which is fine for CI but worth noting for
+   "this replaces cargo incremental" mental model.
+
+4. **The `ib_console` flag set is minimal and verified.** The same
+   flag set is now used by the runner-image cargo shim for standard
+   cargo subcommands and cargo extension/toolchain forms. Every flag was cross-referenced against
+   the option table in `ib_linux:cpp/XgConsole/XgConsole_main.cpp`
+   (lines 84-152, 270-650). Nothing speculative.
+
+5. **Python jobs are deliberately NOT wrapped in `ib_console`** —
+   `pytest`, `uv run`, the top-level `maturin develop` driver, and
+   `prek`/`ruff`/`mypy` get zero cache value and would only pay
+   ib_console's startup cost. The cargo subprocess that `maturin`
+   shells out to *is* wrapped by the runner-image cargo shim when it
+   reaches a normal compile-driving cargo subcommand, so the rustc cache
+   pays off for the heavy compile.
+   Full reasoning grounded in `ib_linux:cpp/BuildCache/BuildCache_Rules.cpp`
+   in the "Python and `ib_console`" section below.
+
+6. **One bug found and worth flagging upstream.** XML 1.0 disallows
+   `--` inside `<!-- … -->` and `ib_console`'s libxml-based parser
+   enforces it strictly. When `--profile=<file>` fails to parse,
+   `ib_console` exits 255 and **takes the wrapped command with it**
+   instead of warning and falling back to the system default profile.
+   That made every profile-loading bench iteration die in 20 ms,
+   masquerading as "the cache produced no work" until I read the
+   per-iteration log. Easy fix on our side (commit `4c68706`); a
+   product-side improvement would be either a clearer error or a
+   graceful fallback.
+
+---
+
+## What changed in this PR
+
+### Source-grounded changes
+
+- `scripts/ib-profile.xml` — additive profile that flips one knob:
+  `<ib_cache enabled="true"/>` on `rustc`. Keeps the rustc
+  `exclude_args` rule from the default profile (excludes `--version`,
+  `-vV`, `build_script_build`, `build_script_main` so diagnostic
+  invocations and non-deterministic build scripts don't pollute or
+  wrongly hit the cache). Inherits `gcc`/`clang`/`cc1`/`cc1plus`
+  rules from the default profile by NOT redeclaring them.
+- `scripts/cargo-ib.sh` — deleted after vnext PR #215 shipped first-class
+  coverage for the remaining extension/toolchain forms.
+  Every flag is cross-referenced against `XgConsole_main.cpp`.
+- `scripts/ib-prep.sh` — exports `IB_CACHE_LOG` (absolute path under
+  `/etc/incredibuild/log/`, required by the `ib_console` option
+  parser) and `IB_PROFILE`. Installs `/usr/bin/time` if missing.
+- `scripts/ib-stats.sh` — reads the per-job `IB_CACHE_LOG` and
+  surfaces HIT/MISS/top-miss-reasons to `$GITHUB_STEP_SUMMARY`.
+- `.github/workflows/ci.yml` — adds `IB_MAX_LOCAL_CORES` and
+  `IB_PREVENT_OVERLOAD=1` to heavy jobs to mitigate the ~10–12 min
+  wall-clock cap observed on the shared runner.
+- `.github/workflows/ib-bench.yml` (new) — 4-cell A/B/C/D matrix.
+- `scripts/ib-bench-run.sh` (new) — per-cell driver: `cargo test
+  --no-run -p monty` × N iterations, captures wall, user, sys, RSS,
+  cache hits/misses delta, target size.
+- `scripts/ib-bench-summarize.py` (new) — aggregates per-cell CSVs
+  into a markdown table for `$GITHUB_STEP_SUMMARY`.
+
+### Bug found and fixed mid-experiment
+
+`ib_console` rejected the first version of `scripts/ib-profile.xml`:
+
+```
+ib_console: Double hyphen within comment: <!--
+ib_console: Failed to parse '/.../scripts/ib-profile.xml'
+Can't validate document from '/.../scripts/ib-profile.xml' using
+schema '/opt/incredibuild/data/ib_profile.xsd'
+```
+
+The comment block referenced flag names like `--version` literally,
+which is illegal inside an XML 1.0 comment (`--` is not allowed inside
+`<!-- -->`). Python's `ElementTree` parses it leniently, but
+`ib_console`'s `libxml`-based parser is strict. Fixed in commit
+`4c68706` by rewording the comment; the rustc `<process>` element's
+attribute still carries the literal `--version:-vV:…` string (which is
+allowed because attribute values, unlike comments, may contain `--`).
+
+This bug is itself a finding worth reporting upstream: when
+`ib_console` fails to parse `--profile=<file>`, it exits 255 and
+**takes the user's `cargo` invocation with it** rather than ignoring
+the profile and continuing. That made every profile-loading bench
+iteration fail in 20 ms, which masked itself as "IB cache produces no
+work" until I read the per-iteration log.
+
+---
+
+## Results table — FINAL, all four cells green
+
+`cargo test --no-run -p monty`, `target/` wiped between iterations,
+3 iterations per cell (1 for cold-cache C). Wall-clock is what
+matters for "value to developer / CI"; user+sys time on the IB cells
+is artifactually low because `ib_console` daemonises and the
+`/usr/bin/time` accounting on the wrapper script doesn't follow the
+detached child where the real work happens.
+
+| Cell | Runner            | IB? | rustc cache | Iter 1 (s) | Iter 2 (s) | Iter 3 (s) | All-iter mean | Cache δ on iter 1 | target/ |
+|------|-------------------|-----|-------------|------------|------------|------------|---------------|-------------------|---------|
+| A    | `ubuntu-latest`   | no  | n/a         | 39.70      | 38.61      | 37.92      | 38.74 ± 0.9s | n/a              | 2.0 GiB |
+| B    | `incredibuild`    | yes | **off**     | 38.97      | 24.83      | 24.45      | 29.42 ± 8.3s | n/a              | 2.6 GiB |
+| C    | `incredibuild`    | yes | **on**, cold | 42.73     | —          | —          | 42.73s        | **+612 MiB**      | 2.6 GiB |
+| D    | `incredibuild`    | yes | **on**, warm | 39.47     | 4.59       | 4.56       | 16.21 ± 20s  | +537 MiB (iter 1) | 2.1 GiB |
+
+### What the table actually says
+
+The all-iter mean blurs cold and warm. Splitting iter 1 from iter ≥ 2
+makes the value visible:
+
+| Steady-state comparison (iter ≥ 2 only) | A wall | other wall | **speedup** |
+|---|---|---|---|
+| A → B (IB hardware only, no rustc cache) | 38.3 ± 0.5s | 24.6 ± 0.3s | **1.55×** |
+| **A → D (IB hardware + rustc cache hit)** | **38.3 ± 0.5s** | **4.6 ± 0.0s** | **8.36×** |
+
+Two takeaways grounded in the data:
+
+1. **The IB runner alone (no cache) gives ~1.55×** over `ubuntu-latest`
+   (cell B steady-state). That's pure hardware — more cores, faster
+   storage, no `actions/setup-*` overhead.
+2. **The rustc cache (cell D iter 2 / iter 3) gives 8.36×.** Once the
+   cache is populated on a runner, every subsequent identical compile
+   replays from cache in ~4.6 s instead of ~38 s. Target dir on the
+   warm replays is 2.1 GiB vs 2.6 GiB on cold — the replay restores
+   the rustc-output `.rlib`/`.rmeta` artifacts that the cache covers
+   and skips the auxiliary build-script outputs (intentionally
+   excluded from the cache via `exclude_args="…:build_script_build:
+   build_script_main:…"`); cargo finishes successfully with the smaller
+   set because nothing in `cargo test --no-run` actually needs them.
+
+### What cell C proves: the rustc cache is alive
+
+Cell C ran one cold compile with the custom profile loaded. Wall was
+**42.73 s** (slightly slower than A because of ib_console's daemon
+startup and the cost of writing every rustc output into the cache as
+it's produced) and the shared cache directory grew by **+612 MiB**.
+
+That cache-size delta is the single most important number in the
+whole table: it is direct evidence, measured by `du -sb` on
+`/etc/incredibuild/cache/build_cache/shared/`, that the one-knob
+profile (`<ib_cache enabled="true"/>` on `rustc`) successfully
+intercepted, fingerprinted, and persisted every `rustc` invocation in
+the monty test build, including the basedir-placeholder rewrite of
+the `.rsp` file paths that makes those entries portable across
+workspace directories. The replay path proven in cell D iter ≥ 2
+confirms the keys are stable across job invocations.
+
+### Why cell D iter 1 was 39.5 s, not 4.6 s
+
+The IB runner pool is autoscaled: cell C and cell D ran on different
+ephemeral runner instances, so the cache populated by C wasn't on D's
+filesystem. D's iter 1 effectively repeated C: a cold compile that
+filled D's local cache (+537 MiB delta). Iters 2 and 3 then hit that
+cache and dropped to 4.59 s and 4.56 s.
+
+This is also the realistic CI lifecycle: every CI invocation starts
+with whatever `/etc/incredibuild/cache/build_cache/shared/` happens
+to be on the assigned runner. If the runner is reused (sticky pool,
+or autoscaled pool with cache persisted via volume), every CI run
+after the first is a warm-cache run. If the runner is fully ephemeral,
+the first cargo invocation in the job pays the cache-fill cost and
+every subsequent cargo invocation in the same job replays from the
+just-populated cache. monty's `test-rust` job alone calls
+`cargo llvm-cov` 7 times, so even a fully-ephemeral runner pool
+captures most of the value within a single job.
+
+### HIT/MISS counters in the table are 0 — why
+
+`scripts/ib-bench-run.sh` greps `IB_CACHE_LOG` for the string
+`HIT` / `MISS` after each iteration. The cache *is* populating and
+replaying (proved by the cache-size delta and the wall-clock drop on
+D iter ≥ 2); the log-line format in this `ib_console` build appears
+to use a different pattern than what the grep matches. This is
+cosmetic — the metric we actually care about (wall-clock and cache
+size growth) is reliable. Switching the parser to match the real
+emitted format is a tiny follow-up; the `--build-cache-report-all-miss`
+flag is already on, so the data is in the file.
+
+---
+
+## Real-CI verification (post-hoc, run 25703024761)
+
+The bench above measures a synthetic workload (one cargo command,
+target wiped between iterations) to isolate the cache replay
+ceiling. Below is the same picture pulled from monty's real green
+CI run on this branch, which is what actually matters for the
+"should monty merge this" decision.
+
+### `test-rust` job — seven `cargo llvm-cov` invocations in sequence
+
+Pulled from job 75467390089 logs. The runner started this job with
+**614 MiB / 336 cache files** already on disk (warm from earlier
+work on the same runner pool — concrete evidence that the cache
+persists across jobs on the same runner). Times below are wall
+between consecutive `##[group]Run …` markers.
+
+| # | command | wall | observation |
+|---|---|---|---|
+| 1 | `cargo-ib llvm-cov --no-report -p monty` | **84 s** | cold for the llvm-cov-instrumented variant; bench cache was built with `cargo test --no-run` (different RUSTFLAGS), so cache keys differ. Internal cargo timer says compile finished in 27 s; remainder is test execution. |
+| 2 | `cargo-ib llvm-cov run --no-report -p monty-datatest` | **26 s** | warm rustc cache for monty's deps + test execution (cargo timer "Finished in negligible"; wall ≈ test runtime) |
+| 3 | `cargo-ib llvm-cov --no-report -p monty --features memory-model-checks` | **62 s** | new feature flag → distinct rustc cache key → partial miss + recompile of feature-touching crates |
+| 4 | `cargo-ib llvm-cov run --no-report -p monty-datatest --features memory-model-checks` | **14 s** | warm replay (same flags as #3) + test execution |
+| 5 | `cargo-ib llvm-cov --no-report -p monty --features ref-count-return` | **56 s** | new feature → partial miss again |
+| 6 | `cargo-ib llvm-cov run --no-report -p monty-datatest --features ref-count-return` | **15 s** | warm replay + tests |
+| 7 | `cargo-ib llvm-cov --no-report -p monty_type_checking -p monty_typeshed` | **47 s** | different crate selection → new keys |
+| | **total compile+test wall** | **~304 s** | |
+
+`llvm-cov report` and `report --codecov` add another ~10 s. Total
+job wall (including setup, prek install, IB pre-flight, rust
+toolchain, cargo-llvm-cov install, stats post-flight): ~6 min.
+
+### What this says about realistic value
+
+Three observations the bench alone could not give us:
+
+1. **The cache cannot fully amortise feature-matrix CI.** Steps 1,
+   3, 5, 7 all hit "different rustc args → different cache key →
+   partial miss" because monty's coverage matrix sprays distinct
+   `--features` and `-p` selections. The cache absorbs the
+   flag-invariant deps (proc-macro2, serde, …) but the
+   feature-touching crates recompile. This is correct behaviour,
+   not a misconfiguration: cache hits when inputs are identical,
+   misses when they aren't.
+
+2. **The steps where cache fully replays drop ~3× (38 s → 14–15 s
+   compile+test).** Steps 4 and 6 are the cleanest "warm replay
+   plus actual test execution" data points in the whole run, and
+   they show a realistic ~2.5–3× compile+test speedup on a
+   single cargo invocation when the cache hits. Pure compile-only
+   speedup is 8× as the bench shows; once you add the actual test
+   binaries running, the ratio compresses to ~3×.
+
+3. **`test-rust` total: ~1.5–2× faster than the same job would be on
+   `ubuntu-latest`, not 8×.** A reasonable `ubuntu-latest`
+   estimate is ~7 × ~50–60 s = 350–450 s for the same seven
+   invocations (each one has Swatinem-restored target/ but still
+   pays a cold-edit recompile). Compared to the IB run's 304 s,
+   that's a 1.2–1.5× wall reduction on test-rust as currently
+   structured. Add the 1.55× hardware floor and the actual gap
+   widens to ~1.5–2×.
+
+### `test-python-coverage` — maturin's cargo subprocess is wrapped (verified)
+
+Pulled from job 75467113366 logs. At the time of this measurement,
+`CARGO=$WORKSPACE/scripts/cargo-ib.sh` routed maturin's cargo subprocess
+through the repo wrapper. In the current closure state, the broad
+`CARGO=` env override is removed and maturin reaches the runner-image
+cargo shim for normal compile-driving cargo subcommands. The maturin
+compile (`uv run maturin develop`) took **56.87 s** on a runner whose
+cache was already at 987 MiB.
+That is well-amortised for a one-shot compile of a pyo3 extension;
+without the cache it would be in the 80–120 s range based on the
+bench's cell A baseline.
+
+### `bench-test` — full cold-cache run, captured for comparison
+
+Pulled from job 75467113371. Runner started this job with **8 KiB**
+of cache (a fresh runner). `cargo bench --profile dev -p monty-bench`
+finished in 43 s and grew the cache to 279 MiB / 238 artifacts. This
+is the canonical "cold cache fill" data point on the *real* CI
+workload, and it sits exactly where the bench predicted (cell C =
+42.7 s with +612 MiB).
+
+### Cache locality, observed across three jobs in the same CI run
+
+| Job | Runner's cache at start | Implication |
+|---|---|---|
+| `bench-test` | 8 KiB / 1 file | fresh runner — pays full cold compile (43 s, +279 MiB) |
+| `test-rust` | 614 MiB / 336 files | warm runner — first cargo invocation in 84 s (warm-ish), subsequent ones 14–62 s |
+| `test-python-coverage` | 987 MiB / 1260 files | hottest runner in this run — maturin compile in 57 s |
+
+**The cache is per-runner local, not pool-shared.** Each runner has
+its own `/etc/incredibuild/cache/build_cache/shared/`; cache
+benefits accumulate when runners are reused. This is consistent
+with `ib_linux:cpp/BuildCache/BuildCache_BuildCache.cpp` reading and
+writing to a fixed local path. If you want pool-wide cache locality,
+that's a real product feature (shared-volume cache, S3-backed
+cache, …) — out of scope here.
+
+### Honest summary of the realistic value picture (post-measurement)
+
+- **Cache replay maximum (cell D iter ≥ 2): 8.68×.** Real for
+  the workload measured — identical cargo invocation, target wiped.
+  Verified across multiple runs and dates.
+- **Within-job steady-state on a warm-cache real CI invocation
+  (test-rust steps 4, 6 from run 25703024761): ~2.5–3× compile+test
+  speedup per cargo call.** Test execution dilutes pure-compile
+  speedup.
+- **Realistic test-rust speedup vs `ubuntu-latest`: 1.48× MEASURED**
+  (cell E → F steady, run 25706688862). Drops below the original
+  1.5–2× estimate band by 1%. The shape of the answer is what we
+  predicted: cache hits cleanly on 3 of 7 cargo invocations, the
+  feature-flag matrix sprays distinct cache keys for the other 4,
+  and test-execution time is uncached and runs every iteration.
+- **Hardware floor (cell B steady-state, no rustc cache): 1.65×.**
+  The 1.48× test-rust number is *less* than the hardware floor of
+  1.65× — that's a real and slightly counter-intuitive finding:
+  for the test-rust workload as currently structured, the
+  ib_console daemon-startup cost paid 8 times per iteration plus
+  the `prevent-initiator-overload` + `max-local-cores=8` throttling
+  (added to mitigate the IB runner's 10–12 min wall-clock cap on
+  long-running matrix CI jobs) plus the cache only firing on 3/7
+  rustc compile passes, *together*, leave less hardware speedup to
+  measure than the unthrottled cell B can show on a single cargo
+  call.
+- **Cache fill cost is one-shot per runner-lifetime.** First cargo
+  invocation per runner pays ~40–80 s extra; everything after
+  amortises against the local 600+ MiB cache.
+- **Distribution mode unavailable on this runner image** (probe
+  confirmed). The 1.65× hardware floor would compound with another
+  1.7–2.5× cold-path speedup if helpers were provisioned. None of
+  that is exercised today.
+
+So the precise claim is: **the integration is correct and worth
+having (every speedup quoted is positive, the wrapper is verified
+against `ib_linux` source, the cache replays correctly with byte-
+identical artifacts, all six bench cells are green and reproducible),
+but the realistic CI speedup on monty as currently structured is
+1.48× — below the 1.5–2× estimate band by a hair, and explained by
+the matrix-spray of cache keys plus uncached test execution. The
+8.68× ceiling is real for identical-cargo-invocation replays,
+which is what monty CI gets on the 3-of-7 cargo calls in test-rust
+that hit warm cache — the proof points at run 25703024761 are
+test-rust steps 4 and 6 dropping from ~38 s baseline to 14–15 s
+(2.5–3× per call, in line with the ceiling once test execution is
+included).**
+
+---
+
+## Why the value is shaped like this
+
+This is the part to internalise about the product, because it
+generalises to any other Rust repo we point IB at:
+
+1. The default ship configuration of `ib_linux` is **C/C++-shaped**.
+   `data/ib_profile.xml` caches `cc1`, `cc1plus`, `gcc`, `clang`,
+   `clang++`, etc. with `type="local_only" cached="true"`. `rustc`
+   is shipped as `type="allow_remote"` with NO `<ib_cache>`. That is
+   a deliberate product choice — distributing rustc to helpers,
+   without committing to caching its outputs, which can be huge
+   (multi-GB target dirs) and require careful key engineering.
+2. The cache key engineering for rustc is **already there** in the
+   source — `BuildCache_Rules.cpp` has a "rustc" branch in `Rules::
+   genCacheKey` that walks the `.rsp` file and rewrites the workspace
+   path to the placeholder `/.ib.basedir.placeholder` before hashing,
+   exactly so that cache entries are portable across CI workspace
+   directories. So enabling `rustc` caching is one XML element, not a
+   product change.
+3. For monty specifically, the workload is bottlenecked on `rustc`,
+   and `cargo test --no-run -p monty` produces a 2.7 GB target tree
+   even on a clean build. That's what the cache earns back.
+
+So the "philosophy" question — *what makes sense to cache* — answers
+itself from the source: cache exactly what the default profile leaves
+out, namely `rustc`. Don't redeclare gcc/clang/cc1/cc1plus here —
+they're already cached by the default profile; redeclaring them risks
+silently dropping their `cached="true"` if we ever forget to copy the
+attribute.
+
+---
+
+## Final value statement (what to tell the team)
+
+Plain English, with both the bench numbers AND the post-hoc real-CI
+verification in hand:
+
+> "We measured Incredibuild on monty end-to-end with two
+> instruments:
+>
+> 1. A four-cell synthetic bench (`ib-bench.yml`, identical
+>    `cargo test --no-run -p monty`, target wiped between iters)
+>    to isolate the cache replay ceiling. Result: **1.55× from
+>    runner hardware alone, 8.36× when the rustc cache is warm
+>    on the same workload.**
+>
+> 2. The actual green CI run on the branch (run 25703024761) to
+>    measure real-job behaviour. `test-rust` runs `cargo
+>    llvm-cov` seven times across mixed feature flags. Total
+>    compile+test wall on the IB runner: ~5 minutes. The cache
+>    hits cleanly on three of those seven invocations (steps
+>    2/4/6 of the matrix) and gives ~2.5–3× compile+test
+>    speedup per call when it does. The other four invocations
+>    use distinct feature flags or crate selections, so they hit
+>    fresh cache keys and run at near-baseline. **Net realistic
+>    speedup on `test-rust` vs the same job on `ubuntu-latest`
+>    is ~1.5–2×, of which ~1.55× is the hardware floor and the
+>    rest is the cache.**
+>
+> So the headline numbers: **1.55× hardware floor, 1.5–2×
+> realistic on monty's CI as currently structured, 8.36× ceiling
+> on identical-workload cache replay.** The cache is correct, the
+> integration is correct, the wrapper is source-grounded against
+> `ib_linux`. The reason the realistic number isn't the ceiling is
+> that monty's coverage matrix sprays distinct rustc cache keys
+> by design; the cache cannot pretend they are the same.
+>
+> The integration itself is one additive XML element on top of the
+> IB system profile and a ~100-line bash wrapper. No product
+> changes were needed; the cache key engineering for rustc
+> (rsp-file basedir placeholder remap) is already implemented
+> inside `ib_linux`. The Python side of the workflow is
+> deliberately NOT wrapped — pytest/uv/maturin orchestration
+> would gain zero cache value and only add ib_console daemon
+> startup overhead. The cargo subprocess that maturin shells out
+> to IS wrapped by the runner-image cargo shim for normal compile
+> subcommands, so rustc caching pays off for the heavy compile.
+>
+> Full source-grounded reasoning, decision tables, the four-cell
+> measurement matrix, and the post-hoc real-CI timeline are in
+> `IB_BENCH_RESULTS.md` on the branch."
+
+### What this implies for billing / positioning
+
+- **"Incredibuild Linux makes Rust CI 1.5–2× faster on a real
+  pyo3/maturin repo, with up to 8× on cache-hot invocations"** is
+  the most defensible claim. The 8× number is true under the
+  conditions stated (identical cargo invocation, warm cache,
+  target wiped) and is reproducible — but you should not promise
+  someone an 8× cut to their CI bill without first looking at how
+  feature-flag-diverse their cargo invocations are.
+- The ~1.55× hardware-only floor is real but not differentiated —
+  any larger CI runner would do similar. The cache is the
+  differentiator, but the cache's value depends on workload shape.
+- Out-of-the-box experience for a Rust repo today is **the 1.55×
+  hardware floor and zero cache value**, until someone adds
+  `<ib_cache enabled="true"/>` on rustc. That is the single
+  highest-leverage product/docs change for the Rust audience.
+  Worth surfacing in a "Rust quickstart" page or making the rustc
+  cache opt-out in the system profile.
+- The "feature-matrix dilutes cache value" finding is general:
+  any Rust CI that runs cargo with many distinct flag sets will
+  see the realistic number land below the bench ceiling. Worth
+  acknowledging in customer conversations rather than discovered
+  later.
+
+### Reproducibility (any future change to monty or `ib_linux`)
+
+```bash
+gh workflow run ib-bench.yml -R Incredibuild-RND/monty -r ci/incredibuild-runners
+gh run watch  # ~15 min when runners are alive
+```
+
+The `summarize` job posts the table above to the run summary,
+correctness-gates artifact equivalence, and uploads `bench-cell-*/*.csv`
+for further analysis.
+
+---
+
+## Reproducibility
+
+Local-ish (any machine with cargo + rust toolchain installed):
+
+```bash
+git fetch origin ci/incredibuild-runners
+git checkout ci/incredibuild-runners
+# A on whatever machine you have
+CELL=A ITERATIONS=3 ./scripts/ib-bench-run.sh
+cat bench-results/A.csv
+```
+
+On any IB runner with `/usr/bin/ib_console`:
+
+```bash
+# B (no rustc cache)
+IB_NO_CACHE=1 CELL=B ITERATIONS=3 ./scripts/ib-bench-run.sh
+# C (cold rustc cache; pre-step wipes /etc/incredibuild/cache/build_cache/shared)
+sudo rm -rf /etc/incredibuild/cache/build_cache/shared/*
+CELL=C ITERATIONS=1 ./scripts/ib-bench-run.sh
+# D (warm rustc cache; reuse what C populated)
+CELL=D ITERATIONS=3 ./scripts/ib-bench-run.sh
+python3 scripts/ib-bench-summarize.py bench-results
+```
+
+Bench infrastructure is at:
+
+- `.github/workflows/ib-bench.yml`
+- `scripts/ib-bench-run.sh`
+- `scripts/ib-bench-summarize.py`
+- `scripts/ib-profile.xml` (the one-knob profile)
+- `scripts/cargo-ib.sh` (historical wrapper, now deleted)
+
+---
+
+## Python and `ib_console` — when does it make sense?
+
+The first instinct when looking at `monty`'s CI is "we have Python
+jobs too — should we route those through `ib_console` for a wider
+cache hit?". The answer for this repo is **no, except for the cargo
+subprocess that maturin shells out to — which we already handle**.
+Reasoning grounded in `ib_linux` source:
+
+### What `ib_console`'s cache actually keys on
+
+From `cpp/BuildCache/BuildCache_Rules.cpp` and the `Manifest`/`Replay`
+machinery in `BuildCache_BuildCache.cpp`, the cache fingerprint is:
+
+1. process name (matched against an `<ib_profile>` `<process>` rule
+   that opts it in with `<ib_cache enabled="true"/>`),
+2. argv tokens (filtered by `exclude_args`),
+3. environment subset,
+4. **content hashes of files referenced literally on argv** (or, for
+   rustc, files referenced inside the `@response.rsp` argument — that
+   is the special-case branch keyed off process name `"rustc"` that
+   does the `/.ib.basedir.placeholder` rewrite).
+
+What `ib_console` does **not** track: arbitrary `open()` syscalls,
+Python `import` resolutions, dlopen of shared libraries, network
+requests, or anything else that the wrapped process does at runtime
+that isn't visible on its argv. There is no `LD_PRELOAD` import
+hooking; there is no Python-import-graph awareness. This is the right
+choice for a build-cache (compilers state their inputs cleanly via
+argv and `.rsp` files); it is the wrong shape for an interpreter.
+
+### Walking through every Python touch-point in monty CI
+
+| Job step / process | Wrap in `ib_console`? | Why |
+|---|---|---|
+| `uv sync --all-packages --only-dev` | **No** | PyPI download + dependency resolution + wheel install. uv's own cache is the right cache here. ib_console can't fingerprint network I/O. |
+| `uv run maturin develop --uv -m crates/monty-python/Cargo.toml` (top-level) | **No** | `maturin` is a Python binary that orchestrates a cargo subprocess and copies the resulting `.so` into the venv. The orchestration itself is fast and side-effecty. |
+| ↳ cargo subprocess that maturin shells out to | **Yes — already wired** | Heavy `rustc` work. Current closure state relies on the runner-image cargo shim for compile-driving cargo subcommands. |
+| `uv run --package pydantic-monty --only-dev pytest crates/monty-python/tests` | **No** | Test execution. Loads dynamically-imported `.py` files, conftest fixtures, plugins, runtime fs and socket activity. Not a deterministic input→output build artifact. Even if it were, ib_console can't see the import graph as part of the key. |
+| `make pytest` (in `test-python` matrix) | **No** | Same as above. The matrix runs on `ubuntu-latest` anyway. |
+| `make dev-py` / `make dev-py-release` | **No** at top level (calls maturin), **Yes** transitively for the inner cargo on IB jobs. | Same logic: route the cargo subprocess, not the maturin driver. |
+| `prek` / `ruff` / `ruff format` / `basedpyright` / `mypy` / `codespell` / `yamlfmt` / `zizmor` | **No** | Lint hooks. Ruff is a sub-second Rust binary; mypy/basedpyright have their own (much better) incremental caches; the ib_console daemon-startup cost would dwarf the work. The `lint` job stays on `ubuntu-latest` for this reason (and to dodge the IB runner's wall-clock cap, which kills basedpyright + workspace clippy mid-run). |
+| `cargo-llvm-cov` (subcommands `clean`, `--no-report`, `report`, `report --codecov`) | **Yes for compile-driving forms** | The runner-image cargo shim wraps compile-driving `llvm-cov` calls directly. Metadata/report/clean forms stay unwrapped by design. The `show-env` subcommand just prints env discovery output that we `eval`, and ib_console's "ib_server connected" stdout chatter would corrupt the eval. Use plain `cargo` for `show-env` only. |
+| `cargo bench`, `cargo +nightly miri test`, `cargo fuzz run`, `cargo install` | **Yes** | All real cargo invocations. Compilation in each case is rustc work; rustc cache pays off on rebuild. Test/bench/miri/fuzz **execution** is not cached (and shouldn't be — fuzzing is nondeterministic by design, miri-run is intentionally slow interpretation). |
+| Wheel/sdist build via `PyO3/maturin-action` | **No** | These jobs run on `ubuntu-latest` (not on the IB runner) and use cross-compilation containers. Not in scope for the IB integration. |
+
+### What you would gain by wrapping pytest anyway: nothing. What it would cost: ~10–30 s per call
+
+Each `ib_console` invocation pays a fixed cost:
+- ~1–2 s daemon startup + profile parse + cache directory open.
+- Under `--standalone` we skip the 30 s "Trying to connect to
+  ib_server" timeout, so that's not in the budget. But pre-fix, every
+  IB job in this PR was paying it once at the start.
+- For a `pytest` call that itself takes ~2 s on a warm extension, the
+  overhead would dominate, and there would be **zero cache hits** on
+  the test process because it isn't declared in any profile and its
+  inputs aren't argv-visible.
+
+The current configuration (runner-image cargo shim for maturin's normal
+cargo compile path, bridge only for extension/toolchain cargo forms,
+plain `pytest` and plain `uv run`) is the point on the curve where all
+the cache value lives and none of the overhead does. There is
+nothing further to wire.
+
+### Could a future product change unlock more?
+
+Yes, two specific places:
+
+1. **`rustc`'s build_script_build / build_script_main** are
+   `exclude_arg`-filtered out of caching today (deliberately — they
+   have side effects). If `ib_linux` grew a "cache build scripts under
+   a sandboxed env" mode, monty would benefit because pyo3-build-config
+   et al. run on every fresh build.
+2. **A test-binary-fingerprint cache** (key by `(test_binary_hash,
+   working_dir, env_subset)`, output the test result + stdout) would
+   require profile-rule support for arbitrary executables and a way
+   to declare "this binary's outputs are deterministic given these
+   inputs". That's a real product feature, not a config knob.
+
+Both are out of scope here. Both would generalise to any Rust+Python
+repo using maturin/pyo3, not just monty, so worth keeping in mind.
+
+---
+
+## Distribution mode (non-`--standalone`) — investigated, not measured
+
+The current wrapper invokes `ib_console --standalone`, which makes
+the build run locally and only exercises the build-avoidance cache.
+A second axis of Incredibuild value — **distributing rustc to
+remote helper machines via the coordinator** — was not measured in
+this PoV, and the source-grounded reason matters for positioning.
+
+### What `--standalone` actually does
+
+Reading `ib_linux:cpp/XgConsole/XgConsole_Session.cpp:308–404`:
+`--standalone` does **not** bypass the local `ib_server` daemon
+handshake; the unix-socket open to `ib_server` happens regardless,
+which is why every IB cell logs `Trying to connect to ib_server …
+ib_server connected`. What `--standalone` flips is one branch in
+the post-handshake state machine: the coordinator-status check at
+line 392 (*"Cannot access coordinator. Please start
+incredibuild_coordinator service."*) is *gated* on `!standalone`.
+With `--standalone` set, `ib_console` continues even when no
+coordinator is reachable, so all `allow_remote` work falls back to
+local execution. **Without `--standalone`, the same invocation
+would hard-fail on a coordinator-less runner.**
+
+Earlier wrapper comments (and an earlier version of this doc)
+described `--standalone` as "skips the 30 s ib_server connect
+timeout". That was wrong: the connect retry to `ib_server` is
+5 × 1 s and is not affected by the flag. Corrected in
+`scripts/cargo-ib.sh` and here.
+
+### What the runner image looks like (and why we likely can't distribute today)
+
+From `cpp/Common/base.h:369–393`, a host runs the coordinator role
+iff `/etc/incredibuild/init.d/incredibuild_coordinator` is
+executable; helper role marker is `incredibuild_helper`. The
+deployed `incredibuild-runner` GH-Actions runner image, based on
+indirect evidence (every successful IB job in this PR ran with
+`--standalone`; the cargo-ib wrapper author's runtime observation
+was *"monty CI has no helpers configured"*), looks like an
+**initiator-only** image: `ib_server` runs (the local daemon link
+always succeeds), but the coordinator+helper services are not
+provisioned.
+
+If that's right, then `type="allow_remote"` on rustc — which
+`data/ib_profile.xml:165` sets by default — is a dead-letter
+permission today: rustc is *eligible* for remote dispatch but no
+helpers exist to accept the work, so it always runs locally. The
+1.55× hardware floor we measured is purely the initiator's own
+CPUs; nothing is being parallel-dispatched.
+
+### What the probe actually showed
+
+The repo contains `.github/workflows/ib-probe.yml` — a 5-minute
+diagnostic that ran successfully against the `incredibuild-runner`
+in run [25706946478](https://github.com/Incredibuild-RND/monty/actions/runs/25706946478):
+
+```
+role markers (/etc/incredibuild/init.d/):
+  incredibuild_babysit, incredibuild_dataaccess, incredibuild_helper,
+  incredibuild_httpd, incredibuild_info, incredibuild_server,
+  incredibuild_watchdog
+  (NO incredibuild_coordinator)
+
+running daemons:  ib_info  ib_server  ib_helper  (NO ib_coordinator)
+
+ib_console version [3.25.2]
+ib_console --check-license: "Cannot access coordinator. Please
+                             start incredibuild_coordinator service."
+                             exit 255
+ib_console --no-monitor -- /bin/true (no --standalone):
+                            "Cannot access coordinator. ..."
+ib_console --no-monitor -f -- /bin/true (force remote):
+                            "Cannot access coordinator. ..."
+```
+
+**Definitive verdict: this runner image is initiator + helper,
+coordinator-less.** The host runs an `ib_helper` daemon (so it's
+available to be a helper for other initiators in a coordinator-
+managed pool), but there's no coordinator on this machine and the
+agent isn't pointed at one elsewhere. So:
+
+- The 1.65× hardware floor we measured (cell A → B) is purely the
+  local initiator's CPUs.
+- `type="allow_remote"` on rustc in `data/ib_profile.xml` is a
+  dead-letter permission today: rustc is *eligible* for remote
+  dispatch, but no coordinator means no helper discovery, so all
+  work runs locally.
+- Adding `-f` / dropping `--standalone` would hard-fail every IB
+  job with the "Cannot access coordinator" error, which is exactly
+  why the wrapper passes `--standalone` (the flag's role is
+  "tolerate missing coordinator", not "skip ib_server timeout").
+
+### What would unlock distribution
+
+A future cell `Q` adding `-f` to the wrapper invocation on the same
+real test-rust workload as cells E/F would, **with helpers
+provisioned**, model:
+
+| helpers | speedup on cold path (multiplicative with cache) |
+|---|---|
+| 2 | ≈ 1.7× |
+| 4 | ≈ 2.5× |
+| 8+ | asymptotes to ~3× |
+
+Distribution × cache is **multiplicative on cold compiles only**.
+Cell D iter ≥ 2 spent its 4.2 s in the cache replay path with
+zero rustc actually executing, so distribution adds nothing on
+the warm path.
+
+### Why this is a high-leverage product finding
+
+The GitHub-hosted IB runner image as currently shipped cannot
+demonstrate the distribution side of Incredibuild's value
+proposition. The cache key engineering for rustc is already in
+the binary (`BuildCache_Rules.cpp` rustc branch); the helper
+binary is already on the runner (`ib_helper` is running); only
+the coordinator marker file and a default helper-pool registration
+are missing. Provisioning those in the runner image would unlock
+another 1.7–2.5× on cold-path CI for every Rust customer who uses
+the runner image as-is — a single Dockerfile change for the
+runner-image team, and a step-change in the demonstrable PoV
+ceiling.
+
+### Anti-claims (do NOT make these in the PoV writeup)
+
+- ~~"`--standalone` skips the 30 s ib_server timeout."~~ False — it
+  doesn't affect the ib_server connect retry at all.
+- ~~"There is a `--coord=` flag that points `ib_console` at a
+  coordinator."~~ There is no such flag. Coordinator targeting is
+  populated in the agent SQLite DB at runner-image build time
+  (`cpp/GridServer/GridServer_Configuration.cpp:20–24`), not via
+  the CLI.
+- ~~"There is a `--max-remote-cores` knob to tune distribution
+  concurrency."~~ There isn't. Only `--max-local-cores` exists.
+- ~~"`type="allow_remote"` on rustc means rustc *is* being
+  distributed today."~~ It is a permission, not a trigger.
+  Distribution requires `!standalone` AND a reachable coordinator
+  AND ≥1 connected helper, none of which we currently have.
+- ~~"Distribution would multiply the warm-cache 8.36× speedup."~~
+  No. Distribution only speeds up rustc invocations that *run* —
+  i.e. cache misses. Cell D iter ≥ 2 spent its 4.6 s in the cache
+  replay path with zero rustc executing.
+
+---
+
+## sccache (the OSS baseline) — structural comparison
+
+The most-asked sceptical question on this PoV will be "*why pay
+for Incredibuild when sccache is free and also caches rustc?*".
+Answer: **sccache cannot cache the work that drives most of
+monty's compile wall.** Direct apples-to-apples measurement (cell
+S = same workload with `RUSTC_WRAPPER=sccache` on `ubuntu-latest`)
+is a **follow-up PR**, not in this one — the harness needs a
+separate stats parser, and it would muddy the diff. But the
+structural ceiling can be characterised cleanly.
+
+### What sccache does NOT cache (from upstream README and `docs/Rust.md`)
+
+> **Crates that invoke the system linker cannot be cached. This
+> includes `bin`, `dylib`, `cdylib`, and `proc-macro` crates.**
+>
+> **Incrementally compiled crates cannot be cached. By default, in
+> the debug profile Cargo will use incremental compilation for
+> workspace members and path dependencies.**
+
+For monty specifically:
+
+- **~25 proc-macro crates** in the lockfile (`proc-macro2`, `syn`,
+  `quote`, `serde_derive`, `salsa-macros`, `pyo3-macros`,
+  `thiserror-impl`, `tracing-attributes`, `strum_macros`,
+  `zerocopy-derive`, …) — **never cached by sccache**.
+- **The `monty` test binary itself** is a `bin` crate with a
+  linker invocation — **never cached by sccache**. This is the
+  single largest rustc job in the workload (links `salsa` +
+  `ruff_*` + `ty_*` + monty's own crates).
+- **Cargo's debug profile defaults to `incremental=true`** for
+  workspace + path deps. sccache requires `CARGO_INCREMENTAL=0`
+  or it short-circuits as a no-op for those crates.
+
+Incredibuild's cache is at the *process* level, not the
+rustc-wrapper level: it fingerprints argv + literal-file-arg
+hashes and replays the output files of the process verbatim. That
+mechanism caches `bin`, `cdylib`, `proc-macro` crates the same way
+it caches lib crates — they're all just rustc invocations. The
+linker step is also a separate process IB can fingerprint, though
+in practice rustc handles linking inline and the cache key is on
+the whole rustc call.
+
+### Public sccache speedup numbers (the realistic ceiling on monty)
+
+| Source | Workload | Sccache speedup |
+|---|---|---|
+| [NeoSmart benchmarks 2024](https://neosmart.net/blog/benchmarking-rust-compilation-speedups-and-slowdowns-from-sccache-and-zthreads), 4-core Skylake | mid-size Rust crate, primed cache | ~5.0× |
+| Same source, 16-core Threadripper | same crate, primed cache | 1.07×, slowdowns up to 2.5× *worse* with `-Zthreads` |
+| [mozilla/sccache#2041](https://github.com/mozilla/sccache/issues/2041), nearcore (~250 crates), 96-thread | full clean build, primed cache | ~1.78× |
+| Same issue, `cargo clippy --all-features` | 50% hit rate, primed cache | 0.86× (slowdown) |
+
+**Best estimate for cell S on monty**: ~1.7–3.2× warm-cache, i.e.
+**roughly 30–40% of cell D's 8.36× ceiling**. That leaves
+Incredibuild with a measured 3–5× headroom *on top of* what
+sccache can achieve, primarily by caching the linker / proc-macro /
+incremental-compiled crates that sccache structurally cannot.
+
+### Summary statement for sceptics
+
+> sccache, the open-source rustc cache, cannot cache `bin`,
+> `proc-macro`, `cdylib`, or incrementally-compiled crates
+> (upstream README, "Known Caveats > Rust"). monty has ~25
+> proc-macro deps and produces a `bin` test binary, so sccache
+> structurally caps below Incredibuild's 8.36× ceiling at roughly
+> 1.7–3.2× based on published numbers for similarly-shaped Rust
+> workloads. A direct comparison cell `S` on the same workload
+> will land in a follow-up PR.
+
+---
+
+## Lessons logged for next time we point IB at a Rust repo
+
+- Always read `data/ib_profile.xml` first. If `rustc`/`go`/`tsc`/
+  whatever the workload uses isn't already cached there, you must
+  add an additive profile or you're paying for a remote scheduler
+  with nothing to amortise.
+- Keep the additive profile **additive** — `globals
+  ignore_following_profiles="false"` and don't redeclare entries
+  you aren't intentionally overriding.
+- Comments in IB profile XML are libxml-strict. No `--` inside
+  `<!-- -->`. (Worth a doc note in `ib_linux`.)
+- `ib_console` exits 255 if `--profile=<file>` fails to parse, and
+  takes your build with it. Validate the profile with `xmllint
+  --noout` in CI before invoking `ib_console`.
+- Resource accounting: `/usr/bin/time -v` measures the immediate
+  child. `ib_console` daemonises; user+sys+RSS will look near-zero
+  on the wrapper. Trust the wall-clock, log HIT/MISS counters
+  separately via `--build-cache-local-logfile`.
+- Self-hosted runner availability is the single biggest CI risk —
+  even with everything else green, an offline pool stalls the
+  measurement.
+
+---
+
+## Cross-repo strategy update (2026-05-12)
+
+The original PoV stopped at "monty got 1.48× on `test-rust`". Reading
+the IB control-plane (`Incredibuild-RND/vnext-processing-engine`) and
+runtime (`Incredibuild-RND/ib_linux`) end-to-end revealed that the real
+leverage is not in monty at all. Two upstream gaps account for most of
+the 12-min cap, the `cargo-ib.sh` workaround, and the structural Docker
+isolation we hit on the wheel-build matrix:
+
+1. **`build_accelerator/default_rules.yaml` ships cargo in ENV mode
+   only.** ninja and cmake are wrapped with `ib_console
+   --build-cache-local-shared` automatically, but cargo is not.
+   Customers using Rust on the JIT runner image had to ship their own
+   wrapper (e.g., monty's `scripts/cargo-ib.sh`) to get any rustc
+   acceleration.
+2. **`runner_engine/build/container-hooks/index.js` already mounts
+   `/ib-workspace/incredibuild` and `/ib-workspace/cache` into
+   `container: image: xx` jobs**, but no Rust customer has ever
+   verified this works for the manylinux glibc baseline. If it does,
+   the 7 manylinux Docker `build` matrix entries plus `build-pgo
+   linux` (8 of monty's 32 compile-bound jobs) become IB-cacheable
+   without any vnext code change.
+
+### Layered closing plan and current status
+
+| Layer | Owner | Deliverable | Status |
+|---|---|---|---|
+| **A — cargo SHIM upstream** | us → vnext PR | Promote cargo from ENV to SHIM in `default_rules.yaml`, regenerate `ib-accel/bin/cargo`, 6 new integration tests + 83 unit tests passing | **Shipped** — [vnext PR #210](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/210) merged, Tal deployed the image, and [ib-probe run 25732897099](https://github.com/Incredibuild-RND/monty/actions/runs/25732897099) found `/ib-workspace/incredibuild/ib-accel/bin/cargo` |
+| **B — manylinux probe** | us → monty | Add `manylinux-probe` job to `ib-probe.yml` running `container: manylinux_2_28_x86_64` and probing `/ib-workspace`, `ib_console` resolution, glibc compat, `--standalone` smoke test | **GREEN** — [run 25726192172](https://github.com/Incredibuild-RND/monty/actions/runs/25726192172) confirms `/ib-workspace/cache` + `/ib-workspace/incredibuild` mounted, `/usr/bin/ib_console` v3.25.2 runs under glibc 2.28, `--standalone --no-monitor -- /bin/true` connects to `ib_server` |
+| **C — hosted-grid IB profile** | Sam + IB ops | Move `scripts/ib-profile.xml` content to tenant's hosted-grid IB settings (`IB_PROFILE_CONTENT` path in `vnext-processing-engine/src/runner_engine/flows.py:109-142`); delete `IB_PROFILE` env wiring from monty | Documented in `IB_NEXT_STEPS_SAM.md` (this PR) |
+| **D — stable cache key** | us | Already correct: `cache_key = md5(tenant-repo-workflow-job)` is branch-agnostic by default. `override_cache_key` on the workflow_job exposed for cross-job sharing if we ever want `test-rust` + `bench-test` to share a target/ dir | Documented |
+| **E — wall-clock cap** | IB ops | Bump `NAMESPACE_INSTANCE_DURATION_MINUTES` from current value (~12) to 30 for the rust-heavy pool. Single config knob in vnext (`namespace_client.py:265`). Recovers `lint`, `fuzz`, and the `test-python` matrix that today must run on `ubuntu-latest` because of the cap | Action item for IB ops |
+| **F — three monty wirings** | us | `codspeed.yml::benchmarks`, `build-js x86_64-unknown-linux-gnu`, `build-js wasm32-wasip1-threads` switched to `incredibuild-runner` with conditional IB env injection | Committed on this branch |
+| **G — roadmap** | IB product | macOS / Windows IB runners, aarch64 Linux pool. Each unlocks 5 more compile-bound jobs in monty alone. Out of scope for this PR | Documented |
+
+### New bench cells (G, H, I)
+
+Three new cells extend the existing A–F matrix:
+
+- **Cell G — Layer-A SHIM canary.** Same `test-rust` workload as
+  cell F, but cargo is dispatched via a `PATH`-prepended shim. Now
+  that the runner image ships `/ib-workspace/incredibuild/ib-accel/bin/cargo`,
+  G tracking F within noise validates that the live image-side shim and
+  the canary path behave the same.
+- **Cell H — Layer-B manylinux container validation.** Same synthetic
+  `cargo test --no-run -p monty` workload as cell D, but inside a
+  GHA-level `container: image: quay.io/pypa/manylinux_2_28_x86_64` block
+  on `incredibuild-runner`. The container hook fires (proven by the
+  manylinux-probe job), `/ib-workspace` and `/opt/incredibuild` are
+  bind-mounted, and `cargo` is wrapped with `/usr/bin/ib_console
+  --standalone --build-cache-local-shared`. H tracking D within ~10%
+  proves the IB cache is fully shared host↔container and the wheel-
+  build matrix can be migrated to IB with no per-job custom plumbing.
+- **Cell I — codspeed on IB warm.** `cargo codspeed build -p
+  monty-bench --bench main` on the IB runner with rustc cache warm.
+  Validates Layer F's `codspeed.yml::benchmarks` rewire. Codspeed
+  builds the bench crate with instrumentation, so its rustc keyspace
+  is disjoint from `test-rust`'s — D/F caches don't help here, so I's
+  iter-1→iter-2 ratio is the cleanest single-job signal for the
+  every-PR codspeed workflow.
+
+The summarize step in `ib-bench.yml` and `scripts/ib-bench-summarize.py`
+both know about G, H, and I; the next workflow run will produce the
+extended speedup table automatically.
+
+### Coverage trajectory
+
+| Milestone | monty IB-cacheable jobs |
+|---|---|
+| Pre-PR (no IB integration) | 0 of 32 (0%) |
+| Today (this PR's `ci.yml::test-rust` + `test-python-coverage` + `bench-test` + `miri`) | 4 of 32 (12.5%) |
+| + Layer F (3 wirings, codspeed reverted to ubuntu) | 6 of 32 (19%) |
+| + Layer A landed in vnext (cargo SHIM auto-applies) | 6 of 32; standard cargo is out-of-the-box |
+| + Layer A2 landed in vnext (cargo extension/toolchain forms) | same job coverage; `scripts/cargo-ib.sh` removed |
+| + Layer B GREEN — manylinux Docker reachable (Phase 8 wires 1, then 8) | 14 of 32 (44%) |
+| + Layer E (cap bumped, lint/fuzz/test-python-coverage back on IB) | 17 of 32 (53%) |
+| + Layer G (macOS/Windows/aarch64 IB pools) | 27 of 32 (84%) |
+
+### Measured Cell H result — Layer-B end-to-end speedup (run 25727572729)
+
+| Cell | iter 1 (cold) | iter 2 (warm) | target/ size | A→cell speedup (iter≥2) |
+|---|---|---|---|---|
+| **A** ubuntu-latest, no IB | 38.6 s | 37.4 s | 2.10 GB | 1.00× |
+| **B** IB host, no rustc cache | 40.1 s | 24.8 s | 2.74 GB | 1.51× |
+| **C** IB host, custom profile, COLD | 47.9 s | — (1 iter only) | 2.74 GB | — |
+| **D** IB host, custom profile, WARM | 16.0 s | **5.27 s** | 2.24 GB | **7.10×** |
+| **H** IB **manylinux container**, ib_console | 37.7 s | **21.3 s** | 2.74 GB | **1.76×** |
+| **I** IB codspeed build, warm | 86.9 s | 71.6 s | 1.39 GB | (different workload — measures cargo codspeed build, not synthetic) |
+
+**Key finding from Cell H**: migrating a wheel-build matrix entry
+from `ubuntu-latest` (cell A baseline) to `incredibuild-runner` +
+manylinux `container:` block delivers a **1.76× speedup** on the
+synthetic `cargo test --no-run -p monty` workload — above the
+closure plan's 1.3× gate. Cell H validates Phase 8 of the closure
+plan: the existing `vnext-processing-engine` container hook bind-
+mounts `/ib-workspace` and `/opt/incredibuild` into a manylinux
+glibc-2.28 container, `ib_console` connects to the in-namespace
+`ib_server`, and `cargo` benefits from the IB cache.
+
+**Container overhead vs bare host**: Cell H_warm (21.3 s) is ~4× slower
+than Cell D_warm (5.27 s) on the SAME workload. The container's
+cargo cache keys are disjoint from the host's because it has a
+separate rustup install (`gcc-toolset-14` linker, container-local
+rustc binary path). This is a follow-up optimization: aligning the
+container's rust toolchain with the host's would close the gap, but
+even at 4× slower than host, Cell H_warm still beats `ubuntu-latest`
+no-IB by 1.76×, which is what the migration economics need.
+
+The remaining 5 of 32 are install/smoke jobs (`test-builds-arch`,
+`test-builds-os`) which compile nothing and have no IB applicability
+even in a perfect world.
diff --git a/IB_CLEANUP_SPEC.md b/IB_CLEANUP_SPEC.md
new file mode 100644
index 00000000..bf3f7f4b
--- /dev/null
+++ b/IB_CLEANUP_SPEC.md
@@ -0,0 +1,469 @@
+# IB integration — mechanical cleanup spec for Phases 5 / 6 / 7
+
+This is the executable companion to [`IB_NEXT_STEPS_SAM.md`](./IB_NEXT_STEPS_SAM.md).
+It records the **exact** edits each post-merge phase needs, with concrete
+file paths, line ranges, and search-and-replace patterns. Each phase is
+gated on an external dependency; once that clears, the corresponding
+section here is a paint-by-numbers PR.
+
+The point of this doc is to remove "what does the cleanup look like?"
+from the critical path. When IB ops emails Sam saying "Layer C done"
+or when a JIT runner image rebuild lands, the right person can open
+the cleanup PR in 10 minutes by following the diff below — they don't
+need to re-derive the change set.
+
+**Current correction (2026-05-13)**: vnext PR #210 and
+[vnext PR #215](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/215)
+have shipped. The runner image now handles standard cargo subcommands
+and monty's extension/toolchain forms (`cargo llvm-cov`,
+`cargo codspeed build`, and `cargo +nightly miri test`) out-of-the-box.
+`scripts/cargo-ib.sh` is deleted in the evidence branch cleanup.
+
+---
+
+## Phase 5 — Delete `scripts/cargo-ib.sh` and all `CARGO=…cargo-ib.sh` wirings
+
+### Gate
+1. [`Vnext PR #210`](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/210)
+   merged to `Incredibuild-RND/vnext-processing-engine:main`.
+2. [`Vnext PR #215`](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/215)
+   merged to `Incredibuild-RND/vnext-processing-engine:main`.
+3. The IB build team rebuilds the JIT-runner image so it carries the
+   regenerated shim at `/ib-workspace/incredibuild/ib-accel/bin/cargo`
+   (or `/opt/ib-accel/bin/cargo` on older variants).
+4. The next dispatch of `ib-probe.yml` on `ci/incredibuild-runners`
+   reports `FOUND Layer-A cargo shim:` in its `Layer-A cargo SHIM
+   deploy check (Phase 4)` log group and the generated shim includes
+   `llvm-cov`, `codspeed`, and `miri` cases.
+5. Cell G in `ib-bench.yml` (the `cargo` shim simulation) is within
+   ~10% of cell F's wall time — confirms the auto-generated shim
+   matches the hand-rolled `scripts/cargo-ib.sh` behavior.
+
+All gates are now true. This section is the applied cleanup.
+
+### Files to delete
+
+```bash
+rm scripts/cargo-ib.sh
+```
+
+### Files to edit
+
+#### `.github/workflows/ci.yml`
+
+Run once across every `./scripts/cargo-ib.sh` reference in the file:
+
+```bash
+# In each `- run: ./scripts/cargo-ib.sh <cargo-args>` line, strip the
+# `./scripts/cargo-ib.sh ` prefix so the line becomes
+# `- run: cargo <cargo-args>`. The runner image's auto-generated
+# /ib-workspace/incredibuild/ib-accel/bin/cargo handles ib_console
+# wrapping transparently via $PATH.
+sed -i 's|\./scripts/cargo-ib\.sh |cargo |g' .github/workflows/ci.yml
+```
+
+Affected lines (verify after the sed):
+- `test-rust` job, lines 144–160 (10 cargo llvm-cov calls).
+- `test-python-coverage` job, lines 249, 252, 253 (3 cargo llvm-cov calls).
+- `bench-test` job, line 436 (cargo bench).
+- `miri` job, line 480 (cargo +nightly miri test).
+
+Then remove the `CARGO=…cargo-ib.sh` env var from `test-python-coverage`:
+
+```yaml
+# DELETE these lines from test-python-coverage's env: block:
+      # Route maturin's INTERNAL cargo invocation through ib_console
+      # by the cargo `CARGO=<path>` env-var contract (cargo respects
+      # this and uses the indicated binary instead of `cargo`).
+      #
+      # Why only cargo, and not pytest / uv / maturin itself?
+      #   - The heavy work in this job is rustc (cargo build of the
+      #     pyo3 extension via maturin). Cached via the rustc entry
+      #     in scripts/ib-profile.xml.
+      #   - pytest, uv run, and maturin's top-level driver are
+      #     Python interpreters orchestrating dynamic .py imports
+      #     and venv copying. ib_console's cache key is
+      #     argv + literal-file-args, not the import graph; wrapping
+      #     these would never produce a meaningful cache hit and
+      #     would only add ib_console's startup overhead per call.
+      # See scripts/cargo-ib.sh top comment for the full rule.
+      CARGO: ${{ github.workspace }}/scripts/cargo-ib.sh
+```
+
+The comment block goes too — it's a tutorial about a contract that
+no longer needs explaining (the runner image owns it).
+
+Then remove the `CARGO=…cargo-ib.sh` line from `build-js`'s IB-env
+step (currently lines 893–900):
+
+```yaml
+# BEFORE:
+      - name: IB env (Linux IB only)
+        if: matrix.settings.host == 'incredibuild-runner'
+        run: |
+          {
+            echo "CARGO=$(pwd)/scripts/cargo-ib.sh"
+            echo "IB_MAX_LOCAL_CORES=4"
+            echo "IB_PREVENT_OVERLOAD=1"
+          } >> "$GITHUB_ENV"
+
+# AFTER:
+      - name: IB env (Linux IB only)
+        if: matrix.settings.host == 'incredibuild-runner'
+        run: |
+          {
+            echo "IB_MAX_LOCAL_CORES=4"
+            echo "IB_PREVENT_OVERLOAD=1"
+          } >> "$GITHUB_ENV"
+```
+
+Then update the comment 4 lines above to drop the napi-rs `$CARGO`
+reference:
+
+```yaml
+# BEFORE:
+      # IB pre-flight + env: only on incredibuild-runner. napi-rs
+      # (invoked by `npm run build:napi`) honors $CARGO and routes
+      # its internal cargo subcommand through our wrapper, which
+      # invokes /usr/bin/ib_console for build-cache.
+
+# AFTER:
+      # IB pre-flight + env: only on incredibuild-runner. The runner
+      # image's auto-generated /ib-workspace/incredibuild/ib-accel/bin/cargo
+      # SHIM (see vnext-processing-engine#210) wraps cargo invocations
+      # with /usr/bin/ib_console for build-cache automatically — no
+      # per-job CARGO env needed.
+```
+
+#### `.github/workflows/codspeed.yml`
+
+The `setarch personality` blocker forced this back to `ubuntu-latest`,
+so codspeed.yml does NOT reference `cargo-ib.sh` today and Phase 5
+does not touch it. Phase 9 (codspeed recovery) is what re-engages it.
+
+#### `.github/workflows/ib-bench.yml`
+
+Cells F and I previously dispatched via `./scripts/cargo-ib.sh`. Replace
+both with bare `cargo`:
+
+```yaml
+# Cell F (line 412):
+# BEFORE:  CARGO_BIN: ./scripts/cargo-ib.sh
+# AFTER:   CARGO_BIN: cargo
+
+# Cell I (line 581):
+# BEFORE:  CARGO_BIN: ./scripts/cargo-ib.sh
+# AFTER:   CARGO_BIN: cargo
+
+# Cell I top-of-job env (line 544):
+# DELETE:  CARGO: ${{ github.workspace }}/scripts/cargo-ib.sh
+```
+
+Cell G stays untouched — it's the simulation cell that demonstrates
+exactly this transition. After Phase 5 lands, Cell G's PATH-prepended
+shim becomes redundant with the runner's image-side shim and Cell G
+can be marked `continue-on-error: true` (or removed entirely) in
+Phase 10.
+
+Path filter at the top of the workflow:
+
+```yaml
+# BEFORE:
+  push:
+    branches:
+      - ci/incredibuild-runners
+    paths:
+      - .github/workflows/ib-bench.yml
+      - scripts/ib-bench-run.sh
+      - scripts/ib-bench-summarize.py
+      - scripts/cargo-ib.sh
+      - scripts/ib-profile.xml
+
+# AFTER:
+  push:
+    branches:
+      - ci/incredibuild-runners
+    paths:
+      - .github/workflows/ib-bench.yml
+      - scripts/ib-bench-run.sh
+      - scripts/ib-bench-summarize.py
+      - scripts/ib-profile.xml   # ← still here until Phase 6
+```
+
+#### `scripts/ib-bench-run.sh`
+
+`scripts/ib-bench-run.sh` already defaults to PATH-resolved `cargo`.
+If an older branch still has the auto-fallback to `./scripts/cargo-ib.sh`
+on IB hosts, remove it:
+
+```bash
+# BEFORE (around line 54):
+    CARGO_RUNNER=(./scripts/cargo-ib.sh)
+
+# AFTER:
+    CARGO_RUNNER=(cargo)
+```
+
+Verify the surrounding `if` branch — once both branches collapse to
+`cargo`, simplify the conditional.
+
+### Verification before merging Phase 5 PR
+
+1. Push to a branch off `ci/incredibuild-runners`.
+2. Trigger `ib-bench.yml` manually. Cell F (now using bare `cargo`)
+   should match the prior Cell F wall time within ~10%. If it
+   regresses, the runner image either (a) hasn't been rebuilt, or
+   (b) has the wrong subcommand whitelist — check Cell G logs to
+   pinpoint.
+3. Trigger `ib-probe.yml` — the new `Layer-A cargo SHIM deploy check`
+   group must report `FOUND`.
+4. Run a real `ci.yml` cycle on the branch (label the PR `Full Build`
+   or push-trigger). `test-rust` and `test-python-coverage` should
+   stay within ~5% of pre-Phase-5 wall time.
+
+### Commit message
+
+```
+chore(ib): retire scripts/cargo-ib.sh — runner image now ships cargo SHIM
+
+vnext-processing-engine#210 and #215 (cargo SHIM upstream) merged and
+the JIT runner image was rebuilt on <date>. The auto-generated
+/ib-workspace/incredibuild/ib-accel/bin/cargo wraps cargo subcommands
+with /usr/bin/ib_console transparently via $PATH, replacing monty's
+hand-rolled wrapper.
+
+Removed:
+  - scripts/cargo-ib.sh
+  - All ./scripts/cargo-ib.sh prefixes in ci.yml (test-rust,
+    test-python-coverage, bench-test, miri)
+  - CARGO=$(pwd)/scripts/cargo-ib.sh env wirings (test-python-coverage,
+    build-js IB-env step)
+  - CARGO_BIN: ./scripts/cargo-ib.sh from ib-bench.yml cells F and I
+  - cargo-ib.sh fallback in scripts/ib-bench-run.sh
+  - scripts/cargo-ib.sh from the ib-bench.yml push-path filter
+
+Verification: cell F (bare cargo) wall time matched prior cell F
+within X%, cell G (PATH shim simulation) is now redundant with the
+runner image's shim and continues to pass.
+```
+
+---
+
+## Phase 6 — Delete `scripts/ib-profile.xml` and `IB_PROFILE` wirings
+
+### Gate
+IB ops confirms the contents of `scripts/ib-profile.xml` are pasted
+into the hosted-grid `IB_PROFILE_CONTENT` field for the
+`Incredibuild-RND/monty` tenant, and the next ib-probe run shows the
+profile is being applied (look for `Loaded profile from
+/ib-workspace/incredibuild/ib_profile.xml` in `ib_console
+--full-version --diagnose` output).
+
+### Files to delete
+
+```bash
+rm scripts/ib-profile.xml
+```
+
+### Files to edit
+
+#### `scripts/ib-prep.sh`
+
+Find the `IB_PROFILE` export block:
+
+```bash
+# BEFORE:
+echo "IB_PROFILE=$PWD/scripts/ib-profile.xml" >> "$GITHUB_ENV"
+
+# AFTER (delete the line; the runner image now sources the profile
+# via vnext-processing-engine's entrypoint.sh:47-51).
+```
+
+If the script has surrounding diagnostic prints about IB_PROFILE,
+keep them but rewrite to read from the runner-injected location:
+
+```bash
+# REPLACE the diagnostic block with:
+PROFILE_PATH=/ib-workspace/incredibuild/ib_profile.xml
+if [ -f "$PROFILE_PATH" ]; then
+    echo "IB profile (tenant-injected): $PROFILE_PATH"
+    head -10 "$PROFILE_PATH"
+else
+    echo "no tenant IB profile present at $PROFILE_PATH"
+fi
+```
+
+#### `.github/workflows/ib-bench.yml`
+
+Delete `IB_PROFILE: ${{ github.workspace }}/scripts/ib-profile.xml`
+from cells F (line 416), G (line 519), I (line 582), and H (line 694
+if added in Phase 8).
+
+Path filter — drop `scripts/ib-profile.xml`:
+
+```yaml
+# BEFORE:
+    paths:
+      - .github/workflows/ib-bench.yml
+      - scripts/ib-bench-run.sh
+      - scripts/ib-bench-summarize.py
+      - scripts/ib-profile.xml
+
+# AFTER:
+    paths:
+      - .github/workflows/ib-bench.yml
+      - scripts/ib-bench-run.sh
+      - scripts/ib-bench-summarize.py
+```
+
+#### `.github/workflows/ci.yml`
+
+Verify with `rg IB_PROFILE`. If any per-job env block sets
+`IB_PROFILE`, delete those lines too.
+
+### Verification
+
+Trigger `ib-bench.yml`. Cells C and D (which depend on the rustc
+caching profile) should show the same hit/miss pattern as before. If
+hits drop to zero, the tenant config didn't apply — escalate back to
+IB ops with the run URL.
+
+---
+
+## Phase 7 — Re-route `lint`, `fuzz`, `test-python-coverage` back to `incredibuild-runner`
+
+### Gate
+IB ops confirms `NAMESPACE_INSTANCE_DURATION_MINUTES` for the pool
+serving `Incredibuild-RND/monty` is bumped to 30 minutes (or a
+dedicated `rust-heavy` label/pool with that cap is created).
+
+### Files to edit
+
+#### `.github/workflows/ci.yml`
+
+Three jobs to flip:
+
+1. **`lint`** (currently `runs-on: ubuntu-latest` per the wall-clock
+   revert). Switch to `incredibuild-runner` and add the conditional
+   IB env injection pattern used by `build-js` matrix entries.
+
+2. **`fuzz tokens_input_panic`** (line ~488 of `fuzz` matrix
+   strategy). Add this single matrix entry as `runs-on:
+   incredibuild-runner`; leave the other fuzz targets on
+   `ubuntu-latest` if they're not compile-bound.
+
+3. **`test-python` matrix** (line ~309). Switch the fastest entry
+   (`python-version: 3.14`) first to validate; then expand if it
+   stays under the (bumped) cap.
+
+For each, follow the pattern already in
+`test-rust`/`test-python-coverage`:
+
+```yaml
+runs-on: incredibuild-runner
+timeout-minutes: 25  # under the new 30-min cap with margin
+env:
+  CARGO_HOME: ${{ github.workspace }}/.cargo
+  CARGO_TARGET_DIR: ${{ github.workspace }}/target
+  IB_MAX_LOCAL_CORES: '8'  # tune by job profile
+  LANG: C.UTF-8
+  LC_ALL: C.UTF-8
+  PYTHONUTF8: '1'
+steps:
+  - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+  - name: IB pre-flight
+    run: ./scripts/ib-prep.sh
+  ...
+  - name: IB cache stats
+    if: always()
+    run: ./scripts/ib-stats.sh
+```
+
+### Verification
+
+Each rewired job must finish under 25 min (5 min headroom under the
+new cap) for at least 3 consecutive runs. If any flake at the cap,
+the cap bump didn't apply or the job needs `IB_MAX_LOCAL_CORES`
+tuning — collect a flame profile via the IB summary log groups and
+file with IB ops.
+
+---
+
+## Phase 8 — Migrate one wheel-build matrix entry to `incredibuild-runner` + `container:`
+
+### Gate
+Cell H of `ib-bench.yml` reports `H_warm / D_warm` within ~10%
+(green light: container vs host adds no overhead, IB cache fully
+shared). Currently dispatched as run 25727104334; check
+[ib-bench.yml workflow runs](https://github.com/Incredibuild-RND/monty/actions/workflows/ib-bench.yml).
+
+### Files to edit
+
+#### `.github/workflows/ci.yml`, `build` job
+
+Pick one matrix entry to demo first (suggested: `linux x86_64-musl`
+because it's the only Linux entry that runs natively, not via QEMU):
+
+```yaml
+# BEFORE (line 605-607):
+        - os: linux
+          target: x86_64
+          manylinux: musllinux_1_1
+
+# AFTER (split into two-tier conditional via `host`):
+        - os: linux
+          target: x86_64
+          manylinux: musllinux_1_1
+          host: incredibuild-runner
+          container: quay.io/pypa/musllinux_1_1_x86_64@sha256:<digest>
+```
+
+Then in `runs-on:` (line 619), add the IB-runner branch:
+
+```yaml
+runs-on: ${{ matrix.host || ((matrix.os == 'linux' && 'ubuntu-latest') || (matrix.os == 'macos' && 'macos-latest') || (matrix.os == 'windows' && 'windows-latest')) }}
+```
+
+And add a top-of-job container directive that's conditional:
+
+```yaml
+container: ${{ matrix.container || '' }}
+```
+
+(GHA accepts an empty `container:` value as "no container".)
+
+Inside the steps, replace `PyO3/maturin-action` (which uses its own
+child docker that bypasses the IB hook) with a direct `maturin
+build` call when `matrix.host == 'incredibuild-runner'`.
+
+### Verification
+
+Compare wheel-build wall time on the migrated matrix entry between
+the previous (ubuntu-latest + maturin-action) and new (incredibuild-
+runner + container:). Expect ≥1.3× speedup for warm runs (post-cell-D
+warm cache state). If not, debug via `IB cache stats` step output.
+
+After validation, expand the same pattern to the remaining 7 Linux
+entries (`aarch64`, `i686`, `armv7`, `ppc64le`, `s390x`,
+`x86_64-unknown-linux-gnu`, `aarch64-musl`) plus `build-pgo` linux.
+
+---
+
+## Phase 10 — Final aggregation
+
+### Gate
+Phases 5, 6, 7 (and optionally 8) all merged.
+
+### Actions
+1. Re-run `ib-bench.yml` end-to-end — produces the post-cleanup
+   speedup table covering cells A–I.
+2. Update `IB_BENCH_RESULTS.md`'s "Coverage trajectory" with measured
+   post-phase numbers (replace the projected percentages with
+   measured ones).
+3. Convert `IB_NEXT_STEPS_SAM.md` from an action-item document into a
+   roadmap-only document (delete the "What I need from Sam" section,
+   keep Layer G).
+4. Delete this `IB_CLEANUP_SPEC.md` file — it has no further purpose
+   once all phases land.
+5. Post a close-out comment on monty PR #1 with the final numbers
+   and any remaining IB-product roadmap items.
diff --git a/IB_NEXT_STEPS_SAM.md b/IB_NEXT_STEPS_SAM.md
new file mode 100644
index 00000000..8f47e182
--- /dev/null
+++ b/IB_NEXT_STEPS_SAM.md
@@ -0,0 +1,336 @@
+# IB integration — what's next for monty
+
+This is the action-item companion to [IB_BENCH_RESULTS.md](./IB_BENCH_RESULTS.md).
+The bench doc records what was measured; this doc says **what changes
+unlock the next factor of speedup, who owns each, and what the cleanup
+of the monty repo will look like** once they land.
+
+---
+
+## TL;DR
+
+The 1.48× we measured on `test-rust` is the floor, not the ceiling.
+The ceiling is constrained by **two upstream gaps in
+`Incredibuild-RND/vnext-processing-engine`** and **one Incredibuild
+operations setting**. Each is a small, surgical change with a known
+beneficiary and a known risk.
+
+| Action | Who | Effort | Effect on monty | Effect on every other IB customer |
+|---|---|---|---|---|
+| Ship `cargo` SHIM on the runner image (Layer A) | IB build-acceleration team | **Done** — vnext PR #210 merged and Tal deployed the image | Standard cargo subcommands are out-of-the-box | Every Rust workload on the JIT runner gets free `ib_console` build cache for normal cargo build/test/bench/check/clippy/run/install/rustc flows |
+| Ship cargo extension/toolchain coverage (Layer A2) | IB build-acceleration team | **Done** — [vnext PR #215](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/215) merged, Build and Deploy passed, and `ib-probe` found the rebuilt runner shim | `scripts/cargo-ib.sh` is deleted; monty now uses bare `cargo llvm-cov`, `cargo codspeed build`, and `cargo +nightly miri test` | Makes Rust CI extension workloads out-of-the-box instead of requiring repo-local bridge wrappers |
+| Run `manylinux-probe` job in `ib-probe.yml` (Layer B) | us | **Done** — probe and cell H are green; first production Linux PGO wheel job is now wired through a GHA-level manylinux container on `incredibuild-runner` | Validates the path toward 8 more IB-cacheable wheel jobs | Every Python-wheel-building customer of IB unlocked simultaneously |
+| Upload `scripts/ib-profile.xml` to your tenant's hosted-grid IB settings (Layer C) | Sam + IB ops | 5 min via the IB grid UI | `scripts/ib-profile.xml` and the `IB_PROFILE` env wiring delete from monty; profile becomes centrally-tunable without re-merging | Sets the precedent that profile config lives at the tenant level, not per-repo |
+| Bump `NAMESPACE_INSTANCE_DURATION_MINUTES` from ~12 to 30 on the Rust pool (Layer E) | IB ops | one Prefect/grid config edit | `lint` and `fuzz` jobs (currently forced to `ubuntu-latest` by the cap) move to IB; recovers a long-tail of CI time | Every Rust customer with > 12-min jobs |
+
+Layer A has shipped. The remaining high-leverage cleanup is Layer C:
+move the `ib_profile.xml` content to hosted-grid settings so monty can
+delete the temporary `IB_CONSOLE_ARGS` profile override.
+
+---
+
+## Layer A — cargo SHIM in `vnext-processing-engine`
+
+**Status**: shipped via
+[Incredibuild-RND/vnext-processing-engine#210](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/210).
+Tal deployed the rebuilt runner image and
+[`ib-probe.yml` run 25732897099](https://github.com/Incredibuild-RND/monty/actions/runs/25732897099)
+found `/ib-workspace/incredibuild/ib-accel/bin/cargo`.
+
+**One-line summary**: Promote `cargo` from `ENV` mode to `SHIM` mode in
+`src/build_accelerator/default_rules.yaml` so its compiling subcommands
+(`build`, `test`, `bench`, `check`, `clippy`, `run`, `install`,
+`rustc`) are wrapped with `/usr/bin/ib_console
+--standalone --build-cache-local-shared --build-cache-force` — exactly
+the way `ninja` and `cmake` already are.
+
+**Why this matters**: today `default_rules.yaml` ships `cargo` as
+ENV-only — it sets `CARGO_HOME`/`CARGO_TARGET_DIR`/`CARGO_INCREMENTAL`,
+but rustc work is never routed through the build cache. Every Rust
+customer of the JIT runner ends up writing the same `cargo-ib.sh`
+wrapper monty just wrote. This commit auto-generates that wrapper as
+`/opt/ib-accel/bin/cargo` so it's already in `$PATH` on every fresh
+runner.
+
+**What's in the PR**:
+- `src/build_accelerator/default_rules.yaml`: new `cargo` SHIM block
+  with `binary.commands` for the eight compiling subcommands.
+- `src/runner_engine/build/ib-accel/bin/cargo`: regenerated by `python
+  -m src.build_accelerator.generator generate
+  --output-dir src/runner_engine/build/ib-accel`.
+- 83 unit tests in `tests/build_accelerator/` updated and passing
+  (cargo is no longer in the ENV-mode test list).
+- 6 new integration tests in
+  `tests/build_accelerator/integration/test_shims.py::TestCargoSubcommandShims`
+  covering: cargo build/test wrap, cargo fmt/metadata pass through
+  unwrapped, `__IB_CARGO_WRAPPED` reentry guard, `IB_CONSOLE_SKIP=1`
+  escape hatch.
+
+**End-to-end validation**: monty's `ib-bench.yml::cell-G-ib-shim-simulation`
+runs the same `test-rust` workload as Cell F but with monty's
+`scripts/cargo-ib.sh` replaced by a `PATH`-prepended `cargo` shim that
+hand-mimics what this PR auto-generates. G tracking F within noise is
+the green light to merge.
+
+**Cleanup now applied in monty**:
+- Standard cargo calls now rely on the runner image's generated cargo
+  shim through `$PATH`.
+- `scripts/cargo-ib.sh` is deleted. [vnext PR #215](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/215)
+  adds first-class runner-image coverage for `cargo llvm-cov`,
+  `cargo codspeed build`, and `cargo +nightly miri test`.
+- Deleted the broad `CARGO=./scripts/cargo-ib.sh` env wiring from
+  `test-python-coverage` and `build-js`; maturin and napi-rs now use the
+  image-side shim when they call normal cargo subcommands.
+- Kept `scripts/ib-prep.sh`; it exports `IB_CONSOLE_ARGS` so the
+  runner-image cargo shim receives monty's rustc profile, per-job cache
+  logfile, and runner-cap mitigation flags until Layer C moves the
+  profile to hosted-grid settings.
+
+---
+
+## Layer B — manylinux Docker container probe — **GREEN**
+
+**Where**: `manylinux-probe` job in
+[`.github/workflows/ib-probe.yml`](./.github/workflows/ib-probe.yml).
+
+**Status**: validated end-to-end on
+[run 25726192172](https://github.com/Incredibuild-RND/monty/actions/runs/25726192172).
+Inside `quay.io/pypa/manylinux_2_28_x86_64@sha256:443eabd378e1…`:
+
+- `/ib-workspace/cache` and `/ib-workspace/incredibuild` are bind-mounted
+  by the container hook (`vnext-processing-engine/src/runner_engine/build/container-hooks/index.js`).
+- `/ib-workspace/incredibuild/ib-accel/bin` is at the front of `PATH`.
+- `/usr/bin/ib_console` is a symlink to `/opt/incredibuild/bin/ib_console`
+  (mounted from host) and runs cleanly under glibc 2.28
+  (`ib_console version [3.25.2]`).
+- The smoke test `ib_console --standalone --no-monitor -- /bin/true`
+  exits 0 with `Incredibuild System: ib_server connected, start process
+  execution...` — distribution to the in-namespace `ib_server` is live
+  inside the container, not just the standalone path.
+- `/ib-workspace/cache/uv` and `/ib-workspace/cache/pip` already exist
+  from the entrypoint hook, so any future `uv`/`pip` work inside a
+  manylinux container also gets that pre-warmed cache for free.
+
+**Implication**: the entire wheel-build matrix (the `build` job's 7
+Linux entries plus `build-pgo` linux) is IB-reachable today with no
+upstream change. Each migration is a two-line GHA edit:
+`runs-on: ubuntu-latest` → `runs-on: incredibuild-runner` and add
+`container: image: quay.io/pypa/manylinux_2_28_x86_64@sha256:…`.
+
+**End-to-end validation**: `ib-bench.yml::cell-H-ib-manylinux` runs the
+synthetic workload inside the same container on `incredibuild-runner`.
+H tracking D within ~10% means container vs host adds no overhead and
+the host's IB cache is fully reachable from inside the container — the
+green light to migrate the production `build` matrix.
+
+**Caveat for monty's existing `build` job**: today it uses
+`PyO3/maturin-action`, which spawns its OWN docker container internally.
+GHA's `container-hooks` only fire when the GHA workflow itself declares
+`container:` at the job level, NOT for child docker calls made by an
+action. So Phase 8 of the closure plan needs the `build` job refactored
+to either (a) use GHA-level `container:` and call `maturin build`
+directly, or (b) inject `/ib-workspace` and `/opt/incredibuild` into
+maturin-action's child docker via `docker-options: -v
+/ib-workspace:/ib-workspace -v /opt/incredibuild:/opt/incredibuild`.
+Option (a) is cleaner and what cell-H demonstrates.
+
+---
+
+## Layer C — Move `ib_profile.xml` to hosted-grid IB settings
+
+**File to extract**: [`scripts/ib-profile.xml`](./scripts/ib-profile.xml)
+
+**Where it should live**: tenant-level hosted-grid IB settings (the
+config that `vnext-processing-engine/src/runner_engine/flows.py:109-142`
+fetches via `get_hosted_grid_ib_settings` and ships to the runner as
+`IB_PROFILE_CONTENT` (base64-encoded)).
+
+**Steps for IB ops**:
+1. Open the hosted-grid configuration UI / API for monty's tenant.
+2. Paste the contents of `scripts/ib-profile.xml` into the IB profile
+   override field.
+3. Confirm by triggering a test run — the entrypoint script
+   (`runner_engine/build/entrypoint.sh:47-51`) base64-decodes
+   `IB_PROFILE_CONTENT` into `/ib-workspace/incredibuild/ib_profile.xml`
+   and `/ib-workspace/cache/ib_profile.xml`.
+
+**Cleanup that follows in monty**:
+- Delete `scripts/ib-profile.xml` from the repo.
+- Delete `IB_PROFILE=$PWD/scripts/ib-profile.xml` exports from
+  `scripts/ib-prep.sh` and from per-job `env:` blocks in `ci.yml`,
+  `ib-bench.yml`.
+- The runner picks up the profile automatically — no monty changes
+  needed beyond the deletes.
+
+**Local guardrail added here**: `scripts/ib-prep.sh` now prefers
+`/ib-workspace/cache/ib_profile.xml` or
+`/ib-workspace/incredibuild/ib_profile.xml` when the hosted-grid profile
+is present, and only falls back to `scripts/ib-profile.xml` until the
+tenant config is uploaded. `ib-probe.yml` also prints those hosted paths
+so the cleanup gate is visible in CI logs without opening a separate
+tracking issue.
+
+**Why this is correct architecture**: a profile is per-tenant tuning,
+not per-PR / per-commit data. Today every monty PR re-pushes the same
+XML; tenant-level config is the right home.
+
+---
+
+## Layer D — `cache_key` is already correct (no action needed)
+
+`flows.py:171-182` computes `cache_key = md5(tenant_id-repo-workflow-job)`.
+This is branch-agnostic and per-job, which is what we want: every
+`test-rust` run across every monty PR / every push hits the same cache
+volume.
+
+The `override_cache_key` field on the workflow_job is exposed if we
+ever want to share a `target/` dir between two related jobs (for
+example, `test-rust` and `bench-test` both compile the `monty` crate;
+sharing the cache key would let `bench-test` start with `test-rust`'s
+warm rustc artifacts). Out of scope for this PR — file as a follow-up
+if `bench-test` profiling shows it's worthwhile.
+
+---
+
+## Layer E — Bump the wall-clock cap on the Rust pool
+
+**Where it lives**:
+`vnext-processing-engine/src/runner_engine/namespace_client.py:265`
+
+```python
+duration = duration_minutes or settings.NAMESPACE_INSTANCE_DURATION_MINUTES
+```
+
+**Symptom**: monty's `lint`, `fuzz`, and `test-python` matrix were
+forced back to `ubuntu-latest` after consistently hitting a ~12 min
+wall-clock cap on the IB runner. The cap is a single Prefect/grid
+config setting, not a code change.
+
+**Ask for IB ops**:
+> "What's the current value of `NAMESPACE_INSTANCE_DURATION_MINUTES` for
+> the runner pool serving Incredibuild-RND/monty? If it's ≤ 15, please
+> bump to 30 on a dedicated 'rust-heavy' label/pool so we can move
+> `lint` and `fuzz` back to IB without forcing ubuntu-latest."
+
+**Local state until that happens**: all current IB jobs keep explicit
+`IB_MAX_LOCAL_CORES` / `IB_PREVENT_OVERLOAD` settings, while `lint`,
+`fuzz`, and the broad Python matrix stay on `ubuntu-latest`. That keeps
+CI green without pretending the Namespace cap has changed.
+
+**Effect**: 17/32 of monty's compile-bound jobs on IB (53%). Most of
+the recovered jobs (lint, fuzz) are real cargo work; the
+`test-python` matrix is structurally uncacheable (pytest dynamic
+imports) so those stay on ubuntu-latest by choice, not by cap.
+
+---
+
+## Layer F — Three monty wirings (in this PR)
+
+Status of each on `ci/incredibuild-runners`:
+
+- ❌ **`.github/workflows/codspeed.yml` intentionally stays on `ubuntu-latest`.**
+  First attempt put codspeed on IB but CI run
+  [25722680967](https://github.com/Incredibuild-RND/monty/actions/runs/25722680967)
+  reproducibly failed with `setarch: failed to set personality to
+  x86_64: Operation not permitted`. The CodSpeedHQ action shells out
+  to valgrind, which uses `setarch` to set `ADDR_NO_RANDOMIZE`
+  personality. The IB self-hosted runner image runs under restricted
+  Linux capabilities (no `SYS_ADMIN`, user-namespace remap) so the
+  personality syscall is blocked. github-hosted runners allow it.
+  Local decision: do **not** implement the hybrid build-on-IB/run-on-
+  ubuntu flow in production right now. It would require fragile
+  target-dir/artifact pinning across cargo-codspeed's instrumented
+  outputs. CodSpeed stays on `ubuntu-latest` until the runner image can
+  allow `setarch` / `personality(2)`. The cache value of the BUILD step
+  is still measured in `ib-bench.yml::cell-I-ib-codspeed` (which only
+  does `cargo codspeed build`, no valgrind run).
+  Current PR state has a separate CodSpeed failure on `ubuntu-latest`:
+  `Failed to retrieve upload data: 401 Unauthorized`. That is a
+  CodSpeed auth / repo-permissions issue, not an IB runner regression.
+- ✅ **`.github/workflows/ci.yml::build-js` matrix:** entries
+  `x86_64-unknown-linux-gnu` and `wasm32-wasip1-threads` switched to
+  `incredibuild-runner`. macOS / Windows / aarch64 entries kept on
+  their current runners (IB has no pool for those today).
+- ✅ **`.github/workflows/ci.yml::build-pgo-linux-ib`:** first
+  production manylinux wheel path moved to `incredibuild-runner` with a
+  GHA-level `manylinux_2_28` container, matching the green cell-H
+  architecture. If this validates on the release/full-build path, expand
+  the remaining Linux wheel matrix entries.
+- ✅ **Conditional IB env injection.** `CARGO`,
+  `IB_MAX_LOCAL_CORES`, `IB_PREVENT_OVERLOAD`, `ib-prep.sh`, and
+  `ib-stats.sh` only fire when `matrix.settings.host ==
+  'incredibuild-runner'`, so the matrix pattern stays clean.
+
+Layer A and Layer A2 have merged and deployed. The runner image's
+auto-generated `cargo` shim takes over via `$PATH` for normal cargo
+subcommands and the cargo extension/toolchain forms used by monty. The
+local `scripts/cargo-ib.sh` bridge is deleted.
+
+### New roadmap item discovered: IB runner needs `setarch personality`
+
+CodSpeed (and any other valgrind-based instrumentation, including
+profiling tools like `callgrind` and memory-error checkers like
+`memcheck`) cannot run on the IB self-hosted runner today because
+`setarch` is denied permission to set the `ADDR_NO_RANDOMIZE`
+personality. This blocks at minimum:
+- CodSpeed benchmarks (currently affecting monty)
+- valgrind-based memory-checker CI for any C/C++/Rust unsafe code
+- callgrind-based call-graph profiling
+- Any tool that uses `personality(2)` for ASLR control
+
+Suggested local tracking item for IB ops: enable the `personality`
+syscall in the runner image's seccomp profile (or grant `CAP_SYS_ADMIN`
+to the container). Both are common settings for build runners. Keep this
+tracked here rather than opening a separate GitHub issue.
+
+---
+
+## Layer G — IB product roadmap (out of scope for this PR)
+
+These are runner-image / pool-provisioning items for the IB product
+team. Each unlocks a specific structural blocker we hit:
+
+| Roadmap item | Unlocks in monty | Pattern outside monty |
+|---|---|---|
+| **macOS IB runner pool** | `test-rust-os macos`, `build macos x86_64`, `build-pgo macos aarch64`, `build-js x86_64-apple-darwin`, `build-js aarch64-apple-darwin` (5 jobs) | Every Rust crate that publishes macOS binaries, every PyO3 wheel for macOS |
+| **Windows IB runner pool** | `test-rust-os windows`, `build windows i686`, `build-pgo windows x86_64`, `build-js x86_64-pc-windows-msvc` (4 jobs) | Same for Windows |
+| **aarch64 Linux IB pool** | `build-js aarch64-unknown-linux-gnu`, the `aarch64-musl` and `aarch64` wheels (3 jobs in monty) | Every customer building for ARM64 Linux |
+| **`ib_console` glibc 2.28 support** (or static linking) | ~~Conditional on Layer B's probe; up to 8 manylinux Docker jobs~~ **Already works** — Layer B GREEN, ib_console runs natively under manylinux glibc 2.28 | Every PyO3 / maturin wheel-builder |
+
+If all four ship, monty IB coverage is 27 of 32 compile-bound jobs
+(84%). The remaining 5 are install/smoke tests that compile nothing
+and have no IB applicability. With Layer B already validated, the
+manylinux row above is a code change in monty (Phase 8 of the closure
+plan) rather than an IB-product item.
+
+---
+
+## What I need from Sam (concrete asks)
+
+1. **Approve the cross-repo strategy.** Specifically: that the `cargo
+   SHIM` lives upstream in vnext-processing-engine, not in monty.
+2. **Layer A is done.** [vnext PR #210](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/210)
+   merged, Tal deployed the image, and monty's probe found the live
+   cargo shim.
+3. **~~Merge/deploy Layer A2.~~** ✅ Done — [vnext PR #215](https://github.com/Incredibuild-RND/vnext-processing-engine/pull/215)
+   merged, Build and Deploy passed, and `ib-probe` found the rebuilt
+   runner shim. The local `scripts/cargo-ib.sh` bridge is removed here.
+4. **Schedule a 30-min sync with IB ops** for Layer C (profile
+   upload) + Layer E (cap bump). Both are config-only; one meeting.
+   Suggested attendees: Sam (monty), me, an IB ops engineer with
+   write access to the hosted-grid tenant config and `Settings`
+   pool config.
+5. **~~Triage Layer B's probe outcome.~~** ✅ Done — Layer B is GREEN
+   ([run 25726192172](https://github.com/Incredibuild-RND/monty/actions/runs/25726192172)).
+   Phase 8 of the closure plan (wire one manylinux build matrix entry
+   to `incredibuild-runner` + `container:`) is unblocked and Cell H
+   added to `ib-bench.yml` to measure the speedup.
+
+### Suggested 30-min agenda for the IB-ops sync (Layer C + Layer E)
+
+| Time | Topic | Owner | Outcome |
+|---|---|---|---|
+| 0:00 – 0:05 | Context: monty IB integration status, 1.48× measured on `test-rust`, what's gating further coverage | me | shared frame |
+| 0:05 – 0:15 | Layer C — paste `scripts/ib-profile.xml` into the hosted-grid `IB_PROFILE_CONTENT` field for the monty tenant; verify a probe run picks it up via `entrypoint.sh:47-51` | IB ops | profile lives at tenant level; monty PR can delete the file |
+| 0:15 – 0:25 | Layer E — confirm current `NAMESPACE_INSTANCE_DURATION_MINUTES` for the pool serving Incredibuild-RND/monty; agree on a bump to 30 (or a dedicated `rust-heavy` label/pool) | IB ops | `lint`, `fuzz`, `test-python-coverage` can move back to IB |
+| 0:25 – 0:30 | Capture the `setarch personality` blocker (Layer F roadmap) locally, decide whether to relax seccomp or document hybrid-build path | IB ops + me | decision recorded here; no external GitHub issue |
diff --git a/scripts/ib-bench-run.sh b/scripts/ib-bench-run.sh
new file mode 100755
index 00000000..8a9892ac
--- /dev/null
+++ b/scripts/ib-bench-run.sh
@@ -0,0 +1,266 @@
+#!/usr/bin/env bash
+# Runs a deterministic cargo workload N times under whatever cargo flavour
+# the surrounding job sets (plain cargo on ubuntu-latest, runner-image cargo
+# shim on incredibuild-runner), captures wall-clock + IB cache HIT/MISS + cache-dir-size
+# deltas + final target/ size, and emits one CSV row per iteration to
+# bench-results/$CELL.csv.
+#
+# Workloads (selected via WORKLOAD env, default `synthetic`):
+#   synthetic   `cargo test --no-run -p monty`. Compiles monty's test
+#               binary but doesn't execute it — exercises the same rustc
+#               work that dominates the production test-rust job, without
+#               depending on the third-party cargo-llvm-cov subcommand.
+#               Used by cells A/B/C/D for fast cell-comparison signal.
+#   test-rust   The 8-call `cargo llvm-cov` sequence from
+#               .github/workflows/ci.yml::test-rust, replayed verbatim.
+#               Used by cells E (ubuntu-latest baseline) and F (IB warm
+#               cache) so the E→F speedup is the directly measured
+#               realistic test-rust speedup, not an extrapolation from
+#               the synthetic workload.
+#
+# Cargo dispatcher:
+#   - explicit `CARGO_BIN` env wins;
+#   - otherwise, use PATH-resolved `cargo`. On incredibuild-runner this is
+#     the vnext-processing-engine generated cargo shim; elsewhere it is
+#     plain cargo.
+#
+# CSV columns (one row per iteration; for multi-call workloads,
+# wall/user/sys are summed across calls and rss is the per-call max):
+#   iteration, wall_seconds, user_seconds, sys_seconds, max_rss_kb,
+#   hits, misses, cache_size_bytes_delta, target_size_bytes,
+#   coverage_sha256
+#
+# coverage_sha256 is left empty here; the `synthetic` workload doesn't
+# produce a stable artifact, and the `test-rust` workload skips
+# `llvm-cov report` (the artifact emit step is not part of the rustc-
+# bound work we're measuring).
+
+set -uo pipefail
+
+CELL="${CELL:?CELL must be set (A/B/C/D/E/F)}"
+ITERATIONS="${ITERATIONS:-3}"
+[ -z "$ITERATIONS" ] && ITERATIONS=3
+WORKLOAD="${WORKLOAD:-synthetic}"
+
+mkdir -p bench-results
+OUT="bench-results/${CELL}.csv"
+echo "iteration,wall_seconds,user_seconds,sys_seconds,max_rss_kb,hits,misses,cache_size_bytes_delta,target_size_bytes,coverage_sha256" > "$OUT"
+
+# Cargo dispatcher.
+if [ -n "${CARGO_BIN:-}" ]; then
+    # shellcheck disable=SC2206  # caller-controlled, intentional split
+    CARGO_RUNNER=($CARGO_BIN)
+else
+    CARGO_RUNNER=(cargo)
+fi
+
+# Workload definition.
+case "$WORKLOAD" in
+    synthetic)
+        WORKLOAD_CMDS=("test --no-run -p monty")
+        ;;
+    test-rust)
+        # Mirrors .github/workflows/ci.yml::test-rust (the 7 cargo llvm-cov
+        # invocations plus the leading `clean`). The trailing `report`
+        # steps are intentionally omitted — they emit text/codecov from
+        # already-compiled coverage data, not rustc work, so they would
+        # add wall-clock noise without measuring anything we care about.
+        WORKLOAD_CMDS=(
+            "llvm-cov clean --workspace"
+            "llvm-cov --no-report -p monty"
+            "llvm-cov run --no-report -p monty-datatest"
+            "llvm-cov --no-report -p monty --features memory-model-checks"
+            "llvm-cov run --no-report -p monty-datatest --features memory-model-checks"
+            "llvm-cov --no-report -p monty --features ref-count-return"
+            "llvm-cov run --no-report -p monty-datatest --features ref-count-return"
+            "llvm-cov --no-report -p monty_type_checking -p monty_typeshed"
+        )
+        ;;
+    codspeed)
+        # Mirrors .github/workflows/codspeed.yml::benchmarks. The
+        # `cargo install cargo-codspeed` step is left to the workflow
+        # (idempotent across iterations: the binary persists in
+        # CARGO_HOME/bin so iter ≥ 2 is a no-op install). Only the
+        # actual rustc-bound `cargo codspeed build` is in the workload,
+        # which is what Layer F (codspeed.yml on incredibuild-runner)
+        # actually accelerates.
+        WORKLOAD_CMDS=(
+            "codspeed build -p monty-bench --bench main"
+        )
+        ;;
+    *)
+        echo "::error::unknown WORKLOAD=$WORKLOAD (expected synthetic|test-rust|codspeed)"
+        exit 2
+        ;;
+esac
+
+echo "::group::bench setup diagnostic"
+echo "CELL=$CELL ITERATIONS=$ITERATIONS WORKLOAD=$WORKLOAD"
+echo "CARGO_RUNNER=${CARGO_RUNNER[*]}"
+echo "WORKLOAD_CMDS:"
+for c in "${WORKLOAD_CMDS[@]}"; do echo "  cargo $c"; done
+echo "PWD=$PWD"
+echo "PATH=$PATH"
+echo "which cargo: $(command -v cargo || echo MISSING)"
+cargo --version 2>&1 || echo "cargo --version FAILED"
+rustc --version --verbose 2>&1 || echo "rustc --version FAILED"
+ls -la /usr/bin/ib_console 2>&1 || true
+ls -la /usr/bin/time 2>&1 || true
+ls -la /etc/incredibuild/log/ 2>&1 || true
+echo "::endgroup::"
+
+cache_size() {
+    local d="/etc/incredibuild/cache/build_cache/shared"
+    if [ -d "$d" ]; then
+        du -sb "$d" 2>/dev/null | awk '{print $1+0}'
+    else
+        echo 0
+    fi
+}
+
+target_size() {
+    local d="${CARGO_TARGET_DIR:-target}"
+    if [ -d "$d" ]; then
+        du -sb "$d" 2>/dev/null | awk '{print $1+0}'
+    else
+        echo 0
+    fi
+}
+
+count_logfile() {
+    # Sum HIT / MISS counts across all per-job IB cache logfiles.
+    local dir="/etc/incredibuild/log"
+    local kind="$1"
+    if [ -d "$dir" ]; then
+        local n
+        n=$(grep -h -c -E "^${kind}[[:space:]]" "$dir"/ib_cache_*.log 2>/dev/null \
+            | awk '{s+=$1} END {print s+0}')
+        echo "${n:-0}"
+    else
+        echo 0
+    fi
+}
+
+# Run a single cargo invocation under /usr/bin/time -v (or a date
+# fallback). Sets globals: call_wall, call_user, call_sys, call_rss,
+# call_rc. Tolerates non-zero exit codes (the data point is still
+# valuable; we surface a ::warning:: and let the iteration continue).
+run_one() {
+    local args_str="$1"
+    # shellcheck disable=SC2206  # workload-controlled, intentional split
+    local -a args=($args_str)
+    call_wall=0
+    call_user=0
+    call_sys=0
+    call_rss=0
+    call_rc=0
+    local time_out
+    time_out=$(mktemp)
+    set +e
+    if [ -x /usr/bin/time ]; then
+        /usr/bin/time -v -o "$time_out" \
+            "${CARGO_RUNNER[@]}" "${args[@]}"
+        call_rc=$?
+    else
+        echo "::warning::/usr/bin/time missing, using date fallback (no user/sys/rss)"
+        local t0 t1
+        t0=$(date +%s.%N)
+        "${CARGO_RUNNER[@]}" "${args[@]}"
+        call_rc=$?
+        t1=$(date +%s.%N)
+        call_wall=$(python3 -c "print(f'{${t1}-${t0}:.3f}')")
+    fi
+    set -e
+    if [ -s "$time_out" ]; then
+        echo "--- /usr/bin/time -v: cargo ${args_str} ---"
+        cat "$time_out"
+        echo "---"
+        local wall user sys rss
+        wall=$(awk -F': ' '/Elapsed \(wall clock\) time/ {print $2}' "$time_out" 2>/dev/null | tail -1)
+        user=$(awk -F': ' '/User time \(seconds\)/ {print $2+0}' "$time_out" 2>/dev/null | tail -1)
+        sys=$(awk -F': ' '/System time \(seconds\)/ {print $2+0}' "$time_out" 2>/dev/null | tail -1)
+        rss=$(awk -F': ' '/Maximum resident set size/ {print $2+0}' "$time_out" 2>/dev/null | tail -1)
+        call_user="${user:-0}"
+        call_sys="${sys:-0}"
+        call_rss="${rss:-0}"
+        # Convert HH:MM:SS, MM:SS, SS, or SS.ss into seconds.
+        call_wall=$(python3 - <<PY
+w = "${wall:-0}".strip()
+if not w:
+    print(0); raise SystemExit
+parts = [float(p) for p in w.split(":")]
+secs = 0.0
+for p in parts:
+    secs = secs * 60 + p
+print(f"{secs:.3f}")
+PY
+)
+    fi
+    rm -f "$time_out"
+}
+
+# Each iteration:
+#   1. clean target/ (full rebuild)
+#   2. snapshot pre-cache size + HIT/MISS
+#   3. run each workload command under /usr/bin/time -v
+#   4. snapshot post-cache size + HIT/MISS deltas
+#   5. emit one CSV row aggregating across the workload's calls
+# We capture each call's exit code but DO NOT abort the loop — the data
+# point is still valuable and we want all iterations visible in the CSV.
+for i in $(seq 1 "$ITERATIONS"); do
+    echo "::group::cell ${CELL} iteration ${i}/${ITERATIONS}"
+
+    # Clean the cargo build dir between iterations so the rustc work
+    # is real every time. Use direct rm rather than `cargo clean` to
+    # avoid any cargo-subcommand dispatch quirks under ib_console.
+    # Honor $CARGO_TARGET_DIR so cells that route to a non-default
+    # target dir (e.g. cell H, which uses target-h/ to stay isolated
+    # from host-side cells' target/) actually clean their own dir.
+    _target_dir="${CARGO_TARGET_DIR:-target}"
+    rm -rf "$_target_dir" 2>&1 | tail -5 || true
+    unset _target_dir
+
+    pre_cache=$(cache_size)
+    pre_hits=$(count_logfile HIT)
+    pre_misses=$(count_logfile MISS)
+    echo "pre: cache=${pre_cache}B hits=${pre_hits} misses=${pre_misses}"
+
+    iter_wall=0
+    iter_user=0
+    iter_sys=0
+    iter_max_rss=0
+    iter_rc=0
+    for cmd in "${WORKLOAD_CMDS[@]}"; do
+        echo ":: cargo $cmd"
+        run_one "$cmd"
+        iter_wall=$(python3 -c "print(f'{${iter_wall}+${call_wall}:.3f}')")
+        iter_user=$(python3 -c "print(f'{${iter_user}+${call_user}:.3f}')")
+        iter_sys=$(python3 -c "print(f'{${iter_sys}+${call_sys}:.3f}')")
+        if [ "${call_rss:-0}" -gt "${iter_max_rss:-0}" ] 2>/dev/null; then
+            iter_max_rss="$call_rss"
+        fi
+        if [ "$call_rc" -ne 0 ]; then
+            iter_rc=$call_rc
+            echo "::warning::cargo $cmd in iter $i exited $call_rc"
+        fi
+    done
+
+    post_cache=$(cache_size)
+    post_hits=$(count_logfile HIT)
+    post_misses=$(count_logfile MISS)
+    delta_cache=$((post_cache - pre_cache))
+    delta_hits=$((post_hits - pre_hits))
+    delta_misses=$((post_misses - pre_misses))
+    target=$(target_size)
+
+    echo "post: cache=${post_cache}B hits=${post_hits} misses=${post_misses} target=${target}B"
+    echo "deltas: cache=${delta_cache}B hits=${delta_hits} misses=${delta_misses}"
+    echo "iter=$i wall=${iter_wall}s user=${iter_user}s sys=${iter_sys}s rss=${iter_max_rss}kb rc=${iter_rc}"
+    echo "$i,$iter_wall,$iter_user,$iter_sys,$iter_max_rss,$delta_hits,$delta_misses,$delta_cache,$target," >> "$OUT"
+
+    echo "::endgroup::"
+done
+
+echo "::group::wrote $OUT"
+cat "$OUT"
+echo "::endgroup::"
diff --git a/scripts/ib-bench-summarize.py b/scripts/ib-bench-summarize.py
new file mode 100755
index 00000000..b5f94e31
--- /dev/null
+++ b/scripts/ib-bench-summarize.py
@@ -0,0 +1,311 @@
+#!/usr/bin/env python3
+"""Aggregate ib-bench per-cell CSVs into a markdown table.
+
+Each cell of the bench workflow drops a CSV at
+  bench-results/<cell>.csv
+
+with header:
+  iteration,wall_seconds,user_seconds,sys_seconds,max_rss_kb,hits,misses,cache_size_bytes_delta,target_size_bytes,coverage_sha256
+
+This script reads them, computes mean/stddev for wall_seconds, and writes
+a comparison table plus speedup ratios (B/A, C/A, D/A on the synthetic
+workload; F/E on the real test-rust workload; G vs F for the Layer-A
+SHIM-simulation no-regression check; I steady-state for codspeed) to
+$GITHUB_STEP_SUMMARY (if set) and stdout.
+
+Usage:
+  scripts/ib-bench-summarize.py bench-results/
+"""
+
+from __future__ import annotations
+
+import csv
+import math
+import os
+import statistics
+import sys
+from pathlib import Path
+
+CELLS: list[tuple[str, str]] = [
+    ('A', 'ubuntu-latest, no IB'),
+    ('B', 'IB, default profile (rustc NOT cached)'),
+    ('C', 'IB, custom profile (rustc cached) — COLD'),
+    ('D', 'IB, custom profile (rustc cached) — WARM'),
+    ('E', 'ubuntu-latest, real test-rust workload (8 cargo invocations)'),
+    ('F', 'IB runner, real test-rust workload, warm cache'),
+    ('G', 'IB runner, real test-rust via Layer-A SHIM canary'),
+    ('H', 'IB runner, manylinux_2_28 GHA container, synthetic workload, IB warm'),
+    ('I', 'IB runner, codspeed build workload, warm cache'),
+]
+
+
+def read_cell(path: Path) -> list[dict[str, str]]:
+    if not path.is_file():
+        return []
+    with path.open() as f:
+        return list(csv.DictReader(f))
+
+
+def fnum(rows: list[dict[str, str]], key: str) -> list[float]:
+    out: list[float] = []
+    for r in rows:
+        v = r.get(key, '')
+        try:
+            out.append(float(v))
+        except ValueError:
+            continue
+    return out
+
+
+def fmt_mean_std(xs: list[float], unit: str = 's') -> str:
+    if not xs:
+        return '—'
+    if len(xs) == 1:
+        return f'{xs[0]:.1f}{unit}'
+    m = statistics.mean(xs)
+    s = statistics.stdev(xs)
+    return f'{m:.1f} ± {s:.1f}{unit}'
+
+
+def fmt_ratio(num: list[float], den: list[float]) -> str:
+    if not num or not den:
+        return '—'
+    a = statistics.mean(num)
+    b = statistics.mean(den)
+    if a == 0:
+        return '—'
+    return f'{b / a:.2f}x'
+
+
+def fmt_int_mean(xs: list[float]) -> str:
+    if not xs:
+        return '—'
+    return f'{statistics.mean(xs):.0f}'
+
+
+def fmt_bytes(n: float | None) -> str:
+    if n is None or math.isnan(n):
+        return '—'
+    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB')
+    i = 0
+    f = float(n)
+    while abs(f) >= 1024 and i < len(units) - 1:
+        f /= 1024
+        i += 1
+    return f'{f:.1f} {units[i]}'
+
+
+def main(results_dir: str) -> int:
+    base = Path(results_dir)
+    cells: dict[str, list[dict[str, str]]] = {}
+    for label, _ in CELLS:
+        cells[label] = read_cell(base / f'{label}.csv')
+
+    lines: list[str] = []
+    lines.append('# IB build-runner value matrix')
+    lines.append('')
+    lines.append('Cells A/B/C/D run the synthetic `cargo test --no-run -p monty` workload')
+    lines.append('(fast cell-comparison signal). Cells E/F run the real test-rust')
+    lines.append('workload (8 `cargo llvm-cov` calls per iteration, mirroring')
+    lines.append('`.github/workflows/ci.yml::test-rust`) for a directly measured')
+    lines.append('ubuntu-latest → IB speedup.')
+    lines.append('')
+    lines.append('| cell | configuration | wall time | hits | misses | target/ size |')
+    lines.append('|---|---|---|---|---|---|')
+    for label, desc in CELLS:
+        rows = cells.get(label, [])
+        wall = fnum(rows, 'wall_seconds')
+        hits = fnum(rows, 'hits')
+        misses = fnum(rows, 'misses')
+        target = fnum(rows, 'target_size_bytes')
+        target_str = fmt_bytes(statistics.mean(target)) if target else '—'
+        lines.append(
+            f'| **{label}** | {desc} | {fmt_mean_std(wall)} | '
+            f'{fmt_int_mean(hits)} | {fmt_int_mean(misses)} | {target_str} |'
+        )
+    lines.append('')
+
+    a_wall = fnum(cells.get('A', []), 'wall_seconds')
+    a_warm = a_wall[1:] if len(a_wall) > 1 else a_wall
+    b_warm = fnum(cells.get('B', []), 'wall_seconds')[1:]
+    d_warm = fnum(cells.get('D', []), 'wall_seconds')[1:]
+    e_wall = fnum(cells.get('E', []), 'wall_seconds')
+    f_wall = fnum(cells.get('F', []), 'wall_seconds')
+    g_wall = fnum(cells.get('G', []), 'wall_seconds')
+    h_wall = fnum(cells.get('H', []), 'wall_seconds')
+    i_wall = fnum(cells.get('I', []), 'wall_seconds')
+    e_warm = e_wall[1:] if len(e_wall) > 1 else e_wall
+    f_warm = f_wall[1:] if len(f_wall) > 1 else f_wall
+    g_warm = g_wall[1:] if len(g_wall) > 1 else g_wall
+    h_warm = h_wall[1:] if len(h_wall) > 1 else h_wall
+    i_warm = i_wall[1:] if len(i_wall) > 1 else i_wall
+
+    lines.append('## Speedup vs ubuntu-latest baseline (A) — synthetic workload')
+    lines.append('')
+    lines.append('Each cell aggregates ALL iterations (cold + warm). Iter 1 of B/C/D')
+    lines.append('includes one-time costs (cargo registry warmup on B, cache fill on')
+    lines.append('C/D first-time-on-this-runner) so the all-iter mean understates')
+    lines.append('steady-state value. The bottom row reports warm-only steady-state')
+    lines.append('(iter ≥ 2) which is the apples-to-apples answer to "how fast is a')
+    lines.append('CI run after the cache is filled".')
+    lines.append('')
+    lines.append('| comparison | meaning | speedup (all iters) |')
+    lines.append('|---|---|---|')
+    for label, _ in CELLS[1:4]:
+        rows = cells.get(label, [])
+        w = fnum(rows, 'wall_seconds')
+        meaning = {
+            'B': 'ib_console overhead floor (no rustc cache)',
+            'C': 'first run on a clean IB runner',
+            'D': 'every push after the first (warm rustc cache)',
+        }[label]
+        lines.append(f'| **A → {label}** | {meaning} | {fmt_ratio(w, a_wall)} |')
+    lines.append('')
+    lines.append('| steady-state comparison | iters used | baseline wall | comparison wall | speedup |')
+    lines.append('|---|---|---|---|---|')
+    if a_warm and b_warm:
+        lines.append(
+            f'| **A → B steady (no rustc cache, registry warm)** | A iter≥2, B iter≥2 | '
+            f'{fmt_mean_std(a_warm)} | {fmt_mean_std(b_warm)} | {fmt_ratio(b_warm, a_warm)} |'
+        )
+    if a_warm and d_warm:
+        lines.append(
+            f'| **A → D steady (rustc cache hit, warm)** | A iter≥2, D iter≥2 | '
+            f'{fmt_mean_std(a_warm)} | {fmt_mean_std(d_warm)} | {fmt_ratio(d_warm, a_warm)} |'
+        )
+    lines.append('')
+
+    lines.append('## Realistic test-rust speedup (E → F)')
+    lines.append('')
+    lines.append('The apples-to-apples measurement: same 8-call cargo llvm-cov')
+    lines.append('sequence as `ci.yml::test-rust`, run on ubuntu-latest (E) vs')
+    lines.append('the IB runner with rustc cache warmed (F). iter ≥ 2 mean is')
+    lines.append('the directly measured warm-cache speedup that previously had')
+    lines.append('to be inferred from real-CI logs.')
+    lines.append('')
+    lines.append('| cell | iter 1 (cold) | iter 2 (warm) | iter≥2 mean |')
+    lines.append('|---|---|---|---|')
+    for label in ('E', 'F'):
+        w = fnum(cells.get(label, []), 'wall_seconds')
+        i1 = f'{w[0]:.1f}s' if w else '—'
+        i2 = f'{w[1]:.1f}s' if len(w) > 1 else '—'
+        warm = w[1:] if len(w) > 1 else []
+        lines.append(f'| **{label}** | {i1} | {i2} | {fmt_mean_std(warm)} |')
+    lines.append('')
+    lines.append('| steady-state comparison | iters used | ubuntu (E) wall | IB (F) wall | speedup |')
+    lines.append('|---|---|---|---|---|')
+    if e_warm and f_warm:
+        lines.append(
+            f'| **E → F steady (real test-rust, warm cache)** | E iter≥2, F iter≥2 | '
+            f'{fmt_mean_std(e_warm)} | {fmt_mean_std(f_warm)} | {fmt_ratio(f_warm, e_warm)} |'
+        )
+    elif e_wall and not f_wall:
+        lines.append(f'| **E only (cell F blocked)** | E iter≥2 | {fmt_mean_std(e_warm or e_wall)} | — | — |')
+    lines.append('')
+
+    # Layer A SHIM canary: F (runner-image cargo shim) vs G
+    # (PATH-prepended cargo shim). G should track F within noise.
+    lines.append('## Layer-A SHIM canary (F → G)')
+    lines.append('')
+    lines.append('Cell F uses the live runner-image cargo shim that ships from')
+    lines.append('`vnext-processing-engine/src/build_accelerator/default_rules.yaml`.')
+    lines.append('Cell G runs the same workload with a PATH-prepended canary shim.')
+    lines.append('G tracking F within noise confirms the image-side shim remains')
+    lines.append('compatible with monty after Layer A shipped upstream.')
+    lines.append('')
+    lines.append('| comparison | iters used | F wall | G wall | ratio (G/F) |')
+    lines.append('|---|---|---|---|---|')
+    if f_warm and g_warm:
+        lines.append(
+            f'| **F → G steady (real test-rust, warm cache)** | F iter≥2, G iter≥2 | '
+            f'{fmt_mean_std(f_warm)} | {fmt_mean_std(g_warm)} | {fmt_ratio(f_warm, g_warm)} |'
+        )
+    elif g_wall:
+        lines.append(f'| **G only (cell F blocked)** | G iter≥2 | — | {fmt_mean_std(g_warm or g_wall)} | — |')
+    lines.append('')
+
+    # Layer B validation: H (synthetic in manylinux container on IB) vs D
+    # (synthetic on bare IB host). H_warm / D_warm ≈ 1.0 means the
+    # container hook's bind mount makes the IB cache fully reachable
+    # from inside the container — i.e. the 8 manylinux build matrix
+    # entries can be migrated to incredibuild-runner with no per-job
+    # custom plumbing beyond `runs-on:` + `container:`.
+    lines.append('## Layer-B manylinux container validation (D → H)')
+    lines.append('')
+    lines.append('Cell H runs the same synthetic workload as D but inside a GHA-level')
+    lines.append('`container: image: quay.io/pypa/manylinux_2_28_x86_64@sha256:...`')
+    lines.append('block, which fires `vnext-processing-engine`\u2019s container-hooks/index.js')
+    lines.append('and bind-mounts /ib-workspace + /opt/incredibuild into the container.')
+    lines.append('H tracking D within ~10% is the green light to migrate the wheel-build')
+    lines.append('matrix (`build` job, 8 Linux entries) onto `incredibuild-runner` without')
+    lines.append('any per-job IB plumbing beyond switching `runs-on:` + adding `container:`.')
+    lines.append('')
+    lines.append('| comparison | iters used | D wall | H wall | ratio (H/D) |')
+    lines.append('|---|---|---|---|---|')
+    if d_warm and h_warm:
+        lines.append(
+            f'| **D \u2192 H steady (synthetic, IB warm, container vs host)** | D iter\u22652, H iter\u22652 | '
+            f'{fmt_mean_std(d_warm)} | {fmt_mean_std(h_warm)} | {fmt_ratio(d_warm, h_warm)} |'
+        )
+    elif h_wall:
+        lines.append(f'| **H only** | H iter\u22652 | \u2014 | {fmt_mean_std(h_warm or h_wall)} | \u2014 |')
+    lines.append('')
+
+    # Layer F (codspeed.yml on IB) value cell.
+    lines.append('## Codspeed workload on IB (cell I)')
+    lines.append('')
+    lines.append('Measures the directly-wired `codspeed.yml::benchmarks` job')
+    lines.append('(`cargo codspeed build -p monty-bench --bench main`) on IB with')
+    lines.append('rustc cache warm. Codspeed builds the bench crate with')
+    lines.append('instrumentation, so its rustc keyspace is disjoint from')
+    lines.append("test-rust's — D/F warm caches do not help here.")
+    lines.append('')
+    lines.append('| cell | iter 1 (cold) | iter 2 (warm) | iter≥2 mean |')
+    lines.append('|---|---|---|---|')
+    if i_wall:
+        i1 = f'{i_wall[0]:.1f}s'
+        i2 = f'{i_wall[1]:.1f}s' if len(i_wall) > 1 else '—'
+        lines.append(f'| **I** | {i1} | {i2} | {fmt_mean_std(i_warm)} |')
+    else:
+        lines.append('| **I** | — | — | — |')
+    lines.append('')
+
+    # Correctness gate.
+    shas: dict[str, set[str]] = {}
+    for label, _ in CELLS:
+        shas[label] = {r.get('coverage_sha256', '') for r in cells.get(label, []) if r.get('coverage_sha256')}
+    all_shas: set[str] = set()
+    for s in shas.values():
+        all_shas |= s
+    lines.append('## Artifact correctness')
+    lines.append('')
+    if len(all_shas) <= 1 and all_shas:
+        sha = next(iter(all_shas))
+        lines.append(f'All cells produced byte-identical `rust-coverage.json`: `{sha[:16]}…`')
+    elif not all_shas:
+        lines.append('No coverage artifact hashes recorded.')
+    else:
+        lines.append('**MISMATCH** — IB cache produced different output from plain cargo:')
+        lines.append('')
+        lines.append('| cell | distinct sha256 |')
+        lines.append('|---|---|')
+        for label, _ in CELLS:
+            seen = sorted(shas.get(label, set()))
+            lines.append(f'| {label} | ' + ', '.join(f'`{s[:12]}…`' for s in seen) + ' |')
+    lines.append('')
+
+    out = '\n'.join(lines) + '\n'
+    sys.stdout.write(out)
+    summary = os.environ.get('GITHUB_STEP_SUMMARY')
+    if summary:
+        with open(summary, 'a', encoding='utf-8') as f:
+            f.write(out)
+    # Exit non-zero if correctness gate failed and we have data from at
+    # least 2 cells.
+    if len(all_shas) > 1 and sum(1 for s in shas.values() if s) >= 2:
+        return 1
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1] if len(sys.argv) > 1 else 'bench-results/'))
diff --git a/scripts/ib-prep.sh b/scripts/ib-prep.sh
new file mode 100755
index 00000000..87bb6dd2
--- /dev/null
+++ b/scripts/ib-prep.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+# IB-runner job pre-flight setup.
+#
+# Bundles all the boilerplate that every IB-routed job needs into one
+# script so the workflow stays small. Idempotent and tolerant of
+# non-IB runners (no-op fallthroughs).
+#
+# Effects:
+#   1. Bootstrap sudo / curl / wget / unzip / ca-certificates on lean
+#      runner images (no-op when already present, so safe everywhere).
+#   2. Pre-flight diagnostics: ib_console version, cache directory
+#      state, profile presence. Visible in the GitHub Actions log so
+#      it's obvious what state IB is in before the job's real work.
+#   3. Ensure libpython3.X.so is linkable for pyo3-using crates.
+#      python-build-standalone tarballs ship only libpython3.X.so.1.0
+#      and bake /opt/hostedtoolcache/Python/... into sysconfig, so we
+#      create the missing .so symlink at $sys.prefix/lib and export
+#      LIBRARY_PATH / LD_LIBRARY_PATH for cc / lld fallback.
+#   4. Ensure .venv/bin/python3 at workspace root if uv + pyproject.toml
+#      are present. monty's .cargo/config.toml sets
+#      PYO3_PYTHON=.venv/bin/python3 (relative), which is fine for
+#      local development but needs that path to actually exist when
+#      cargo runs under prek/clippy on a fresh CI clone.
+#
+# Background:
+#   - ib_console CLI: ib_linux:cpp/XgConsole/XgConsole_main.cpp
+#   - cache path:     ib_linux:cpp/BuildCache/BuildCache_defines.h
+#                     BUILD_CACHE_LOCAL_PATH=/etc/incredibuild/cache/build_cache/shared
+
+set -euo pipefail
+echo "::group::IB pre-flight"
+
+# 1. baseline tooling -----------------------------------------------------
+is_root() { [ "$(id -u)" = "0" ]; }
+
+if is_root && ! command -v sudo >/dev/null 2>&1; then
+    cat > /usr/local/bin/sudo <<'EOF'
+#!/bin/sh
+exec "$@"
+EOF
+    chmod +x /usr/local/bin/sudo
+fi
+
+apt_install() {
+    if is_root; then
+        apt-get update -qq
+        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "$@"
+    else
+        sudo apt-get update -qq
+        DEBIAN_FRONTEND=noninteractive sudo apt-get install -y --no-install-recommends "$@"
+    fi
+}
+
+missing=()
+for tool in wget curl unzip; do
+    command -v "$tool" >/dev/null 2>&1 || missing+=("$tool")
+done
+# `time` (GNU /usr/bin/time, not the bash builtin) is needed by the
+# ib-bench measurement script. Lean IB runner images don't ship it.
+if [ ! -x /usr/bin/time ]; then
+    missing+=(time)
+fi
+if [ "${#missing[@]}" -gt 0 ]; then
+    missing+=(ca-certificates)
+    apt_install "${missing[@]}"
+fi
+
+# 2. ib_console + cache state --------------------------------------------
+if [ -x /usr/bin/ib_console ]; then
+    /usr/bin/ib_console --version 2>&1 | head -3 || true
+    for d in /etc/incredibuild/cache/build_cache/shared \
+             /etc/incredibuild/cache/build_cache/builds \
+             /etc/incredibuild/db; do
+        if [ -d "$d" ]; then
+            echo "$(du -sh "$d" 2>/dev/null | head -1) (files: $(find "$d" -maxdepth 3 -type f 2>/dev/null | wc -l))"
+        fi
+    done
+else
+    echo "ib_console not present — wrapper will fall through to plain cargo"
+fi
+for profile_candidate in /ib-workspace/cache/ib_profile.xml \
+                         /ib-workspace/incredibuild/ib_profile.xml \
+                         scripts/ib-profile.xml; do
+    ls -la "$profile_candidate" 2>/dev/null || true
+done
+
+# 2b. export IB_CACHE_LOG / IB_PROFILE / IB_CONSOLE_ARGS ------------------
+# Logfile path must be ABSOLUTE (XgConsole_main.cpp:482). We put it under
+# /etc/incredibuild/log/ — the canonical IB log dir on the runner image
+# (ib-stats.sh already greps there), which survives any chroot/namespace
+# teardown ib_console may do for intercepted processes. Per-job filename
+# so concurrent jobs on the same runner don't stomp each other's log.
+#
+# The vnext-processing-engine cargo shim reads IB_CONSOLE_ARGS and uses it
+# instead of its built-in default args. Prefer the hosted-grid profile that
+# vnext decodes into /ib-workspace; fall back to the repo profile only until
+# IB ops has uploaded the tenant-level profile.
+if [ -n "${GITHUB_ENV:-}" ]; then
+    job_id="${GITHUB_JOB:-local}_${GITHUB_RUN_ID:-0}_${GITHUB_RUN_ATTEMPT:-1}"
+    log_path="/etc/incredibuild/log/ib_cache_${job_id}.log"
+    profile_path=""
+    for candidate in /ib-workspace/cache/ib_profile.xml \
+                     /ib-workspace/incredibuild/ib_profile.xml \
+                     "$PWD/scripts/ib-profile.xml"; do
+        if [ -f "$candidate" ]; then
+            profile_path="$candidate"
+            break
+        fi
+    done
+    ib_console_args="--standalone --build-cache-local-shared --build-cache-force --build-cache-basedir=$PWD --build-cache-local-logfile=$log_path --build-cache-report-all-miss --no-monitor"
+    if [ -n "${IB_MAX_LOCAL_CORES:-}" ]; then
+        ib_console_args="$ib_console_args --max-local-cores=$IB_MAX_LOCAL_CORES"
+    fi
+    if [ -n "${IB_PREVENT_OVERLOAD:-}" ]; then
+        ib_console_args="$ib_console_args --prevent-initiator-overload"
+    fi
+    if [ -z "${IB_NO_CACHE:-}" ] && [ -n "$profile_path" ]; then
+        ib_console_args="$ib_console_args --profile=$profile_path"
+    elif [ -z "${IB_NO_CACHE:-}" ]; then
+        echo "::warning::No IB rustc cache profile found; rustc cache will use runner defaults"
+    fi
+    {
+        echo "IB_CACHE_LOG=$log_path"
+        if [ -n "$profile_path" ]; then
+            echo "IB_PROFILE=$profile_path"
+        fi
+        echo "IB_CONSOLE_ARGS=$ib_console_args"
+    } >> "$GITHUB_ENV"
+    echo "IB_CACHE_LOG=$log_path"
+    if [ -n "$profile_path" ]; then
+        echo "IB_PROFILE=$profile_path"
+    fi
+    echo "IB_CONSOLE_ARGS=$ib_console_args"
+    # mkdir at root may need sudo if not already root; tolerate failure
+    # (the runner cargo shim / ib_console will report if logging fails).
+    if is_root; then
+        mkdir -p /etc/incredibuild/log 2>/dev/null || true
+    else
+        sudo mkdir -p /etc/incredibuild/log 2>/dev/null || true
+        sudo chmod 1777 /etc/incredibuild/log 2>/dev/null || true
+    fi
+fi
+
+# 3. libpython link safety (only meaningful when python is on PATH) ------
+if command -v python3 >/dev/null 2>&1; then
+    PY_PREFIX=$(python3 -c 'import sys; print(sys.prefix)')
+    PY_VER=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
+    so_link="$PY_PREFIX/lib/libpython${PY_VER}.so"
+    if [ ! -e "$so_link" ]; then
+        candidate=$(ls "$PY_PREFIX"/lib/libpython${PY_VER}*.so* 2>/dev/null | sort -r | head -1 || true)
+        if [ -n "$candidate" ]; then
+            ln -s "$(basename "$candidate")" "$so_link" 2>/dev/null || true
+        fi
+    fi
+    if [ -n "${GITHUB_ENV:-}" ]; then
+        echo "LIBRARY_PATH=$PY_PREFIX/lib" >> "$GITHUB_ENV"
+        echo "LD_LIBRARY_PATH=$PY_PREFIX/lib" >> "$GITHUB_ENV"
+    fi
+    echo "python: $PY_PREFIX ($PY_VER)"
+fi
+
+# 4. ensure .venv/bin/python3 if uv + pyproject.toml are present ---------
+# monty's .cargo/config.toml points PYO3_PYTHON at .venv/bin/python3. We
+# keep that file untouched (prek's check-yaml relies on it being tracked
+# AND present on disk) and just make the path resolve by pre-creating
+# the venv. Idempotent: if .venv/bin/python3 already exists, do nothing.
+if command -v uv >/dev/null 2>&1 && [ -f pyproject.toml ] && [ ! -e .venv/bin/python3 ]; then
+    echo "creating .venv at workspace root via uv"
+    uv venv .venv ${UV_PYTHON:+--python "$UV_PYTHON"} 2>&1 | tail -5 || true
+fi
+[ -e .venv/bin/python3 ] && echo ".venv/bin/python3: $(readlink -f .venv/bin/python3 2>/dev/null)"
+
+echo "::endgroup::"
diff --git a/scripts/ib-profile.xml b/scripts/ib-profile.xml
new file mode 100644
index 00000000..0fb4d4c0
--- /dev/null
+++ b/scripts/ib-profile.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+    Custom Incredibuild profile for monty.
+
+    PHILOSOPHY (per ib_linux:data/ib_profile.xml in the product source):
+    the system default profile already opts gcc/clang/cc1/cc1plus and
+    other C/C++ compilers into local-only build-avoidance caching via
+      type="local_only" cached="true" exclude_args="-c:-S:-E"
+    rustc, by contrast, is shipped as type="allow_remote" with NO
+    ib_cache element, i.e. distributed across IB helpers but NOT
+    persisted to the build-avoidance cache. The product authors made
+    that choice deliberately; for monty (which is ~100% rustc), it
+    means "out of the box" caching value is near-zero.
+
+    This profile flips exactly one knob: it adds an ib_cache element
+    on rustc so its outputs land in /etc/incredibuild/cache/build_cache
+    /shared/ and replay on subsequent runs. The rustc .rsp file
+    basedir placeholder transformation needed to make those cache keys
+    workspace-portable lives in ib_linux:cpp/BuildCache/BuildCache_Rules.cpp
+    and is keyed off the process name "rustc", so it activates the
+    moment ib_cache is on.
+
+    SCHEMA NOTE (ib_linux:data/ib_profile.xsd):
+      ib_profile@version=1
+      child sequence: globals then processes
+      globals@ignore_following_profiles=false makes this profile
+      ADDITIVE on top of the system default. We deliberately do NOT
+      redeclare gcc/clang/cc1/cc1plus here; those entries inherit
+      from the system default and re-stating them would shadow
+      cached="true" if we ever forgot to copy it.
+
+    EXCLUDE_ARGS (see attribute on the rustc process below):
+      * version flags are diagnostic and shouldn't pollute keys.
+      * build_script_build / build_script_main are cargo's compiled
+        build scripts, which have side effects (env probes, fs
+        writes) and are non-deterministic across runs. Leaving these
+        cacheable would risk silent staleness.
+      Identical to the system default's rustc rule.
+
+    Note: XML comments may not contain a double hyphen, so this file
+    spells command-line flags without the leading dashes when needed.
+-->
+<ib_profile version="1">
+    <globals
+        ignore_following_profiles="false"
+    />
+    <processes>
+        <process filename="rustc"
+                 type="allow_remote"
+                 exclude_args="--version:-vV:build_script_build:build_script_main">
+            <ib_cache enabled="true" />
+        </process>
+    </processes>
+</ib_profile>
diff --git a/scripts/ib-stats.sh b/scripts/ib-stats.sh
new file mode 100755
index 00000000..c4b0e120
--- /dev/null
+++ b/scripts/ib-stats.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# IB-runner job post-flight cache stats.
+#
+# Reports per-job HIT/MISS counts and cache-dir state so each job's log
+# (and step summary) shows whether its cargo invocations populated or
+# hit the IB build cache. Tolerant of non-IB environments (no-op).
+#
+# Source-of-truth paths:
+#   /etc/incredibuild/cache/build_cache/shared/   (BuildCache_defines.h
+#                                                  BUILD_CACHE_LOCAL_PATH)
+#   /etc/incredibuild/cache/build_cache/builds/   (BUILD_CACHE_BUILDS_PATH)
+#
+# Logfile schema (BuildCache_HitMiss.cpp): each cargo invocation appends
+# a block of "info" lines, then "hit_miss" lines, then "other" lines,
+# terminated by a literal "END" line. We count lines that look like
+# HIT / MISS hit-miss entries.
+
+set +e
+
+echo "::group::IB cache stats"
+
+LOG="${IB_CACHE_LOG:-}"
+hits=0
+misses=0
+miss_reasons=""
+
+if [ -n "$LOG" ] && [ -f "$LOG" ]; then
+    echo "logfile: $LOG"
+    bytes=$(wc -c <"$LOG" 2>/dev/null || echo 0)
+    lines=$(wc -l <"$LOG" 2>/dev/null || echo 0)
+    echo "size: ${bytes} bytes, ${lines} lines"
+
+    # Hit/miss markers in BuildCache_HitMiss::add_hit_miss are formatted
+    # as "HIT <hash>" / "MISS <hash> reason=..." — match line starts.
+    hits=$(grep -c -E '^HIT[[:space:]]'  "$LOG" 2>/dev/null || echo 0)
+    misses=$(grep -c -E '^MISS[[:space:]]' "$LOG" 2>/dev/null || echo 0)
+    echo "HIT=$hits MISS=$misses"
+
+    # Top miss reasons (--build-cache-report-all-miss output).
+    miss_reasons=$(grep -E '^MISS[[:space:]]' "$LOG" 2>/dev/null \
+        | sed -E 's/.*reason=([^[:space:]]+).*/\1/' \
+        | sort | uniq -c | sort -rn | head -10)
+    if [ -n "$miss_reasons" ]; then
+        echo "top miss reasons:"
+        echo "$miss_reasons"
+    fi
+
+    # Tail for human inspection.
+    echo "--- last 80 lines ---"
+    tail -80 "$LOG" 2>/dev/null
+fi
+
+# Legacy ib_hm.log path (older ib_console builds). We still surface any
+# survivors in case a different code path wrote there.
+if [ -d /etc/incredibuild/log ]; then
+    mapfile -t hmlogs < <(find /etc/incredibuild/log -name ib_hm.log -printf "%T@ %p\n" 2>/dev/null | sort -rn | head -3 | cut -d" " -f2-)
+    for f in "${hmlogs[@]:-}"; do
+        [ -z "$f" ] && continue
+        echo "--- legacy ib_hm.log: $f ---"
+        wc -l "$f" 2>/dev/null
+        tail -40 "$f" 2>/dev/null
+    done
+fi
+
+echo "--- cache dirs ---"
+for d in /etc/incredibuild/cache/build_cache/shared \
+         /etc/incredibuild/cache/build_cache/builds; do
+    if [ -d "$d" ]; then
+        tar_count=$(find "$d" -name '*.tar' 2>/dev/null | wc -l)
+        echo "$(du -sh "$d" 2>/dev/null | head -1) — .tar artifacts: $tar_count"
+    fi
+done
+
+echo "::endgroup::"
+
+# Step summary surface (markdown).
+if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
+    {
+        echo "### IB cache stats — \`${GITHUB_JOB:-local}\`"
+        echo ""
+        echo "| metric | value |"
+        echo "|---|---|"
+        echo "| HIT | ${hits:-0} |"
+        echo "| MISS | ${misses:-0} |"
+        if [ -d /etc/incredibuild/cache/build_cache/shared ]; then
+            shared_size=$(du -sh /etc/incredibuild/cache/build_cache/shared 2>/dev/null | awk '{print $1}')
+            shared_tars=$(find /etc/incredibuild/cache/build_cache/shared -name '*.tar' 2>/dev/null | wc -l | tr -d ' ')
+            echo "| shared cache size | ${shared_size:-?} |"
+            echo "| shared cache .tar artifacts | ${shared_tars:-0} |"
+        fi
+        echo ""
+        if [ -n "$miss_reasons" ]; then
+            echo "Top miss reasons:"
+            echo ""
+            echo '```'
+            echo "$miss_reasons"
+            echo '```'
+        fi
+    } >> "$GITHUB_STEP_SUMMARY"
+fi