diff --git a/.github/workflows/dtvm_evm_test_x86.yml b/.github/workflows/dtvm_evm_test_x86.yml
index b34e26a32..2fedefedf 100644
--- a/.github/workflows/dtvm_evm_test_x86.yml
+++ b/.github/workflows/dtvm_evm_test_x86.yml
@@ -487,3 +487,95 @@ jobs:
         run: |
           echo "::error::Performance regression detected in ${{ matrix.mode }} mode. See logs for details."
           exit 1
+
+  # Validates the x86 CgIR peephole rules and the dMIR rewrite tests, then
+  # checks per-pass compile-time budgets for the multipass JIT.
+  peephole_validation_and_timing_budget:
+    name: Peephole Validation and Timing Budget Check
+    runs-on: ubuntu-latest
+    container:
+      image: dtvmdev1/dtvm-dev-x64:main
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+        with:
+          submodules: "true"
+
+      - name: Build dtvm, x86CgPeepholeTests and dmirValidationTests
+        run: |
+          export LLVM_SYS_150_PREFIX=/opt/llvm15
+          export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm
+          export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH
+          cmake -S . -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DZEN_ENABLE_SINGLEPASS_JIT=OFF \
+            -DZEN_ENABLE_MULTIPASS_JIT=ON \
+            -DZEN_ENABLE_EVM=ON \
+            -DZEN_ENABLE_SPEC_TEST=ON \
+            -DZEN_ENABLE_CPU_EXCEPTION=ON \
+            -DZEN_ENABLE_VIRTUAL_STACK=ON
+          cmake --build build --target dtvm --target x86CgPeepholeTests --target dmirValidationTests -j$(nproc)
+          bash tools/easm2bytecode.sh tests/evm_asm tests/evm_asm
+
+      # Re-run the generator by hand and compare its output with the .inc the
+      # CMake build produced from the same rules file.
+      - name: Verify .inc generator output is up-to-date
+        run: |
+          python3 tools/generate_x86_cg_peephole.py \
+            --rules src/compiler/target/x86/x86_cg_peephole_rules.json \
+            --out-inc /tmp/x86_cg_peephole_generated_check.inc \
+            --out-report /tmp/x86_cg_peephole_report_check.txt
+          diff /tmp/x86_cg_peephole_generated_check.inc \
+            build/src/compiler/generated/target/x86/x86_cg_peephole_generated.inc
+
+      - name: Run peephole rule validation check
+        run: |
+          python3 tools/check_x86_cg_peephole_validation.py \
+            --rules src/compiler/target/x86/x86_cg_peephole_rules.json \
+            --gtest-binary build/x86CgPeepholeTests
+
+      - name: Run dmir rewrite validation tests
+        run: ./build/dmirValidationTests
+
+      # The options after the bare "--" are dtvm options; --compile-only keeps
+      # the measurement to module loading and JIT compilation.
+      - name: Collect compiler pass timings
+        run: |
+          python3 tools/collect_compiler_pass_timings.py \
+            --dtvm build/dtvm \
+            --manifest tests/evm_asm/compiler_pass_timing_manifest.json \
+            --runs 5 \
+            --output /tmp/ci_timing_report.json \
+            -- --format evm --mode multipass --compile-only
+
+      # NOTE(review): the budgets checked below are regenerated from the same
+      # report they are checked against; confirm the update tool carries the
+      # thresholds over from --budget-in so the check is not self-fulfilling.
+      - name: Refresh timing budgets from CI data
+        run: |
+          python3 tools/update_compiler_pass_timing_budget.py \
+            --report /tmp/ci_timing_report.json \
+            --out /tmp/ci_budget_x86_cg_peephole.json \
+            --budget-in tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \
+            --target-pass x86_cg_peephole \
+            --manifest tests/evm_asm/compiler_pass_timing_manifest.json \
+            --runs 5
+          python3 tools/update_compiler_pass_timing_budget.py \
+            --report /tmp/ci_timing_report.json \
+            --out /tmp/ci_budget_dmir_rewrite.json \
+            --budget-in tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json \
+            --target-pass dmir_rewrite \
+            --manifest tests/evm_asm/compiler_pass_timing_manifest.json \
+            --runs 5
+
+      - name: Check timing budget (x86_cg_peephole)
+        run: |
+          python3 tools/check_compiler_pass_timing_budget.py \
+            --budget /tmp/ci_budget_x86_cg_peephole.json \
+            --report /tmp/ci_timing_report.json
+
+      - name: Check timing budget (dmir_rewrite)
+        run: |
+          python3 tools/check_compiler_pass_timing_budget.py \
+            --budget /tmp/ci_budget_dmir_rewrite.json \
+            --report /tmp/ci_timing_report.json
diff --git a/docs/changes/2026-03-30-peephole-optimization-system/README.md b/docs/changes/2026-03-30-peephole-optimization-system/README.md
new file mode 100644
index 000000000..44d2d6c3f
--- /dev/null
+++ b/docs/changes/2026-03-30-peephole-optimization-system/README.md
@@ -0,0 +1,70 @@
+# Change: Peephole Optimization System for dMIR and x86 CgIR
+
+- **Status**: Implemented
+- **Date**: 2026-03-30
+- **Tier**: Full
+
+## Overview
+
+This change adds a two-level peephole optimization system targeting both dMIR (mid-level IR) and x86 CgIR (code generation IR). The dMIR level has 65 accepted rewrite rules (plus 5 seed rules) covering identity elimination, boolean algebra, shift-zero, and carry-dead rewrites. The x86 CgIR level has 13 declarative JSON rules for self-moves, zero-shifts, redundant CMP/TEST, fallthrough branches, and setcc+test+jne chain folding. It also includes Z3-verified synthesized rules and a CI validation gate.
+
+## Motivation
+
+The JIT compiler generated redundant instructions from mechanical U256 decomposition and lowering. Peephole optimization is a standard compiler technique to clean up such patterns without restructuring the pipeline. The two-level approach catches patterns at both the IR and machine code level.
+
+## Impact
+
+### Affected Modules
+
+- `docs/modules/compiler/` — new dMIR rewrite pass, carry-dead analysis, rule table infrastructure
+- `docs/modules/singlepass/` — x86 CgIR peephole pass
+- CI pipeline — new `peephole_validation_and_timing_budget` job
+
+### Affected Contracts
+
+No API or interface changes.
+
+### Compatibility
+
+- No breaking changes
+- +4.6% geomean improvement on evmone-bench (27 benchmarks)
+- Notable wins: snailtracer +3.9%, structarray_alloc +4.1%, swap_math +5.0-5.8%, memory_grow_mstore +11-13%
+- ~0.005ms p95 compile overhead from dMIR rewrite pass
+
+## Implementation Plan
+
+### Phase 1: dMIR Rewrite Infrastructure
+
+- [x] Pattern matching framework
+- [x] Rule table
+- [x] Validation tests
+
+### Phase 2: Carry-Dead Analysis
+
+- [x] `isCarryDead()` for adc→add and sbb→sub rewrites on dead-carry limbs
+
+### Phase 3: Z3-Synthesized Rules
+
+- [x] `add(x,x)→shl(x,1)`, negation folding, boolean identities
+- [x] Verified via `tools/synthesize_dmir_rules.py`
+
+### Phase 4: x86 CgIR Peephole
+
+- [x] 13 declarative JSON rules
+- [x] Pattern matching on machine instructions
+
+### Phase 5: CI Gate
+
+- [x] `.inc` freshness check
+- [x] Structural/execution/semantics validation
+- [x] Compile-time budget enforcement
+
+## Compatibility Notes
+
+No backwards-incompatible changes. The optimization passes are additive and do not alter any external APIs or module interfaces.
+
+## Risks
+
+- Rewrite rules must preserve U256 semantics exactly. All rules are Z3-verified, but the carry-dead analysis that gates the adc→add and sbb→sub rewrites could still miss an edge case
+- Compile-time budget (0.005ms p95) may need adjustment as more rules are added
+- JSON rule format for x86 CgIR is a new abstraction layer that adds maintenance surface
diff --git a/docs/compiler/dmir_to_x86_mapping.md b/docs/compiler/dmir_to_x86_mapping.md
new file mode 100644
index 000000000..3cf1703a4
--- /dev/null
+++ b/docs/compiler/dmir_to_x86_mapping.md
@@ -0,0 +1,86 @@
+<!--
+Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+SPDX-License-Identifier: Apache-2.0
+-->
+
+# dMIR To CgIR/x86 Mapping
+
+## Scope
+
+This note records the lowering bridge for the dMIR arithmetic subset that the
+offline rewrite pipeline currently touches, plus the safe subset already wired
+into the production dMIR rewrite pass:
+
+- integer `add/sub`
+- `cmp`
+- `select`
+- `adc/sbb`
+- EVM 64x64->128 multiplication helpers
+- EVM 128/64 division helpers
+
+Phase 1 keeps the production DSL at `CgIR/x86`, so every dMIR-side candidate
+rule eventually has to be translated into the instruction families emitted by
+`X86CgLowering`.
+
+## Current Production Status
+
+`JITCompilerBase::compileMIRToCgIR()` now runs a tree-local `DMirRewritePass`
+after `dead_mbb_elim` and before x86 lowering. The pass currently applies only
+a conservative in-code subset of accepted rules whose replacements are either
+existing subtrees, typed integer constants, or small synthesized boolean
+expressions, for example:
+
+- `add/sub/or/xor/shift` identities with zero
+- `and` identities with zero or all-ones
+- `not(not x) => x`
+- `select(cond, x, x) => x`
+- complement folds such as `or((not x), x) => allones`
+- boolean factoring such as `xor((and x y), (xor x y)) => (or x y)`
+
+`adc` and `sbb` candidates remain validation-only: the explicit third operand
+is visible in dMIR, but rewriting them safely still requires carry/borrow-chain
+proof beyond the current structural pass.
+
+## Mapping Table
+
+| dMIR expression family | Lowering entrypoint | CgIR/x86 family | Bridge notes |
+| --- | --- | --- | --- |
+| `add`, `sub` | generic FastISel path in `CgLowering<X86CgLowering>` plus `X86GenFastISel.inc` (see `src/compiler/target/x86/x86lowering.h`) | `ADD*rr/ri`, `SUB*rr/ri` | This path is table-driven, not hand-written in `x86lowering.cpp`. The exact register/immediate form depends on operand materialization. |
+| `cmp` | `X86CgLowering::lowerCmpExpr()` in `src/compiler/target/x86/x86lowering.cpp` | compare op (`CMP*` or `TEST*`) + `SETCCr` + optional `MOVZX32rr8` | Integer compare results become 8-bit condition materialization first, then widen to i32/i64. This is the source-side pattern behind the existing `SETCCr/TEST8rr/JCC_1` peephole fold. |
+| `select` | `X86CgLowering::lowerSelectExpr()` in `src/compiler/target/x86/x86lowering.cpp` | integer: `CMOV*`; floating-point: conditional branch + `COPY` | Integer `select` survives as a recognizable dataflow choice. Floating-point `select` is lowered into control flow and loses the direct value-select shape. |
+| `adc` | `X86CgLowering::lowerAdcExpr()` in `src/compiler/target/x86/x86lowering.cpp` | `ADC8rr`, `ADC16rr`, `ADC32rr`, `ADC64rr` | The carry operand is not reified in x86 CgIR. Lowering asserts that operand 2 is the constant zero and then consumes the hardware `CF` chain directly. Any dMIR-side analysis that depends on the explicit third operand being zero must therefore happen before lowering. That alone does not justify rewriting `adc(lhs, rhs, 0)` into `add(lhs, rhs)` inside an EVM carry chain. |
+| `sbb` | `X86CgLowering::lowerSbbExpr()` in `src/compiler/target/x86/x86lowering.cpp` | `SBB8rr`, `SBB16rr`, `SBB32rr`, `SBB64rr` | Same information-loss caveat as `adc`: x86 CgIR only preserves the borrow-consuming instruction, not the explicit third operand from dMIR. The zero-borrow precondition can be checked only before lowering, but borrow-chain safety still has to be established separately. |
+| `evm_umul128_lo`, `evm_umul128_hi` | `X86CgLowering::lowerEvmUmul128Expr()` and `lowerEvmUmul128HiExpr()` in `src/compiler/target/x86/x86lowering.cpp` | `COPY -> RAX`, `MUL64r`, `COPY RAX`, optional `COPY RDX` | The low half is always materialized from `RAX`. The high half exists only when an `evm_umul128_hi` user is present; lowering pre-scans the function and allocates the extra copy lazily. |
+| `evm_udiv128_by64`, `evm_urem128_by64` | `X86CgLowering::lowerEvmUdiv128By64Expr()` and `lowerEvmUrem128By64Expr()` in `src/compiler/target/x86/x86lowering.cpp` | `COPY -> RDX`, `COPY -> RAX`, `DIV64r`, `COPY RAX`, `COPY RDX` | Quotient and remainder are split across `RAX` and `RDX`. As with `umul128`, the helper pair lowers to one x86 instruction plus explicit register copies. |
+
+## Translation Rules For The Current Seed Set
+
+The current seed dMIR candidate file lives at
+`src/compiler/mir/dmir_rewrite_rules.json`. For Phase 1 option A, these rules
+translate into x86-facing families as follows:
+
+| dMIR candidate | x86-facing shape after lowering | Recommended landing layer |
+| --- | --- | --- |
+| `(add x 0:i64) => x` | `ADD*rr/ri` with a zero operand | x86 DSL can represent this, but only after matching the exact zero-immediate form. |
+| `(not (not x)) => x` | `NOT*` pair | Either layer works; x86 DSL keeps it target-specific. |
+| `(select cond x x) => x` | integer `CMOV*` or FP branch diamond | Prefer dMIR for the generic rule. Lowering splits the integer and FP cases. |
+| `(adc x y 0:i64) => (add x y)` | `ADC*rr` consuming implicit `CF` | Only a dMIR-side candidate today. The explicit third operand disappears after lowering, so this precondition cannot be recovered at the x86 layer. A future promotion still needs carry-chain-specific safety proof. |
+| `(sbb x y 0:i64) => (sub x y)` | `SBB*rr` consuming implicit `CF` | Same reasoning as `adc`: the precondition is only visible in dMIR, but promotion still needs borrow-chain-specific safety proof. |
+
+## Why This Mapping Matters
+
+Two pieces of information are lost across lowering:
+
+- The explicit third operand of `adc/sbb`
+- The high-level `select(cmp(...), lhs, rhs)` shape once it turns into x86
+  condition codes plus `SETCCr`, `CMOV*`, or explicit branches
+
+That split is the main reason the current implementation keeps three parallel
+tracks:
+
+- a conservative production `DMirRewritePass` for tree-local structural folds
+- production peepholes at `CgIR/x86`
+- offline dMIR candidate rules plus interpreter-backed validation
+
+The mapping tables above are the minimum subset needed to move rules between
+those tracks without rediscovering the source locations each time.
diff --git a/docs/compiler/x86_cg_peephole.md b/docs/compiler/x86_cg_peephole.md
new file mode 100644
index 000000000..7dec37a94
--- /dev/null
+++ b/docs/compiler/x86_cg_peephole.md
@@ -0,0 +1,165 @@
+<!--
+Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+SPDX-License-Identifier: Apache-2.0
+-->
+
+# X86 Cg Peephole Foundation
+
+## Scope Decision
+
+Phase 1 keeps the declarative peephole framework at the existing `CgIR/x86`
+layer.
+
+- Rule matching still runs inside `X86CgPeephole`
+- Rules live in
+  `src/compiler/target/x86/x86_cg_peephole_rules.json`
+- The rule file is compiled into C++ at build time by
+  `tools/generate_x86_cg_peephole.py`
+
+This keeps the first migration aligned with the current optimization layer and
+avoids introducing a new dMIR pass before timing baselines exist.
+
+## Rule DSL
+
+Each rule is a JSON object with these fields:
+
+- `name`: stable identifier used in reports and tests
+- `stage`: `instruction` or `block_end`
+- `priority`: higher priority rules are emitted first
+- `pattern`: ordered instruction match window
+- `when`: optional block-level side conditions
+- `action`: deterministic rewrite steps
+
+Supported `pattern` matchers:
+
+- `predicate`: call a `CgInstruction` predicate such as `isCompare`
+- `opcode`: match a single x86 opcode
+- `opcode_any`: match one opcode from a fixed set
+- `capture`: bind an operand field for later reuse
+- `require`: constrain operand fields to captures, enums, or booleans
+
+Supported operand fields:
+
+- `reg`
+- `imm`
+- `is_mbb`
+
+Supported `when` conditions:
+
+- `target_is_next_block`
+
+Supported `action` steps:
+
+- `erase`
+- `set_imm`
+
+Each rule also carries validation metadata:
+
+- `validation.modes`: declared validation styles for the rule
+- `validation.coverage`: concrete test coverage entries
+
+`tools/check_x86_cg_peephole_validation.py` rejects rule files that add rewrites
+without validation metadata. When given `--gtest-binary`, it also verifies that
+each coverage entry names a real gtest case.
+
+The generated matcher is linear in the number of emitted rules. There is no
+runtime search, SMT solving, or e-graph exploration in the JIT path.
+
+Validation coverage can be exported as a machine-readable report:
+
+```bash
+python3 tools/report_x86_cg_peephole_validation.py \
+  --rules src/compiler/target/x86/x86_cg_peephole_rules.json \
+  --gtest-binary ./build-peephole/x86CgPeepholeTests \
+  --out /tmp/x86-cg-peephole-validation.json
+```
+
+The report summarizes:
+
+- rule count
+- per-stage rule counts
+- per-mode validation counts
+- per-rule coverage completeness against the current gtest binary
+
+## Conflict Checks
+
+The generator emits a rule report and rejects rules that share the same
+normalized pattern and priority. The report is generated at build time:
+
+- `build/.../generated/target/x86/x86_cg_peephole_report.txt`
+
+## Compiler Pass Timing Baseline
+
+Compiler-pass timing is written when
+`DTVM_COMPILER_PASS_TIMING_JSON=/path/to/file.json` is present.
+
+Recommended baseline workflow:
+
+```bash
+python3 tools/collect_compiler_pass_timings.py \
+  --dtvm ./build-peephole/dtvm \
+  --manifest tests/evm_asm/compiler_pass_timing_manifest.json \
+  --runs 5 \
+  --output /tmp/dtvm-pass-timing.json \
+  -- --format evm -m multipass --compile-only \
+     --num-extra-compilations 4 --evm-revision cancun
+```
+
+`--compile-only` avoids execution-side noise and keeps the benchmark focused on
+module loading and JIT compilation.
+
+The aggregated JSON includes:
+
+- per-case total compile time
+- per-pass timing statistics
+- `p95` pass-time and pass-share data for budget checks
+- per-pass share of total compile time
+- manifest-level aggregate summary
+
+Rule DSL note: operand indices may count from the end of the explicit operand
+list when negative. For example, `-1` refers to the last explicit operand,
+which is useful for two-address x86 opcodes whose immediate operand is not at a
+fixed absolute index once implicit operands such as `EFLAGS` are present.
+
+Budget validation workflow:
+
+```bash
+python3 tools/check_compiler_pass_timing_budget.py \
+  --budget tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \
+  --report /tmp/dtvm-pass-timing.json
+```
+
+Budget refresh workflow:
+
+```bash
+python3 tools/update_compiler_pass_timing_budget.py \
+  --report /tmp/dtvm-pass-timing.json \
+  --budget-in tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \
+  --out tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \
+  --rules src/compiler/target/x86/x86_cg_peephole_rules.json \
+  --runs 5 \
+  --num-extra-compilations 4
+```
+
+Phase 1 uses these outputs to set the peephole budget thresholds:
+
+- max share of function compile time
+- max pass wall time
+- CI regression threshold
+- linear growth check against rule count
+
+`tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json` is an initial local baseline.
+It should be recalibrated on the target CI runner before enforcing tighter
+regression gates.
+
+## Rule Validation
+
+Current validation coverage is split into two layers:
+
+- structural rewrite tests in `src/tests/x86_cg_peephole_tests.cpp`
+- semantics fuzzing for compare/setcc folding in the same test target
+
+The first execution-backed harness is now in place for the
+`cmp/setcc/test/jne -> cmp/jcc` rewrite. It executes both the original and
+rewritten x86 sequences with inline assembly across edge cases and randomized
+inputs, then compares the observed branch result.
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5d73d028c..7fb34f4e8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -102,6 +102,7 @@ if(ZEN_ENABLE_SINGLEPASS_JIT)
 endif()
 
 if(ZEN_ENABLE_MULTIPASS_JIT)
+  find_package(Python3 REQUIRED COMPONENTS Interpreter)
   find_package(LLVM 15 REQUIRED CONFIG)
   message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
   message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
diff --git a/src/cli/dtvm.cpp b/src/cli/dtvm.cpp
index d241187ab..4795f1c57 100644
--- a/src/cli/dtvm.cpp
+++ b/src/cli/dtvm.cpp
@@ -109,12 +109,10 @@ static evmc_message createEvmMessage(evmc::MockedHost &Host,
   return Msg;
 }
 
-static bool runEVMBenchmark(const std::string &Filename,
-                            uint32_t NumExtraCompilations,
-                            uint32_t NumExtraExecutions, Runtime *RT,
-                            EVMModule *Mod, const EVMMessageConfig &MsgConfig,
-                            evmc::MockedHost &Host) {
-  if (NumExtraCompilations + NumExtraExecutions == 0) {
+static bool runEVMCompilationBenchmark(const std::string &Filename,
+                                       uint32_t NumExtraCompilations,
+                                       Runtime *RT) {
+  if (NumExtraCompilations == 0) {
     return true;
   }
 
@@ -132,6 +130,24 @@ static bool runEVMBenchmark(const std::string &Filename,
     RT->unloadEVMModule(*TestModRet);
   }
 
+  return true;
+}
+
+static bool runEVMExecutionBenchmark(const std::string &Filename,
+                                     uint32_t NumExtraExecutions, Runtime *RT,
+                                     EVMModule *Mod,
+                                     const EVMMessageConfig &MsgConfig,
+                                     evmc::MockedHost &Host) {
+  if (NumExtraExecutions == 0) {
+    return true;
+  }
+
+  std::vector<uint8_t> Bytecode;
+  if (!zen::utils::readBinaryFile(Filename, Bytecode)) {
+    SIMPLE_LOG_ERROR("failed to read EVM bytecode file %s", Filename.c_str());
+    return false;
+  }
+
   for (uint32_t I = 0; I < NumExtraExecutions; ++I) {
     IsolationUniquePtr TestIso = RT->createUnmanagedIsolation();
     ZEN_ASSERT(TestIso);
@@ -177,6 +193,7 @@ int main(int argc, char *argv[]) {
   uint32_t NumExtraExecutions = 0;
   RuntimeConfig Config;
   bool EnableBenchmark = false;
+  bool CompileOnly = false;
   bool DeployMode = false;
   std::string ContractAddress;
   std::string SenderAddress = "1000000000000000000000000000000000000000";
@@ -281,6 +298,8 @@ int main(int argc, char *argv[]) {
 #endif // ZEN_ENABLE_MULTIPASS_JIT
 #ifdef ZEN_ENABLE_EVM
     CLIParser->add_option("--calldata", Calldata, "Calldata hex pass to EVM");
+    CLIParser->add_flag("--compile-only", CompileOnly,
+                        "Compile EVM bytecode without creating an instance");
     CLIParser
         ->add_option("--evm-revision", EvmRevision,
                      "EVM revision (e.g., cancun, osaka)")
@@ -299,6 +318,11 @@ int main(int argc, char *argv[]) {
     return exitMain(EXIT_FAILURE);
   }
 
+  if (CompileOnly && Config.Format != InputFormat::EVM) {
+    SIMPLE_LOG_ERROR("--compile-only is only supported with --format evm");
+    return exitMain(EXIT_FAILURE);
+  }
+
   /// ================ EVM mode ================
 #ifdef ZEN_ENABLE_EVM
   if (Config.Format == InputFormat::EVM) {
@@ -338,6 +362,26 @@ int main(int argc, char *argv[]) {
     }
     EVMModule *Mod = *ModRet;
 
+    if (CompileOnly) {
+      if (NumExtraExecutions != 0) {
+        SIMPLE_LOG_ERROR(
+            "--num-extra-executions is not supported with --compile-only");
+        return exitMain(EXIT_FAILURE, RT.get());
+      }
+
+      if (!runEVMCompilationBenchmark(Filename, NumExtraCompilations,
+                                      RT.get())) {
+        return exitMain(EXIT_FAILURE, RT.get());
+      }
+
+      if (!RT->unloadEVMModule(Mod)) {
+        ZEN_LOG_ERROR("failed to unload EVM module");
+        return exitMain(EXIT_FAILURE, RT.get());
+      }
+
+      return exitMain(EXIT_SUCCESS, RT.get());
+    }
+
     Isolation *Iso = RT->createManagedIsolation();
     if (!Iso) {
       ZEN_LOG_ERROR("failed to create EVM isolation");
@@ -427,9 +471,12 @@ int main(int argc, char *argv[]) {
     }
 
     /// ======= EVM Extra compilations and executions for benchmarking =======
-    if (!runEVMBenchmark(Filename, NumExtraCompilations, NumExtraExecutions,
-                         RT.get(), Mod, MsgConfig,
-                         *static_cast<evmc::MockedHost *>(Host.get()))) {
+    if (!runEVMCompilationBenchmark(Filename, NumExtraCompilations, RT.get())) {
+      return exitMain(EXIT_FAILURE, RT.get());
+    }
+    if (!runEVMExecutionBenchmark(
+            Filename, NumExtraExecutions, RT.get(), Mod, MsgConfig,
+            *static_cast<evmc::MockedHost *>(Host.get()))) {
       return exitMain(EXIT_FAILURE, RT.get());
     }
 
diff --git a/src/compiler/CMakeLists.txt b/src/compiler/CMakeLists.txt
index 74f604ae4..5a6e5b3f1 100644
--- a/src/compiler/CMakeLists.txt
+++ b/src/compiler/CMakeLists.txt
@@ -32,6 +32,7 @@ endif()
 set(COMPILER_SRCS
     compiler.cpp
     context.cpp
+    common/pass_timing.cpp
     common/llvm_workaround.cpp
     frontend/parser.cpp
     frontend/lexer.cpp
@@ -94,6 +95,34 @@ set(COMPILER_SRCS
     cgir/pass/llvm_utils.cpp
 )
 
+set(X86_PEEPHOLE_RULES
+    ${CMAKE_CURRENT_SOURCE_DIR}/target/x86/x86_cg_peephole_rules.json
+)
+set(X86_PEEPHOLE_GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/target/x86)
+set(X86_PEEPHOLE_GENERATED_INC
+    ${X86_PEEPHOLE_GENERATED_DIR}/x86_cg_peephole_generated.inc
+)
+set(X86_PEEPHOLE_REPORT
+    ${X86_PEEPHOLE_GENERATED_DIR}/x86_cg_peephole_report.txt
+)
+
+add_custom_command(
+  OUTPUT ${X86_PEEPHOLE_GENERATED_INC} ${X86_PEEPHOLE_REPORT}
+  COMMAND ${CMAKE_COMMAND} -E make_directory ${X86_PEEPHOLE_GENERATED_DIR}
+  COMMAND
+    ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/generate_x86_cg_peephole.py
+    --rules ${X86_PEEPHOLE_RULES} --out-inc ${X86_PEEPHOLE_GENERATED_INC}
+    --out-report ${X86_PEEPHOLE_REPORT}
+  DEPENDS ${X86_PEEPHOLE_RULES}
+          ${CMAKE_SOURCE_DIR}/tools/generate_x86_cg_peephole.py
+  VERBATIM
+)
+
+add_custom_target(
+  generateX86CgPeephole DEPENDS ${X86_PEEPHOLE_GENERATED_INC}
+                                ${X86_PEEPHOLE_REPORT}
+)
+
 if(ZEN_ENABLE_EVM)
   list(APPEND COMPILER_SRCS evm_compiler.cpp evm_frontend/evm_imported.cpp
        evm_frontend/evm_mir_compiler.cpp
@@ -111,6 +140,10 @@ set_property(
 )
 
 add_library(compiler STATIC ${COMPILER_SRCS} $<TARGET_OBJECTS:utils>)
+add_dependencies(compiler generateX86CgPeephole)
+target_include_directories(
+  compiler PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/generated
+)
 target_link_libraries(compiler PRIVATE ${llvm_libs})
 if(ZEN_ENABLE_EVM)
   target_link_libraries(compiler PUBLIC evmc::instructions)
diff --git a/src/compiler/cgir/lowering.h b/src/compiler/cgir/lowering.h
index 5ec152dbb..6927551a6 100644
--- a/src/compiler/cgir/lowering.h
+++ b/src/compiler/cgir/lowering.h
@@ -202,6 +202,22 @@ template <typename T> class CgLowering {
       ResultReg = SELF.lowerEvmU256MulResultExpr(
           llvm::cast<EvmU256MulResultInstruction>(Inst));
       break;
+    case MInstruction::EVM_U256_ADD:
+      ResultReg =
+          SELF.lowerEvmU256AddExpr(llvm::cast<EvmU256AddInstruction>(Inst));
+      break;
+    case MInstruction::EVM_U256_ADD_RESULT:
+      ResultReg = SELF.lowerEvmU256AddResultExpr(
+          llvm::cast<EvmU256AddResultInstruction>(Inst));
+      break;
+    case MInstruction::EVM_U256_SUB:
+      ResultReg =
+          SELF.lowerEvmU256SubExpr(llvm::cast<EvmU256SubInstruction>(Inst));
+      break;
+    case MInstruction::EVM_U256_SUB_RESULT:
+      ResultReg = SELF.lowerEvmU256SubResultExpr(
+          llvm::cast<EvmU256SubResultInstruction>(Inst));
+      break;
     case MInstruction::EVM_UDIV128_BY64:
       ResultReg = SELF.lowerEvmUdiv128By64Expr(
           llvm::cast<EvmUdiv128By64Instruction>(Inst));
diff --git a/src/compiler/cgir/pass/peephole.h b/src/compiler/cgir/pass/peephole.h
index 04492b6f3..cbedc056b 100644
--- a/src/compiler/cgir/pass/peephole.h
+++ b/src/compiler/cgir/pass/peephole.h
@@ -15,13 +15,18 @@ template <typename T> class CgPeephole : public NonCopyable {
 public:
   CgPeephole(CgFunction &MF) : MF(MF) {
     for (auto *MBB : MF) {
-      SELF.peepholeOptimizeBB(*MBB);
       for (CgBasicBlock::iterator MII = MBB->begin(), MIE = MBB->end();
            MII != MIE;) {
-        // may change MII
-        SELF.peepholeOptimize(*MBB, MII);
-        MII++;
+        // peepholeOptimize() returns true when it has already moved MII past
+        // an erased instruction; advance here only when it returns false.
+        if (!SELF.peepholeOptimize(*MBB, MII)) {
+          MII++;
+        }
       }
+      // Block-end rewrites (e.g. remove-fallthrough-jcc) erase terminators
+      // that instruction-level rules (e.g. fold-setcc-test-jne-to-jcc) need
+      // as part of a longer match window. Run instruction-level pass first.
+      SELF.peepholeOptimizeBB(*MBB);
     }
   }
 
diff --git a/src/compiler/common/pass_timing.cpp b/src/compiler/common/pass_timing.cpp
new file mode 100644
index 000000000..06d8d521f
--- /dev/null
+++ b/src/compiler/common/pass_timing.cpp
@@ -0,0 +1,198 @@
+// Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "compiler/common/pass_timing.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iomanip>
+
+namespace COMPILER {
+
+namespace {
+
+constexpr const char *COMPILER_PASS_TIMING_PATH_ENV =
+    "DTVM_COMPILER_PASS_TIMING_JSON";
+
+// Returns the report path named by the environment, or nullptr when the
+// variable is unset or empty (an empty string cannot name an output file).
+const char *timingOutputPathFromEnv() {
+  const char *Path = std::getenv(COMPILER_PASS_TIMING_PATH_ENV);
+  return (Path != nullptr && Path[0] != '\0') ? Path : nullptr;
+}
+
+// Converts a steady_clock duration to fractional milliseconds.
+double durationToMs(std::chrono::steady_clock::duration Duration) {
+  return std::chrono::duration<double, std::milli>(Duration).count();
+}
+
+} // namespace
+
+// Returns the process-wide sink, created on first use.
+CompilerPassTimingSink &CompilerPassTimingSink::get() {
+  static CompilerPassTimingSink Sink;
+  return Sink;
+}
+
+// Timing is enabled only when the environment names a non-empty output path.
+CompilerPassTimingSink::CompilerPassTimingSink()
+    : Enabled(timingOutputPathFromEnv() != nullptr),
+      OutputPath(Enabled ? timingOutputPathFromEnv() : "") {}
+
+// Stores one finished record; does nothing when timing is disabled.
+void CompilerPassTimingSink::appendRecord(CompilerPassTimingRecord Record) {
+  if (!Enabled) {
+    return;
+  }
+
+  std::lock_guard<std::mutex> Lock(Mutex);
+  Records.emplace_back(std::move(Record));
+}
+
+// Writes the collected records, if any, when the sink is destroyed.
+CompilerPassTimingSink::~CompilerPassTimingSink() {
+  if (!Enabled) {
+    return;
+  }
+
+  // Lock before inspecting Records so the emptiness check and the write see
+  // the same state.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  if (Records.empty()) {
+    return;
+  }
+  writeReportLocked();
+}
+
+// Serializes Records as JSON into "<OutputPath>.tmp" and then renames that
+// file to OutputPath. The caller must hold Mutex. Failures are not reported.
+void CompilerPassTimingSink::writeReportLocked() const {
+  const std::string TempPath = OutputPath + ".tmp";
+  std::ofstream Out(TempPath, std::ios::out | std::ios::trunc);
+  if (!Out.is_open()) {
+    return;
+  }
+
+  Out << std::fixed << std::setprecision(6);
+  Out << "{\n  \"records\": [\n";
+  for (size_t RecordIdx = 0; RecordIdx < Records.size(); ++RecordIdx) {
+    const auto &Record = Records[RecordIdx];
+    Out << "    {\n";
+    Out << "      \"pipeline\": \"" << escapeJson(Record.Pipeline) << "\",\n";
+    Out << "      \"func_idx\": " << Record.FuncIdx << ",\n";
+    Out << "      \"total_time_ms\": " << Record.TotalTimeMs << ",\n";
+    Out << "      \"phases\": [\n";
+    for (size_t EntryIdx = 0; EntryIdx < Record.Entries.size(); ++EntryIdx) {
+      const auto &Entry = Record.Entries[EntryIdx];
+      Out << "        {\"name\": \"" << escapeJson(Entry.Name)
+          << "\", \"time_ms\": " << Entry.TimeMs << "}";
+      if (EntryIdx + 1 != Record.Entries.size()) {
+        Out << ",";
+      }
+      Out << "\n";
+    }
+    Out << "      ]\n";
+    Out << "    }";
+    if (RecordIdx + 1 != Records.size()) {
+      Out << ",";
+    }
+    Out << "\n";
+  }
+  Out << "  ]\n}\n";
+  Out.close();
+
+  // Publish the report only if every write (including the final flush in
+  // close()) succeeded. On any failure remove the temporary file instead of
+  // leaving a partial report behind.
+  if (Out.fail() || std::rename(TempPath.c_str(), OutputPath.c_str()) != 0) {
+    std::remove(TempPath.c_str());
+  }
+}
+
+// Escapes Value for use inside a JSON string literal.
+std::string CompilerPassTimingSink::escapeJson(const std::string &Value) {
+  std::string Escaped;
+  Escaped.reserve(Value.size());
+  for (char Ch : Value) {
+    switch (Ch) {
+    case '\\':
+      Escaped += "\\\\";
+      break;
+    case '"':
+      Escaped += "\\\"";
+      break;
+    case '\n':
+      Escaped += "\\n";
+      break;
+    case '\r':
+      Escaped += "\\r";
+      break;
+    case '\t':
+      Escaped += "\\t";
+      break;
+    default:
+      if (static_cast<unsigned char>(Ch) < 0x20) {
+        // JSON forbids raw control characters inside strings; emit \u00XX.
+        char Buf[8];
+        std::snprintf(Buf, sizeof(Buf), "\\u%04x",
+                      static_cast<unsigned>(static_cast<unsigned char>(Ch)));
+        Escaped += Buf;
+      } else {
+        Escaped += Ch;
+      }
+      break;
+    }
+  }
+  return Escaped;
+}
+
+// Starts timing one pipeline run; StartTime anchors the total reported by
+// flush().
+CompilerPassTimingSession::CompilerPassTimingSession(std::string PipelineName,
+                                                     uint32_t FuncIdx)
+    : Enabled(CompilerPassTimingSink::get().isEnabled()),
+      StartTime(std::chrono::steady_clock::now()),
+      Record{std::move(PipelineName), FuncIdx, {}, 0.0} {}
+
+// Appends one named phase timing; does nothing when timing is disabled.
+void CompilerPassTimingSession::addEntry(std::string Name, double TimeMs) {
+  if (!Enabled) {
+    return;
+  }
+
+  Record.Entries.push_back({std::move(Name), TimeMs});
+}
+
+// Stamps the total elapsed time, hands the record to the sink, and resets the
+// local record to its default state.
+void CompilerPassTimingSession::flush() {
+  if (!Enabled) {
+    return;
+  }
+
+  Record.TotalTimeMs =
+      durationToMs(std::chrono::steady_clock::now() - StartTime);
+  CompilerPassTimingSink::get().appendRecord(std::move(Record));
+  Record = {};
+}
+
+// Captures the start time only when the session is recording.
+ScopedCompilerPassTimer::ScopedCompilerPassTimer(
+    CompilerPassTimingSession *Session, const char *Name)
+    : Session(Session), Name(Name),
+      StartTime(Session && Session->isEnabled()
+                    ? std::chrono::steady_clock::now()
+                    : std::chrono::steady_clock::time_point{}) {}
+
+// Records the elapsed time under Name when the session is recording.
+ScopedCompilerPassTimer::~ScopedCompilerPassTimer() {
+  if (!Session || !Session->isEnabled()) {
+    return;
+  }
+
+  Session->addEntry(Name,
+                    durationToMs(std::chrono::steady_clock::now() - StartTime));
+}
+
+} // namespace COMPILER
diff --git a/src/compiler/common/pass_timing.h b/src/compiler/common/pass_timing.h
new file mode 100644
index 000000000..5cf766d6d
--- /dev/null
+++ b/src/compiler/common/pass_timing.h
@@ -0,0 +1,78 @@
+// Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef ZEN_COMPILER_COMMON_PASS_TIMING_H
+#define ZEN_COMPILER_COMMON_PASS_TIMING_H
+
+#include "compiler/common/common_defs.h"
+
+#include <chrono>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace COMPILER {
+
+struct CompilerPassTimingEntry { // One timed pass inside a session.
+  std::string Name;              // Pass label, e.g. "verify_mir".
+  double TimeMs = 0.0;           // Duration as produced by durationToMs().
+}; // Brace-initialised as {Name, TimeMs}; keep the member order.
+
+struct CompilerPassTimingRecord { // Everything one session reports.
+  std::string Pipeline;           // Pipeline label given to the session.
+  uint32_t FuncIdx = 0;           // Function index given to the session.
+  std::vector<CompilerPassTimingEntry> Entries; // In addEntry() call order.
+  double TotalTimeMs = 0.0;       // Session construction to flush().
+}; // Brace-initialised positionally by the session; keep the member order.
+
+class CompilerPassTimingSink final : public NonCopyable {
+public:
+  static CompilerPassTimingSink &get(); // Only way to reach an instance.
+  // Sessions read this once, at construction, to decide whether to record.
+  bool isEnabled() const { return Enabled; }
+  // Receives a finished record by value; sessions move theirs in on flush().
+  void appendRecord(CompilerPassTimingRecord Record);
+
+private:
+  CompilerPassTimingSink(); // Private: callers must go through get().
+  ~CompilerPassTimingSink();
+  // NOTE(review): "Locked" presumably means Mutex is already held - confirm.
+  void writeReportLocked() const;
+  static std::string escapeJson(const std::string &Value);
+
+  const bool Enabled = false;
+  const std::string OutputPath; // Presumably the report destination - confirm.
+  mutable std::mutex Mutex;     // mutable: usable from const members.
+  std::vector<CompilerPassTimingRecord> Records;
+};
+
+class CompilerPassTimingSession final : public NonCopyable {
+public:
+  CompilerPassTimingSession(std::string PipelineName, uint32_t FuncIdx);
+  // Fixed at construction from CompilerPassTimingSink::get().isEnabled().
+  bool isEnabled() const { return Enabled; }
+  // Both calls below return immediately when the session is disabled.
+  void addEntry(std::string Name, double TimeMs);
+  void flush(); // Sets the total time, then moves the record to the sink.
+
+private:
+  const bool Enabled = false;
+  const std::chrono::steady_clock::time_point StartTime; // Set in the ctor.
+  CompilerPassTimingRecord Record; // Accumulates entries until flush().
+};
+
+class ScopedCompilerPassTimer final : public NonCopyable {
+public:
+  ScopedCompilerPassTimer(CompilerPassTimingSession *Session, const char *Name);
+  // On scope exit, adds a (Name, elapsed ms) entry to an enabled session.
+  ~ScopedCompilerPassTimer();
+
+private:
+  CompilerPassTimingSession *Session = nullptr; // May be null: timer is inert.
+  const char *Name = nullptr; // Pointer is stored, not copied; must outlive us.
+  std::chrono::steady_clock::time_point StartTime;
+};
+
+} // namespace COMPILER
+
+#endif // ZEN_COMPILER_COMMON_PASS_TIMING_H
diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp
index a45ba3c2f..5f942d61d 100644
--- a/src/compiler/compiler.cpp
+++ b/src/compiler/compiler.cpp
@@ -12,11 +12,13 @@
 #include "compiler/cgir/pass/reg_alloc_basic.h"
 #include "compiler/cgir/pass/reg_alloc_greedy.h"
 #include "compiler/cgir/pass/register_coalescer.h"
+#include "compiler/common/pass_timing.h"
 #include "compiler/context.h"
 #include "compiler/frontend/parser.h"
 #include "compiler/mir/function.h"
 #include "compiler/mir/module.h"
 #include "compiler/mir/pass/dead_basicblock_elim.h"
+#include "compiler/mir/pass/dmir_rewrite.h"
 #include "compiler/mir/pass/verifier.h"
 #include "compiler/target/x86/x86_cg_peephole.h"
 #include "compiler/target/x86/x86_mc_lowering.h"
@@ -55,27 +57,45 @@ static inline bool isFuncNeedGreedyRA(uint32_t FuncIdx) {
 #endif // ZEN_ENABLE_DEBUG_GREEDY_RA
 
 void JITCompilerBase::compileMIRToCgIR(MModule &MMod, MFunction &MFunc,
-                                       CgFunction &CgFunc,
-                                       bool DisableGreedyRA) {
+                                       CgFunction &CgFunc, bool DisableGreedyRA,
+                                       CompilerPassTimingSession *PassTiming) {
 #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING
   llvm::DebugFlag = true;
   llvm::dbgs() << "\n########## MIR Dump ##########\n\n";
   MFunc.dump();
 #endif
 
-  MVerifier Verifier(MMod, MFunc, llvm::errs());
-  if (!Verifier.verify()) {
-    throw getError(ErrorCode::MIRVerifyingFailed);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "verify_mir");
+    MVerifier Verifier(MMod, MFunc, llvm::errs());
+    if (!Verifier.verify()) {
+      throw getError(ErrorCode::MIRVerifyingFailed);
+    }
+  }
+
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "dead_mbb_elim");
+    DeadMBasicBlockElim MBBDCE;
+    MBBDCE.runOnMFunction(MFunc);
   }
 
-  DeadMBasicBlockElim MBBDCE;
-  MBBDCE.runOnMFunction(MFunc);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "dmir_rewrite");
+    DMirRewritePass RewritePass;
+    RewritePass.runOnMFunction(MFunc);
+  }
 
   CgFunction &MF = CgFunc;
 
-  // TODO: refactor to pass
-  X86CgLowering CgLowering(MF);
-  X86CgPeephole CgPeephole(MF);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "x86_cg_lowering");
+    // TODO: refactor to pass
+    X86CgLowering CgLowering(MF);
+  }
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "x86_cg_peephole");
+    X86CgPeephole CgPeephole(MF);
+  }
   CgPhiElimination PhiElimination;
   PhiElimination.runOnCgFunction(MF);
 
@@ -83,8 +103,10 @@ void JITCompilerBase::compileMIRToCgIR(MModule &MMod, MFunction &MFunc,
 
   if (DisableGreedyRA) {
     ZEN_LOG_DEBUG("using fast ra for function %d", MFuncIdx);
+    ScopedCompilerPassTimer Timer(PassTiming, "fast_ra");
     FastRA RA(MF);
   } else {
+    ScopedCompilerPassTimer Timer(PassTiming, "greedy_ra");
 #ifdef ZEN_ENABLE_DEBUG_GREEDY_RA
     if (!isFuncNeedGreedyRA(MFuncIdx)) {
       ZEN_LOG_DEBUG("using fast ra for function %d", MFuncIdx);
@@ -123,16 +145,22 @@ void JITCompilerBase::compileMIRToCgIR(MModule &MMod, MFunction &MFunc,
   MF.dump();
 #endif
 
-  PrologEpilogInserter PEInserter;
-  PEInserter.runOnCgFunction(MF);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "prolog_epilog_inserter");
+    PrologEpilogInserter PEInserter;
+    PEInserter.runOnCgFunction(MF);
+  }
 #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING
   llvm::dbgs() << "\n########## CgIR Dump After Prologue/Epilogue Insertion "
                   "##########\n\n";
   MF.dump();
 #endif
 
-  ExpandPostRAPseudos PseudosExpander;
-  PseudosExpander.runOnCgFunction(MF);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "expand_post_ra_pseudos");
+    ExpandPostRAPseudos PseudosExpander;
+    PseudosExpander.runOnCgFunction(MF);
+  }
 #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING
   llvm::dbgs() << "\n########## CgIR Dump After Post-RA Pseudo "
                   "Instruction Expansion "
diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h
index 2fb0da0a1..9be84811d 100644
--- a/src/compiler/compiler.h
+++ b/src/compiler/compiler.h
@@ -10,6 +10,7 @@
 namespace COMPILER {
 
 class CompileContext;
+class CompilerPassTimingSession;
 class WasmFrontendContext;
 class MModule;
 class MFunction;
@@ -20,7 +21,8 @@ class JITCompilerBase : public NonCopyable {
   virtual ~JITCompilerBase() = default;
 
   static void compileMIRToCgIR(MModule &Mod, MFunction &MFunc,
-                               CgFunction &CgFunc, bool DisableGreedyRA);
+                               CgFunction &CgFunc, bool DisableGreedyRA,
+                               CompilerPassTimingSession *PassTiming = nullptr);
   static void emitObjectBuffer(CompileContext *Ctx);
 };
 
diff --git a/src/compiler/evm_compiler.cpp b/src/compiler/evm_compiler.cpp
index 04d45ad60..eaf1ea846 100644
--- a/src/compiler/evm_compiler.cpp
+++ b/src/compiler/evm_compiler.cpp
@@ -4,6 +4,7 @@
 #include "compiler/evm_compiler.h"
 #include "common/thread_pool.h"
 #include "compiler/cgir/cg_function.h"
+#include "compiler/common/pass_timing.h"
 #include "compiler/mir/module.h"
 #include "compiler/target/x86/x86_mc_lowering.h"
 #include "platform/map.h"
@@ -27,7 +28,8 @@ const size_t MPROTECT_CHUNK_SIZE = 0x1000;
 namespace COMPILER {
 
 void EVMJITCompiler::compileEVMToMC(EVMFrontendContext &Ctx, MModule &Mod,
-                                    uint32_t FuncIdx, bool DisableGreedyRA) {
+                                    uint32_t FuncIdx, bool DisableGreedyRA,
+                                    CompilerPassTimingSession *PassTiming) {
   if (Ctx.Inited) {
     // Release all memory allocated by previous function compilation
     Ctx.MemPool = CompileMemPool();
@@ -43,16 +45,22 @@ void EVMJITCompiler::compileEVMToMC(EVMFrontendContext &Ctx, MModule &Mod,
   CgFunction CgFunc(Ctx, MFunc);
   MFunc.setFunctionType(Mod.getFuncType(FuncIdx));
   EVMMirBuilder MIRBuilder(Ctx, MFunc);
-  MIRBuilder.compile(&Ctx);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "evm_mir_build");
+    MIRBuilder.compile(&Ctx);
+  }
 #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING
   MIRBuilder.dumpMemoryCompileStats();
 #endif // ZEN_ENABLE_MULTIPASS_JIT_LOGGING
 
   // Apply MIR optimizations and generate machine code
-  compileMIRToCgIR(Mod, MFunc, CgFunc, DisableGreedyRA);
+  compileMIRToCgIR(Mod, MFunc, CgFunc, DisableGreedyRA, PassTiming);
 
   // Generate machine code
-  Ctx.getMCLowering().runOnCgFunction(CgFunc);
+  {
+    ScopedCompilerPassTimer Timer(PassTiming, "x86_mc_lowering");
+    Ctx.getMCLowering().runOnCgFunction(CgFunc);
+  }
 }
 
 void EagerEVMJITCompiler::compile() {
@@ -85,10 +93,15 @@ void EagerEVMJITCompiler::compile() {
 
   auto &CodeMPool = EVMMod->getJITCodeMemPool();
   uint8_t *JITCode = const_cast<uint8_t *>(CodeMPool.getMemStart());
+  CompilerPassTimingSession PassTiming("evm", 0);
 
   // EVM has only 1 function, use direct single-threaded compilation
-  compileEVMToMC(Ctx, Mod, 0, Config.DisableMultipassGreedyRA);
-  emitObjectBuffer(&Ctx);
+  compileEVMToMC(Ctx, Mod, 0, Config.DisableMultipassGreedyRA, &PassTiming);
+  {
+    ScopedCompilerPassTimer Timer(&PassTiming, "emit_object_buffer");
+    emitObjectBuffer(&Ctx);
+  }
+  PassTiming.flush();
   ZEN_ASSERT(Ctx.ExternRelocs.empty());
 
   uint8_t *JITFuncPtr = Ctx.CodePtr + Ctx.FuncOffsetMap[0];
diff --git a/src/compiler/evm_compiler.h b/src/compiler/evm_compiler.h
index 0dac7b84d..998add412 100644
--- a/src/compiler/evm_compiler.h
+++ b/src/compiler/evm_compiler.h
@@ -10,6 +10,8 @@
 
 namespace COMPILER {
 
+class CompilerPassTimingSession;
+
 class EVMJITCompiler : public JITCompilerBase {
 protected:
   EVMJITCompiler(runtime::EVMModule *EVMMod)
@@ -19,7 +21,8 @@ class EVMJITCompiler : public JITCompilerBase {
   ~EVMJITCompiler() override = default;
 
   void compileEVMToMC(EVMFrontendContext &Ctx, MModule &Mod, uint32_t FuncIdx,
-                      bool DisableGreedyRA);
+                      bool DisableGreedyRA,
+                      CompilerPassTimingSession *PassTiming = nullptr);
 
   runtime::EVMModule *EVMMod;
   const runtime::RuntimeConfig &Config;
diff --git a/src/compiler/evm_frontend/evm_mir_compiler.cpp b/src/compiler/evm_frontend/evm_mir_compiler.cpp
index fa748f4c3..ac4043091 100644
--- a/src/compiler/evm_frontend/evm_mir_compiler.cpp
+++ b/src/compiler/evm_frontend/evm_mir_compiler.cpp
@@ -1782,10 +1782,13 @@ typename EVMMirBuilder::Operand EVMMirBuilder::handleMul(Operand MultiplicandOp,
                                 MInstruction *Term) -> SumCarryPair {
       MInstruction *NewSum = createInstruction<BinaryInstruction>(
           false, OP_add, I64Type, Sum, Term);
-      MInstruction *NewCarry =
-          createInstruction<AdcInstruction>(false, I64Type, Carry, Zero, Zero);
-      return {protectUnsafeValue(NewSum, I64Type),
-              protectUnsafeValue(NewCarry, I64Type)};
+      // NewCarry captures the carry-out of ADD(Sum, Term). Operand 2 points
+      // to NewSum (the carry-producing instruction) to make the dependency
+      // explicit for analysis passes. x86 lowering uses hardware CF.
+      MInstruction *ProtectedSum = protectUnsafeValue(NewSum, I64Type);
+      MInstruction *NewCarry = createInstruction<AdcInstruction>(
+          false, I64Type, Carry, Zero, ProtectedSum);
+      return {ProtectedSum, protectUnsafeValue(NewCarry, I64Type)};
     };
 
     auto addTermNoCarry = [&](MInstruction *Sum, MInstruction *Term) {
@@ -2558,31 +2561,23 @@ EVMMirBuilder::handleAddU64Const(const Operand &FullOp,
   U256Inst LHS = extractU256Operand(FullOp);
   MType *MirI64Type =
       EVMFrontendContext::getMIRTypeFromEVMType(EVMType::UINT64);
-  MInstruction *Carry = createIntConstInstruction(MirI64Type, 0);
 
   MInstruction *RHS0 =
       createIntConstInstruction(MirI64Type, U64ConstOp.getConstValue()[0]);
   MInstruction *RHSZero = createIntConstInstruction(MirI64Type, 0);
 
-  // Pre-materialize LHS operands for carry chain safety
-  for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) {
-    LHS[I] = protectUnsafeValue(LHS[I], MirI64Type);
-  }
-  RHS0 = protectUnsafeValue(RHS0, MirI64Type);
-  MInstruction *ProtectedZero = protectUnsafeValue(RHSZero, MirI64Type);
-
-  U256Inst Result = {};
-  // Limb 0: ADD with the actual u64 value
-  Result[0] = protectUnsafeValue(createInstruction<BinaryInstruction>(
-                                     false, OP_add, MirI64Type, LHS[0], RHS0),
-                                 MirI64Type);
-  // Limbs 1-3: ADC with shared zero (carry propagation only)
-  for (size_t I = 1; I < EVM_ELEMENTS_COUNT; ++I) {
-    Result[I] =
-        protectUnsafeValue(createInstruction<AdcInstruction>(
-                               false, MirI64Type, LHS[I], ProtectedZero, Carry),
-                           MirI64Type);
-  }
+  MInstruction *AddInst = createInstruction<EvmU256AddInstruction>(
+      false, MirI64Type, LHS[0], LHS[1], LHS[2], LHS[3], RHS0, RHSZero, RHSZero,
+      RHSZero);
+  U256Inst Result = {
+      AddInst,
+      createInstruction<EvmU256AddResultInstruction>(false, MirI64Type, AddInst,
+                                                     1),
+      createInstruction<EvmU256AddResultInstruction>(false, MirI64Type, AddInst,
+                                                     2),
+      createInstruction<EvmU256AddResultInstruction>(false, MirI64Type, AddInst,
+                                                     3),
+  };
   return Operand(Result, EVMType::UINT256);
 }
 
@@ -3844,6 +3839,11 @@ EVMMirBuilder::handleMLoad(Operand AddrComponents) {
   return Result;
 }
 
+// The old ordering hack (ValueDep = or(parts) & 0) was needed to prevent
+// flag-clobbering interleaving when add/adc and sub/sbb chains were emitted
+// as separate instructions. With the introduction of EvmU256Add/Sub pseudo-ops,
+// the carry/borrow chain is atomic and cannot be interleaved, making the hack
+// unnecessary.
 void EVMMirBuilder::handleMStore(Operand AddrComponents,
                                  Operand ValueComponents) {
 #ifdef ZEN_ENABLE_EVM_GAS_REGISTER
@@ -3878,19 +3878,6 @@ void EVMMirBuilder::handleMStore(Operand AddrComponents,
     MInstruction *SizeConst = createIntConstInstruction(I64Type, 32);
     MInstruction *RequiredSize = createInstruction<BinaryInstruction>(
         false, OP_add, I64Type, Offset, SizeConst);
-    // Tie expansion ordering to the stored value to prevent reordering on the
-    // fallback path that still emits a per-op expand sequence.
-    MInstruction *Zero = createIntConstInstruction(I64Type, 0);
-    MInstruction *ValueDep = createInstruction<BinaryInstruction>(
-        false, OP_or, I64Type, ValueParts[0], ValueParts[1]);
-    ValueDep = createInstruction<BinaryInstruction>(false, OP_or, I64Type,
-                                                    ValueDep, ValueParts[2]);
-    ValueDep = createInstruction<BinaryInstruction>(false, OP_or, I64Type,
-                                                    ValueDep, ValueParts[3]);
-    ValueDep = createInstruction<BinaryInstruction>(false, OP_and, I64Type,
-                                                    ValueDep, Zero);
-    RequiredSize = createInstruction<BinaryInstruction>(false, OP_add, I64Type,
-                                                        RequiredSize, ValueDep);
     MInstruction *Overflow = createInstruction<CmpInstruction>(
         false, CmpInstruction::Predicate::ICMP_ULT, I64Type, RequiredSize,
         Offset);
diff --git a/src/compiler/evm_frontend/evm_mir_compiler.h b/src/compiler/evm_frontend/evm_mir_compiler.h
index 34b88c1e9..d16630c6a 100644
--- a/src/compiler/evm_frontend/evm_mir_compiler.h
+++ b/src/compiler/evm_frontend/evm_mir_compiler.h
@@ -356,56 +356,31 @@ class EVMMirBuilder final {
         EVMFrontendContext::getMIRTypeFromEVMType(EVMType::UINT64);
 
     if constexpr (Operator == BinaryOperator::BO_ADD) {
-      MInstruction *Carry = createIntConstInstruction(MirI64Type, 0);
-
-      // Pre-materialize all operand components into variables before the
-      // ADD/ADC carry chain to prevent flag-clobbering during x86 lowering.
-      for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) {
-        LHS[I] = protectUnsafeValue(LHS[I], MirI64Type);
-        RHS[I] = protectUnsafeValue(RHS[I], MirI64Type);
-      }
-
-      for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) {
-        if (I == 0) {
-          MInstruction *LocalResult = createInstruction<BinaryInstruction>(
-              false, OP_add, MirI64Type, LHS[I], RHS[I]);
-          Result[I] = protectUnsafeValue(LocalResult, MirI64Type);
-        } else {
-          MInstruction *LocalResult = createInstruction<AdcInstruction>(
-              false, MirI64Type, LHS[I], RHS[I], Carry);
-          Result[I] = protectUnsafeValue(LocalResult, MirI64Type);
-        }
-      }
+      MInstruction *AddInst = createInstruction<EvmU256AddInstruction>(
+          false, MirI64Type, LHS[0], LHS[1], LHS[2], LHS[3], RHS[0], RHS[1],
+          RHS[2], RHS[3]);
+      Result = {
+          AddInst,
+          createInstruction<EvmU256AddResultInstruction>(false, MirI64Type,
+                                                         AddInst, 1),
+          createInstruction<EvmU256AddResultInstruction>(false, MirI64Type,
+                                                         AddInst, 2),
+          createInstruction<EvmU256AddResultInstruction>(false, MirI64Type,
+                                                         AddInst, 3),
+      };
     } else if constexpr (Operator == BinaryOperator::BO_SUB) {
-      // The borrow here is only used for constructing the sbb instruction.
-      // We currently use sbb only in bo_sub, and since we can guarantee the
-      // instructions are consecutive, there's no need to compute the borrow
-      // in DMIR.
-      MInstruction *Borrow = createIntConstInstruction(MirI64Type, 0);
-
-      // Pre-materialize all operand components into variables before the
-      // SUB/SBB borrow chain. This ensures that during x86 lowering, no
-      // flag-modifying instructions (e.g. ADD for address computation in
-      // BYTES32-to-U256 conversion) are emitted between the SUB and SBB
-      // instructions that form the borrow chain. Without this, lazy
-      // expression lowering of operands like BSWAP(LOAD(ADD(ptr, offset)))
-      // would emit x86 ADD instructions that clobber the carry flag (CF).
-      for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) {
-        LHS[I] = protectUnsafeValue(LHS[I], MirI64Type);
-        RHS[I] = protectUnsafeValue(RHS[I], MirI64Type);
-      }
-
-      for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) {
-        if (I == 0) {
-          MInstruction *LocalResult = createInstruction<BinaryInstruction>(
-              false, OP_sub, MirI64Type, LHS[I], RHS[I]);
-          Result[I] = protectUnsafeValue(LocalResult, MirI64Type);
-        } else {
-          MInstruction *LocalResult = createInstruction<SbbInstruction>(
-              false, MirI64Type, LHS[I], RHS[I], Borrow);
-          Result[I] = protectUnsafeValue(LocalResult, MirI64Type);
-        }
-      }
+      MInstruction *SubInst = createInstruction<EvmU256SubInstruction>(
+          false, MirI64Type, LHS[0], LHS[1], LHS[2], LHS[3], RHS[0], RHS[1],
+          RHS[2], RHS[3]);
+      Result = {
+          SubInst,
+          createInstruction<EvmU256SubResultInstruction>(false, MirI64Type,
+                                                         SubInst, 1),
+          createInstruction<EvmU256SubResultInstruction>(false, MirI64Type,
+                                                         SubInst, 2),
+          createInstruction<EvmU256SubResultInstruction>(false, MirI64Type,
+                                                         SubInst, 3),
+      };
     } else {
       ZEN_ASSERT_TODO();
     }
diff --git a/src/compiler/mir/dmir_rewrite_mining_bootstrap.json b/src/compiler/mir/dmir_rewrite_mining_bootstrap.json
new file mode 100644
index 000000000..d9bc9a7cb
--- /dev/null
+++ b/src/compiler/mir/dmir_rewrite_mining_bootstrap.json
@@ -0,0 +1,165 @@
+{
+  "base_terms": [
+    "x",
+    "y",
+    "cond",
+    "0:i64",
+    "1:i64",
+    "18446744073709551615:i64"
+  ],
+  "unary_not_terms": [
+    "x",
+    "y",
+    "cond"
+  ],
+  "double_not_terms": [
+    "x",
+    "y",
+    "cond"
+  ],
+  "binary_fixed_rhs": [
+    {
+      "ops": [
+        "add",
+        "sub",
+        "and",
+        "or",
+        "xor",
+        "shl",
+        "sshr",
+        "ushr"
+      ],
+      "lhs": [
+        "x",
+        "y",
+        "cond"
+      ],
+      "rhs": "0:i64"
+    },
+    {
+      "ops": [
+        "and",
+        "or",
+        "xor"
+      ],
+      "lhs": [
+        "x",
+        "y",
+        "cond",
+        "(not x)",
+        "(not y)"
+      ],
+      "rhs": "18446744073709551615:i64"
+    },
+    {
+      "ops": [
+        "mul"
+      ],
+      "lhs": [
+        "x",
+        "y"
+      ],
+      "rhs": "0:i64"
+    },
+    {
+      "ops": [
+        "mul"
+      ],
+      "lhs": [
+        "x",
+        "y"
+      ],
+      "rhs": "1:i64"
+    }
+  ],
+  "binary_self": [
+    {
+      "ops": [
+        "and",
+        "mul",
+        "or",
+        "xor"
+      ],
+      "terms": [
+        "x",
+        "y",
+        "cond"
+      ]
+    }
+  ],
+  "select_same_arm": {
+    "conditions": [
+      "cond",
+      "x",
+      "0:i64",
+      "1:i64"
+    ],
+    "values": [
+      "x",
+      "y",
+      "(not x)"
+    ]
+  },
+  "pair_binary_groups": [
+    {
+      "ops": [
+        "add",
+        "sub",
+        "and",
+        "or",
+        "xor"
+      ],
+      "lhs": [
+        "x",
+        "y"
+      ],
+      "rhs": [
+        "x",
+        "y",
+        "0:i64"
+      ]
+    },
+    {
+      "ops": [
+        "and",
+        "or",
+        "xor"
+      ],
+      "lhs": [
+        "x",
+        "y",
+        "(and x y)",
+        "(or x y)",
+        "(xor x y)",
+        "(not x)",
+        "(not y)"
+      ],
+      "rhs": [
+        "x",
+        "y",
+        "0:i64",
+        "(and x y)",
+        "(or x y)",
+        "(xor x y)",
+        "(not x)",
+        "(not y)"
+      ]
+    }
+  ],
+  "adc_sbb_zero": {
+    "ops": [
+      "adc",
+      "sbb"
+    ],
+    "lhs": [
+      "x",
+      "y"
+    ],
+    "rhs": [
+      "x",
+      "y",
+      "0:i64"
+    ],
+    "carry": "0:i64"
+  }
+}
diff --git a/src/compiler/mir/dmir_rewrite_rules.json b/src/compiler/mir/dmir_rewrite_rules.json
new file mode 100644
index 000000000..4eae7e563
--- /dev/null
+++ b/src/compiler/mir/dmir_rewrite_rules.json
@@ -0,0 +1,2641 @@
+{
+  "version": 1,
+  "rules": [
+    {
+      "name": "add-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(add x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAddZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "double-not",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(not (not x))",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesDoubleNotRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sub-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(sub x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSubZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(and x 0:i64)",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-allones",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(and x 18446744073709551615:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndAllOnesRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(and x x)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-not-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(and (not x) x)",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndNotSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(or x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-allones",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(or x 18446744073709551615:i64)",
+      "rhs": "18446744073709551615:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAllOnesRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(or x x)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-absorb-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (or x y) x)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndAbsorbOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-factor-not-self",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (and x y) (not x))",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndFactorNotSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-factor-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (and x y) (or x y))",
+      "rhs": "(and x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndFactorOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-factor-lhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (and x y) x)",
+      "rhs": "(and x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndFactorLhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-factor-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (and x y) y)",
+      "rhs": "(and x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndFactorRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-factor-not-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (and x y) (not y))",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndFactorNotRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-and-xor-zero",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (and x y) (xor x y))",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndAndXorZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-not-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (not x) (or x y))",
+      "rhs": "(and (not x) y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndNotOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-not-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (not x) (xor x y))",
+      "rhs": "(and (not x) y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndNotXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-or-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (or x y) (xor x y))",
+      "rhs": "(xor x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndOrXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "and-or-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(and (or x y) y)",
+      "rhs": "y",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAndOrRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-absorb-and",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (and x y) x)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAbsorbAndRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-and-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (and x y) (or x y))",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAndOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-and-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (and x y) y)",
+      "rhs": "y",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAndRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-and-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (and x y) (xor x y))",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAndXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-factor-lhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (or x y) x)",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrFactorLhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-factor-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (or x y) y)",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrFactorRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-xor-lhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (xor x y) x)",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrXorLhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-xor-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (xor x y) y)",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrXorRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-not-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(or (not x) x)",
+      "rhs": "18446744073709551615:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrNotSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-and-not-lhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (and x y) (not x))",
+      "rhs": "(or (not x) y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAndNotLhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-and-not-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (and x y) (not y))",
+      "rhs": "(or (not y) x)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrAndNotRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-or-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (or x y) (xor x y))",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrOrXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "or-not-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(or (not x) (or x y))",
+      "rhs": "18446744073709551615:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesOrNotOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "select-same-arm",
+      "status": "accepted",
+      "inputs": [
+        "cond",
+        "x"
+      ],
+      "lhs": "(select cond x x)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 1,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": -1,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSelectSameArmRewrite",
+          "DMirValidation.FuzzesSelectSameArmRewriteI8",
+          "DMirValidation.FuzzesSelectSameArmRewriteI32"
+        ]
+      }
+    },
+    {
+      "name": "select-false-cond",
+      "status": "accepted",
+      "inputs": [
+        "t",
+        "f"
+      ],
+      "lhs": "(select 0:i64 t f)",
+      "rhs": "f",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 1,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": -1,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSelectFalseCondRewrite"
+        ]
+      }
+    },
+    {
+      "name": "select-true-cond",
+      "status": "accepted",
+      "inputs": [
+        "t",
+        "f"
+      ],
+      "lhs": "(select 1:i64 t f)",
+      "rhs": "t",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 1,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": -1,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSelectTrueCondRewrite"
+        ]
+      }
+    },
+    {
+      "name": "mul-pow2-to-shl",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(mul x 2:i64)",
+      "rhs": "(shl x 1:i64)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesMulPow2ToShlRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(xor x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(xor x x)",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-cancel",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (xor x y) x)",
+      "rhs": "y",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorCancelRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-cancel-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (xor x y) y)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorCancelRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-not-cancel",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (not x) (xor x y))",
+      "rhs": "(not y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorNotCancelRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-not-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(xor (not x) x)",
+      "rhs": "18446744073709551615:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorNotSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-not-not",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (not x) (not y))",
+      "rhs": "(xor x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorNotNotRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-not-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (not x) (or x y))",
+      "rhs": "(or (not y) x)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorNotOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-not-allones",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(xor (not x) 18446744073709551615:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorNotAllOnesRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-and-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (and x y) (or x y))",
+      "rhs": "(xor x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorAndOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-and-not-lhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (and x y) (not x))",
+      "rhs": "(or (not x) y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorAndNotLhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-and-not-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (and x y) (not y))",
+      "rhs": "(or (not y) x)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorAndNotRhsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-and-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (and x y) (xor x y))",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorAndXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "xor-or-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(xor (or x y) (xor x y))",
+      "rhs": "(and x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesXorOrXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sub-self",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(sub x x)",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSubSelfRewrite"
+        ]
+      }
+    },
+    {
+      "name": "shl-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(shl x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesShlZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sshr-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(sshr x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSshrZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "ushr-zero",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(ushr x 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesUshrZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "adc-zero-carry",
+      "status": "seed",
+      "inputs": [
+        "lhs",
+        "rhs"
+      ],
+      "lhs": "(adc lhs rhs 0:i64)",
+      "rhs": "(add lhs rhs)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 1,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": -1,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAdcWithoutCarryRewrite"
+        ]
+      }
+    },
+    {
+      "name": "adc-zero-operands",
+      "status": "seed",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(adc x 0:i64 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 1,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": -1,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAdcZeroOperandsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sbb-zero-borrow",
+      "status": "seed",
+      "inputs": [
+        "lhs",
+        "rhs"
+      ],
+      "lhs": "(sbb lhs rhs 0:i64)",
+      "rhs": "(sub lhs rhs)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 1,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": -1,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSbbWithoutBorrowRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sbb-zero-operands",
+      "status": "seed",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(sbb x 0:i64 0:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 1,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": -1,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSbbZeroOperandsRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sbb-self-zero-borrow",
+      "status": "seed",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(sbb x x 0:i64)",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 1,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": -1,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSbbSelfWithoutBorrowRewrite"
+        ]
+      }
+    },
+    {
+      "name": "mul-zero-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(mul x 0:i64)",
+      "rhs": "0:i64",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesMulZeroRewrite"
+        ]
+      }
+    },
+    {
+      "name": "mul-one-rhs",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(mul x 1:i64)",
+      "rhs": "x",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesMulOneRewrite"
+        ]
+      }
+    },
+    {
+      "name": "add-self-to-shl1",
+      "status": "accepted",
+      "inputs": [
+        "x"
+      ],
+      "lhs": "(add x x)",
+      "rhs": "(shl x 1:i64)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": 0,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAddSelfToShl1Rewrite"
+        ]
+      }
+    },
+    {
+      "name": "add-neg-x-y-to-sub-y-x",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(add (sub 0:i64 x) y)",
+      "rhs": "(sub y x)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAddNegToSubRewrite"
+        ]
+      }
+    },
+    {
+      "name": "add-y-neg-x-to-sub-y-x",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(add y (sub 0:i64 x))",
+      "rhs": "(sub y x)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAddNegToSubRewrite"
+        ]
+      }
+    },
+    {
+      "name": "add-and-xor-to-or",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(add (and x y) (xor x y))",
+      "rhs": "(or x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAddAndXorToOrRewrite"
+        ]
+      }
+    },
+    {
+      "name": "add-and-or-to-add",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(add (and x y) (or x y))",
+      "rhs": "(add x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesAddAndOrToAddRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sub-and-or-to-neg-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(sub (and x y) (or x y))",
+      "rhs": "(sub 0:i64 (xor x y))",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSubAndOrToNegXorRewrite"
+        ]
+      }
+    },
+    {
+      "name": "sub-or-and-to-xor",
+      "status": "accepted",
+      "inputs": [
+        "x",
+        "y"
+      ],
+      "lhs": "(sub (or x y) (and x y))",
+      "rhs": "(xor x y)",
+      "cost": {
+        "lhs": {
+          "dmir_inst": 3,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "rhs": {
+          "dmir_inst": 1,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        },
+        "delta": {
+          "dmir_inst": -2,
+          "select_depth": 0,
+          "adc_chain": 0,
+          "runtime_calls": 0
+        }
+      },
+      "validation": {
+        "modes": [
+          "interpreter_fuzz"
+        ],
+        "coverage": [
+          "DMirValidation.FuzzesSubOrAndToXorRewrite"
+        ]
+      }
+    }
+  ]
+}
diff --git a/src/compiler/mir/instruction.h b/src/compiler/mir/instruction.h
index c01198741..083a0a93a 100644
--- a/src/compiler/mir/instruction.h
+++ b/src/compiler/mir/instruction.h
@@ -34,6 +34,10 @@ class MInstruction : public NonCopyable {
     EVM_UMUL128_HI,
     EVM_U256_MUL,
     EVM_U256_MUL_RESULT,
+    EVM_U256_ADD,
+    EVM_U256_ADD_RESULT,
+    EVM_U256_SUB,
+    EVM_U256_SUB_RESULT,
     EVM_UDIV128_BY64,
     EVM_UREM128_BY64,
 
diff --git a/src/compiler/mir/instructions.cpp b/src/compiler/mir/instructions.cpp
index a8749b500..c6e37cf25 100644
--- a/src/compiler/mir/instructions.cpp
+++ b/src/compiler/mir/instructions.cpp
@@ -224,7 +224,9 @@ void MInstruction::print(llvm::raw_ostream &OS) const {
     OS << getOpcodeString(_opcode) << " (" << getOperand<0>() << ')';
     break;
   }
-  case EVM_U256_MUL: {
+  case EVM_U256_MUL:
+  case EVM_U256_ADD:
+  case EVM_U256_SUB: {
     OS << getOpcodeString(_opcode) << " (";
     for (OperandNum I = 0; I < getNumOperands(); ++I) {
       OS << getOperand(I);
@@ -241,6 +243,18 @@ void MInstruction::print(llvm::raw_ostream &OS) const {
        << ", idx = " << MulResult->getResultIdx() << ')';
     break;
   }
+  case EVM_U256_ADD_RESULT: {
+    auto *AddResult = llvm::cast<EvmU256AddResultInstruction>(this);
+    OS << getOpcodeString(_opcode) << " (" << AddResult->getAddInst()
+       << ", idx = " << AddResult->getResultIdx() << ')';
+    break;
+  }
+  case EVM_U256_SUB_RESULT: {
+    auto *SubResult = llvm::cast<EvmU256SubResultInstruction>(this);
+    OS << getOpcodeString(_opcode) << " (" << SubResult->getSubInst()
+       << ", idx = " << SubResult->getResultIdx() << ')';
+    break;
+  }
   case EVM_UDIV128_BY64: {
     OS << getOpcodeString(_opcode) << " (" << getOperand<0>() << ", "
        << getOperand<1>() << ", " << getOperand<2>() << ')';
diff --git a/src/compiler/mir/instructions.h b/src/compiler/mir/instructions.h
index 3891d2a91..2d9d1cb53 100644
--- a/src/compiler/mir/instructions.h
+++ b/src/compiler/mir/instructions.h
@@ -49,6 +49,10 @@ class BinaryInstruction : public FixedOperandInstruction<2> {
         std::forward<Arguments>(args)...);
   }
 
+  // LLVM-style RTTI hook (consulted by llvm::isa / llvm::cast): an instruction
+  // is accepted as a BinaryInstruction exactly when its kind tag is BINARY.
+  // NOTE(review): subclasses built through the protected Kind-taking
+  // constructor with a different kind will not satisfy this check — confirm
+  // that is intended.
+  static bool classof(const MInstruction *Inst) {
+    const bool HasBinaryKind = Inst->getKind() == MInstruction::BINARY;
+    return HasBinaryKind;
+  }
+
 protected:
   // Used for subclass
   BinaryInstruction(Kind kind, Opcode opcode, MType *type, MInstruction *lhs,
@@ -860,6 +864,118 @@ class EvmU256MulResultInstruction : public UnaryInstruction {
   uint32_t ResultIdx = 0;
 };
 
+// Pseudo instruction for a 256-bit addition (opcode evm_u256_add). It carries
+// eight operands: the first four constructor arguments fill operand slots 0-3
+// and the last four fill slots 4-7.
+// NOTE(review): presumably each group of four holds the 64-bit limbs of one
+// addend — confirm the limb order against the lowering code.
+class EvmU256AddInstruction : public FixedOperandInstruction<8> {
+public:
+  // Factory: forwards every argument to FixedOperandInstruction::create.
+  template <typename... ArgTs>
+  static EvmU256AddInstruction *create(ArgTs &&...Args) {
+    return FixedOperandInstruction::create<EvmU256AddInstruction>(
+        std::forward<ArgTs>(Args)...);
+  }
+
+  // LLVM-style RTTI: matches instructions whose kind is EVM_U256_ADD.
+  static bool classof(const MInstruction *Inst) {
+    return Inst->getKind() == EVM_U256_ADD;
+  }
+
+private:
+  friend class FixedOperandInstruction;
+  EvmU256AddInstruction(MType *Type, MInstruction *Lhs0, MInstruction *Lhs1,
+                        MInstruction *Lhs2, MInstruction *Lhs3,
+                        MInstruction *Rhs0, MInstruction *Rhs1,
+                        MInstruction *Rhs2, MInstruction *Rhs3)
+      : FixedOperandInstruction(MInstruction::EVM_U256_ADD, OP_evm_u256_add, 8,
+                                Type) {
+    // Slots 0-3: first operand group.
+    setOperand<0>(Lhs0);
+    setOperand<1>(Lhs1);
+    setOperand<2>(Lhs2);
+    setOperand<3>(Lhs3);
+    // Slots 4-7: second operand group.
+    setOperand<4>(Rhs0);
+    setOperand<5>(Rhs1);
+    setOperand<6>(Rhs2);
+    setOperand<7>(Rhs3);
+  }
+};
+
+// Unary instruction that refers to one result of an evm_u256_add: operand 0 is
+// the add instruction and the stored index records which result is meant
+// (opcodes.def describes this opcode as extracting an extra limb).
+class EvmU256AddResultInstruction : public UnaryInstruction {
+public:
+  // Factory: forwards every argument to FixedOperandInstruction::create.
+  template <typename... ArgTs>
+  static EvmU256AddResultInstruction *create(ArgTs &&...Args) {
+    return FixedOperandInstruction::create<EvmU256AddResultInstruction>(
+        std::forward<ArgTs>(Args)...);
+  }
+
+  // LLVM-style RTTI: matches instructions whose kind is EVM_U256_ADD_RESULT.
+  static bool classof(const MInstruction *Inst) {
+    return Inst->getKind() == EVM_U256_ADD_RESULT;
+  }
+
+  // The instruction this result is taken from (operand 0).
+  const MInstruction *getAddInst() const { return getOperand<0>(); }
+  // The result index given at construction time.
+  uint32_t getResultIdx() const { return ResultIdx; }
+
+private:
+  friend class FixedOperandInstruction;
+  EvmU256AddResultInstruction(MType *Type, MInstruction *AddInst,
+                              uint32_t Idx)
+      : UnaryInstruction(MInstruction::EVM_U256_ADD_RESULT,
+                         OP_evm_u256_add_result, Type, AddInst),
+        ResultIdx(Idx) {}
+
+  uint32_t ResultIdx = 0;
+};
+
+// Pseudo instruction for a 256-bit subtraction (opcode evm_u256_sub). It
+// carries eight operands: the first four constructor arguments fill operand
+// slots 0-3 and the last four fill slots 4-7.
+// NOTE(review): presumably slots 0-3 are the limbs of the minuend and slots
+// 4-7 those of the subtrahend — confirm against the lowering code.
+class EvmU256SubInstruction : public FixedOperandInstruction<8> {
+public:
+  // Factory: forwards every argument to FixedOperandInstruction::create.
+  template <typename... ArgTs>
+  static EvmU256SubInstruction *create(ArgTs &&...Args) {
+    return FixedOperandInstruction::create<EvmU256SubInstruction>(
+        std::forward<ArgTs>(Args)...);
+  }
+
+  // LLVM-style RTTI: matches instructions whose kind is EVM_U256_SUB.
+  static bool classof(const MInstruction *Inst) {
+    return Inst->getKind() == EVM_U256_SUB;
+  }
+
+private:
+  friend class FixedOperandInstruction;
+  EvmU256SubInstruction(MType *Type, MInstruction *Lhs0, MInstruction *Lhs1,
+                        MInstruction *Lhs2, MInstruction *Lhs3,
+                        MInstruction *Rhs0, MInstruction *Rhs1,
+                        MInstruction *Rhs2, MInstruction *Rhs3)
+      : FixedOperandInstruction(MInstruction::EVM_U256_SUB, OP_evm_u256_sub, 8,
+                                Type) {
+    // Slots 0-3: first operand group.
+    setOperand<0>(Lhs0);
+    setOperand<1>(Lhs1);
+    setOperand<2>(Lhs2);
+    setOperand<3>(Lhs3);
+    // Slots 4-7: second operand group.
+    setOperand<4>(Rhs0);
+    setOperand<5>(Rhs1);
+    setOperand<6>(Rhs2);
+    setOperand<7>(Rhs3);
+  }
+};
+
+// Unary instruction that refers to one result of an evm_u256_sub: operand 0 is
+// the sub instruction and the stored index records which result is meant
+// (opcodes.def describes this opcode as extracting an extra limb).
+class EvmU256SubResultInstruction : public UnaryInstruction {
+public:
+  // Factory: forwards every argument to FixedOperandInstruction::create.
+  template <typename... ArgTs>
+  static EvmU256SubResultInstruction *create(ArgTs &&...Args) {
+    return FixedOperandInstruction::create<EvmU256SubResultInstruction>(
+        std::forward<ArgTs>(Args)...);
+  }
+
+  // LLVM-style RTTI: matches instructions whose kind is EVM_U256_SUB_RESULT.
+  static bool classof(const MInstruction *Inst) {
+    return Inst->getKind() == EVM_U256_SUB_RESULT;
+  }
+
+  // The instruction this result is taken from (operand 0).
+  const MInstruction *getSubInst() const { return getOperand<0>(); }
+  // The result index given at construction time.
+  uint32_t getResultIdx() const { return ResultIdx; }
+
+private:
+  friend class FixedOperandInstruction;
+  EvmU256SubResultInstruction(MType *Type, MInstruction *SubInst,
+                              uint32_t Idx)
+      : UnaryInstruction(MInstruction::EVM_U256_SUB_RESULT,
+                         OP_evm_u256_sub_result, Type, SubInst),
+        ResultIdx(Idx) {}
+
+  uint32_t ResultIdx = 0;
+};
+
 // EVM 128-bit / 64-bit unsigned division: (hi:lo) / divisor -> quotient.
 class EvmUdiv128By64Instruction : public FixedOperandInstruction<3> {
 public:
diff --git a/src/compiler/mir/opcodes.def b/src/compiler/mir/opcodes.def
index 52851f89c..9057270d7 100644
--- a/src/compiler/mir/opcodes.def
+++ b/src/compiler/mir/opcodes.def
@@ -70,6 +70,10 @@ OPCODE(evm_umul128_lo)              // 64x64->64 multiplication (low bits)
 OPCODE(evm_umul128_hi)              // extract high 64 bits from evm_umul128_lo
 OPCODE(evm_u256_mul)                // 256x256->256 multiplication pseudo op
 OPCODE(evm_u256_mul_result)         // extract extra limb from evm_u256_mul
+OPCODE(evm_u256_add)                // 256+256->256 addition pseudo op
+OPCODE(evm_u256_add_result)         // extract extra limb from evm_u256_add
+OPCODE(evm_u256_sub)                // 256-256->256 subtraction pseudo op
+OPCODE(evm_u256_sub_result)         // extract extra limb from evm_u256_sub
 OPCODE(evm_udiv128_by64)            // unsigned 128-bit (hi:lo) divided by 64-bit divisor -> 64-bit quotient
 OPCODE(evm_urem128_by64)            // unsigned remainder from the same 128/64 division as evm_udiv128_by64
                                     // OP_OTHER_EXPR_END
diff --git a/src/compiler/mir/pass/dmir_rewrite.h b/src/compiler/mir/pass/dmir_rewrite.h
new file mode 100644
index 000000000..660592956
--- /dev/null
+++ b/src/compiler/mir/pass/dmir_rewrite.h
@@ -0,0 +1,1017 @@
+// Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include "compiler/mir/constants.h"
+#include "compiler/mir/function.h"
+#include "compiler/mir/instructions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Casting.h"
+
+namespace COMPILER {
+
+class DMirRewritePass {
+public:
+  // Entry point of the pass: rewrites every basic block of F and returns true
+  // when any rewrite was applied.
+  bool runOnMFunction(MFunction &F) {
+    // Reset per-run state; the rewrite helpers report through Changed.
+    Changed = false;
+    Func = &F;
+
+    for (MBasicBlock *Block : F) {
+      runOnBasicBlock(*Block);
+    }
+
+#ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING
+    // Dump only when something was rewritten, so untouched functions do not
+    // add noise to the log.
+    if (Changed) {
+      llvm::dbgs() << "\n########## MIR Dump After dMIR Rewrite ##########\n\n";
+      F.dump();
+    }
+#endif
+    return Changed;
+  }
+
+private:
+  // Rewrites the operands of every instruction listed in BB. The memoization
+  // table is emptied first, so cached results are never reused across blocks.
+  void runOnBasicBlock(MBasicBlock &BB) {
+    RewriteCache.clear();
+    for (MInstruction *Stmt : BB) {
+      rewriteOperands(*Stmt, BB);
+    }
+  }
+
+  // Replaces each direct operand of Inst with its rewritten form and sets
+  // Changed whenever an operand is actually swapped. Inst itself is never
+  // replaced here.
+  void rewriteOperands(MInstruction &Inst, MBasicBlock &BB) {
+    for (uint32_t Idx = 0; Idx < Inst.getNumOperands(); ++Idx) {
+      MInstruction *const Original = Inst.getOperand(Idx);
+      MInstruction *const Simplified = rewriteExprTree(Original, BB);
+      if (Simplified == Original) {
+        continue;
+      }
+      Inst.setOperand(Idx, Simplified);
+      Changed = true;
+    }
+  }
+
+  // Bottom-up rewrite of the expression rooted at Inst. Operands are rewritten
+  // first and patched into Inst in place; then tryRewrite is applied to Inst
+  // itself, and a replacement is rewritten again so chained rules can fire.
+  // The result is memoized in RewriteCache under the original instruction.
+  // Depth bounds the recursion: past the limit the node is returned untouched
+  // and is not cached.
+  MInstruction *rewriteExprTree(MInstruction *Inst, MBasicBlock &BB,
+                                uint32_t Depth = 0) {
+    constexpr uint32_t MaxDepth = 16;
+    if (Depth > MaxDepth) {
+      return Inst;
+    }
+
+    const auto Cached = RewriteCache.find(Inst);
+    if (Cached != RewriteCache.end()) {
+      return Cached->second;
+    }
+
+    const uint32_t ChildDepth = Depth + 1;
+    for (uint32_t Idx = 0; Idx < Inst->getNumOperands(); ++Idx) {
+      MInstruction *Child = Inst->getOperand(Idx);
+      MInstruction *NewChild = rewriteExprTree(Child, BB, ChildDepth);
+      if (NewChild == Child) {
+        continue;
+      }
+      Inst->setOperand(Idx, NewChild);
+      Changed = true;
+    }
+
+    // tryRewrite yields nullptr (or Inst itself) when no rule applies; only a
+    // different instruction counts as a change and is simplified further.
+    MInstruction *Result = Inst;
+    MInstruction *Replacement = tryRewrite(*Inst, BB);
+    if (Replacement != nullptr && Replacement != Inst) {
+      Changed = true;
+      Result = rewriteExprTree(Replacement, BB, ChildDepth);
+    }
+    RewriteCache[Inst] = Result;
+    return Result;
+  }
+
+  // Dispatches Inst to the rewrite routine for its opcode and returns that
+  // routine's result; opcodes without a routine yield nullptr.
+  MInstruction *tryRewrite(MInstruction &Inst, MBasicBlock &BB) {
+    const auto Op = Inst.getOpcode();
+    switch (Op) {
+    // Carry / borrow arithmetic.
+    case OP_adc:
+      return rewriteAdc(llvm::cast<AdcInstruction>(Inst), BB);
+    case OP_sbb:
+      return rewriteSbb(llvm::cast<SbbInstruction>(Inst), BB);
+
+    // Non-binary forms; their routines do not take the basic block.
+    case OP_not:
+      return rewriteNot(llvm::cast<NotInstruction>(Inst));
+    case OP_select:
+      return rewriteSelect(llvm::cast<SelectInstruction>(Inst));
+
+    // Two-operand arithmetic.
+    case OP_add:
+      return rewriteAdd(llvm::cast<BinaryInstruction>(Inst), BB);
+    case OP_sub:
+      return rewriteSub(llvm::cast<BinaryInstruction>(Inst), BB);
+    case OP_mul:
+      return rewriteMul(llvm::cast<BinaryInstruction>(Inst), BB);
+
+    // Two-operand bitwise logic.
+    case OP_and:
+      return rewriteAnd(llvm::cast<BinaryInstruction>(Inst), BB);
+    case OP_or:
+      return rewriteOr(llvm::cast<BinaryInstruction>(Inst), BB);
+    case OP_xor:
+      return rewriteXor(llvm::cast<BinaryInstruction>(Inst), BB);
+
+    // All three shifts share a single routine.
+    case OP_shl:
+    case OP_sshr:
+    case OP_ushr:
+      return rewriteShift(llvm::cast<BinaryInstruction>(Inst));
+
+    default:
+      return nullptr;
+    }
+  }
+
+  // Simplifies an add. Returns an existing operand, a newly created
+  // replacement expression, or nullptr when no rule matches. Rules are tried
+  // in the order written below and the first match wins.
+  MInstruction *rewriteAdd(BinaryInstruction &Inst, MBasicBlock &BB) {
+    MInstruction *Left = Inst.getOperand<0>();
+    MInstruction *Right = Inst.getOperand<1>();
+    auto &Ty = *Inst.getType();
+
+    // add(x, 0) -> x is applied only when x is itself a constant, i.e. as
+    // pure constant folding. With a non-constant x the add node is kept on
+    // purpose: it gives register allocation a natural copy point, and the i64
+    // ADD-with-immediate lowering is cheaper with the node present than
+    // stretching the live range of x across all of its uses.
+    if (isZeroConst(*Right) && isIntegerConst(*Left)) {
+      return Left;
+    }
+    if (isZeroConst(*Left) && isIntegerConst(*Right)) {
+      return Right;
+    }
+
+    // (add x x) -> (shl x 1): doubling is a left shift by one.
+    if (structurallyEqual(*Left, *Right)) {
+      auto *One = createOneConstant(Ty, BB);
+      return createBinaryInstruction(OP_shl, Ty, Left, One, BB);
+    }
+
+    // (add (sub 0 x) y) -> (sub y x), and the mirrored (add y (sub 0 x)).
+    if (isNeg(*Left)) {
+      return createBinaryInstruction(OP_sub, Ty, Right, getNegOperand(*Left),
+                                     BB);
+    }
+    if (isNeg(*Right)) {
+      return createBinaryInstruction(OP_sub, Ty, Left, getNegOperand(*Right),
+                                     BB);
+    }
+
+    // NOTE(review): the two pair rules below are tried only with the `and` on
+    // the left. rewriteSub calls matchBinaryOperandPair with swapped opcodes
+    // for a different rewrite, which suggests the match is order-sensitive, so
+    // (add (xor x y) (and x y)) presumably stays unmatched — confirm whether
+    // that is intended.
+
+    // (add (and x y) (xor x y)) -> (or x y)
+    if (const auto *AndXor =
+            matchBinaryOperandPair(*Left, *Right, OP_and, OP_xor)) {
+      return createBinaryInstruction(OP_or, Ty, AndXor->getOperand<0>(),
+                                     AndXor->getOperand<1>(), BB);
+    }
+
+    // (add (and x y) (or x y)) -> (add x y)
+    if (const auto *AndOr =
+            matchBinaryOperandPair(*Left, *Right, OP_and, OP_or)) {
+      return createBinaryInstruction(OP_add, Ty, AndOr->getOperand<0>(),
+                                     AndOr->getOperand<1>(), BB);
+    }
+
+    return nullptr;
+  }
+
+  // Simplifies a sub. Returns an existing operand, a newly created
+  // replacement expression, or nullptr when no rule matches. Rules are tried
+  // in the order written below and the first match wins.
+  MInstruction *rewriteSub(BinaryInstruction &Inst, MBasicBlock &BB) {
+    MInstruction *Minuend = Inst.getOperand<0>();
+    MInstruction *Subtrahend = Inst.getOperand<1>();
+    auto &Ty = *Inst.getType();
+
+    // (sub x 0) -> x
+    if (isZeroConst(*Subtrahend)) {
+      return Minuend;
+    }
+
+    // (sub x x) -> 0
+    if (structurallyEqual(*Minuend, *Subtrahend)) {
+      return createZeroConstant(Ty, BB);
+    }
+
+    // (sub (and x y) (or x y)) -> (sub 0 (xor x y))
+    if (const auto *AndInst =
+            matchBinaryOperandPair(*Minuend, *Subtrahend, OP_and, OP_or)) {
+      // Build the xor first, then the zero it is subtracted from.
+      MInstruction *XorInst =
+          createBinaryInstruction(OP_xor, Ty, AndInst->getOperand<0>(),
+                                  AndInst->getOperand<1>(), BB);
+      auto *Zero = createZeroConstant(Ty, BB);
+      return createBinaryInstruction(OP_sub, Ty, Zero, XorInst, BB);
+    }
+
+    // (sub (or x y) (and x y)) -> (xor x y)
+    if (const auto *OrInst =
+            matchBinaryOperandPair(*Minuend, *Subtrahend, OP_or, OP_and)) {
+      return createBinaryInstruction(OP_xor, Ty, OrInst->getOperand<0>(),
+                                     OrInst->getOperand<1>(), BB);
+    }
+
+    return nullptr;
+  }
+
+  // Simplifies an and. Returns an existing operand, a newly created
+  // replacement expression, or nullptr when no rule matches. Constant and
+  // trivial identities are checked first, then the nested-pattern helpers,
+  // each tried with both operand orders.
+  MInstruction *rewriteAnd(BinaryInstruction &Inst, MBasicBlock &BB) {
+    MInstruction *Left = Inst.getOperand<0>();
+    MInstruction *Right = Inst.getOperand<1>();
+
+    // A zero constant on either side yields a fresh zero constant.
+    if (isZeroConst(*Left) || isZeroConst(*Right)) {
+      return createZeroConstant(*Inst.getType(), BB);
+    }
+    // An all-ones constant on one side yields the other operand.
+    if (isAllOnesConst(*Left)) {
+      return Right;
+    }
+    if (isAllOnesConst(*Right)) {
+      return Left;
+    }
+    // (and x x) -> x
+    if (structurallyEqual(*Left, *Right)) {
+      return Left;
+    }
+    // isNotOf in either direction yields a zero constant — presumably the
+    // (and x (not x)) case; confirm against isNotOf.
+    if (isNotOf(*Left, *Right) || isNotOf(*Right, *Left)) {
+      return createZeroConstant(*Inst.getType(), BB);
+    }
+
+    // Nested-pattern helpers, in fixed priority order; the first one that
+    // produces a replacement wins and later helpers are not called.
+    MInstruction *Replacement = rewriteAndWithNestedAnd(*Left, *Right, BB);
+    if (!Replacement) {
+      Replacement = rewriteAndWithNestedAnd(*Right, *Left, BB);
+    }
+    if (!Replacement) {
+      Replacement = rewriteAndWithNestedOr(*Left, *Right);
+    }
+    if (!Replacement) {
+      Replacement = rewriteAndWithNestedOr(*Right, *Left);
+    }
+    if (!Replacement) {
+      Replacement = rewriteAndWithNestedNot(*Left, *Right, BB);
+    }
+    if (!Replacement) {
+      Replacement = rewriteAndWithNestedNot(*Right, *Left, BB);
+    }
+    // Still null here means no rule matched.
+    return Replacement;
+  }
+
+  /// Simplify a bitwise disjunction `or(LHS, RHS)`.
+  ///
+  /// Constant, complement and idempotence identities are checked first. If
+  /// none fires, the nested-and, nested-or, nested-xor and nested-not helpers
+  /// are tried in that order, each with the operands as given and then
+  /// swapped; the first non-null result is returned.
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteOr(BinaryInstruction &Inst, MBasicBlock &BB) {
+    MInstruction *const Left = Inst.getOperand<0>();
+    MInstruction *const Right = Inst.getOperand<1>();
+
+    // or(0, x) -> x
+    if (isZeroConst(*Left)) {
+      return Right;
+    }
+    // or(x, 0) -> x
+    if (isZeroConst(*Right)) {
+      return Left;
+    }
+    // or(~0, x), or(x, ~0), or(~x, x), or(x, ~x) -> ~0
+    const bool HasAllOnesOperand =
+        isAllOnesConst(*Left) || isAllOnesConst(*Right);
+    if (HasAllOnesOperand || isNotOf(*Left, *Right) ||
+        isNotOf(*Right, *Left)) {
+      return createAllOnesConstant(*Inst.getType(), BB);
+    }
+    // or(x, x) -> x
+    if (structurallyEqual(*Left, *Right)) {
+      return Left;
+    }
+
+    // Some helpers create instructions, so each one runs only while no
+    // earlier helper has produced a result.
+    MInstruction *Found = rewriteOrWithNestedAnd(*Left, *Right, BB);
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedAnd(*Right, *Left, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedOr(*Left, *Right);
+    }
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedOr(*Right, *Left);
+    }
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedXor(*Left, *Right, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedXor(*Right, *Left, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedNot(*Left, *Right, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteOrWithNestedNot(*Right, *Left, BB);
+    }
+    return Found;
+  }
+
+  /// Simplify a bitwise exclusive-or `xor(LHS, RHS)`.
+  ///
+  /// Constant, self and complement identities are checked first. If none
+  /// fires, the nested helpers are tried in a fixed order (nested-xor,
+  /// not-with-all-ones, nested-not, two-nots, nested-and, nested-or). All but
+  /// the two-nots helper are tried with the operands as given and then
+  /// swapped. The first non-null result is returned.
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteXor(BinaryInstruction &Inst, MBasicBlock &BB) {
+    MInstruction *const Left = Inst.getOperand<0>();
+    MInstruction *const Right = Inst.getOperand<1>();
+
+    // xor(0, x) -> x
+    if (isZeroConst(*Left)) {
+      return Right;
+    }
+    // xor(x, 0) -> x
+    if (isZeroConst(*Right)) {
+      return Left;
+    }
+    // xor(x, x) -> 0
+    if (structurallyEqual(*Left, *Right)) {
+      return createZeroConstant(*Inst.getType(), BB);
+    }
+    // xor(~x, x) / xor(x, ~x) -> ~0
+    if (isNotOf(*Left, *Right) || isNotOf(*Right, *Left)) {
+      return createAllOnesConstant(*Inst.getType(), BB);
+    }
+
+    // Some helpers create instructions, so each one runs only while no
+    // earlier helper has produced a result.
+    MInstruction *Found = rewriteXorWithNestedXor(*Left, *Right);
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedXor(*Right, *Left);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedNotAndAllOnes(*Left, *Right);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedNotAndAllOnes(*Right, *Left);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedNot(*Left, *Right, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedNot(*Right, *Left, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithTwoNots(*Left, *Right, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedAnd(*Left, *Right, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedAnd(*Right, *Left, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedOr(*Left, *Right, BB);
+    }
+    if (Found == nullptr) {
+      Found = rewriteXorWithNestedOr(*Right, *Left, BB);
+    }
+    return Found;
+  }
+
+  /// Simplify an integer multiplication `mul(LHS, RHS)`.
+  ///
+  /// Rules, tried in this order:
+  ///   - mul(0, x) / mul(x, 0)     -> 0
+  ///   - mul(1, x) / mul(x, 1)     -> x
+  ///   - mul(x, 2^k) / mul(2^k, x) -> shl(x, k), only when the result type is
+  ///     a 64-bit integer and k >= 1. A constant on the right is tried first.
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteMul(BinaryInstruction &Inst, MBasicBlock &BB) {
+    MInstruction *LHS = Inst.getOperand<0>();
+    MInstruction *RHS = Inst.getOperand<1>();
+    if (isZeroConst(*LHS) || isZeroConst(*RHS)) {
+      return createZeroConstant(*Inst.getType(), BB);
+    }
+    if (isOneConst(*LHS)) {
+      return RHS;
+    }
+    if (isOneConst(*RHS)) {
+      return LHS;
+    }
+    // mul(x, 2^k) / mul(2^k, x) -> shl(x, k) for i64 types when k >= 1.
+    // Multiplication commutes, so the power of two may sit on either side.
+    if (Inst.getType()->isInteger() && Inst.getType()->getBitWidth() == 64) {
+      if (MInstruction *Shl = rewriteMulByPowerOfTwo(Inst, LHS, RHS, BB)) {
+        return Shl;
+      }
+      if (MInstruction *Shl = rewriteMulByPowerOfTwo(Inst, RHS, LHS, BB)) {
+        return Shl;
+      }
+    }
+    return nullptr;
+  }
+
+  /// Helper for rewriteMul: builds shl(Value, k) when \p MaybePow2 is an
+  /// integer constant equal to 2^k with k >= 1, and returns nullptr
+  /// otherwise. The caller must already have checked that \p Inst has a
+  /// 64-bit integer type.
+  MInstruction *rewriteMulByPowerOfTwo(BinaryInstruction &Inst,
+                                       MInstruction *Value,
+                                       MInstruction *MaybePow2,
+                                       MBasicBlock &BB) {
+    if (!isIntegerConst(*MaybePow2)) {
+      return nullptr;
+    }
+    uint64_t C = llvm::cast<MConstantInt>(
+                     &llvm::cast<ConstantInstruction>(MaybePow2)->getConstant())
+                     ->getValue()
+                     .getZExtValue();
+    // Reject 0, 1 and anything with more than one bit set.
+    if (C <= 1 || (C & (C - 1)) != 0) {
+      return nullptr;
+    }
+    // With exactly one bit set, the trailing-zero count is log2(C).
+    uint64_t K = static_cast<uint64_t>(__builtin_ctzll(C));
+    return createBinaryInstruction(
+        OP_shl, *Inst.getType(), Value,
+        createIntegerConstant(*Inst.getType(), llvm::APInt(64, K), BB), BB);
+  }
+
+  /// Carry-dead analysis: returns true when the carry/borrow output of the
+  /// instruction that feeds this ADC/SBB is provably zero.
+  ///
+  /// Handles:
+  ///   1. const(0): zero constant has no carry (chain-head sentinel)
+  ///   2. add(x, 0) / add(0, x): adding zero never overflows, carry = 0
+  ///   3. adc(x, 0, prev) / adc(0, y, prev) where isCarryDead(prev):
+  ///      x + 0 + 0 never overflows
+  ///   4. sub(x, 0): subtracting zero never borrows
+  ///   5. sbb(x, 0, prev) where isCarryDead(prev): x - 0 - 0 never borrows
+  ///   6. zext(icmp_ult(x, 0)): comparison with zero always false, zext
+  ///      produces 0
+  ///
+  /// NOTE(review): cases 2-5 treat \p CarryProducer as the instruction whose
+  /// carry/borrow flag is consumed, while cases 1 and 6 treat it as a carry
+  /// *value*. Confirm against the definition of ADC/SBB operand 2 that both
+  /// readings are valid for the same operand.
+  ///
+  /// \param CarryProducer the carry/borrow operand of an ADC/SBB.
+  /// \param Depth current recursion depth through adc/sbb chains; once it
+  ///        exceeds 8 the analysis gives up and reports the carry as live.
+  /// \returns true only when one of the patterns above matches; false is the
+  ///          conservative answer.
+  bool isCarryDead(const MInstruction &CarryProducer,
+                   uint32_t Depth = 0) const {
+    if (Depth > 8) {
+      return false; // Conservative: assume carry is live
+    }
+    // A const(0) carry operand means "no incoming carry" (chain head).
+    if (isZeroConst(CarryProducer)) {
+      return true;
+    }
+    // add(x, 0) or add(0, x): adding zero never produces a carry.
+    // The kind is checked before the cast to BinaryInstruction.
+    if (CarryProducer.getOpcode() == OP_add &&
+        CarryProducer.getKind() == MInstruction::BINARY) {
+      const auto &Add = llvm::cast<BinaryInstruction>(CarryProducer);
+      if (isZeroConst(*Add.getOperand<0>()) ||
+          isZeroConst(*Add.getOperand<1>())) {
+        return true;
+      }
+    }
+    // adc(x, 0, prev) where prev's carry is also dead: recursive chain.
+    // Operand 2 is the incoming carry of the inner adc.
+    if (CarryProducer.getOpcode() == OP_adc) {
+      const auto &Adc = llvm::cast<AdcInstruction>(CarryProducer);
+      if ((isZeroConst(*Adc.getOperand<0>()) ||
+           isZeroConst(*Adc.getOperand<1>())) &&
+          isCarryDead(*Adc.getOperand<2>(), Depth + 1)) {
+        return true;
+      }
+    }
+    // sub(x, 0): subtracting zero never borrows.
+    // Only the subtrahend (operand 1) is checked; sub(0, x) can borrow.
+    if (CarryProducer.getOpcode() == OP_sub &&
+        CarryProducer.getKind() == MInstruction::BINARY) {
+      const auto &Sub = llvm::cast<BinaryInstruction>(CarryProducer);
+      if (isZeroConst(*Sub.getOperand<1>())) {
+        return true;
+      }
+    }
+    // sbb(x, 0, prev) where prev's borrow is dead: recursive chain.
+    if (CarryProducer.getOpcode() == OP_sbb) {
+      const auto &Sbb = llvm::cast<SbbInstruction>(CarryProducer);
+      if (isZeroConst(*Sbb.getOperand<1>()) &&
+          isCarryDead(*Sbb.getOperand<2>(), Depth + 1)) {
+        return true;
+      }
+    }
+    // zext(icmp(ULT, x, 0)): no unsigned value is less than 0, always false.
+    if (CarryProducer.getOpcode() == OP_uext &&
+        CarryProducer.getKind() == MInstruction::UNARY) {
+      const MInstruction *Inner = CarryProducer.getOperand<0>();
+      if (Inner->getOpcode() == OP_cmp &&
+          llvm::cast<CmpInstruction>(Inner)->getPredicate() ==
+              CmpInstruction::ICMP_ULT) {
+        if (isZeroConst(*Inner->getOperand<1>())) {
+          return true;
+        }
+      }
+    }
+    // No pattern matched: treat the carry as live.
+    return false;
+  }
+
+  /// Simplify `adc(LHS, RHS, CarryIn)` when isCarryDead reports the incoming
+  /// carry as zero:
+  ///   - adc(x, 0, dead) -> x
+  ///   - adc(0, y, dead) -> y
+  ///   - adc(x, y, dead) -> add(x, y)
+  ///
+  /// \returns the replacement value, or nullptr when the carry may be live.
+  MInstruction *rewriteAdc(AdcInstruction &Inst, MBasicBlock &BB) {
+    // Without a provably zero carry-in nothing can be simplified.
+    const bool CarryIsZero = isCarryDead(*Inst.getOperand<2>());
+    if (!CarryIsZero) {
+      return nullptr;
+    }
+
+    MInstruction *const Augend = Inst.getOperand<0>();
+    MInstruction *const Addend = Inst.getOperand<1>();
+    // A zero addend is checked before a zero augend.
+    if (isZeroConst(*Addend)) {
+      return Augend;
+    }
+    if (isZeroConst(*Augend)) {
+      return Addend;
+    }
+    // General case: drop the carry and emit a plain add.
+    return createBinaryInstruction(OP_add, *Inst.getType(), Augend, Addend, BB);
+  }
+
+  /// Simplify `sbb(LHS, RHS, BorrowIn)` when isCarryDead reports the incoming
+  /// borrow as zero:
+  ///   - sbb(x, 0, dead) -> x
+  ///   - sbb(x, x, dead) -> 0
+  ///   - sbb(x, y, dead) -> sub(x, y)
+  ///
+  /// \returns the replacement value, or nullptr when the borrow may be live.
+  MInstruction *rewriteSbb(SbbInstruction &Inst, MBasicBlock &BB) {
+    // Without a provably zero borrow-in nothing can be simplified.
+    const bool BorrowIsZero = isCarryDead(*Inst.getOperand<2>());
+    if (!BorrowIsZero) {
+      return nullptr;
+    }
+
+    MInstruction *const Minuend = Inst.getOperand<0>();
+    MInstruction *const Subtrahend = Inst.getOperand<1>();
+    if (isZeroConst(*Subtrahend)) {
+      return Minuend;
+    }
+    if (structurallyEqual(*Minuend, *Subtrahend)) {
+      return createZeroConstant(*Inst.getType(), BB);
+    }
+    // General case: drop the borrow and emit a plain sub.
+    return createBinaryInstruction(OP_sub, *Inst.getType(), Minuend,
+                                   Subtrahend, BB);
+  }
+
+  /// Simplify a shift whose amount (operand 1) is the constant zero:
+  /// shift(x, 0) -> x.
+  ///
+  /// \returns operand 0 when the rule applies, nullptr otherwise.
+  MInstruction *rewriteShift(BinaryInstruction &Inst) const {
+    const bool ShiftsByZero = isZeroConst(*Inst.getOperand<1>());
+    return ShiftsByZero ? Inst.getOperand<0>() : nullptr;
+  }
+
+  /// Remove a double negation: not(not(x)) -> x.
+  ///
+  /// \returns the inner-most operand when \p Inst negates another `not`,
+  /// nullptr otherwise.
+  MInstruction *rewriteNot(NotInstruction &Inst) const {
+    MInstruction *Inner = Inst.getOperand<0>();
+    if (Inner->getOpcode() != OP_not) {
+      return nullptr;
+    }
+    return Inner->getOperand<0>();
+  }
+
+  /// Simplify `select(Cond, TrueValue, FalseValue)`:
+  ///   - integer-constant condition: zero picks the false value, any other
+  ///     value picks the true value;
+  ///   - structurally identical arms: the true value is returned.
+  ///
+  /// \returns the chosen arm, or nullptr if no rule applies.
+  MInstruction *rewriteSelect(SelectInstruction &Inst) const {
+    MInstruction *Cond = Inst.getOperand<0>();
+    MInstruction *OnTrue = Inst.getOperand<1>();
+    MInstruction *OnFalse = Inst.getOperand<2>();
+
+    // A constant condition decides the select statically.
+    if (isIntegerConst(*Cond)) {
+      return isZeroConst(*Cond) ? OnFalse : OnTrue;
+    }
+    // select(c, v, v) -> v, whatever the condition is.
+    return structurallyEqual(*OnTrue, *OnFalse) ? OnTrue : nullptr;
+  }
+
+  /// Simplify `and(NestedCandidate, Other)` when \p NestedCandidate is itself
+  /// a binary `and(x, y)`. Rules, tried in this order:
+  ///   - Other is x or y -> and(x, y)   (the existing nested instruction)
+  ///   - Other is ~x or ~y -> 0
+  ///   - Other is or(x, y) -> and(x, y)
+  ///   - Other is xor(x, y) -> 0
+  ///
+  /// \returns the replacement value, or nullptr if \p NestedCandidate is not
+  /// a binary `and` or no rule applies.
+  MInstruction *rewriteAndWithNestedAnd(MInstruction &NestedCandidate,
+                                        MInstruction &Other, MBasicBlock &BB) {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_and);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+    BinaryInstruction *const InnerMutable =
+        const_cast<BinaryInstruction *>(Inner);
+
+    if (structurallyContains(*Inner, Other)) {
+      return InnerMutable;
+    }
+
+    const bool NegatesFirst = isNotOf(Other, *Inner->getOperand<0>());
+    if (NegatesFirst || isNotOf(Other, *Inner->getOperand<1>())) {
+      return createZeroConstant(*Inner->getType(), BB);
+    }
+
+    if (const BinaryInstruction *OtherAsOr =
+            getBinaryWithOpcode(Other, OP_or)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsOr)) {
+        return InnerMutable;
+      }
+    }
+
+    if (const BinaryInstruction *OtherAsXor =
+            getBinaryWithOpcode(Other, OP_xor)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsXor)) {
+        return createZeroConstant(*Inner->getType(), BB);
+      }
+    }
+
+    return nullptr;
+  }
+
+  /// Simplify `and(NestedCandidate, Other)` when \p NestedCandidate is a
+  /// binary `or(x, y)`:
+  ///   - Other is x or y -> Other            (absorption)
+  ///   - Other is xor(x, y) -> xor(x, y)     (the existing instruction)
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteAndWithNestedOr(MInstruction &NestedCandidate,
+                                       MInstruction &Other) const {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_or);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+
+    if (structurallyContains(*Inner, Other)) {
+      return &Other;
+    }
+
+    if (const BinaryInstruction *OtherAsXor =
+            getBinaryWithOpcode(Other, OP_xor)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsXor)) {
+        return const_cast<BinaryInstruction *>(OtherAsXor);
+      }
+    }
+
+    return nullptr;
+  }
+
+  /// Simplify `and(NestedCandidate, Other)` when \p NestedCandidate is
+  /// `not(x)` and \p Other is `or(x, y)` or `xor(x, y)` (x on either side):
+  /// both become and(not(x), y), reusing the existing `not`.
+  ///
+  /// \returns the new `and`, or nullptr if the pattern does not match.
+  MInstruction *rewriteAndWithNestedNot(MInstruction &NestedCandidate,
+                                        MInstruction &Other, MBasicBlock &BB) {
+    if (NestedCandidate.getOpcode() != OP_not) {
+      return nullptr;
+    }
+
+    // Other is at most one of `or` / `xor`; both cases produce the same
+    // result shape, so pick whichever one it is.
+    const BinaryInstruction *Combined = getBinaryWithOpcode(Other, OP_or);
+    if (Combined == nullptr) {
+      Combined = getBinaryWithOpcode(Other, OP_xor);
+    }
+    if (Combined == nullptr) {
+      return nullptr;
+    }
+
+    // The operand of Combined that is not the negated value.
+    MInstruction *Remaining =
+        getOtherBinaryOperand(*Combined, *NestedCandidate.getOperand<0>());
+    if (Remaining == nullptr) {
+      return nullptr;
+    }
+    return createBinaryInstruction(OP_and, *Combined->getType(),
+                                   &NestedCandidate, Remaining, BB);
+  }
+
+  /// Simplify `or(NestedCandidate, Other)` when \p NestedCandidate is a
+  /// binary `and(x, y)`. Rules, tried in this order:
+  ///   - Other is x or y -> Other              (absorption)
+  ///   - Other is or(x, y) -> or(x, y)         (the existing instruction)
+  ///   - Other is xor(x, y) -> new or(x, y)
+  ///   - Other is not(x) -> new or(not(x), y)  (x on either side of the and)
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteOrWithNestedAnd(MInstruction &NestedCandidate,
+                                       MInstruction &Other, MBasicBlock &BB) {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_and);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+
+    if (structurallyContains(*Inner, Other)) {
+      return &Other;
+    }
+
+    if (const BinaryInstruction *OtherAsOr =
+            getBinaryWithOpcode(Other, OP_or)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsOr)) {
+        return const_cast<BinaryInstruction *>(OtherAsOr);
+      }
+    }
+
+    if (const BinaryInstruction *OtherAsXor =
+            getBinaryWithOpcode(Other, OP_xor)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsXor)) {
+        return createBinaryInstruction(OP_or, *Inner->getType(),
+                                       Inner->getOperand<0>(),
+                                       Inner->getOperand<1>(), BB);
+      }
+    }
+
+    if (Other.getOpcode() != OP_not) {
+      return nullptr;
+    }
+    // The operand of the nested and that is not the negated value.
+    MInstruction *Remaining =
+        getOtherBinaryOperand(*Inner, *Other.getOperand<0>());
+    if (Remaining == nullptr) {
+      return nullptr;
+    }
+    return createBinaryInstruction(OP_or, *Inner->getType(), &Other, Remaining,
+                                   BB);
+  }
+
+  /// Simplify `or(NestedCandidate, Other)` when \p NestedCandidate is a
+  /// binary `or(x, y)` and \p Other adds nothing to it: Other is x, y, or
+  /// xor(x, y). The existing nested `or` is returned in all three cases.
+  ///
+  /// \returns the nested `or`, or nullptr if no rule applies.
+  MInstruction *rewriteOrWithNestedOr(MInstruction &NestedCandidate,
+                                      MInstruction &Other) const {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_or);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+
+    bool Redundant = structurallyContains(*Inner, Other);
+    if (!Redundant) {
+      const BinaryInstruction *OtherAsXor = getBinaryWithOpcode(Other, OP_xor);
+      Redundant = OtherAsXor != nullptr &&
+                  hasSameUnorderedOperands(*Inner, *OtherAsXor);
+    }
+    return Redundant ? const_cast<BinaryInstruction *>(Inner) : nullptr;
+  }
+
+  /// Simplify `or(NestedCandidate, Other)` when \p NestedCandidate is a
+  /// binary `xor(x, y)` and \p Other is x or y: the result is a new or(x, y).
+  ///
+  /// \returns the new `or`, or nullptr if the pattern does not match.
+  MInstruction *rewriteOrWithNestedXor(MInstruction &NestedCandidate,
+                                       MInstruction &Other, MBasicBlock &BB) {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_xor);
+    const bool Matches =
+        Inner != nullptr && structurallyContains(*Inner, Other);
+    if (!Matches) {
+      return nullptr;
+    }
+    return createBinaryInstruction(OP_or, *Inner->getType(),
+                                   Inner->getOperand<0>(),
+                                   Inner->getOperand<1>(), BB);
+  }
+
+  /// Simplify `or(NestedCandidate, Other)` when \p NestedCandidate is
+  /// `not(x)` and \p Other is a binary `or` that has x as an operand:
+  /// or(~x, or(x, y)) -> all ones.
+  ///
+  /// \returns an all-ones constant, or nullptr if the pattern does not match.
+  MInstruction *rewriteOrWithNestedNot(MInstruction &NestedCandidate,
+                                       MInstruction &Other, MBasicBlock &BB) {
+    if (NestedCandidate.getOpcode() != OP_not) {
+      return nullptr;
+    }
+
+    const BinaryInstruction *OtherAsOr = getBinaryWithOpcode(Other, OP_or);
+    const bool CoversNegated =
+        OtherAsOr != nullptr &&
+        structurallyContains(*OtherAsOr, *NestedCandidate.getOperand<0>());
+    if (!CoversNegated) {
+      return nullptr;
+    }
+    return createAllOnesConstant(*OtherAsOr->getType(), BB);
+  }
+
+  /// Simplify `xor(NestedCandidate, Other)` when \p NestedCandidate is a
+  /// binary `xor(x, y)` and \p Other is x or y: the matching operand cancels
+  /// and the remaining one is returned.
+  ///
+  /// \returns the remaining operand, or nullptr if the pattern does not match.
+  MInstruction *rewriteXorWithNestedXor(MInstruction &NestedCandidate,
+                                        MInstruction &Other) const {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_xor);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+    // getOtherBinaryOperand checks operand 0 first, then operand 1, and
+    // yields the opposite operand (or nullptr when neither matches).
+    return getOtherBinaryOperand(*Inner, Other);
+  }
+
+  /// Simplify xor(not(x), ~0) -> x.
+  ///
+  /// \returns the operand of the `not` when \p NestedCandidate is a `not` and
+  /// \p Other is an all-ones constant, nullptr otherwise.
+  MInstruction *rewriteXorWithNestedNotAndAllOnes(MInstruction &NestedCandidate,
+                                                  MInstruction &Other) const {
+    const bool Matches =
+        NestedCandidate.getOpcode() == OP_not && isAllOnesConst(Other);
+    if (Matches) {
+      return NestedCandidate.getOperand<0>();
+    }
+    return nullptr;
+  }
+
+  /// Simplify `xor(NestedCandidate, Other)` when \p NestedCandidate is
+  /// `not(x)`:
+  ///   - Other is xor(x, y) -> not(y)
+  ///   - Other is or(x, y)  -> or(not(y), x)
+  /// (x may be either operand of \p Other.)
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteXorWithNestedNot(MInstruction &NestedCandidate,
+                                        MInstruction &Other, MBasicBlock &BB) {
+    if (NestedCandidate.getOpcode() != OP_not) {
+      return nullptr;
+    }
+    const MInstruction &Negated = *NestedCandidate.getOperand<0>();
+
+    if (const BinaryInstruction *OtherAsXor =
+            getBinaryWithOpcode(Other, OP_xor)) {
+      if (MInstruction *Remaining =
+              getOtherBinaryOperand(*OtherAsXor, Negated)) {
+        return createNotInstruction(*OtherAsXor->getType(), Remaining, BB);
+      }
+    }
+
+    if (const BinaryInstruction *OtherAsOr =
+            getBinaryWithOpcode(Other, OP_or)) {
+      if (MInstruction *Remaining =
+              getOtherBinaryOperand(*OtherAsOr, Negated)) {
+        // Keep the operand of the `or` that was not picked as Remaining
+        // (compared by pointer).
+        const MInstruction *Kept = OtherAsOr->getOperand(0) == Remaining
+                                       ? OtherAsOr->getOperand(1)
+                                       : OtherAsOr->getOperand(0);
+        MInstruction *NotRemaining =
+            createNotInstruction(*OtherAsOr->getType(), Remaining, BB);
+        return createBinaryInstruction(OP_or, *OtherAsOr->getType(),
+                                       NotRemaining, Kept, BB);
+      }
+    }
+
+    return nullptr;
+  }
+
+  /// Simplify xor(not(x), not(y)) -> xor(x, y): the two negations cancel.
+  ///
+  /// \returns the new `xor` (typed like \p LHS), or nullptr unless both
+  /// operands are `not` instructions.
+  MInstruction *rewriteXorWithTwoNots(MInstruction &LHS, MInstruction &RHS,
+                                      MBasicBlock &BB) {
+    const bool BothNegated =
+        LHS.getOpcode() == OP_not && RHS.getOpcode() == OP_not;
+    if (!BothNegated) {
+      return nullptr;
+    }
+    MInstruction *LeftInner = LHS.getOperand<0>();
+    MInstruction *RightInner = RHS.getOperand<0>();
+    return createBinaryInstruction(OP_xor, *LHS.getType(), LeftInner,
+                                   RightInner, BB);
+  }
+
+  /// Simplify `xor(NestedCandidate, Other)` when \p NestedCandidate is a
+  /// binary `and(x, y)`. Rules, tried in this order:
+  ///   - Other is or(x, y)  -> xor(x, y)
+  ///   - Other is xor(x, y) -> or(x, y)
+  ///   - Other is not(x)    -> or(not(x), y)   (x on either side of the and)
+  ///
+  /// \returns the replacement value, or nullptr if no rule applies.
+  MInstruction *rewriteXorWithNestedAnd(MInstruction &NestedCandidate,
+                                        MInstruction &Other, MBasicBlock &BB) {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_and);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+
+    if (const BinaryInstruction *OtherAsOr =
+            getBinaryWithOpcode(Other, OP_or)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsOr)) {
+        return createBinaryInstruction(OP_xor, *Inner->getType(),
+                                       Inner->getOperand<0>(),
+                                       Inner->getOperand<1>(), BB);
+      }
+    }
+
+    if (const BinaryInstruction *OtherAsXor =
+            getBinaryWithOpcode(Other, OP_xor)) {
+      if (hasSameUnorderedOperands(*Inner, *OtherAsXor)) {
+        return createBinaryInstruction(OP_or, *Inner->getType(),
+                                       Inner->getOperand<0>(),
+                                       Inner->getOperand<1>(), BB);
+      }
+    }
+
+    if (Other.getOpcode() != OP_not) {
+      return nullptr;
+    }
+    // The operand of the nested and that is not the negated value.
+    MInstruction *Remaining =
+        getOtherBinaryOperand(*Inner, *Other.getOperand<0>());
+    if (Remaining == nullptr) {
+      return nullptr;
+    }
+    return createBinaryInstruction(OP_or, *Inner->getType(), &Other, Remaining,
+                                   BB);
+  }
+
+  /// Simplify xor(or(x, y), xor(x, y)) -> and(x, y), where
+  /// \p NestedCandidate is the `or` and \p Other is the `xor`.
+  ///
+  /// \returns the new `and`, or nullptr if the pattern does not match.
+  MInstruction *rewriteXorWithNestedOr(MInstruction &NestedCandidate,
+                                       MInstruction &Other, MBasicBlock &BB) {
+    const BinaryInstruction *Inner =
+        getBinaryWithOpcode(NestedCandidate, OP_or);
+    if (Inner == nullptr) {
+      return nullptr;
+    }
+
+    const BinaryInstruction *OtherAsXor = getBinaryWithOpcode(Other, OP_xor);
+    const bool SameOperands = OtherAsXor != nullptr &&
+                              hasSameUnorderedOperands(*Inner, *OtherAsXor);
+    if (!SameOperands) {
+      return nullptr;
+    }
+    return createBinaryInstruction(OP_and, *Inner->getType(),
+                                   Inner->getOperand<0>(),
+                                   Inner->getOperand<1>(), BB);
+  }
+
+  /// Recursive structural comparison of two instruction trees.
+  ///
+  /// Two instructions compare equal when they are the same object, or when
+  /// opcode, kind, type and operand count match, any opcode-specific payload
+  /// checked below matches, and all operands are pairwise structurally equal
+  /// (in order; commutativity is not considered).
+  ///
+  /// NOTE(review): opcodes without a dedicated case are compared by operands
+  /// only. This presumably relies on every other opcode being fully described
+  /// by its operands and having no side effects - confirm.
+  ///
+  /// \returns true if \p LHS and \p RHS are considered the same value.
+  bool structurallyEqual(const MInstruction &LHS,
+                         const MInstruction &RHS) const {
+    // Identical object: trivially equal.
+    if (&LHS == &RHS) {
+      return true;
+    }
+    // Cheap header comparison before any opcode-specific work.
+    if (LHS.getOpcode() != RHS.getOpcode() || LHS.getKind() != RHS.getKind() ||
+        LHS.getType() != RHS.getType() ||
+        LHS.getNumOperands() != RHS.getNumOperands()) {
+      return false;
+    }
+
+    switch (LHS.getOpcode()) {
+    case OP_const: {
+      // Only integer constants are compared by value; distinct non-integer
+      // constant instructions are conservatively treated as different.
+      const auto &LHSConst = llvm::cast<ConstantInstruction>(LHS).getConstant();
+      const auto &RHSConst = llvm::cast<ConstantInstruction>(RHS).getConstant();
+      if (!LHSConst.getType().isInteger() || !RHSConst.getType().isInteger()) {
+        return false;
+      }
+      return llvm::cast<MConstantInt>(&LHSConst)->getValue() ==
+             llvm::cast<MConstantInt>(&RHSConst)->getValue();
+    }
+    case OP_dread:
+      // Variable reads are equal when they name the same variable index.
+      // NOTE(review): presumably assumes the variable is not written between
+      // the two reads - confirm.
+      return llvm::cast<DreadInstruction>(LHS).getVarIdx() ==
+             llvm::cast<DreadInstruction>(RHS).getVarIdx();
+    case OP_cmp:
+      // Predicates must match; operands are compared by the loop below.
+      if (llvm::cast<CmpInstruction>(LHS).getPredicate() !=
+          llvm::cast<CmpInstruction>(RHS).getPredicate()) {
+        return false;
+      }
+      break;
+    case OP_load: {
+      // NOTE: Load instructions are compared structurally (by address
+      // computation parameters). This assumes no intervening stores between the
+      // two loads. In the current EVM frontend, each load comes from
+      // extractU256Operand and produces a unique instruction, so pointer
+      // equality catches all real cases. If the frontend evolves to produce
+      // aliased loads, this must be revisited.
+      const auto &LHSLoad = llvm::cast<LoadInstruction>(LHS);
+      const auto &RHSLoad = llvm::cast<LoadInstruction>(RHS);
+      if (LHSLoad.getScale() != RHSLoad.getScale() ||
+          LHSLoad.getOffset() != RHSLoad.getOffset() ||
+          LHSLoad.getSrcType() != RHSLoad.getSrcType() ||
+          LHSLoad.getDestType() != RHSLoad.getDestType() ||
+          LHSLoad.getSext() != RHSLoad.getSext()) {
+        return false;
+      }
+      // The index may be absent; both loads must agree on whether it exists.
+      const MInstruction *LHSIndex = LHSLoad.getIndex();
+      const MInstruction *RHSIndex = RHSLoad.getIndex();
+      if (LHSIndex == nullptr || RHSIndex == nullptr) {
+        if (LHSIndex != RHSIndex) {
+          return false;
+        }
+        break;
+      }
+      if (!structurallyEqual(*LHSIndex, *RHSIndex)) {
+        return false;
+      }
+      break;
+    }
+    case OP_evm_u256_add_result: {
+      // Result projections must select the same result index.
+      const auto &LHSRes = llvm::cast<EvmU256AddResultInstruction>(LHS);
+      const auto &RHSRes = llvm::cast<EvmU256AddResultInstruction>(RHS);
+      if (LHSRes.getResultIdx() != RHSRes.getResultIdx()) {
+        return false;
+      }
+      break;
+    }
+    case OP_evm_u256_sub_result: {
+      // Result projections must select the same result index.
+      const auto &LHSRes = llvm::cast<EvmU256SubResultInstruction>(LHS);
+      const auto &RHSRes = llvm::cast<EvmU256SubResultInstruction>(RHS);
+      if (LHSRes.getResultIdx() != RHSRes.getResultIdx()) {
+        return false;
+      }
+      break;
+    }
+    default:
+      break;
+    }
+
+    // Operand counts were checked above, so indexing RHS by the LHS count is
+    // in range. Operands are compared position by position.
+    for (uint32_t OperandIdx = 0; OperandIdx < LHS.getNumOperands();
+         ++OperandIdx) {
+      if (!structurallyEqual(*LHS.getOperand(OperandIdx),
+                             *RHS.getOperand(OperandIdx))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Returns true if \p MaybeNot is a `not` whose operand is structurally
+  /// equal to \p Value, i.e. MaybeNot == ~Value.
+  bool isNotOf(const MInstruction &MaybeNot, const MInstruction &Value) const {
+    if (MaybeNot.getOpcode() != OP_not) {
+      return false;
+    }
+    return structurallyEqual(*MaybeNot.getOperand<0>(), Value);
+  }
+
+  /// Checked downcast: returns \p Inst as a BinaryInstruction when its kind
+  /// is BINARY and its opcode is \p Opc, nullptr otherwise.
+  const BinaryInstruction *getBinaryWithOpcode(const MInstruction &Inst,
+                                               Opcode Opc) const {
+    const bool Matches =
+        Inst.getKind() == MInstruction::BINARY && Inst.getOpcode() == Opc;
+    return Matches ? static_cast<const BinaryInstruction *>(&Inst) : nullptr;
+  }
+
+  // Match a pair of binary operands where one has opcode OpcA and the other
+  // has opcode OpcB, and both share the same unordered operand set.
+  // The match is order-insensitive: (LHS, RHS) is tried first, then
+  // (RHS, LHS).
+  // Returns the OpcA instruction on success, nullptr otherwise.
+  const BinaryInstruction *matchBinaryOperandPair(const MInstruction &LHS,
+                                                  const MInstruction &RHS,
+                                                  Opcode OpcA,
+                                                  Opcode OpcB) const {
+    // Tries one fixed assignment: First must be OpcA, Second must be OpcB.
+    auto MatchOrdered =
+        [this, OpcA, OpcB](const MInstruction &First,
+                           const MInstruction &Second)
+        -> const BinaryInstruction * {
+      const BinaryInstruction *A = getBinaryWithOpcode(First, OpcA);
+      const BinaryInstruction *B = getBinaryWithOpcode(Second, OpcB);
+      if (A == nullptr || B == nullptr) {
+        return nullptr;
+      }
+      return hasSameUnorderedOperands(*A, *B) ? A : nullptr;
+    };
+
+    if (const BinaryInstruction *Hit = MatchOrdered(LHS, RHS)) {
+      return Hit;
+    }
+    return MatchOrdered(RHS, LHS);
+  }
+
+  /// Returns true if either operand of \p Inst is structurally equal to
+  /// \p Value. Operand 0 is checked first.
+  bool structurallyContains(const BinaryInstruction &Inst,
+                            const MInstruction &Value) const {
+    if (structurallyEqual(*Inst.getOperand<0>(), Value)) {
+      return true;
+    }
+    return structurallyEqual(*Inst.getOperand<1>(), Value);
+  }
+
+  /// Given a binary instruction and a value matching one of its operands,
+  /// returns the opposite operand. Operand 0 is tested first, so when both
+  /// operands match, operand 1 is returned.
+  ///
+  /// \returns the opposite operand, or nullptr if neither operand is
+  /// structurally equal to \p Value.
+  MInstruction *getOtherBinaryOperand(const BinaryInstruction &Inst,
+                                      const MInstruction &Value) const {
+    const MInstruction *First = Inst.getOperand<0>();
+    const MInstruction *Second = Inst.getOperand<1>();
+    const MInstruction *Opposite = nullptr;
+    if (structurallyEqual(*First, Value)) {
+      Opposite = Second;
+    } else if (structurallyEqual(*Second, Value)) {
+      Opposite = First;
+    }
+    return const_cast<MInstruction *>(Opposite);
+  }
+
+  /// Returns true if the two binary instructions have structurally equal
+  /// operands, either in the same order or swapped. Opcodes are not compared.
+  bool hasSameUnorderedOperands(const BinaryInstruction &LHS,
+                                const BinaryInstruction &RHS) const {
+    const MInstruction &L0 = *LHS.getOperand<0>();
+    const MInstruction &L1 = *LHS.getOperand<1>();
+    const MInstruction &R0 = *RHS.getOperand<0>();
+    const MInstruction &R1 = *RHS.getOperand<1>();
+
+    // Same order first, then the swapped pairing.
+    if (structurallyEqual(L0, R0) && structurallyEqual(L1, R1)) {
+      return true;
+    }
+    return structurallyEqual(L0, R1) && structurallyEqual(L1, R0);
+  }
+
+  /// Returns true if \p Inst is a constant instruction of integer type.
+  static bool isIntegerConst(const MInstruction &Inst) {
+    if (Inst.getOpcode() != OP_const) {
+      return false;
+    }
+    return Inst.getType()->isInteger();
+  }
+
+  /// Returns true if \p Inst is an integer constant whose value is zero.
+  static bool isZeroConst(const MInstruction &Inst) {
+    if (isIntegerConst(Inst)) {
+      const auto &Constant = llvm::cast<ConstantInstruction>(Inst).getConstant();
+      return llvm::cast<MConstantInt>(&Constant)->getValue().isZero();
+    }
+    return false;
+  }
+
+  /// Returns true if \p Inst is an integer constant whose value is not zero.
+  /// Non-constants and non-integer constants yield false.
+  static bool isNonZeroIntConst(const MInstruction &Inst) {
+    if (isIntegerConst(Inst)) {
+      const auto &Constant = llvm::cast<ConstantInstruction>(Inst).getConstant();
+      return !llvm::cast<MConstantInt>(&Constant)->getValue().isZero();
+    }
+    return false;
+  }
+
+  /// Returns true if \p Inst is an integer constant whose value is one.
+  static bool isOneConst(const MInstruction &Inst) {
+    if (isIntegerConst(Inst)) {
+      const auto &Constant = llvm::cast<ConstantInstruction>(Inst).getConstant();
+      return llvm::cast<MConstantInt>(&Constant)->getValue().isOne();
+    }
+    return false;
+  }
+
+  /// Returns true if \p Inst is an integer constant with every bit set.
+  static bool isAllOnesConst(const MInstruction &Inst) {
+    if (isIntegerConst(Inst)) {
+      const auto &Constant = llvm::cast<ConstantInstruction>(Inst).getConstant();
+      return llvm::cast<MConstantInt>(&Constant)->getValue().isAllOnes();
+    }
+    return false;
+  }
+
+  /// Creates an integer constant 0 of \p Type, sized by the type's bit width.
+  MInstruction *createZeroConstant(MType &Type, MBasicBlock &BB) {
+    llvm::APInt Zero(Type.getBitWidth(), 0);
+    return createIntegerConstant(Type, Zero, BB);
+  }
+
+  /// Creates an integer constant 1 of \p Type, sized by the type's bit width.
+  MInstruction *createOneConstant(MType &Type, MBasicBlock &BB) {
+    llvm::APInt One(Type.getBitWidth(), 1);
+    return createIntegerConstant(Type, One, BB);
+  }
+
+  // Returns true if Inst is (sub 0 x), i.e. a negation of x: the opcode is
+  // OP_sub and operand 0 is the integer constant zero.
+  static bool isNeg(const MInstruction &Inst) {
+    return Inst.getOpcode() == OP_sub && isZeroConst(*Inst.getOperand<0>());
+  }
+
+  // Returns the negated operand x from (sub 0 x), i.e. operand 1.
+  // No validation is done here: the caller must check isNeg first.
+  static MInstruction *getNegOperand(MInstruction &Inst) {
+    MInstruction *Negated = Inst.getOperand<1>();
+    return Negated;
+  }
+
+  /// Creates an integer constant of \p Type with every bit set, sized by the
+  /// type's bit width.
+  MInstruction *createAllOnesConstant(MType &Type, MBasicBlock &BB) {
+    llvm::APInt AllOnes = llvm::APInt::getAllOnes(Type.getBitWidth());
+    return createIntegerConstant(Type, AllOnes, BB);
+  }
+
+  /// Creates a ConstantInstruction of \p Type holding \p Value. The constant
+  /// object is obtained from MConstantInt::get on the function's context.
+  /// NOTE(review): the leading `false` passed to createInstruction is not
+  /// defined in this chunk; presumably it marks the instruction as a
+  /// non-statement - confirm.
+  MInstruction *createIntegerConstant(MType &Type, llvm::APInt Value,
+                                      MBasicBlock &BB) {
+    auto *Constant = MConstantInt::get(Func->getContext(), Type, Value);
+    return Func->createInstruction<ConstantInstruction>(false, BB, &Type,
+                                                        *Constant);
+  }
+
+  /// Creates a NotInstruction of \p Type over \p Operand. The operand is
+  /// accepted as const for caller convenience and cast back to mutable for
+  /// the instruction constructor.
+  MInstruction *createNotInstruction(MType &Type, const MInstruction *Operand,
+                                     MBasicBlock &BB) {
+    MInstruction *MutableOperand = const_cast<MInstruction *>(Operand);
+    return Func->createInstruction<NotInstruction>(false, BB, &Type,
+                                                   MutableOperand);
+  }
+
+  /// Creates a BinaryInstruction with opcode \p Opc and result type \p Type
+  /// over (\p LHS, \p RHS). The operands are accepted as const for caller
+  /// convenience and cast back to mutable for the instruction constructor.
+  MInstruction *createBinaryInstruction(Opcode Opc, MType &Type,
+                                        const MInstruction *LHS,
+                                        const MInstruction *RHS,
+                                        MBasicBlock &BB) {
+    MInstruction *MutableLHS = const_cast<MInstruction *>(LHS);
+    MInstruction *MutableRHS = const_cast<MInstruction *>(RHS);
+    return Func->createInstruction<BinaryInstruction>(false, BB, Opc, &Type,
+                                                      MutableLHS, MutableRHS);
+  }
+
+  MFunction *Func = nullptr;
+  bool Changed = false;
+  llvm::DenseMap<MInstruction *, MInstruction *> RewriteCache;
+};
+
+} // namespace COMPILER
diff --git a/src/compiler/mir/pass/verifier.h b/src/compiler/mir/pass/verifier.h
index 21358a25d..9a63a49e2 100644
--- a/src/compiler/mir/pass/verifier.h
+++ b/src/compiler/mir/pass/verifier.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "compiler/mir/pass/visitor.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 namespace COMPILER {
 
@@ -25,6 +26,7 @@ class MVerifier final : public MVisitor {
   }
 
   void visitBasicBlock(MBasicBlock &BB) override {
+    Visited.clear();
     if (BB.empty()) {
       return;
     }
@@ -51,6 +53,13 @@ class MVerifier final : public MVisitor {
     MVisitor::visitBasicBlock(BB);
   }
 
+  // Visits each instruction at most once per basic block: Visited is cleared
+  // at the start of visitBasicBlock, and an instruction already in the set is
+  // skipped instead of being handed to the base visitor again.
+  void visitInstruction(MInstruction &I) override {
+    // insert().second is false when &I was already present.
+    if (!Visited.insert(&I).second) {
+      return;
+    }
+    MVisitor::visitInstruction(I);
+  }
+
   void visitUnaryInstruction(UnaryInstruction &I) override;
   void visitBinaryInstruction(BinaryInstruction &I) override;
   void visitAdcInstruction(AdcInstruction &I) override;
@@ -90,6 +99,7 @@ class MVerifier final : public MVisitor {
   bool Broken = false;
   llvm::raw_ostream &OS;
   uint32_t FailedCount = 0;
+  llvm::SmallPtrSet<const MInstruction *, 32> Visited;
 };
 
 } // namespace COMPILER
diff --git a/src/compiler/mir/pass/visitor.h b/src/compiler/mir/pass/visitor.h
index ff1794d5e..bc97bbe16 100644
--- a/src/compiler/mir/pass/visitor.h
+++ b/src/compiler/mir/pass/visitor.h
@@ -61,6 +61,20 @@ class MVisitor {
       visitEvmU256MulResultInstruction(
           static_cast<EvmU256MulResultInstruction &>(I));
       break;
+    case MInstruction::EVM_U256_ADD:
+      visitEvmU256AddInstruction(static_cast<EvmU256AddInstruction &>(I));
+      break;
+    case MInstruction::EVM_U256_ADD_RESULT:
+      visitEvmU256AddResultInstruction(
+          static_cast<EvmU256AddResultInstruction &>(I));
+      break;
+    case MInstruction::EVM_U256_SUB:
+      visitEvmU256SubInstruction(static_cast<EvmU256SubInstruction &>(I));
+      break;
+    case MInstruction::EVM_U256_SUB_RESULT:
+      visitEvmU256SubResultInstruction(
+          static_cast<EvmU256SubResultInstruction &>(I));
+      break;
     case MInstruction::EVM_UDIV128_BY64:
       visitEvmUdiv128By64Instruction(
           static_cast<EvmUdiv128By64Instruction &>(I));
@@ -212,6 +226,20 @@ class MVisitor {
   visitEvmU256MulResultInstruction(EvmU256MulResultInstruction &I) {
     VISIT_OPERAND_1
   }
+  virtual void visitEvmU256AddInstruction(EvmU256AddInstruction &I) {
+    VISIT_OPERANDS
+  }
+  virtual void
+  visitEvmU256AddResultInstruction(EvmU256AddResultInstruction &I) {
+    VISIT_OPERAND_1
+  }
+  virtual void visitEvmU256SubInstruction(EvmU256SubInstruction &I) {
+    VISIT_OPERANDS
+  }
+  virtual void
+  visitEvmU256SubResultInstruction(EvmU256SubResultInstruction &I) {
+    VISIT_OPERAND_1
+  }
   virtual void visitEvmUdiv128By64Instruction(EvmUdiv128By64Instruction &I) {
     VISIT_OPERAND_3
   }
diff --git a/src/compiler/target/x86/x86_cg_peephole.cpp b/src/compiler/target/x86/x86_cg_peephole.cpp
index bf7cb500c..91b44a70f 100644
--- a/src/compiler/target/x86/x86_cg_peephole.cpp
+++ b/src/compiler/target/x86/x86_cg_peephole.cpp
@@ -2,118 +2,75 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include "compiler/target/x86/x86_cg_peephole.h"
 #include "compiler/cgir/pass/cg_register_info.h"
 #include "compiler/llvm-prebuild/Target/X86/X86Subtarget.h"
-#include "compiler/target/x86/x86_constants.h"
 
 using namespace llvm;
 
 namespace COMPILER {
+
+#include "target/x86/x86_cg_peephole_generated.inc"
+
+// Fold MOVZX32rr8 + SUBREG_TO_REG(0, GR32, sub_32bit) -> MOVZX64rr8.
+// On x86-64, writing a 32-bit register implicitly zeroes the upper 32 bits,
+// so SUBREG_TO_REG is a pure register-class annotation and can be eliminated.
+// Returns true, with MII advanced past the erased SUBREG_TO_REG, on a fold.
+static bool tryFoldMovzxSubregToReg(CgBasicBlock &MBB,
+                                    CgBasicBlock::iterator &MII) {
+  CgInstruction &Movzx = *MII;
+  if (Movzx.getOpcode() != X86::MOVZX32rr8)
+    return false;
+
+  auto NextMII = MII;
+  ++NextMII;
+  if (NextMII == MBB.end())
+    return false;
+  CgInstruction &Subreg = *NextMII;
+  if (!Subreg.isSubregToReg())
+    return false;
+
+  // SUBREG_TO_REG layout: op0=def(GR64), op1=imm(0), op2=src(GR32), op3=idx
+  if (Subreg.getNumOperands() < 4)
+    return false;
+  if (!Subreg.getOperand(1).isImm() || Subreg.getOperand(1).getImm() != 0)
+    return false;
+  if (!Subreg.getOperand(2).isReg())
+    return false;
+  if (!Subreg.getOperand(3).isImm() ||
+      Subreg.getOperand(3).getImm() != X86::sub_32bit)
+    return false;
+
+  // The src of SUBREG_TO_REG must be the def of MOVZX32rr8.
+  CgRegister Movzx32Def = Movzx.getOperand(0).getReg();
+  if (Subreg.getOperand(2).getReg() != Movzx32Def)
+    return false;
+  // The rewrite retargets this def away from the GR32 register, so bail out
+  // when anything other than this SUBREG_TO_REG still reads it.
+  if (!MBB.getParent()->getRegInfo().hasOneNonDBGUse(Movzx32Def))
+    return false;
+
+  auto AfterSubreg = NextMII;
+  ++AfterSubreg;
+  // Rewrite: retarget the MOVZX def to the GR64 def of SUBREG_TO_REG, switch
+  // the opcode to MOVZX64rr8, then erase SUBREG_TO_REG.
+  Movzx.getOperand(0).setReg(Subreg.getOperand(0).getReg());
+  Movzx.setDesc(MBB.getParent()->getTargetInstrInfo().get(X86::MOVZX64rr8));
+
+  Subreg.eraseFromParent();
+  MII = AfterSubreg;
+  return true;
+}
+
 void X86CgPeephole::peepholeOptimizeBB(CgBasicBlock &MBB) {
-  if (MBB.empty()) {
-    return;
-  }
-
-  CgInstruction &LastMI = MBB.back();
-  if (LastMI.isUnconditionalBranch()) {
-    optimizeBranchInBlockEnd(MBB, LastMI);
-  }
+  (void)tryGeneratedBlockEndRules(MBB); // result deliberately ignored
 }
 
-void X86CgPeephole::peepholeOptimize(CgBasicBlock &MBB,
+bool X86CgPeephole::peepholeOptimize(CgBasicBlock &MBB,
                                      CgBasicBlock::iterator &MII) {
-  auto &Inst = *MII;
-  if (Inst.isCompare()) {
-    optimizeCmp(MBB, MII);
-  }
+  // Try the hand-written MOVZX fold first, then the generated rules.
+  return tryFoldMovzxSubregToReg(MBB, MII) ||
+         tryGeneratedInstructionRules(MBB, MII) ==
+             GeneratedInstructionRuleResult::Advanced;
 }
-void X86CgPeephole::optimizeCmp(CgBasicBlock &MBB,
-                                CgBasicBlock::iterator &MII) {
-  auto MIE = MBB.end();
-  // cmp/test -> setcc cond -> [movzx] -> test -> jne
-  // optimized to: cmp/test -> jcc cond
-  auto LocalMII = MII;
-  LocalMII++;
-  if (LocalMII == MIE)
-    return;
-  auto &Inst1 = *LocalMII;
-  if (Inst1.getOpcode() != X86::SETCCr)
-    return;
-  const auto &Op1 = Inst1.getOperand(0);
-  if (!Op1.isReg())
-    return;
-  auto CC = Inst1.getOperand(1).getImm();
-  unsigned TestReg = Op1.getReg();
-  CgInstruction *MovzxInst = nullptr;
-
-  LocalMII++;
-  if (LocalMII == MIE)
-    return;
-  auto &Inst2 = *LocalMII;
-  if (Inst2.getOpcode() == X86::MOVZX32rr8) {
-    const auto &MovzxDst = Inst2.getOperand(0);
-    const auto &MovzxSrc = Inst2.getOperand(1);
-    if (!MovzxDst.isReg() || !MovzxSrc.isReg() ||
-        MovzxSrc.getReg() != Op1.getReg())
-      return;
-    TestReg = MovzxDst.getReg();
-    MovzxInst = &Inst2;
-    LocalMII++;
-    if (LocalMII == MIE)
-      return;
-  }
-
-  auto &TestInst = *LocalMII;
-  switch (TestInst.getOpcode()) {
-  case X86::TEST8rr:
-  case X86::TEST16rr:
-  case X86::TEST32rr:
-  case X86::TEST64rr:
-    break;
-  default:
-    return;
-  }
-  const auto &TestOp0 = TestInst.getOperand(0);
-  const auto &TestOp1 = TestInst.getOperand(1);
-  if (!TestOp0.isReg() || !TestOp1.isReg() || TestOp0.getReg() != TestReg ||
-      TestOp1.getReg() != TestReg)
-    return;
-
-  LocalMII++;
-  if (LocalMII == MIE)
-    return;
-  auto &Inst3 = *LocalMII;
-  if (Inst3.getOpcode() != X86::JCC_1)
-    return;
-  if (Inst3.getOperand(1).getImm() != X86::CondCode::COND_NE)
-    return; // TODO, other optimization, use opposite condition code
-
-  // Ensure the SETCC/MOVZX registers have no uses beyond this chain.
-  // The lowering cache (_expr_reg_map) may share these virtual registers
-  // with other consumers; erasing them would leave dangling references.
-  const auto &RegInfo = MBB.getParent()->getRegInfo();
-  if (!RegInfo.hasOneNonDBGUse(Op1.getReg()))
-    return;
-  if (MovzxInst != nullptr && !RegInfo.hasOneNonDBGUse(TestReg))
-    return;
-
-  Inst1.eraseFromParent();
-  if (MovzxInst != nullptr) {
-    MovzxInst->eraseFromParent();
-  }
-  TestInst.eraseFromParent();
-  Inst3.getOperand(1).setImm(CC);
-}
-} // namespace COMPILER
 
-void X86CgPeephole::optimizeBranchInBlockEnd(CgBasicBlock &MBB,
-                                             CgInstruction &MI) {
-  ZEN_ASSERT(MI.getNumOperands() > 0);
-  CgOperand &MO = MI.getOperand(0);
-  ZEN_ASSERT(MO.isMBB());
-  CgBasicBlock *TargetMBB = MO.getMBB();
-  if (TargetMBB->getNumber() == MBB.getNumber() + 1) {
-    // remove the unconditional branch
-    MI.eraseFromParent();
-  }
-}
+} // namespace COMPILER
diff --git a/src/compiler/target/x86/x86_cg_peephole.h b/src/compiler/target/x86/x86_cg_peephole.h
index 631b31184..663f05693 100644
--- a/src/compiler/target/x86/x86_cg_peephole.h
+++ b/src/compiler/target/x86/x86_cg_peephole.h
@@ -10,12 +10,8 @@ class X86CgPeephole : public CgPeephole<X86CgPeephole> {
 public:
   using CgPeephole::CgPeephole;
   void peepholeOptimizeBB(CgBasicBlock &MBB);
-  // after this function, MII should be the processed instruction
-  void peepholeOptimize(CgBasicBlock &MBB, CgBasicBlock::iterator &MII);
-
-private:
-  void optimizeCmp(CgBasicBlock &MBB, CgBasicBlock::iterator &MII);
-  void optimizeBranchInBlockEnd(CgBasicBlock &MBB, CgInstruction &MI);
+  // Returns true when the matcher has already advanced MII.
+  bool peepholeOptimize(CgBasicBlock &MBB, CgBasicBlock::iterator &MII);
 };
 
 } // namespace COMPILER
diff --git a/src/compiler/target/x86/x86_cg_peephole_rules.SCHEMA.md b/src/compiler/target/x86/x86_cg_peephole_rules.SCHEMA.md
new file mode 100644
index 000000000..23078d0d4
--- /dev/null
+++ b/src/compiler/target/x86/x86_cg_peephole_rules.SCHEMA.md
@@ -0,0 +1,401 @@
+# x86 CgIR Peephole DSL Schema
+
+This document describes every field accepted by
+`x86_cg_peephole_rules.json` and the constraints that must hold for the
+generator (`tools/generate_x86_cg_peephole.py`) to accept the file and
+produce valid C++ code.
+
+---
+
+## 1. Top-level structure
+
+```json
+{
+  "version": 1,
+  "rules": [ /* array of rule objects */ ]
+}
+```
+
+| Field     | Type    | Required | Notes                              |
+|-----------|---------|----------|------------------------------------|
+| `version` | integer | yes      | Must be `1`.                       |
+| `rules`   | array   | yes      | Ordered list of rule objects.      |
+
+---
+
+## 2. Rule object
+
+```json
+{
+  "name":     "my-rule",
+  "stage":    "instruction",
+  "priority": 100,
+  "pattern":  [ /* pattern entries */ ],
+  "when":     [ /* optional conditions */ ],
+  "action":   { /* action object */ },
+  "validation": { /* validation object */ }
+}
+```
+
+| Field        | Type    | Required | Notes                                                        |
+|--------------|---------|----------|--------------------------------------------------------------|
+| `name`       | string  | yes      | Must be unique across all rules in the file.                 |
+| `stage`      | string  | yes      | `"instruction"` or `"block_end"`.                            |
+| `priority`   | integer | yes      | Higher value fires first within the same stage. Two rules with the same priority and an identical normalised signature (see section 9) are a generator error. |
+| `pattern`    | array   | yes      | Sequence of pattern entries. See section 3.                  |
+| `when`       | array   | no       | Optional extra conditions. See section 6. Only used with `block_end` stage currently. |
+| `action`     | object  | yes      | Describes what to do when the pattern matches. See section 7. |
+| `validation` | object  | yes      | Describes how the rule is validated. See section 8.          |
+
+---
+
+## 3. Pattern entry
+
+Each element of `pattern` describes one CgIR instruction that must match
+in program order.
+
+```json
+{
+  "bind":      "inst_name",
+  "opcode":    "CMP64rr",
+  "capture":   [ /* capture entries */ ],
+  "require":   [ /* require entries */ ]
+}
+```
+
+| Field       | Type   | Required | Notes                                                       |
+|-------------|--------|----------|-------------------------------------------------------------|
+| `bind`      | string | yes      | Local variable name for this instruction in the generated code. Used in `action` to refer to the instruction. |
+| `opcode`    | string | no*      | Exact x86 opcode name (without the `X86::` prefix). Exactly one of `opcode`, `opcode_any`, or `predicate` must be present. |
+| `opcode_any`| array  | no*      | List of opcode strings. The instruction matches if its opcode equals any element. |
+| `predicate` | string | no*      | A predicate method name called on the instruction object (e.g., `isCompare`, `isConditionalBranch`, `isUnconditionalBranch`). |
+| `capture`   | array  | no       | List of capture entries. See section 4.                     |
+| `require`   | array  | no       | List of requirement entries. See section 5.                 |
+
+\* Exactly one of `opcode`, `opcode_any`, or `predicate` must be present in each pattern entry.
+
+---
+
+## 4. Capture entry
+
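+A capture reads an operand field from the bound instruction into a named
+variable that can be referenced in later `require` entries. The rules file also sets an optional boolean `require_single_use` on a `reg` capture (see `fold-setcc-test-jne-to-jcc`); the table below does not yet describe that field.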
+
+```json
+{
+  "name":    "dst_reg",
+  "operand": 0,
+  "field":   "reg"
+}
+```
+
+| Field     | Type    | Required | Notes                                                        |
+|-----------|---------|----------|--------------------------------------------------------------|
+| `name`    | string  | yes      | Identifier used in `equals_capture` requirements and in `set_imm` actions. |
+| `operand` | integer | yes      | Zero-based operand index. A negative value `-N` selects the Nth-from-last explicit operand (counting from 1). |
+| `field`   | string  | yes      | `"reg"` to capture a register, `"imm"` to capture an immediate. |
+
+---
+
+## 5. Require entry variants
+
+A require entry constrains an operand of the bound instruction. All
+require entries for a given pattern element must hold for the match to
+succeed.
+
+### 5.1 `equals_capture`
+
+The operand's register value must equal a previously captured register.
+
+```json
+{
+  "operand": 1,
+  "field":   "reg",
+  "equals_capture": "dst_reg"
+}
+```
+
+| Field            | Type    | Notes                                                   |
+|------------------|---------|---------------------------------------------------------|
+| `operand`        | integer | Operand index (same semantics as in capture).           |
+| `field`          | string  | Must be `"reg"`.                                        |
+| `equals_capture` | string  | Name of a previously declared capture.                  |
+
+### 5.2 `equals_int`
+
+The operand's immediate value must equal the given integer constant.
+
+```json
+{
+  "operand":   -1,
+  "field":     "imm",
+  "equals_int": 0
+}
+```
+
+| Field        | Type    | Notes                                                   |
+|--------------|---------|---------------------------------------------------------|
+| `operand`    | integer | Operand index.                                          |
+| `field`      | string  | Must be `"imm"`.                                        |
+| `equals_int` | integer | The exact immediate value to match.                     |
+
+### 5.3 `equals_enum`
+
+The operand's immediate value must equal a named x86 `CondCode` constant.
+
+```json
+{
+  "operand":     1,
+  "field":       "imm",
+  "equals_enum": "COND_NE"
+}
+```
+
+| Field         | Type   | Notes                                                     |
+|---------------|--------|-----------------------------------------------------------|
+| `operand`     | integer| Operand index.                                            |
+| `field`       | string | Must be `"imm"`.                                          |
+| `equals_enum` | string | A constant name from `X86::CondCode` (without the `X86::CondCode::` prefix). |
+
+### 5.4 `equals_bool`
+
+The operand's `isMBB()` predicate must equal the given boolean.
+
+```json
+{
+  "operand":     0,
+  "field":       "is_mbb",
+  "equals_bool": true
+}
+```
+
+| Field         | Type    | Notes                                                  |
+|---------------|---------|--------------------------------------------------------|
+| `operand`     | integer | Operand index.                                         |
+| `field`       | string  | Must be `"is_mbb"`.                                    |
+| `equals_bool` | boolean | `true` requires `isMBB()` to hold; `false` requires it not to hold. |
+
+---
+
+## 6. When conditions
+
+The `when` array holds conditions checked after the pattern has matched.
+Currently only `block_end` rules use `when`.
+
+### 6.1 `target_is_next_block`
+
+The MBB operand at `operand` must point to the basic block that
+immediately follows the current block in the function's block list.
+
+```json
+{
+  "kind":    "target_is_next_block",
+  "inst":    "jmp",
+  "operand": 0
+}
+```
+
+| Field     | Type    | Notes                                                     |
+|-----------|---------|-----------------------------------------------------------|
+| `kind`    | string  | Must be `"target_is_next_block"`.                         |
+| `inst`    | string  | The `bind` name of the instruction whose operand to test. |
+| `operand` | integer | Zero-based operand index holding the target MBB.          |
+
+---
+
+## 7. Action object
+
+The action object specifies what transformations to apply when all
+pattern entries and when conditions have matched. Multiple primitives may
+appear in the same action.
+
+```json
+{
+  "erase":   [ "setcc", "test" ],
+  "set_imm": [
+    { "inst": "jcc", "operand": 1, "from_capture": "setcc_cc" }
+  ]
+}
+```
+
+### 7.1 `erase`
+
+A list of `bind` names. Each named instruction is erased from the basic
+block. If the first instruction in the pattern (`pattern[0]`) is in the
+erase list, the generator advances `MII` to the next instruction before
+erasing so that the caller's iterator remains valid.
+
+### 7.2 `set_imm`
+
+A list of immediate-mutation entries. Each entry overwrites one immediate
+operand of a bound instruction with the value stored in a named capture.
+
+| Field          | Type    | Notes                                                 |
+|----------------|---------|-------------------------------------------------------|
+| `inst`         | string  | `bind` name of the instruction to mutate.             |
+| `operand`      | integer | Zero-based operand index of the immediate to overwrite. |
+| `from_capture` | string  | Name of a previously declared `"imm"` capture.        |
+
+### 7.3 `custom` (not yet implemented)
+
+Reserved for future use. When present, the action requires a hand-written
+C++ helper function in `x86_cg_peephole.cpp`. The current generator does
+not emit calls to custom handlers; rules that require transformations
+beyond `erase` and `set_imm` (e.g., inverting a condition code) must wait
+until generator support is added.
+
+---
+
+## 8. Validation object
+
+Every rule must carry a `validation` block.
+
+```json
+{
+  "modes":    [ "structural", "execution" ],
+  "coverage": [ "X86CgPeephole.MyTestName" ]
+}
+```
+
+| Field      | Type   | Required | Notes                                                           |
+|------------|--------|----------|-----------------------------------------------------------------|
+| `modes`    | array  | yes      | Non-empty list of mode strings. See 8.1.                        |
+| `coverage` | array  | yes      | Non-empty list of gtest name strings. See 8.2.                  |
+
+### 8.1 Validation modes
+
+| Mode              | Meaning                                                                 |
+|-------------------|-------------------------------------------------------------------------|
+| `structural`      | Pattern is verified to match or not match a manually-constructed CgIR fixture. |
+| `execution`       | Before/after instruction sequences are executed on hardware and their results compared. |
+| `semantics_model` | A software model (e.g., flag evaluator) verifies semantic equivalence.  |
+
+Rules with `stage: "instruction"` must include at least one of
+`"execution"` or `"semantics_model"`. A `"structural"`-only instruction
+rule is rejected by `check_x86_cg_peephole_validation.py`. `block_end`
+rules may use `"structural"` and `"execution"` only.
+
+### 8.2 Coverage entries
+
+Each string must be a fully-qualified gtest name in the form
+`Suite.TestName`. The checker (`check_x86_cg_peephole_validation.py`)
+verifies that every coverage entry exists in the gtest binary when
+`--gtest-binary` is supplied.
+
+---
+
+## 9. Priority and conflict detection
+
+Rules are applied in descending priority order within each stage. The
+generator normalises each rule to a canonical signature (stage, pattern
+opcodes/predicates, require constraints, when conditions) and checks for
+duplicate `(signature, priority)` pairs. If two rules produce the same
+signature with the same priority the generator exits with a non-zero
+status and prints a conflict report.
+
+Rules with the same priority but different normalised signatures are
+legal and both are emitted into the dispatch function in the order they
+appear in the `rules` array.
+
+---
+
+## 10. DSL limits
+
+The following are intentionally outside the scope of the current DSL.
+They require either a new DSL feature or a `custom` action with a C++
+helper.
+
+- **No operand creation.** Actions may only erase instructions or mutate
+  existing immediate values. Creating new instructions or new operands is
+  not supported.
+- **No cross-block patterns.** All pattern entries must match within a
+  single basic block. `block_end` rules are a special case: they look only
+  at the last instruction of a block and may compare its branch target with
+  the next block in the function's block list via `target_is_next_block`.
+- **No register-class or liveness reasoning.** The DSL has no access to
+  register class information or live-range data. Patterns that are only
+  safe when a specific register is dead (e.g., flag-liveness after AND or
+  ADD with an identity immediate) are not expressible and must be
+  implemented as hand-coded passes.
+- **No arithmetic on captures.** The `set_imm` action copies a captured
+  value verbatim. Transformations such as inverting a condition code
+  (`cc ^ 1`) cannot be expressed; they require `custom`.
+- **Contiguous window only.** Pattern entries are matched strictly in
+  sequential order with no gaps. Patterns that need to skip intervening
+  instructions are not supported.
+
+---
+
+## 11. Examples
+
+### Instruction stage — remove redundant consecutive TEST64rr
+
+```json
+{
+  "name": "remove-redundant-test64rr",
+  "stage": "instruction",
+  "priority": 105,
+  "pattern": [
+    {
+      "bind": "test1",
+      "opcode": "TEST64rr",
+      "capture": [
+        { "name": "test1_op0", "operand": 0, "field": "reg" },
+        { "name": "test1_op1", "operand": 1, "field": "reg" }
+      ]
+    },
+    {
+      "bind": "test2",
+      "opcode": "TEST64rr",
+      "require": [
+        { "operand": 0, "field": "reg", "equals_capture": "test1_op0" },
+        { "operand": 1, "field": "reg", "equals_capture": "test1_op1" }
+      ]
+    }
+  ],
+  "action": { "erase": [ "test1" ] },
+  "validation": {
+    "modes": [ "structural", "execution" ],
+    "coverage": [
+      "X86CgPeephole.RemovesRedundantTest64rr",
+      "X86CgPeephole.KeepsNonRedundantTest64rr",
+      "X86CgPeephole.ExecutionHarnessRemoveRedundantTest64rr"
+    ]
+  }
+}
+```
+
+Safety note: `TEST64rr` does not modify any register value; it only sets
+flags. Two consecutive identical TEST instructions produce identical flag
+state. Removing the first leaves the second to set the same flags, so the
+transformation is correct without any liveness information.
+
+### Block-end stage — remove fallthrough unconditional jump
+
+```json
+{
+  "name": "remove-fallthrough-jump",
+  "stage": "block_end",
+  "priority": 100,
+  "pattern": [
+    {
+      "bind": "jmp",
+      "predicate": "isUnconditionalBranch",
+      "require": [
+        { "operand": 0, "field": "is_mbb", "equals_bool": true }
+      ]
+    }
+  ],
+  "when": [
+    { "kind": "target_is_next_block", "inst": "jmp", "operand": 0 }
+  ],
+  "action": { "erase": [ "jmp" ] },
+  "validation": {
+    "modes": [ "structural", "execution" ],
+    "coverage": [
+      "X86CgPeephole.RemovesFallthroughJump",
+      "X86CgPeephole.ExecutionHarnessRemoveFallthroughJump"
+    ]
+  }
+}
+```
diff --git a/src/compiler/target/x86/x86_cg_peephole_rules.json b/src/compiler/target/x86/x86_cg_peephole_rules.json
new file mode 100644
index 000000000..a7d332108
--- /dev/null
+++ b/src/compiler/target/x86/x86_cg_peephole_rules.json
@@ -0,0 +1,690 @@
+{
+  "version": 1,
+  "rules": [
+    {
+      "name": "remove-self-move",
+      "stage": "instruction",
+      "priority": 120,
+      "pattern": [
+        {
+          "bind": "mov",
+          "opcode_any": [
+            "MOV8rr",
+            "MOV16rr",
+            "MOV64rr"
+          ],
+          "capture": [
+            {
+              "name": "mov_dst",
+              "operand": 0,
+              "field": "reg"
+            }
+          ],
+          "require": [
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "mov_dst"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "mov"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesSelfMove64",
+          "X86CgPeephole.KeepsSelfMove32",
+          "X86CgPeephole.ExecutionHarnessRemoveSelfMove",
+          "X86CgPeephole.ExecutionHarnessSelfMove32ChangesUpperBits"
+        ]
+      }
+    },
+    {
+      "name": "remove-zero-shift",
+      "stage": "instruction",
+      "priority": 115,
+      "pattern": [
+        {
+          "bind": "shift",
+          "opcode_any": [
+            "SHL8ri",
+            "SHL16ri",
+            "SHL64ri",
+            "SHR8ri",
+            "SHR16ri",
+            "SHR64ri",
+            "SAR8ri",
+            "SAR16ri",
+            "SAR64ri"
+          ],
+          "require": [
+            {
+              "operand": -1,
+              "field": "imm",
+              "equals_int": 0
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "shift"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesZeroShift64",
+          "X86CgPeephole.KeepsZeroShift32",
+          "X86CgPeephole.ExecutionHarnessRemoveZeroShift"
+        ]
+      }
+    },
+    {
+      "name": "fold-setcc-test-jne-to-jcc",
+      "stage": "instruction",
+      "priority": 100,
+      "pattern": [
+        {
+          "bind": "cmp",
+          "predicate": "isCompare"
+        },
+        {
+          "bind": "setcc",
+          "opcode": "SETCCr",
+          "capture": [
+            {
+              "name": "setcc_dst",
+              "operand": 0,
+              "field": "reg",
+              "require_single_use": true
+            },
+            {
+              "name": "setcc_cc",
+              "operand": 1,
+              "field": "imm"
+            }
+          ]
+        },
+        {
+          "bind": "test",
+          "opcode": "TEST8rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "setcc_dst"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "setcc_dst"
+            }
+          ]
+        },
+        {
+          "bind": "jcc",
+          "opcode": "JCC_1",
+          "require": [
+            {
+              "operand": 1,
+              "field": "imm",
+              "equals_enum": "COND_NE"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "setcc",
+          "test"
+        ],
+        "set_imm": [
+          {
+            "inst": "jcc",
+            "operand": 1,
+            "from_capture": "setcc_cc"
+          }
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "semantics_model",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.FoldsSetccTestJneChain",
+          "X86CgPeephole.FuzzFoldSetccTestJneToJccSemantics",
+          "X86CgPeephole.ExecutionHarnessFoldSetccTestJneToJcc"
+        ]
+      }
+    },
+    {
+      "name": "remove-fallthrough-jcc",
+      "stage": "block_end",
+      "priority": 110,
+      "pattern": [
+        {
+          "bind": "jcc",
+          "predicate": "isConditionalBranch",
+          "require": [
+            {
+              "operand": 0,
+              "field": "is_mbb",
+              "equals_bool": true
+            }
+          ]
+        }
+      ],
+      "when": [
+        {
+          "kind": "target_is_next_block",
+          "inst": "jcc",
+          "operand": 0
+        }
+      ],
+      "action": {
+        "erase": [
+          "jcc"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesFallthroughConditionalJump",
+          "X86CgPeephole.ExecutionHarnessRemoveFallthroughConditionalJump"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-test64rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "test1",
+          "opcode": "TEST64rr",
+          "capture": [
+            {
+              "name": "test1_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "test1_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "test2",
+          "opcode": "TEST64rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "test1_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "test1_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "test1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantTest64rr",
+          "X86CgPeephole.KeepsNonRedundantTest64rr",
+          "X86CgPeephole.ExecutionHarnessRemoveRedundantTest64rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-test32rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "test1",
+          "opcode": "TEST32rr",
+          "capture": [
+            {
+              "name": "test32_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "test32_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "test2",
+          "opcode": "TEST32rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "test32_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "test32_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "test1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantTest32rr",
+          "X86CgPeephole.KeepsNonRedundantTest32rr",
+          "X86CgPeephole.ExecutionHarnessRemoveRedundantTest32rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-test8rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "test1",
+          "opcode": "TEST8rr",
+          "capture": [
+            {
+              "name": "test8_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "test8_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "test2",
+          "opcode": "TEST8rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "test8_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "test8_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "test1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantTest8rr",
+          "X86CgPeephole.KeepsNonRedundantTest8rr",
+          "X86CgPeephole.ExecutionHarnessRemoveRedundantTestrr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-cmp64rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "cmp1",
+          "opcode": "CMP64rr",
+          "capture": [
+            {
+              "name": "cmp64_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "cmp64_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "cmp2",
+          "opcode": "CMP64rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "cmp64_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "cmp64_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "cmp1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantCmp64rr",
+          "X86CgPeephole.KeepsNonRedundantCmp64rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-cmp32rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "cmp1",
+          "opcode": "CMP32rr",
+          "capture": [
+            {
+              "name": "cmp32_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "cmp32_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "cmp2",
+          "opcode": "CMP32rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "cmp32_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "cmp32_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "cmp1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantCmp32rr",
+          "X86CgPeephole.KeepsNonRedundantCmp32rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-cmp8rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "cmp1",
+          "opcode": "CMP8rr",
+          "capture": [
+            {
+              "name": "cmp8_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "cmp8_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "cmp2",
+          "opcode": "CMP8rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "cmp8_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "cmp8_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "cmp1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantCmp8rr",
+          "X86CgPeephole.KeepsNonRedundantCmp8rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-cmp16rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "cmp1",
+          "opcode": "CMP16rr",
+          "capture": [
+            {
+              "name": "cmp16_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "cmp16_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "cmp2",
+          "opcode": "CMP16rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "cmp16_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "cmp16_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "cmp1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantCmp16rr",
+          "X86CgPeephole.KeepsNonRedundantCmp16rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-redundant-test16rr",
+      "stage": "instruction",
+      "priority": 105,
+      "pattern": [
+        {
+          "bind": "test1",
+          "opcode": "TEST16rr",
+          "capture": [
+            {
+              "name": "test16_op0",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "test16_op1",
+              "operand": 1,
+              "field": "reg"
+            }
+          ]
+        },
+        {
+          "bind": "test2",
+          "opcode": "TEST16rr",
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "test16_op0"
+            },
+            {
+              "operand": 1,
+              "field": "reg",
+              "equals_capture": "test16_op1"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "test1"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesRedundantTest16rr",
+          "X86CgPeephole.KeepsNonRedundantTest16rr"
+        ]
+      }
+    },
+    {
+      "name": "remove-fallthrough-jump",
+      "stage": "block_end",
+      "priority": 100,
+      "pattern": [
+        {
+          "bind": "jmp",
+          "predicate": "isUnconditionalBranch",
+          "require": [
+            {
+              "operand": 0,
+              "field": "is_mbb",
+              "equals_bool": true
+            }
+          ]
+        }
+      ],
+      "when": [
+        {
+          "kind": "target_is_next_block",
+          "inst": "jmp",
+          "operand": 0
+        }
+      ],
+      "action": {
+        "erase": [
+          "jmp"
+        ]
+      },
+      "validation": {
+        "modes": [
+          "structural",
+          "execution"
+        ],
+        "coverage": [
+          "X86CgPeephole.RemovesFallthroughJump",
+          "X86CgPeephole.ExecutionHarnessRemoveFallthroughJump"
+        ]
+      }
+    }
+  ]
+}
diff --git a/src/compiler/target/x86/x86lowering.cpp b/src/compiler/target/x86/x86lowering.cpp
index e672eb938..c1557dd2d 100644
--- a/src/compiler/target/x86/x86lowering.cpp
+++ b/src/compiler/target/x86/x86lowering.cpp
@@ -1014,16 +1014,18 @@ CgRegister X86CgLowering::lowerAdcExpr(const AdcInstruction &Inst) {
   // Use x86 flags with direct ADC and rely on the existing carry chain.
   // The required invariant is that no flag-clobbering instruction is emitted
   // between the ADD/ADC instructions that produce and consume CF.
+  //
+  // Operand 2 is a chain link pointing to the carry-producing instruction and
+  // is metadata for analysis passes only. x86 lowering ignores it and relies
+  // on hardware CF. This is not a license to rewrite ADC into ADD: the
+  // carry chain is still live and must be preserved.
   const MInstruction *LHS = Inst.getOperand<0>();
   const MInstruction *RHS = Inst.getOperand<1>();
-  const MInstruction *Carry = Inst.getOperand<2>();
 
   MVT VT = getMVT(*Inst.getType());
   ZEN_ASSERT(VT.isInteger());
   const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
 
-  assertZeroFlagChainOperand(Carry);
-
   CgRegister LHSReg = lowerExpr(*LHS);
   CgRegister RHSReg = lowerExpr(*RHS);
 
@@ -1056,16 +1058,18 @@ CgRegister X86CgLowering::lowerSbbExpr(const SbbInstruction &Inst) {
   // Use x86 flags with direct SBB and rely on the existing borrow chain.
   // The required invariant is that no flag-clobbering instruction is emitted
   // between the SUB/SBB instructions that produce and consume CF.
+  //
+  // Operand 2 is a chain link pointing to the borrow-producing instruction and
+  // is metadata for analysis passes only. x86 lowering ignores it and relies
+  // on hardware CF. This is not a license to rewrite SBB into SUB: the
+  // borrow chain is still live and must be preserved.
   const MInstruction *LHS = Inst.getOperand<0>();
   const MInstruction *RHS = Inst.getOperand<1>();
-  const MInstruction *Borrow = Inst.getOperand<2>();
 
   MVT VT = getMVT(*Inst.getType());
   ZEN_ASSERT(VT.isInteger());
   const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
 
-  assertZeroFlagChainOperand(Borrow);
-
   CgRegister LHSReg = lowerExpr(*LHS);
   CgRegister RHSReg = lowerExpr(*RHS);
 
@@ -1328,6 +1332,86 @@ CgRegister X86CgLowering::lowerEvmU256MulResultExpr(
   return It->second[ResultIdx - 1];
 }
 
+// Lowers a 256-bit add: ADD64rr on limb 0, then ADC64rr on limbs 1-3. Limb 0
+// is returned; limbs 1-3 are stored in U256AddResultRegs for later lookup.
+CgRegister
+X86CgLowering::lowerEvmU256AddExpr(const EvmU256AddInstruction &Inst) {
+  constexpr size_t NumLimbs = 4;
+  std::array<CgRegister, NumLimbs> LhsRegs = {};
+  std::array<CgRegister, NumLimbs> RhsRegs = {};
+  for (size_t Limb = 0; Limb < NumLimbs; ++Limb) {
+    LhsRegs[Limb] = lowerExpr(*Inst.getOperand(Limb));
+    RhsRegs[Limb] = lowerExpr(*Inst.getOperand(NumLimbs + Limb));
+  }
+
+  // COPY (MOV) leaves EFLAGS untouched, so CF survives between chain steps.
+  std::array<CgRegister, NumLimbs> Sum = {};
+  for (size_t Limb = 0; Limb < NumLimbs; ++Limb) {
+    const unsigned Opcode = Limb == 0 ? X86::ADD64rr : X86::ADC64rr;
+    Sum[Limb] = fastEmitCopy(&X86::GR64RegClass, LhsRegs[Limb]);
+    MF->createCgInstruction(*CurBB, TII.get(Opcode), Sum[Limb], RhsRegs[Limb],
+                            Sum[Limb]);
+  }
+
+  U256AddResultRegs[&Inst] = {Sum[1], Sum[2], Sum[3]};
+  return Sum[0];
+}
+
+// Returns limb getResultIdx(): 0 from the add itself, 1-3 from the map.
+CgRegister X86CgLowering::lowerEvmU256AddResultExpr(
+    const EvmU256AddResultInstruction &Inst) {
+  const MInstruction *Producer = Inst.getAddInst();
+  CgRegister Limb0 = lowerExpr(*Producer);
+  const uint32_t Limb = Inst.getResultIdx();
+  if (Limb == 0)
+    return Limb0;
+
+  auto Entry = U256AddResultRegs.find(Producer);
+  ZEN_ASSERT(Entry != U256AddResultRegs.end());
+  ZEN_ASSERT(Limb <= Entry->second.size());
+  return Entry->second[Limb - 1];
+}
+
+// Lowers a 256-bit sub: SUB64rr on limb 0, then SBB64rr on limbs 1-3. Limb 0
+// is returned; limbs 1-3 are stored in U256SubResultRegs for later lookup.
+CgRegister
+X86CgLowering::lowerEvmU256SubExpr(const EvmU256SubInstruction &Inst) {
+  constexpr size_t NumLimbs = 4;
+  std::array<CgRegister, NumLimbs> LhsRegs = {};
+  std::array<CgRegister, NumLimbs> RhsRegs = {};
+  for (size_t Limb = 0; Limb < NumLimbs; ++Limb) {
+    LhsRegs[Limb] = lowerExpr(*Inst.getOperand(Limb));
+    RhsRegs[Limb] = lowerExpr(*Inst.getOperand(NumLimbs + Limb));
+  }
+
+  // COPY (MOV) leaves EFLAGS untouched, so CF survives between chain steps.
+  std::array<CgRegister, NumLimbs> Diff = {};
+  for (size_t Limb = 0; Limb < NumLimbs; ++Limb) {
+    const unsigned Opcode = Limb == 0 ? X86::SUB64rr : X86::SBB64rr;
+    Diff[Limb] = fastEmitCopy(&X86::GR64RegClass, LhsRegs[Limb]);
+    MF->createCgInstruction(*CurBB, TII.get(Opcode), Diff[Limb], RhsRegs[Limb],
+                            Diff[Limb]);
+  }
+
+  U256SubResultRegs[&Inst] = {Diff[1], Diff[2], Diff[3]};
+  return Diff[0];
+}
+
+// Returns limb getResultIdx(): 0 from the sub itself, 1-3 from the map.
+CgRegister X86CgLowering::lowerEvmU256SubResultExpr(
+    const EvmU256SubResultInstruction &Inst) {
+  const MInstruction *Producer = Inst.getSubInst();
+  CgRegister Limb0 = lowerExpr(*Producer);
+  const uint32_t Limb = Inst.getResultIdx();
+  if (Limb == 0)
+    return Limb0;
+
+  auto Entry = U256SubResultRegs.find(Producer);
+  ZEN_ASSERT(Entry != U256SubResultRegs.end());
+  ZEN_ASSERT(Limb <= Entry->second.size());
+  return Entry->second[Limb - 1];
+}
+
 CgRegister
 X86CgLowering::lowerEvmUdiv128By64Expr(const EvmUdiv128By64Instruction &Inst) {
   const MInstruction *Hi = Inst.getOperand<0>();
diff --git a/src/compiler/target/x86/x86lowering.h b/src/compiler/target/x86/x86lowering.h
index b29bef3a8..70406620b 100644
--- a/src/compiler/target/x86/x86lowering.h
+++ b/src/compiler/target/x86/x86lowering.h
@@ -75,6 +75,10 @@ class X86CgLowering : public CgLowering<X86CgLowering> {
   CgRegister lowerEvmUmul128HiExpr(const EvmUmul128HiInstruction &Inst);
   CgRegister lowerEvmU256MulExpr(const EvmU256MulInstruction &Inst);
   CgRegister lowerEvmU256MulResultExpr(const EvmU256MulResultInstruction &Inst);
+  CgRegister lowerEvmU256AddExpr(const EvmU256AddInstruction &Inst);
+  CgRegister lowerEvmU256AddResultExpr(const EvmU256AddResultInstruction &Inst);
+  CgRegister lowerEvmU256SubExpr(const EvmU256SubInstruction &Inst);
+  CgRegister lowerEvmU256SubResultExpr(const EvmU256SubResultInstruction &Inst);
   CgRegister lowerEvmUdiv128By64Expr(const EvmUdiv128By64Instruction &Inst);
   CgRegister lowerEvmUrem128By64Expr(const EvmUrem128By64Instruction &Inst);
   CgRegister lowerAdcExpr(const AdcInstruction &Inst);
@@ -154,6 +158,10 @@ class X86CgLowering : public CgLowering<X86CgLowering> {
   llvm::DenseSet<const MInstruction *> Umul128NeedHi;
   llvm::DenseMap<const MInstruction *, std::array<CgRegister, 3>>
       U256MulResultRegs;
+  llvm::DenseMap<const MInstruction *, std::array<CgRegister, 3>>
+      U256AddResultRegs; // limbs 1-3 of each lowered EvmU256Add
+  llvm::DenseMap<const MInstruction *, std::array<CgRegister, 3>>
+      U256SubResultRegs; // limbs 1-3 of each lowered EvmU256Sub
   llvm::DenseMap<const MInstruction *, CgRegister> Udiv128RemRegs;
 };
 
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 5d28aa60d..f4e1f5cdd 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -53,6 +53,10 @@ if(ZEN_ENABLE_SPEC_TEST)
 
   add_executable(mempoolTests mempool_tests.cpp)
   add_executable(cAPITests c_api_tests.cpp)
+  if(ZEN_ENABLE_MULTIPASS_JIT)
+    add_executable(x86CgPeepholeTests x86_cg_peephole_tests.cpp)
+    add_executable(dmirValidationTests dmir_validation_tests.cpp)
+  endif()
   target_include_directories(
     mempoolTests PRIVATE ${googletest_SOURCE_DIR}/googletest/include
   )
@@ -116,6 +120,19 @@ if(ZEN_ENABLE_SPEC_TEST)
         PRIVATE dtvmcore gtest_main -fsanitize=address
         PUBLIC ${GTEST_BOTH_LIBRARIES}
       )
+      if(ZEN_ENABLE_MULTIPASS_JIT)
+        # Every configuration must link both multipass-JIT test binaries.
+        target_link_libraries(
+          x86CgPeepholeTests
+          PRIVATE compiler dtvmcore gtest_main -fsanitize=address
+          PUBLIC ${GTEST_BOTH_LIBRARIES}
+        )
+        target_link_libraries(
+          dmirValidationTests
+          PRIVATE compiler dtvmcore gtest_main -fsanitize=address
+          PUBLIC ${GTEST_BOTH_LIBRARIES}
+        )
+      endif()
 
       if(ZEN_ENABLE_EVM)
         target_link_libraries(
@@ -162,6 +179,20 @@ if(ZEN_ENABLE_SPEC_TEST)
         PRIVATE dtvmcore gtest_main -fsanitize=address -static-libasan
         PUBLIC ${GTEST_BOTH_LIBRARIES}
       )
+      if(ZEN_ENABLE_MULTIPASS_JIT)
+        target_link_libraries(
+          x86CgPeepholeTests
+          PRIVATE compiler dtvmcore gtest_main -fsanitize=address
+                  -static-libasan
+          PUBLIC ${GTEST_BOTH_LIBRARIES}
+        )
+        target_link_libraries(
+          dmirValidationTests
+          PRIVATE compiler dtvmcore gtest_main -fsanitize=address
+                  -static-libasan
+          PUBLIC ${GTEST_BOTH_LIBRARIES}
+        )
+      endif()
 
       if(ZEN_ENABLE_EVM)
         target_link_libraries(
@@ -227,6 +258,18 @@ if(ZEN_ENABLE_SPEC_TEST)
       PRIVATE dtvmcore gtest_main
       PUBLIC ${GTEST_BOTH_LIBRARIES}
     )
+    if(ZEN_ENABLE_MULTIPASS_JIT)
+      target_link_libraries(
+        x86CgPeepholeTests
+        PRIVATE compiler dtvmcore gtest_main
+        PUBLIC ${GTEST_BOTH_LIBRARIES}
+      )
+      target_link_libraries(
+        dmirValidationTests
+        PRIVATE compiler dtvmcore gtest_main
+        PUBLIC ${GTEST_BOTH_LIBRARIES}
+      )
+    endif()
 
     if(ZEN_ENABLE_EVM)
       target_link_libraries(
@@ -273,6 +316,89 @@ if(ZEN_ENABLE_SPEC_TEST)
   endif()
   add_test(NAME mempoolTests COMMAND mempoolTests)
   add_test(NAME cAPITests COMMAND cAPITests)
+  if(ZEN_ENABLE_MULTIPASS_JIT) # same guard as the add_executable calls
+    add_test(NAME x86CgPeepholeTests COMMAND x86CgPeepholeTests)
+    add_test(NAME dmirValidationTests COMMAND dmirValidationTests)
+    add_test(
+      NAME x86CgPeepholeRuleGen # script-only: receives just the source dir
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_x86_cg_peephole_generator.py
+        ${CMAKE_SOURCE_DIR}
+    )
+    add_test(
+      NAME x86CgPeepholeValidationMeta
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_x86_cg_peephole_validation.py
+        ${CMAKE_SOURCE_DIR} $<TARGET_FILE:x86CgPeepholeTests>
+    )
+    add_test(
+      NAME x86CgPeepholeValidationReport
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_report_x86_cg_peephole_validation.py
+        ${CMAKE_SOURCE_DIR} $<TARGET_FILE:x86CgPeepholeTests>
+    )
+    add_test(
+      NAME dmirRewriteRuleMeta
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_check_dmir_rewrite_rules.py
+        ${CMAKE_SOURCE_DIR} $<TARGET_FILE:dmirValidationTests>
+    )
+    add_test(
+      NAME dmirRewriteRuleReport
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_report_dmir_rewrite_rules.py
+        ${CMAKE_SOURCE_DIR} $<TARGET_FILE:dmirValidationTests>
+    )
+    add_test(
+      NAME dmirSeedRuleMiner
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_mine_dmir_seed_rules.py
+        ${CMAKE_SOURCE_DIR}
+    )
+    add_test(
+      NAME dmirBootstrapMinerConfig
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_mine_dmir_bootstrap_config.py
+        ${CMAKE_SOURCE_DIR}
+    )
+    add_test(
+      NAME dmirNovelRuleMiner
+      COMMAND
+        ${Python3_EXECUTABLE}
+        ${CMAKE_SOURCE_DIR}/tools/test_mine_dmir_novel_rules.py
+        ${CMAKE_SOURCE_DIR}
+    )
+    if(ZEN_ENABLE_EVM) # compiler pass timing tool tests
+      add_test(
+        NAME compilerPassTimingTool # the only timing test given the dtvm binary
+        COMMAND
+          ${Python3_EXECUTABLE}
+          ${CMAKE_SOURCE_DIR}/tools/test_collect_compiler_pass_timings.py
+          ${CMAKE_SOURCE_DIR} $<TARGET_FILE:dtvm>
+      )
+      add_test(
+        NAME compilerPassTimingBudgetTool
+        COMMAND
+          ${Python3_EXECUTABLE}
+          ${CMAKE_SOURCE_DIR}/tools/test_check_compiler_pass_timing_budget.py
+          ${CMAKE_SOURCE_DIR}
+      )
+      add_test(
+        NAME compilerPassTimingBudgetRefreshTool
+        COMMAND
+          ${Python3_EXECUTABLE}
+          ${CMAKE_SOURCE_DIR}/tools/test_update_compiler_pass_timing_budget.py
+          ${CMAKE_SOURCE_DIR}
+      )
+    endif()
+  endif()
 
   if(ZEN_ENABLE_EVM)
     add_test(NAME evmInterpTests COMMAND evmInterpTests)
diff --git a/src/tests/dmir_validation_tests.cpp b/src/tests/dmir_validation_tests.cpp
new file mode 100644
index 000000000..521ad4582
--- /dev/null
+++ b/src/tests/dmir_validation_tests.cpp
@@ -0,0 +1,2349 @@
+// Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "compiler/context.h"
+#include "compiler/mir/constants.h"
+#include "compiler/mir/function.h"
+#include "compiler/mir/instructions.h"
+#include "compiler/mir/pass/dmir_rewrite.h"
+#include "compiler/mir/pointer.h"
+#include "intx/intx.hpp"
+
+#include <array>
+#include <cstdint>
+#include <gtest/gtest.h>
+#include <optional>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace {
+
+using namespace COMPILER;
+using namespace llvm;
+
+MFunctionType *createVoidFunctionType(CompileContext &Context) {
+  return MFunctionType::create(Context, Context.VoidType, {}); // void result
+}
+
+class DMirTestBuilder { // Builds dMIR into one block of a void function
+public:
+  DMirTestBuilder() : Func(Context, 0) {
+    Context.initialize();
+    Func.setFunctionType(createVoidFunctionType(Context));
+    BB = Func.createBasicBlock(); // the only block; all helpers emit into it
+    Func.appendBlock(BB);
+    I64PtrType = MPointerType::create(Context, Context.I64Type);
+  }
+
+  ConstantInstruction *createConstI8(uint64_t Value) {
+    return createConst(Context.I8Type, Value);
+  }
+
+  ConstantInstruction *createConstI32(uint64_t Value) {
+    return createConst(Context.I32Type, Value);
+  }
+
+  ConstantInstruction *createConstI64(uint64_t Value) {
+    return createConst(Context.I64Type, Value);
+  }
+
+  template <class T, typename... Arguments> T *createExpr(Arguments &&...Args) {
+    return Func.createInstruction<T>(false, *BB, // presumably IsStmt=false
+                                     std::forward<Arguments>(Args)...);
+  }
+
+  template <class T, typename... Arguments> T *createStmt(Arguments &&...Args) {
+    return Func.createInstruction<T>(true, *BB, // presumably IsStmt=true
+                                     std::forward<Arguments>(Args)...);
+  }
+
+  Variable *createVariable(MType *Type) { return Func.createVariable(Type); }
+
+  MBasicBlock &getBlock() { return *BB; }
+
+  CompileContext Context; // must precede Func: Func(Context, 0)
+  MFunction Func;
+  MPointerType *I64PtrType = nullptr; // set in the constructor
+
+private:
+  ConstantInstruction *createConst(MType &Type, uint64_t Value) {
+    return createExpr<ConstantInstruction>(
+        &Type, *MConstantInt::get(Context, Type, Value));
+  }
+
+  MBasicBlock *BB = nullptr;
+};
+
+class DMirFragmentInterpreter { // APInt evaluator for test dMIR fragments
+public:
+  void setVariableValue(VariableIdx VarIdx, const APInt &Value) {
+    Variables[VarIdx] = Value;
+  }
+
+  APInt evaluate(const MInstruction *Inst) { // recurses into operands
+    switch (Inst->getOpcode()) {
+    case OP_const:
+      return evaluateConstant(cast<ConstantInstruction>(Inst));
+    case OP_dread:
+      return evaluateDread(cast<DreadInstruction>(Inst));
+    case OP_not:
+      return ~evaluate(Inst->getOperand<0>());
+    case OP_clz:
+      return createScalarResult(
+          *Inst->getType(),
+          evaluate(Inst->getOperand<0>()).countLeadingZeros());
+    case OP_ctz:
+      return createScalarResult(
+          *Inst->getType(),
+          evaluate(Inst->getOperand<0>()).countTrailingZeros());
+    case OP_popcnt:
+      return createScalarResult(
+          *Inst->getType(), evaluate(Inst->getOperand<0>()).countPopulation());
+    case OP_bswap:
+      return evaluate(Inst->getOperand<0>()).byteSwap();
+    case OP_add:
+      return evaluate(Inst->getOperand<0>()) + evaluate(Inst->getOperand<1>());
+    case OP_sub:
+      return evaluate(Inst->getOperand<0>()) - evaluate(Inst->getOperand<1>());
+    case OP_mul:
+      return evaluate(Inst->getOperand<0>()) * evaluate(Inst->getOperand<1>());
+    case OP_sdiv:
+      return evaluateDiv(Inst, true, false);
+    case OP_udiv:
+      return evaluateDiv(Inst, false, false);
+    case OP_srem:
+      return evaluateDiv(Inst, true, true);
+    case OP_urem:
+      return evaluateDiv(Inst, false, true);
+    case OP_and:
+      return evaluate(Inst->getOperand<0>()) & evaluate(Inst->getOperand<1>());
+    case OP_or:
+      return evaluate(Inst->getOperand<0>()) | evaluate(Inst->getOperand<1>());
+    case OP_xor:
+      return evaluate(Inst->getOperand<0>()) ^ evaluate(Inst->getOperand<1>());
+    case OP_shl:
+      return evaluateShift(Inst, ShiftKind::Left);
+    case OP_sshr:
+      return evaluateShift(Inst, ShiftKind::ArithmeticRight);
+    case OP_ushr:
+      return evaluateShift(Inst, ShiftKind::LogicalRight);
+    case OP_rotl:
+      return evaluateRotate(Inst, true);
+    case OP_rotr:
+      return evaluateRotate(Inst, false);
+    case OP_trunc:
+      return evaluate(Inst->getOperand<0>())
+          .trunc(getBitWidth(*Inst->getType()));
+    case OP_sext:
+      return evaluate(Inst->getOperand<0>())
+          .sext(getBitWidth(*Inst->getType()));
+    case OP_uext:
+      return evaluate(Inst->getOperand<0>())
+          .zext(getBitWidth(*Inst->getType()));
+    case OP_inttoptr:
+    case OP_ptrtoint:
+    case OP_bitcast: // all three only resize the bit pattern (zero-extended)
+      return evaluate(Inst->getOperand<0>())
+          .zextOrTrunc(getBitWidth(*Inst->getType()));
+    case OP_cmp:
+      return evaluateCmp(cast<CmpInstruction>(Inst));
+    case OP_select:
+      return evaluateSelect(cast<SelectInstruction>(Inst));
+    case OP_adc:
+      return evaluateAdc(cast<AdcInstruction>(Inst));
+    case OP_sbb:
+      return evaluateSbb(cast<SbbInstruction>(Inst));
+    case OP_evm_umul128_lo:
+      return createScalarResult(
+          *Inst->getType(),
+          evaluateUmul128(cast<EvmUmul128Instruction>(Inst)).first);
+    case OP_evm_umul128_hi:
+      return createScalarResult(
+          *Inst->getType(),
+          evaluateUmul128Hi(cast<EvmUmul128HiInstruction>(Inst)));
+    case OP_evm_udiv128_by64:
+      return createScalarResult(
+          *Inst->getType(),
+          evaluateUdiv128By64(cast<EvmUdiv128By64Instruction>(Inst)).first);
+    case OP_evm_urem128_by64:
+      return createScalarResult(
+          *Inst->getType(),
+          evaluateUrem128By64(cast<EvmUrem128By64Instruction>(Inst)));
+    default: // every other opcode is rejected
+      throw std::runtime_error("unsupported dMIR opcode: " +
+                               getOpcodeString(Inst->getOpcode()));
+    }
+  }
+
+  std::optional<APInt> execute(MBasicBlock &BB) { // nullopt: no value returned
+    for (auto *Inst : BB) {
+      switch (Inst->getOpcode()) {
+      case OP_dassign: {
+        auto *Dassign = cast<DassignInstruction>(Inst);
+        Variables[Dassign->getVarIdx()] = evaluate(Dassign->getOperand<0>());
+        break;
+      }
+      case OP_return: // stops at the first return statement
+        if (Inst->getType()->isVoid()) {
+          return std::nullopt;
+        }
+        return evaluate(Inst->getOperand<0>());
+      default:
+        throw std::runtime_error("unsupported dMIR statement: " +
+                                 getOpcodeString(Inst->getOpcode()));
+      }
+    }
+    return std::nullopt;
+  }
+
+private:
+  enum class ShiftKind : uint8_t {
+    Left,
+    ArithmeticRight,
+    LogicalRight,
+  };
+
+  static unsigned getBitWidth(const MType &Type) {
+    if (Type.isInteger()) {
+      return Type.getBitWidth();
+    }
+    if (Type.isPointer()) { // pointers: byte size * 8
+      return Type.getNumBytes() * 8;
+    }
+    throw std::runtime_error("unsupported dMIR value type");
+  }
+
+  static APInt createScalarResult(const MType &Type, uint64_t Value) {
+    return APInt(getBitWidth(Type), Value, Type.isInteger() && Type.isSigned());
+  }
+
+  APInt evaluateConstant(const ConstantInstruction *Inst) {
+    const auto &Constant = Inst->getConstant();
+    if (!Constant.getType().isInteger()) {
+      throw std::runtime_error("unsupported non-integer dMIR constant");
+    }
+    return cast<MConstantInt>(&Constant)->getValue();
+  }
+
+  APInt evaluateDread(const DreadInstruction *Inst) {
+    auto It = Variables.find(Inst->getVarIdx());
+    if (It == Variables.end()) {
+      throw std::runtime_error("dMIR variable was read before assignment");
+    }
+    return It->second;
+  }
+
+  APInt evaluateDiv(const MInstruction *Inst, bool Signed, bool Remainder) {
+    APInt Lhs = evaluate(Inst->getOperand<0>());
+    APInt Rhs = evaluate(Inst->getOperand<1>());
+    if (Rhs.isZero()) { // zero divisor is rejected, not modelled
+      throw std::runtime_error("division by zero in dMIR fragment");
+    }
+    if (Signed) {
+      return Remainder ? Lhs.srem(Rhs) : Lhs.sdiv(Rhs);
+    }
+    return Remainder ? Lhs.urem(Rhs) : Lhs.udiv(Rhs);
+  }
+
+  APInt evaluateShift(const MInstruction *Inst, ShiftKind Kind) {
+    APInt Value = evaluate(Inst->getOperand<0>());
+    const unsigned BitWidth = Value.getBitWidth();
+    const uint64_t Amount = evaluate(Inst->getOperand<1>()).getLimitedValue();
+    if (Amount >= BitWidth) { // shift by >= width is given a defined result
+      if (Kind == ShiftKind::ArithmeticRight && Value.isNegative()) {
+        return APInt::getAllOnes(BitWidth);
+      }
+      return APInt::getZero(BitWidth);
+    }
+    switch (Kind) {
+    case ShiftKind::Left:
+      return Value.shl(Amount);
+    case ShiftKind::ArithmeticRight:
+      return Value.ashr(Amount);
+    case ShiftKind::LogicalRight:
+      return Value.lshr(Amount);
+    }
+    llvm_unreachable("unknown shift kind");
+  }
+
+  APInt evaluateRotate(const MInstruction *Inst, bool Left) {
+    APInt Value = evaluate(Inst->getOperand<0>());
+    const unsigned BitWidth = Value.getBitWidth();
+    const uint64_t Amount = evaluate(Inst->getOperand<1>()).getLimitedValue();
+    const unsigned EffectiveAmount = // rotate count taken modulo the width
+        BitWidth == 0 ? 0 : static_cast<unsigned>(Amount % BitWidth);
+    return Left ? Value.rotl(EffectiveAmount) : Value.rotr(EffectiveAmount);
+  }
+
+  APInt evaluateCmp(const CmpInstruction *Inst) {
+    APInt Lhs = evaluate(Inst->getOperand<0>());
+    APInt Rhs = evaluate(Inst->getOperand<1>());
+    bool Result = false;
+    switch (Inst->getPredicate()) {
+    case CmpInstruction::ICMP_EQ:
+      Result = Lhs == Rhs;
+      break;
+    case CmpInstruction::ICMP_NE:
+      Result = Lhs != Rhs;
+      break;
+    case CmpInstruction::ICMP_UGT:
+      Result = Lhs.ugt(Rhs);
+      break;
+    case CmpInstruction::ICMP_UGE:
+      Result = Lhs.uge(Rhs);
+      break;
+    case CmpInstruction::ICMP_ULT:
+      Result = Lhs.ult(Rhs);
+      break;
+    case CmpInstruction::ICMP_ULE:
+      Result = Lhs.ule(Rhs);
+      break;
+    case CmpInstruction::ICMP_SGT:
+      Result = Lhs.sgt(Rhs);
+      break;
+    case CmpInstruction::ICMP_SGE:
+      Result = Lhs.sge(Rhs);
+      break;
+    case CmpInstruction::ICMP_SLT:
+      Result = Lhs.slt(Rhs);
+      break;
+    case CmpInstruction::ICMP_SLE:
+      Result = Lhs.sle(Rhs);
+      break;
+    default:
+      throw std::runtime_error("unsupported dMIR predicate");
+    }
+    return createScalarResult(*Inst->getType(), Result ? 1 : 0);
+  }
+
+  APInt evaluateSelect(const SelectInstruction *Inst) {
+    APInt Cond = evaluate(Inst->getOperand<0>()); // only the taken arm is run
+    return evaluate(Cond.isZero() ? Inst->getOperand<2>()
+                                  : Inst->getOperand<1>());
+  }
+
+  APInt evaluateAdc(const AdcInstruction *Inst) {
+    APInt Lhs = evaluate(Inst->getOperand<0>());
+    APInt Rhs = evaluate(Inst->getOperand<1>());
+    APInt Carry = // operand 2's value is used as the carry-in here
+        evaluate(Inst->getOperand<2>()).zextOrTrunc(Lhs.getBitWidth());
+    return Lhs + Rhs + Carry;
+  }
+
+  APInt evaluateSbb(const SbbInstruction *Inst) {
+    APInt Lhs = evaluate(Inst->getOperand<0>());
+    APInt Rhs = evaluate(Inst->getOperand<1>());
+    APInt Borrow = // operand 2's value is used as the borrow-in here
+        evaluate(Inst->getOperand<2>()).zextOrTrunc(Lhs.getBitWidth());
+    return Lhs - Rhs - Borrow;
+  }
+
+  std::pair<uint64_t, uint64_t>
+  evaluateUmul128(const EvmUmul128Instruction *Inst) { // returns {lo, hi}
+    const uint64_t Lhs = evaluateUnsigned64(Inst->getOperand<0>());
+    const uint64_t Rhs = evaluateUnsigned64(Inst->getOperand<1>());
+    const unsigned __int128 Product = static_cast<unsigned __int128>(Lhs) *
+                                      static_cast<unsigned __int128>(Rhs);
+    return {static_cast<uint64_t>(Product),
+            static_cast<uint64_t>(Product >> 64)};
+  }
+
+  uint64_t evaluateUmul128Hi(const EvmUmul128HiInstruction *Inst) {
+    return evaluateUmul128(cast<EvmUmul128Instruction>(Inst->getOperand<0>()))
+        .second;
+  }
+
+  std::pair<uint64_t, uint64_t>
+  evaluateUdiv128By64(const EvmUdiv128By64Instruction *Inst) { // {quot, rem}
+    const uint64_t Hi = evaluateUnsigned64(Inst->getOperand<0>());
+    const uint64_t Lo = evaluateUnsigned64(Inst->getOperand<1>());
+    const uint64_t Divisor = evaluateUnsigned64(Inst->getOperand<2>());
+    if (Divisor == 0) {
+      throw std::runtime_error("128/64 division by zero in dMIR fragment");
+    }
+    const unsigned __int128 Dividend =
+        (static_cast<unsigned __int128>(Hi) << 64) | Lo;
+    return {static_cast<uint64_t>(Dividend / Divisor), // truncated to 64 bits
+            static_cast<uint64_t>(Dividend % Divisor)};
+  }
+
+  uint64_t evaluateUrem128By64(const EvmUrem128By64Instruction *Inst) {
+    return evaluateUdiv128By64(
+               cast<EvmUdiv128By64Instruction>(Inst->getOperand<0>()))
+        .second;
+  }
+
+  uint64_t evaluateUnsigned64(const MInstruction *Inst) {
+    return evaluate(Inst).zextOrTrunc(64).getZExtValue();
+  }
+
+  std::unordered_map<VariableIdx, APInt> Variables; // dassign/dread storage
+};
+
+intx::uint256 composeU256(const std::array<uint64_t, 4> &Limbs) {
+  // Limbs[0] is least significant; Limbs[I] lands at bit offset 64 * I.
+  intx::uint256 Value = Limbs[0];
+  for (int I = 1; I < 4; ++I)
+    Value |= intx::uint256(Limbs[I]) << (64 * I);
+  return Value;
+}
+
+struct BinaryInputCase { // one (Lhs, Rhs) input pair
+  uint64_t Lhs = 0;
+  uint64_t Rhs = 0;
+};
+
+struct TernaryInputCase {
+  uint64_t First{0};  // first operand value
+  uint64_t Second{0}; // second operand value
+  uint64_t Third{0};  // third operand value
+};
+
+const std::array<uint64_t, 12> &getBoundaryU64Values() {
+  // Boundary operands shared by the binary and ternary case generators.
+  // The array is a function-local static, so it is built once on first use.
+  static const std::array<uint64_t, 12> Boundaries = {{
+      // Smallest magnitudes.
+      0ULL, 1ULL, 2ULL, 3ULL,
+      // Around the signed and unsigned 32-bit limits.
+      0x7fffffffULL, 0x80000000ULL,
+      0xffffffffULL, 0x100000000ULL,
+      // Around the signed and unsigned 64-bit limits.
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0xfffffffffffffffeULL,
+      0xffffffffffffffffULL,
+  }};
+  return Boundaries;
+}
+
+const std::vector<uint64_t> &getInterestingU64Values() {
+  // Hand-picked probes followed by eight pseudo-random words. The generator
+  // seed is fixed, so every run (and every caller) sees the same sequence.
+  // The vector is built once and cached in a function-local static.
+  static const std::vector<uint64_t> Probes = [] {
+    std::vector<uint64_t> Pool{
+        // Smallest magnitudes.
+        0ULL, 1ULL,
+        2ULL, 3ULL,
+        // Powers of two from 8 to 256, each with its predecessor (plus 65).
+        7ULL, 8ULL,
+        15ULL, 16ULL,
+        31ULL, 32ULL,
+        63ULL, 64ULL,
+        65ULL,
+        127ULL, 128ULL,
+        255ULL, 256ULL,
+        // Alternating bit patterns.
+        0xaaaaaaaaaaaaaaaaULL,
+        0x5555555555555555ULL,
+        // Sign-bit boundary and the two largest values.
+        0x8000000000000000ULL,
+        0x7fffffffffffffffULL,
+        0xfffffffffffffffeULL,
+        0xffffffffffffffffULL,
+    };
+
+    // Deterministic pseudo-random tail.
+    std::mt19937_64 Engine(0x44d7a5f3e219c8b1ULL);
+    for (int Drawn = 0; Drawn != 8; ++Drawn) {
+      Pool.push_back(Engine());
+    }
+    return Pool;
+  }();
+  return Probes;
+}
+
+std::vector<BinaryInputCase> getInterestingBinaryInputCases() {
+  // Every ordered pair of boundary values, then 96 seeded random pairs.
+  const auto &Edges = getBoundaryU64Values();
+  std::vector<BinaryInputCase> Pairs;
+  for (const uint64_t Left : Edges)
+    for (const uint64_t Right : Edges)
+      Pairs.push_back({Left, Right});
+  std::mt19937_64 Engine(0x93ad71b6ce204f55ULL);
+  for (int Remaining = 96; Remaining > 0; --Remaining) {
+    const uint64_t First = Engine();
+    Pairs.push_back({First, Engine()});
+  }
+  return Pairs;
+}
+
+std::vector<TernaryInputCase> getInterestingTernaryInputCases() {
+  // Every ordered triple of boundary values, then 128 seeded random ones.
+  const auto &Edges = getBoundaryU64Values();
+  std::vector<TernaryInputCase> Triples;
+  for (const uint64_t A : Edges)
+    for (const uint64_t B : Edges)
+      for (const uint64_t C : Edges)
+        Triples.push_back({A, B, C});
+  std::mt19937_64 Engine(0x7bf8c9ae1304d261ULL);
+  for (int Remaining = 128; Remaining > 0; --Remaining) {
+    const uint64_t A = Engine();
+    const uint64_t B = Engine();
+    Triples.push_back({A, B, Engine()});
+  }
+  return Triples;
+}
+
+void expectI64Equivalent(const APInt &Original, const APInt &Rewritten,
+                         const std::string &Context) { // Context labels failures
+  ASSERT_EQ(Original.getBitWidth(), 64U) << Context; // fatal: return before
+  ASSERT_EQ(Rewritten.getBitWidth(), 64U) << Context; // comparing other widths
+  EXPECT_TRUE(Original == Rewritten) // non-fatal value comparison
+      << Context << " original=" << Original.getZExtValue()
+      << " rewritten=" << Rewritten.getZExtValue();
+}
+
+template <typename OriginalBuilder, typename RewrittenBuilder>
+void expectUnaryI64RewriteEquivalent(const std::vector<uint64_t> &Values,
+                                     OriginalBuilder &&BuildOriginal,
+                                     RewrittenBuilder &&BuildRewritten) {
+  // A fresh builder and interpreter per sample keeps the cases independent.
+  for (const uint64_t Sample : Values) {
+    DMirTestBuilder Builder;
+    auto *I64 = &Builder.Context.I64Type;
+    Variable *Slot = Builder.createVariable(I64);
+    auto *Read = Builder.createExpr<DreadInstruction>(I64, Slot->getVarIdx());
+    auto *Before = BuildOriginal(Builder, Read);
+    auto *After = BuildRewritten(Builder, Read);
+    DMirFragmentInterpreter Interp;
+    Interp.setVariableValue(Slot->getVarIdx(), APInt(64, Sample));
+    const APInt Expected = Interp.evaluate(Before);
+    const APInt Actual = Interp.evaluate(After);
+    expectI64Equivalent(Expected, Actual, "value=" + std::to_string(Sample));
+  }
+}
+
+template <typename OriginalBuilder, typename RewrittenBuilder>
+void expectBinaryI64RewriteEquivalent(const std::vector<BinaryInputCase> &Cases,
+                                      OriginalBuilder &&BuildOriginal,
+                                      RewrittenBuilder &&BuildRewritten) {
+  // A fresh builder and interpreter per case keeps the cases independent.
+  for (const BinaryInputCase &Sample : Cases) {
+    DMirTestBuilder Builder;
+    auto *I64 = &Builder.Context.I64Type;
+    Variable *LSlot = Builder.createVariable(I64);
+    Variable *RSlot = Builder.createVariable(I64);
+    auto *Left = Builder.createExpr<DreadInstruction>(I64, LSlot->getVarIdx());
+    auto *Right = Builder.createExpr<DreadInstruction>(I64, RSlot->getVarIdx());
+    auto *Before = BuildOriginal(Builder, Left, Right);
+    auto *After = BuildRewritten(Builder, Left, Right);
+    DMirFragmentInterpreter Interp;
+    Interp.setVariableValue(LSlot->getVarIdx(), APInt(64, Sample.Lhs));
+    Interp.setVariableValue(RSlot->getVarIdx(), APInt(64, Sample.Rhs));
+    const std::string Where = "lhs=" + std::to_string(Sample.Lhs) +
+                              " rhs=" + std::to_string(Sample.Rhs);
+    const APInt Expected = Interp.evaluate(Before);
+    const APInt Actual = Interp.evaluate(After);
+    expectI64Equivalent(Expected, Actual, Where);
+  }
+}
+
+bool runDMirRewritePass(DMirTestBuilder &Builder) {
+  // Run a fresh rewrite pass over the builder's function; forward its result.
+  return DMirRewritePass().runOnMFunction(Builder.Func);
+}
+
+MInstruction *rewriteReturnedValue(DMirTestBuilder &Builder,
+                                   MInstruction *ReturnedValue) {
+  auto *ValueType = ReturnedValue->getType();
+  auto *Ret = Builder.createStmt<ReturnInstruction>(ValueType, ReturnedValue);
+  runDMirRewritePass(Builder); // the pass's boolean result is not needed here
+  return Ret->getOperand<0>(); // whatever the pass left as the return operand
+}
+
+void expectBinaryOperandsMatch(MInstruction *Inst, Opcode Opc, MInstruction *A,
+                               MInstruction *B) {
+  ASSERT_EQ(Inst->getOpcode(), Opc);
+  // Operand order does not matter: accept (A, B) as well as (B, A).
+  auto *Op0 = llvm::cast<BinaryInstruction>(Inst)->getOperand<0>();
+  auto *Op1 = llvm::cast<BinaryInstruction>(Inst)->getOperand<1>();
+  const bool Matches = (Op0 == A && Op1 == B) || (Op0 == B && Op1 == A);
+  EXPECT_TRUE(Matches);
+}
+
+TEST(DMirValidation, EvaluatesIntegerExpressionDag) {
+  // 0x0f0f.. ^ 0xf0f0.. is all ones; >> 4 leaves 60 set bits, and the rotate
+  // only moves them around, so popcnt of the whole chain must be 60.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  auto *Low = Builder.createConstI64(0x0f0f0f0f0f0f0f0fULL);
+  auto *High = Builder.createConstI64(0xf0f0f0f0f0f0f0f0ULL);
+  auto *Mixed = Builder.createExpr<BinaryInstruction>(OP_xor, I64, Low, High);
+  auto *Shifted = Builder.createExpr<BinaryInstruction>(
+      OP_ushr, I64, Mixed, Builder.createConstI64(4));
+  auto *Rotated = Builder.createExpr<BinaryInstruction>(
+      OP_rotl, I64, Shifted, Builder.createConstI64(8));
+  auto *Popcnt = Builder.createExpr<UnaryInstruction>(OP_popcnt, I64, Rotated);
+  DMirFragmentInterpreter Interp;
+  const APInt Result = Interp.evaluate(Popcnt);
+  EXPECT_EQ(Result.getZExtValue(), 60ULL);
+}
+
+TEST(DMirValidation, FuzzesAddZeroRewrite) {
+  // Identity under test: x + 0 == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_add, I64, X, B.createConstI64(0));
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesSubZeroRewrite) {
+  // Identity under test: x - 0 == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_sub, I64, X, B.createConstI64(0));
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndZeroRewrite) {
+  // Identity under test: x & 0 == 0.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_and, I64, X, B.createConstI64(0));
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndAllOnesRewrite) {
+  // Identity under test: x & ~0 == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *AllOnes = B.createConstI64(0xffffffffffffffffULL);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, X, AllOnes);
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndSelfRewrite) {
+  // Identity under test: x & x == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_and, I64, X, X);
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndNotSelfRewrite) {
+  // Identity under test: ~x & x == 0.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, NotX, X);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0);
+  };
+
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrZeroRewrite) {
+  // Identity under test: x | 0 == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, B.createConstI64(0));
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrAllOnesRewrite) {
+  // Identity under test: x | ~0 == ~0.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *AllOnes = B.createConstI64(0xffffffffffffffffULL);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, AllOnes);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0xffffffffffffffffULL);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrSelfRewrite) {
+  // Identity under test: x | x == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, X);
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndAbsorbOrRewrite) {
+  // Absorption: (x | y) & x == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, Or, X);
+  };
+  expectBinaryI64RewriteEquivalent(
+      getInterestingBinaryInputCases(), From,
+      [](DMirTestBuilder &, MInstruction *X, MInstruction *) { return X; });
+}
+
+TEST(DMirValidation, FuzzesAndFactorNotSelfRewrite) {
+  // (x & y) & ~x == 0: every bit the and-term keeps is also set in x, so
+  // masking with ~x clears all of them.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, And, NotX);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *, MInstruction *) {
+    return B.createConstI64(0);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndFactorOrRewrite) {
+  // (x & y) & (x | y) == x & y: the or-term is a superset of the and-term,
+  // so intersecting with it changes nothing.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, And, Or);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndFactorLhsRewrite) {
+  // Idempotence: (x & y) & x == x & y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, And, X);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndFactorRhsRewrite) {
+  // Idempotence: (x & y) & y == x & y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, And, Y);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndFactorNotRhsRewrite) {
+  // (x & y) & ~y == 0: every bit the and-term keeps is also set in y, so
+  // masking with ~y clears all of them.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, And, NotY);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *, MInstruction *) {
+    return B.createConstI64(0);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndAndXorZeroRewrite) {
+  // (x & y) & (x ^ y) == 0: a bit cannot be set in both operands and differ
+  // between them at the same time.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, And, Xor);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *, MInstruction *) {
+    return B.createConstI64(0);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndOrXorRewrite) {
+  // (x | y) & (x ^ y) == x ^ y: every differing bit is set in at least one
+  // operand, so the or-mask keeps the whole xor.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, Or, Xor);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndOrRhsRewrite) {
+  // Absorption: (x | y) & y == y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, Or, Y);
+  };
+  expectBinaryI64RewriteEquivalent(
+      getInterestingBinaryInputCases(), From,
+      [](DMirTestBuilder &, MInstruction *, MInstruction *Y) { return Y; });
+}
+
+TEST(DMirValidation, FuzzesAndNotOrRewrite) {
+  // ~x & (x | y) == ~x & y: the x part of the or-term is cancelled by ~x,
+  // leaving only the bits contributed by y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, NotX, Or);
+  };
+
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, NotX, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesAndNotXorRewrite) {
+  // ~x & (x ^ y) == ~x & y: wherever x is clear, x ^ y is just y, and ~x
+  // discards every other position.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, NotX, Xor);
+  };
+
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_and, I64, NotX, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrAbsorbAndRewrite) {
+  // Absorption: (x & y) | x == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, And, X);
+  };
+  expectBinaryI64RewriteEquivalent(
+      getInterestingBinaryInputCases(), From,
+      [](DMirTestBuilder &, MInstruction *X, MInstruction *) { return X; });
+}
+
+TEST(DMirValidation, FuzzesOrAndOrRewrite) {
+  // (x & y) | (x | y) == x | y: the and-term is a subset of the or-term, so
+  // merging it in adds no new bits.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, And, Or);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrAndRhsRewrite) {
+  // Absorption: (x & y) | y == y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, And, Y);
+  };
+  expectBinaryI64RewriteEquivalent(
+      getInterestingBinaryInputCases(), From,
+      [](DMirTestBuilder &, MInstruction *, MInstruction *Y) { return Y; });
+}
+
+TEST(DMirValidation, FuzzesOrAndXorRewrite) {
+  // (x & y) | (x ^ y) == x | y: bits set in both operands plus bits set in
+  // exactly one of them together cover every bit set in either.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, And, Xor);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrFactorLhsRewrite) {
+  // Idempotence: (x | y) | x == x | y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, Or, X);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrFactorRhsRewrite) {
+  // Idempotence: (x | y) | y == x | y.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, Or, Y);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrXorLhsRewrite) {
+  // (x ^ y) | x == x | y: or-ing x back in restores the bits xor cancelled.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, Xor, X);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrXorRhsRewrite) {
+  // (x ^ y) | y == x | y: or-ing y back in restores the bits xor cancelled.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, Xor, Y);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrNotSelfRewrite) {
+  // Identity under test: ~x | x == ~0.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotX, X);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0xffffffffffffffffULL);
+  };
+
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrAndNotLhsRewrite) {
+  // (x & y) | ~x == ~x | y: where x is set the and-term reduces to y, and
+  // where x is clear ~x already sets the bit.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, And, NotX);
+  };
+
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotX, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrAndNotRhsRewrite) {
+  // (x & y) | ~y == ~y | x: where y is set the and-term reduces to x, and
+  // where y is clear ~y already sets the bit.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *And = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, And, NotY);
+  };
+
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotY, X);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrOrXorRewrite) {
+  // (x | y) | (x ^ y) == x | y: the xor-term is a subset of the or-term, so
+  // merging it in adds no new bits.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    auto *Xor = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, Or, Xor);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesOrNotOrRewrite) {
+  // ~x | (x | y) == ~0: x and ~x together already set every bit, whatever
+  // y contributes.
+  auto From = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *Or = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotX, Or);
+  };
+  auto To = [](DMirTestBuilder &B, MInstruction *, MInstruction *) {
+    return B.createConstI64(0xffffffffffffffffULL);
+  };
+
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+TEST(DMirValidation, FuzzesDoubleNotRewrite) {
+  // Involution: ~(~x) == x.
+  auto From = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Inner = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<NotInstruction>(I64, Inner);
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *X) { return X; };
+
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), From, To);
+}
+
+TEST(DMirValidation, ExecutesDassignCmpSelectAndConversions) {
+  // Stores 0xff..f0 (-16) in a variable, reads it back and pushes it through
+  // trunc -> sext -> inttoptr -> ptrtoint; select(x < 0, ...) must yield -16.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Slot = Builder.createVariable(I64);
+  auto *Stored = Builder.createConstI64(0xfffffffffffffff0ULL);
+  Builder.createStmt<DassignInstruction>(&Builder.Context.VoidType, Stored,
+                                         Slot->getVarIdx());
+  auto *Loaded = Builder.createExpr<DreadInstruction>(I64, Slot->getVarIdx());
+  auto *Zero = Builder.createConstI64(0);
+  auto *IsNegative = Builder.createExpr<CmpInstruction>(
+      CmpInstruction::ICMP_SLT, I64, Loaded, Zero);
+  auto *Narrow = Builder.createExpr<ConversionInstruction>(
+      OP_trunc, &Builder.Context.I32Type, Loaded);
+  auto *Wide = Builder.createExpr<ConversionInstruction>(OP_sext, I64, Narrow);
+  auto *AsPointer = Builder.createExpr<ConversionInstruction>(
+      OP_inttoptr, Builder.I64PtrType, Wide);
+  auto *AsInteger = Builder.createExpr<ConversionInstruction>(
+      OP_ptrtoint, I64, AsPointer);
+  auto *Chosen = Builder.createExpr<SelectInstruction>(
+      I64, IsNegative, AsInteger, Builder.createConstI64(0));
+  Builder.createStmt<ReturnInstruction>(I64, Chosen);
+
+  DMirFragmentInterpreter Interp;
+  const auto Result = Interp.execute(Builder.getBlock());
+  ASSERT_TRUE(Result.has_value());
+  EXPECT_EQ(Result->getSExtValue(), -16);
+}
+
+TEST(DMirValidation, FuzzesSelectSameArmRewrite) {
+  // select(c, v, v) == v, whatever the condition evaluates to.
+  auto From = [](DMirTestBuilder &B, MInstruction *Cond, MInstruction *V) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<SelectInstruction>(I64, Cond, V, V);
+  };
+  auto To = [](DMirTestBuilder &, MInstruction *, MInstruction *V) {
+    return V;
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(), From, To);
+}
+
+// Semantic check of the select-same-arm rule for narrow (i8 / i32) value
+// types. Because both arms are the same SSA value, the select must evaluate
+// to that value at any integer width, whatever the condition is.
+template <typename ValTypeSelector>
+void fuzzSelectSameArmNarrow(ValTypeSelector &&GetValType,
+                             unsigned ExpectedWidth) {
+  for (const BinaryInputCase &Sample : getInterestingBinaryInputCases()) {
+    DMirTestBuilder Builder;
+    MType *ValType = GetValType(Builder);
+    auto *CondType = &Builder.Context.I64Type;
+    Variable *CondSlot = Builder.createVariable(CondType);
+    Variable *ValSlot = Builder.createVariable(ValType);
+    auto *Cond =
+        Builder.createExpr<DreadInstruction>(CondType, CondSlot->getVarIdx());
+    auto *Val =
+        Builder.createExpr<DreadInstruction>(ValType, ValSlot->getVarIdx());
+    auto *Original =
+        Builder.createExpr<SelectInstruction>(ValType, Cond, Val, Val);
+    DMirFragmentInterpreter Interp;
+    Interp.setVariableValue(CondSlot->getVarIdx(), APInt(64, Sample.Lhs));
+    Interp.setVariableValue(ValSlot->getVarIdx(),
+                            APInt(ExpectedWidth, Sample.Rhs));
+    const APInt OrigResult = Interp.evaluate(Original);
+    const APInt ValResult = Interp.evaluate(Val);
+    ASSERT_EQ(OrigResult.getBitWidth(), ExpectedWidth);
+    EXPECT_TRUE(OrigResult == ValResult)
+        << "cond=" << Sample.Lhs << " val=" << Sample.Rhs
+        << " original=" << OrigResult.getZExtValue()
+        << " rewritten=" << ValResult.getZExtValue();
+  }
+}
+
+TEST(DMirValidation, FuzzesSelectSameArmRewriteI8) {
+  auto I8Of = [](DMirTestBuilder &B) -> MType * { return &B.Context.I8Type; };
+  fuzzSelectSameArmNarrow(I8Of, 8U); // 8 = expected result width in bits
+}
+
+TEST(DMirValidation, FuzzesSelectSameArmRewriteI32) {
+  auto I32Of = [](DMirTestBuilder &B) -> MType * { return &B.Context.I32Type; };
+  fuzzSelectSameArmNarrow(I32Of, 32U); // 32 = expected result width in bits
+}
+
+TEST(DMirRewritePass, RewritesReturnedAddZeroToInput) {
+  // Despite the test name, add(non_const, 0) is intentionally NOT folded:
+  // keeping the add node preserves a register-copy point that benefits
+  // downstream register allocation for i64 operands. Only add(const, 0)
+  // folds to const, so the returned operand must still be the add itself.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Slot = Builder.createVariable(I64);
+  auto *Input = Builder.createExpr<DreadInstruction>(I64, Slot->getVarIdx());
+  auto *Zero = Builder.createConstI64(0);
+  auto *Add = Builder.createExpr<BinaryInstruction>(OP_add, I64, Input, Zero);
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Add);
+  EXPECT_EQ(Rewritten, Add);
+}
+
+TEST(DMirRewritePass, RewritesNestedTreeBottomUp) {
+  // Tree under test: add(and(not(not(x)), ~0), 0).
+  // The inner layers simplify bottom-up: not(not(x)) -> x, and(x, ~0) -> x.
+  // The outer add(x, 0) is intentionally NOT folded for non-constant x
+  // (it preserves a register-copy point for register allocation), so the
+  // result must be the same add node with its LHS collapsed to the dread.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Var = Builder.createVariable(I64);
+  auto *Load = Builder.createExpr<DreadInstruction>(I64, Var->getVarIdx());
+  auto *InnerNot = Builder.createExpr<NotInstruction>(I64, Load);
+  auto *OuterNot = Builder.createExpr<NotInstruction>(I64, InnerNot);
+  auto *AllOnes = Builder.createConstI64(0xffffffffffffffffULL);
+  auto *Masked =
+      Builder.createExpr<BinaryInstruction>(OP_and, I64, OuterNot, AllOnes);
+  auto *Zero = Builder.createConstI64(0);
+  auto *Root =
+      Builder.createExpr<BinaryInstruction>(OP_add, I64, Masked, Zero);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  EXPECT_EQ(Result, Root);
+  EXPECT_EQ(llvm::cast<BinaryInstruction>(Root)->getOperand<0>(), Load);
+}
+
+TEST(DMirRewritePass, RewritesSelectSameArmByStructure) {
+  // The two arms are separate dread nodes of the same variable, so they are
+  // equal by structure but not by pointer. The select is expected to be
+  // replaced by the true-arm node.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Selector = Builder.createVariable(I64);
+  Variable *Payload = Builder.createVariable(I64);
+  auto *Cond =
+      Builder.createExpr<DreadInstruction>(I64, Selector->getVarIdx());
+  auto *ThenArm =
+      Builder.createExpr<DreadInstruction>(I64, Payload->getVarIdx());
+  auto *ElseArm =
+      Builder.createExpr<DreadInstruction>(I64, Payload->getVarIdx());
+  auto *Root =
+      Builder.createExpr<SelectInstruction>(I64, Cond, ThenArm, ElseArm);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  EXPECT_EQ(Result, ThenArm);
+}
+
+TEST(DMirRewritePass, MaterializesTypedAllOnesForOrNotSelf) {
+  // or(not(x), x) on an i32 operand must fold to a constant whose value is
+  // all ones and whose bit width is the operand's 32 bits.
+  DMirTestBuilder Builder;
+  Variable *InputVar = Builder.createVariable(&Builder.Context.I32Type);
+  auto *Input = Builder.createExpr<DreadInstruction>(&Builder.Context.I32Type,
+                                                     InputVar->getVarIdx());
+  auto *NotInput =
+      Builder.createExpr<NotInstruction>(&Builder.Context.I32Type, Input);
+  auto *Or = Builder.createExpr<BinaryInstruction>(
+      OP_or, &Builder.Context.I32Type, NotInput, Input);
+
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Or);
+  ASSERT_EQ(Rewritten->getOpcode(), OP_const);
+  const auto &Constant =
+      llvm::cast<ConstantInstruction>(Rewritten)->getConstant();
+  const auto &Value = llvm::cast<MConstantInt>(&Constant)->getValue();
+  // APInt::operator== requires both sides to have the same bit width, so a
+  // width mismatch has to stop the test here (fatal) instead of falling
+  // through to the comparison below.
+  ASSERT_EQ(Value.getBitWidth(), 32U);
+  EXPECT_TRUE(Value == llvm::APInt(32, ~0U));
+}
+
+TEST(DMirRewritePass, RewritesAdcZeroCarryToAdd) {
+  // With a constant-zero carry-in, the returned adc(lhs, rhs, 0) should be
+  // replaced by a different node whose opcode is OP_add.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *ZeroCarry = Builder.createConstI64(0);
+  auto *Root = Builder.createExpr<AdcInstruction>(I64, A, B, ZeroCarry);
+  auto *Ret = Builder.createStmt<ReturnInstruction>(I64, Root);
+
+  EXPECT_TRUE(runDMirRewritePass(Builder));
+  auto *Returned = Ret->getOperand<0>();
+  EXPECT_NE(Returned, Root);
+  EXPECT_EQ(Returned->getOpcode(), OP_add);
+}
+
+TEST(DMirRewritePass, RewritesAdcZeroOperandsToInput) {
+  // adc(x, 0, 0): zero RHS and constant-zero carry-in, so the return
+  // statement's operand should become the dread of x itself.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Var = Builder.createVariable(I64);
+  auto *Load = Builder.createExpr<DreadInstruction>(I64, Var->getVarIdx());
+  auto *ZeroRhs = Builder.createConstI64(0);
+  auto *ZeroCarry = Builder.createConstI64(0);
+  auto *Root =
+      Builder.createExpr<AdcInstruction>(I64, Load, ZeroRhs, ZeroCarry);
+  auto *Ret = Builder.createStmt<ReturnInstruction>(I64, Root);
+
+  EXPECT_TRUE(runDMirRewritePass(Builder));
+  EXPECT_EQ(Ret->getOperand<0>(), Load);
+}
+
+TEST(DMirRewritePass, RewritesSbbZeroOperandsToInput) {
+  // sbb(x, 0, 0): zero RHS and constant-zero borrow-in, so the return
+  // statement's operand should become the dread of x itself.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Var = Builder.createVariable(I64);
+  auto *Load = Builder.createExpr<DreadInstruction>(I64, Var->getVarIdx());
+  auto *ZeroRhs = Builder.createConstI64(0);
+  auto *ZeroBorrow = Builder.createConstI64(0);
+  auto *Root =
+      Builder.createExpr<SbbInstruction>(I64, Load, ZeroRhs, ZeroBorrow);
+  auto *Ret = Builder.createStmt<ReturnInstruction>(I64, Root);
+
+  EXPECT_TRUE(runDMirRewritePass(Builder));
+  EXPECT_EQ(Ret->getOperand<0>(), Load);
+}
+
+TEST(DMirRewritePass, RewritesSbbSelfZeroBorrowToZero) {
+  // sbb(input, input, const(0)) → 0 when borrow is dead and LHS==RHS.
+  // The RHS is a separate dread of the same variable, so LHS and RHS are
+  // equal by structure rather than by node identity.
+  DMirTestBuilder Builder;
+  Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type);
+  auto *Input = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                     InputVar->getVarIdx());
+  auto *Sbb = Builder.createExpr<SbbInstruction>(
+      &Builder.Context.I64Type, Input,
+      Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                           InputVar->getVarIdx()),
+      Builder.createConstI64(0));
+  auto *Return =
+      Builder.createStmt<ReturnInstruction>(&Builder.Context.I64Type, Sbb);
+
+  EXPECT_TRUE(runDMirRewritePass(Builder));
+  auto *Result = Return->getOperand<0>();
+  EXPECT_NE(Result, Sbb);
+  // Fatal check: the casts below are only valid on a constant node.
+  ASSERT_TRUE(llvm::isa<ConstantInstruction>(Result));
+  // Being a constant is not enough; the folded value itself must be zero.
+  const auto &Constant =
+      llvm::cast<ConstantInstruction>(Result)->getConstant();
+  EXPECT_TRUE(llvm::cast<MConstantInt>(&Constant)->getValue().isZero());
+}
+
+TEST(DMirRewritePass, RewritesAndAbsorbOrToExistingOperand) {
+  // Absorption: and(or(a, b), a) should come back as the existing `a` node.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AOrB = Builder.createExpr<BinaryInstruction>(OP_or, I64, A, B);
+  auto *Root = Builder.createExpr<BinaryInstruction>(OP_and, I64, AOrB, A);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  EXPECT_EQ(Result, A);
+}
+
+TEST(DMirRewritePass, RewritesAndOrXorToExistingXorSubtree) {
+  // and(or(a, b), xor(a, b)) should reduce to the xor operand. The xor is
+  // built from fresh dreads of the same variables, and the expected result
+  // is that already-existing xor node.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AOrB = Builder.createExpr<BinaryInstruction>(OP_or, I64, A, B);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B2 = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AXorB = Builder.createExpr<BinaryInstruction>(OP_xor, I64, A2, B2);
+  auto *Root =
+      Builder.createExpr<BinaryInstruction>(OP_and, I64, AOrB, AXorB);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  EXPECT_EQ(Result, AXorB);
+}
+
+TEST(DMirRewritePass, RewritesOrNotOrToAllOnes) {
+  // or(not(a), or(a, b)) must fold to an all-ones constant. The inner or
+  // reads `a` through a separate dread node of the same variable.
+  DMirTestBuilder Builder;
+  Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type);
+  Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type);
+  auto *Lhs = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                   LhsVar->getVarIdx());
+  auto *Rhs = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                   RhsVar->getVarIdx());
+  auto *NotLhs =
+      Builder.createExpr<NotInstruction>(&Builder.Context.I64Type, Lhs);
+  auto *Or = Builder.createExpr<BinaryInstruction>(
+      OP_or, &Builder.Context.I64Type,
+      Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                           LhsVar->getVarIdx()),
+      Rhs);
+  auto *Root = Builder.createExpr<BinaryInstruction>(
+      OP_or, &Builder.Context.I64Type, NotLhs, Or);
+
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Root);
+  ASSERT_EQ(Rewritten->getOpcode(), OP_const);
+  const auto Value =
+      llvm::cast<MConstantInt>(
+          &llvm::cast<ConstantInstruction>(Rewritten)->getConstant())
+          ->getValue();
+  // Pin the width as well: isAllOnes() alone would also accept an all-ones
+  // constant materialized with the wrong bit width for an i64 expression.
+  EXPECT_EQ(Value.getBitWidth(), 64U);
+  EXPECT_TRUE(Value.isAllOnes());
+}
+
+TEST(DMirRewritePass, RewritesXorCancelToSiblingOperand) {
+  // xor(xor(a, b), a) cancels `a`; the expected result is the existing `b`
+  // node. The outer `a` is a separate dread of the same variable.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *Inner = Builder.createExpr<BinaryInstruction>(OP_xor, I64, A, B);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *Root = Builder.createExpr<BinaryInstruction>(OP_xor, I64, Inner, A2);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  EXPECT_EQ(Result, B);
+}
+
+TEST(DMirRewritePass, RewritesXorNotAllOnesToOperand) {
+  // xor(not(x), ~0) should come back as the original dread of x.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *Var = Builder.createVariable(I64);
+  auto *Load = Builder.createExpr<DreadInstruction>(I64, Var->getVarIdx());
+  auto *Inverted = Builder.createExpr<NotInstruction>(I64, Load);
+  auto *AllOnes = Builder.createConstI64(0xffffffffffffffffULL);
+  auto *Root =
+      Builder.createExpr<BinaryInstruction>(OP_xor, I64, Inverted, AllOnes);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  EXPECT_EQ(Result, Load);
+}
+
+TEST(DMirRewritePass, RewritesAndNotOrToNewAndNode) {
+  // Input: and(not(a), or(a, b)), where the or reads `a` through a separate
+  // dread. The result is checked with expectBinaryOperandsMatch against
+  // opcode OP_and and the operands not(a) and b.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *NotA = Builder.createExpr<NotInstruction>(I64, A);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *AOrB = Builder.createExpr<BinaryInstruction>(OP_or, I64, A2, B);
+  auto *Root = Builder.createExpr<BinaryInstruction>(OP_and, I64, NotA, AOrB);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  expectBinaryOperandsMatch(Result, OP_and, NotA, B);
+}
+
+TEST(DMirRewritePass, RewritesOrXorLhsToNewOrNode) {
+  // Input: or(xor(a, b), a), where the outer `a` is a separate dread. The
+  // result is checked with expectBinaryOperandsMatch against opcode OP_or
+  // and the operands a and b.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AXorB = Builder.createExpr<BinaryInstruction>(OP_xor, I64, A, B);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *Root = Builder.createExpr<BinaryInstruction>(OP_or, I64, AXorB, A2);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  expectBinaryOperandsMatch(Result, OP_or, A, B);
+}
+
+TEST(DMirRewritePass, RewritesOrAndNotToNewOrNode) {
+  // Input: or(and(a, b), not(a)), where the not wraps a separate dread of
+  // `a`. The result is checked with expectBinaryOperandsMatch against opcode
+  // OP_or and the operands not(a) and b.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AAndB = Builder.createExpr<BinaryInstruction>(OP_and, I64, A, B);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *NotA = Builder.createExpr<NotInstruction>(I64, A2);
+  auto *Root = Builder.createExpr<BinaryInstruction>(OP_or, I64, AAndB, NotA);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  expectBinaryOperandsMatch(Result, OP_or, NotA, B);
+}
+
+TEST(DMirRewritePass, RewritesXorNotNotToNewXorNode) {
+  // Input: xor(not(a), not(b)). The result is checked with
+  // expectBinaryOperandsMatch against opcode OP_xor and the operands a, b.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *NotA = Builder.createExpr<NotInstruction>(I64, A);
+  auto *NotB = Builder.createExpr<NotInstruction>(I64, B);
+  auto *Root = Builder.createExpr<BinaryInstruction>(OP_xor, I64, NotA, NotB);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  expectBinaryOperandsMatch(Result, OP_xor, A, B);
+}
+
+TEST(DMirRewritePass, RewritesXorAndXorToNewOrNode) {
+  // Input: xor(and(a, b), xor(a, b)), with the inner xor built from separate
+  // dreads of the same variables. The result is checked with
+  // expectBinaryOperandsMatch against opcode OP_or and the operands a, b.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AAndB = Builder.createExpr<BinaryInstruction>(OP_and, I64, A, B);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B2 = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AXorB = Builder.createExpr<BinaryInstruction>(OP_xor, I64, A2, B2);
+  auto *Root =
+      Builder.createExpr<BinaryInstruction>(OP_xor, I64, AAndB, AXorB);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  expectBinaryOperandsMatch(Result, OP_or, A, B);
+}
+
+TEST(DMirRewritePass, RewritesXorOrXorToNewAndNode) {
+  // Input: xor(or(a, b), xor(a, b)), with the inner xor built from separate
+  // dreads of the same variables. The result is checked with
+  // expectBinaryOperandsMatch against opcode OP_and and the operands a, b.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarA = Builder.createVariable(I64);
+  Variable *VarB = Builder.createVariable(I64);
+  auto *A = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AOrB = Builder.createExpr<BinaryInstruction>(OP_or, I64, A, B);
+  auto *A2 = Builder.createExpr<DreadInstruction>(I64, VarA->getVarIdx());
+  auto *B2 = Builder.createExpr<DreadInstruction>(I64, VarB->getVarIdx());
+  auto *AXorB = Builder.createExpr<BinaryInstruction>(OP_xor, I64, A2, B2);
+  auto *Root =
+      Builder.createExpr<BinaryInstruction>(OP_xor, I64, AOrB, AXorB);
+
+  MInstruction *Result = rewriteReturnedValue(Builder, Root);
+  expectBinaryOperandsMatch(Result, OP_and, A, B);
+}
+
+TEST(DMirValidation, FuzzesAdcWithoutCarryRewrite) {
+  // Original form: adc(x, y, 0). Rewritten form: add(x, y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<AdcInstruction>(I64, X, Y, B.createConstI64(0));
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_add, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesAdcZeroOperandsRewrite) {
+  // Original form: adc(x, 0, 0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<AdcInstruction>(I64, X, B.createConstI64(0),
+                                        B.createConstI64(0));
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesSbbWithoutBorrowRewrite) {
+  // Original form: sbb(x, y, 0). Rewritten form: sub(x, y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<SbbInstruction>(I64, X, Y, B.createConstI64(0));
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_sub, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesSbbZeroOperandsRewrite) {
+  // Original form: sbb(x, 0, 0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<SbbInstruction>(I64, X, B.createConstI64(0),
+                                        B.createConstI64(0));
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorZeroRewrite) {
+  // Original form: xor(x, 0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *Zero = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_xor, &B.Context.I64Type, X,
+                                           Zero);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorSelfRewrite) {
+  // Original form: xor(x, x). Rewritten form: const 0.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    return B.createExpr<BinaryInstruction>(OP_xor, &B.Context.I64Type, X, X);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorCancelRewrite) {
+  // Original form: xor(xor(x, y), x). Rewritten form: y.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XXorY = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XXorY, X);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *,
+                          MInstruction *Y) { return Y; };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorCancelRhsRewrite) {
+  // Original form: xor(xor(x, y), y). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XXorY = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XXorY, Y);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X,
+                          MInstruction *) { return X; };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorNotCancelRewrite) {
+  // Original form: xor(not(x), xor(x, y)). Rewritten form: not(y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *XXorY = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, NotX, XXorY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *,
+                          MInstruction *Y) {
+    return B.createExpr<NotInstruction>(&B.Context.I64Type, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorNotSelfRewrite) {
+  // Original form: xor(not(x), x). Rewritten form: const with all 64 bits
+  // set.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, NotX, X);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0xffffffffffffffffULL);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorNotNotRewrite) {
+  // Original form: xor(not(x), not(y)). Rewritten form: xor(x, y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, NotX, NotY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_xor, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorNotOrRewrite) {
+  // Original form: xor(not(x), or(x, y)). Rewritten form: or(not(y), x).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *XOrY = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, NotX, XOrY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotY, X);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorNotAllOnesRewrite) {
+  // Original form: xor(not(x), ~0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    auto *AllOnes = B.createConstI64(0xffffffffffffffffULL);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, NotX, AllOnes);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorAndOrRewrite) {
+  // Original form: xor(and(x, y), or(x, y)). Rewritten form: xor(x, y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XAndY = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *XOrY = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XAndY, XOrY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_xor, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorAndNotLhsRewrite) {
+  // Original form: xor(and(x, y), not(x)). Rewritten form: or(not(x), y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XAndY = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XAndY, NotX);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotX = B.createExpr<NotInstruction>(I64, X);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotX, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorAndNotRhsRewrite) {
+  // Original form: xor(and(x, y), not(y)). Rewritten form: or(not(y), x).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XAndY = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XAndY, NotY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *NotY = B.createExpr<NotInstruction>(I64, Y);
+    return B.createExpr<BinaryInstruction>(OP_or, I64, NotY, X);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorAndXorRewrite) {
+  // Original form: xor(and(x, y), xor(x, y)). Rewritten form: or(x, y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XAndY = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *XXorY = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XAndY, XXorY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_or, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesXorOrXorRewrite) {
+  // Original form: xor(or(x, y), xor(x, y)). Rewritten form: and(x, y).
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X,
+                         MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *XOrY = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    auto *XXorY = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_xor, I64, XOrY, XXorY);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_and, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   MakeOriginal, MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesSubSelfRewrite) {
+  // Original form: sub(x, x). Rewritten form: const 0.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    return B.createExpr<BinaryInstruction>(OP_sub, &B.Context.I64Type, X, X);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesShlZeroRewrite) {
+  // Original form: shl(x, 0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *ShiftBy = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_shl, &B.Context.I64Type, X,
+                                           ShiftBy);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesSshrZeroRewrite) {
+  // Original form: sshr(x, 0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *ShiftBy = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_sshr, &B.Context.I64Type, X,
+                                           ShiftBy);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesUshrZeroRewrite) {
+  // Original form: ushr(x, 0). Rewritten form: x.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *ShiftBy = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_ushr, &B.Context.I64Type, X,
+                                           ShiftBy);
+  };
+  auto MakeRewritten = [](DMirTestBuilder &, MInstruction *X) { return X; };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+TEST(DMirValidation, FuzzesSbbSelfWithoutBorrowRewrite) {
+  // Original form: sbb(x, x, 0). Rewritten form: const 0.
+  auto MakeOriginal = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *I64 = &B.Context.I64Type;
+    return B.createExpr<SbbInstruction>(I64, X, X, B.createConstI64(0));
+  };
+  auto MakeRewritten = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), MakeOriginal,
+                                  MakeRewritten);
+}
+
+// Builds a four-limb addition (add + adc chain) and a four-limb subtraction
+// (sub + sbb chain) out of 64-bit DMIR expressions, evaluates every limb with
+// DMirFragmentInterpreter, and compares the recomposed values against
+// intx::uint256 arithmetic on the same limbs.
+TEST(DMirValidation, EvaluatesAdcAndSbbLimbChains) {
+  DMirTestBuilder Builder;
+  // NOTE(review): composeU256 is defined outside this chunk; index 0 is
+  // assumed to be the least significant limb because carries propagate from
+  // index I to I + 1 below.
+  // For the addition, limb 0 overflows (0xff..ff + 2) and limb 1 then computes
+  // 0 + 0xff..ff + carry, which exercises the "sum == lhs with carry-in" case.
+  std::array<uint64_t, 4> LhsLimbs = {
+      0xffffffffffffffffULL,
+      0x0000000000000000ULL,
+      0x1234567890abcdefULL,
+      0x0fedcba987654321ULL,
+  };
+  std::array<uint64_t, 4> RhsLimbs = {
+      0x0000000000000002ULL,
+      0xffffffffffffffffULL,
+      0x1111111111111111ULL,
+      0x2222222222222222ULL,
+  };
+
+  std::array<MInstruction *, 4> Sum = {};
+  std::array<MInstruction *, 4> Diff = {};
+  // Carry/Borrow hold the expression for the carry-in / borrow-in of the limb
+  // about to be built; both start as constant zero.
+  MInstruction *Carry = Builder.createConstI64(0);
+  MInstruction *Borrow = Builder.createConstI64(0);
+  for (size_t I = 0; I < LhsLimbs.size(); ++I) {
+    auto *Lhs = Builder.createConstI64(LhsLimbs[I]);
+    auto *Rhs = Builder.createConstI64(RhsLimbs[I]);
+    if (I == 0) {
+      // First limb: plain add/sub, no carry-in or borrow-in operand.
+      Sum[I] = Builder.createExpr<BinaryInstruction>(
+          OP_add, &Builder.Context.I64Type, Lhs, Rhs);
+      Diff[I] = Builder.createExpr<BinaryInstruction>(
+          OP_sub, &Builder.Context.I64Type, Lhs, Rhs);
+    } else {
+      // Later limbs consume the carry/borrow expression of the previous limb.
+      Sum[I] = Builder.createExpr<AdcInstruction>(&Builder.Context.I64Type, Lhs,
+                                                  Rhs, Carry);
+      Diff[I] = Builder.createExpr<SbbInstruction>(&Builder.Context.I64Type,
+                                                   Lhs, Rhs, Borrow);
+    }
+    // Carry-out  = (Sum <u Lhs)  | (carry-in  != 0 & Sum  == Lhs)
+    // Borrow-out = (Diff >u Lhs) | (borrow-in != 0 & Diff == Lhs)
+    // The "== Lhs" terms cover the wrap-around where Rhs plus the incoming
+    // carry/borrow is a multiple of 2^64 and the result equals Lhs again.
+    auto *CarryInNonZero = Builder.createExpr<CmpInstruction>(
+        CmpInstruction::ICMP_NE, &Builder.Context.I64Type, Carry,
+        Builder.createConstI64(0));
+    auto *BorrowInNonZero = Builder.createExpr<CmpInstruction>(
+        CmpInstruction::ICMP_NE, &Builder.Context.I64Type, Borrow,
+        Builder.createConstI64(0));
+    auto *SumCmp = Builder.createExpr<CmpInstruction>(
+        CmpInstruction::ICMP_ULT, &Builder.Context.I64Type, Sum[I], Lhs);
+    auto *SumEq = Builder.createExpr<CmpInstruction>(
+        CmpInstruction::ICMP_EQ, &Builder.Context.I64Type, Sum[I], Lhs);
+    auto *CarryInOverflow = Builder.createExpr<BinaryInstruction>(
+        OP_and, &Builder.Context.I64Type, CarryInNonZero, SumEq);
+    auto *DiffCmp = Builder.createExpr<CmpInstruction>(
+        CmpInstruction::ICMP_UGT, &Builder.Context.I64Type, Diff[I], Lhs);
+    auto *DiffEq = Builder.createExpr<CmpInstruction>(
+        CmpInstruction::ICMP_EQ, &Builder.Context.I64Type, Diff[I], Lhs);
+    auto *BorrowInOverflow = Builder.createExpr<BinaryInstruction>(
+        OP_and, &Builder.Context.I64Type, BorrowInNonZero, DiffEq);
+    Carry = Builder.createExpr<BinaryInstruction>(
+        OP_or, &Builder.Context.I64Type, SumCmp, CarryInOverflow);
+    Borrow = Builder.createExpr<BinaryInstruction>(
+        OP_or, &Builder.Context.I64Type, DiffCmp, BorrowInOverflow);
+  }
+
+  // Evaluate each limb expression to a concrete 64-bit value.
+  DMirFragmentInterpreter Interpreter;
+  std::array<uint64_t, 4> SumLimbs = {};
+  std::array<uint64_t, 4> DiffLimbs = {};
+  for (size_t I = 0; I < Sum.size(); ++I) {
+    SumLimbs[I] = Interpreter.evaluate(Sum[I]).getZExtValue();
+    DiffLimbs[I] = Interpreter.evaluate(Diff[I]).getZExtValue();
+  }
+
+  // Reference results computed on the host with intx::uint256.
+  const intx::uint256 ExpectedSum =
+      composeU256(LhsLimbs) + composeU256(RhsLimbs);
+  const intx::uint256 ExpectedDiff =
+      composeU256(LhsLimbs) - composeU256(RhsLimbs);
+  EXPECT_EQ(composeU256(SumLimbs), ExpectedSum);
+  EXPECT_EQ(composeU256(DiffLimbs), ExpectedDiff);
+}
+
+TEST(DMirValidation, EvaluatesEvm128Helpers) {
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+
+  // 0xffffffffffffffff * 3 == 0x2'fffffffffffffffd; the high half is built
+  // from the low-half instruction.
+  auto *FactorA = Builder.createConstI64(0xffffffffffffffffULL);
+  auto *FactorB = Builder.createConstI64(3ULL);
+  auto *ProductLo = Builder.createExpr<EvmUmul128Instruction>(
+      OP_evm_umul128_lo, I64, FactorA, FactorB);
+  auto *ProductHi =
+      Builder.createExpr<EvmUmul128HiInstruction>(I64, ProductLo);
+
+  // (1 << 64) / 3 == 0x5555555555555555 remainder 1; the remainder is built
+  // from the quotient instruction.
+  auto *NumeratorHi = Builder.createConstI64(1ULL);
+  auto *NumeratorLo = Builder.createConstI64(0ULL);
+  auto *Denominator = Builder.createConstI64(3ULL);
+  auto *Quot = Builder.createExpr<EvmUdiv128By64Instruction>(
+      OP_evm_udiv128_by64, I64, NumeratorHi, NumeratorLo, Denominator);
+  auto *Rem = Builder.createExpr<EvmUrem128By64Instruction>(I64, Quot);
+
+  DMirFragmentInterpreter Interpreter;
+  EXPECT_EQ(Interpreter.evaluate(ProductLo).getZExtValue(),
+            0xfffffffffffffffdULL);
+  EXPECT_EQ(Interpreter.evaluate(ProductHi).getZExtValue(), 2ULL);
+  EXPECT_EQ(Interpreter.evaluate(Quot).getZExtValue(), 0x5555555555555555ULL);
+  EXPECT_EQ(Interpreter.evaluate(Rem).getZExtValue(), 1ULL);
+}
+
+// Cross-checks the interpreter's results for the EVM 128-bit multiply and
+// 128-by-64 divide helper instructions against host `unsigned __int128`
+// arithmetic.
+TEST(DMirValidation, FuzzesEvm128HelpersAgainstHostArithmetic) {
+  // Multiplication: every ordered pair drawn from getInterestingU64Values().
+  const auto Values = getInterestingU64Values();
+  for (uint64_t Lhs : Values) {
+    for (uint64_t Rhs : Values) {
+      DMirTestBuilder Builder;
+      auto *MulLhs = Builder.createConstI64(Lhs);
+      auto *MulRhs = Builder.createConstI64(Rhs);
+      auto *MulLo = Builder.createExpr<EvmUmul128Instruction>(
+          OP_evm_umul128_lo, &Builder.Context.I64Type, MulLhs, MulRhs);
+      // The high-half instruction takes the low-half instruction as operand.
+      auto *MulHi = Builder.createExpr<EvmUmul128HiInstruction>(
+          &Builder.Context.I64Type, MulLo);
+
+      // Host reference: full 128-bit product, split into low and high halves.
+      const unsigned __int128 Product = static_cast<unsigned __int128>(Lhs) *
+                                        static_cast<unsigned __int128>(Rhs);
+      DMirFragmentInterpreter Interpreter;
+      EXPECT_EQ(Interpreter.evaluate(MulLo).getZExtValue(),
+                static_cast<uint64_t>(Product))
+          << "lhs=" << Lhs << " rhs=" << Rhs;
+      EXPECT_EQ(Interpreter.evaluate(MulHi).getZExtValue(),
+                static_cast<uint64_t>(Product >> 64))
+          << "lhs=" << Lhs << " rhs=" << Rhs;
+    }
+  }
+
+  // Division: First is the high dividend word, Second the low dividend word,
+  // Third the divisor.
+  for (const auto &InputCase : getInterestingTernaryInputCases()) {
+    // Skip division by zero; the host reference below would be undefined.
+    if (InputCase.Third == 0) {
+      continue;
+    }
+    DMirTestBuilder Builder;
+    auto *Quotient = Builder.createExpr<EvmUdiv128By64Instruction>(
+        OP_evm_udiv128_by64, &Builder.Context.I64Type,
+        Builder.createConstI64(InputCase.First),
+        Builder.createConstI64(InputCase.Second),
+        Builder.createConstI64(InputCase.Third));
+    // The remainder instruction takes the quotient instruction as operand.
+    auto *Remainder = Builder.createExpr<EvmUrem128By64Instruction>(
+        &Builder.Context.I64Type, Quotient);
+
+    const unsigned __int128 Dividend =
+        (static_cast<unsigned __int128>(InputCase.First) << 64) |
+        InputCase.Second;
+    DMirFragmentInterpreter Interpreter;
+    // NOTE(review): when First >= Third the exact quotient does not fit in 64
+    // bits and the expectation below truncates it. Whether the instruction is
+    // specified to truncate in that case is not visible here -- confirm.
+    EXPECT_EQ(Interpreter.evaluate(Quotient).getZExtValue(),
+              static_cast<uint64_t>(Dividend / InputCase.Third))
+        << "hi=" << InputCase.First << " lo=" << InputCase.Second
+        << " divisor=" << InputCase.Third;
+    EXPECT_EQ(Interpreter.evaluate(Remainder).getZExtValue(),
+              static_cast<uint64_t>(Dividend % InputCase.Third))
+        << "hi=" << InputCase.First << " lo=" << InputCase.Second
+        << " divisor=" << InputCase.Third;
+  }
+}
+
+TEST(DMirValidation, FuzzesMulZeroRewrite) {
+  // Both operand orders must fold to the constant zero.
+  auto BuildZero = [](DMirTestBuilder &B, MInstruction *) {
+    return B.createConstI64(0);
+  };
+
+  // (mul x 0) -> 0
+  auto BuildMulRhsZero = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *Zero = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_mul, &B.Context.I64Type, X,
+                                           Zero);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), BuildMulRhsZero,
+                                  BuildZero);
+
+  // (mul 0 x) -> 0
+  auto BuildMulLhsZero = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *Zero = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_mul, &B.Context.I64Type, Zero,
+                                           X);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), BuildMulLhsZero,
+                                  BuildZero);
+}
+
+TEST(DMirValidation, FuzzesMulOneRewrite) {
+  // Both operand orders must fold to the non-constant operand.
+  auto BuildIdentity = [](DMirTestBuilder &, MInstruction *X) { return X; };
+
+  // (mul x 1) -> x
+  auto BuildMulRhsOne = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *One = B.createConstI64(1);
+    return B.createExpr<BinaryInstruction>(OP_mul, &B.Context.I64Type, X, One);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), BuildMulRhsOne,
+                                  BuildIdentity);
+
+  // (mul 1 x) -> x
+  auto BuildMulLhsOne = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *One = B.createConstI64(1);
+    return B.createExpr<BinaryInstruction>(OP_mul, &B.Context.I64Type, One, X);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), BuildMulLhsOne,
+                                  BuildIdentity);
+}
+
+TEST(DMirValidation, FuzzesAddSelfToShl1Rewrite) {
+  // (add x x) -> (shl x 1)
+  auto BuildDoubleAdd = [](DMirTestBuilder &B, MInstruction *X) {
+    return B.createExpr<BinaryInstruction>(OP_add, &B.Context.I64Type, X, X);
+  };
+  auto BuildShiftByOne = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *One = B.createConstI64(1);
+    return B.createExpr<BinaryInstruction>(OP_shl, &B.Context.I64Type, X, One);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), BuildDoubleAdd,
+                                  BuildShiftByOne);
+}
+
+TEST(DMirValidation, FuzzesAddNegToSubRewrite) {
+  // Shared rewritten form for both operand orders: (sub y x).
+  auto BuildSub = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_sub, &B.Context.I64Type, Y, X);
+  };
+
+  // (add (sub 0 x) y) -> (sub y x)
+  auto BuildNegFirst = [](DMirTestBuilder &B, MInstruction *X,
+                          MInstruction *Y) {
+    auto *Zero = B.createConstI64(0);
+    auto *Negated = B.createExpr<BinaryInstruction>(
+        OP_sub, &B.Context.I64Type, Zero, X);
+    return B.createExpr<BinaryInstruction>(OP_add, &B.Context.I64Type, Negated,
+                                           Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildNegFirst, BuildSub);
+
+  // (add y (sub 0 x)) -> (sub y x)
+  auto BuildNegSecond = [](DMirTestBuilder &B, MInstruction *X,
+                           MInstruction *Y) {
+    auto *Zero = B.createConstI64(0);
+    auto *Negated = B.createExpr<BinaryInstruction>(
+        OP_sub, &B.Context.I64Type, Zero, X);
+    return B.createExpr<BinaryInstruction>(OP_add, &B.Context.I64Type, Y,
+                                           Negated);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildNegSecond, BuildSub);
+}
+
+TEST(DMirValidation, FuzzesAddAndXorToOrRewrite) {
+  // (add (and x y) (xor x y)) -> (or x y)
+  auto BuildAddOfAndXor = [](DMirTestBuilder &B, MInstruction *X,
+                             MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Conj = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Excl = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_add, I64, Conj, Excl);
+  };
+  auto BuildOr = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_or, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildAddOfAndXor, BuildOr);
+}
+
+TEST(DMirValidation, FuzzesAddAndOrToAddRewrite) {
+  // (add (and x y) (or x y)) -> (add x y)
+  auto BuildAddOfAndOr = [](DMirTestBuilder &B, MInstruction *X,
+                            MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Conj = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Disj = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_add, I64, Conj, Disj);
+  };
+  auto BuildAdd = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_add, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildAddOfAndOr, BuildAdd);
+}
+
+TEST(DMirValidation, FuzzesSubAndOrToNegXorRewrite) {
+  // (sub (and x y) (or x y)) -> (sub 0 (xor x y))
+  auto BuildSubOfAndOr = [](DMirTestBuilder &B, MInstruction *X,
+                            MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Conj = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    auto *Disj = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_sub, I64, Conj, Disj);
+  };
+  auto BuildNegXor = [](DMirTestBuilder &B, MInstruction *X,
+                        MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Excl = B.createExpr<BinaryInstruction>(OP_xor, I64, X, Y);
+    auto *Zero = B.createConstI64(0);
+    return B.createExpr<BinaryInstruction>(OP_sub, I64, Zero, Excl);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildSubOfAndOr, BuildNegXor);
+}
+
+TEST(DMirValidation, FuzzesSubOrAndToXorRewrite) {
+  // (sub (or x y) (and x y)) -> (xor x y)
+  auto BuildSubOfOrAnd = [](DMirTestBuilder &B, MInstruction *X,
+                            MInstruction *Y) {
+    auto *I64 = &B.Context.I64Type;
+    auto *Disj = B.createExpr<BinaryInstruction>(OP_or, I64, X, Y);
+    auto *Conj = B.createExpr<BinaryInstruction>(OP_and, I64, X, Y);
+    return B.createExpr<BinaryInstruction>(OP_sub, I64, Disj, Conj);
+  };
+  auto BuildXor = [](DMirTestBuilder &B, MInstruction *X, MInstruction *Y) {
+    return B.createExpr<BinaryInstruction>(OP_xor, &B.Context.I64Type, X, Y);
+  };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildSubOfOrAnd, BuildXor);
+}
+
+// Optimization 1: select(0, t, f) -> f and select(nonzero, t, f) -> t
+
+TEST(DMirValidation, FuzzesSelectFalseCondRewrite) {
+  // select(0, t, f) -> f
+  auto BuildSelectOnZero = [](DMirTestBuilder &B, MInstruction *IfTrue,
+                              MInstruction *IfFalse) {
+    auto *ZeroCond = B.createConstI64(0);
+    return B.createExpr<SelectInstruction>(&B.Context.I64Type, ZeroCond,
+                                           IfTrue, IfFalse);
+  };
+  auto PickFalseArm = [](DMirTestBuilder &, MInstruction *,
+                         MInstruction *IfFalse) { return IfFalse; };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildSelectOnZero, PickFalseArm);
+}
+
+TEST(DMirValidation, FuzzesSelectTrueCondRewrite) {
+  // select(nonzero, t, f) -> t; the nonzero condition used here is 1.
+  auto BuildSelectOnOne = [](DMirTestBuilder &B, MInstruction *IfTrue,
+                             MInstruction *IfFalse) {
+    auto *OneCond = B.createConstI64(1);
+    return B.createExpr<SelectInstruction>(&B.Context.I64Type, OneCond, IfTrue,
+                                           IfFalse);
+  };
+  auto PickTrueArm = [](DMirTestBuilder &, MInstruction *IfTrue,
+                        MInstruction *) { return IfTrue; };
+  expectBinaryI64RewriteEquivalent(getInterestingBinaryInputCases(),
+                                   BuildSelectOnOne, PickTrueArm);
+}
+
+TEST(DMirRewritePass, RewritesSelectFalseCondToFalseArm) {
+  // select(0, t, f) -> f, where t and f are reads of two distinct variables.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarT = Builder.createVariable(I64);
+  Variable *VarF = Builder.createVariable(I64);
+  auto *ArmT = Builder.createExpr<DreadInstruction>(I64, VarT->getVarIdx());
+  auto *ArmF = Builder.createExpr<DreadInstruction>(I64, VarF->getVarIdx());
+  auto *ZeroCond = Builder.createConstI64(0);
+  auto *Sel =
+      Builder.createExpr<SelectInstruction>(I64, ZeroCond, ArmT, ArmF);
+
+  // The pass must hand back the false arm itself.
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Sel);
+  EXPECT_EQ(Rewritten, ArmF);
+}
+
+TEST(DMirRewritePass, RewritesSelectTrueCondToTrueArm) {
+  // select(1, t, f) -> t, where t and f are reads of two distinct variables.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *VarT = Builder.createVariable(I64);
+  Variable *VarF = Builder.createVariable(I64);
+  auto *ArmT = Builder.createExpr<DreadInstruction>(I64, VarT->getVarIdx());
+  auto *ArmF = Builder.createExpr<DreadInstruction>(I64, VarF->getVarIdx());
+  auto *OneCond = Builder.createConstI64(1);
+  auto *Sel = Builder.createExpr<SelectInstruction>(I64, OneCond, ArmT, ArmF);
+
+  // The pass must hand back the true arm itself.
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Sel);
+  EXPECT_EQ(Rewritten, ArmT);
+}
+
+// Optimization 2: mul(x, 2^k) -> shl(x, k)
+
+TEST(DMirValidation, FuzzesMulPow2ToShlRewrite) {
+  // Only the k == 1 instance is exercised here: mul(x, 2) -> shl(x, 1)
+  auto BuildMulByTwo = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *Two = B.createConstI64(2);
+    return B.createExpr<BinaryInstruction>(OP_mul, &B.Context.I64Type, X, Two);
+  };
+  auto BuildShlByOne = [](DMirTestBuilder &B, MInstruction *X) {
+    auto *One = B.createConstI64(1);
+    return B.createExpr<BinaryInstruction>(OP_shl, &B.Context.I64Type, X, One);
+  };
+  expectUnaryI64RewriteEquivalent(getInterestingU64Values(), BuildMulByTwo,
+                                  BuildShlByOne);
+}
+
+TEST(DMirRewritePass, RewritesMulBy2ToShl1) {
+  // mul(x, 2) -> shl(x, 1)
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *SrcVar = Builder.createVariable(I64);
+  auto *Src = Builder.createExpr<DreadInstruction>(I64, SrcVar->getVarIdx());
+  auto *Two = Builder.createConstI64(2);
+  auto *Product = Builder.createExpr<BinaryInstruction>(OP_mul, I64, Src, Two);
+
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Product);
+  ASSERT_EQ(Rewritten->getOpcode(), OP_shl);
+  auto *Shift = llvm::cast<BinaryInstruction>(Rewritten);
+  EXPECT_EQ(Shift->getOperand<0>(), Src);
+
+  // The shift amount must be a constant before it is safe to cast it.
+  ASSERT_EQ(Shift->getOperand<1>()->getOpcode(), OP_const);
+  auto *AmountInst = llvm::cast<ConstantInstruction>(Shift->getOperand<1>());
+  auto *AmountInt = llvm::cast<MConstantInt>(&AmountInst->getConstant());
+  EXPECT_EQ(AmountInt->getValue().getZExtValue(), 1ULL);
+}
+
+TEST(DMirRewritePass, RewritesMulBy4ToShl2) {
+  // mul(x, 4) -> shl(x, 2)
+  DMirTestBuilder Builder;
+  Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type);
+  auto *Input = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                     InputVar->getVarIdx());
+  auto *Mul = Builder.createExpr<BinaryInstruction>(
+      OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(4));
+
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Mul);
+  ASSERT_EQ(Rewritten->getOpcode(), OP_shl);
+  auto *Shl = llvm::cast<BinaryInstruction>(Rewritten);
+  EXPECT_EQ(Shl->getOperand<0>(), Input);
+  // Guard the cast below: llvm::cast on a non-constant operand would assert
+  // (or be undefined in release builds) instead of failing the test cleanly.
+  ASSERT_EQ(Shl->getOperand<1>()->getOpcode(), OP_const);
+  EXPECT_EQ(
+      llvm::cast<MConstantInt>(
+          &llvm::cast<ConstantInstruction>(Shl->getOperand<1>())->getConstant())
+          ->getValue()
+          .getZExtValue(),
+      2ULL);
+}
+
+TEST(DMirRewritePass, RewritesMulBy8ToShl3) {
+  // mul(x, 8) -> shl(x, 3)
+  DMirTestBuilder Builder;
+  Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type);
+  auto *Input = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                     InputVar->getVarIdx());
+  auto *Mul = Builder.createExpr<BinaryInstruction>(
+      OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(8));
+
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Mul);
+  ASSERT_EQ(Rewritten->getOpcode(), OP_shl);
+  auto *Shl = llvm::cast<BinaryInstruction>(Rewritten);
+  // The shifted value must still be the original input, as in the mul-by-2
+  // and mul-by-4 tests.
+  EXPECT_EQ(Shl->getOperand<0>(), Input);
+  // Guard the cast below: llvm::cast on a non-constant operand would assert
+  // (or be undefined in release builds) instead of failing the test cleanly.
+  ASSERT_EQ(Shl->getOperand<1>()->getOpcode(), OP_const);
+  EXPECT_EQ(
+      llvm::cast<MConstantInt>(
+          &llvm::cast<ConstantInstruction>(Shl->getOperand<1>())->getConstant())
+          ->getValue()
+          .getZExtValue(),
+      3ULL);
+}
+
+TEST(DMirRewritePass, DoesNotRewriteMulBy3) {
+  // 3 is not a power of two, so mul(x, 3) has to stay a multiply.
+  DMirTestBuilder Builder;
+  auto *I64 = &Builder.Context.I64Type;
+  Variable *SrcVar = Builder.createVariable(I64);
+  auto *Src = Builder.createExpr<DreadInstruction>(I64, SrcVar->getVarIdx());
+  auto *Three = Builder.createConstI64(3);
+  auto *Product =
+      Builder.createExpr<BinaryInstruction>(OP_mul, I64, Src, Three);
+
+  MInstruction *Rewritten = rewriteReturnedValue(Builder, Product);
+  EXPECT_EQ(Rewritten->getOpcode(), OP_mul);
+}
+
+// Optimization 3: isCarryDead recognizes zext(icmp_ult(x, 0))
+
+TEST(DMirRewritePass, RewritesSbbWithZextIcmpUltZeroBorrowToSub) {
+  // sbb(x, y, zext(icmp_ult(z, 0))) -> sub(x, y) since borrow is always dead
+  DMirTestBuilder Builder;
+  Variable *XVar = Builder.createVariable(&Builder.Context.I64Type);
+  Variable *YVar = Builder.createVariable(&Builder.Context.I64Type);
+  Variable *ZVar = Builder.createVariable(&Builder.Context.I64Type);
+  auto *X = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                 XVar->getVarIdx());
+  auto *Y = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                 YVar->getVarIdx());
+  auto *Z = Builder.createExpr<DreadInstruction>(&Builder.Context.I64Type,
+                                                 ZVar->getVarIdx());
+  // icmp_ult(z, 0): always false, always zero
+  auto *Cmp = Builder.createExpr<CmpInstruction>(CmpInstruction::ICMP_ULT,
+                                                 &Builder.Context.I64Type, Z,
+                                                 Builder.createConstI64(0));
+  // zext to i64
+  auto *Zext = Builder.createExpr<UnaryInstruction>(
+      OP_uext, &Builder.Context.I64Type, Cmp);
+  auto *Sbb =
+      Builder.createExpr<SbbInstruction>(&Builder.Context.I64Type, X, Y, Zext);
+  auto *Return =
+      Builder.createStmt<ReturnInstruction>(&Builder.Context.I64Type, Sbb);
+
+  EXPECT_TRUE(runDMirRewritePass(Builder));
+  auto *Result = Return->getOperand<0>();
+  // Stop here on a null or non-sub result so the cast below stays valid.
+  ASSERT_NE(Result, nullptr);
+  EXPECT_NE(Result, Sbb);
+  ASSERT_EQ(Result->getOpcode(), OP_sub);
+  // Checking the opcode alone would also accept sub(y, x) or a sub of the
+  // wrong values; pin both operands to the original sbb inputs.
+  auto *Sub = llvm::cast<BinaryInstruction>(Result);
+  EXPECT_EQ(Sub->getOperand<0>(), X);
+  EXPECT_EQ(Sub->getOperand<1>(), Y);
+}
+
+} // namespace
diff --git a/src/tests/testdata/x86_cg_peephole_conflict_rules.json b/src/tests/testdata/x86_cg_peephole_conflict_rules.json
new file mode 100644
index 000000000..fc441ff5e
--- /dev/null
+++ b/src/tests/testdata/x86_cg_peephole_conflict_rules.json
@@ -0,0 +1,138 @@
+{
+  "version": 1,
+  "rules": [
+    {
+      "name": "fold-a",
+      "stage": "instruction",
+      "priority": 100,
+      "pattern": [
+        {
+          "bind": "cmp",
+          "predicate": "isCompare"
+        },
+        {
+          "bind": "setcc",
+          "opcode": "SETCCr",
+          "capture": [
+            {
+              "name": "setcc_dst",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "setcc_cc",
+              "operand": 1,
+              "field": "imm"
+            }
+          ]
+        },
+        {
+          "bind": "test",
+          "opcode_any": [
+            "TEST8rr",
+            "TEST16rr",
+            "TEST32rr",
+            "TEST64rr"
+          ],
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "setcc_dst"
+            }
+          ]
+        },
+        {
+          "bind": "jcc",
+          "opcode": "JCC_1",
+          "require": [
+            {
+              "operand": 1,
+              "field": "imm",
+              "equals_enum": "COND_NE"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "setcc"
+        ],
+        "set_imm": [
+          {
+            "inst": "jcc",
+            "operand": 1,
+            "from_capture": "setcc_cc"
+          }
+        ]
+      }
+    },
+    {
+      "name": "fold-b",
+      "stage": "instruction",
+      "priority": 100,
+      "pattern": [
+        {
+          "bind": "cmp",
+          "predicate": "isCompare"
+        },
+        {
+          "bind": "setcc",
+          "opcode": "SETCCr",
+          "capture": [
+            {
+              "name": "setcc_dst",
+              "operand": 0,
+              "field": "reg"
+            },
+            {
+              "name": "setcc_cc",
+              "operand": 1,
+              "field": "imm"
+            }
+          ]
+        },
+        {
+          "bind": "test",
+          "opcode_any": [
+            "TEST8rr",
+            "TEST16rr",
+            "TEST32rr",
+            "TEST64rr"
+          ],
+          "require": [
+            {
+              "operand": 0,
+              "field": "reg",
+              "equals_capture": "setcc_dst"
+            }
+          ]
+        },
+        {
+          "bind": "jcc",
+          "opcode": "JCC_1",
+          "require": [
+            {
+              "operand": 1,
+              "field": "imm",
+              "equals_enum": "COND_NE"
+            }
+          ]
+        }
+      ],
+      "action": {
+        "erase": [
+          "test"
+        ],
+        "set_imm": [
+          {
+            "inst": "jcc",
+            "operand": 1,
+            "from_capture": "setcc_cc"
+          }
+        ]
+      }
+    }
+  ]
+}
+
diff --git a/src/tests/x86_cg_peephole_tests.cpp b/src/tests/x86_cg_peephole_tests.cpp
new file mode 100644
index 000000000..9b89a1738
--- /dev/null
+++ b/src/tests/x86_cg_peephole_tests.cpp
@@ -0,0 +1,2052 @@
+// Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "compiler/context.h"
+#include "compiler/llvm-prebuild/Target/X86/X86Subtarget.h"
+#include "compiler/mir/function.h"
+#include "compiler/mir/module.h"
+#include "compiler/target/x86/x86_cg_peephole.h"
+
+#include <array>
+#include <cstdint>
+#include <gtest/gtest.h>
+#include <random>
+
+namespace {
+
+using namespace COMPILER;
+using namespace llvm;
+
+// Builds a function type via MFunctionType::create from Context.VoidType and
+// an empty type list (presumably the signature `void()` -- confirm against
+// MFunctionType::create). The tests use it as the signature of their scratch
+// MFunction.
+MFunctionType *createVoidFunctionType(CompileContext &Context) {
+  return MFunctionType::create(Context, Context.VoidType, {});
+}
+
+// Software model of the five status flags consulted by evaluateCondCode().
+// computeCmpFlags() fills it in for a 64-bit subtraction; every flag starts
+// out clear.
+struct X86CmpFlags {
+  bool Overflow = false; // signed overflow of the subtraction
+  bool Sign = false;     // bit 63 of the result
+  bool Zero = false;     // result is zero
+  bool Carry = false;    // unsigned borrow (Lhs < Rhs)
+  bool Parity = false;   // low result byte has an even number of set bits
+};
+
+// Computes the modelled status flags for the 64-bit subtraction Lhs - Rhs.
+//
+// The result wraps modulo 2^64; only the flags derived from it are returned.
+X86CmpFlags computeCmpFlags(uint64_t Lhs, uint64_t Rhs) {
+  constexpr uint64_t SignBit = 1ULL << 63;
+  const uint64_t Diff = Lhs - Rhs;
+
+  // Fold the low byte onto itself so that bit 0 ends up holding the XOR of
+  // its eight bits, i.e. 1 when the number of set bits is odd.
+  uint64_t Folded = Diff & 0xff;
+  Folded ^= Folded >> 4;
+  Folded ^= Folded >> 2;
+  Folded ^= Folded >> 1;
+
+  X86CmpFlags Out;
+  // Signed overflow: the operands differ in sign and the result's sign
+  // differs from the minuend's.
+  Out.Overflow = (((Lhs ^ Rhs) & (Lhs ^ Diff)) & SignBit) != 0;
+  Out.Sign = (Diff & SignBit) != 0;
+  Out.Zero = Lhs == Rhs;
+  Out.Carry = Rhs > Lhs;
+  Out.Parity = (Folded & 1) == 0;
+  return Out;
+}
+
+// Evaluates one X86 condition code against a modelled flag set.
+//
+// Returns whether the condition holds. Any code other than the sixteen
+// handled below records a gtest failure and yields false.
+bool evaluateCondCode(int64_t CondCode, const X86CmpFlags &Flags) {
+  const bool OF = Flags.Overflow;
+  const bool SF = Flags.Sign;
+  const bool ZF = Flags.Zero;
+  const bool CF = Flags.Carry;
+  const bool PF = Flags.Parity;
+  // Signed "less than" is expressed as SF != OF.
+  const bool SignedLess = SF != OF;
+
+  switch (CondCode) {
+  // Single-flag conditions and their negations.
+  case X86::COND_O:
+    return OF;
+  case X86::COND_NO:
+    return !OF;
+  case X86::COND_B:
+    return CF;
+  case X86::COND_AE:
+    return !CF;
+  case X86::COND_E:
+    return ZF;
+  case X86::COND_NE:
+    return !ZF;
+  case X86::COND_S:
+    return SF;
+  case X86::COND_NS:
+    return !SF;
+  case X86::COND_P:
+    return PF;
+  case X86::COND_NP:
+    return !PF;
+  // Unsigned orderings combine carry and zero.
+  case X86::COND_BE:
+    return CF || ZF;
+  case X86::COND_A:
+    return !(CF || ZF);
+  // Signed orderings combine sign, overflow and zero.
+  case X86::COND_L:
+    return SignedLess;
+  case X86::COND_GE:
+    return !SignedLess;
+  case X86::COND_LE:
+    return ZF || SignedLess;
+  case X86::COND_G:
+    return !(ZF || SignedLess);
+  default:
+    ADD_FAILURE() << "unexpected cond code " << CondCode;
+    return false;
+  }
+}
+
+#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
+// One entry of the setcc/test/jne execution harness: a condition code plus
+// two native routines taking (Lhs, Rhs), filled in by
+// DEFINE_SETCC_TEST_JNE_EXEC_CASE.
+struct X86ExecutionHarnessCase {
+  // Stringified case name from the defining macro.
+  const char *Name = nullptr;
+  // X86 condition code exercised by this case.
+  int64_t CondCode = X86::COND_INVALID;
+  // Routine executing the instruction sequence before the rewrite.
+  uint64_t (*Original)(uint64_t, uint64_t) = nullptr;
+  // Routine executing the instruction sequence after the rewrite.
+  uint64_t (*Rewritten)(uint64_t, uint64_t) = nullptr;
+};
+
+// Result pair returned by the flag-capturing harness routines below (zero
+// shift, self move, fallthrough jump/jcc and redundant test).
+struct X86ZeroShiftHarnessResult {
+  // Register value read back after the sequence ran.
+  uint64_t Value = 0;
+  // Flags register image obtained with pushfq/popq.
+  uint64_t Flags = 0;
+};
+
+// One entry of the shift-by-zero execution harness. Both routines take
+// (Value, FlagLhs, FlagRhs); the DEFINE_ZERO_SHIFT_EXEC_CASE_* macros fill
+// the entries in.
+struct X86ZeroShiftExecutionHarnessCase {
+  // Stringified case name from the defining macro.
+  const char *Name = nullptr;
+  // Routine that executes the shift with an immediate count of 0.
+  X86ZeroShiftHarnessResult (*Original)(uint64_t, uint64_t, uint64_t) = nullptr;
+  // Routine that executes the same sequence without the shift.
+  X86ZeroShiftHarnessResult (*Rewritten)(uint64_t, uint64_t,
+                                         uint64_t) = nullptr;
+  // 0xff, 0xffff or ~0 depending on operand width; presumably the bits of
+  // Value the consumer compares -- TODO confirm at the use site.
+  uint64_t ValueMask = 0;
+};
+
+// One entry of the self-move execution harness. Both routines take
+// (Value, FlagLhs, FlagRhs).
+struct X86SelfMoveExecutionHarnessCase {
+  // Opcode-style label for the case (e.g. "MOV8rr").
+  const char *Name = nullptr;
+  // Routine that executes the register-to-itself move.
+  X86ZeroShiftHarnessResult (*Original)(uint64_t, uint64_t, uint64_t) = nullptr;
+  // Routine that executes the same sequence without the move.
+  X86ZeroShiftHarnessResult (*Rewritten)(uint64_t, uint64_t,
+                                         uint64_t) = nullptr;
+  // 0xff, 0xffff or ~0 depending on operand width; presumably the bits of
+  // Value the consumer compares -- TODO confirm at the use site.
+  uint64_t ValueMask = 0;
+};
+
+// One entry of the fallthrough-jcc execution harness, filled in by
+// DEFINE_FALLTHROUGH_JCC_EXEC_CASE. Both routines take
+// (Value, FlagLhs, FlagRhs).
+struct X86FallthroughJccExecutionHarnessCase {
+  // Stringified case name from the defining macro.
+  const char *Name = nullptr;
+  // X86 condition code of the conditional jump under test.
+  int64_t CondCode = X86::COND_INVALID;
+  // Routine containing the conditional jump to the immediately following label.
+  X86ZeroShiftHarnessResult (*Original)(uint64_t, uint64_t, uint64_t) = nullptr;
+  // Routine with that jump removed.
+  X86ZeroShiftHarnessResult (*Rewritten)(uint64_t, uint64_t,
+                                         uint64_t) = nullptr;
+};
+
+// Defines, for one condition code, two native routines and the
+// X86ExecutionHarnessCase `ExecCase_<Name>` that references them:
+//   execOriginal_<Name>:  cmpq; <SetccMnemonic> %al; testb %al, %al; jne
+//   execRewritten_<Name>: cmpq; <JccMnemonic>
+// Both compare Lhs against Rhs and return 1 when the branch is taken, 0
+// otherwise. The original routine lists rax as clobbered because it writes
+// %al. The expansion ends without a semicolon; the invocation supplies it.
+#define DEFINE_SETCC_TEST_JNE_EXEC_CASE(Name, CondCodeValue, SetccMnemonic,    \
+                                        JccMnemonic)                           \
+  static uint64_t execOriginal_##Name(uint64_t Lhs, uint64_t Rhs) {            \
+    uint64_t Out;                                                              \
+    asm volatile("cmpq %[rhs], %[lhs]\n\t" SetccMnemonic " %%al\n\t"           \
+                 "testb %%al, %%al\n\t"                                        \
+                 "jne 1f\n\t"                                                  \
+                 "xorq %[out], %[out]\n\t"                                     \
+                 "jmp 2f\n\t"                                                  \
+                 "1:\n\t"                                                      \
+                 "movq $1, %[out]\n\t"                                         \
+                 "2:\n\t"                                                      \
+                 : [out] "=&r"(Out)                                            \
+                 : [lhs] "r"(Lhs), [rhs] "r"(Rhs)                              \
+                 : "cc", "rax");                                               \
+    return Out;                                                                \
+  }                                                                            \
+  static uint64_t execRewritten_##Name(uint64_t Lhs, uint64_t Rhs) {           \
+    uint64_t Out;                                                              \
+    asm volatile("cmpq %[rhs], %[lhs]\n\t" JccMnemonic " 1f\n\t"               \
+                 "xorq %[out], %[out]\n\t"                                     \
+                 "jmp 2f\n\t"                                                  \
+                 "1:\n\t"                                                      \
+                 "movq $1, %[out]\n\t"                                         \
+                 "2:\n\t"                                                      \
+                 : [out] "=&r"(Out)                                            \
+                 : [lhs] "r"(Lhs), [rhs] "r"(Rhs)                              \
+                 : "cc");                                                      \
+    return Out;                                                                \
+  }                                                                            \
+  static constexpr X86ExecutionHarnessCase ExecCase_##Name = {                 \
+      #Name, CondCodeValue, execOriginal_##Name, execRewritten_##Name}
+
+// One harness case per condition code handled by evaluateCondCode(); each
+// line pairs the X86 enum value with its matching SETcc and Jcc mnemonics.
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(O, X86::COND_O, "seto", "jo");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(NO, X86::COND_NO, "setno", "jno");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(B, X86::COND_B, "setb", "jb");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(AE, X86::COND_AE, "setae", "jae");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(E, X86::COND_E, "sete", "je");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(NE, X86::COND_NE, "setne", "jne");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(BE, X86::COND_BE, "setbe", "jbe");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(A, X86::COND_A, "seta", "ja");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(S, X86::COND_S, "sets", "js");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(NS, X86::COND_NS, "setns", "jns");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(P, X86::COND_P, "setp", "jp");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(NP, X86::COND_NP, "setnp", "jnp");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(L, X86::COND_L, "setl", "jl");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(GE, X86::COND_GE, "setge", "jge");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(LE, X86::COND_LE, "setle", "jle");
+DEFINE_SETCC_TEST_JNE_EXEC_CASE(G, X86::COND_G, "setg", "jg");
+
+// Table of all sixteen cases defined above, in definition order.
+const std::array<X86ExecutionHarnessCase, 16> ExecutionHarnessCases = {
+    ExecCase_O,  ExecCase_NO, ExecCase_B,  ExecCase_AE, ExecCase_E, ExecCase_NE,
+    ExecCase_BE, ExecCase_A,  ExecCase_S,  ExecCase_NS, ExecCase_P, ExecCase_NP,
+    ExecCase_L,  ExecCase_GE, ExecCase_LE, ExecCase_G,
+};
+
+// Defines the 8-bit shift-by-zero harness pair and `ZeroShiftCase_<Name>`:
+//   execOriginal_<Name>:  cmpq (seeds the flags from FlagLhs - FlagRhs);
+//                         movb Value, %al; <Mnemonic> $0, %al
+//   execRewritten_<Name>: the same sequence without the shift
+// Each routine returns %al zero-extended together with the flags image read
+// back through pushfq/popq. The case's ValueMask is 0xff.
+//
+// pushfq stores below %rsp, which on System V x86-64 is the 128-byte red zone
+// the compiler may use for locals of a leaf function. The capture therefore
+// moves %rsp past the red zone first and restores it afterwards; leaq is used
+// for both adjustments because it does not modify the flags being captured.
+// The expansion ends without a semicolon; the invocation supplies it.
+#define DEFINE_ZERO_SHIFT_EXEC_CASE_8(Name, Mnemonic)                          \
+  static X86ZeroShiftHarnessResult execOriginal_##Name(                        \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    const uint8_t Input = static_cast<uint8_t>(Value);                         \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movb %[value], %%al\n\t" Mnemonic " $0, %%al\n\t"                     \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        "movzbq %%al, %[out]\n\t"                                              \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc", "rax");                                                        \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static X86ZeroShiftHarnessResult execRewritten_##Name(                       \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    const uint8_t Input = static_cast<uint8_t>(Value);                         \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movb %[value], %%al\n\t"                                              \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        "movzbq %%al, %[out]\n\t"                                              \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc", "rax");                                                        \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static constexpr X86ZeroShiftExecutionHarnessCase ZeroShiftCase_##Name = {   \
+      #Name, execOriginal_##Name, execRewritten_##Name, 0xffULL}
+
+// Defines the 16-bit shift-by-zero harness pair and `ZeroShiftCase_<Name>`:
+//   execOriginal_<Name>:  cmpq (seeds the flags from FlagLhs - FlagRhs);
+//                         movw Value, %ax; <Mnemonic> $0, %ax
+//   execRewritten_<Name>: the same sequence without the shift
+// Each routine returns %ax zero-extended together with the flags image read
+// back through pushfq/popq. The case's ValueMask is 0xffff.
+//
+// pushfq stores below %rsp, which on System V x86-64 is the 128-byte red zone
+// the compiler may use for locals of a leaf function. The capture therefore
+// moves %rsp past the red zone first and restores it afterwards; leaq is used
+// for both adjustments because it does not modify the flags being captured.
+// The expansion ends without a semicolon; the invocation supplies it.
+#define DEFINE_ZERO_SHIFT_EXEC_CASE_16(Name, Mnemonic)                         \
+  static X86ZeroShiftHarnessResult execOriginal_##Name(                        \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    const uint16_t Input = static_cast<uint16_t>(Value);                       \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movw %[value], %%ax\n\t" Mnemonic " $0, %%ax\n\t"                     \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        "movzwq %%ax, %[out]\n\t"                                              \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc", "rax");                                                        \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static X86ZeroShiftHarnessResult execRewritten_##Name(                       \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    const uint16_t Input = static_cast<uint16_t>(Value);                       \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movw %[value], %%ax\n\t"                                              \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        "movzwq %%ax, %[out]\n\t"                                              \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc", "rax");                                                        \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static constexpr X86ZeroShiftExecutionHarnessCase ZeroShiftCase_##Name = {   \
+      #Name, execOriginal_##Name, execRewritten_##Name, 0xffffULL}
+
+// Defines the 64-bit shift-by-zero harness pair and `ZeroShiftCase_<Name>`:
+//   execOriginal_<Name>:  cmpq (seeds the flags from FlagLhs - FlagRhs);
+//                         movq Value, %rax; <Mnemonic> $0, %rax
+//   execRewritten_<Name>: the same sequence without the shift
+// Each routine returns %rax together with the flags image read back through
+// pushfq/popq. The case's ValueMask is ~0.
+//
+// pushfq stores below %rsp, which on System V x86-64 is the 128-byte red zone
+// the compiler may use for locals of a leaf function. The capture therefore
+// moves %rsp past the red zone first and restores it afterwards; leaq is used
+// for both adjustments because it does not modify the flags being captured.
+// The expansion ends without a semicolon; the invocation supplies it.
+#define DEFINE_ZERO_SHIFT_EXEC_CASE_64(Name, Mnemonic)                         \
+  static X86ZeroShiftHarnessResult execOriginal_##Name(                        \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movq %[value], %%rax\n\t" Mnemonic " $0, %%rax\n\t"                   \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        "movq %%rax, %[out]\n\t"                                               \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc", "rax");                                                        \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static X86ZeroShiftHarnessResult execRewritten_##Name(                       \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movq %[value], %%rax\n\t"                                             \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        "movq %%rax, %[out]\n\t"                                               \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc", "rax");                                                        \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static constexpr X86ZeroShiftExecutionHarnessCase ZeroShiftCase_##Name = {   \
+      #Name, execOriginal_##Name, execRewritten_##Name, ~0ULL}
+
+// Shift-by-zero cases for SHL, SHR and SAR at 8, 16 and 64 bits. No 32-bit
+// variants are instantiated here.
+DEFINE_ZERO_SHIFT_EXEC_CASE_8(SHL8, "shlb");
+DEFINE_ZERO_SHIFT_EXEC_CASE_16(SHL16, "shlw");
+DEFINE_ZERO_SHIFT_EXEC_CASE_64(SHL64, "shlq");
+DEFINE_ZERO_SHIFT_EXEC_CASE_8(SHR8, "shrb");
+DEFINE_ZERO_SHIFT_EXEC_CASE_16(SHR16, "shrw");
+DEFINE_ZERO_SHIFT_EXEC_CASE_64(SHR64, "shrq");
+DEFINE_ZERO_SHIFT_EXEC_CASE_8(SAR8, "sarb");
+DEFINE_ZERO_SHIFT_EXEC_CASE_16(SAR16, "sarw");
+DEFINE_ZERO_SHIFT_EXEC_CASE_64(SAR64, "sarq");
+
+// Table of the nine cases defined above, grouped by mnemonic.
+const std::array<X86ZeroShiftExecutionHarnessCase, 9> ZeroShiftHarnessCases = {
+    ZeroShiftCase_SHL8, ZeroShiftCase_SHL16, ZeroShiftCase_SHL64,
+    ZeroShiftCase_SHR8, ZeroShiftCase_SHR16, ZeroShiftCase_SHR64,
+    ZeroShiftCase_SAR8, ZeroShiftCase_SAR16, ZeroShiftCase_SAR64,
+};
+
+// Pre-rewrite sequence for the 8-bit self move: seeds the flags with cmpq
+// (FlagLhs - FlagRhs), loads the low byte of Value into %al, executes
+// `movb %al, %al`, then captures the flags and %al.
+// Returns {%al zero-extended, flags image}.
+static X86ZeroShiftHarnessResult
+execOriginalSelfMove8(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  const uint8_t Input = static_cast<uint8_t>(Value);
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movb %[value], %%al\n\t"
+      "movb %%al, %%al\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movzbq %%al, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the 8-bit self move: identical to
+// execOriginalSelfMove8 except that the `movb %al, %al` is absent.
+// Returns {%al zero-extended, flags image}.
+static X86ZeroShiftHarnessResult
+execRewrittenSelfMove8(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  const uint8_t Input = static_cast<uint8_t>(Value);
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movb %[value], %%al\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movzbq %%al, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Pre-rewrite sequence for the 16-bit self move: seeds the flags with cmpq
+// (FlagLhs - FlagRhs), loads the low 16 bits of Value into %ax, executes
+// `movw %ax, %ax`, then captures the flags and %ax.
+// Returns {%ax zero-extended, flags image}.
+static X86ZeroShiftHarnessResult
+execOriginalSelfMove16(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  const uint16_t Input = static_cast<uint16_t>(Value);
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movw %[value], %%ax\n\t"
+      "movw %%ax, %%ax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movzwq %%ax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the 16-bit self move: identical to
+// execOriginalSelfMove16 except that the `movw %ax, %ax` is absent.
+// Returns {%ax zero-extended, flags image}.
+static X86ZeroShiftHarnessResult
+execRewrittenSelfMove16(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  const uint16_t Input = static_cast<uint16_t>(Value);
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movw %[value], %%ax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movzwq %%ax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Pre-rewrite sequence for the 64-bit self move: seeds the flags with cmpq
+// (FlagLhs - FlagRhs), loads Value into %rax, executes `movq %rax, %rax`,
+// then captures the flags and %rax.
+// Returns {%rax, flags image}.
+static X86ZeroShiftHarnessResult
+execOriginalSelfMove64(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movq %[value], %%rax\n\t"
+      "movq %%rax, %%rax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the 64-bit self move: identical to
+// execOriginalSelfMove64 except that the `movq %rax, %rax` is absent.
+// Returns {%rax, flags image}.
+static X86ZeroShiftHarnessResult
+execRewrittenSelfMove64(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movq %[value], %%rax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Sequence containing a 32-bit self move: seeds the flags with cmpq
+// (FlagLhs - FlagRhs), loads the full 64-bit Value into %rax, executes
+// `movl %eax, %eax`, then captures the flags and the full %rax.
+// Returns {%rax, flags image}. Because all 64 bits are read back, any effect
+// of the 32-bit move on the upper half of %rax is visible in the result.
+static X86ZeroShiftHarnessResult
+execOriginalSelfMove32(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movq %[value], %%rax\n\t"
+      "movl %%eax, %%eax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Counterpart of execOriginalSelfMove32 with the `movl %eax, %eax` removed:
+// seeds the flags with cmpq, loads the full 64-bit Value into %rax and
+// captures the flags and %rax.
+// Returns {%rax, flags image}.
+static X86ZeroShiftHarnessResult
+execRewrittenSelfMove32(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movq %[value], %%rax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Self-move cases for the 8-, 16- and 64-bit moves, each with the value mask
+// matching its operand width. The 32-bit routines defined above are not
+// registered in this table (presumably because the 32-bit self move is
+// expected to be kept rather than removed -- confirm against the tests that
+// use execOriginalSelfMove32).
+const std::array<X86SelfMoveExecutionHarnessCase, 3> SelfMoveHarnessCases = {
+    X86SelfMoveExecutionHarnessCase{"MOV8rr", execOriginalSelfMove8,
+                                    execRewrittenSelfMove8, 0xffULL},
+    X86SelfMoveExecutionHarnessCase{"MOV16rr", execOriginalSelfMove16,
+                                    execRewrittenSelfMove16, 0xffffULL},
+    X86SelfMoveExecutionHarnessCase{"MOV64rr", execOriginalSelfMove64,
+                                    execRewrittenSelfMove64, ~0ULL},
+};
+
+// Defines the fallthrough-jcc harness pair for one condition code and the
+// case object `FallthroughJccCase_<Name>`:
+//   execOriginalFallthroughJcc_<Name>:  cmpq; <JccMnemonic> to the label that
+//                                       immediately follows the jump
+//   execRewrittenFallthroughJcc_<Name>: cmpq only
+// Both then copy Value into the result and read the flags back through
+// pushfq/popq, returning {Value, flags image}.
+//
+// pushfq stores below %rsp, which on System V x86-64 is the 128-byte red zone
+// the compiler may use for locals of a leaf function. The capture therefore
+// moves %rsp past the red zone first and restores it afterwards; leaq is used
+// for both adjustments because it does not modify the flags being captured.
+// The expansion ends without a semicolon; the invocation supplies it.
+#define DEFINE_FALLTHROUGH_JCC_EXEC_CASE(Name, CondCodeValue, JccMnemonic)     \
+  static X86ZeroShiftHarnessResult execOriginalFallthroughJcc_##Name(          \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t" JccMnemonic " 1f\n\t"              \
+        "1:\n\t"                                                               \
+        "movq %[value], %[out]\n\t"                                            \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc");                                                               \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static X86ZeroShiftHarnessResult execRewrittenFallthroughJcc_##Name(         \
+      uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {                    \
+    uint64_t Out;                                                              \
+    uint64_t Flags;                                                            \
+    asm volatile(                                                              \
+        "cmpq %[flag_rhs], %[flag_lhs]\n\t"                                    \
+        "movq %[value], %[out]\n\t"                                            \
+        "leaq -128(%%rsp), %%rsp\n\t"                                          \
+        "pushfq\n\t"                                                           \
+        "popq %[flags]\n\t"                                                    \
+        "leaq 128(%%rsp), %%rsp\n\t"                                           \
+        : [out] "=&r"(Out), [flags] "=&r"(Flags)                               \
+        : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \
+        : "cc");                                                               \
+    return {.Value = Out, .Flags = Flags};                                     \
+  }                                                                            \
+  static constexpr X86FallthroughJccExecutionHarnessCase                       \
+      FallthroughJccCase_##Name = {#Name, CondCodeValue,                       \
+                                   execOriginalFallthroughJcc_##Name,          \
+                                   execRewrittenFallthroughJcc_##Name}
+
+// One fallthrough-jcc case per condition code handled by evaluateCondCode();
+// each line pairs the X86 enum value with its Jcc mnemonic.
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(O, X86::COND_O, "jo");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NO, X86::COND_NO, "jno");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(B, X86::COND_B, "jb");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(AE, X86::COND_AE, "jae");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(E, X86::COND_E, "je");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NE, X86::COND_NE, "jne");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(BE, X86::COND_BE, "jbe");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(A, X86::COND_A, "ja");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(S, X86::COND_S, "js");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NS, X86::COND_NS, "jns");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(P, X86::COND_P, "jp");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NP, X86::COND_NP, "jnp");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(L, X86::COND_L, "jl");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(GE, X86::COND_GE, "jge");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(LE, X86::COND_LE, "jle");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(G, X86::COND_G, "jg");
+
+// Table of all sixteen cases defined above, in definition order.
+const std::array<X86FallthroughJccExecutionHarnessCase, 16>
+    FallthroughJccHarnessCases = {
+        FallthroughJccCase_O,  FallthroughJccCase_NO, FallthroughJccCase_B,
+        FallthroughJccCase_AE, FallthroughJccCase_E,  FallthroughJccCase_NE,
+        FallthroughJccCase_BE, FallthroughJccCase_A,  FallthroughJccCase_S,
+        FallthroughJccCase_NS, FallthroughJccCase_P,  FallthroughJccCase_NP,
+        FallthroughJccCase_L,  FallthroughJccCase_GE, FallthroughJccCase_LE,
+        FallthroughJccCase_G,
+};
+
+// Pre-rewrite sequence for the redundant 64-bit test: loads Value into %rax,
+// executes `testq %rax, %rax` twice, then captures the flags and %rax.
+// Returns {%rax, flags image}. FlagLhs and FlagRhs are bound as asm inputs
+// but the template never references them; the captured flags come solely
+// from the test instructions.
+static X86ZeroShiftHarnessResult execOriginalRedundantTest64(uint64_t Value,
+                                                             uint64_t FlagLhs,
+                                                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "movq %[value], %%rax\n\t"
+      "testq %%rax, %%rax\n\t"
+      "testq %%rax, %%rax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the redundant 64-bit test: loads Value into
+// %rax, executes a single `testq %rax, %rax`, then captures the flags and
+// %rax. Returns {%rax, flags image}. FlagLhs and FlagRhs are bound as asm
+// inputs but the template never references them.
+static X86ZeroShiftHarnessResult
+execRewrittenRedundantTest64(uint64_t Value, uint64_t FlagLhs,
+                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "movq %[value], %%rax\n\t"
+      "testq %%rax, %%rax\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Pre-rewrite sequence for the redundant 32-bit test: loads Value truncated
+// to 32 bits into %rax, executes `testl %eax, %eax` twice, then captures the
+// flags and %eax. Returns {%eax (written through the 32-bit view of the
+// output register), flags image}. FlagLhs and FlagRhs are bound as asm inputs
+// but the template never references them.
+static X86ZeroShiftHarnessResult execOriginalRedundantTest32(uint64_t Value,
+                                                             uint64_t FlagLhs,
+                                                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  const uint32_t Input = static_cast<uint32_t>(Value);
+  asm volatile("movq %[value], %%rax\n\t"
+               "testl %%eax, %%eax\n\t"
+               "testl %%eax, %%eax\n\t"
+               // pushfq stores below %rsp; step over the 128-byte System V
+               // red zone first so compiler-owned stack data is not
+               // overwritten. leaq leaves the flags untouched.
+               "leaq -128(%%rsp), %%rsp\n\t"
+               "pushfq\n\t"
+               "popq %[flags]\n\t"
+               "leaq 128(%%rsp), %%rsp\n\t"
+               "movl %%eax, %k[out]\n\t"
+               : [out] "=&r"(Out), [flags] "=&r"(Flags)
+               : [value] "r"(static_cast<uint64_t>(Input)),
+                 [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+               : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the redundant 32-bit test: loads Value truncated
+// to 32 bits into %rax, executes a single `testl %eax, %eax`, then captures
+// the flags and %eax. Returns {%eax (written through the 32-bit view of the
+// output register), flags image}. FlagLhs and FlagRhs are bound as asm inputs
+// but the template never references them.
+static X86ZeroShiftHarnessResult
+execRewrittenRedundantTest32(uint64_t Value, uint64_t FlagLhs,
+                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  const uint32_t Input = static_cast<uint32_t>(Value);
+  asm volatile("movq %[value], %%rax\n\t"
+               "testl %%eax, %%eax\n\t"
+               // pushfq stores below %rsp; step over the 128-byte System V
+               // red zone first so compiler-owned stack data is not
+               // overwritten. leaq leaves the flags untouched.
+               "leaq -128(%%rsp), %%rsp\n\t"
+               "pushfq\n\t"
+               "popq %[flags]\n\t"
+               "leaq 128(%%rsp), %%rsp\n\t"
+               "movl %%eax, %k[out]\n\t"
+               : [out] "=&r"(Out), [flags] "=&r"(Flags)
+               : [value] "r"(static_cast<uint64_t>(Input)),
+                 [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+               : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Pre-rewrite sequence for the redundant 8-bit test: loads the low byte of
+// Value into %al, executes `testb %al, %al` twice, then captures the flags
+// and %al. Returns {%al zero-extended, flags image}. FlagLhs and FlagRhs are
+// bound as asm inputs but the template never references them.
+static X86ZeroShiftHarnessResult
+execOriginalRedundantTest8(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile("movb %[value8], %%al\n\t"
+               "testb %%al, %%al\n\t"
+               "testb %%al, %%al\n\t"
+               // pushfq stores below %rsp; step over the 128-byte System V
+               // red zone first so compiler-owned stack data is not
+               // overwritten. leaq leaves the flags untouched.
+               "leaq -128(%%rsp), %%rsp\n\t"
+               "pushfq\n\t"
+               "popq %[flags]\n\t"
+               "leaq 128(%%rsp), %%rsp\n\t"
+               "movzbq %%al, %[out]\n\t"
+               : [out] "=&r"(Out), [flags] "=&r"(Flags)
+               : [value8] "q"(static_cast<uint8_t>(Value)),
+                 [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+               : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the redundant 8-bit test: loads the low byte of
+// Value into %al, executes a single `testb %al, %al`, then captures the
+// flags and %al. Returns {%al zero-extended, flags image}. FlagLhs and
+// FlagRhs are bound as asm inputs but the template never references them.
+static X86ZeroShiftHarnessResult execRewrittenRedundantTest8(uint64_t Value,
+                                                             uint64_t FlagLhs,
+                                                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile("movb %[value8], %%al\n\t"
+               "testb %%al, %%al\n\t"
+               // pushfq stores below %rsp; step over the 128-byte System V
+               // red zone first so compiler-owned stack data is not
+               // overwritten. leaq leaves the flags untouched.
+               "leaq -128(%%rsp), %%rsp\n\t"
+               "pushfq\n\t"
+               "popq %[flags]\n\t"
+               "leaq 128(%%rsp), %%rsp\n\t"
+               "movzbq %%al, %[out]\n\t"
+               : [out] "=&r"(Out), [flags] "=&r"(Flags)
+               : [value8] "q"(static_cast<uint8_t>(Value)),
+                 [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+               : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Pre-rewrite sequence for the fallthrough jump: seeds the flags with cmpq
+// (FlagLhs - FlagRhs), executes an unconditional `jmp` to the label that
+// immediately follows it, copies Value into the result and captures the
+// flags. Returns {Value, flags image}.
+static X86ZeroShiftHarnessResult execOriginalFallthroughJump(uint64_t Value,
+                                                             uint64_t FlagLhs,
+                                                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "jmp 1f\n\t"
+      "1:\n\t"
+      "movq %[value], %[out]\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc");
+  return {.Value = Out, .Flags = Flags};
+}
+
+// Post-rewrite sequence for the fallthrough jump: identical to
+// execOriginalFallthroughJump except that the `jmp` and its label are
+// absent. Returns {Value, flags image}.
+static X86ZeroShiftHarnessResult
+execRewrittenFallthroughJump(uint64_t Value, uint64_t FlagLhs,
+                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "cmpq %[flag_rhs], %[flag_lhs]\n\t"
+      "movq %[value], %[out]\n\t"
+      // pushfq stores below %rsp; step over the 128-byte System V red zone
+      // first so compiler-owned stack data is not overwritten. leaq is used
+      // because it leaves the flags untouched.
+      "leaq -128(%%rsp), %%rsp\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "leaq 128(%%rsp), %%rsp\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc");
+  return {.Value = Out, .Flags = Flags};
+}
+#endif
+
+// Builds `cmp; setcc(E) al; test al, al; jcc(NE) target` in one block and
+// expects the peephole to leave only `cmp; jcc(E) target`.
+TEST(X86CgPeephole, FoldsSetccTestJneChain) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *VoidFnType = createVoidFunctionType(Context);
+  Mod.addFuncType(VoidFnType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(VoidFnType);
+  CgFunction MF(Context, MirFunc);
+
+  // Layout: entry block, one intervening block, then the branch target
+  // (presumably so the target is not the entry block's layout successor).
+  CgBasicBlock *EntryBB = MF.createCgBasicBlock();
+  CgBasicBlock *BranchTargetBB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(EntryBB);
+  CgBasicBlock *InterveningBB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(InterveningBB);
+  MF.appendCgBasicBlock(BranchTargetBB);
+
+  const auto &InstrInfo = MF.getTargetInstrInfo();
+
+  // cmp rax, rbx
+  std::array<CgOperand, 2> CompareOperands = {
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RBX, false),
+  };
+  MF.createCgInstruction(*EntryBB, InstrInfo.get(X86::CMP64rr),
+                         CompareOperands);
+
+  // setcc al, COND_E
+  std::array<CgOperand, 2> SetccOperands = {
+      CgOperand::createRegOperand(X86::AL, true),
+      CgOperand::createImmOperand(X86::COND_E),
+  };
+  MF.createCgInstruction(*EntryBB, InstrInfo.get(X86::SETCCr), SetccOperands);
+
+  // test al, al
+  std::array<CgOperand, 2> TestOperands = {
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false),
+  };
+  MF.createCgInstruction(*EntryBB, InstrInfo.get(X86::TEST8rr), TestOperands);
+
+  // jcc BranchTargetBB, COND_NE
+  std::array<CgOperand, 2> BranchOperands = {
+      CgOperand::createMBB(BranchTargetBB),
+      CgOperand::createImmOperand(X86::COND_NE),
+  };
+  MF.createCgInstruction(*EntryBB, InstrInfo.get(X86::JCC_1), BranchOperands);
+
+  // The test relies on construction alone to run the pass over MF.
+  X86CgPeephole Pass(MF);
+
+  // Only the compare and a branch carrying the setcc's condition may remain.
+  ASSERT_EQ(std::distance(EntryBB->begin(), EntryBB->end()), 2);
+  auto Cursor = EntryBB->begin();
+  EXPECT_EQ(Cursor->getOpcode(), X86::CMP64rr);
+  ++Cursor;
+  ASSERT_NE(Cursor, EntryBB->end());
+  EXPECT_EQ(Cursor->getOpcode(), X86::JCC_1);
+  EXPECT_EQ(Cursor->getOperand(1).getImm(), X86::COND_E);
+}
+
+// A lone `MOV64rr RAX, RAX` must be gone after X86CgPeephole is constructed
+// over the function, leaving the block empty.
+TEST(X86CgPeephole, RemovesSelfMove64) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  // mov rax, rax: destination and source are the same 64-bit register.
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+  std::array<CgOperand, 2> SelfMoveOperands{
+      CgOperand::createRegOperand(X86::RAX, true),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::MOV64rr), SelfMoveOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_TRUE(BB->empty());
+}
+
+// A lone `MOV32rr EAX, EAX` must survive X86CgPeephole: the block still holds
+// exactly that one instruction afterwards.
+TEST(X86CgPeephole, KeepsSelfMove32) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  // mov eax, eax: same register on both sides, 32-bit form.
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+  std::array<CgOperand, 2> SelfMoveOperands{
+      CgOperand::createRegOperand(X86::EAX, true),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::MOV32rr), SelfMoveOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::MOV32rr);
+}
+
+// A lone `SHL64ri RAX, RAX, 0` must be gone after X86CgPeephole is constructed
+// over the function, leaving the block empty.
+TEST(X86CgPeephole, RemovesZeroShift64) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  // shl rax, 0: three operands (dest, source, shift amount of zero).
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+  std::array<CgOperand, 3> ZeroShiftOperands{
+      CgOperand::createRegOperand(X86::RAX, true),
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createImmOperand(0)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::SHL64ri),
+                           ZeroShiftOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_TRUE(BB->empty());
+}
+
+// A lone `SHL32ri EAX, EAX, 0` must survive X86CgPeephole: the block still
+// holds exactly that one instruction afterwards.
+TEST(X86CgPeephole, KeepsZeroShift32) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  // shl eax, 0: three operands (dest, source, shift amount of zero).
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+  std::array<CgOperand, 3> ZeroShiftOperands{
+      CgOperand::createRegOperand(X86::EAX, true),
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createImmOperand(0)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::SHL32ri),
+                           ZeroShiftOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::SHL32ri);
+}
+
+// Negative counterpart of the SETCC/TEST/JNE fold: the TEST8rr compares AL
+// against BL rather than AL against itself, and all four instructions must
+// still be present after X86CgPeephole has been constructed.
+TEST(X86CgPeephole, KeepsMixedOperandTestChain) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  // Layout is BB, MiddleBB, TargetBB.
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgBasicBlock *TargetBB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+  CgBasicBlock *MiddleBB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(MiddleBB);
+  CgFn.appendCgBasicBlock(TargetBB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // cmp rax, rbx
+  std::array<CgOperand, 2> CompareOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RBX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP64rr), CompareOperands);
+
+  // sete al
+  std::array<CgOperand, 2> SetOperands{
+      CgOperand::createRegOperand(X86::AL, true),
+      CgOperand::createImmOperand(X86::COND_E)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::SETCCr), SetOperands);
+
+  // test al, bl -- the two registers differ.
+  std::array<CgOperand, 2> MixedTestOperands{
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::BL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST8rr),
+                           MixedTestOperands);
+
+  // jne TargetBB
+  std::array<CgOperand, 2> BranchOperands{
+      CgOperand::createMBB(TargetBB),
+      CgOperand::createImmOperand(X86::COND_NE)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::JCC_1), BranchOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 4);
+}
+
+// Model-level fuzz of the SETCC/TEST/JNE -> JCC fold. For each of the sixteen
+// listed condition codes, 20000 random operand pairs are pushed through
+// computeCmpFlags/evaluateCondCode and the branch decision of the original
+// chain is compared with that of the folded JCC.
+//
+// NOTE(review): both decisions are derived from evaluateCondCode with the same
+// arguments, so this only fails if that helper is not deterministic.
+TEST(X86CgPeephole, FuzzFoldSetccTestJneToJccSemantics) {
+  const std::array<int64_t, 16> AllConditions = {
+      X86::COND_O, X86::COND_NO, X86::COND_B,  X86::COND_AE,
+      X86::COND_E, X86::COND_NE, X86::COND_BE, X86::COND_A,
+      X86::COND_S, X86::COND_NS, X86::COND_P,  X86::COND_NP,
+      X86::COND_L, X86::COND_GE, X86::COND_LE, X86::COND_G,
+  };
+  // Fixed seed keeps the operand stream reproducible from run to run.
+  std::mt19937_64 Prng(0xD7A12025ULL);
+
+  for (int64_t CondCode : AllConditions) {
+    for (int Remaining = 20000; Remaining > 0; --Remaining) {
+      // Two separate statements: Lhs is always drawn before Rhs.
+      const uint64_t Lhs = Prng();
+      const uint64_t Rhs = Prng();
+      const X86CmpFlags CmpFlags = computeCmpFlags(Lhs, Rhs);
+
+      // Original chain: SETCC materialises 0/1, and the branch is taken when
+      // that byte is non-zero.
+      const uint8_t MaterialisedByte =
+          evaluateCondCode(CondCode, CmpFlags) ? uint8_t{1} : uint8_t{0};
+      const bool OriginalBranches = MaterialisedByte != 0;
+
+      // Folded form: JCC uses the condition directly.
+      const bool RewrittenBranches = evaluateCondCode(CondCode, CmpFlags);
+
+      EXPECT_EQ(OriginalBranches, RewrittenBranches)
+          << "cond=" << CondCode << " lhs=" << Lhs << " rhs=" << Rhs;
+    }
+  }
+}
+
+// Runs every entry of ExecutionHarnessCases on the host CPU and compares the
+// Original and Rewritten callbacks. All ordered pairs of twelve corner values
+// are additionally cross-checked against the evaluateCondCode/computeCmpFlags
+// model; the 10000 random pairs per case compare Original with Rewritten only.
+// Skipped unless built for x86_64 with a GNU-style inline-asm compiler.
+TEST(X86CgPeephole, ExecutionHarnessFoldSetccTestJneToJcc) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array<uint64_t, 12> Corners = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  // Fixed seed; one generator is shared by all harness cases.
+  std::mt19937_64 Prng(0xE8EC2025ULL);
+
+  for (const auto &HarnessCase : ExecutionHarnessCases) {
+    // Executes both sequences for one operand pair. The model cross-check is
+    // opt-in so that the random phase keeps comparing the two executions only.
+    const auto CheckPair = [&HarnessCase](uint64_t Lhs, uint64_t Rhs,
+                                          bool CrossCheckModel) {
+      const bool Original = HarnessCase.Original(Lhs, Rhs) != 0;
+      const bool Rewritten = HarnessCase.Rewritten(Lhs, Rhs) != 0;
+      EXPECT_EQ(Original, Rewritten)
+          << "case=" << HarnessCase.Name << " lhs=" << Lhs << " rhs=" << Rhs;
+      if (!CrossCheckModel)
+        return;
+      const bool Modeled =
+          evaluateCondCode(HarnessCase.CondCode, computeCmpFlags(Lhs, Rhs));
+      EXPECT_EQ(Original, Modeled)
+          << "case=" << HarnessCase.Name << " lhs=" << Lhs << " rhs=" << Rhs;
+    };
+
+    // Phase 1: every ordered pair of corner values.
+    for (uint64_t CornerLhs : Corners)
+      for (uint64_t CornerRhs : Corners)
+        CheckPair(CornerLhs, CornerRhs, /*CrossCheckModel=*/true);
+
+    // Phase 2: random pairs; the left operand is always drawn first.
+    for (int Remaining = 10000; Remaining > 0; --Remaining) {
+      const uint64_t RandomLhs = Prng();
+      const uint64_t RandomRhs = Prng();
+      CheckPair(RandomLhs, RandomRhs, /*CrossCheckModel=*/false);
+    }
+  }
+#endif
+}
+
+// Runs every entry of ZeroShiftHarnessCases on the host CPU and requires the
+// Original and Rewritten callbacks to agree on the result value (under the
+// case's ValueMask) and on the captured flags. Inputs are the cross product of
+// six flag-seeding pairs with twelve corner values, followed by 4000 random
+// triples per case. Skipped unless built for x86_64 with GNU-style inline asm.
+TEST(X86CgPeephole, ExecutionHarnessRemoveZeroShift) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  using SeedPair = std::pair<uint64_t, uint64_t>;
+  const std::array<uint64_t, 12> Corners = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  const std::array<SeedPair, 6> Seeds = {
+      SeedPair{0ULL, 0ULL},
+      SeedPair{0ULL, 1ULL},
+      SeedPair{1ULL, 0ULL},
+      SeedPair{0x8000000000000000ULL, 1ULL},
+      SeedPair{0x7fffffffffffffffULL, 0xffffffffffffffffULL},
+      SeedPair{0xaaaaaaaaaaaaaaaaULL, 0x5555555555555555ULL},
+  };
+  // Fixed seed; one generator is shared by all harness cases.
+  std::mt19937_64 Prng(0xA0C02026ULL);
+
+  for (const auto &HarnessCase : ZeroShiftHarnessCases) {
+    // Executes both sequences for one input triple and compares the outcomes.
+    const auto CompareOnce = [&HarnessCase](uint64_t Value, uint64_t FlagLhs,
+                                            uint64_t FlagRhs) {
+      const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs);
+      const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs);
+      EXPECT_EQ(Original.Value & HarnessCase.ValueMask,
+                Rewritten.Value & HarnessCase.ValueMask)
+          << "case=" << HarnessCase.Name << " value=" << Value
+          << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "case=" << HarnessCase.Name << " value=" << Value
+          << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs;
+    };
+
+    // Phase 1: flag seeds (outer) crossed with corner values (inner).
+    for (const SeedPair &Seed : Seeds)
+      for (uint64_t Corner : Corners)
+        CompareOnce(Corner, Seed.first, Seed.second);
+
+    // Phase 2: random triples, drawn as value, flag lhs, flag rhs.
+    for (int Remaining = 4000; Remaining > 0; --Remaining) {
+      const uint64_t RandomValue = Prng();
+      const uint64_t RandomFlagLhs = Prng();
+      const uint64_t RandomFlagRhs = Prng();
+      CompareOnce(RandomValue, RandomFlagLhs, RandomFlagRhs);
+    }
+  }
+#endif
+}
+
+// Runs every entry of SelfMoveHarnessCases on the host CPU and requires the
+// Original and Rewritten callbacks to agree on the result value (under the
+// case's ValueMask) and on the captured flags. Inputs are the cross product of
+// six flag-seeding pairs with twelve corner values, followed by 4000 random
+// triples per case. Skipped unless built for x86_64 with GNU-style inline asm.
+TEST(X86CgPeephole, ExecutionHarnessRemoveSelfMove) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  using SeedPair = std::pair<uint64_t, uint64_t>;
+  const std::array<uint64_t, 12> Corners = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  const std::array<SeedPair, 6> Seeds = {
+      SeedPair{0ULL, 0ULL},
+      SeedPair{0ULL, 1ULL},
+      SeedPair{1ULL, 0ULL},
+      SeedPair{0x8000000000000000ULL, 1ULL},
+      SeedPair{0x7fffffffffffffffULL, 0xffffffffffffffffULL},
+      SeedPair{0xaaaaaaaaaaaaaaaaULL, 0x5555555555555555ULL},
+  };
+  // Fixed seed; one generator is shared by all harness cases.
+  std::mt19937_64 Prng(0x51F2026ULL);
+
+  for (const auto &HarnessCase : SelfMoveHarnessCases) {
+    // Executes both sequences for one input triple and compares the outcomes.
+    const auto CompareOnce = [&HarnessCase](uint64_t Value, uint64_t FlagLhs,
+                                            uint64_t FlagRhs) {
+      const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs);
+      const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs);
+      EXPECT_EQ(Original.Value & HarnessCase.ValueMask,
+                Rewritten.Value & HarnessCase.ValueMask)
+          << "case=" << HarnessCase.Name << " value=" << Value
+          << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "case=" << HarnessCase.Name << " value=" << Value
+          << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs;
+    };
+
+    // Phase 1: flag seeds (outer) crossed with corner values (inner).
+    for (const SeedPair &Seed : Seeds)
+      for (uint64_t Corner : Corners)
+        CompareOnce(Corner, Seed.first, Seed.second);
+
+    // Phase 2: random triples, drawn as value, flag lhs, flag rhs.
+    for (int Remaining = 4000; Remaining > 0; --Remaining) {
+      const uint64_t RandomValue = Prng();
+      const uint64_t RandomFlagLhs = Prng();
+      const uint64_t RandomFlagRhs = Prng();
+      CompareOnce(RandomValue, RandomFlagLhs, RandomFlagRhs);
+    }
+  }
+#endif
+}
+
+// Demonstrates on the host CPU why the 32-bit self move is not removable. For
+// six inputs with non-zero upper halves, execOriginalSelfMove32 must return
+// the value truncated to its low 32 bits, execRewrittenSelfMove32 must return
+// the value unchanged, the two results must differ, and the captured flags
+// must match. Skipped unless built for x86_64 with GNU-style inline asm.
+TEST(X86CgPeephole, ExecutionHarnessSelfMove32ChangesUpperBits) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  using SeedPair = std::pair<uint64_t, uint64_t>;
+  const std::array<uint64_t, 6> Inputs = {
+      0xffffffff00000000ULL, 0xffffffff00000001ULL, 0xaaaaaaaa55555555ULL,
+      0x8000000000000001ULL, 0x7fffffff00000000ULL, 0x1234567800000000ULL,
+  };
+  const std::array<SeedPair, 4> Seeds = {
+      SeedPair{0ULL, 0ULL},
+      SeedPair{0ULL, 1ULL},
+      SeedPair{1ULL, 0ULL},
+      SeedPair{0x8000000000000000ULL, 1ULL},
+  };
+
+  for (const SeedPair &Seed : Seeds) {
+    const uint64_t FlagLhs = Seed.first;
+    const uint64_t FlagRhs = Seed.second;
+    for (uint64_t Value : Inputs) {
+      const auto Original = execOriginalSelfMove32(Value, FlagLhs, FlagRhs);
+      const auto Rewritten = execRewrittenSelfMove32(Value, FlagLhs, FlagRhs);
+
+      // The two sequences are observably different ...
+      EXPECT_NE(Original.Value, Rewritten.Value)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      // ... because the original keeps only the low 32 bits ...
+      EXPECT_EQ(Original.Value, Value & 0xffffffffULL)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      // ... while the rewritten form passes the input through untouched.
+      EXPECT_EQ(Rewritten.Value, Value)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      // Flags are expected to be identical either way.
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+    }
+  }
+#endif
+}
+
+// Runs every entry of FallthroughJccHarnessCases on the host CPU and requires
+// the Original and Rewritten callbacks to return the same value and the same
+// captured flags. Inputs are the cross product of six flag-seeding pairs with
+// twelve corner values, followed by 4000 random triples per case. Skipped
+// unless built for x86_64 with GNU-style inline asm.
+TEST(X86CgPeephole, ExecutionHarnessRemoveFallthroughConditionalJump) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  using SeedPair = std::pair<uint64_t, uint64_t>;
+  const std::array<uint64_t, 12> Corners = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  const std::array<SeedPair, 6> Seeds = {
+      SeedPair{0ULL, 0ULL},
+      SeedPair{0ULL, 1ULL},
+      SeedPair{1ULL, 0ULL},
+      SeedPair{0x8000000000000000ULL, 1ULL},
+      SeedPair{0x7fffffffffffffffULL, 0xffffffffffffffffULL},
+      SeedPair{0xaaaaaaaaaaaaaaaaULL, 0x5555555555555555ULL},
+  };
+  // Fixed seed; one generator is shared by all harness cases.
+  std::mt19937_64 Prng(0xF4112026ULL);
+
+  for (const auto &HarnessCase : FallthroughJccHarnessCases) {
+    // Executes both sequences for one input triple and compares the outcomes.
+    const auto CompareOnce = [&HarnessCase](uint64_t Value, uint64_t FlagLhs,
+                                            uint64_t FlagRhs) {
+      const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs);
+      const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs);
+      EXPECT_EQ(Original.Value, Rewritten.Value)
+          << "case=" << HarnessCase.Name << " value=" << Value
+          << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "case=" << HarnessCase.Name << " value=" << Value
+          << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs;
+    };
+
+    // Phase 1: flag seeds (outer) crossed with corner values (inner).
+    for (const SeedPair &Seed : Seeds)
+      for (uint64_t Corner : Corners)
+        CompareOnce(Corner, Seed.first, Seed.second);
+
+    // Phase 2: random triples, drawn as value, flag lhs, flag rhs.
+    for (int Remaining = 4000; Remaining > 0; --Remaining) {
+      const uint64_t RandomValue = Prng();
+      const uint64_t RandomFlagLhs = Prng();
+      const uint64_t RandomFlagRhs = Prng();
+      CompareOnce(RandomValue, RandomFlagLhs, RandomFlagRhs);
+    }
+  }
+#endif
+}
+
+// Runs execOriginalFallthroughJump and execRewrittenFallthroughJump on the
+// host CPU and requires them to return the same value and the same captured
+// flags. Inputs are the cross product of six flag-seeding pairs with twelve
+// corner values, followed by 4000 random triples. Skipped unless built for
+// x86_64 with GNU-style inline asm.
+TEST(X86CgPeephole, ExecutionHarnessRemoveFallthroughJump) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  using SeedPair = std::pair<uint64_t, uint64_t>;
+  const std::array<uint64_t, 12> Corners = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  const std::array<SeedPair, 6> Seeds = {
+      SeedPair{0ULL, 0ULL},
+      SeedPair{0ULL, 1ULL},
+      SeedPair{1ULL, 0ULL},
+      SeedPair{0x8000000000000000ULL, 1ULL},
+      SeedPair{0x7fffffffffffffffULL, 0xffffffffffffffffULL},
+      SeedPair{0xaaaaaaaaaaaaaaaaULL, 0x5555555555555555ULL},
+  };
+  // Fixed seed keeps the random phase reproducible.
+  std::mt19937_64 Prng(0xF4122026ULL);
+
+  // Executes both sequences for one input triple and compares the outcomes.
+  const auto CompareOnce = [](uint64_t Value, uint64_t FlagLhs,
+                              uint64_t FlagRhs) {
+    const auto Original = execOriginalFallthroughJump(Value, FlagLhs, FlagRhs);
+    const auto Rewritten =
+        execRewrittenFallthroughJump(Value, FlagLhs, FlagRhs);
+    EXPECT_EQ(Original.Value, Rewritten.Value)
+        << "value=" << Value << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+    EXPECT_EQ(Original.Flags, Rewritten.Flags)
+        << "value=" << Value << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+  };
+
+  // Phase 1: flag seeds (outer) crossed with corner values (inner).
+  for (const SeedPair &Seed : Seeds)
+    for (uint64_t Corner : Corners)
+      CompareOnce(Corner, Seed.first, Seed.second);
+
+  // Phase 2: random triples, drawn as value, flag lhs, flag rhs.
+  for (int Remaining = 4000; Remaining > 0; --Remaining) {
+    const uint64_t RandomValue = Prng();
+    const uint64_t RandomFlagLhs = Prng();
+    const uint64_t RandomFlagRhs = Prng();
+    CompareOnce(RandomValue, RandomFlagLhs, RandomFlagRhs);
+  }
+#endif
+}
+
+// A `JCC_1 BB1, COND_NE` that is the only instruction of BB0, where BB1 is the
+// block appended right after BB0, must be gone after X86CgPeephole is
+// constructed, leaving BB0 empty.
+TEST(X86CgPeephole, RemovesFallthroughConditionalJump) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  // Two consecutive blocks: BB1 directly follows BB0 in layout.
+  CgBasicBlock *BB0 = CgFn.createCgBasicBlock();
+  CgBasicBlock *BB1 = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB0);
+  CgFn.appendCgBasicBlock(BB1);
+
+  // jne BB1
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+  std::array<CgOperand, 2> BranchOperands{
+      CgOperand::createMBB(BB1),
+      CgOperand::createImmOperand(X86::COND_NE)};
+  CgFn.createCgInstruction(*BB0, InstrInfo.get(X86::JCC_1), BranchOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_TRUE(BB0->empty());
+}
+
+// A `JMP_1 BB1` that is the only instruction of BB0, where BB1 is the block
+// appended right after BB0, must be gone after X86CgPeephole is constructed,
+// leaving BB0 empty.
+TEST(X86CgPeephole, RemovesFallthroughJump) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  // Two consecutive blocks: BB1 directly follows BB0 in layout.
+  CgBasicBlock *BB0 = CgFn.createCgBasicBlock();
+  CgBasicBlock *BB1 = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB0);
+  CgFn.appendCgBasicBlock(BB1);
+
+  // jmp BB1: the only operand is the target block.
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+  std::array<CgOperand, 1> JumpOperands{CgOperand::createMBB(BB1)};
+  CgFn.createCgInstruction(*BB0, InstrInfo.get(X86::JMP_1), JumpOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_TRUE(BB0->empty());
+}
+
+// Two back-to-back `TEST64rr RAX, RAX` instructions must collapse to a single
+// TEST64rr after X86CgPeephole is constructed over the function.
+TEST(X86CgPeephole, RemovesRedundantTest64rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // test rax, rax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST64rr), FirstOperands);
+
+  // test rax, rax (identical repeat)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST64rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST64rr);
+}
+
+// `TEST64rr RAX, RAX` followed by `TEST64rr RBX, RBX` test different
+// registers; both instructions must remain after X86CgPeephole is constructed.
+TEST(X86CgPeephole, KeepsNonRedundantTest64rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // test rax, rax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST64rr), FirstOperands);
+
+  // test rbx, rbx (different register)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::RBX, false),
+      CgOperand::createRegOperand(X86::RBX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST64rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+// Two back-to-back `TEST32rr EAX, EAX` instructions must collapse to a single
+// TEST32rr after X86CgPeephole is constructed over the function.
+TEST(X86CgPeephole, RemovesRedundantTest32rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // test eax, eax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST32rr), FirstOperands);
+
+  // test eax, eax (identical repeat)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST32rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST32rr);
+}
+
+// `TEST32rr EAX, EAX` followed by `TEST32rr ECX, ECX` test different
+// registers; both instructions must remain after X86CgPeephole is constructed.
+TEST(X86CgPeephole, KeepsNonRedundantTest32rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // test eax, eax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST32rr), FirstOperands);
+
+  // test ecx, ecx (different register)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::ECX, false),
+      CgOperand::createRegOperand(X86::ECX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST32rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+// Two back-to-back `TEST8rr AL, AL` instructions must collapse to a single
+// TEST8rr after X86CgPeephole is constructed over the function.
+TEST(X86CgPeephole, RemovesRedundantTest8rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // test al, al
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST8rr), FirstOperands);
+
+  // test al, al (identical repeat)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST8rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST8rr);
+}
+
+// `TEST8rr AL, AL` followed by `TEST8rr BL, BL` test different registers; both
+// instructions must remain after X86CgPeephole is constructed.
+TEST(X86CgPeephole, KeepsNonRedundantTest8rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // test al, al
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST8rr), FirstOperands);
+
+  // test bl, bl (different register)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::BL, false),
+      CgOperand::createRegOperand(X86::BL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::TEST8rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+// Two back-to-back `CMP64rr RAX, RAX` instructions must collapse to a single
+// CMP64rr after X86CgPeephole is constructed over the function.
+TEST(X86CgPeephole, RemovesRedundantCmp64rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // cmp rax, rax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP64rr), FirstOperands);
+
+  // cmp rax, rax (identical repeat)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP64rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP64rr);
+}
+
+// `CMP64rr RAX, RAX` followed by `CMP64rr RBX, RAX` differ in their first
+// operand; both instructions must remain after X86CgPeephole is constructed.
+TEST(X86CgPeephole, KeepsNonRedundantCmp64rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // cmp rax, rax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP64rr), FirstOperands);
+
+  // cmp rbx, rax (first operand differs)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::RBX, false),
+      CgOperand::createRegOperand(X86::RAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP64rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+// Two back-to-back `CMP32rr EAX, EAX` instructions must collapse to a single
+// CMP32rr after X86CgPeephole is constructed over the function.
+TEST(X86CgPeephole, RemovesRedundantCmp32rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // cmp eax, eax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP32rr), FirstOperands);
+
+  // cmp eax, eax (identical repeat)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP32rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP32rr);
+}
+
+// `CMP32rr EAX, EAX` followed by `CMP32rr EBX, EAX` differ in their first
+// operand; both instructions must remain after X86CgPeephole is constructed.
+TEST(X86CgPeephole, KeepsNonRedundantCmp32rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // cmp eax, eax
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP32rr), FirstOperands);
+
+  // cmp ebx, eax (first operand differs)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::EBX, false),
+      CgOperand::createRegOperand(X86::EAX, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP32rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+// Two back-to-back `CMP8rr AL, AL` instructions must collapse to a single
+// CMP8rr after X86CgPeephole is constructed over the function.
+TEST(X86CgPeephole, RemovesRedundantCmp8rr) {
+  CompileContext Ctx;
+  Ctx.initialize();
+  MModule Module(Ctx);
+  MFunctionType *VoidTy = createVoidFunctionType(Ctx);
+  Module.addFuncType(VoidTy);
+
+  MFunction Fn(Ctx, 0);
+  Fn.setFunctionType(VoidTy);
+  CgFunction CgFn(Ctx, Fn);
+
+  CgBasicBlock *BB = CgFn.createCgBasicBlock();
+  CgFn.appendCgBasicBlock(BB);
+
+  const auto &InstrInfo = CgFn.getTargetInstrInfo();
+
+  // cmp al, al
+  std::array<CgOperand, 2> FirstOperands{
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP8rr), FirstOperands);
+
+  // cmp al, al (identical repeat)
+  std::array<CgOperand, 2> SecondOperands{
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false)};
+  CgFn.createCgInstruction(*BB, InstrInfo.get(X86::CMP8rr), SecondOperands);
+
+  X86CgPeephole Pass(CgFn);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP8rr);
+}
+
+TEST(X86CgPeephole, KeepsNonRedundantCmp8rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+  // Block under test: CMP8rr AL, AL followed by CMP8rr BL, AL.
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array<CgOperand, 2> CmpOps1 = {
+      CgOperand::createRegOperand(X86::AL, false),
+      CgOperand::createRegOperand(X86::AL, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::CMP8rr), CmpOps1);
+  std::array<CgOperand, 2> CmpOps2 = {
+      CgOperand::createRegOperand(X86::BL, false),
+      CgOperand::createRegOperand(X86::AL, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::CMP8rr), CmpOps2);
+
+  X86CgPeephole Peephole(MF);
+  // The second compare reads BL, so it is not a duplicate: both must remain.
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+TEST(X86CgPeephole, RemovesRedundantCmp16rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+  // Block under test: two identical CMP16rr AX, AX instructions back to back.
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array<CgOperand, 2> CmpOps1 = {
+      CgOperand::createRegOperand(X86::AX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps1);
+  std::array<CgOperand, 2> CmpOps2 = {
+      CgOperand::createRegOperand(X86::AX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps2);
+
+  X86CgPeephole Peephole(MF);
+  // The duplicate compare must be gone, leaving exactly one CMP16rr.
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP16rr);
+}
+
+TEST(X86CgPeephole, KeepsNonRedundantCmp16rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+  // Block under test: CMP16rr AX, AX followed by CMP16rr BX, AX.
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array<CgOperand, 2> CmpOps1 = {
+      CgOperand::createRegOperand(X86::AX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps1);
+  std::array<CgOperand, 2> CmpOps2 = {
+      CgOperand::createRegOperand(X86::BX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps2);
+
+  X86CgPeephole Peephole(MF);
+  // The second compare reads BX, so it is not a duplicate: both must remain.
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+TEST(X86CgPeephole, RemovesRedundantTest16rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+  // Block under test: two identical TEST16rr AX, AX instructions back to back.
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array<CgOperand, 2> TestOps1 = {
+      CgOperand::createRegOperand(X86::AX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps1);
+  std::array<CgOperand, 2> TestOps2 = {
+      CgOperand::createRegOperand(X86::AX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps2);
+
+  X86CgPeephole Peephole(MF);
+  // The duplicate test must be gone, leaving exactly one TEST16rr.
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST16rr);
+}
+
+TEST(X86CgPeephole, KeepsNonRedundantTest16rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+  // Block under test: TEST16rr AX, AX followed by TEST16rr BX, BX.
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array<CgOperand, 2> TestOps1 = {
+      CgOperand::createRegOperand(X86::AX, false),
+      CgOperand::createRegOperand(X86::AX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps1);
+  std::array<CgOperand, 2> TestOps2 = {
+      CgOperand::createRegOperand(X86::BX, false),
+      CgOperand::createRegOperand(X86::BX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps2);
+
+  X86CgPeephole Peephole(MF);
+  // The second test reads BX instead of AX, so both instructions must remain.
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+TEST(X86CgPeephole, ExecutionHarnessRemoveRedundantTest64rr) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array<uint64_t, 8> EdgeValues = {
+      0ULL,
+      1ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0xffffffffffffffffULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+  };
+  const std::array<std::pair<uint64_t, uint64_t>, 4> FlagSeeds = {
+      std::pair<uint64_t, uint64_t>{0ULL, 0ULL},
+      std::pair<uint64_t, uint64_t>{0ULL, 1ULL},
+      std::pair<uint64_t, uint64_t>{1ULL, 0ULL},
+      std::pair<uint64_t, uint64_t>{0x8000000000000000ULL, 1ULL},
+  };
+  std::mt19937_64 Rng(0xBB112026ULL);
+  // Fixed edge values under every flag seed first, then 4000 seeded random inputs.
+  for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) {
+    for (uint64_t Value : EdgeValues) {
+      const auto Original =
+          execOriginalRedundantTest64(Value, FlagLhs, FlagRhs);
+      const auto Rewritten =
+          execRewrittenRedundantTest64(Value, FlagLhs, FlagRhs);
+      EXPECT_EQ(Original.Value, Rewritten.Value)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+    }
+  }
+  for (int Iter = 0; Iter < 4000; ++Iter) {
+    const uint64_t Value = Rng();
+    const uint64_t FlagLhs = Rng();
+    const uint64_t FlagRhs = Rng();
+    const auto Original = execOriginalRedundantTest64(Value, FlagLhs, FlagRhs);
+    const auto Rewritten =
+        execRewrittenRedundantTest64(Value, FlagLhs, FlagRhs);
+    EXPECT_EQ(Original.Value, Rewritten.Value)
+        << "value=" << Value << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+    EXPECT_EQ(Original.Flags, Rewritten.Flags)
+        << "value=" << Value << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+  }
+#endif
+}
+
+TEST(X86CgPeephole, ExecutionHarnessRemoveRedundantTest32rr) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array<uint32_t, 8> EdgeValues = {
+      0UL,          1UL,          0x7fffffffUL, 0x80000000UL,
+      0xffffffffUL, 0xaaaaaaaaUL, 0x55555555UL, 0x0000ffffUL,
+  };
+  const std::array<std::pair<uint64_t, uint64_t>, 4> FlagSeeds = {
+      std::pair<uint64_t, uint64_t>{0ULL, 0ULL},
+      std::pair<uint64_t, uint64_t>{0ULL, 1ULL},
+      std::pair<uint64_t, uint64_t>{1ULL, 0ULL},
+      std::pair<uint64_t, uint64_t>{0x8000000000000000ULL, 1ULL},
+  };
+  std::mt19937_64 Rng(0xCC122026ULL);
+  // Fixed edge values under every flag seed first, then 4000 seeded random inputs.
+  for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) {
+    for (uint32_t Value : EdgeValues) {
+      const auto Original =
+          execOriginalRedundantTest32(Value, FlagLhs, FlagRhs);
+      const auto Rewritten =
+          execRewrittenRedundantTest32(Value, FlagLhs, FlagRhs);
+      EXPECT_EQ(Original.Value, Rewritten.Value)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+    }
+  }
+  for (int Iter = 0; Iter < 4000; ++Iter) {
+    const uint32_t Value = static_cast<uint32_t>(Rng());
+    const uint64_t FlagLhs = Rng();
+    const uint64_t FlagRhs = Rng();
+    const auto Original = execOriginalRedundantTest32(Value, FlagLhs, FlagRhs);
+    const auto Rewritten =
+        execRewrittenRedundantTest32(Value, FlagLhs, FlagRhs);
+    EXPECT_EQ(Original.Value, Rewritten.Value)
+        << "value=" << Value << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+    EXPECT_EQ(Original.Flags, Rewritten.Flags)
+        << "value=" << Value << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+  }
+#endif
+}
+
+TEST(X86CgPeephole, ExecutionHarnessRemoveRedundantTestrr) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array<uint8_t, 6> EdgeValues = {
+      0, 1, 0x7f, 0x80, 0xff, 0xaa,
+  };
+  const std::array<std::pair<uint64_t, uint64_t>, 4> FlagSeeds = {
+      std::pair<uint64_t, uint64_t>{0ULL, 0ULL},
+      std::pair<uint64_t, uint64_t>{0ULL, 1ULL},
+      std::pair<uint64_t, uint64_t>{1ULL, 0ULL},
+      std::pair<uint64_t, uint64_t>{0x8000000000000000ULL, 1ULL},
+  };
+  std::mt19937_64 Rng(0xDD132026ULL);
+  // 8-bit variant: edge values under every flag seed, then 4000 seeded random inputs.
+  for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) {
+    for (uint8_t Value : EdgeValues) {
+      const auto Original = execOriginalRedundantTest8(Value, FlagLhs, FlagRhs);
+      const auto Rewritten =
+          execRewrittenRedundantTest8(Value, FlagLhs, FlagRhs);
+      EXPECT_EQ(Original.Value, Rewritten.Value)
+          << "value=" << static_cast<int>(Value) << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "value=" << static_cast<int>(Value) << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+    }
+  }
+  for (int Iter = 0; Iter < 4000; ++Iter) {
+    const uint8_t Value = static_cast<uint8_t>(Rng());
+    const uint64_t FlagLhs = Rng();
+    const uint64_t FlagRhs = Rng();
+    const auto Original = execOriginalRedundantTest8(Value, FlagLhs, FlagRhs);
+    const auto Rewritten = execRewrittenRedundantTest8(Value, FlagLhs, FlagRhs);
+    EXPECT_EQ(Original.Value, Rewritten.Value)
+        << "value=" << static_cast<int>(Value) << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+    EXPECT_EQ(Original.Flags, Rewritten.Flags)
+        << "value=" << static_cast<int>(Value) << " flag_lhs=" << FlagLhs
+        << " flag_rhs=" << FlagRhs;
+  }
+#endif
+}
+
+#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
+static uint64_t execOriginalMovzxSubreg(uint64_t Input) {
+  uint64_t Out;
+  uint8_t In8 = static_cast<uint8_t>(Input); // only the low byte of Input is used
+  asm volatile("movzbl %[in], %%eax\n\t" // 32-bit write also clears RAX[63:32]
+               "movq %%rax, %[out]\n\t"
+               : [out] "=r"(Out)
+               : [in] "q"(In8) // "q": a register whose low byte is addressable
+               : "rax");       // RAX is the scratch register, so it is clobbered
+  return Out;
+}
+// Same contract as above, but with a single movzbq that extends straight into RAX.
+static uint64_t execRewrittenMovzxSubreg(uint64_t Input) {
+  uint64_t Out;
+  uint8_t In8 = static_cast<uint8_t>(Input);
+  asm volatile("movzbq %[in], %%rax\n\t"
+               "movq %%rax, %[out]\n\t"
+               : [out] "=r"(Out)
+               : [in] "q"(In8)
+               : "rax");
+  return Out;
+}
+#endif
+
+TEST(X86CgPeephole, FoldsMovzxSubregToReg) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+  // Block under test: MOVZX32rr8 EAX <- AL, then SUBREG_TO_REG RAX <- (0, EAX, 6).
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array<CgOperand, 2> MovzxOps = {
+      CgOperand::createRegOperand(X86::EAX, true),
+      CgOperand::createRegOperand(X86::AL, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::MOVZX32rr8), MovzxOps);
+
+  std::array<CgOperand, 4> SubregOps = {
+      CgOperand::createRegOperand(X86::RAX, true),
+      CgOperand::createImmOperand(0),
+      CgOperand::createRegOperand(X86::EAX, false),
+      CgOperand::createImmOperand(6), // sub_32bit
+  };
+  MF.createCgInstruction(*BB, TII.get(TargetOpcode::SUBREG_TO_REG), SubregOps);
+
+  X86CgPeephole Peephole(MF);
+  // The pair must collapse into a single MOVZX64rr8 whose operand 0 is RAX.
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1);
+  auto It = BB->begin();
+  EXPECT_EQ(It->getOpcode(), X86::MOVZX64rr8);
+  EXPECT_EQ(It->getOperand(0).getReg(), X86::RAX);
+}
+
+TEST(X86CgPeephole, KeepsMovzxSubregToRegWhenMismatch) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+
+  const auto &TII = MF.getTargetInstrInfo();
+  // MOVZX32rr8 defines EAX, but SUBREG_TO_REG uses EBX - mismatch, no fold.
+  std::array<CgOperand, 2> MovzxOps = {
+      CgOperand::createRegOperand(X86::EAX, true),
+      CgOperand::createRegOperand(X86::AL, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::MOVZX32rr8), MovzxOps);
+
+  std::array<CgOperand, 4> SubregOps = {
+      CgOperand::createRegOperand(X86::RBX, true),
+      CgOperand::createImmOperand(0),
+      CgOperand::createRegOperand(X86::EBX, false),
+      CgOperand::createImmOperand(6), // sub_32bit
+  };
+  MF.createCgInstruction(*BB, TII.get(TargetOpcode::SUBREG_TO_REG), SubregOps);
+
+  X86CgPeephole Peephole(MF);
+  // Nothing may be folded here: both instructions are expected to remain.
+  EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2);
+}
+
+TEST(X86CgPeephole, ExecutionHarnessFoldMovzxSubregToReg) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  // Runs the pre-fold and post-fold instruction sequences natively and checks
+  // that they produce the same 64-bit result.
+  //
+  // Both helpers truncate their argument to uint8_t before the asm, so the
+  // whole input domain is 256 values. Cover it exhaustively instead of
+  // sampling a handful of edge and random values: it is just as fast and
+  // cannot miss a value.
+  for (unsigned Raw = 0; Raw <= 0xffU; ++Raw) {
+    const uint8_t Value = static_cast<uint8_t>(Raw);
+    EXPECT_EQ(execOriginalMovzxSubreg(Value), execRewrittenMovzxSubreg(Value))
+        << "value=" << Raw;
+  }
+#endif
+}
+
+} // namespace
diff --git a/tests/evm_asm/bool_and_or_xor_not.easm b/tests/evm_asm/bool_and_or_xor_not.easm
new file mode 100644
index 000000000..330f0e175
--- /dev/null
+++ b/tests/evm_asm/bool_and_or_xor_not.easm
@@ -0,0 +1,14 @@
+// Boolean chain: NOT(XOR(OR(AND(0xFF, 0x0F), 0xF0), 0x55)) = 0xFF...FF55
+PUSH1 0xFF
+PUSH1 0x0F
+AND
+PUSH1 0xF0
+OR
+PUSH1 0x55
+XOR
+NOT
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/bool_and_or_xor_not.expected b/tests/evm_asm/bool_and_or_xor_not.expected
new file mode 100644
index 000000000..9b3d2ca77
--- /dev/null
+++ b/tests/evm_asm/bool_and_or_xor_not.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF55'
+storage: {}
+transient_storage: {}
+return: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF55'
+events: []
diff --git a/tests/evm_asm/bool_xor_not_chain.easm b/tests/evm_asm/bool_xor_not_chain.easm
new file mode 100644
index 000000000..463e829a9
--- /dev/null
+++ b/tests/evm_asm/bool_xor_not_chain.easm
@@ -0,0 +1,11 @@
+// Boolean chain: NOT(XOR(NOT(0xAA), 0x55)) = 0xFF
+PUSH1 0xAA
+NOT
+PUSH1 0x55
+XOR
+NOT
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/bool_xor_not_chain.expected b/tests/evm_asm/bool_xor_not_chain.expected
new file mode 100644
index 000000000..3edad12fb
--- /dev/null
+++ b/tests/evm_asm/bool_xor_not_chain.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: '00000000000000000000000000000000000000000000000000000000000000FF'
+storage: {}
+transient_storage: {}
+return: '00000000000000000000000000000000000000000000000000000000000000FF'
+events: []
diff --git a/tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json b/tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json
new file mode 100644
index 000000000..e0ee63d79
--- /dev/null
+++ b/tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json
@@ -0,0 +1,41 @@
+{
+  "version": 1,
+  "target_pass": "dmir_rewrite",
+  "thresholds": {
+    "max_pass_share_p95_pct": 1.25,
+    "max_pass_time_p95_ms": 0.028,
+    "max_overall_total_time_regression_pct": 5.0,
+    "max_case_total_time_regression_pct": 20.0
+  },
+  "baseline": {
+    "overall_total_time_ms_median": 0.85175,
+    "case_total_time_ms_median": {
+      "add": 0.864481,
+      "mul": 0.903338,
+      "div": 0.854079,
+      "shl": 0.841816,
+      "shr": 0.820848,
+      "sar": 0.827417,
+      "byte": 0.880214,
+      "eq_true": 0.844935,
+      "lt_true": 0.870578,
+      "jump": 0.873635,
+      "u256_shl_add_mul": 0.839147,
+      "u256_mul_add_chain": 0.861729,
+      "u256_shr_add_shl": 0.844389,
+      "bool_and_or_xor_not": 0.848925,
+      "bool_xor_not_chain": 0.847343
+    }
+  },
+  "metadata": {
+    "manifest": "tests/evm_asm/compiler_pass_timing_manifest.json",
+    "runs": 5,
+    "num_extra_compilations": 4,
+    "rule_count": 70,
+    "compile_mode": "compile-only",
+    "thresholds_status": "provisional",
+    "measured_p95_ms": 0.013796,
+    "measured_p95_share_pct": 0.5947,
+    "threshold_multiplier": 2.0
+  }
+}
diff --git a/tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json b/tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json
new file mode 100644
index 000000000..f8d750257
--- /dev/null
+++ b/tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json
@@ -0,0 +1,38 @@
+{
+  "version": 1,
+  "target_pass": "x86_cg_peephole",
+  "thresholds": {
+    "max_overall_total_time_regression_pct": 5.0,
+    "max_case_total_time_regression_pct": 20.0,
+    "max_pass_share_p95_pct": 2.0,
+    "max_pass_time_p95_ms": 0.06
+  },
+  "baseline": {
+    "overall_total_time_ms_median": 0.815081,
+    "case_total_time_ms_median": {
+      "add": 0.90462,
+      "mul": 0.816997,
+      "div": 0.768571,
+      "shl": 0.758534,
+      "shr": 0.770613,
+      "sar": 0.750282,
+      "byte": 0.794542,
+      "eq_true": 0.839906,
+      "lt_true": 0.752717,
+      "jump": 0.854454,
+      "u256_shl_add_mul": 0.880501,
+      "u256_mul_add_chain": 0.818078,
+      "u256_shr_add_shl": 0.849664,
+      "bool_and_or_xor_not": 0.869076,
+      "bool_xor_not_chain": 0.823643
+    }
+  },
+  "metadata": {
+    "manifest": "tests/evm_asm/compiler_pass_timing_manifest.json",
+    "runs": 5,
+    "num_extra_compilations": 4,
+    "compile_mode": "compile-only",
+    "rule_count": 8,
+    "thresholds_status": "provisional"
+  }
+}
diff --git a/tests/evm_asm/compiler_pass_timing_manifest.json b/tests/evm_asm/compiler_pass_timing_manifest.json
new file mode 100644
index 000000000..7e45d4865
--- /dev/null
+++ b/tests/evm_asm/compiler_pass_timing_manifest.json
@@ -0,0 +1,65 @@
+{
+  "version": 1,
+  "cases": [
+    {
+      "name": "add",
+      "input": "add.evm.hex"
+    },
+    {
+      "name": "mul",
+      "input": "mul.evm.hex"
+    },
+    {
+      "name": "div",
+      "input": "div.evm.hex"
+    },
+    {
+      "name": "shl",
+      "input": "shl.evm.hex"
+    },
+    {
+      "name": "shr",
+      "input": "shr.evm.hex"
+    },
+    {
+      "name": "sar",
+      "input": "sar.evm.hex"
+    },
+    {
+      "name": "byte",
+      "input": "byte.evm.hex"
+    },
+    {
+      "name": "eq_true",
+      "input": "eq_true.evm.hex"
+    },
+    {
+      "name": "lt_true",
+      "input": "lt_true.evm.hex"
+    },
+    {
+      "name": "jump",
+      "input": "jump.evm.hex"
+    },
+    {
+      "name": "u256_shl_add_mul",
+      "input": "u256_shl_add_mul.evm.hex"
+    },
+    {
+      "name": "u256_mul_add_chain",
+      "input": "u256_mul_add_chain.evm.hex"
+    },
+    {
+      "name": "u256_shr_add_shl",
+      "input": "u256_shr_add_shl.evm.hex"
+    },
+    {
+      "name": "bool_and_or_xor_not",
+      "input": "bool_and_or_xor_not.evm.hex"
+    },
+    {
+      "name": "bool_xor_not_chain",
+      "input": "bool_xor_not_chain.evm.hex"
+    }
+  ]
+}
diff --git a/tests/evm_asm/sar.easm b/tests/evm_asm/sar.easm
new file mode 100644
index 000000000..688bc0744
--- /dev/null
+++ b/tests/evm_asm/sar.easm
@@ -0,0 +1,9 @@
+// SAR(shift=2, value=-8) = -2 (as U256: 0xFFF...FFE)
+PUSH32 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF8
+PUSH1 0x02
+SAR
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/sar.expected b/tests/evm_asm/sar.expected
new file mode 100644
index 000000000..ebacdd6cf
--- /dev/null
+++ b/tests/evm_asm/sar.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE'
+storage: {}
+transient_storage: {}
+return: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE'
+events: []
diff --git a/tests/evm_asm/shl.easm b/tests/evm_asm/shl.easm
new file mode 100644
index 000000000..959206447
--- /dev/null
+++ b/tests/evm_asm/shl.easm
@@ -0,0 +1,9 @@
+// SHL(shift=3, value=1) = 8
+PUSH1 0x01
+PUSH1 0x03
+SHL
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/shl.expected b/tests/evm_asm/shl.expected
new file mode 100644
index 000000000..5fea867ed
--- /dev/null
+++ b/tests/evm_asm/shl.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: '0000000000000000000000000000000000000000000000000000000000000008'
+storage: {}
+transient_storage: {}
+return: '0000000000000000000000000000000000000000000000000000000000000008'
+events: []
diff --git a/tests/evm_asm/shr.easm b/tests/evm_asm/shr.easm
new file mode 100644
index 000000000..676995db5
--- /dev/null
+++ b/tests/evm_asm/shr.easm
@@ -0,0 +1,9 @@
+// SHR(shift=8, value=2) = 0
+PUSH1 0x02
+PUSH1 0x08
+SHR
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/shr.expected b/tests/evm_asm/shr.expected
new file mode 100644
index 000000000..481e245e2
--- /dev/null
+++ b/tests/evm_asm/shr.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: '0000000000000000000000000000000000000000000000000000000000000000'
+storage: {}
+transient_storage: {}
+return: '0000000000000000000000000000000000000000000000000000000000000000'
+events: []
diff --git a/tests/evm_asm/u256_mul_add_chain.easm b/tests/evm_asm/u256_mul_add_chain.easm
new file mode 100644
index 000000000..e22058292
--- /dev/null
+++ b/tests/evm_asm/u256_mul_add_chain.easm
@@ -0,0 +1,13 @@
+// U256 chain: (2 MUL 3) MUL 4 ADD 8 = 32
+PUSH1 0x03
+PUSH1 0x02
+MUL
+PUSH1 0x04
+MUL
+PUSH1 0x08
+ADD
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/u256_mul_add_chain.expected b/tests/evm_asm/u256_mul_add_chain.expected
new file mode 100644
index 000000000..c667a6ef3
--- /dev/null
+++ b/tests/evm_asm/u256_mul_add_chain.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: '0000000000000000000000000000000000000000000000000000000000000020'
+storage: {}
+transient_storage: {}
+return: '0000000000000000000000000000000000000000000000000000000000000020'
+events: []
diff --git a/tests/evm_asm/u256_shl_add_mul.easm b/tests/evm_asm/u256_shl_add_mul.easm
new file mode 100644
index 000000000..ac8650751
--- /dev/null
+++ b/tests/evm_asm/u256_shl_add_mul.easm
@@ -0,0 +1,13 @@
+// U256 arithmetic chain: ((1 SHL 2) ADD 3) MUL 4 = 28
+PUSH1 0x02
+PUSH1 0x01
+SHL
+PUSH1 0x03
+ADD
+PUSH1 0x04
+MUL
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/u256_shl_add_mul.expected b/tests/evm_asm/u256_shl_add_mul.expected
new file mode 100644
index 000000000..cb4508831
--- /dev/null
+++ b/tests/evm_asm/u256_shl_add_mul.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: '000000000000000000000000000000000000000000000000000000000000001C'
+storage: {}
+transient_storage: {}
+return: '000000000000000000000000000000000000000000000000000000000000001C'
+events: []
diff --git a/tests/evm_asm/u256_shr_add_shl.easm b/tests/evm_asm/u256_shr_add_shl.easm
new file mode 100644
index 000000000..eda45e6b7
--- /dev/null
+++ b/tests/evm_asm/u256_shr_add_shl.easm
@@ -0,0 +1,15 @@
+// U256 shift chain: SHL(shift=(SHR(shift=8, value=2) ADD 3), value=1) = 8
+// SWAP1 before SHL moves the sum (3) to the top, making it the shift amount
+PUSH1 0x02
+PUSH1 0x08
+SHR
+PUSH1 0x03
+ADD
+PUSH1 0x01
+SWAP1
+SHL
+PUSH1 0x00
+MSTORE
+PUSH1 0x20
+PUSH1 0x00
+RETURN
diff --git a/tests/evm_asm/u256_shr_add_shl.expected b/tests/evm_asm/u256_shr_add_shl.expected
new file mode 100644
index 000000000..5fea867ed
--- /dev/null
+++ b/tests/evm_asm/u256_shr_add_shl.expected
@@ -0,0 +1,8 @@
+status: success
+error_code: 0
+stack: []
+memory: '0000000000000000000000000000000000000000000000000000000000000008'
+storage: {}
+transient_storage: {}
+return: '0000000000000000000000000000000000000000000000000000000000000008'
+events: []
diff --git a/tools/check_compiler_pass_timing_budget.py b/tools/check_compiler_pass_timing_budget.py
new file mode 100644
index 000000000..bf8c354e6
--- /dev/null
+++ b/tools/check_compiler_pass_timing_budget.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+import sys
+
+
+def parse_args():
+    """Parse the command line: --budget, --report and --allow-missing-cases."""
+    cli = argparse.ArgumentParser(
+        description="Validate compiler pass timing output against a budget file."
+    )
+    # Both input paths are required; argparse rejects the run if one is absent.
+    cli.add_argument("--budget", required=True, help="Budget JSON path")
+    cli.add_argument("--report", required=True, help="Timing report JSON path")
+    # store_true flag: args.allow_missing_cases is False unless the flag is passed.
+    missing_help = "Skip case-level checks when a baseline case is absent in the report"
+    cli.add_argument("--allow-missing-cases", action="store_true", help=missing_help)
+    return cli.parse_args()
+
+
+def load_json(path):
+    """Return the decoded contents of the UTF-8 JSON file at ``path``."""
+    return json.loads(pathlib.Path(path).read_text(encoding="utf-8"))
+
+
+def percent_regression(current, baseline):
+    """Percent change of current vs baseline; baseline <= 0 yields 0.0 or inf."""
+    degenerate = 0.0 if current <= 0 else float("inf")
+    return degenerate if baseline <= 0 else (current - baseline) * 100.0 / baseline
+
+
+def get_report_scope(report):
+    """Return (summary, {case name: summary}); flat reports yield (report, {})."""
+    if "overall" not in report:
+        return report, {}
+    per_case = {entry["name"]: entry["summary"] for entry in report.get("cases", [])}
+    return report["overall"], per_case
+
+
+def get_threshold(thresholds, new_key, old_key):
+    """Look up new_key, falling back to the legacy old_key (KeyError if neither)."""
+    key = new_key if new_key in thresholds else old_key
+    return thresholds[key]
+
+
+def main():
+    """Check a timing report against a budget; return 0 if it passes, else 1."""
+    args = parse_args()
+    budget = load_json(args.budget)
+    summary, case_summaries = get_report_scope(load_json(args.report))
+    target_pass = budget["target_pass"]
+    thresholds = budget["thresholds"]
+    baseline = budget.get("baseline", {})
+    errors = []  # every violation is collected so one run reports them all
+
+    pass_summary = summary.get("phases", {}).get(target_pass)
+    if pass_summary is None:
+        errors.append(f"report is missing target pass '{target_pass}'")
+    else:
+        # Prefer p95; "max" is read only when p95 is absent, so it may be omitted.
+        share = pass_summary["share_of_total_pct"]
+        observed_share = share["p95"] if "p95" in share else share["max"]
+        max_share = get_threshold(
+            thresholds, "max_pass_share_p95_pct", "max_pass_share_of_total_pct"
+        )
+        if observed_share > max_share:
+            errors.append(
+                f"{target_pass} share p95 {observed_share:.6f}% exceeds budget "
+                f"{max_share:.6f}%"
+            )
+        observed_time = (
+            pass_summary["p95"] if "p95" in pass_summary else pass_summary["max"]
+        )
+        max_time = get_threshold(
+            thresholds, "max_pass_time_p95_ms", "max_pass_time_ms"
+        )
+        if observed_time > max_time:
+            errors.append(
+                f"{target_pass} p95 time {observed_time:.6f} ms exceeds budget "
+                f"{max_time:.6f} ms"
+            )
+
+    baseline_overall = baseline.get("overall_total_time_ms_median")
+    if baseline_overall is not None:
+        observed_overall = summary["total_time_ms"]["median"]
+        regression = percent_regression(observed_overall, baseline_overall)
+        max_regression = thresholds["max_overall_total_time_regression_pct"]
+        if regression > max_regression:
+            errors.append(
+                "overall median compile time regression "
+                f"{regression:.6f}% exceeds budget {max_regression:.6f}%"
+            )
+
+    # Optional threshold: None disables the per-case limit instead of crashing.
+    max_case_regression = thresholds.get("max_case_total_time_regression_pct")
+    for case_name, baseline_value in baseline.get("case_total_time_ms_median", {}).items():
+        current_case = case_summaries.get(case_name)
+        if current_case is None:
+            if not args.allow_missing_cases:
+                errors.append(f"report is missing baseline case '{case_name}'")
+            continue
+        regression = percent_regression(
+            current_case["total_time_ms"]["median"], baseline_value
+        )
+        if max_case_regression is not None and regression > max_case_regression:
+            errors.append(
+                f"case '{case_name}' median compile time regression {regression:.6f}% "
+                f"exceeds budget {max_case_regression:.6f}%"
+            )
+
+    if not errors:
+        print("compiler pass timing budget check passed")
+        return 0
+    for error in errors:
+        print(error, file=sys.stderr)
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/check_dmir_rewrite_rules.py b/tools/check_dmir_rewrite_rules.py
new file mode 100644
index 000000000..c1e1c1100
--- /dev/null
+++ b/tools/check_dmir_rewrite_rules.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+import subprocess
+import sys
+
+from mine_dmir_seed_rules import build_candidate_key, parse_expr
+
+
+# Values accepted for a rule's "status" field.
+ALLOWED_RULE_STATUSES = {
+    "seed",
+    "candidate",
+    "accepted",
+}
+
+# Values accepted in a rule's validation "modes" list. main() additionally
+# requires each rule to list "interpreter_fuzz" or "smt".
+ALLOWED_VALIDATION_MODES = {
+    "interpreter_sample",
+    "interpreter_fuzz",
+    "smt",
+}
+
+# Fields that validate_cost() requires, as integers, in each of the "lhs",
+# "rhs" and "delta" sections of a rule's cost metadata.
+COST_FIELDS = (
+    "dmir_inst",
+    "select_depth",
+    "adc_chain",
+    "runtime_calls",
+)
+
+
+def parse_args():
+    """Parse the command line of the dMIR rewrite rule checker.
+
+    Returns:
+        The argparse namespace with ``rules`` (required path to the rule JSON)
+        and ``gtest_binary`` (optional path, ``None`` when not given).
+    """
+    cli = argparse.ArgumentParser(
+        description="Validate dMIR rewrite candidate metadata."
+    )
+    cli.add_argument("--rules", help="Path to the rule JSON file", required=True)
+    gtest_help = "Optional gtest binary used to verify coverage entries exist"
+    cli.add_argument("--gtest-binary", help=gtest_help)
+    parsed = cli.parse_args()
+    return parsed
+
+
+def load_rules(path):
+    """Read the JSON document stored at ``path`` (UTF-8) and return it."""
+    rules_file = pathlib.Path(path)
+    with rules_file.open("r", encoding="utf-8") as handle:
+        document = json.load(handle)
+    return document
+
+
+def load_gtest_names(path):
+    """Return the set of ``Suite.Test`` names listed by a gtest binary.
+
+    Runs ``<path> --gtest_list_tests`` and parses the listing: unindented lines
+    name a suite (with a trailing "."), and lines indented by two spaces name a
+    test of the most recent suite. Typed and value-parameterized entries may
+    carry a trailing "# TypeParam = ..." / "# GetParam() = ..." annotation,
+    which is dropped from both suite and test lines.
+
+    Raises:
+        RuntimeError: if the binary exits with a non-zero status.
+    """
+    proc = subprocess.run(
+        [str(pathlib.Path(path).resolve()), "--gtest_list_tests"],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    if proc.returncode != 0:
+        raise RuntimeError(f"failed to list gtests from {path}")
+
+    names = set()
+    suite_name = None
+    for line in proc.stdout.splitlines():
+        if not line.strip():
+            continue
+        if not line.startswith("  "):
+            # Cut the "# ..." annotation first; with it in place the suite's
+            # trailing dot is not at the end of the line and would survive.
+            suite_name = line.split("#", 1)[0].strip().rstrip(".")
+            continue
+        if suite_name is None:
+            continue
+        test_name = line.strip().split()[0]
+        test_name = test_name.split("#", 1)[0]
+        names.add(f"{suite_name}.{test_name}")
+    return names
+
+
+def validate_cost(name, cost, errors):
+    """Check one rule's cost metadata, appending any problems to ``errors``.
+
+    ``cost`` must be a dict with "lhs", "rhs" and "delta" sections, each a dict
+    holding an integer for every name in COST_FIELDS. ``name`` only labels the
+    messages. Nothing is returned.
+    """
+    if not isinstance(cost, dict):
+        errors.append(f"rule '{name}' has invalid cost metadata")
+        return
+
+    for section in ("lhs", "rhs", "delta"):
+        section_cost = cost.get(section)
+        if not isinstance(section_cost, dict):
+            errors.append(f"rule '{name}' is missing cost section '{section}'")
+            continue
+        for field in COST_FIELDS:
+            value = section_cost.get(field)
+            # bool is a subclass of int, so JSON true/false would otherwise be
+            # accepted as a cost value.
+            if isinstance(value, bool) or not isinstance(value, int):
+                errors.append(
+                    f"rule '{name}' has non-integer cost field '{section}.{field}'"
+                )
+
+
+def main():
+    """Validate every rule in the dMIR rewrite rule file.
+
+    Per rule this checks: unique name, allowed status, non-empty unique string
+    inputs, non-empty "lhs"/"rhs" that parse and do not repeat another rule's
+    canonical key, complete cost metadata, at least one interpreter_fuzz or smt
+    validation mode, and non-empty coverage entries (which must also exist in
+    the gtest binary when --gtest-binary is given).
+
+    Returns:
+        1 after printing every problem to stderr, otherwise 0 after printing a
+        per-mode usage count to stdout.
+    """
+    args = parse_args()
+    data = load_rules(args.rules)
+    errors = []
+    seen_names = set()
+    seen_rule_keys = {}
+    mode_counts = {mode: 0 for mode in ALLOWED_VALIDATION_MODES}
+    gtest_names = load_gtest_names(args.gtest_binary) if args.gtest_binary else None
+
+    for rule in data.get("rules", []):
+        # A malformed entry is reported like any other problem instead of
+        # aborting the whole check with an AttributeError.
+        if not isinstance(rule, dict):
+            errors.append(f"rule entry {rule!r} is not a JSON object")
+            continue
+
+        name = rule.get("name", "<unnamed>")
+        if name in seen_names:
+            errors.append(f"duplicate dMIR rule name '{name}'")
+            continue
+        seen_names.add(name)
+
+        status = rule.get("status")
+        if status not in ALLOWED_RULE_STATUSES:
+            errors.append(f"rule '{name}' has invalid status '{status}'")
+
+        inputs = rule.get("inputs")
+        if not isinstance(inputs, list) or not inputs or any(
+            not isinstance(item, str) or not item.strip() for item in inputs
+        ):
+            errors.append(f"rule '{name}' has invalid inputs metadata")
+        elif len(set(inputs)) != len(inputs):
+            errors.append(f"rule '{name}' repeats input bindings")
+
+        for field in ("lhs", "rhs"):
+            value = rule.get(field)
+            if not isinstance(value, str) or not value.strip():
+                errors.append(f"rule '{name}' is missing '{field}'")
+
+        lhs = rule.get("lhs")
+        rhs = rule.get("rhs")
+        if isinstance(lhs, str) and lhs.strip() and isinstance(rhs, str) and rhs.strip():
+            try:
+                canonical_key = build_candidate_key(parse_expr(lhs), parse_expr(rhs))
+            except ValueError as exc:
+                errors.append(f"rule '{name}' has invalid expression syntax: {exc}")
+            else:
+                existing_name = seen_rule_keys.get(canonical_key)
+                if existing_name is not None:
+                    errors.append(
+                        "rule "
+                        f"'{name}' duplicates canonical rewrite '{existing_name}'"
+                    )
+                else:
+                    seen_rule_keys[canonical_key] = name
+
+        validate_cost(name, rule.get("cost"), errors)
+
+        validation = rule.get("validation")
+        if not isinstance(validation, dict):
+            errors.append(f"rule '{name}' is missing validation metadata")
+            continue
+
+        modes = validation.get("modes")
+        if not isinstance(modes, list) or not modes:
+            errors.append(f"rule '{name}' has no validation modes")
+        else:
+            has_semantic_mode = False
+            for mode in modes:
+                # The isinstance test comes first: set membership on an
+                # unhashable value (list/dict) would raise TypeError.
+                if not isinstance(mode, str) or mode not in ALLOWED_VALIDATION_MODES:
+                    errors.append(
+                        f"rule '{name}' uses unknown validation mode '{mode}'"
+                    )
+                    continue
+                mode_counts[mode] += 1
+                if mode in {"interpreter_fuzz", "smt"}:
+                    has_semantic_mode = True
+            if not has_semantic_mode:
+                errors.append(
+                    f"rule '{name}' needs interpreter_fuzz or smt validation"
+                )
+
+        coverage = validation.get("coverage")
+        if not isinstance(coverage, list) or not coverage:
+            errors.append(f"rule '{name}' has no validation coverage entries")
+        else:
+            for entry in coverage:
+                if not isinstance(entry, str) or not entry.strip():
+                    errors.append(f"rule '{name}' has an invalid coverage entry")
+                elif gtest_names is not None and entry not in gtest_names:
+                    errors.append(
+                        f"rule '{name}' references missing gtest coverage '{entry}'"
+                    )
+
+    if errors:
+        for error in errors:
+            print(error, file=sys.stderr)
+        return 1
+
+    print("dmir rewrite rule metadata is complete")
+    for mode in sorted(mode_counts):
+        print(f"{mode}: {mode_counts[mode]}")
+    return 0
+
+
+# Script entry point: exit status is 0 when all rules pass, 1 otherwise.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/check_x86_cg_peephole_validation.py b/tools/check_x86_cg_peephole_validation.py
new file mode 100644
index 000000000..91c792258
--- /dev/null
+++ b/tools/check_x86_cg_peephole_validation.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+import subprocess
+import sys
+
+
+# Values accepted in a rule's validation "modes" list. main() requires
+# rules whose stage is "instruction" to list a mode other than "structural".
+ALLOWED_VALIDATION_MODES = {
+    "structural",
+    "semantics_model",
+    "execution",
+}
+
+
+def parse_args():
+    """Parse the command line of the x86 peephole validation checker.
+
+    Returns:
+        The argparse namespace with ``rules`` (required path to the rule JSON)
+        and ``gtest_binary`` (optional path, ``None`` when not given).
+    """
+    cli = argparse.ArgumentParser(
+        description="Validate x86 peephole rule validation metadata."
+    )
+    cli.add_argument("--rules", help="Path to the rule JSON file", required=True)
+    gtest_help = "Optional gtest binary used to verify coverage entries exist"
+    cli.add_argument("--gtest-binary", help=gtest_help)
+    parsed = cli.parse_args()
+    return parsed
+
+
+def load_rules(path):
+    """Read the JSON document stored at ``path`` (UTF-8) and return it."""
+    rules_file = pathlib.Path(path)
+    with rules_file.open("r", encoding="utf-8") as handle:
+        document = json.load(handle)
+    return document
+
+
+def load_gtest_names(path):
+    """Return the set of ``Suite.Test`` names listed by a gtest binary.
+
+    Runs ``<path> --gtest_list_tests`` and parses the listing: unindented lines
+    name a suite (with a trailing "."), and lines indented by two spaces name a
+    test of the most recent suite. Typed and value-parameterized entries may
+    carry a trailing "# TypeParam = ..." / "# GetParam() = ..." annotation,
+    which is dropped from both suite and test lines.
+
+    Raises:
+        RuntimeError: if the binary exits with a non-zero status.
+    """
+    proc = subprocess.run(
+        [str(pathlib.Path(path).resolve()), "--gtest_list_tests"],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    if proc.returncode != 0:
+        raise RuntimeError(f"failed to list gtests from {path}")
+
+    names = set()
+    suite_name = None
+    for line in proc.stdout.splitlines():
+        if not line.strip():
+            continue
+        if not line.startswith("  "):
+            # Cut the "# ..." annotation first; with it in place the suite's
+            # trailing dot is not at the end of the line and would survive.
+            suite_name = line.split("#", 1)[0].strip().rstrip(".")
+            continue
+        if suite_name is None:
+            continue
+        test_name = line.strip().split()[0]
+        test_name = test_name.split("#", 1)[0]
+        names.add(f"{suite_name}.{test_name}")
+    return names
+
+
+def main():
+    """Validate the validation metadata of every x86 peephole rule.
+
+    Per rule this checks that "validation" is an object, that "modes" is a
+    non-empty list of allowed modes (rules whose stage is "instruction" must
+    list a mode other than "structural"), and that "coverage" is a non-empty
+    list of non-blank strings, each present in the gtest binary when
+    --gtest-binary is given.
+
+    Returns:
+        1 after printing every problem to stderr, otherwise 0 after printing a
+        per-mode usage count to stdout.
+    """
+    args = parse_args()
+    data = load_rules(args.rules)
+    errors = []
+    mode_counts = {mode: 0 for mode in ALLOWED_VALIDATION_MODES}
+    gtest_names = None
+    if args.gtest_binary:
+        gtest_names = load_gtest_names(args.gtest_binary)
+
+    for rule in data.get("rules", []):
+        # A malformed entry is reported like any other problem instead of
+        # aborting the whole check with an AttributeError.
+        if not isinstance(rule, dict):
+            errors.append(f"rule entry {rule!r} is not a JSON object")
+            continue
+
+        name = rule.get("name", "<unnamed>")
+        validation = rule.get("validation")
+        # Anything other than an object (missing, null, string, list) has no
+        # usable "modes"/"coverage" and is treated as missing metadata.
+        if not isinstance(validation, dict):
+            errors.append(f"rule '{name}' is missing validation metadata")
+            continue
+
+        modes = validation.get("modes")
+        if not isinstance(modes, list) or not modes:
+            errors.append(f"rule '{name}' has no validation modes")
+        else:
+            has_non_structural_mode = False
+            for mode in modes:
+                # The isinstance test comes first: set membership on an
+                # unhashable value (list/dict) would raise TypeError.
+                if not isinstance(mode, str) or mode not in ALLOWED_VALIDATION_MODES:
+                    errors.append(
+                        f"rule '{name}' uses unknown validation mode '{mode}'"
+                    )
+                else:
+                    mode_counts[mode] += 1
+                    if mode != "structural":
+                        has_non_structural_mode = True
+            if rule.get("stage") == "instruction" and not has_non_structural_mode:
+                errors.append(
+                    f"rule '{name}' needs execution or semantics_model validation"
+                )
+
+        coverage = validation.get("coverage")
+        if not isinstance(coverage, list) or not coverage:
+            errors.append(f"rule '{name}' has no validation coverage entries")
+        else:
+            for entry in coverage:
+                if not isinstance(entry, str) or not entry.strip():
+                    errors.append(f"rule '{name}' has an invalid coverage entry")
+                elif gtest_names is not None and entry not in gtest_names:
+                    errors.append(
+                        f"rule '{name}' references missing gtest coverage '{entry}'"
+                    )
+
+    if errors:
+        for error in errors:
+            print(error, file=sys.stderr)
+        return 1
+
+    print("x86 cg peephole validation metadata is complete")
+    for mode in sorted(mode_counts):
+        print(f"{mode}: {mode_counts[mode]}")
+    return 0
+
+
+# Script entry point: exit status is 0 when all rules pass, 1 otherwise.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/collect_compiler_pass_timings.py b/tools/collect_compiler_pass_timings.py
new file mode 100644
index 000000000..9d4b1dead
--- /dev/null
+++ b/tools/collect_compiler_pass_timings.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import os
+import pathlib
+import statistics
+import subprocess
+import sys
+import tempfile
+from collections import defaultdict
+
+
+def parse_args():
+    """Parse the command line of the compiler pass timing collector.
+
+    Exactly one of --input and --manifest must be given. Everything after the
+    recognized options is collected verbatim into ``dtvm_args``.
+
+    Returns:
+        The argparse namespace (``dtvm``, ``input``, ``manifest``, ``runs``,
+        ``cases``, ``output``, ``allow_nonzero``, ``dtvm_args``).
+
+    Exits with an argparse usage error when --runs is smaller than 1.
+    """
+    parser = argparse.ArgumentParser(
+        description="Run dtvm with compiler pass timing enabled and aggregate the JSON output."
+    )
+    parser.add_argument("--dtvm", required=True, help="Path to the dtvm executable")
+    input_group = parser.add_mutually_exclusive_group(required=True)
+    input_group.add_argument("--input", help="Input EVM file to compile")
+    input_group.add_argument(
+        "--manifest",
+        help="JSON manifest that lists multiple benchmark inputs",
+    )
+    parser.add_argument("--runs", type=int, default=1, help="Number of process runs")
+    parser.add_argument(
+        "--case",
+        dest="cases",
+        action="append",
+        default=[],
+        help="Optional case name filter when --manifest is used",
+    )
+    parser.add_argument(
+        "--output",
+        help="Optional path to save the aggregated timing summary as JSON",
+    )
+    parser.add_argument(
+        "--allow-nonzero",
+        action="store_true",
+        help="Keep timings when dtvm exits non-zero but still writes a timing file",
+    )
+    parser.add_argument(
+        "dtvm_args",
+        nargs=argparse.REMAINDER,
+        help="Extra arguments passed to dtvm after '--'",
+    )
+    args = parser.parse_args()
+    # Zero or negative runs would execute nothing and report an all-zero
+    # summary, which looks like a valid (and very fast) measurement.
+    if args.runs < 1:
+        parser.error("--runs must be a positive integer")
+    return args
+
+
+def load_records(path: pathlib.Path):
+    """Return the "records" value of the timing JSON at ``path`` ([] if absent)."""
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    return payload.get("records", [])
+
+
+def build_stats(values):
+    """Summarize numeric samples as mean, median, p95, min and max.
+
+    An empty input yields 0.0 for every statistic. The p95 entry is the sample
+    at 1-based rank ceil(0.95 * n) of the values sorted in ascending order.
+    """
+    if not values:
+        return dict.fromkeys(("mean", "median", "p95", "min", "max"), 0.0)
+    ascending = sorted(values)
+    sample_count = len(ascending)
+    # -(-a // b) is integer ceiling division: this is ceil(0.95 * n) - 1.
+    p95_rank = max(0, -(-sample_count * 95 // 100) - 1)
+    stats = {
+        "mean": statistics.fmean(values),
+        "median": statistics.median(values),
+    }
+    stats["p95"] = ascending[p95_rank]
+    stats["min"] = min(values)
+    stats["max"] = max(values)
+    return stats
+
+
+def aggregate(records_per_run):
+    """Fold the timing records of several runs into one summary dict.
+
+    ``records_per_run`` holds one list of records per process run; every
+    record provides "total_time_ms" and a "phases" list of
+    {"name", "time_ms"} entries. The summary reports the run count, the record
+    count, mean/median of the record totals and, per phase name (sorted), the
+    statistics of its times plus the statistics of its percentage share of
+    the record total. Shares are only sampled for records whose total is
+    positive.
+    """
+    all_records = [record for run in records_per_run for record in run]
+    times_by_phase = defaultdict(list)
+    shares_by_phase = defaultdict(list)
+    record_totals = []
+
+    for record in all_records:
+        record_total = record["total_time_ms"]
+        record_totals.append(record_total)
+        for phase in record["phases"]:
+            phase_name = phase["name"]
+            times_by_phase[phase_name].append(phase["time_ms"])
+            if record_total > 0:
+                shares_by_phase[phase_name].append(
+                    phase["time_ms"] * 100.0 / record_total
+                )
+
+    if record_totals:
+        total_stats = {
+            "mean": statistics.fmean(record_totals),
+            "median": statistics.median(record_totals),
+        }
+    else:
+        total_stats = {"mean": 0.0, "median": 0.0}
+
+    phase_summaries = {}
+    for phase_name in sorted(times_by_phase):
+        entry = dict(build_stats(times_by_phase[phase_name]))
+        entry["share_of_total_pct"] = build_stats(shares_by_phase[phase_name])
+        phase_summaries[phase_name] = entry
+
+    return {
+        "runs": len(records_per_run),
+        "record_count": len(all_records),
+        "total_time_ms": total_stats,
+        "phases": phase_summaries,
+    }
+
+
+def normalize_dtvm_args(raw_args):
+    """Copy ``raw_args`` into a new list, dropping one leading "--" separator."""
+    forwarded = [*raw_args]
+    if forwarded[:1] == ["--"]:
+        del forwarded[0]
+    return forwarded
+
+
+def collect_records(dtvm_path, input_path, runs, allow_nonzero, extra_args):
+    """Run dtvm ``runs`` times on one input and return the records of each run.
+
+    Each run gets a fresh temporary directory; the path of a timing file in it
+    is passed to dtvm through the DTVM_COMPILER_PASS_TIMING_JSON environment
+    variable and the file is read back after the process exits.
+
+    Args:
+        dtvm_path: dtvm executable.
+        input_path: input file passed as dtvm's first argument.
+        runs: number of process runs.
+        allow_nonzero: accept a non-zero dtvm exit status as long as the
+            timing file was written.
+        extra_args: additional dtvm arguments appended after the input.
+
+    Returns:
+        A list with one record list per run.
+
+    Raises:
+        RuntimeError: if dtvm exits non-zero (and ``allow_nonzero`` is false)
+            or no timing file was written. dtvm's stderr is forwarded to
+            stderr before raising.
+    """
+    all_records = []
+    for _ in range(runs):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            timing_path = pathlib.Path(tmp_dir) / "compiler_pass_timing.json"
+            env = os.environ.copy()
+            env["DTVM_COMPILER_PASS_TIMING_JSON"] = str(timing_path)
+            proc = subprocess.run(
+                [str(dtvm_path), str(input_path), *extra_args],
+                env=env,
+                capture_output=True,
+                text=True,
+                check=False,
+            )
+            if proc.returncode != 0 and not allow_nonzero:
+                sys.stderr.write(proc.stderr)
+                raise RuntimeError(f"dtvm exited with code {proc.returncode}")
+            if not timing_path.exists():
+                # Forward dtvm's diagnostics; the failure message itself is
+                # carried by the exception, so writing it here as well would
+                # print it twice once the entry point reports the exception.
+                sys.stderr.write(proc.stderr)
+                raise RuntimeError("timing file was not written")
+            all_records.append(load_records(timing_path))
+    return all_records
+
+
+def load_manifest(path):
+    """Load a benchmark manifest and return its cases.
+
+    Each entry of the manifest's "cases" list is either a string (an input
+    path, named after its file stem) or an object with "input" and "name".
+    Input paths are taken relative to the manifest's directory and resolved.
+
+    Returns:
+        A list of {"name": str, "input": pathlib.Path} dicts in manifest order.
+    """
+    manifest_file = pathlib.Path(path).resolve()
+    document = json.loads(manifest_file.read_text(encoding="utf-8"))
+    base_dir = manifest_file.parent
+
+    def describe(entry):
+        if isinstance(entry, str):
+            relative, label = entry, pathlib.Path(entry).stem
+        else:
+            relative, label = entry["input"], entry["name"]
+        return {"name": label, "input": (base_dir / relative).resolve()}
+
+    return [describe(entry) for entry in document.get("cases", [])]
+
+
+def filter_cases(cases, wanted_names):
+    """Keep the cases whose name is in ``wanted_names``, in their original order.
+
+    With no wanted names the input list is returned unchanged.
+
+    Raises:
+        RuntimeError: if a wanted name matches no case.
+    """
+    if not wanted_names:
+        return cases
+    requested = set(wanted_names)
+    selected = []
+    matched = set()
+    for case in cases:
+        if case["name"] in requested:
+            selected.append(case)
+            matched.add(case["name"])
+    unknown = sorted(requested - matched)
+    if unknown:
+        raise RuntimeError(f"unknown manifest case(s): {', '.join(unknown)}")
+    return selected
+
+
+def collect_single_case(dtvm_path, input_path, runs, allow_nonzero, extra_args):
+    """Collect and aggregate the timings of one input.
+
+    Returns:
+        {"input": <input path as str>, "summary": <aggregate() result>}.
+    """
+    per_run = collect_records(dtvm_path, input_path, runs, allow_nonzero, extra_args)
+    result = {"input": str(input_path)}
+    result["summary"] = aggregate(per_run)
+    return result
+
+
+def main():
+    """Collect compiler pass timings and print the aggregated summary as JSON.
+
+    With --input the output is that input's summary plus an "input" key. With
+    --manifest it holds the manifest path, the case count, one summary per
+    (optionally filtered) case and an "overall" summary over every run of
+    every case. The JSON is also written to --output when given.
+
+    Returns:
+        0 on success; failures surface as RuntimeError.
+    """
+    args = parse_args()
+    dtvm_path = pathlib.Path(args.dtvm).resolve()
+    extra_args = normalize_dtvm_args(args.dtvm_args)
+
+    if not args.input:
+        selected_cases = filter_cases(load_manifest(args.manifest), args.cases)
+        per_case = []
+        every_run = []
+        for case in selected_cases:
+            case_runs = collect_records(
+                dtvm_path, case["input"], args.runs, args.allow_nonzero, extra_args
+            )
+            every_run += case_runs
+            per_case.append(
+                {
+                    "name": case["name"],
+                    "input": str(case["input"]),
+                    "summary": aggregate(case_runs),
+                }
+            )
+        output_data = {
+            "manifest": str(pathlib.Path(args.manifest).resolve()),
+            "case_count": len(per_case),
+            "cases": per_case,
+            "overall": aggregate(every_run),
+        }
+    else:
+        single = collect_single_case(
+            dtvm_path,
+            pathlib.Path(args.input).resolve(),
+            args.runs,
+            args.allow_nonzero,
+            extra_args,
+        )
+        # The summary's own keys come first, followed by "input".
+        output_data = {**single["summary"], "input": single["input"]}
+
+    rendered = json.dumps(output_data, indent=2)
+    if args.output:
+        pathlib.Path(args.output).write_text(rendered + "\n", encoding="utf-8")
+    print(rendered)
+    return 0
+
+
+# Script entry point: a RuntimeError raised during collection is reported on
+# stderr and turned into exit status 1 instead of a traceback.
+if __name__ == "__main__":
+    try:
+        sys.exit(main())
+    except RuntimeError as exc:
+        print(exc, file=sys.stderr)
+        sys.exit(1)
diff --git a/tools/generate_x86_cg_peephole.py b/tools/generate_x86_cg_peephole.py
new file mode 100644
index 000000000..a68ff338e
--- /dev/null
+++ b/tools/generate_x86_cg_peephole.py
@@ -0,0 +1,345 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+import sys
+from typing import Dict, List, Tuple
+
+
+def load_rules(path: pathlib.Path) -> Dict:
+    """Parse the UTF-8 JSON rule file at ``path`` and return its content."""
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def normalize_rule(rule: Dict) -> str:
+    """Build the signature string used to detect rules with the same pattern.
+
+    The signature joins, with "|", the rule's stage, the list of per-item
+    pattern descriptions (match head plus its operand requirements) and the
+    rule's "when" conditions. Bind names, captures and actions do not
+    contribute.
+    """
+
+    def describe_head(item: Dict) -> str:
+        if "opcode" in item:
+            return f"opcode:{item['opcode']}"
+        if "opcode_any" in item:
+            return "opcode_any:" + ",".join(item["opcode_any"])
+        return f"predicate:{item['predicate']}"
+
+    def describe_item(item: Dict) -> str:
+        head = describe_head(item)
+        requirement_keys = [
+            f"{req['operand']}:{req['field']}:{sorted(req.items())}"
+            for req in item.get("require", [])
+        ]
+        return f"{head}[{','.join(requirement_keys)}]"
+
+    stage = rule["stage"]
+    pattern_parts: List[str] = [describe_item(item) for item in rule["pattern"]]
+    conditions = [str(sorted(cond.items())) for cond in rule.get("when", [])]
+    when_parts = ",".join(conditions)
+    return f"{stage}|{pattern_parts}|{when_parts}"
+
+
+def validate_rules(data: Dict) -> Tuple[List[str], List[str]]:
+    """Build the rule report and collect conflicts between rules.
+
+    Rules are listed by stage, then descending priority, then name. Two rules
+    conflict when they share both the normalized pattern signature and the
+    priority.
+
+    Returns:
+        (report_lines, conflicts): the lines of the text report (which repeats
+        the conflicts at its end) and the conflict messages.
+    """
+    first_owner: Dict[Tuple[str, int], str] = {}
+    conflicts: List[str] = []
+    report_lines: List[str] = [
+        "X86 Cg peephole rule report",
+        "==========================",
+        "",
+    ]
+
+    def report_order(rule: Dict):
+        return (rule["stage"], -int(rule["priority"]), rule["name"])
+
+    for rule in sorted(data["rules"], key=report_order):
+        key = (normalize_rule(rule), int(rule["priority"]))
+        if key not in first_owner:
+            first_owner[key] = rule["name"]
+        else:
+            conflicts.append(
+                f"Conflicting rules with the same normalized pattern and priority: "
+                f"{first_owner[key]} vs {rule['name']}"
+            )
+        report_lines.append(
+            f"- {rule['name']} | stage={rule['stage']} | priority={rule['priority']}"
+        )
+
+    report_lines.append("")
+    if not conflicts:
+        report_lines.append("No conflicts detected.")
+    else:
+        report_lines.append("Conflicts:")
+        report_lines.extend(f"- {item}" for item in conflicts)
+
+    return report_lines, conflicts
+
+
+def emit_file_header() -> List[str]:
+    """Return the comment header of the generated file, ending in a blank line."""
+    banner = (
+        "// Copyright (C) 2025 the DTVM authors. All Rights Reserved.",
+        "// SPDX-License-Identifier: Apache-2.0",
+        "// Generated by tools/generate_x86_cg_peephole.py. Do not edit.",
+    )
+    return [*banner, ""]
+
+
+def resolve_operand_expr(bind: str, operand: int) -> Tuple[List[str], str]:
+    """Turn a rule operand index into C++ bounds checks and an index expression.
+
+    A non-negative ``operand`` is used as the index itself. A negative one
+    counts back from ``<bind>.getDesc().getNumOperands()``.
+
+    Returns:
+        (guard_lines, operand_expr). The guard lines still contain the literal
+        ``{miss_return}`` placeholder, which the caller fills in with
+        ``str.format``.
+    """
+    bail = "    return {miss_return};"
+    total_operands = f"{bind}.getNumOperands()"
+    if operand < 0:
+        from_end = -operand
+        declared_operands = f"{bind}.getDesc().getNumOperands()"
+        index_expr = f"({declared_operands} - {from_end})"
+        checks = [
+            f"  if ({declared_operands} < {from_end})",
+            bail,
+            f"  if ({total_operands} <= {index_expr})",
+            bail,
+        ]
+        return checks, index_expr
+    return [f"  if ({total_operands} <= {operand})", bail], str(operand)
+
+
+def emit_operand_check(bind: str, req: Dict, miss_return: str) -> List[str]:
+    """Emit the C++ lines that enforce one operand requirement of a pattern.
+
+    Supported requirements are a register equal to a captured value
+    (``field == "reg"`` with "equals_capture"), an immediate equal to an
+    ``X86::CondCode`` enumerator or to an integer (``field == "imm"`` with
+    "equals_enum" / "equals_int"), and an is-basic-block test
+    (``field == "is_mbb"`` with "equals_bool"). Every failing check returns
+    ``miss_return``.
+
+    Raises:
+        ValueError: for any other requirement.
+    """
+    operand = req["operand"]
+    field = req["field"]
+    raw_guards, index_expr = resolve_operand_expr(bind, operand)
+    prefix = [guard.format(miss_return=miss_return) for guard in raw_guards]
+    accessor = f"{bind}.getOperand({index_expr})"
+    bail = f"    return {miss_return};"
+
+    if field == "reg" and "equals_capture" in req:
+        captured = req["equals_capture"]
+        return prefix + [
+            f"  if (!{accessor}.isReg() ||",
+            f"      {accessor}.getReg() != {captured})",
+            bail,
+        ]
+
+    if field == "imm":
+        expected = None
+        if "equals_enum" in req:
+            expected = f"X86::CondCode::{req['equals_enum']}"
+        elif "equals_int" in req:
+            expected = str(int(req["equals_int"]))
+        if expected is not None:
+            return prefix + [
+                f"  if (!{accessor}.isImm())",
+                bail,
+                f"  if ({accessor}.getImm() != {expected})",
+                bail,
+            ]
+    elif field == "is_mbb":
+        negation = "!" if req["equals_bool"] else ""
+        return prefix + [f"  if ({negation}{accessor}.isMBB())", bail]
+
+    raise ValueError(f"Unsupported operand requirement: {req}")
+
+
+def emit_capture(bind: str, capture: Dict, miss_return: str) -> List[str]:
+    """Emit the C++ lines that bind one operand of ``bind`` to a local variable.
+
+    A "reg" capture checks ``isReg()`` and stores ``getReg()``; with
+    "require_single_use" it additionally rejects virtual registers that do not
+    have exactly one non-debug use. An "imm" capture checks ``isImm()`` and
+    stores ``getImm()``. Every failing check returns ``miss_return``.
+
+    Raises:
+        ValueError: for any other capture field.
+    """
+    operand = capture["operand"]
+    field = capture["field"]
+    name = capture["name"]
+    raw_guards, index_expr = resolve_operand_expr(bind, operand)
+    lines = [guard.format(miss_return=miss_return) for guard in raw_guards]
+    accessor = f"{bind}.getOperand({index_expr})"
+    bail = f"    return {miss_return};"
+
+    if field not in ("reg", "imm"):
+        raise ValueError(f"Unsupported capture: {capture}")
+
+    if field == "imm":
+        lines += [
+            f"  if (!{accessor}.isImm())",
+            bail,
+            f"  auto {name} = {accessor}.getImm();",
+        ]
+        return lines
+
+    lines += [
+        f"  if (!{accessor}.isReg())",
+        bail,
+        f"  auto {name} = {accessor}.getReg();",
+    ]
+    if capture.get("require_single_use"):
+        lines += [
+            f"  if ({name}.isVirtual() &&",
+            f"      !MBB.getParent()->getRegInfo().hasOneNonDBGUse({name}))",
+            bail,
+        ]
+    return lines
+
+
+def emit_instruction_match(rule: Dict) -> List[str]:
+    """Emit the C++ ``match_<name>`` function for an instruction-stage rule.
+
+    The generated function walks consecutive instructions starting at ``MII``,
+    one per pattern item, and returns ``NoMatch`` as soon as an item's
+    predicate/opcode test, capture or operand requirement fails or the block
+    ends. On a match it applies the rule's "erase" and "set_imm" actions. It
+    returns ``Advanced`` (after moving ``MII`` to the following instruction)
+    when the first bound instruction is erased, and ``Matched`` otherwise.
+
+    Returns:
+        The lines of the function followed by one empty line.
+    """
+    # '-' is replaced with '_' when forming the C++ function name.
+    name = rule["name"].replace("-", "_")
+    first_bind = rule["pattern"][0]["bind"]
+    erases_current = first_bind in rule["action"].get("erase", [])
+    miss_return = "GeneratedInstructionRuleResult::NoMatch"
+    lines = [
+        f"GeneratedInstructionRuleResult match_{name}(CgBasicBlock &MBB, "
+        "CgBasicBlock::iterator &MII) {",
+    ]
+    # The end iterator is only needed when the pattern looks past MII.
+    if len(rule["pattern"]) > 1:
+        lines.append("  auto MIE = MBB.end();")
+    for index, item in enumerate(rule["pattern"]):
+        iterator_name = f"LocalMII{index}"
+        bind = item["bind"]
+        if index == 0:
+            lines.append(f"  auto {iterator_name} = MII;")
+        else:
+            # Later items match the instruction right after the previous one.
+            prev = f"LocalMII{index - 1}"
+            lines.append(f"  auto {iterator_name} = {prev};")
+            lines.append(f"  ++{iterator_name};")
+            lines.append(f"  if ({iterator_name} == MIE)")
+            lines.append(f"    return {miss_return};")
+        lines.append(f"  auto &{bind} = *{iterator_name};")
+
+        # Match head: a member predicate, a single opcode, or any of several
+        # opcodes (emitted as a switch).
+        if "predicate" in item:
+            predicate = item["predicate"]
+            lines.append(f"  if (!{bind}.{predicate}())")
+            lines.append(f"    return {miss_return};")
+        elif "opcode" in item:
+            lines.append(f"  if ({bind}.getOpcode() != X86::{item['opcode']})")
+            lines.append(f"    return {miss_return};")
+        else:
+            lines.append(f"  switch ({bind}.getOpcode()) {{")
+            for opcode in item["opcode_any"]:
+                lines.append(f"  case X86::{opcode}:")
+                lines.append("    break;")
+            lines.append("  default:")
+            lines.append(f"    return {miss_return};")
+            lines.append("  }")
+
+        # Captures are emitted before requirements, so a requirement of this
+        # item can compare against a value captured from the same item.
+        for capture in item.get("capture", []):
+            lines.extend(emit_capture(bind, capture, miss_return))
+        for req in item.get("require", []):
+            lines.extend(emit_operand_check(bind, req, miss_return))
+
+    # The successor of MII is saved before erasing because MII itself is
+    # about to be erased.
+    # NOTE(review): NextMII is the instruction directly after MII; if a rule
+    # also erases that instruction, NextMII would refer to an erased
+    # instruction — confirm no rule erases both.
+    if erases_current:
+        lines.append("  auto NextMII = MII;")
+        lines.append("  ++NextMII;")
+    for action in rule["action"].get("erase", []):
+        lines.append(f"  {action}.eraseFromParent();")
+    # NOTE(review): set_imm is emitted after the erases, so its target
+    # instruction presumably must not be in the erase list — confirm.
+    for action in rule["action"].get("set_imm", []):
+        lines.append(
+            f"  {action['inst']}.getOperand({action['operand']}).setImm("
+            f"{action['from_capture']});"
+        )
+    if erases_current:
+        lines.append("  MII = NextMII;")
+        lines.append("  return GeneratedInstructionRuleResult::Advanced;")
+    else:
+        lines.append("  return GeneratedInstructionRuleResult::Matched;")
+    lines.append("}")
+    lines.append("")
+    return lines
+
+
+def emit_block_end_match(rule: Dict) -> List[str]:
+    """Emit the C++ ``bool match_<name>(CgBasicBlock &MBB)`` for a block_end rule.
+
+    Only the first pattern item is used; it is matched against the last
+    instruction of the block. The generated function returns false when the
+    block is empty or when the item's match head (predicate, "opcode" or
+    "opcode_any"), an operand requirement or a "when" condition fails;
+    otherwise it erases the instructions named in the rule's "erase" action
+    and returns true.
+
+    Returns:
+        The lines of the function followed by one empty line.
+
+    Raises:
+        ValueError: for a "when" condition other than "target_is_next_block".
+    """
+    name = rule["name"].replace("-", "_")
+    pattern = rule["pattern"][0]
+    bind = pattern["bind"]
+    lines = [
+        f"bool match_{name}(CgBasicBlock &MBB) {{",
+        "  if (MBB.empty())",
+        "    return false;",
+        f"  auto &{bind} = MBB.back();",
+    ]
+    # Same match heads as the instruction-stage matcher. Without the opcode
+    # branches an opcode-based rule would accept any last instruction.
+    if "predicate" in pattern:
+        predicate = pattern["predicate"]
+        lines.append(f"  if (!{bind}.{predicate}())")
+        lines.append("    return false;")
+    elif "opcode" in pattern:
+        lines.append(f"  if ({bind}.getOpcode() != X86::{pattern['opcode']})")
+        lines.append("    return false;")
+    elif "opcode_any" in pattern:
+        lines.append(f"  switch ({bind}.getOpcode()) {{")
+        for opcode in pattern["opcode_any"]:
+            lines.append(f"  case X86::{opcode}:")
+            lines.append("    break;")
+        lines.append("  default:")
+        lines.append("    return false;")
+        lines.append("  }")
+    for req in pattern.get("require", []):
+        lines.extend(emit_operand_check(bind, req, "false"))
+    for item in rule.get("when", []):
+        if item["kind"] == "target_is_next_block":
+            inst = item["inst"]
+            operand = item["operand"]
+            # The condition holds when the branch target's block number is
+            # this block's number plus one.
+            lines.extend(
+                [
+                    f"  CgBasicBlock *TargetBB = {inst}.getOperand({operand}).getMBB();",
+                    "  if (TargetBB->getNumber() != MBB.getNumber() + 1)",
+                    "    return false;",
+                ]
+            )
+        else:
+            raise ValueError(f"Unsupported rule condition: {item}")
+    for action in rule["action"].get("erase", []):
+        lines.append(f"  {action}.eraseFromParent();")
+    lines.append("  return true;")
+    lines.append("}")
+    lines.append("")
+    return lines
+
+
+def emit_dispatch(data: Dict) -> List[str]:
+    """Emit the body of the generated file inside an anonymous namespace.
+
+    The output holds the ``GeneratedInstructionRuleResult`` enum, one matcher
+    per rule (instruction-stage rules first, then block_end rules) and the two
+    dispatchers ``tryGeneratedInstructionRules`` and
+    ``tryGeneratedBlockEndRules``, which return the result of the first
+    matcher that matches.
+
+    Rules with a stage other than "instruction" or "block_end" are not
+    emitted.
+    """
+    lines = [
+        "namespace {",
+        "",
+        "enum class GeneratedInstructionRuleResult {",
+        "  NoMatch,",
+        "  Matched,",
+        "  Advanced,",
+        "};",
+        "",
+    ]
+    # Both lists keep the order of data["rules"]; "priority" is not consulted
+    # here. NOTE(review): confirm the rule file is already ordered by priority.
+    instruction_rules = [rule for rule in data["rules"] if rule["stage"] == "instruction"]
+    block_rules = [rule for rule in data["rules"] if rule["stage"] == "block_end"]
+
+    for rule in instruction_rules:
+        lines.extend(emit_instruction_match(rule))
+    for rule in block_rules:
+        lines.extend(emit_block_end_match(rule))
+
+    lines.extend(
+        [
+            "GeneratedInstructionRuleResult tryGeneratedInstructionRules(",
+            "    CgBasicBlock &MBB, CgBasicBlock::iterator &MII) {",
+        ]
+    )
+    # Each matcher is tried in turn; the first result other than NoMatch wins.
+    for rule in instruction_rules:
+        lines.append(
+            f"  if (auto Result = match_{rule['name'].replace('-', '_')}(MBB, MII);"
+        )
+        lines.append("      Result != GeneratedInstructionRuleResult::NoMatch)")
+        lines.append("    return Result;")
+    lines.extend(["  return GeneratedInstructionRuleResult::NoMatch;", "}", ""])
+
+    lines.extend(["bool tryGeneratedBlockEndRules(CgBasicBlock &MBB) {"])
+    for rule in block_rules:
+        lines.append(f"  if (match_{rule['name'].replace('-', '_')}(MBB))")
+        lines.append("    return true;")
+    lines.extend(["  return false;", "}", "", "} // namespace", ""])
+    return lines
+
+
+def write_text(path: pathlib.Path, content: str) -> None:
+    """Write ``content`` to ``path`` as UTF-8, creating missing parent directories."""
+    target_dir = path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as out:
+        out.write(content)
+
+
+def main() -> int:
+    """Generate the peephole .inc file and the rule report from the rule JSON.
+
+    The report is always written. When rules conflict, the conflicts are
+    printed to stderr, no .inc file is written and 1 is returned; otherwise
+    the .inc file is written as well and 0 is returned.
+    """
+    cli = argparse.ArgumentParser()
+    for flag in ("--rules", "--out-inc", "--out-report"):
+        cli.add_argument(flag, required=True)
+    options = cli.parse_args()
+
+    data = load_rules(pathlib.Path(options.rules))
+    report_lines, conflicts = validate_rules(data)
+    report_text = "\n".join(report_lines) + "\n"
+    report_path = pathlib.Path(options.out_report)
+
+    if not conflicts:
+        generated = emit_file_header() + emit_dispatch(data)
+        write_text(pathlib.Path(options.out_inc), "\n".join(generated))
+        write_text(report_path, report_text)
+        return 0
+
+    write_text(report_path, report_text)
+    for conflict in conflicts:
+        print(conflict, file=sys.stderr)
+    return 1
+
+
+# Script entry point: exit status is 1 when rule conflicts were found, else 0.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/mine_dmir_seed_rules.py b/tools/mine_dmir_seed_rules.py
new file mode 100644
index 000000000..ec324cacf
--- /dev/null
+++ b/tools/mine_dmir_seed_rules.py
@@ -0,0 +1,625 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+import random
+from dataclasses import dataclass
+
+
+# All-ones 64-bit mask; wrap_u64 uses it to reduce results to unsigned 64 bits.
+MASK64 = (1 << 64) - 1
+# Operators whose operands canonicalize_expr sorts into a stable order.
+COMMUTATIVE_OPS = {"add", "and", "mul", "or", "xor"}
+# Search space used by load_search_config when no config file is supplied.
+# Every term is written in the textual form accepted by parse_expr;
+# 18446744073709551615 is 2**64 - 1, i.e. the all-ones value.
+DEFAULT_SEARCH_CONFIG = {
+    # Leaf terms added to the search space as-is.
+    "base_terms": [
+        "x",
+        "y",
+        "cond",
+        "0:i64",
+        "1:i64",
+        "18446744073709551615:i64",
+    ],
+    # Terms wrapped in a single `not`.
+    "unary_not_terms": ["x", "y", "cond"],
+    # Terms wrapped in `not (not ...)`.
+    "double_not_terms": ["x", "y", "cond"],
+    # Each entry yields `op lhs rhs` for every op/lhs pair with one fixed rhs.
+    "binary_fixed_rhs": [
+        {
+            "ops": ["add", "sub", "and", "or", "xor", "shl", "sshr", "ushr"],
+            "lhs": ["x", "y", "cond"],
+            "rhs": "0:i64",
+        },
+        {
+            "ops": ["and", "or", "xor"],
+            "lhs": ["x", "y", "cond", "(not x)", "(not y)"],
+            "rhs": "18446744073709551615:i64",
+        },
+    ],
+    # Each entry yields `op term term` (same operand on both sides).
+    "binary_self": [
+        {
+            "ops": ["and", "or", "xor"],
+            "terms": ["x", "y", "cond"],
+        }
+    ],
+    # Yields `select cond value value` (both arms identical).
+    "select_same_arm": {
+        "conditions": ["cond", "x", "0:i64", "1:i64"],
+        "values": ["x", "y", "(not x)"],
+    },
+    # Each group yields the full ops x lhs x rhs cross product.
+    "pair_binary_groups": [
+        {
+            "ops": ["add", "sub", "and", "or", "xor"],
+            "lhs": ["x", "y"],
+            "rhs": ["x", "y", "0:i64"],
+        },
+        {
+            "ops": ["and", "or", "xor"],
+            "lhs": [
+                "x",
+                "y",
+                "(and x y)",
+                "(or x y)",
+                "(xor x y)",
+                "(not x)",
+                "(not y)",
+            ],
+            "rhs": [
+                "x",
+                "y",
+                "0:i64",
+                "(and x y)",
+                "(or x y)",
+                "(xor x y)",
+                "(not x)",
+                "(not y)",
+            ],
+        },
+    ],
+    # Yields `op lhs rhs carry` for every op/lhs/rhs combination.
+    "adc_sbb_zero": {
+        "ops": ["adc", "sbb"],
+        "lhs": ["x", "y"],
+        "rhs": ["x", "y", "0:i64"],
+        "carry": "0:i64",
+    },
+}
+
+
+@dataclass(frozen=True)
+class Expr:
+    """Immutable, hashable node of an expression tree.
+
+    ``op`` is ``"var"`` (name held in ``value``), ``"const"`` (integer held in
+    ``value``), or an operator name whose operands are stored in ``args``.
+    """
+
+    op: str
+    args: tuple["Expr", ...] = ()
+    value: str | int | None = None
+
+    def render(self) -> str:
+        """Return the textual form: a bare name, ``N:i64``, or ``(op a b ...)``."""
+        match self.op:
+            case "var":
+                return str(self.value)
+            case "const":
+                return f"{self.value}:i64"
+        pieces = [child.render() for child in self.args]
+        return "(" + self.op + " " + " ".join(pieces) + ")"
+
+
+def var(name: str) -> Expr:
+    """Build a variable node named ``name``."""
+    return Expr(op="var", value=name)
+
+
+def const(value: int) -> Expr:
+    """Build a constant node holding ``value``."""
+    return Expr(op="const", value=value)
+
+
+def unary(op: str, arg: Expr) -> Expr:
+    """Build a one-operand node ``(op arg)``."""
+    operands = (arg,)
+    return Expr(op, operands)
+
+
+def binary(op: str, lhs: Expr, rhs: Expr) -> Expr:
+    """Build a two-operand node ``(op lhs rhs)``."""
+    operands = (lhs, rhs)
+    return Expr(op, operands)
+
+
+def ternary(op: str, first: Expr, second: Expr, third: Expr) -> Expr:
+    """Build a three-operand node ``(op first second third)``."""
+    operands = (first, second, third)
+    return Expr(op, operands)
+
+
+def wrap_u64(value: int) -> int:
+    """Reduce ``value`` to its low 64 bits, giving a result in ``[0, 2**64)``."""
+    low_bits = value & MASK64
+    return low_bits
+
+
+def parse_expr(text: str) -> Expr:
+    """Parse the textual expression syntax into an ``Expr`` tree.
+
+    Grammar: ``(op arg ...)`` is an operator node, a token ending in ``:i64``
+    is a decimal constant, and any other token is a variable name.
+
+    Args:
+        text: Expression text, e.g. ``"(and x 0:i64)"``.
+
+    Returns:
+        The parsed expression.
+
+    Raises:
+        ValueError: If the text is empty, has unbalanced parentheses, lacks an
+            operator after ``(``, has a malformed constant, or has trailing
+            tokens after the first complete expression.
+    """
+    tokens = text.replace("(", " ( ").replace(")", " ) ").split()
+    index = 0
+
+    def next_token() -> str:
+        # Bounds-checked read so truncated input reports a ValueError instead
+        # of leaking an IndexError from the token list.
+        nonlocal index
+        if index >= len(tokens):
+            raise ValueError(f"unexpected end of expression '{text}'")
+        token = tokens[index]
+        index += 1
+        return token
+
+    def parse() -> Expr:
+        nonlocal index
+        token = next_token()
+        if token == ")":
+            # Without this check a stray ')' would become a variable name.
+            raise ValueError(f"unexpected ')' in expression '{text}'")
+        if token == "(":
+            op = next_token()
+            if op in ("(", ")"):
+                raise ValueError(f"missing operator in expression '{text}'")
+            args = []
+            while True:
+                if index >= len(tokens):
+                    raise ValueError(f"missing ')' in expression '{text}'")
+                if tokens[index] == ")":
+                    break
+                args.append(parse())
+            index += 1  # consume the closing ')'
+            return Expr(op, args=tuple(args))
+        if token.endswith(":i64"):
+            return const(int(token[:-4], 10))
+        return var(token)
+
+    expr = parse()
+    if index != len(tokens):
+        raise ValueError(f"unexpected trailing tokens in expression '{text}'")
+    return expr
+
+
+def canonical_var_name(index: int) -> str:
+    """Return the canonical name for the ``index``-th distinct variable.
+
+    The first three are ``x``, ``y``, ``z``; later ones are ``v<index>``.
+    """
+    base_names = ("x", "y", "z")
+    return base_names[index] if index < len(base_names) else f"v{index}"
+
+
+def canonicalize_expr(expr: Expr, env: dict[str, str] | None = None) -> Expr:
+    """Rename variables in first-seen order and sort commutative operands.
+
+    Args:
+        expr: Expression to canonicalize.
+        env: Mapping from original to canonical variable names. It is updated
+            in place, so passing the same dict for several expressions keeps
+            their renaming consistent. A fresh mapping is used when omitted.
+
+    Returns:
+        The canonical expression; constants are returned unchanged.
+    """
+    renames = {} if env is None else env
+    if expr.op == "const":
+        return expr
+    if expr.op == "var":
+        original_name = str(expr.value)
+        canonical_name = renames.setdefault(
+            original_name, canonical_var_name(len(renames))
+        )
+        return var(canonical_name)
+
+    # Children are renamed left to right before any reordering happens.
+    children = [canonicalize_expr(child, renames) for child in expr.args]
+    if expr.op in COMMUTATIVE_OPS:
+        # Non-constants first, then constants; ties broken by rendered text.
+        children.sort(key=lambda child: (child.op == "const", child.render()))
+    return Expr(expr.op, args=tuple(children))
+
+
+def canonicalize_pair(lhs: Expr, rhs: Expr) -> tuple[Expr, Expr]:
+    """Canonicalize ``lhs`` then ``rhs`` with one shared variable renaming."""
+    shared_names: dict[str, str] = {}
+    canonical_lhs = canonicalize_expr(lhs, shared_names)
+    canonical_rhs = canonicalize_expr(rhs, shared_names)
+    return canonical_lhs, canonical_rhs
+
+
+def build_candidate_key(lhs: Expr, rhs: Expr) -> tuple[str, str]:
+    """Return the rendered canonical forms of ``lhs`` and ``rhs`` as a key."""
+    canonical = canonicalize_pair(lhs, rhs)
+    return canonical[0].render(), canonical[1].render()
+
+
+def substitute_expr(expr: Expr, bindings: dict[str, Expr]) -> Expr:
+    """Replace every bound variable in ``expr`` with its bound expression.
+
+    Variables missing from ``bindings`` and all constants are kept as they are.
+    """
+    if expr.op == "const":
+        return expr
+    if expr.op == "var":
+        return bindings.get(str(expr.value), expr)
+    replaced = [substitute_expr(child, bindings) for child in expr.args]
+    return Expr(expr.op, args=tuple(replaced))
+
+
+def match_pattern(pattern: Expr, expr: Expr, bindings: dict[str, Expr]) -> bool:
+    """Structurally match ``expr`` against ``pattern``, recording variable bindings.
+
+    A pattern variable binds to whatever sub-expression it first meets and must
+    equal that same sub-expression on later occurrences. ``bindings`` is
+    updated in place and may hold partial results when the match fails.
+    """
+    if pattern.op == "const":
+        return pattern == expr
+    if pattern.op == "var":
+        key = str(pattern.value)
+        previous = bindings.get(key)
+        if previous is not None:
+            return previous == expr
+        bindings[key] = expr
+        return True
+    if pattern.op != expr.op:
+        return False
+    if len(pattern.args) != len(expr.args):
+        return False
+    for sub_pattern, sub_expr in zip(pattern.args, expr.args):
+        if not match_pattern(sub_pattern, sub_expr, bindings):
+            return False
+    return True
+
+
+def is_rule_instance(rule_lhs: Expr, rule_rhs: Expr,
+                     candidate_lhs: Expr, candidate_rhs: Expr) -> bool:
+    """Tell whether the candidate rewrite is an instantiation of the rule.
+
+    The rule's left side is matched against the candidate's left side; the
+    resulting bindings applied to the rule's right side must then equal the
+    candidate's right side.
+    """
+    captured: dict[str, Expr] = {}
+    matched = match_pattern(rule_lhs, candidate_lhs, captured)
+    return matched and substitute_expr(rule_rhs, captured) == candidate_rhs
+
+
+def eval_expr(expr: Expr, env: dict[str, int]) -> int:
+    """Evaluate ``expr`` under ``env`` with unsigned 64-bit wrap-around.
+
+    Supported operators: ``not``, ``add``, ``sub``, ``mul``, ``and``, ``or``,
+    ``xor``, ``adc`` (a + b + carry), ``sbb`` (a - b - borrow), ``select``
+    (first operand non-zero picks the second, otherwise the third), and the
+    shifts ``shl``, ``sshr`` (arithmetic), ``ushr`` (logical).
+
+    Args:
+        expr: Expression to evaluate.
+        env: Variable name to integer value.
+
+    Returns:
+        The integer result.
+
+    Raises:
+        KeyError: If a variable is missing from ``env``.
+        ValueError: If ``expr`` uses an unsupported operator.
+    """
+    if expr.op == "var":
+        return env[str(expr.value)]
+    if expr.op == "const":
+        # Reduce literals to the unsigned 64-bit domain so that e.g. -1 and
+        # 2**64 - 1 evaluate identically and never reach a shift as a
+        # negative count.
+        return wrap_u64(int(expr.value))
+    if expr.op == "not":
+        return wrap_u64(~eval_expr(expr.args[0], env))
+    if expr.op == "add":
+        return wrap_u64(eval_expr(expr.args[0], env) + eval_expr(expr.args[1], env))
+    if expr.op == "sub":
+        return wrap_u64(eval_expr(expr.args[0], env) - eval_expr(expr.args[1], env))
+    if expr.op == "mul":
+        return wrap_u64(eval_expr(expr.args[0], env) * eval_expr(expr.args[1], env))
+    if expr.op == "and":
+        return wrap_u64(eval_expr(expr.args[0], env) & eval_expr(expr.args[1], env))
+    if expr.op == "or":
+        return wrap_u64(eval_expr(expr.args[0], env) | eval_expr(expr.args[1], env))
+    if expr.op == "xor":
+        return wrap_u64(eval_expr(expr.args[0], env) ^ eval_expr(expr.args[1], env))
+    if expr.op == "adc":
+        return wrap_u64(
+            eval_expr(expr.args[0], env)
+            + eval_expr(expr.args[1], env)
+            + eval_expr(expr.args[2], env)
+        )
+    if expr.op == "sbb":
+        return wrap_u64(
+            eval_expr(expr.args[0], env)
+            - eval_expr(expr.args[1], env)
+            - eval_expr(expr.args[2], env)
+        )
+    if expr.op == "select":
+        # Only the chosen arm is evaluated.
+        return (
+            eval_expr(expr.args[1], env)
+            if eval_expr(expr.args[0], env) != 0
+            else eval_expr(expr.args[2], env)
+        )
+    if expr.op == "shl":
+        amount = eval_expr(expr.args[1], env)
+        if amount >= 64:
+            # Every bit is shifted out.
+            return 0
+        return wrap_u64(eval_expr(expr.args[0], env) << amount)
+    if expr.op == "sshr":
+        amount = eval_expr(expr.args[1], env)
+        value = eval_expr(expr.args[0], env)
+        if amount >= 64:
+            # Oversized arithmetic shift leaves only copies of the sign bit.
+            return MASK64 if value & (1 << 63) else 0
+        if value & (1 << 63):
+            # Reinterpret as a negative Python int so >> sign-extends.
+            value -= 1 << 64
+        return wrap_u64(value >> amount)
+    if expr.op == "ushr":
+        amount = eval_expr(expr.args[1], env)
+        if amount >= 64:
+            return 0
+        return eval_expr(expr.args[0], env) >> amount
+    raise ValueError(f"unsupported op {expr.op}")
+
+
+def expr_cost(expr: Expr) -> dict[str, int]:
+    """Compute the static cost vector of ``expr``.
+
+    Returns:
+        A dict with four counters:
+        ``dmir_inst`` - number of operator nodes in the tree;
+        ``select_depth`` - deepest nesting of ``select`` nodes on any path;
+        ``adc_chain`` - number of ``adc``/``sbb`` nodes in the tree;
+        ``runtime_calls`` - sum over children (leaves contribute 0 and no
+        operator adds to it here, so it is always 0 for these trees).
+    """
+    if expr.op in {"var", "const"}:
+        return {
+            "dmir_inst": 0,
+            "select_depth": 0,
+            "adc_chain": 0,
+            "runtime_calls": 0,
+        }
+
+    child_costs = [expr_cost(arg) for arg in expr.args]
+    # default=0 keeps an operator node with no operands (which parse_expr
+    # produces for "(op)") from crashing max() on an empty sequence.
+    deepest_child_select = max(
+        (cost["select_depth"] for cost in child_costs), default=0
+    )
+    child_adc_total = sum(cost["adc_chain"] for cost in child_costs)
+    return {
+        "dmir_inst": 1 + sum(cost["dmir_inst"] for cost in child_costs),
+        "select_depth": deepest_child_select + (1 if expr.op == "select" else 0),
+        "adc_chain": child_adc_total + (1 if expr.op in {"adc", "sbb"} else 0),
+        "runtime_calls": sum(cost["runtime_calls"] for cost in child_costs),
+    }
+
+
+def dominates(rhs_cost: dict[str, int], lhs_cost: dict[str, int]) -> bool:
+    """Tell whether ``rhs_cost`` is no worse on every field and better on one."""
+    fields = ("dmir_inst", "select_depth", "adc_chain", "runtime_calls")
+    strictly_better = False
+    for field in fields:
+        if rhs_cost[field] > lhs_cost[field]:
+            return False
+        if rhs_cost[field] < lhs_cost[field]:
+            strictly_better = True
+    return strictly_better
+
+
+def cost_delta(lhs_cost: dict[str, int], rhs_cost: dict[str, int]) -> dict[str, int]:
+    """Return ``rhs_cost - lhs_cost`` per field (negative means rhs is cheaper)."""
+    delta: dict[str, int] = {}
+    for field in ("dmir_inst", "select_depth", "adc_chain", "runtime_calls"):
+        delta[field] = rhs_cost[field] - lhs_cost[field]
+    return delta
+
+
+def build_sample_envs() -> list[dict[str, int]]:
+    """Build the fixed list of ``x``/``y``/``cond`` environments used for sampling.
+
+    The list holds every boundary-value combination (11 x 11 pairs, five
+    ``cond`` choices each) followed by 64 pseudo-random environments drawn
+    from a fixed seed, so repeated calls return the same values.
+    """
+    boundary_values = [
+        0, 1, 2, 3, 7, 8, 15, 16,
+        0x7FFFFFFFFFFFFFFF,
+        0x8000000000000000,
+        0xFFFFFFFFFFFFFFFF,
+    ]
+    # Use the full boundary set for y so shift-sensitive expressions
+    # (e.g. shl/ushr with large shift amounts) are covered.
+    envs = [
+        {"x": x, "y": y, "cond": wrap_u64(cond)}
+        for x in boundary_values
+        for y in boundary_values
+        for cond in (0, 1, x, y, x ^ y)
+    ]
+
+    rng = random.Random(0x7D6B4A1C)
+    # The dict literal draws x, then y, then cond for each environment.
+    envs.extend(
+        {
+            "x": rng.getrandbits(64),
+            "y": rng.getrandbits(64),
+            "cond": rng.getrandbits(64),
+        }
+        for _ in range(64)
+    )
+    return envs
+
+
+def load_search_config(path: str | None) -> dict:
+    """Load a JSON search config from ``path``, or return the built-in default.
+
+    With ``path`` set to ``None`` the shared ``DEFAULT_SEARCH_CONFIG`` object
+    itself is returned (not a copy).
+    """
+    if path is not None:
+        config_text = pathlib.Path(path).read_text(encoding="utf-8")
+        return json.loads(config_text)
+    return DEFAULT_SEARCH_CONFIG
+
+
+def build_term_map(config: dict) -> dict[str, Expr]:
+    """Collect every term spec mentioned in ``config`` and parse each one once.
+
+    Returns:
+        Mapping from the spec text to its parsed expression.
+    """
+    term_specs = set(config.get("base_terms", []))
+    for list_key in ("unary_not_terms", "double_not_terms"):
+        term_specs.update(config.get(list_key, []))
+
+    for fixed in config.get("binary_fixed_rhs", []):
+        term_specs.update(fixed.get("lhs", []))
+        term_specs.add(fixed.get("rhs"))
+
+    for group in config.get("binary_self", []):
+        term_specs.update(group.get("terms", []))
+
+    same_arm = config.get("select_same_arm", {})
+    for list_key in ("conditions", "values"):
+        term_specs.update(same_arm.get(list_key, []))
+
+    # A single "pair_binary" entry is honoured only when "pair_binary_groups"
+    # is absent or empty.
+    pair_groups = list(config.get("pair_binary_groups", []))
+    if "pair_binary" in config and not pair_groups:
+        pair_groups.append(config["pair_binary"])
+    for group in pair_groups:
+        for side in ("lhs", "rhs"):
+            term_specs.update(group.get(side, []))
+
+    carry_group = config.get("adc_sbb_zero", {})
+    for side in ("lhs", "rhs"):
+        term_specs.update(carry_group.get(side, []))
+    carry_spec = carry_group.get("carry")
+    if carry_spec:
+        term_specs.add(carry_spec)
+
+    return {spec: parse_expr(spec) for spec in term_specs}
+
+
+def build_search_space(config: dict) -> list[Expr]:
+    """Expand a search-space config into a deduplicated, sorted list of terms.
+
+    Each config section contributes one family of expressions (base terms,
+    single and double ``not``, binary ops against a fixed right operand,
+    binary ops with identical operands, ``select`` with identical arms, full
+    binary cross products, and ``adc``/``sbb`` with a fixed carry).
+
+    Args:
+        config: Search-space description in the ``DEFAULT_SEARCH_CONFIG`` shape.
+
+    Returns:
+        The distinct terms ordered by their rendered text.
+    """
+    term_map = build_term_map(config)
+    base_terms = [term_map[spec] for spec in config.get("base_terms", [])]
+
+    terms = set(base_terms)
+
+    for spec in config.get("unary_not_terms", []):
+        terms.add(unary("not", term_map[spec]))
+
+    for spec in config.get("double_not_terms", []):
+        terms.add(unary("not", unary("not", term_map[spec])))
+
+    for entry in config.get("binary_fixed_rhs", []):
+        rhs = term_map[entry["rhs"]]
+        for op in entry.get("ops", []):
+            for lhs_spec in entry.get("lhs", []):
+                terms.add(binary(op, term_map[lhs_spec], rhs))
+
+    for entry in config.get("binary_self", []):
+        for op in entry.get("ops", []):
+            for spec in entry.get("terms", []):
+                value = term_map[spec]
+                terms.add(binary(op, value, value))
+
+    select_same_arm = config.get("select_same_arm", {})
+    for cond_spec in select_same_arm.get("conditions", []):
+        for value_spec in select_same_arm.get("values", []):
+            value = term_map[value_spec]
+            terms.add(ternary("select", term_map[cond_spec], value, value))
+
+    # A single "pair_binary" entry is honoured only when "pair_binary_groups"
+    # is absent or empty.
+    pair_binary_groups = list(config.get("pair_binary_groups", []))
+    if not pair_binary_groups and "pair_binary" in config:
+        pair_binary_groups.append(config["pair_binary"])
+    for entry in pair_binary_groups:
+        for op in entry.get("ops", []):
+            for lhs_spec in entry.get("lhs", []):
+                for rhs_spec in entry.get("rhs", []):
+                    terms.add(binary(op, term_map[lhs_spec], term_map[rhs_spec]))
+
+    adc_sbb_zero = config.get("adc_sbb_zero", {})
+    # An absent or empty carry falls back to a literal zero. The fallback spec
+    # is only in term_map when the config mentions it elsewhere, so it is
+    # parsed here instead of looked up; a plain lookup raised KeyError for
+    # configs that never mention "0:i64".
+    carry_spec = adc_sbb_zero.get("carry") or "0:i64"
+    carry = term_map[carry_spec] if carry_spec in term_map else parse_expr(carry_spec)
+    for op in adc_sbb_zero.get("ops", []):
+        for lhs_spec in adc_sbb_zero.get("lhs", []):
+            for rhs_spec in adc_sbb_zero.get("rhs", []):
+                terms.add(ternary(op, term_map[lhs_spec], term_map[rhs_spec], carry))
+
+    return sorted(terms, key=lambda expr: expr.render())
+
+def load_rule_patterns(rules_path: str | None) -> list[tuple[Expr, Expr]]:
+    """Parse the ``lhs``/``rhs`` text of every rule in a JSON rule file.
+
+    Returns an empty list when ``rules_path`` is ``None``.
+    """
+    if rules_path is None:
+        return []
+    rules_text = pathlib.Path(rules_path).read_text(encoding="utf-8")
+    patterns = []
+    for rule in json.loads(rules_text).get("rules", []):
+        patterns.append((parse_expr(rule["lhs"]), parse_expr(rule["rhs"])))
+    return patterns
+
+
+def build_rule_key_set(rule_patterns: list[tuple[Expr, Expr]]) -> set[tuple[str, str]]:
+    """Return the set of canonical candidate keys for the given rule patterns."""
+    keys: set[tuple[str, str]] = set()
+    for lhs, rhs in rule_patterns:
+        keys.add(build_candidate_key(lhs, rhs))
+    return keys
+
+
+def is_candidate_covered(lhs: Expr, rhs: Expr,
+                         rule_patterns: list[tuple[Expr, Expr]],
+                         rule_keys: set[tuple[str, str]]) -> bool:
+    """Tell whether an existing rule already covers the rewrite ``lhs -> rhs``.
+
+    A candidate is covered when its canonical key is in ``rule_keys``, or when
+    its canonical form is an instance of some canonicalized rule pattern.
+    """
+    canonical_lhs, canonical_rhs = canonicalize_pair(lhs, rhs)
+    if (canonical_lhs.render(), canonical_rhs.render()) in rule_keys:
+        return True
+    for rule_lhs, rule_rhs in rule_patterns:
+        pattern_lhs, pattern_rhs = canonicalize_pair(rule_lhs, rule_rhs)
+        if is_rule_instance(pattern_lhs, pattern_rhs, canonical_lhs, canonical_rhs):
+            return True
+    return False
+
+
+def serialize_candidate(lhs: Expr, rhs: Expr, cost: dict[str, dict[str, int]],
+                        variants: list[tuple[str, str]] | None = None,
+                        covered: bool | None = None) -> dict:
+    """Turn a candidate rewrite into a JSON-serializable dict.
+
+    ``variant_count``/``variants`` and ``covered_by_rule_repo`` are only
+    included when the corresponding argument is not ``None``.
+    """
+    entry = {"lhs": lhs.render(), "rhs": rhs.render(), "cost": cost}
+    if variants is not None:
+        variant_dicts = []
+        for pair in variants:
+            variant_dicts.append({"lhs": pair[0], "rhs": pair[1]})
+        entry["variant_count"] = len(variants)
+        entry["variants"] = variant_dicts
+    if covered is not None:
+        entry["covered_by_rule_repo"] = covered
+    return entry
+
+
+def build_candidates(rules_path: str | None = None,
+                     config_path: str | None = None) -> dict:
+    """Mine candidate rewrites from the configured search space.
+
+    Terms are grouped by their evaluation results over the sample
+    environments; within each group every term strictly dominated in cost by
+    the group's cheapest term yields a candidate ``term -> cheapest``.
+    Candidates are then merged by canonical key and classified against the
+    optional rule file.
+
+    Args:
+        rules_path: Optional rule JSON used to mark candidates as covered.
+        config_path: Optional search-space config; the default is used if omitted.
+
+    Returns:
+        A JSON-serializable dict with ``summary``, ``candidates`` (raw),
+        ``curated_candidates``, ``covered_candidates`` and ``novel_candidates``.
+    """
+    envs = build_sample_envs()
+    search_config = load_search_config(config_path)
+    terms = build_search_space(search_config)
+    # Group terms that agree on every sample; equality is only established on
+    # the samples, not proven.
+    classes: dict[tuple[int, ...], list[Expr]] = {}
+    for expr in terms:
+        signature = tuple(eval_expr(expr, env) for env in envs)
+        classes.setdefault(signature, []).append(expr)
+
+    raw_candidates = []
+    for exprs in classes.values():
+        # Cheapest first; the rendered text makes the order deterministic.
+        exprs = sorted(
+            exprs,
+            key=lambda expr: (
+                expr_cost(expr)["dmir_inst"],
+                expr_cost(expr)["select_depth"],
+                expr_cost(expr)["adc_chain"],
+                expr_cost(expr)["runtime_calls"],
+                expr.render(),
+            ),
+        )
+        best = exprs[0]
+        best_cost = expr_cost(best)
+        for expr in exprs[1:]:
+            expr_cost_value = expr_cost(expr)
+            # Keep only rewrites where the replacement is strictly cheaper.
+            if not dominates(best_cost, expr_cost_value):
+                continue
+            raw_candidates.append(
+                {
+                    "lhs_expr": expr,
+                    "rhs_expr": best,
+                    "cost": {
+                        "lhs": expr_cost_value,
+                        "rhs": best_cost,
+                        "delta": cost_delta(expr_cost_value, best_cost),
+                    },
+                }
+            )
+
+    raw_candidates.sort(
+        key=lambda item: (item["lhs_expr"].render(), item["rhs_expr"].render())
+    )
+
+    # Merge candidates with the same canonical key; the first one seen (in
+    # sorted order) supplies the cost, and each one is recorded as a variant.
+    curated: dict[tuple[str, str], dict[str, object]] = {}
+    for candidate in raw_candidates:
+        lhs_expr = candidate["lhs_expr"]
+        rhs_expr = candidate["rhs_expr"]
+        key = build_candidate_key(lhs_expr, rhs_expr)
+        variant = (lhs_expr.render(), rhs_expr.render())
+        entry = curated.setdefault(
+            key,
+            {
+                "lhs_expr": parse_expr(key[0]),
+                "rhs_expr": parse_expr(key[1]),
+                "cost": candidate["cost"],
+                "variants": [],
+            },
+        )
+        entry["variants"].append(variant)
+
+    rule_patterns = load_rule_patterns(rules_path)
+    rule_keys = build_rule_key_set(rule_patterns)
+    curated_candidates = []
+    novel_candidates = []
+    covered_candidates = []
+    for key, entry in sorted(curated.items()):
+        covered = is_candidate_covered(
+            entry["lhs_expr"], entry["rhs_expr"], rule_patterns, rule_keys
+        )
+        serialized = serialize_candidate(
+            entry["lhs_expr"],
+            entry["rhs_expr"],
+            entry["cost"],
+            variants=sorted(set(entry["variants"])),
+            covered=covered,
+        )
+        curated_candidates.append(serialized)
+        if covered:
+            covered_candidates.append(serialized)
+        else:
+            novel_candidates.append(serialized)
+
+    # Ascending cost delta, so the most negative (largest saving) comes first.
+    novel_candidates.sort(
+        key=lambda item: (
+            item["cost"]["delta"]["runtime_calls"],
+            item["cost"]["delta"]["dmir_inst"],
+            item["cost"]["delta"]["select_depth"],
+            item["cost"]["delta"]["adc_chain"],
+            item["lhs"],
+            item["rhs"],
+        )
+    )
+
+    return {
+        "summary": {
+            "term_count": len(terms),
+            "sample_count": len(envs),
+            "candidate_count": len(raw_candidates),
+            "curated_candidate_count": len(curated_candidates),
+            "covered_candidate_count": len(covered_candidates),
+            "novel_candidate_count": len(novel_candidates),
+            "config_supplied": config_path is not None,
+        },
+        "candidates": [
+            serialize_candidate(
+                candidate["lhs_expr"], candidate["rhs_expr"], candidate["cost"]
+            )
+            for candidate in raw_candidates
+        ],
+        "curated_candidates": curated_candidates,
+        "covered_candidates": covered_candidates,
+        "novel_candidates": novel_candidates,
+    }
+
+
+def parse_args():
+    """Parse the optional ``--out``, ``--rules`` and ``--config`` arguments."""
+    parser = argparse.ArgumentParser(
+        description="Bootstrap offline dMIR rewrite mining with a seed search space."
+    )
+    optional_flags = (
+        ("--out", "Optional output path. Defaults to stdout when omitted."),
+        ("--rules", "Optional rule file used to mark already-covered candidates."),
+        ("--config", "Optional search-space config file."),
+    )
+    for flag, help_text in optional_flags:
+        parser.add_argument(flag, help=help_text)
+    return parser.parse_args()
+
+
+def main():
+    """Mine candidates and emit them as indented JSON to ``--out`` or stdout."""
+    args = parse_args()
+    payload = build_candidates(args.rules, args.config)
+    output = json.dumps(payload, indent=2) + "\n"
+    if not args.out:
+        print(output, end="")
+        return 0
+    pathlib.Path(args.out).write_text(output, encoding="utf-8")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/report_dmir_rewrite_rules.py b/tools/report_dmir_rewrite_rules.py
new file mode 100644
index 000000000..c6ca272c3
--- /dev/null
+++ b/tools/report_dmir_rewrite_rules.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+from collections import Counter
+
+from check_dmir_rewrite_rules import load_gtest_names, load_rules
+
+
+def parse_args():
+    """Parse ``--rules`` (required) plus optional ``--gtest-binary`` and ``--out``."""
+    parser = argparse.ArgumentParser(
+        description="Generate a validation coverage report for dMIR rewrite rules."
+    )
+    parser.add_argument("--rules", required=True, help="Path to the rule JSON file")
+    optional_flags = {
+        "--gtest-binary": "Optional gtest binary used to mark coverage entries as present",
+        "--out": "Optional output path. Defaults to stdout when omitted.",
+    }
+    for flag, help_text in optional_flags.items():
+        parser.add_argument(flag, help=help_text)
+    return parser.parse_args()
+
+
+def build_rule_entry(rule, gtest_names):
+    """Summarize one rewrite rule and the presence of its coverage tests.
+
+    Args:
+        rule: Rule dict from the rule file.
+        gtest_names: Collection of known test names, or ``None`` to treat
+            every listed coverage entry as present.
+
+    Returns:
+        A dict with the rule's name, status, inputs, validation modes, cost
+        delta, per-test coverage entries and an overall ``coverage_complete``
+        flag (``True`` when no coverage entry is missing).
+    """
+    validation = rule.get("validation", {})
+    coverage_entries = [
+        {"name": name, "present": gtest_names is None or name in gtest_names}
+        for name in validation.get("coverage", [])
+    ]
+    all_present = all(item["present"] for item in coverage_entries)
+
+    return {
+        "name": rule.get("name"),
+        "status": rule.get("status"),
+        "inputs": list(rule.get("inputs", [])),
+        "modes": list(validation.get("modes", [])),
+        "cost_delta": dict(rule.get("cost", {}).get("delta", {})),
+        "coverage": coverage_entries,
+        "coverage_complete": all_present,
+    }
+
+
+def main():
+    """Build the dMIR rewrite-rule coverage report and emit it as JSON.
+
+    The report goes to ``--out`` when given, otherwise to stdout.
+    """
+    args = parse_args()
+    data = load_rules(args.rules)
+    gtest_names = None
+    if args.gtest_binary:
+        gtest_names = load_gtest_names(args.gtest_binary)
+
+    status_counts = Counter()
+    mode_counts = Counter()
+    rule_entries = []
+    for rule in data.get("rules", []):
+        status_counts.update([rule.get("status", "<unknown>")])
+        mode_counts.update(rule.get("validation", {}).get("modes", []))
+        rule_entries.append(build_rule_entry(rule, gtest_names))
+
+    missing_coverage_count = sum(
+        1 for entry in rule_entries if not entry["coverage_complete"]
+    )
+
+    summary = {
+        "rule_count": len(rule_entries),
+        "status_counts": dict(sorted(status_counts.items())),
+        "mode_counts": dict(sorted(mode_counts.items())),
+        "rules_with_missing_coverage": missing_coverage_count,
+    }
+    report = {"summary": summary, "rules": rule_entries}
+
+    output = json.dumps(report, indent=2) + "\n"
+    if not args.out:
+        print(output, end="")
+        return 0
+    pathlib.Path(args.out).write_text(output, encoding="utf-8")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/report_x86_cg_peephole_validation.py b/tools/report_x86_cg_peephole_validation.py
new file mode 100644
index 000000000..50c40c889
--- /dev/null
+++ b/tools/report_x86_cg_peephole_validation.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+from collections import Counter
+
+from check_x86_cg_peephole_validation import load_gtest_names, load_rules
+
+
+def parse_args():
+    """Parse ``--rules`` (required) plus optional ``--gtest-binary`` and ``--out``."""
+    parser = argparse.ArgumentParser(
+        description="Generate a validation coverage report for x86 peephole rules."
+    )
+    parser.add_argument("--rules", required=True, help="Path to the rule JSON file")
+    optional_flags = {
+        "--gtest-binary": "Optional gtest binary used to mark coverage entries as present",
+        "--out": "Optional output path. Defaults to stdout when omitted.",
+    }
+    for flag, help_text in optional_flags.items():
+        parser.add_argument(flag, help=help_text)
+    return parser.parse_args()
+
+
+def build_rule_entry(rule, gtest_names):
+    """Summarize one peephole rule and the presence of its coverage tests.
+
+    Args:
+        rule: Rule dict from the rule file.
+        gtest_names: Collection of known test names, or ``None`` to treat
+            every listed coverage entry as present.
+
+    Returns:
+        A dict with the rule's name, stage, priority, validation modes,
+        per-test coverage entries and an overall ``coverage_complete`` flag
+        (``True`` when no coverage entry is missing).
+    """
+    validation = rule.get("validation", {})
+    coverage_entries = [
+        {"name": name, "present": gtest_names is None or name in gtest_names}
+        for name in validation.get("coverage", [])
+    ]
+    all_present = all(item["present"] for item in coverage_entries)
+
+    return {
+        "name": rule.get("name"),
+        "stage": rule.get("stage"),
+        "priority": rule.get("priority"),
+        "modes": list(validation.get("modes", [])),
+        "coverage": coverage_entries,
+        "coverage_complete": all_present,
+    }
+
+
+def main():
+    """Build the x86 peephole-rule coverage report and emit it as JSON.
+
+    The report goes to ``--out`` when given, otherwise to stdout.
+    """
+    args = parse_args()
+    data = load_rules(args.rules)
+    gtest_names = None
+    if args.gtest_binary:
+        gtest_names = load_gtest_names(args.gtest_binary)
+
+    stage_counts = Counter()
+    mode_counts = Counter()
+    rule_entries = []
+    for rule in data.get("rules", []):
+        stage_counts.update([rule.get("stage", "<unknown>")])
+        mode_counts.update(rule.get("validation", {}).get("modes", []))
+        rule_entries.append(build_rule_entry(rule, gtest_names))
+
+    missing_coverage_count = sum(
+        1 for entry in rule_entries if not entry["coverage_complete"]
+    )
+
+    summary = {
+        "rule_count": len(rule_entries),
+        "stage_counts": dict(sorted(stage_counts.items())),
+        "mode_counts": dict(sorted(mode_counts.items())),
+        "rules_with_missing_coverage": missing_coverage_count,
+    }
+    report = {"summary": summary, "rules": rule_entries}
+
+    output = json.dumps(report, indent=2) + "\n"
+    if not args.out:
+        print(output, end="")
+        return 0
+    pathlib.Path(args.out).write_text(output, encoding="utf-8")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/synthesize_dmir_rules.py b/tools/synthesize_dmir_rules.py
new file mode 100644
index 000000000..0edf85526
--- /dev/null
+++ b/tools/synthesize_dmir_rules.py
@@ -0,0 +1,691 @@
+#!/usr/bin/env python3
+"""Automated dMIR rewrite rule synthesis via enumeration + Z3 verification."""
+
+import argparse
+import json
+import pathlib
+import sys
+import time
+
+import z3
+
+from mine_dmir_seed_rules import (
+    COMMUTATIVE_OPS,
+    MASK64,
+    Expr,
+    binary,
+    build_candidate_key,
+    build_rule_key_set,
+    build_sample_envs,
+    canonicalize_pair,
+    const,
+    cost_delta,
+    dominates,
+    eval_expr,
+    expr_cost,
+    is_candidate_covered,
+    load_rule_patterns,
+    unary,
+    var,
+    wrap_u64,
+)
+
+# ---------------------------------------------------------------------------
+# Expression enumeration
+# ---------------------------------------------------------------------------
+
+BINARY_OPS = ["add", "sub", "mul", "and", "or", "xor"]  # enumerated two-operand ops
+SHIFT_OPS = ["shl", "ushr", "sshr"]  # only enumerated with a constant amount
+SHIFT_AMOUNTS = [1, 2, 3, 4, 8, 16, 32, 63]  # amounts paired with SHIFT_OPS
+CONSTANTS = [0, 1, MASK64]  # constant leaves added at depth 0
+VAR_NAMES_2 = ["x", "y"]  # variable leaves; the enumerator takes a prefix of this
+
+
+def _expr_sort_key(e: Expr) -> str:
+    return e.render()  # rendered text, used to order commutative operand pairs
+
+
+class ExprBank:
+    """Expressions indexed by depth and deduplicated by their evaluation signature."""
+
+    def __init__(self, envs: list[dict[str, int]]):
+        self.envs = envs  # environments every expression is evaluated on
+        self.by_depth: dict[int, list[Expr]] = {}  # depth -> kept expressions
+        self.seen_sigs: set[tuple[int, ...]] = set()  # signatures seen so far
+        self.sig_to_exprs: dict[tuple[int, ...], list[Expr]] = {}  # signature -> kept expressions
+        self.total_added = 0  # number of expressions kept
+        self.total_deduped = 0  # add() calls that hit an already-seen signature
+
+    def signature(self, expr: Expr) -> tuple[int, ...]:
+        return tuple(eval_expr(expr, env) for env in self.envs)  # one value per env
+
+    def add(self, expr: Expr, depth: int) -> bool:
+        sig = self.signature(expr)
+        self.by_depth.setdefault(depth, [])  # the depth bucket exists even if nothing is kept
+        if sig in self.seen_sigs:
+            self.total_deduped += 1  # counted even when the duplicate is kept below
+            existing = self.sig_to_exprs[sig]
+            ec = expr_cost(expr)["dmir_inst"]
+            best_ec = min(expr_cost(e)["dmir_inst"] for e in existing)
+            if ec < best_ec:  # a duplicate is kept only when strictly cheaper than all kept ones
+                existing.append(expr)
+                self.by_depth[depth].append(expr)
+                self.total_added += 1
+            return False  # False means "signature already known", even if expr was kept
+        self.seen_sigs.add(sig)  # NOTE(review): costlier duplicates are never recorded - confirm intended
+        self.sig_to_exprs.setdefault(sig, []).append(expr)
+        self.by_depth[depth].append(expr)
+        self.total_added += 1
+        return True
+
+    def all_up_to(self, depth: int) -> list[Expr]:
+        result = []  # fresh list: depth 0 first, then 1, ... up to `depth` inclusive
+        for d in range(depth + 1):
+            result.extend(self.by_depth.get(d, []))
+        return result
+
+
+def enumerate_expressions(
+    max_depth: int, num_vars: int, envs: list[dict[str, int]], max_cost: int = 6,
+    verbose: bool = False,
+) -> ExprBank:
+    """Enumerate expressions bottom-up into an ExprBank, one depth at a time.
+
+    Depth 0 holds the first `num_vars` names of VAR_NAMES_2 (so at most two
+    variables) plus CONSTANTS. Every later depth combines the previous depth's
+    expressions through `not`, BINARY_OPS and constant-amount SHIFT_OPS,
+    offering the bank only candidates whose "dmir_inst" cost is <= `max_cost`.
+    With `verbose`, a per-depth term count is logged through _log.
+
+    Returns:
+        The populated ExprBank.
+    """
+    bank = ExprBank(envs)
+
+    def offer(candidate: Expr, depth: int) -> None:
+        # Cost gate shared by every non-leaf candidate.
+        if expr_cost(candidate)["dmir_inst"] <= max_cost:
+            bank.add(candidate, depth)
+
+    # Depth 0: leaves (added without the cost gate)
+    for name in VAR_NAMES_2[:num_vars]:
+        bank.add(var(name), 0)
+    for c in CONSTANTS:
+        bank.add(const(c), 0)
+    if verbose:
+        _log(f"depth 0: {len(bank.by_depth.get(0, []))} terms")
+
+    for depth in range(1, max_depth + 1):
+        prev_exact = bank.by_depth.get(depth - 1, [])
+        prev_exact_ids = {id(e) for e in prev_exact}
+        # For depth >= 3, limit the partner pool to depth 0-1 to avoid O(n^2) on
+        # large depth-2 sets. This still discovers (depth2 op leaf) patterns.
+        pool = bank.all_up_to(1 if depth >= 3 else depth - 1)
+        # Render each operand once per depth rather than once per pair.
+        new_keyed = [(e, _expr_sort_key(e)) for e in prev_exact]
+        pool_keyed = [(e, _expr_sort_key(e)) for e in pool]
+
+        # Unary: not
+        for e in prev_exact:
+            offer(unary("not", e), depth)
+
+        for op in BINARY_OPS:
+            is_comm = op in COMMUTATIVE_OPS
+            # new_depth x pool
+            for lhs_e, lhs_key in new_keyed:
+                for rhs_e, rhs_key in pool_keyed:
+                    if is_comm and lhs_key > rhs_key:
+                        continue
+                    offer(binary(op, lhs_e, rhs_e), depth)
+            # pool x new_depth; a new_depth lhs was already paired above
+            for lhs_e, lhs_key in pool_keyed:
+                if id(lhs_e) in prev_exact_ids:
+                    continue
+                for rhs_e, rhs_key in new_keyed:
+                    if is_comm and lhs_key > rhs_key:
+                        continue
+                    offer(binary(op, lhs_e, rhs_e), depth)
+
+        # Shifts with constant amounts
+        for op in SHIFT_OPS:
+            for e in prev_exact:
+                for amt in SHIFT_AMOUNTS:
+                    offer(binary(op, e, const(amt)), depth)
+
+        if verbose:
+            d_count = len(bank.by_depth.get(depth, []))
+            _log(f"depth {depth}: +{d_count} terms (total {bank.total_added}, "
+                 f"deduped {bank.total_deduped})")
+
+    return bank
+
+
+# ---------------------------------------------------------------------------
+# Z3 verification
+# ---------------------------------------------------------------------------
+
+def expr_to_z3(expr: Expr, z3_vars: dict[str, z3.BitVecRef]) -> z3.BitVecRef:
+    """Encode `expr` as a Z3 bit-vector term; raises ValueError if unsupported."""
+    op = expr.op
+    if op == "var":
+        return z3_vars[str(expr.value)]
+    if op == "const":
+        return z3.BitVecVal(int(expr.value), 64)
+    if op == "not":
+        return ~expr_to_z3(expr.args[0], z3_vars)
+    if len(expr.args) < 2:
+        # expr.args[1] below would raise IndexError, which callers do not catch.
+        raise ValueError(f"unsupported op: {op}")
+    lhs_z3 = expr_to_z3(expr.args[0], z3_vars)
+    rhs_z3 = expr_to_z3(expr.args[1], z3_vars)
+    if op == "add":
+        return lhs_z3 + rhs_z3
+    if op == "sub":
+        return lhs_z3 - rhs_z3
+    if op == "mul":
+        return lhs_z3 * rhs_z3
+    if op == "and":
+        return lhs_z3 & rhs_z3
+    if op == "or":
+        return lhs_z3 | rhs_z3
+    if op == "xor":
+        return lhs_z3 ^ rhs_z3
+    if op in ("shl", "ushr", "sshr"):
+        # Amounts >= 64 are pinned explicitly: zero, or the sign fill for sshr.
+        too_far = z3.UGE(rhs_z3, z3.BitVecVal(64, 64))
+        if op == "shl":
+            return z3.If(too_far, z3.BitVecVal(0, 64), lhs_z3 << rhs_z3)
+        if op == "ushr":
+            return z3.If(too_far, z3.BitVecVal(0, 64), z3.LShR(lhs_z3, rhs_z3))
+        return z3.If(too_far, lhs_z3 >> z3.BitVecVal(63, 64), lhs_z3 >> rhs_z3)
+    # Ternary carry-chain ops
+    if op in ("adc", "sbb") and len(expr.args) == 3:
+        carry_z3 = expr_to_z3(expr.args[2], z3_vars)
+        if op == "adc":
+            return lhs_z3 + rhs_z3 + carry_z3
+        return lhs_z3 - rhs_z3 - carry_z3
+    raise ValueError(f"unsupported op: {op}")
+
+
+def verify_equivalence(
+    lhs: Expr, rhs: Expr, var_names: list[str], timeout_ms: int = 5000,
+) -> tuple[bool, str]:
+    """Prove lhs == rhs over 64-bit `var_names`; returns (ok, status string)."""
+    symbols = {}
+    for name in var_names:
+        symbols[name] = z3.BitVec(name, 64)
+    try:
+        encoded = (expr_to_z3(lhs, symbols), expr_to_z3(rhs, symbols))
+    except (ValueError, KeyError) as e:
+        return False, f"encode_error: {e}"
+
+    solver = z3.Solver()
+    solver.set("timeout", timeout_ms)
+    # Unsat for "the two sides differ" means they agree on every input.
+    solver.add(encoded[0] != encoded[1])
+    outcome = solver.check()
+    if outcome == z3.sat:
+        return False, "invalid"
+    return (True, "valid") if outcome == z3.unsat else (False, "timeout")
+
+
+# ---------------------------------------------------------------------------
+# Carry-chain synthesis (Phase 3)
+# ---------------------------------------------------------------------------
+
+def _carry_out_z3(a: z3.BitVecRef, b: z3.BitVecRef,
+                  cf: z3.BitVecRef) -> z3.BitVecRef:
+    """Return the carry-out of a + b + cf as a 1-bit vector.
+
+    Each operand is zero-extended by one bit so the carry lands in bit 64.
+    """
+    total = z3.ZeroExt(1, a) + z3.ZeroExt(1, b) + z3.ZeroExt(1, cf)
+    return z3.Extract(64, 64, total)
+
+
+def _borrow_out_z3(a: z3.BitVecRef, b: z3.BitVecRef,
+                   bf: z3.BitVecRef) -> z3.BitVecRef:
+    """Return the borrow-out of a - b - bf as a 1-bit vector.
+
+    Each operand is zero-extended by one bit so the borrow lands in bit 64.
+    """
+    remainder = z3.ZeroExt(1, a) - z3.ZeroExt(1, b) - z3.ZeroExt(1, bf)
+    return z3.Extract(64, 64, remainder)
+
+
+def verify_carry_rule(
+    lhs: Expr, rhs: Expr, var_names: list[str],
+    carry_mode: str = "carry_zero", timeout_ms: int = 10000,
+) -> tuple[bool, str]:
+    """
+    Verify equivalence of a carry-chain rule under carry constraints.
+
+    carry_mode:
+      - "carry_zero": cf_in is 0 (safe at chain head or after non-carrying op)
+      - "carry_any": cf_in is unconstrained {0, 1} (universally valid)
+      - "result_and_carry": both result AND carry_out must match. The
+        outgoing flag is compared when both sides are adc (carry) or both
+        are sbb (borrow); any other shape is checked on the result alone,
+        because no outgoing flag can be derived for it here.
+    Any other carry_mode value is treated like "carry_any".
+
+    var_names lists the variables declared as 64-bit symbols; "cf" is always
+    bound separately. timeout_ms is the Z3 solver timeout.
+
+    Returns (ok, status) where status is one of "valid", "valid_with_carry",
+    "invalid", "invalid_carry_mismatch", "timeout" or "encode_error: ...".
+    """
+    z3_vars = {name: z3.BitVec(name, 64) for name in var_names if name != "cf"}
+
+    # cf is a single free bit widened to 64 bits, so it is 0 or 1 by
+    # construction and needs no extra solver constraint.
+    if carry_mode == "carry_zero":
+        z3_vars["cf"] = z3.BitVecVal(0, 64)
+    else:
+        z3_vars["cf"] = z3.ZeroExt(63, z3.BitVec("cf_bit", 1))
+
+    try:
+        lhs_z3 = expr_to_z3(lhs, z3_vars)
+        rhs_z3 = expr_to_z3(rhs, z3_vars)
+    except (ValueError, KeyError) as e:
+        return False, f"encode_error: {e}"
+
+    solver = z3.Solver()
+    solver.set("timeout", timeout_ms)
+
+    # Flag produced for the next limb: carry for adc, borrow for sbb.
+    flag_out = {"adc": _carry_out_z3, "sbb": _borrow_out_z3}.get(lhs.op)
+    if carry_mode == "result_and_carry" and flag_out and lhs.op == rhs.op:
+        # Operands are re-encoded individually; expr_to_z3 above already
+        # accepted both sides, so each has exactly three operands.
+        lhs_flag = flag_out(*[expr_to_z3(arg, z3_vars) for arg in lhs.args])
+        rhs_flag = flag_out(*[expr_to_z3(arg, z3_vars) for arg in rhs.args])
+        # A counterexample is any input where the result or the flag differs.
+        solver.add(z3.Or(lhs_z3 != rhs_z3, lhs_flag != rhs_flag))
+        result = solver.check()
+        if result == z3.unsat:
+            return True, "valid_with_carry"
+        if result == z3.sat:
+            return False, "invalid_carry_mismatch"
+        return False, "timeout"
+
+    # Result-only check: unsat means no input makes the two sides differ.
+    solver.add(lhs_z3 != rhs_z3)
+    result = solver.check()
+    if result == z3.unsat:
+        return True, "valid"
+    if result == z3.sat:
+        return False, "invalid"
+    return False, "timeout"
+
+
+def synthesize_carry_rules(verbose: bool = True) -> list[dict]:
+    """
+    Synthesize ADC/SBB rewrite rules with carry-chain safety proofs.
+    Each candidate is checked in two modes and classified by the outcome:
+      1. carry_any holds: "universal" (valid for either incoming carry)
+      2. only carry_zero holds: "carry_zero_only" (needs cf_in = 0)
+      3. neither holds: the candidate is left out of the returned list
+    """
+    from mine_dmir_seed_rules import ternary
+
+    results = []
+
+    # Build candidate ADC/SBB rules as the cross product of forms below
+    candidates = []
+    var_x = var("x")
+    var_y = var("y")
+    cf = var("cf")  # verify_carry_rule binds this name to the carry input
+    zero = const(0)
+    one = const(1)
+
+    # ADC candidates: each adc form is paired with every simpler form
+    adc_forms = [
+        (ternary("adc", var_x, var_y, cf), "adc(x, y, cf)"),
+        (ternary("adc", var_x, zero, cf), "adc(x, 0, cf)"),
+        (ternary("adc", zero, var_y, cf), "adc(0, y, cf)"),
+        (ternary("adc", var_x, var_x, cf), "adc(x, x, cf)"),
+        (ternary("adc", zero, zero, cf), "adc(0, 0, cf)"),
+    ]
+
+    simpler_forms = [
+        (binary("add", var_x, var_y), "add(x, y)"),
+        (var_x, "x"),
+        (var_y, "y"),
+        (zero, "0"),
+        (binary("add", var_x, cf), "add(x, cf)"),
+        (binary("add", var_y, cf), "add(y, cf)"),
+        (cf, "cf"),
+        (binary("shl", var_x, one), "shl(x, 1)"),
+        (binary("add", binary("add", var_x, var_x), cf), "add(add(x,x), cf)"),
+        (binary("add", binary("add", var_x, var_y), cf), "add(add(x,y), cf)"),
+    ]
+
+    for adc_expr, adc_name in adc_forms:
+        for simple_expr, simple_name in simpler_forms:
+            candidates.append({
+                "lhs": adc_expr,
+                "rhs": simple_expr,
+                "lhs_name": adc_name,
+                "rhs_name": simple_name,
+                "op": "adc",
+            })
+
+    # SBB candidates: each sbb form is paired with every simpler form
+    sbb_forms = [
+        (ternary("sbb", var_x, var_y, cf), "sbb(x, y, cf)"),
+        (ternary("sbb", var_x, zero, cf), "sbb(x, 0, cf)"),
+        (ternary("sbb", zero, var_y, cf), "sbb(0, y, cf)"),
+        (ternary("sbb", var_x, var_x, cf), "sbb(x, x, cf)"),
+    ]
+
+    sbb_simpler = [
+        (binary("sub", var_x, var_y), "sub(x, y)"),
+        (var_x, "x"),
+        (binary("sub", zero, var_y), "sub(0, y)"),
+        (zero, "0"),
+        (binary("sub", var_x, cf), "sub(x, cf)"),
+        (binary("sub", zero, cf), "sub(0, cf)"),
+        (binary("sub", binary("sub", var_x, var_y), cf), "sub(sub(x,y), cf)"),
+    ]
+
+    for sbb_expr, sbb_name in sbb_forms:
+        for simple_expr, simple_name in sbb_simpler:
+            candidates.append({
+                "lhs": sbb_expr,
+                "rhs": simple_expr,
+                "lhs_name": sbb_name,
+                "rhs_name": simple_name,
+                "op": "sbb",
+            })
+
+    if verbose:
+        _log(f"carry-chain candidates: {len(candidates)}")
+
+    # Test each candidate under both carry modes (both queries always run)
+    for c in candidates:
+        lhs_e, rhs_e = c["lhs"], c["rhs"]
+        var_names = sorted(extract_var_names(lhs_e) | extract_var_names(rhs_e))
+
+        # Mode 1: incoming carry left free (0 or 1)
+        valid_any, status_any = verify_carry_rule(
+            lhs_e, rhs_e, var_names, "carry_any")
+
+        # Mode 2: incoming carry fixed to 0
+        valid_zero, status_zero = verify_carry_rule(
+            lhs_e, rhs_e, var_names, "carry_zero")
+
+        if valid_any or valid_zero:
+            safety = "universal" if valid_any else "carry_zero_only"  # a carry_any timeout also lands here
+            results.append({
+                "lhs": lhs_e.render(),
+                "rhs": rhs_e.render(),
+                "lhs_desc": c["lhs_name"],
+                "rhs_desc": c["rhs_name"],
+                "op": c["op"],
+                "safety": safety,
+                "z3_any": status_any,  # raw status strings, kept for auditing
+                "z3_zero": status_zero,
+            })
+            if verbose:
+                _log(f"  ✓ {c['lhs_name']} → {c['rhs_name']}  [{safety}]")
+
+    if verbose:
+        n_univ = sum(1 for r in results if r["safety"] == "universal")
+        n_zero = sum(1 for r in results if r["safety"] == "carry_zero_only")
+        _log(f"carry rules found: {len(results)} "
+             f"({n_univ} universal, {n_zero} carry_zero_only)")
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Candidate extraction and filtering
+# ---------------------------------------------------------------------------
+
+def extract_var_names(expr: Expr) -> set[str]:
+    """Collect the names of all `var` leaves found in `expr`."""
+    if expr.op == "var":
+        return {str(expr.value)}
+    names: set[str] = set()
+    names.update(*(extract_var_names(child) for child in expr.args))
+    return names
+
+
+def extract_candidates(bank: ExprBank) -> list[dict]:
+    """Pair each signature's cheapest expression with the ones it dominates.
+
+    Each candidate rewrites the costlier `lhs_expr` into the cheapest `rhs_expr`.
+    """
+    def rank(e: Expr):
+        cost = expr_cost(e)
+        return (cost["dmir_inst"], cost.get("select_depth", 0),
+                cost.get("adc_chain", 0), e.render())
+
+    candidates = []
+    for exprs in bank.sig_to_exprs.values():
+        if len(exprs) < 2:
+            continue
+        # Cheapest first; ties broken by select depth, adc chain, then text.
+        best, *others = sorted(exprs, key=rank)
+        best_cost = expr_cost(best)
+        for other in others:
+            other_cost = expr_cost(other)
+            if not dominates(best_cost, other_cost):
+                continue
+            candidates.append({
+                "lhs_expr": other,
+                "rhs_expr": best,
+                "lhs": other.render(),
+                "rhs": best.render(),
+                "cost": {
+                    "lhs": other_cost,
+                    "rhs": best_cost,
+                    "delta": cost_delta(other_cost, best_cost),
+                },
+            })
+    return candidates
+
+
+def filter_novel(
+    candidates: list[dict],
+    rule_patterns: list[tuple[Expr, Expr]],
+    rule_keys: set[tuple[str, str]],
+) -> list[dict]:
+    """Keep the candidates that no existing rule already matches or covers."""
+    def is_known(lhs_e: Expr, rhs_e: Expr) -> bool:
+        # Exact canonical-key match first, then the pattern coverage test.
+        cl, cr = canonicalize_pair(lhs_e, rhs_e)
+        if build_candidate_key(cl, cr) in rule_keys:
+            return True
+        return is_candidate_covered(lhs_e, rhs_e, rule_patterns, rule_keys)
+
+    return [
+        c for c in candidates if not is_known(c["lhs_expr"], c["rhs_expr"])
+    ]
+
+
+def auto_name(lhs: Expr, rhs: Expr, index: int) -> str:
+    """Name a rule "synth-<ops>-<index>" from up to three sorted op names."""
+    ops = set()
+    pending = [lhs, rhs]
+    while pending:  # iterative walk over both expression trees
+        node = pending.pop()
+        if node.op not in ("var", "const"):
+            ops.add(node.op)
+        pending.extend(node.args)
+    if not ops:
+        return f"synth-identity-{index:03d}"
+    tag = "-".join(sorted(ops)[:3])
+    return f"synth-{tag}-{index:03d}"
+
+
+# ---------------------------------------------------------------------------
+# Main pipeline
+# ---------------------------------------------------------------------------
+
+def _log(msg: str):
+    """Write a "[synth] "-prefixed progress line to stderr and flush it."""
+    print(f"[synth] {msg}", file=sys.stderr, flush=True)
+
+
+def run_synthesis(args) -> dict:
+    """Enumerate, filter, verify, dedupe and rank candidates; return the report dict."""
+    t0 = time.time()
+    # Step 1: Build the sample environments used for evaluation signatures
+    envs = build_sample_envs()
+    _log(f"sample envs: {len(envs)}")
+
+    # Step 2: Enumerate expressions into the bank
+    _log(f"enumerating expressions (depth={args.max_depth}, vars={args.num_vars}, "
+         f"max_cost={args.max_cost})...")
+    bank = enumerate_expressions(
+        max_depth=args.max_depth,
+        num_vars=args.num_vars,
+        envs=envs,
+        max_cost=args.max_cost,
+        verbose=True,
+    )
+    _log(f"expression bank: {bank.total_added} terms, "
+         f"{len(bank.sig_to_exprs)} unique signatures")
+
+    # Step 3: Extract candidates (pairs that share a signature)
+    raw = extract_candidates(bank)
+    _log(f"raw candidates: {len(raw)}")
+
+    # Step 4: Filter against existing rules (nothing is filtered without --rules)
+    rule_patterns = load_rule_patterns(args.rules) if args.rules else []
+    rule_keys = build_rule_key_set(rule_patterns)
+    novel = filter_novel(raw, rule_patterns, rule_keys)
+    _log(f"novel candidates (not in existing rules): {len(novel)}")
+
+    # Step 5: Z3 verification (skipped under --no-z3 or with no novel candidates)
+    verified = []
+    rejected = []
+    if not args.no_z3 and novel:
+        _log(f"verifying {len(novel)} candidates with Z3 (timeout={args.z3_timeout}ms)...")
+        for i, c in enumerate(novel):
+            var_names = sorted(extract_var_names(c["lhs_expr"]) |
+                               extract_var_names(c["rhs_expr"]))
+            is_valid, status = verify_equivalence(
+                c["lhs_expr"], c["rhs_expr"], var_names, args.z3_timeout,
+            )
+            if is_valid:
+                verified.append(c)
+            else:
+                c["z3_status"] = status  # timeouts and encode errors are rejected too
+                rejected.append(c)
+            if (i + 1) % 50 == 0:
+                _log(f"  verified {i + 1}/{len(novel)} "
+                     f"(valid={len(verified)}, rejected={len(rejected)})")
+        _log(f"Z3 done: {len(verified)} valid, {len(rejected)} rejected")
+    elif args.no_z3:
+        verified = novel  # accepted on sampling evidence only
+        _log("Z3 skipped (--no-z3)")
+
+    # Step 6: Deduplicate by canonical key (the first occurrence wins)
+    seen_keys: set[tuple[str, str]] = set()
+    deduped = []
+    for c in verified:
+        cl, cr = canonicalize_pair(c["lhs_expr"], c["rhs_expr"])
+        key = build_candidate_key(cl, cr)
+        if key not in seen_keys:
+            seen_keys.add(key)
+            deduped.append(c)
+    _log(f"after dedup: {len(deduped)} rules")
+
+    # Step 7: Sort ascending by runtime_calls delta, then dmir_inst delta
+    deduped.sort(
+        key=lambda c: (
+            c["cost"]["delta"].get("runtime_calls", 0),
+            c["cost"]["delta"]["dmir_inst"],
+        )
+    )
+
+    # Step 8: Assign names and format (the index follows the sorted order)
+    rules_out = []
+    for i, c in enumerate(deduped):
+        name = auto_name(c["lhs_expr"], c["rhs_expr"], i)
+        rules_out.append(
+            {
+                "name": name,
+                "status": "synthesized",
+                "inputs": sorted(
+                    extract_var_names(c["lhs_expr"]) | extract_var_names(c["rhs_expr"])
+                ),
+                "lhs": c["lhs"],
+                "rhs": c["rhs"],
+                "cost": c["cost"],
+                "validation": {
+                    "modes": ["smt"] if not args.no_z3 else ["interpreter_sample"],
+                    "coverage": [],
+                },
+            }
+        )
+
+    elapsed = time.time() - t0
+    report = {
+        "summary": {
+            "term_count": bank.total_added,
+            "unique_signatures": len(bank.sig_to_exprs),
+            "raw_candidate_count": len(raw),
+            "novel_count": len(novel),
+            "z3_verified": len(verified),  # NOTE(review): equals novel_count under --no-z3
+            "z3_rejected": len(rejected),
+            "final_rule_count": len(rules_out),
+            "max_depth": args.max_depth,
+            "num_vars": args.num_vars,
+            "elapsed_seconds": round(elapsed, 2),
+        },
+        "rules": rules_out,
+        "rejected": [
+            {"lhs": r["lhs"], "rhs": r["rhs"], "z3_status": r.get("z3_status", "?")}
+            for r in rejected
+        ],
+    }
+    return report
+
+
+def parse_args():
+    """Parse the synthesis command-line options from sys.argv."""
+    parser = argparse.ArgumentParser(description="Synthesize dMIR rewrite rules")
+    add = parser.add_argument
+    for flag, default in (("--max-depth", 3), ("--num-vars", 2), ("--max-cost", 6)):
+        add(flag, type=int, default=default)
+    add("--rules", type=str, default=None,
+        help="Existing rules JSON to filter against")
+    add("--out", type=str, default=None,
+        help="Output report path (default: stdout)")
+    add("--no-z3", action="store_true", help="Skip Z3 verification (sampling only)")
+    add("--z3-timeout", type=int, default=5000, help="Z3 timeout per query in ms")
+    add("--include-carry", action="store_true",
+        help="Run carry-chain ADC/SBB synthesis (Phase 3)")
+
+    return parser.parse_args()
+
+
+def main():
+    """Run the selected synthesis mode and emit its JSON report.
+
+    --include-carry replaces the regular pipeline rather than extending it.
+    The report is written to --out when given, otherwise printed to stdout.
+    """
+    args = parse_args()
+
+    if args.include_carry:
+        report = {"carry_rules": synthesize_carry_rules(verbose=True)}
+        label = "carry report"
+    else:
+        report = run_synthesis(args)
+        label = "report"
+
+    output = json.dumps(report, indent=2)
+    if not args.out:
+        print(output)
+        return
+    # Confirm the destination on stderr once the file is written.
+    pathlib.Path(args.out).write_text(output, encoding="utf-8")
+    _log(f"{label} written to {args.out}")
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/test_check_compiler_pass_timing_budget.py b/tools/test_check_compiler_pass_timing_budget.py
new file mode 100644
index 000000000..c2d91198c
--- /dev/null
+++ b/tools/test_check_compiler_pass_timing_budget.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Test wrapper for check_compiler_pass_timing_budget.py.
+
+Called by CMakeLists.txt as:
+    test_check_compiler_pass_timing_budget.py <source_dir>
+
+Verifies the budget-checker tool works correctly by building a synthetic timing
+report that satisfies both committed budget files and running the checker against
+each one.  No dtvm binary is needed.
+"""
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+BUDGET_FILES = [
+    "tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json",
+    "tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json",
+]
+
+# Case names that appear in both budget baselines
+CASE_NAMES = [
+    "add",
+    "mul",
+    "div",
+    "shl",
+    "shr",
+    "sar",
+    "byte",
+    "eq_true",
+    "lt_true",
+    "jump",
+    "u256_shl_add_mul",
+    "u256_mul_add_chain",
+    "u256_shr_add_shl",
+    "bool_and_or_xor_not",
+    "bool_xor_not_chain",
+]
+
+def make_phase_stats(time_ms, share_pct):
+    """Build phase stats in which every statistic reports the same value.
+
+    Args:
+        time_ms: value stored for each timing statistic.
+        share_pct: value stored for each share_of_total_pct statistic.
+
+    Returns:
+        dict with mean/median/p95/min/max plus a nested share_of_total_pct.
+    """
+
+    stat_names = ("mean", "median", "p95", "min", "max")
+    # Flat series: all five statistics collapse to the single given value.
+    stats = dict.fromkeys(stat_names, time_ms)
+    stats["share_of_total_pct"] = dict.fromkeys(stat_names, share_pct)
+    return stats
+
+def make_case_summary(total_time_ms, pass_name, pass_time_ms, pass_share_pct):
+    """Summary for a single run whose only phase is `pass_name`."""
+    summary = {}
+    summary["total_time_ms"] = {"mean": total_time_ms, "median": total_time_ms}
+    summary["phases"] = {
+        pass_name: make_phase_stats(pass_time_ms, pass_share_pct),
+    }
+    summary.update(runs=1, record_count=1)
+    return summary
+
+def build_synthetic_report(pass_name, total_time_ms, pass_time_ms, pass_share_pct):
+    """Build a manifest-style timing report with identical stats per case.
+
+    One case per CASE_NAMES entry; "overall" repeats the same summary.
+    """
+    def summary():
+        return make_case_summary(
+            total_time_ms, pass_name, pass_time_ms, pass_share_pct
+        )
+
+    cases = [
+        {
+            "name": name,
+            "input": f"/synthetic/{name}.evm.hex",
+            "summary": summary(),
+        }
+        for name in CASE_NAMES
+    ]
+    overall = summary()
+    overall.update(runs=1, record_count=len(CASE_NAMES))
+    return {
+        "manifest": "/synthetic/manifest.json",
+        "case_count": len(CASE_NAMES),
+        "cases": cases,
+        "overall": overall,
+    }
+
+def run_checker(checker, budget_path, report_path):
+    """Run the checker script on one budget/report pair.
+
+    Missing cases are allowed; a non-zero exit is returned, not raised.
+    """
+    argv = [sys.executable, str(checker)]
+    argv += ["--budget", str(budget_path)]
+    argv += ["--report", str(report_path)]
+    argv.append("--allow-missing-cases")
+    # check=False: callers inspect returncode themselves.
+    return subprocess.run(argv, capture_output=True, text=True, check=False)
+
+def main():
+    """Run accept/reject checks of the budget checker for each budget file.
+
+    Returns 0 when all checks pass, 1 on usage errors, missing files or failures.
+    """
+    if len(sys.argv) != 2:
+        print(
+            "usage: test_check_compiler_pass_timing_budget.py <source_dir>",
+            file=sys.stderr,
+        )
+        return 1
+
+    source_dir = pathlib.Path(sys.argv[1]).resolve()
+    checker = source_dir / "tools" / "check_compiler_pass_timing_budget.py"
+    if not checker.exists():
+        print(f"checker not found: {checker}", file=sys.stderr)
+        return 1
+
+    failures = []
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        tmp = pathlib.Path(tmp_dir)
+
+        for rel_budget in BUDGET_FILES:
+            budget_path = source_dir / rel_budget
+            if not budget_path.exists():
+                print(f"budget file not found: {budget_path}", file=sys.stderr)
+                return 1
+
+            budget = json.loads(budget_path.read_text(encoding="utf-8"))
+            target_pass = budget["target_pass"]
+            baseline_overall = budget["baseline"]["overall_total_time_ms_median"]
+            tag = pathlib.Path(rel_budget).stem  # names the temp files and messages
+
+            # Choose values well inside all thresholds:
+            #   - pass share p95 = 0.1 %  (budget typically 1.2–2.0 %)
+            #   - pass time p95  = 0.001 ms (budget 0.01–0.06 ms)
+            #   - total time = baseline (0 % regression)
+            report = build_synthetic_report(
+                pass_name=target_pass,
+                total_time_ms=baseline_overall,
+                pass_time_ms=0.001,
+                pass_share_pct=0.1,
+            )
+
+            report_path = tmp / f"report_{tag}.json"
+            report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
+
+            result = run_checker(checker, budget_path, report_path)
+            if result.returncode != 0:
+                failures.append(
+                    f"checker failed for {tag} (exit {result.returncode}):\n"
+                    f"{result.stderr.strip()}"
+                )
+                continue
+
+            # Also verify that a clearly over-budget report is rejected
+            bad_report = build_synthetic_report(
+                pass_name=target_pass,
+                total_time_ms=baseline_overall,
+                pass_time_ms=999.0,         # massively over time budget
+                pass_share_pct=99.0,        # massively over share budget
+            )
+            bad_report_path = tmp / f"bad_report_{tag}.json"
+            bad_report_path.write_text(
+                json.dumps(bad_report, indent=2), encoding="utf-8"
+            )
+
+            bad_result = run_checker(checker, budget_path, bad_report_path)
+            if bad_result.returncode == 0:
+                failures.append(
+                    f"checker INCORRECTLY passed an over-budget report for {tag}"
+                )
+
+    if failures:
+        for msg in failures:
+            print(msg, file=sys.stderr)
+        print(
+            "FAIL: test_check_compiler_pass_timing_budget",
+            file=sys.stderr,
+        )
+        return 1
+
+    print("PASS: test_check_compiler_pass_timing_budget")
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_check_dmir_rewrite_rules.py b/tools/test_check_dmir_rewrite_rules.py
new file mode 100644
index 000000000..ef4581d1c
--- /dev/null
+++ b/tools/test_check_dmir_rewrite_rules.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import copy
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+VALID_RULE_TEMPLATE = {  # base rule entry; main() builds its synthetic rule files from variants of it
+    "name": "test-add-zero",
+    "status": "accepted",
+    "inputs": ["x"],
+    "lhs": "(add x 0:i64)",
+    "rhs": "x",
+    "cost": {  # in this entry each delta counter equals the rhs counter minus the lhs counter
+        "lhs": {"dmir_inst": 1, "select_depth": 0, "adc_chain": 0, "runtime_calls": 0},
+        "rhs": {"dmir_inst": 0, "select_depth": 0, "adc_chain": 0, "runtime_calls": 0},
+        "delta": {"dmir_inst": -1, "select_depth": 0, "adc_chain": 0, "runtime_calls": 0},
+    },
+    "validation": {
+        "modes": ["interpreter_fuzz"],
+        "coverage": ["DMirValidation.FuzzesAddZeroRewrite"],
+    },
+}
+
+
+def run_checker(source_dir, rules_path, gtest_binary=None):
+    """Run tools/check_dmir_rewrite_rules.py on rules_path and return the CompletedProcess."""
+    checker = pathlib.Path(source_dir, "tools", "check_dmir_rewrite_rules.py")
+    coverage_args = ["--gtest-binary", str(gtest_binary)] if gtest_binary else []
+    argv = [sys.executable, str(checker), "--rules", str(rules_path), *coverage_args]
+    return subprocess.run(argv, capture_output=True, text=True)
+
+
+def write_rules(path, rules):  # serialize rules as a {"rules": [...]} JSON document (UTF-8) at path
+    path.write_text(json.dumps(dict(rules=rules)), encoding="utf-8")
+
+
+def main():
+    """Test check_dmir_rewrite_rules.py on real and synthetic rules; return 0 on pass, else 1."""
+    if len(sys.argv) not in (2, 3):
+        print(f"Usage: {sys.argv[0]} <source_dir> [<dmirValidationTests_binary>]",
+              file=sys.stderr)
+        return 1
+    source_dir = pathlib.Path(sys.argv[1])
+    gtest_binary = pathlib.Path(sys.argv[2]) if len(sys.argv) == 3 else None
+    rules_path = source_dir / "src/compiler/mir/dmir_rewrite_rules.json"
+
+    if not rules_path.exists():
+        print(f"Rules file not found: {rules_path}", file=sys.stderr)
+        return 1
+    # Positive case: the repository's own rules must pass and print the success message.
+    proc = run_checker(source_dir, rules_path, gtest_binary)
+    if proc.returncode != 0:
+        print("FAIL: checker failed on real dmir rules", file=sys.stderr)
+        print(proc.stderr, file=sys.stderr)
+        return 1
+    if "dmir rewrite rule metadata is complete" not in proc.stdout:
+        print("FAIL: expected success message not found", file=sys.stderr)
+        return 1
+    # When a binary was supplied, the same rules must also pass with --gtest-binary omitted.
+    if gtest_binary:
+        proc2 = run_checker(source_dir, rules_path, None)
+        if proc2.returncode != 0:
+            print("FAIL: checker failed on real dmir rules without binary", file=sys.stderr)
+            return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        # Negative case: two rules sharing a name (lhs differs) must report "duplicate".
+        dup_path = tmpdir / "dup.json"
+        rule_a = copy.deepcopy(VALID_RULE_TEMPLATE)
+        rule_b = copy.deepcopy(VALID_RULE_TEMPLATE)
+        rule_b["lhs"] = "(add x 1:i64)"  # different expression so only name duplicates
+        write_rules(dup_path, [rule_a, rule_b])
+        proc3 = run_checker(source_dir, dup_path, None)
+        if proc3.returncode == 0:
+            print("FAIL: checker should fail on duplicate rule name", file=sys.stderr)
+            return 1
+        if "duplicate" not in proc3.stderr:
+            print("FAIL: expected 'duplicate' in error output", file=sys.stderr)
+            return 1
+        # Negative case: a status of "unknown_status" must report "invalid status".
+        bad_status = copy.deepcopy(VALID_RULE_TEMPLATE)
+        bad_status["name"] = "bad-status-rule"
+        bad_status["status"] = "unknown_status"
+        bad_path = tmpdir / "bad_status.json"
+        write_rules(bad_path, [bad_status])
+        proc4 = run_checker(source_dir, bad_path, None)
+        if proc4.returncode == 0:
+            print("FAIL: checker should fail on invalid status", file=sys.stderr)
+            return 1
+        if "invalid status" not in proc4.stderr:
+            print("FAIL: expected 'invalid status' in error output", file=sys.stderr)
+            return 1
+        # Negative case: a distinct name whose lhs is the template's lhs with operands swapped.
+        rule_c = copy.deepcopy(VALID_RULE_TEMPLATE)
+        rule_c["name"] = "test-add-zero-commuted"
+        # (add 0:i64 x) normalizes to same canonical key as (add x 0:i64) due to commutativity
+        rule_c["lhs"] = "(add 0:i64 x)"
+        dup_canonical_path = tmpdir / "dup_canonical.json"
+        write_rules(dup_canonical_path, [VALID_RULE_TEMPLATE, rule_c])
+        proc5 = run_checker(source_dir, dup_canonical_path, None)
+        if proc5.returncode == 0:
+            print("FAIL: checker should fail on duplicate canonical lhs/rhs", file=sys.stderr)
+            return 1
+        if "duplicates canonical rewrite" not in proc5.stderr:
+            print("FAIL: expected 'duplicates canonical rewrite' in error output", file=sys.stderr)
+            return 1
+        # Negative case: modes == ["interpreter_sample"] must yield "interpreter_fuzz or smt".
+        no_semantic = copy.deepcopy(VALID_RULE_TEMPLATE)
+        no_semantic["name"] = "no-semantic-mode"
+        no_semantic["validation"]["modes"] = ["interpreter_sample"]
+        no_semantic_path = tmpdir / "no_semantic.json"
+        write_rules(no_semantic_path, [no_semantic])
+        proc6 = run_checker(source_dir, no_semantic_path, None)
+        if proc6.returncode == 0:
+            print("FAIL: checker should fail on rule with no semantic mode", file=sys.stderr)
+            return 1
+        if "interpreter_fuzz or smt" not in proc6.stderr:
+            print("FAIL: expected 'interpreter_fuzz or smt' in error output", file=sys.stderr)
+            return 1
+        # Negative case (needs the binary): a made-up coverage name must be reported as missing.
+        if gtest_binary:
+            missing_cov = copy.deepcopy(VALID_RULE_TEMPLATE)
+            missing_cov["name"] = "missing-coverage-rule"
+            missing_cov["lhs"] = "(sub x 0:i64)"
+            missing_cov["cost"]["lhs"]["dmir_inst"] = 1  # NOTE(review): same value as the template
+            missing_cov["validation"]["coverage"] = ["NonExistentSuite.NonExistentTest"]
+            missing_path = tmpdir / "missing_cov.json"
+            write_rules(missing_path, [missing_cov])
+            proc7 = run_checker(source_dir, missing_path, gtest_binary)
+            if proc7.returncode == 0:
+                print("FAIL: checker should fail on missing gtest coverage entry", file=sys.stderr)
+                return 1
+            if "missing gtest coverage" not in proc7.stderr:
+                print("FAIL: expected 'missing gtest coverage' in error output", file=sys.stderr)
+                return 1
+
+    print("PASS: test_check_dmir_rewrite_rules")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_collect_compiler_pass_timings.py b/tools/test_collect_compiler_pass_timings.py
new file mode 100644
index 000000000..c618ae5e3
--- /dev/null
+++ b/tools/test_collect_compiler_pass_timings.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Test wrapper for collect_compiler_pass_timings.py.
+
+Called by CMakeLists.txt as:
+    test_collect_compiler_pass_timings.py <source_dir> <dtvm_binary>
+"""
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def main():
+    """Run the collector on the "add" case and check the report layout; return 0 on pass, else 1."""
+    if len(sys.argv) != 3:
+        print(
+            "usage: test_collect_compiler_pass_timings.py <source_dir> <dtvm_binary>",
+            file=sys.stderr,
+        )
+        return 1
+    source_dir = pathlib.Path(sys.argv[1]).resolve()
+    dtvm_binary = pathlib.Path(sys.argv[2]).resolve()
+
+    collector = source_dir / "tools" / "collect_compiler_pass_timings.py"
+    manifest = source_dir / "tests" / "evm_asm" / "compiler_pass_timing_manifest.json"
+    # Bail out early when the collector script, the manifest or the dtvm binary is missing.
+    if not collector.exists():
+        print(f"collector not found: {collector}", file=sys.stderr)
+        return 1
+    if not manifest.exists():
+        print(f"manifest not found: {manifest}", file=sys.stderr)
+        return 1
+    if not dtvm_binary.exists():
+        print(f"dtvm binary not found: {dtvm_binary}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        output_path = pathlib.Path(tmp_dir) / "timing_report.json"
+
+        # Use --case to select only a single small case (add) for speed.
+        cmd = [
+            sys.executable,
+            str(collector),
+            "--dtvm",
+            str(dtvm_binary),
+            "--manifest",
+            str(manifest),
+            "--runs",
+            "1",
+            "--case",
+            "add",
+            "--output",
+            str(output_path),
+            "--",  # NOTE(review): later args presumably go to dtvm itself; confirm in collector
+            "--format",
+            "evm",
+            "--mode",
+            "multipass",
+            "--compile-only",
+        ]
+        # check=False: a non-zero exit is reported by the branch below instead of raising.
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+
+        if result.returncode != 0:
+            print(result.stderr, file=sys.stderr)
+            print(
+                f"FAIL: test_collect_compiler_pass_timings — collector exited with "
+                f"code {result.returncode}",
+                file=sys.stderr,
+            )
+            return 1
+
+        if not output_path.exists():
+            print(
+                "FAIL: test_collect_compiler_pass_timings — output JSON was not written",
+                file=sys.stderr,
+            )
+            return 1
+        # The report must parse as JSON before any of its fields are inspected.
+        try:
+            report = json.loads(output_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(
+                f"FAIL: test_collect_compiler_pass_timings — invalid JSON: {exc}",
+                file=sys.stderr,
+            )
+            return 1
+
+        # Required top-level fields for a manifest run
+        for field in ("manifest", "case_count", "cases", "overall"):
+            if field not in report:
+                print(
+                    f"FAIL: test_collect_compiler_pass_timings — missing field '{field}'",
+                    file=sys.stderr,
+                )
+                return 1
+        # Required fields of the "overall" aggregate.
+        overall = report["overall"]
+        for field in ("runs", "record_count", "total_time_ms", "phases"):
+            if field not in overall:
+                print(
+                    f"FAIL: test_collect_compiler_pass_timings — overall missing "
+                    f"field '{field}'",
+                    file=sys.stderr,
+                )
+                return 1
+        # overall.total_time_ms must carry at least the mean and median statistics.
+        total_time = overall["total_time_ms"]
+        for stat in ("mean", "median"):
+            if stat not in total_time:
+                print(
+                    f"FAIL: test_collect_compiler_pass_timings — "
+                    f"total_time_ms missing stat '{stat}'",
+                    file=sys.stderr,
+                )
+                return 1
+
+    print("PASS: test_collect_compiler_pass_timings")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_mine_dmir_bootstrap_config.py b/tools/test_mine_dmir_bootstrap_config.py
new file mode 100644
index 000000000..1017588f7
--- /dev/null
+++ b/tools/test_mine_dmir_bootstrap_config.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_miner(source_dir, extra_args=()):
+    """Run tools/mine_dmir_seed_rules.py with extra_args and return the CompletedProcess."""
+    miner = pathlib.Path(source_dir, "tools", "mine_dmir_seed_rules.py")
+    return subprocess.run([sys.executable, str(miner), *extra_args], capture_output=True, text=True)
+
+
+def main():
+    """Check the miner's --config mode against the bootstrap config; return 0 on pass, else 1."""
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <source_dir>", file=sys.stderr)
+        return 1
+
+    source_dir = pathlib.Path(sys.argv[1])
+    bootstrap_config = source_dir / "src/compiler/mir/dmir_rewrite_mining_bootstrap.json"
+    if not bootstrap_config.exists():
+        print(f"Bootstrap config not found: {bootstrap_config}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        out_path = tmpdir / "bootstrap_candidates.json"
+        proc = run_miner(source_dir, [
+            "--config", str(bootstrap_config),
+            "--out", str(out_path),
+        ])
+        if proc.returncode != 0:
+            print("FAIL: miner exited non-zero with bootstrap config", file=sys.stderr)
+            print(proc.stderr, file=sys.stderr)
+            return 1
+        try:
+            result = json.loads(out_path.read_text(encoding="utf-8"))
+        except (OSError, json.JSONDecodeError) as exc:  # OSError: output file was never written
+            print(f"FAIL: could not load output JSON: {exc}", file=sys.stderr)
+            return 1
+
+        # Validate the layout before any lookup so a missing key is reported, not raised.
+        for key in ("summary", "candidates", "curated_candidates",
+                    "covered_candidates", "novel_candidates"):
+            if key not in result:
+                print(f"FAIL: output missing key '{key}'", file=sys.stderr)
+                return 1
+        for key in ("term_count", "sample_count", "candidate_count",
+                    "curated_candidate_count", "covered_candidate_count",
+                    "novel_candidate_count", "config_supplied"):
+            if key not in result["summary"]:
+                print(f"FAIL: summary missing key '{key}'", file=sys.stderr)
+                return 1
+        if result["summary"].get("config_supplied") is not True:
+            print("FAIL: config_supplied should be true when --config is used", file=sys.stderr)
+            return 1
+
+        # Baseline run without --config, used only to compare term counts.
+        default_out = tmpdir / "default_candidates.json"
+        proc2 = run_miner(source_dir, ["--out", str(default_out)])
+        if proc2.returncode != 0:
+            print("FAIL: default miner failed", file=sys.stderr)
+            return 1
+        default_result = json.loads(default_out.read_text(encoding="utf-8"))
+        if result["summary"]["term_count"] <= default_result["summary"]["term_count"]:
+            print("FAIL: bootstrap config should produce more terms than default",
+                  file=sys.stderr)
+            return 1
+
+        lhs_set = {entry["lhs"] for entry in result["curated_candidates"]}
+        bootstrap_expected = {"(mul x 0:i64)", "(mul x 1:i64)"}
+        for expected_lhs in bootstrap_expected:
+            if expected_lhs not in lhs_set:
+                print(f"FAIL: expected bootstrap candidate '{expected_lhs}' not found",
+                      file=sys.stderr)
+                return 1
+
+    print("PASS: test_mine_dmir_bootstrap_config")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_mine_dmir_novel_rules.py b/tools/test_mine_dmir_novel_rules.py
new file mode 100644
index 000000000..eeda59231
--- /dev/null
+++ b/tools/test_mine_dmir_novel_rules.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_miner(source_dir, extra_args=()):
+    """Run tools/mine_dmir_seed_rules.py with extra_args and return the CompletedProcess."""
+    miner = pathlib.Path(source_dir, "tools", "mine_dmir_seed_rules.py")
+    return subprocess.run([sys.executable, str(miner), *extra_args], capture_output=True, text=True)
+
+
+def main():
+    """Check the miner's --rules mode (covered vs. novel split); return 0 on pass, else 1."""
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <source_dir>", file=sys.stderr)
+        return 1
+    source_dir = pathlib.Path(sys.argv[1])
+    rules_path = source_dir / "src/compiler/mir/dmir_rewrite_rules.json"
+    bootstrap_config = source_dir / "src/compiler/mir/dmir_rewrite_mining_bootstrap.json"
+
+    if not rules_path.exists():
+        print(f"Rules file not found: {rules_path}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        out_path = tmpdir / "novel_candidates.json"
+
+        proc = run_miner(source_dir, ["--rules", str(rules_path), "--out", str(out_path)])
+        if proc.returncode != 0:
+            print("FAIL: miner exited non-zero", file=sys.stderr)
+            print(proc.stderr, file=sys.stderr)
+            return 1
+        try:
+            result = json.loads(out_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(f"FAIL: output is not valid JSON: {exc}", file=sys.stderr)
+            return 1
+
+        summary = result["summary"]
+        # With --rules pointing at the real rules file, a zero covered count is a failure.
+        if summary["covered_candidate_count"] == 0:
+            print("FAIL: expected some candidates covered by the real rules file",
+                  file=sys.stderr)
+            return 1
+        # Covered and novel counts must add up to the curated count.
+        if (summary["covered_candidate_count"] + summary["novel_candidate_count"]
+                != summary["curated_candidate_count"]):
+            print("FAIL: covered + novel != curated", file=sys.stderr)
+            return 1
+        # NOTE(review): appears implied by the two checks above when counts are non-negative.
+        if summary["novel_candidate_count"] >= summary["curated_candidate_count"]:
+            print("FAIL: novel_candidate_count should be < curated_candidate_count",
+                  file=sys.stderr)
+            return 1
+        # The add-zero rewrite must be listed among the covered candidates.
+        covered_lhs_set = {entry["lhs"] for entry in result["covered_candidates"]}
+        if "(add x 0:i64)" not in covered_lhs_set:
+            print("FAIL: '(add x 0:i64)' should appear in covered_candidates", file=sys.stderr)
+            return 1
+        # Each entry's covered_by_rule_repo flag must match the list the entry appears in.
+        for entry in result["novel_candidates"]:
+            if entry.get("covered_by_rule_repo") is not False:
+                print(f"FAIL: novel candidate '{entry.get('lhs')}' has wrong "
+                      "covered_by_rule_repo", file=sys.stderr)
+                return 1
+
+        for entry in result["covered_candidates"]:
+            if entry.get("covered_by_rule_repo") is not True:
+                print(f"FAIL: covered candidate '{entry.get('lhs')}' has wrong "
+                      "covered_by_rule_repo", file=sys.stderr)
+                return 1
+        # Second run combining --rules with --config; skipped when the config file is absent.
+        if bootstrap_config.exists():
+            out_path2 = tmpdir / "novel_bootstrap.json"
+            proc2 = run_miner(source_dir, [
+                "--rules", str(rules_path),
+                "--config", str(bootstrap_config),
+                "--out", str(out_path2),
+            ])
+            if proc2.returncode != 0:
+                print("FAIL: miner failed with --rules + --config", file=sys.stderr)
+                print(proc2.stderr, file=sys.stderr)
+                return 1
+            result2 = json.loads(out_path2.read_text(encoding="utf-8"))
+            if result2["summary"]["config_supplied"] is not True:
+                print("FAIL: config_supplied should be true with --config", file=sys.stderr)
+                return 1
+            if result2["summary"]["covered_candidate_count"] == 0:
+                print("FAIL: expected some covered candidates with bootstrap + rules",
+                      file=sys.stderr)
+                return 1
+
+    print("PASS: test_mine_dmir_novel_rules")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_mine_dmir_seed_rules.py b/tools/test_mine_dmir_seed_rules.py
new file mode 100644
index 000000000..4f7c71acf
--- /dev/null
+++ b/tools/test_mine_dmir_seed_rules.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_miner(source_dir, extra_args=()):
+    """Run tools/mine_dmir_seed_rules.py with extra_args and return the CompletedProcess."""
+    miner = pathlib.Path(source_dir, "tools", "mine_dmir_seed_rules.py")
+    return subprocess.run([sys.executable, str(miner), *extra_args], capture_output=True, text=True)
+
+
+def main():
+    """Check the miner's default mode (no --rules, no --config); return 0 on pass, else 1."""
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <source_dir>", file=sys.stderr)
+        return 1
+    source_dir = pathlib.Path(sys.argv[1])
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        out_path = tmpdir / "seed_candidates.json"
+
+        proc = run_miner(source_dir, ["--out", str(out_path)])
+        if proc.returncode != 0:
+            print("FAIL: miner exited non-zero", file=sys.stderr)
+            print(proc.stderr, file=sys.stderr)
+            return 1
+        if not out_path.exists():
+            print("FAIL: output file not created", file=sys.stderr)
+            return 1
+        try:
+            result = json.loads(out_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(f"FAIL: output is not valid JSON: {exc}", file=sys.stderr)
+            return 1
+        # Structural checks: required top-level keys first, then required summary keys.
+        for key in ("summary", "candidates", "curated_candidates",
+                    "covered_candidates", "novel_candidates"):
+            if key not in result:
+                print(f"FAIL: output missing top-level key '{key}'", file=sys.stderr)
+                return 1
+
+        summary = result["summary"]
+        for key in ("term_count", "sample_count", "candidate_count",
+                    "curated_candidate_count", "covered_candidate_count",
+                    "novel_candidate_count", "config_supplied"):
+            if key not in summary:
+                print(f"FAIL: summary missing key '{key}'", file=sys.stderr)
+                return 1
+
+        if summary["term_count"] <= 0:
+            print("FAIL: term_count should be > 0", file=sys.stderr)
+            return 1
+        if summary["sample_count"] <= 0:
+            print("FAIL: sample_count should be > 0", file=sys.stderr)
+            return 1
+        # Neither --rules nor --config was passed: nothing is covered, no config is reported.
+        if summary["covered_candidate_count"] != 0:
+            print("FAIL: covered_candidate_count should be 0 without --rules", file=sys.stderr)
+            return 1
+        if summary["config_supplied"] is not False:
+            print("FAIL: config_supplied should be false without --config", file=sys.stderr)
+            return 1
+        # Every curated candidate must carry lhs, rhs and cost.
+        for entry in result["curated_candidates"]:
+            for field in ("lhs", "rhs", "cost"):
+                if field not in entry:
+                    print(f"FAIL: candidate entry missing field '{field}'", file=sys.stderr)
+                    return 1
+        # The add-zero candidate must be among the curated candidates.
+        lhs_set = {entry["lhs"] for entry in result["curated_candidates"]}
+        if "(add x 0:i64)" not in lhs_set:
+            print("FAIL: expected '(add x 0:i64)' in curated candidates", file=sys.stderr)
+            return 1
+        # With a covered count of zero, the novel count must equal the curated count.
+        if summary["novel_candidate_count"] != summary["curated_candidate_count"]:
+            print("FAIL: without --rules, novel count should equal curated count",
+                  file=sys.stderr)
+            return 1
+
+    print("PASS: test_mine_dmir_seed_rules")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_report_dmir_rewrite_rules.py b/tools/test_report_dmir_rewrite_rules.py
new file mode 100644
index 000000000..e6c4b4ca4
--- /dev/null
+++ b/tools/test_report_dmir_rewrite_rules.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_reporter(source_dir, rules_path, gtest_binary=None, out_path=None):
+    """Run tools/report_dmir_rewrite_rules.py on rules_path and return the CompletedProcess."""
+    reporter = pathlib.Path(source_dir, "tools", "report_dmir_rewrite_rules.py")
+    argv = [sys.executable, str(reporter), "--rules", str(rules_path)]
+    # Optional flags are appended only when a truthy value was supplied.
+    for flag, value in (("--gtest-binary", gtest_binary), ("--out", out_path)):
+        argv.extend([flag, str(value)] if value else [])
+    return subprocess.run(argv, capture_output=True, text=True)
+
+
+def main():
+    """Check report_dmir_rewrite_rules.py output for the real rules; return 0 on pass, else 1."""
+    if len(sys.argv) not in (2, 3):
+        print(f"Usage: {sys.argv[0]} <source_dir> [<dmirValidationTests_binary>]",
+              file=sys.stderr)
+        return 1
+    source_dir = pathlib.Path(sys.argv[1])
+    gtest_binary = pathlib.Path(sys.argv[2]) if len(sys.argv) == 3 else None
+    rules_path = source_dir / "src/compiler/mir/dmir_rewrite_rules.json"
+
+    if not rules_path.exists():
+        print(f"Rules file not found: {rules_path}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        # Run 1: report written to a file, passing the gtest binary when one was given.
+        out_path = tmpdir / "report.json"
+        proc = run_reporter(source_dir, rules_path, gtest_binary, out_path)
+        if proc.returncode != 0:
+            print("FAIL: reporter exited non-zero", file=sys.stderr)
+            print(proc.stderr, file=sys.stderr)
+            return 1
+        if not out_path.exists():
+            print("FAIL: output file not created", file=sys.stderr)
+            return 1
+        try:
+            report = json.loads(out_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(f"FAIL: output is not valid JSON: {exc}", file=sys.stderr)
+            return 1
+
+        for key in ("summary", "rules"):
+            if key not in report:
+                print(f"FAIL: report missing top-level key '{key}'", file=sys.stderr)
+                return 1
+
+        summary = report["summary"]
+        for key in ("rule_count", "status_counts", "mode_counts", "rules_with_missing_coverage"):
+            if key not in summary:
+                print(f"FAIL: summary missing key '{key}'", file=sys.stderr)
+                return 1
+
+        if summary["rule_count"] <= 0:
+            print("FAIL: summary.rule_count must be > 0", file=sys.stderr)
+            return 1
+        # Each rule entry needs its descriptive fields and all four cost_delta counters.
+        cost_fields = ("dmir_inst", "select_depth", "adc_chain", "runtime_calls")
+        for entry in report["rules"]:
+            for field in ("name", "status", "inputs", "modes", "cost_delta",
+                          "coverage", "coverage_complete"):
+                if field not in entry:
+                    print(f"FAIL: rule entry missing field '{field}'", file=sys.stderr)
+                    return 1
+            for cost_field in cost_fields:
+                if cost_field not in entry["cost_delta"]:
+                    print(f"FAIL: cost_delta missing field '{cost_field}'", file=sys.stderr)
+                    return 1
+        # The zero-missing-coverage requirement is enforced only when a gtest binary was given.
+        if gtest_binary and summary["rules_with_missing_coverage"] != 0:
+            print("FAIL: real dmir rules have missing coverage according to gtest binary",
+                  file=sys.stderr)
+            return 1
+        # Run 2: no gtest binary; no coverage entry may then be flagged with a falsy "present".
+        out_path2 = tmpdir / "report_no_binary.json"
+        proc2 = run_reporter(source_dir, rules_path, None, out_path2)
+        if proc2.returncode != 0:
+            print("FAIL: reporter failed without gtest binary", file=sys.stderr)
+            return 1
+        report2 = json.loads(out_path2.read_text(encoding="utf-8"))
+        for entry in report2["rules"]:
+            for cov in entry.get("coverage", []):
+                if not cov.get("present", True):  # an entry without "present" passes
+                    print(f"FAIL: coverage entry marked absent without binary: {cov}",
+                          file=sys.stderr)
+                    return 1
+        # Run 3: no --out; the report must arrive on stdout as valid JSON.
+        proc3 = run_reporter(source_dir, rules_path, None, None)
+        if proc3.returncode != 0:
+            print("FAIL: reporter failed when writing to stdout", file=sys.stderr)
+            return 1
+        try:
+            json.loads(proc3.stdout)
+        except json.JSONDecodeError as exc:
+            print(f"FAIL: stdout is not valid JSON: {exc}", file=sys.stderr)
+            return 1
+
+    print("PASS: test_report_dmir_rewrite_rules")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_report_x86_cg_peephole_validation.py b/tools/test_report_x86_cg_peephole_validation.py
new file mode 100644
index 000000000..7994fb77a
--- /dev/null
+++ b/tools/test_report_x86_cg_peephole_validation.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_reporter(source_dir, rules_path, gtest_binary=None, out_path=None):
+    """Run tools/report_x86_cg_peephole_validation.py on rules_path; return the CompletedProcess."""
+    reporter = pathlib.Path(source_dir, "tools", "report_x86_cg_peephole_validation.py")
+    argv = [sys.executable, str(reporter), "--rules", str(rules_path)]
+    # Optional flags are appended only when a truthy value was supplied.
+    for flag, value in (("--gtest-binary", gtest_binary), ("--out", out_path)):
+        argv.extend([flag, str(value)] if value else [])
+    return subprocess.run(argv, capture_output=True, text=True)
+
+
+def main():
+    """Check report_x86_cg_peephole_validation.py output for the real rules; return 0 or 1."""
+    if len(sys.argv) not in (2, 3):
+        print(f"Usage: {sys.argv[0]} <source_dir> [<x86CgPeepholeTests_binary>]",
+              file=sys.stderr)
+        return 1
+    source_dir = pathlib.Path(sys.argv[1])
+    gtest_binary = pathlib.Path(sys.argv[2]) if len(sys.argv) == 3 else None
+    rules_path = source_dir / "src/compiler/target/x86/x86_cg_peephole_rules.json"
+
+    if not rules_path.exists():
+        print(f"Rules file not found: {rules_path}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        # Run 1: report written to a file, passing the gtest binary when one was given.
+        out_path = tmpdir / "report.json"
+        proc = run_reporter(source_dir, rules_path, gtest_binary, out_path)
+        if proc.returncode != 0:
+            print("FAIL: reporter exited non-zero", file=sys.stderr)
+            print(proc.stderr, file=sys.stderr)
+            return 1
+        if not out_path.exists():
+            print("FAIL: output file not created", file=sys.stderr)
+            return 1
+        try:
+            report = json.loads(out_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(f"FAIL: output is not valid JSON: {exc}", file=sys.stderr)
+            return 1
+
+        for key in ("summary", "rules"):
+            if key not in report:
+                print(f"FAIL: report missing top-level key '{key}'", file=sys.stderr)
+                return 1
+
+        summary = report["summary"]
+        for key in ("rule_count", "stage_counts", "mode_counts", "rules_with_missing_coverage"):
+            if key not in summary:
+                print(f"FAIL: summary missing key '{key}'", file=sys.stderr)
+                return 1
+
+        if summary["rule_count"] <= 0:
+            print("FAIL: summary.rule_count must be > 0", file=sys.stderr)
+            return 1
+        # Every rule entry must expose the fields listed below.
+        for entry in report["rules"]:
+            for field in ("name", "stage", "priority", "modes", "coverage", "coverage_complete"):
+                if field not in entry:
+                    print(f"FAIL: rule entry missing field '{field}'", file=sys.stderr)
+                    return 1
+        # Run 2: no gtest binary; no coverage entry may then be flagged with a falsy "present".
+        out_path2 = tmpdir / "report_no_binary.json"
+        proc2 = run_reporter(source_dir, rules_path, None, out_path2)
+        if proc2.returncode != 0:
+            print("FAIL: reporter failed without gtest binary", file=sys.stderr)
+            return 1
+        report2 = json.loads(out_path2.read_text(encoding="utf-8"))
+        for entry in report2["rules"]:
+            for cov in entry.get("coverage", []):
+                if not cov.get("present", True):  # an entry without "present" passes
+                    print(f"FAIL: coverage entry marked absent without binary: {cov}",
+                          file=sys.stderr)
+                    return 1
+        # Checked against the first report, i.e. the one produced with the gtest binary.
+        if gtest_binary:
+            if report["summary"]["rules_with_missing_coverage"] != 0:
+                print("FAIL: real rules have missing coverage according to gtest binary",
+                      file=sys.stderr)
+                return 1
+        # Run 3: no --out; the report must arrive on stdout as valid JSON.
+        proc3 = run_reporter(source_dir, rules_path, None, None)
+        if proc3.returncode != 0:
+            print("FAIL: reporter failed when writing to stdout", file=sys.stderr)
+            return 1
+        try:
+            json.loads(proc3.stdout)
+        except json.JSONDecodeError as exc:
+            print(f"FAIL: stdout is not valid JSON: {exc}", file=sys.stderr)
+            return 1
+
+    print("PASS: test_report_x86_cg_peephole_validation")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_update_compiler_pass_timing_budget.py b/tools/test_update_compiler_pass_timing_budget.py
new file mode 100644
index 000000000..2f6856adb
--- /dev/null
+++ b/tools/test_update_compiler_pass_timing_budget.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Test wrapper for update_compiler_pass_timing_budget.py.
+
+Called by CMakeLists.txt as:
+    test_update_compiler_pass_timing_budget.py <source_dir>
+
+Runs the updater with a synthetic timing report and verifies that the output
+budget JSON has the required structure.  No dtvm binary is needed.
+"""
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+CASE_NAMES = [
+    "add",
+    "mul",
+    "div",
+    "shl",
+    "shr",
+    "sar",
+    "byte",
+    "eq_true",
+    "lt_true",
+    "jump",
+    "u256_shl_add_mul",
+    "u256_mul_add_chain",
+    "u256_shr_add_shl",
+    "bool_and_or_xor_not",
+    "bool_xor_not_chain",
+]
+
+PASS_NAME = "x86_cg_peephole"
+TOTAL_TIME_MS = 1.0
+PASS_TIME_MS = 0.002
+PASS_SHARE_PCT = 0.2
+
+def make_phase_stats(time_ms, share_pct):
+    """Return phase statistics in which every statistic has the same value.
+
+    Args:
+        time_ms: value stored under mean, median, p95, min and max.
+        share_pct: value stored under the same five keys inside the nested
+            "share_of_total_pct" mapping.
+    """
+    stat_keys = ("mean", "median", "p95", "min", "max")
+
+    # dict.fromkeys keeps the order of `stat_keys`, so the resulting key order
+    # is the same as a literal listing mean, median, p95, min, max.
+    phase_stats = dict.fromkeys(stat_keys, time_ms)
+    phase_stats["share_of_total_pct"] = dict.fromkeys(stat_keys, share_pct)
+    return phase_stats
+
+def make_case_summary(total_time_ms, pass_name, pass_time_ms, pass_share_pct):
+    """Return a one-run, one-record summary whose only phase is `pass_name`."""
+    phase_stats = make_phase_stats(pass_time_ms, pass_share_pct)
+    summary = {}
+    summary["total_time_ms"] = {"mean": total_time_ms, "median": total_time_ms}
+    summary["phases"] = {pass_name: phase_stats}
+    summary["runs"] = 1
+    summary["record_count"] = 1
+    return summary
+
+def build_synthetic_report(manifest_path):
+    """Return a timing report with one synthetic entry per name in CASE_NAMES."""
+    summary_args = (TOTAL_TIME_MS, PASS_NAME, PASS_TIME_MS, PASS_SHARE_PCT)
+
+    # Each entry gets its own freshly built summary dict.
+    case_entries = [
+        {
+            "name": case_name,
+            "input": f"/synthetic/{case_name}.evm.hex",
+            "summary": make_case_summary(*summary_args),
+        }
+        for case_name in CASE_NAMES
+    ]
+
+    # The overall summary records one run and one record per case.
+    overall = make_case_summary(*summary_args)
+    overall["runs"] = 1
+    overall["record_count"] = len(CASE_NAMES)
+
+    return {
+        "manifest": str(manifest_path),
+        "case_count": len(CASE_NAMES),
+        "cases": case_entries,
+        "overall": overall,
+    }
+
+def main():  # exit status: 0 when every check passes, 1 on the first failure
+    if len(sys.argv) != 2:
+        print(
+            "usage: test_update_compiler_pass_timing_budget.py <source_dir>",
+            file=sys.stderr,
+        )
+        return 1
+
+    source_dir = pathlib.Path(sys.argv[1]).resolve()
+    updater = source_dir / "tools" / "update_compiler_pass_timing_budget.py"
+
+    if not updater.exists():
+        print(f"updater not found: {updater}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmp_dir:  # holds the report and budget files
+        tmp = pathlib.Path(tmp_dir)
+        # The manifest path is only recorded in the report; main() never opens it.
+        manifest_path = (
+            source_dir / "tests" / "evm_asm" / "compiler_pass_timing_manifest.json"
+        )
+        report = build_synthetic_report(manifest_path)
+
+        report_path = tmp / "timing_report.json"
+        report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
+
+        output_path = tmp / "budget_out.json"
+
+        cmd = [
+            sys.executable,
+            str(updater),
+            "--report",
+            str(report_path),
+            "--out",
+            str(output_path),
+            "--target-pass",
+            PASS_NAME,
+            "--runs",
+            "1",
+            "--compile-mode",
+            "compile-only",
+            "--threshold-status",
+            "provisional",
+        ]
+
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            check=False,  # the exit code is checked explicitly below
+        )
+
+        if result.returncode != 0:
+            print(result.stderr, file=sys.stderr)
+            print(
+                f"FAIL: test_update_compiler_pass_timing_budget — updater exited with "
+                f"code {result.returncode}",
+                file=sys.stderr,
+            )
+            return 1
+
+        if not output_path.exists():
+            print(
+                "FAIL: test_update_compiler_pass_timing_budget — output JSON was not written",
+                file=sys.stderr,
+            )
+            return 1
+
+        try:
+            budget = json.loads(output_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError as exc:
+            print(
+                f"FAIL: test_update_compiler_pass_timing_budget — invalid JSON: {exc}",
+                file=sys.stderr,
+            )
+            return 1
+
+        # Verify required top-level keys
+        for field in ("version", "target_pass", "thresholds", "baseline", "metadata"):
+            if field not in budget:
+                print(
+                    f"FAIL: test_update_compiler_pass_timing_budget — missing field "
+                    f"'{field}'",
+                    file=sys.stderr,
+                )
+                return 1
+
+        # Verify target_pass recorded correctly
+        if budget["target_pass"] != PASS_NAME:
+            print(
+                f"FAIL: test_update_compiler_pass_timing_budget — target_pass mismatch: "
+                f"expected '{PASS_NAME}', got '{budget['target_pass']}'",
+                file=sys.stderr,
+            )
+            return 1
+
+        # Verify baseline structure
+        baseline = budget["baseline"]
+        for field in ("overall_total_time_ms_median", "case_total_time_ms_median"):
+            if field not in baseline:
+                print(
+                    f"FAIL: test_update_compiler_pass_timing_budget — baseline missing "
+                    f"field '{field}'",
+                    file=sys.stderr,
+                )
+                return 1
+
+        # Verify all cases are present in the baseline
+        case_baselines = baseline["case_total_time_ms_median"]
+        for name in CASE_NAMES:
+            if name not in case_baselines:
+                print(
+                    f"FAIL: test_update_compiler_pass_timing_budget — baseline missing "
+                    f"case '{name}'",
+                    file=sys.stderr,
+                )
+                return 1
+
+        # Verify overall baseline value matches the synthetic report
+        expected_overall = TOTAL_TIME_MS  # synthetic median
+        if abs(baseline["overall_total_time_ms_median"] - expected_overall) > 1e-9:
+            print(
+                f"FAIL: test_update_compiler_pass_timing_budget — overall baseline "
+                f"{baseline['overall_total_time_ms_median']} != expected {expected_overall}",
+                file=sys.stderr,
+            )
+            return 1
+
+        # Verify thresholds keys are present
+        thresholds = budget["thresholds"]
+        for key in (
+            "max_pass_share_p95_pct",
+            "max_pass_time_p95_ms",
+            "max_overall_total_time_regression_pct",
+            "max_case_total_time_regression_pct",
+        ):
+            if key not in thresholds:
+                print(
+                    f"FAIL: test_update_compiler_pass_timing_budget — thresholds "
+                    f"missing key '{key}'",
+                    file=sys.stderr,
+                )
+                return 1
+
+        # Verify metadata
+        metadata = budget["metadata"]
+        for key in ("compile_mode", "thresholds_status", "runs"):
+            if key not in metadata:
+                print(
+                    f"FAIL: test_update_compiler_pass_timing_budget — metadata "
+                    f"missing key '{key}'",
+                    file=sys.stderr,
+                )
+                return 1
+
+    print("PASS: test_update_compiler_pass_timing_budget")
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_x86_cg_peephole_generator.py b/tools/test_x86_cg_peephole_generator.py
new file mode 100644
index 000000000..57d287105
--- /dev/null
+++ b/tools/test_x86_cg_peephole_generator.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_generator(rules_path, out_inc, out_report, source_dir):
+    """Run generate_x86_cg_peephole.py on the given paths; return the CompletedProcess."""
+    generator = pathlib.Path(source_dir) / "tools" / "generate_x86_cg_peephole.py"
+    argv = [sys.executable, str(generator)]
+    for flag, value in (
+        ("--rules", rules_path),
+        ("--out-inc", out_inc),
+        ("--out-report", out_report),
+    ):
+        argv += [flag, str(value)]
+    return subprocess.run(argv, capture_output=True, text=True)
+
+
+def main():  # exit status: 0 when every check passes, 1 on the first failure
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <source_dir>", file=sys.stderr)
+        return 1
+
+    source_dir = pathlib.Path(sys.argv[1])
+    rules_path = source_dir / "src/compiler/target/x86/x86_cg_peephole_rules.json"
+
+    if not rules_path.exists():
+        print(f"Rules file not found: {rules_path}", file=sys.stderr)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        # Case 1: the rules file under <source_dir> must generate with no conflicts.
+        out_inc = tmpdir / "generated.inc"
+        out_report = tmpdir / "report.txt"
+        proc = run_generator(rules_path, out_inc, out_report, source_dir)
+        if proc.returncode != 0:
+            print(f"FAIL: generator exited {proc.returncode} on valid rules", file=sys.stderr)
+            print(proc.stderr, file=sys.stderr)
+            return 1
+        if not out_inc.exists() or out_inc.stat().st_size == 0:
+            print("FAIL: generated .inc file is missing or empty", file=sys.stderr)
+            return 1
+        inc_text = out_inc.read_text(encoding="utf-8")
+        for marker in [
+            "// Copyright (C) 2025 the DTVM authors",
+            "GeneratedInstructionRuleResult",
+            "tryGeneratedInstructionRules",
+            "tryGeneratedBlockEndRules",
+            "namespace {",
+        ]:
+            if marker not in inc_text:
+                print(f"FAIL: generated .inc missing expected marker: {marker!r}", file=sys.stderr)
+                return 1
+        if not out_report.exists():
+            print("FAIL: report file was not created", file=sys.stderr)
+            return 1
+        report_text = out_report.read_text(encoding="utf-8")
+        if "No conflicts detected." not in report_text:
+            print("FAIL: report does not say 'No conflicts detected.'", file=sys.stderr)
+            print(report_text, file=sys.stderr)
+            return 1
+        # Case 2: two rules with the same stage, priority and pattern must be rejected.
+        conflict_rules = {
+            "version": 1,
+            "rules": [
+                {
+                    "name": "rule-a",
+                    "stage": "instruction",
+                    "priority": 100,
+                    "pattern": [{"bind": "I", "opcode": "MOV64rr"}],
+                    "action": {"erase": ["I"]},
+                },
+                {
+                    "name": "rule-b",
+                    "stage": "instruction",
+                    "priority": 100,
+                    "pattern": [{"bind": "I", "opcode": "MOV64rr"}],
+                    "action": {"erase": ["I"]},
+                },
+            ],
+        }
+        conflict_rules_path = tmpdir / "conflict_rules.json"
+        conflict_rules_path.write_text(json.dumps(conflict_rules), encoding="utf-8")
+        out_inc2 = tmpdir / "generated2.inc"
+        out_report2 = tmpdir / "report2.txt"
+        proc2 = run_generator(conflict_rules_path, out_inc2, out_report2, source_dir)
+        if proc2.returncode == 0:
+            print("FAIL: generator should exit 1 for conflicting rules", file=sys.stderr)
+            return 1
+        if out_report2.exists():  # the report is only checked when one was written
+            report2_text = out_report2.read_text(encoding="utf-8")
+            if "Conflicts:" not in report2_text:
+                print("FAIL: conflict report does not mention 'Conflicts:'", file=sys.stderr)
+                return 1
+
+    print("PASS: test_x86_cg_peephole_generator")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/test_x86_cg_peephole_validation.py b/tools/test_x86_cg_peephole_validation.py
new file mode 100644
index 000000000..204f23c3a
--- /dev/null
+++ b/tools/test_x86_cg_peephole_validation.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+
+def run_checker(source_dir, rules_path, gtest_binary=None):
+    """Run check_x86_cg_peephole_validation.py and return its CompletedProcess."""
+    checker = pathlib.Path(source_dir) / "tools" / "check_x86_cg_peephole_validation.py"
+    binary_args = ["--gtest-binary", str(gtest_binary)] if gtest_binary else []
+    argv = [sys.executable, str(checker), "--rules", str(rules_path), *binary_args]
+    return subprocess.run(argv, capture_output=True, text=True)
+
+
+def main():  # exit status: 0 when every check passes, 1 on the first failure
+    if len(sys.argv) not in (2, 3):
+        print(f"Usage: {sys.argv[0]} <source_dir> [<x86CgPeepholeTests_binary>]",
+              file=sys.stderr)
+        return 1
+
+    source_dir = pathlib.Path(sys.argv[1])
+    gtest_binary = pathlib.Path(sys.argv[2]) if len(sys.argv) == 3 else None
+    rules_path = source_dir / "src/compiler/target/x86/x86_cg_peephole_rules.json"
+
+    if not rules_path.exists():
+        print(f"Rules file not found: {rules_path}", file=sys.stderr)
+        return 1
+    # Case 1: the checker must accept the real rules file and print its success message.
+    proc = run_checker(source_dir, rules_path, gtest_binary)
+    if proc.returncode != 0:
+        print("FAIL: checker failed on real rules file", file=sys.stderr)
+        print(proc.stderr, file=sys.stderr)
+        return 1
+    if "x86 cg peephole validation metadata is complete" not in proc.stdout:
+        print("FAIL: expected success message not in stdout", file=sys.stderr)
+        print(proc.stdout, file=sys.stderr)
+        return 1
+    # Case 2: with a gtest binary supplied, the checker must also pass without one.
+    if gtest_binary:
+        proc2 = run_checker(source_dir, rules_path, None)
+        if proc2.returncode != 0:
+            print("FAIL: checker failed without gtest binary", file=sys.stderr)
+            print(proc2.stderr, file=sys.stderr)
+            return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = pathlib.Path(tmpdir)
+        # Case 3: a rule without a "validation" block must be rejected.
+        bad_rules = {
+            "rules": [
+                {
+                    "name": "no-validation-rule",
+                    "stage": "instruction",
+                    "priority": 100,
+                    "pattern": [{"bind": "I", "opcode": "MOV64rr"}],
+                    "action": {"erase": ["I"]},
+                }
+            ]
+        }
+        bad_path = tmpdir / "bad_rules.json"
+        bad_path.write_text(json.dumps(bad_rules), encoding="utf-8")
+        proc3 = run_checker(source_dir, bad_path, None)
+        if proc3.returncode == 0:
+            print("FAIL: checker should fail on rule missing validation", file=sys.stderr)
+            return 1
+        if "missing validation metadata" not in proc3.stderr:
+            print("FAIL: expected error about missing validation metadata", file=sys.stderr)
+            print(proc3.stderr, file=sys.stderr)
+            return 1
+        # Case 4: an instruction rule with only the "structural" mode must be rejected.
+        structural_only = {
+            "rules": [
+                {
+                    "name": "structural-only-rule",
+                    "stage": "instruction",
+                    "priority": 100,
+                    "pattern": [{"bind": "I", "opcode": "MOV64rr"}],
+                    "action": {"erase": ["I"]},
+                    "validation": {
+                        "modes": ["structural"],
+                        "coverage": ["SomeSuite.SomeTest"],
+                    },
+                }
+            ]
+        }
+        structural_path = tmpdir / "structural_only.json"
+        structural_path.write_text(json.dumps(structural_only), encoding="utf-8")
+        proc4 = run_checker(source_dir, structural_path, None)
+        if proc4.returncode == 0:
+            print("FAIL: checker should fail on instruction rule with only structural mode",
+                  file=sys.stderr)
+            return 1
+        if "execution or semantics_model" not in proc4.stderr:
+            print("FAIL: expected error about execution or semantics_model", file=sys.stderr)
+            print(proc4.stderr, file=sys.stderr)
+            return 1
+
+    print("PASS: test_x86_cg_peephole_validation")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/update_compiler_pass_timing_budget.py b/tools/update_compiler_pass_timing_budget.py
new file mode 100644
index 000000000..038248975
--- /dev/null
+++ b/tools/update_compiler_pass_timing_budget.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import pathlib
+
+
+DEFAULT_THRESHOLDS = {
+    "max_pass_share_p95_pct": 2.0,
+    "max_pass_time_p95_ms": 0.05,
+    "max_overall_total_time_regression_pct": 15.0,
+    "max_case_total_time_regression_pct": 20.0,
+}
+
+
+def parse_args():
+    """Parse the command line; only --report and --out are required."""
+    # add_argument() keyword arguments by flag, registered in insertion order.
+    option_kwargs = {
+        "--report": {"required": True, "help": "Timing report JSON path"},
+        "--out": {"required": True, "help": "Budget JSON output path"},
+        "--budget-in": {
+            "help": "Existing budget JSON to preserve thresholds and metadata fields",
+        },
+        "--rules": {
+            "help": "Optional rule JSON path used to refresh the recorded rule count",
+        },
+        "--target-pass": {
+            "default": "x86_cg_peephole",
+            "help": "Pass name recorded in the budget file",
+        },
+        "--manifest": {"help": "Manifest path to record in metadata"},
+        "--runs": {"type": int, "help": "Run count to record in metadata"},
+        "--num-extra-compilations": {
+            "type": int,
+            "help": "Extra compilation count used during collection",
+        },
+        "--compile-mode": {
+            "default": "compile-only",
+            "help": "Compile mode label recorded in metadata",
+        },
+        "--threshold-status": {
+            "default": "provisional",
+            "help": "Threshold status label recorded in metadata",
+        },
+    }
+    parser = argparse.ArgumentParser(
+        description="Refresh compiler pass timing budget baselines from a timing report."
+    )
+    for flag, kwargs in option_kwargs.items():
+        parser.add_argument(flag, **kwargs)
+    return parser.parse_args()
+
+
+def load_json(path):
+    """Parse the UTF-8 JSON file at `path` and return the decoded value."""
+    return json.loads(pathlib.Path(path).read_text(encoding="utf-8"))
+
+
+def count_rules(path):  # number of entries under the top-level "rules" key (0 if absent)
+    return len(load_json(path).get("rules", []))
+
+
+def normalize_thresholds(thresholds):
+    """Return a copy of thresholds with legacy keys renamed and defaults filled in."""
+    normalized = dict(thresholds or {})
+    legacy_names = {
+        "max_pass_share_p95_pct": "max_pass_share_of_total_pct",
+        "max_pass_time_p95_ms": "max_pass_time_ms",
+    }
+    for key, legacy in legacy_names.items():
+        if key not in normalized and legacy in normalized:
+            normalized[key] = normalized.pop(legacy)
+    # A partial prior budget must still yield every key in DEFAULT_THRESHOLDS.
+    for key, default in DEFAULT_THRESHOLDS.items():
+        normalized.setdefault(key, default)
+    return normalized
+
+
+def main():
+    """Build the budget JSON from --report, write it to --out and echo it to stdout."""
+    args = parse_args()
+    report = load_json(args.report)
+    prior_budget = load_json(args.budget_in) if args.budget_in else {}
+
+    # Thresholds and metadata are seeded from --budget-in when it is given.
+    thresholds = normalize_thresholds(prior_budget.get("thresholds"))
+    case_baselines = {}
+    for case in report.get("cases", []):
+        case_baselines[case["name"]] = case["summary"]["total_time_ms"]["median"]
+
+    metadata = dict(prior_budget.get("metadata", {}))
+    if args.manifest:
+        metadata["manifest"] = args.manifest
+    elif "manifest" in report:
+        metadata["manifest"] = report["manifest"]
+    # Without --runs, a "runs" value copied from --budget-in is kept as-is.
+    if args.runs is not None:
+        metadata["runs"] = args.runs
+    if args.num_extra_compilations is not None:
+        metadata["num_extra_compilations"] = args.num_extra_compilations
+    if args.rules:
+        metadata["rule_count"] = count_rules(args.rules)
+    metadata["compile_mode"] = args.compile_mode
+    metadata["thresholds_status"] = args.threshold_status
+
+    overall_median = report["overall"]["total_time_ms"]["median"]
+    budget = {
+        "version": 1,
+        "target_pass": args.target_pass,
+        "thresholds": thresholds,
+        "baseline": {
+            "overall_total_time_ms_median": overall_median,
+            "case_total_time_ms_median": case_baselines,
+        },
+        "metadata": metadata,
+    }
+
+    # Serialize once so the file and stdout carry the same text.
+    serialized = json.dumps(budget, indent=2)
+    pathlib.Path(args.out).write_text(serialized + "\n", encoding="utf-8")
+    print(serialized)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())