diff --git a/.github/workflows/dtvm_evm_test_x86.yml b/.github/workflows/dtvm_evm_test_x86.yml index b34e26a32..2fedefedf 100644 --- a/.github/workflows/dtvm_evm_test_x86.yml +++ b/.github/workflows/dtvm_evm_test_x86.yml @@ -487,3 +487,86 @@ jobs: run: | echo "::error::Performance regression detected in ${{ matrix.mode }} mode. See logs for details." exit 1 + + peephole_validation_and_timing_budget: + name: Peephole Validation and Timing Budget Check + runs-on: ubuntu-latest + container: + image: dtvmdev1/dtvm-dev-x64:main + steps: + - name: Check out code + uses: actions/checkout@v3 + with: + submodules: "true" + + - name: Build dtvm and x86CgPeepholeTests + run: | + export LLVM_SYS_150_PREFIX=/opt/llvm15 + export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm + export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DZEN_ENABLE_SINGLEPASS_JIT=OFF \ + -DZEN_ENABLE_MULTIPASS_JIT=ON \ + -DZEN_ENABLE_EVM=ON \ + -DZEN_ENABLE_SPEC_TEST=ON \ + -DZEN_ENABLE_CPU_EXCEPTION=ON \ + -DZEN_ENABLE_VIRTUAL_STACK=ON + cmake --build build --target dtvm --target x86CgPeepholeTests --target dmirValidationTests -j$(nproc) + bash tools/easm2bytecode.sh tests/evm_asm tests/evm_asm + + - name: Verify .inc generator output is up-to-date + run: | + python tools/generate_x86_cg_peephole.py \ + --rules src/compiler/target/x86/x86_cg_peephole_rules.json \ + --out-inc /tmp/x86_cg_peephole_generated_check.inc \ + --out-report /tmp/x86_cg_peephole_report_check.txt + diff /tmp/x86_cg_peephole_generated_check.inc \ + build/src/compiler/generated/target/x86/x86_cg_peephole_generated.inc + + - name: Run peephole rule validation check + run: | + python tools/check_x86_cg_peephole_validation.py \ + --rules src/compiler/target/x86/x86_cg_peephole_rules.json \ + --gtest-binary build/x86CgPeepholeTests + + - name: Run dmir rewrite validation tests + run: ./build/dmirValidationTests + + - name: Collect compiler pass timings + run: | + python 
tools/collect_compiler_pass_timings.py \ + --dtvm build/dtvm \ + --manifest tests/evm_asm/compiler_pass_timing_manifest.json \ + --runs 5 \ + --output /tmp/ci_timing_report.json \ + -- --format evm --mode multipass --compile-only + + - name: Refresh timing budgets from CI data + run: | + python tools/update_compiler_pass_timing_budget.py \ + --report /tmp/ci_timing_report.json \ + --out /tmp/ci_budget_x86_cg_peephole.json \ + --budget-in tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \ + --target-pass x86_cg_peephole \ + --manifest tests/evm_asm/compiler_pass_timing_manifest.json \ + --runs 5 + python tools/update_compiler_pass_timing_budget.py \ + --report /tmp/ci_timing_report.json \ + --out /tmp/ci_budget_dmir_rewrite.json \ + --budget-in tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json \ + --target-pass dmir_rewrite \ + --manifest tests/evm_asm/compiler_pass_timing_manifest.json \ + --runs 5 + + - name: Check timing budget (x86_cg_peephole) + run: | + python tools/check_compiler_pass_timing_budget.py \ + --budget /tmp/ci_budget_x86_cg_peephole.json \ + --report /tmp/ci_timing_report.json + + - name: Check timing budget (dmir_rewrite) + run: | + python tools/check_compiler_pass_timing_budget.py \ + --budget /tmp/ci_budget_dmir_rewrite.json \ + --report /tmp/ci_timing_report.json diff --git a/docs/changes/2026-03-30-peephole-optimization-system/README.md b/docs/changes/2026-03-30-peephole-optimization-system/README.md new file mode 100644 index 000000000..44d2d6c3f --- /dev/null +++ b/docs/changes/2026-03-30-peephole-optimization-system/README.md @@ -0,0 +1,70 @@ +# Change: Peephole Optimization System for dMIR and x86 CgIR + +- **Status**: Implemented +- **Date**: 2026-03-30 +- **Tier**: Full + +## Overview + +A two-level peephole optimization system targeting both dMIR (mid-level IR) and x86 CgIR (code generation IR). 
The dMIR level has 65 accepted rewrite rules (plus 5 seed rules) covering identity elimination, boolean algebra, shift-zero, and carry-dead rewrites. The x86 CgIR level has 13 declarative JSON rules for self-moves, zero-shifts, redundant CMP/TEST, fallthrough branches, and setcc+test+jne chain folding. The change also includes Z3-verified synthesized rules and a CI validation gate. + +## Motivation + +The JIT compiler generated redundant instructions from mechanical U256 decomposition and lowering. Peephole optimization is a standard compiler technique to clean up such patterns without restructuring the pipeline. The two-level approach catches patterns at both the IR and machine code level. + +## Impact + +### Affected Modules + +- `docs/modules/compiler/` — new dMIR rewrite pass, carry-dead analysis, rule table infrastructure +- `docs/modules/singlepass/` — x86 CgIR peephole pass +- CI pipeline — new `peephole_validation_and_timing_budget` job + +### Affected Contracts + +No breaking API or interface changes. The `dtvm` CLI gains an additive `--compile-only` flag (EVM input only), and compiler-pass timing output is opt-in via the `DTVM_COMPILER_PASS_TIMING_JSON` environment variable. + +### Compatibility + +- No breaking changes +- +4.6% geomean improvement on evmone-bench (27 benchmarks) +- Notable wins: snailtracer +3.9%, structarray_alloc +4.1%, swap_math +5.0-5.8%, memory_grow_mstore +11-13% +- ~0.005ms p95 compile overhead from dMIR rewrite pass + +## Implementation Plan + +### Phase 1: dMIR Rewrite Infrastructure + +- [x] Pattern matching framework +- [x] Rule table +- [x] Validation tests + +### Phase 2: Carry-Dead Analysis + +- [x] `isCarryDead()` for adc→add and sbb→sub rewrites on dead-carry limbs + +### Phase 3: Z3-Synthesized Rules + +- [x] `add(x,x)→shl(x,1)`, negation folding, boolean identities +- [x] Verified via `tools/synthesize_dmir_rules.py` + +### Phase 4: x86 CgIR Peephole + +- [x] 13 declarative JSON rules +- [x] Pattern matching on machine instructions + +### Phase 5: CI Gate + +- [x] `.inc` freshness check +- [x] Structural/execution/semantics validation +- [x] Compile-time budget enforcement + +## Compatibility Notes + +No 
backwards-incompatible changes. The optimization passes are additive and do not alter any external APIs or module interfaces. + +## Risks + +- Rewrite rules must preserve U256 semantics exactly; all rules are Z3-verified but edge cases in carry chain analysis could theoretically miss a case +- Compile-time budget (0.005ms p95) may need adjustment as more rules are added +- JSON rule format for x86 CgIR is a new abstraction layer that adds maintenance surface diff --git a/docs/compiler/dmir_to_x86_mapping.md b/docs/compiler/dmir_to_x86_mapping.md new file mode 100644 index 000000000..3cf1703a4 --- /dev/null +++ b/docs/compiler/dmir_to_x86_mapping.md @@ -0,0 +1,86 @@ + + +# dMIR To CgIR/x86 Mapping + +## Scope + +This note records the lowering bridge for the dMIR arithmetic subset that the +offline rewrite pipeline currently touches, plus the safe subset already wired +into the production dMIR rewrite pass: + +- integer `add/sub` +- `cmp` +- `select` +- `adc/sbb` +- EVM 64x64->128 multiplication helpers +- EVM 128/64 division helpers + +Phase 1 keeps the production DSL at `CgIR/x86`, so every dMIR-side candidate +rule eventually has to be translated into the instruction families emitted by +`X86CgLowering`. + +## Current Production Status + +`JITCompilerBase::compileMIRToCgIR()` now runs a tree-local `DMirRewritePass` +after `dead_mbb_elim` and before x86 lowering. 
The pass currently applies only +a conservative in-code subset of accepted rules whose replacements are either +existing subtrees, typed integer constants, or small synthesized boolean +expressions, for example: + +- `add/sub/or/xor/shift` identities with zero +- `and` identities with zero or all-ones +- `not(not x) => x` +- `select(cond, x, x) => x` +- complement folds such as `or((not x), x) => allones` +- boolean factoring such as `xor((and x y), (xor x y)) => (or x y)` + +`adc` and `sbb` candidates remain validation-only: the explicit third operand +is visible in dMIR, but rewriting them safely still requires carry/borrow-chain +proof beyond the current structural pass. + +## Mapping Table + +| dMIR expression family | Lowering entrypoint | CgIR/x86 family | Bridge notes | +| --- | --- | --- | --- | +| `add`, `sub` | generic FastISel path in `CgLowering` plus `X86GenFastISel.inc` (see `src/compiler/target/x86/x86lowering.h`) | `ADD*rr/ri`, `SUB*rr/ri` | This path is table-driven, not hand-written in `x86lowering.cpp`. The exact register/immediate form depends on operand materialization. | +| `cmp` | `X86CgLowering::lowerCmpExpr()` in `src/compiler/target/x86/x86lowering.cpp` | compare op (`CMP*` or `TEST*`) + `SETCCr` + optional `MOVZX32rr8` | Integer compare results become 8-bit condition materialization first, then widen to i32/i64. This is the source-side pattern behind the existing `SETCCr/TEST8rr/JCC_1` peephole fold. | +| `select` | `X86CgLowering::lowerSelectExpr()` in `src/compiler/target/x86/x86lowering.cpp` | integer: `CMOV*`; floating-point: conditional branch + `COPY` | Integer `select` survives as a recognizable dataflow choice. Floating-point `select` is lowered into control flow and loses the direct value-select shape. | +| `adc` | `X86CgLowering::lowerAdcExpr()` in `src/compiler/target/x86/x86lowering.cpp` | `ADC8rr`, `ADC16rr`, `ADC32rr`, `ADC64rr` | The carry operand is not reified in x86 CgIR. 
Lowering asserts that operand 2 is the constant zero and then consumes the hardware `CF` chain directly. Any dMIR-side analysis that depends on the explicit third operand being zero must therefore happen before lowering. That alone does not justify rewriting `adc(lhs, rhs, 0)` into `add(lhs, rhs)` inside an EVM carry chain. | +| `sbb` | `X86CgLowering::lowerSbbExpr()` in `src/compiler/target/x86/x86lowering.cpp` | `SBB8rr`, `SBB16rr`, `SBB32rr`, `SBB64rr` | Same information-loss caveat as `adc`: x86 CgIR only preserves the borrow-consuming instruction, not the explicit third operand from dMIR. The zero-borrow precondition can be checked only before lowering, but borrow-chain safety still has to be established separately. | +| `evm_umul128_lo`, `evm_umul128_hi` | `X86CgLowering::lowerEvmUmul128Expr()` and `lowerEvmUmul128HiExpr()` in `src/compiler/target/x86/x86lowering.cpp` | `COPY -> RAX`, `MUL64r`, `COPY RAX`, optional `COPY RDX` | The low half is always materialized from `RAX`. The high half exists only when an `evm_umul128_hi` user is present; lowering pre-scans the function and allocates the extra copy lazily. | +| `evm_udiv128_by64`, `evm_urem128_by64` | `X86CgLowering::lowerEvmUdiv128By64Expr()` and `lowerEvmUrem128By64Expr()` in `src/compiler/target/x86/x86lowering.cpp` | `COPY -> RDX`, `COPY -> RAX`, `DIV64r`, `COPY RAX`, `COPY RDX` | Quotient and remainder are split across `RAX` and `RDX`. As with `umul128`, the helper pair lowers to one x86 instruction plus explicit register copies. | + +## Translation Rules For The Current Seed Set + +The current seed dMIR candidate file lives at +`src/compiler/mir/dmir_rewrite_rules.json`. For Phase 1 option A, these rules +translate into x86-facing families as follows: + +| dMIR candidate | x86-facing shape after lowering | Recommended landing layer | +| --- | --- | --- | +| `(add x 0:i64) => x` | `ADD*rr/ri` with a zero operand | x86 DSL can represent this, but only after matching the exact zero-immediate form. 
| +| `(not (not x)) => x` | `NOT*` pair | Either layer works; x86 DSL keeps it target-specific. | +| `(select cond x x) => x` | integer `CMOV*` or FP branch diamond | Prefer dMIR for the generic rule. Lowering splits the integer and FP cases. | +| `(adc x y 0:i64) => (add x y)` | `ADC*rr` consuming implicit `CF` | Only a dMIR-side candidate today. The explicit third operand disappears after lowering, so this precondition cannot be recovered at the x86 layer. A future promotion still needs carry-chain-specific safety proof. | +| `(sbb x y 0:i64) => (sub x y)` | `SBB*rr` consuming implicit `CF` | Same reasoning as `adc`: the precondition is only visible in dMIR, but promotion still needs borrow-chain-specific safety proof. | + +## Why This Mapping Matters + +Two pieces of information are lost across lowering: + +- The explicit third operand of `adc/sbb` +- The high-level `select(cmp(...), lhs, rhs)` shape once it turns into x86 + condition codes plus `SETCCr`, `CMOV*`, or explicit branches + +That information loss is the main reason the current implementation keeps three parallel +tracks: + +- a conservative production `DMirRewritePass` for tree-local structural folds +- production peepholes at `CgIR/x86` +- offline dMIR candidate rules plus interpreter-backed validation + +The mapping tables above are the minimum subset needed to move rules between those +tracks without rediscovering the source locations each time. diff --git a/docs/compiler/x86_cg_peephole.md b/docs/compiler/x86_cg_peephole.md new file mode 100644 index 000000000..7dec37a94 --- /dev/null +++ b/docs/compiler/x86_cg_peephole.md @@ -0,0 +1,165 @@ + + +# X86 Cg Peephole Foundation + +## Scope Decision + +Phase 1 keeps the declarative peephole framework at the existing `CgIR/x86` +layer. 
+ +- Rule matching still runs inside `X86CgPeephole` +- Rules live in + `src/compiler/target/x86/x86_cg_peephole_rules.json` +- The rule file is compiled into C++ at build time by + `tools/generate_x86_cg_peephole.py` + +This keeps the first migration aligned with the current optimization layer. The +conservative production `DMirRewritePass` (see `docs/compiler/dmir_to_x86_mapping.md`) is a separate pass, timed on its own as `dmir_rewrite`. + +## Rule DSL + +Each rule is a JSON object with these fields: + +- `name`: stable identifier used in reports and tests +- `stage`: `instruction` or `block_end` +- `priority`: higher priority rules are emitted first +- `pattern`: ordered instruction match window +- `when`: optional block-level side conditions +- `action`: deterministic rewrite steps + +Supported `pattern` matchers: + +- `predicate`: call a `CgInstruction` predicate such as `isCompare` +- `opcode`: match a single x86 opcode +- `opcode_any`: match one opcode from a fixed set +- `capture`: bind an operand field for later reuse +- `require`: constrain operand fields to captures, enums, or booleans + +Supported operand fields: + +- `reg` +- `imm` +- `is_mbb` + +Supported `when` conditions: + +- `target_is_next_block` + +Supported `action` steps: + +- `erase` +- `set_imm` + +Each rule also carries validation metadata: + +- `validation.modes`: declared validation styles for the rule +- `validation.coverage`: concrete test coverage entries + +`tools/check_x86_cg_peephole_validation.py` rejects rule files that add rewrites +without validation metadata. When given `--gtest-binary`, it also verifies that +each coverage entry names a real gtest case. + +The generated matcher is linear in the number of emitted rules. There is no +runtime search, SMT solving, or e-graph exploration in the JIT path. 
+ +Validation coverage can be exported as a machine-readable report: + +```bash +python3 tools/report_x86_cg_peephole_validation.py \ + --rules src/compiler/target/x86/x86_cg_peephole_rules.json \ + --gtest-binary ./build-peephole/x86CgPeepholeTests \ + --out /tmp/x86-cg-peephole-validation.json +``` + +The report summarizes: + +- rule count +- per-stage rule counts +- per-mode validation counts +- per-rule coverage completeness against the current gtest binary + +## Conflict Checks + +The generator emits a rule report and rejects rules that share the same +normalized pattern and priority. The report is generated at build time: + +- `build/.../generated/target/x86/x86_cg_peephole_report.txt` + +## Compiler Pass Timing Baseline + +Compiler-pass timing is written when +`DTVM_COMPILER_PASS_TIMING_JSON=/path/to/file.json` is present. + +Recommended baseline workflow: + +```bash +python3 tools/collect_compiler_pass_timings.py \ + --dtvm ./build-peephole/dtvm \ + --manifest tests/evm_asm/compiler_pass_timing_manifest.json \ + --runs 5 \ + --output /tmp/dtvm-pass-timing.json \ + -- --format evm -m multipass --compile-only \ + --num-extra-compilations 4 --evm-revision cancun +``` + +`--compile-only` avoids execution-side noise and keeps the benchmark focused on +module loading and JIT compilation. + +The aggregated JSON includes: + +- per-case total compile time +- per-pass timing statistics +- `p95` pass-time and pass-share data for budget checks +- per-pass share of total compile time +- manifest-level aggregate summary + +Rule operand indices may count from the end of the explicit operand list when +negative. For example, `-1` refers to the last explicit operand, which is +useful for two-address x86 opcodes whose immediate operand is not at a fixed +absolute index once implicit operands such as `EFLAGS` are present. 
+ +Budget validation workflow: + +```bash +python3 tools/check_compiler_pass_timing_budget.py \ + --budget tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \ + --report /tmp/dtvm-pass-timing.json +``` + +Budget refresh workflow: + +```bash +python3 tools/update_compiler_pass_timing_budget.py \ + --report /tmp/dtvm-pass-timing.json \ + --budget-in tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \ + --out tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json \ + --rules src/compiler/target/x86/x86_cg_peephole_rules.json \ + --runs 5 \ + --num-extra-compilations 4 +``` + +Phase 1 uses these outputs to set the peephole budget thresholds: + +- max share of function compile time +- max pass wall time +- CI regression threshold +- linear growth check against rule count + +`tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json` is an initial local baseline. +It should be recalibrated on the target CI runner before enforcing tighter +regression gates. + +## Rule Validation + +Current validation coverage is split into two layers: + +- structural rewrite tests in `src/tests/x86_cg_peephole_tests.cpp` +- semantics fuzzing for compare/setcc folding in the same test target + +The first execution-backed harness is now in place for the +`cmp/setcc/test/jne -> cmp/jcc` rewrite. It executes both the original and +rewritten x86 sequences with inline assembly across edge cases and randomized +inputs, then compares the observed branch result. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5d73d028c..7fb34f4e8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -102,6 +102,7 @@ if(ZEN_ENABLE_SINGLEPASS_JIT) endif() if(ZEN_ENABLE_MULTIPASS_JIT) + find_package(Python3 REQUIRED COMPONENTS Interpreter) find_package(LLVM 15 REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") diff --git a/src/cli/dtvm.cpp b/src/cli/dtvm.cpp index d241187ab..4795f1c57 100644 --- a/src/cli/dtvm.cpp +++ b/src/cli/dtvm.cpp @@ -109,12 +109,10 @@ static evmc_message createEvmMessage(evmc::MockedHost &Host, return Msg; } -static bool runEVMBenchmark(const std::string &Filename, - uint32_t NumExtraCompilations, - uint32_t NumExtraExecutions, Runtime *RT, - EVMModule *Mod, const EVMMessageConfig &MsgConfig, - evmc::MockedHost &Host) { - if (NumExtraCompilations + NumExtraExecutions == 0) { +static bool runEVMCompilationBenchmark(const std::string &Filename, + uint32_t NumExtraCompilations, + Runtime *RT) { + if (NumExtraCompilations == 0) { return true; } @@ -132,6 +130,24 @@ static bool runEVMBenchmark(const std::string &Filename, RT->unloadEVMModule(*TestModRet); } + return true; +} + +static bool runEVMExecutionBenchmark(const std::string &Filename, + uint32_t NumExtraExecutions, Runtime *RT, + EVMModule *Mod, + const EVMMessageConfig &MsgConfig, + evmc::MockedHost &Host) { + if (NumExtraExecutions == 0) { + return true; + } + + std::vector Bytecode; + if (!zen::utils::readBinaryFile(Filename, Bytecode)) { + SIMPLE_LOG_ERROR("failed to read EVM bytecode file %s", Filename.c_str()); + return false; + } + for (uint32_t I = 0; I < NumExtraExecutions; ++I) { IsolationUniquePtr TestIso = RT->createUnmanagedIsolation(); ZEN_ASSERT(TestIso); @@ -177,6 +193,7 @@ int main(int argc, char *argv[]) { uint32_t NumExtraExecutions = 0; RuntimeConfig Config; bool EnableBenchmark = false; + bool CompileOnly = false; bool DeployMode = false; std::string 
ContractAddress; std::string SenderAddress = "1000000000000000000000000000000000000000"; @@ -281,6 +298,8 @@ int main(int argc, char *argv[]) { #endif // ZEN_ENABLE_MULTIPASS_JIT #ifdef ZEN_ENABLE_EVM CLIParser->add_option("--calldata", Calldata, "Calldata hex pass to EVM"); + CLIParser->add_flag("--compile-only", CompileOnly, + "Compile EVM bytecode without creating an instance"); CLIParser ->add_option("--evm-revision", EvmRevision, "EVM revision (e.g., cancun, osaka)") @@ -299,6 +318,11 @@ int main(int argc, char *argv[]) { return exitMain(EXIT_FAILURE); } + if (CompileOnly && Config.Format != InputFormat::EVM) { + SIMPLE_LOG_ERROR("--compile-only is only supported with --format evm"); + return exitMain(EXIT_FAILURE); + } + /// ================ EVM mode ================ #ifdef ZEN_ENABLE_EVM if (Config.Format == InputFormat::EVM) { @@ -338,6 +362,26 @@ int main(int argc, char *argv[]) { } EVMModule *Mod = *ModRet; + if (CompileOnly) { + if (NumExtraExecutions != 0) { + SIMPLE_LOG_ERROR( + "--num-extra-executions is not supported with --compile-only"); + return exitMain(EXIT_FAILURE, RT.get()); + } + + if (!runEVMCompilationBenchmark(Filename, NumExtraCompilations, + RT.get())) { + return exitMain(EXIT_FAILURE, RT.get()); + } + + if (!RT->unloadEVMModule(Mod)) { + ZEN_LOG_ERROR("failed to unload EVM module"); + return exitMain(EXIT_FAILURE, RT.get()); + } + + return exitMain(EXIT_SUCCESS, RT.get()); + } + Isolation *Iso = RT->createManagedIsolation(); if (!Iso) { ZEN_LOG_ERROR("failed to create EVM isolation"); @@ -427,9 +471,12 @@ int main(int argc, char *argv[]) { } /// ======= EVM Extra compilations and executions for benchmarking ======= - if (!runEVMBenchmark(Filename, NumExtraCompilations, NumExtraExecutions, - RT.get(), Mod, MsgConfig, - *static_cast(Host.get()))) { + if (!runEVMCompilationBenchmark(Filename, NumExtraCompilations, RT.get())) { + return exitMain(EXIT_FAILURE, RT.get()); + } + if (!runEVMExecutionBenchmark( + Filename, NumExtraExecutions, 
RT.get(), Mod, MsgConfig, + *static_cast(Host.get()))) { return exitMain(EXIT_FAILURE, RT.get()); } diff --git a/src/compiler/CMakeLists.txt b/src/compiler/CMakeLists.txt index 74f604ae4..5a6e5b3f1 100644 --- a/src/compiler/CMakeLists.txt +++ b/src/compiler/CMakeLists.txt @@ -32,6 +32,7 @@ endif() set(COMPILER_SRCS compiler.cpp context.cpp + common/pass_timing.cpp common/llvm_workaround.cpp frontend/parser.cpp frontend/lexer.cpp @@ -94,6 +95,34 @@ set(COMPILER_SRCS cgir/pass/llvm_utils.cpp ) +set(X86_PEEPHOLE_RULES + ${CMAKE_CURRENT_SOURCE_DIR}/target/x86/x86_cg_peephole_rules.json +) +set(X86_PEEPHOLE_GENERATED_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/target/x86) +set(X86_PEEPHOLE_GENERATED_INC + ${X86_PEEPHOLE_GENERATED_DIR}/x86_cg_peephole_generated.inc +) +set(X86_PEEPHOLE_REPORT + ${X86_PEEPHOLE_GENERATED_DIR}/x86_cg_peephole_report.txt +) + +add_custom_command( + OUTPUT ${X86_PEEPHOLE_GENERATED_INC} ${X86_PEEPHOLE_REPORT} + COMMAND ${CMAKE_COMMAND} -E make_directory ${X86_PEEPHOLE_GENERATED_DIR} + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/generate_x86_cg_peephole.py + --rules ${X86_PEEPHOLE_RULES} --out-inc ${X86_PEEPHOLE_GENERATED_INC} + --out-report ${X86_PEEPHOLE_REPORT} + DEPENDS ${X86_PEEPHOLE_RULES} + ${CMAKE_SOURCE_DIR}/tools/generate_x86_cg_peephole.py + VERBATIM +) + +add_custom_target( + generateX86CgPeephole DEPENDS ${X86_PEEPHOLE_GENERATED_INC} + ${X86_PEEPHOLE_REPORT} +) + if(ZEN_ENABLE_EVM) list(APPEND COMPILER_SRCS evm_compiler.cpp evm_frontend/evm_imported.cpp evm_frontend/evm_mir_compiler.cpp @@ -111,6 +140,10 @@ set_property( ) add_library(compiler STATIC ${COMPILER_SRCS} $) +add_dependencies(compiler generateX86CgPeephole) +target_include_directories( + compiler PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/generated +) target_link_libraries(compiler PRIVATE ${llvm_libs}) if(ZEN_ENABLE_EVM) target_link_libraries(compiler PUBLIC evmc::instructions) diff --git a/src/compiler/cgir/lowering.h b/src/compiler/cgir/lowering.h index 
5ec152dbb..6927551a6 100644 --- a/src/compiler/cgir/lowering.h +++ b/src/compiler/cgir/lowering.h @@ -202,6 +202,22 @@ template class CgLowering { ResultReg = SELF.lowerEvmU256MulResultExpr( llvm::cast(Inst)); break; + case MInstruction::EVM_U256_ADD: + ResultReg = + SELF.lowerEvmU256AddExpr(llvm::cast(Inst)); + break; + case MInstruction::EVM_U256_ADD_RESULT: + ResultReg = SELF.lowerEvmU256AddResultExpr( + llvm::cast(Inst)); + break; + case MInstruction::EVM_U256_SUB: + ResultReg = + SELF.lowerEvmU256SubExpr(llvm::cast(Inst)); + break; + case MInstruction::EVM_U256_SUB_RESULT: + ResultReg = SELF.lowerEvmU256SubResultExpr( + llvm::cast(Inst)); + break; case MInstruction::EVM_UDIV128_BY64: ResultReg = SELF.lowerEvmUdiv128By64Expr( llvm::cast(Inst)); diff --git a/src/compiler/cgir/pass/peephole.h b/src/compiler/cgir/pass/peephole.h index 04492b6f3..cbedc056b 100644 --- a/src/compiler/cgir/pass/peephole.h +++ b/src/compiler/cgir/pass/peephole.h @@ -15,13 +15,18 @@ template class CgPeephole : public NonCopyable { public: CgPeephole(CgFunction &MF) : MF(MF) { for (auto *MBB : MF) { - SELF.peepholeOptimizeBB(*MBB); for (CgBasicBlock::iterator MII = MBB->begin(), MIE = MBB->end(); MII != MIE;) { - // may change MII - SELF.peepholeOptimize(*MBB, MII); - MII++; + // When the matcher erases the current instruction, it must advance + // MII itself and return true to avoid incrementing an invalid iterator. + if (!SELF.peepholeOptimize(*MBB, MII)) { + MII++; + } } + // Block-end rewrites (e.g. remove-fallthrough-jcc) erase terminators + // that instruction-level rules (e.g. fold-setcc-test-jne-to-jcc) need + // as part of a longer match window. Run instruction-level pass first. + SELF.peepholeOptimizeBB(*MBB); } } diff --git a/src/compiler/common/pass_timing.cpp b/src/compiler/common/pass_timing.cpp new file mode 100644 index 000000000..06d8d521f --- /dev/null +++ b/src/compiler/common/pass_timing.cpp @@ -0,0 +1,157 @@ +// Copyright (C) 2025 the DTVM authors. 
All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include "compiler/common/pass_timing.h" + +#include +#include +#include +#include + +namespace COMPILER { + +namespace { + +constexpr const char *COMPILER_PASS_TIMING_PATH_ENV = + "DTVM_COMPILER_PASS_TIMING_JSON"; + +double durationToMs(std::chrono::steady_clock::duration Duration) { + return std::chrono::duration(Duration).count(); +} + +} // namespace + +CompilerPassTimingSink &CompilerPassTimingSink::get() { + static CompilerPassTimingSink Sink; + return Sink; +} + +CompilerPassTimingSink::CompilerPassTimingSink() + : Enabled(std::getenv(COMPILER_PASS_TIMING_PATH_ENV) != nullptr), + OutputPath(Enabled ? std::getenv(COMPILER_PASS_TIMING_PATH_ENV) : "") {} + +void CompilerPassTimingSink::appendRecord(CompilerPassTimingRecord Record) { + if (!Enabled) { + return; + } + + std::lock_guard Lock(Mutex); + Records.emplace_back(std::move(Record)); +} + +CompilerPassTimingSink::~CompilerPassTimingSink() { + if (!Enabled || Records.empty()) { + return; + } + std::lock_guard Lock(Mutex); + writeReportLocked(); +} + +void CompilerPassTimingSink::writeReportLocked() const { + const std::string TempPath = OutputPath + ".tmp"; + std::ofstream Out(TempPath, std::ios::out | std::ios::trunc); + if (!Out.is_open()) { + return; + } + + Out << std::fixed << std::setprecision(6); + Out << "{\n \"records\": [\n"; + for (size_t RecordIdx = 0; RecordIdx < Records.size(); ++RecordIdx) { + const auto &Record = Records[RecordIdx]; + Out << " {\n"; + Out << " \"pipeline\": \"" << escapeJson(Record.Pipeline) << "\",\n"; + Out << " \"func_idx\": " << Record.FuncIdx << ",\n"; + Out << " \"total_time_ms\": " << Record.TotalTimeMs << ",\n"; + Out << " \"phases\": [\n"; + for (size_t EntryIdx = 0; EntryIdx < Record.Entries.size(); ++EntryIdx) { + const auto &Entry = Record.Entries[EntryIdx]; + Out << " {\"name\": \"" << escapeJson(Entry.Name) + << "\", \"time_ms\": " << Entry.TimeMs << "}"; + if (EntryIdx + 1 != 
Record.Entries.size()) { + Out << ","; + } + Out << "\n"; + } + Out << " ]\n"; + Out << " }"; + if (RecordIdx + 1 != Records.size()) { + Out << ","; + } + Out << "\n"; + } + Out << " ]\n}\n"; + Out.close(); + + std::rename(TempPath.c_str(), OutputPath.c_str()); +} + +std::string CompilerPassTimingSink::escapeJson(const std::string &Value) { + std::string Escaped; + Escaped.reserve(Value.size()); + for (char Ch : Value) { + switch (Ch) { + case '\\': + Escaped += "\\\\"; + break; + case '"': + Escaped += "\\\""; + break; + case '\n': + Escaped += "\\n"; + break; + case '\r': + Escaped += "\\r"; + break; + case '\t': + Escaped += "\\t"; + break; + default: + Escaped += Ch; + break; + } + } + return Escaped; +} + +CompilerPassTimingSession::CompilerPassTimingSession(std::string PipelineName, + uint32_t FuncIdx) + : Enabled(CompilerPassTimingSink::get().isEnabled()), + StartTime(std::chrono::steady_clock::now()), + Record{std::move(PipelineName), FuncIdx, {}, 0.0} {} + +void CompilerPassTimingSession::addEntry(std::string Name, double TimeMs) { + if (!Enabled) { + return; + } + + Record.Entries.push_back({std::move(Name), TimeMs}); +} + +void CompilerPassTimingSession::flush() { + if (!Enabled) { + return; + } + + Record.TotalTimeMs = + durationToMs(std::chrono::steady_clock::now() - StartTime); + CompilerPassTimingSink::get().appendRecord(std::move(Record)); + Record = {}; +} + +ScopedCompilerPassTimer::ScopedCompilerPassTimer( + CompilerPassTimingSession *Session, const char *Name) + : Session(Session), Name(Name), + StartTime(Session && Session->isEnabled() + ? 
std::chrono::steady_clock::now() + : std::chrono::steady_clock::time_point{}) {} + +ScopedCompilerPassTimer::~ScopedCompilerPassTimer() { + if (!Session || !Session->isEnabled()) { + return; + } + + Session->addEntry(Name, + durationToMs(std::chrono::steady_clock::now() - StartTime)); +} + +} // namespace COMPILER diff --git a/src/compiler/common/pass_timing.h b/src/compiler/common/pass_timing.h new file mode 100644 index 000000000..5cf766d6d --- /dev/null +++ b/src/compiler/common/pass_timing.h @@ -0,0 +1,78 @@ +// Copyright (C) 2025 the DTVM authors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef ZEN_COMPILER_COMMON_PASS_TIMING_H +#define ZEN_COMPILER_COMMON_PASS_TIMING_H + +#include "compiler/common/common_defs.h" + +#include +#include +#include +#include + +namespace COMPILER { + +struct CompilerPassTimingEntry { + std::string Name; + double TimeMs = 0.0; +}; + +struct CompilerPassTimingRecord { + std::string Pipeline; + uint32_t FuncIdx = 0; + std::vector Entries; + double TotalTimeMs = 0.0; +}; + +class CompilerPassTimingSink final : public NonCopyable { +public: + static CompilerPassTimingSink &get(); + + bool isEnabled() const { return Enabled; } + + void appendRecord(CompilerPassTimingRecord Record); + +private: + CompilerPassTimingSink(); + ~CompilerPassTimingSink(); + + void writeReportLocked() const; + static std::string escapeJson(const std::string &Value); + + const bool Enabled = false; + const std::string OutputPath; + mutable std::mutex Mutex; + std::vector Records; +}; + +class CompilerPassTimingSession final : public NonCopyable { +public: + CompilerPassTimingSession(std::string PipelineName, uint32_t FuncIdx); + + bool isEnabled() const { return Enabled; } + + void addEntry(std::string Name, double TimeMs); + void flush(); + +private: + const bool Enabled = false; + const std::chrono::steady_clock::time_point StartTime; + CompilerPassTimingRecord Record; +}; + +class ScopedCompilerPassTimer final : public NonCopyable { 
+public: + ScopedCompilerPassTimer(CompilerPassTimingSession *Session, const char *Name); + + ~ScopedCompilerPassTimer(); + +private: + CompilerPassTimingSession *Session = nullptr; + const char *Name = nullptr; + std::chrono::steady_clock::time_point StartTime; +}; + +} // namespace COMPILER + +#endif // ZEN_COMPILER_COMMON_PASS_TIMING_H diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index a45ba3c2f..5f942d61d 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -12,11 +12,13 @@ #include "compiler/cgir/pass/reg_alloc_basic.h" #include "compiler/cgir/pass/reg_alloc_greedy.h" #include "compiler/cgir/pass/register_coalescer.h" +#include "compiler/common/pass_timing.h" #include "compiler/context.h" #include "compiler/frontend/parser.h" #include "compiler/mir/function.h" #include "compiler/mir/module.h" #include "compiler/mir/pass/dead_basicblock_elim.h" +#include "compiler/mir/pass/dmir_rewrite.h" #include "compiler/mir/pass/verifier.h" #include "compiler/target/x86/x86_cg_peephole.h" #include "compiler/target/x86/x86_mc_lowering.h" @@ -55,27 +57,45 @@ static inline bool isFuncNeedGreedyRA(uint32_t FuncIdx) { #endif // ZEN_ENABLE_DEBUG_GREEDY_RA void JITCompilerBase::compileMIRToCgIR(MModule &MMod, MFunction &MFunc, - CgFunction &CgFunc, - bool DisableGreedyRA) { + CgFunction &CgFunc, bool DisableGreedyRA, + CompilerPassTimingSession *PassTiming) { #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING llvm::DebugFlag = true; llvm::dbgs() << "\n########## MIR Dump ##########\n\n"; MFunc.dump(); #endif - MVerifier Verifier(MMod, MFunc, llvm::errs()); - if (!Verifier.verify()) { - throw getError(ErrorCode::MIRVerifyingFailed); + { + ScopedCompilerPassTimer Timer(PassTiming, "verify_mir"); + MVerifier Verifier(MMod, MFunc, llvm::errs()); + if (!Verifier.verify()) { + throw getError(ErrorCode::MIRVerifyingFailed); + } + } + + { + ScopedCompilerPassTimer Timer(PassTiming, "dead_mbb_elim"); + DeadMBasicBlockElim MBBDCE; + 
MBBDCE.runOnMFunction(MFunc); } - DeadMBasicBlockElim MBBDCE; - MBBDCE.runOnMFunction(MFunc); + { + ScopedCompilerPassTimer Timer(PassTiming, "dmir_rewrite"); + DMirRewritePass RewritePass; + RewritePass.runOnMFunction(MFunc); + } CgFunction &MF = CgFunc; - // TODO: refactor to pass - X86CgLowering CgLowering(MF); - X86CgPeephole CgPeephole(MF); + { + ScopedCompilerPassTimer Timer(PassTiming, "x86_cg_lowering"); + // TODO: refactor to pass + X86CgLowering CgLowering(MF); + } + { + ScopedCompilerPassTimer Timer(PassTiming, "x86_cg_peephole"); + X86CgPeephole CgPeephole(MF); + } CgPhiElimination PhiElimination; PhiElimination.runOnCgFunction(MF); @@ -83,8 +103,10 @@ void JITCompilerBase::compileMIRToCgIR(MModule &MMod, MFunction &MFunc, if (DisableGreedyRA) { ZEN_LOG_DEBUG("using fast ra for function %d", MFuncIdx); + ScopedCompilerPassTimer Timer(PassTiming, "fast_ra"); FastRA RA(MF); } else { + ScopedCompilerPassTimer Timer(PassTiming, "greedy_ra"); #ifdef ZEN_ENABLE_DEBUG_GREEDY_RA if (!isFuncNeedGreedyRA(MFuncIdx)) { ZEN_LOG_DEBUG("using fast ra for function %d", MFuncIdx); @@ -123,16 +145,22 @@ void JITCompilerBase::compileMIRToCgIR(MModule &MMod, MFunction &MFunc, MF.dump(); #endif - PrologEpilogInserter PEInserter; - PEInserter.runOnCgFunction(MF); + { + ScopedCompilerPassTimer Timer(PassTiming, "prolog_epilog_inserter"); + PrologEpilogInserter PEInserter; + PEInserter.runOnCgFunction(MF); + } #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING llvm::dbgs() << "\n########## CgIR Dump After Prologue/Epilogue Insertion " "##########\n\n"; MF.dump(); #endif - ExpandPostRAPseudos PseudosExpander; - PseudosExpander.runOnCgFunction(MF); + { + ScopedCompilerPassTimer Timer(PassTiming, "expand_post_ra_pseudos"); + ExpandPostRAPseudos PseudosExpander; + PseudosExpander.runOnCgFunction(MF); + } #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING llvm::dbgs() << "\n########## CgIR Dump After Post-RA Pseudo " "Instruction Expansion " diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h 
index 2fb0da0a1..9be84811d 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -10,6 +10,7 @@ namespace COMPILER { class CompileContext; +class CompilerPassTimingSession; class WasmFrontendContext; class MModule; class MFunction; @@ -20,7 +21,8 @@ class JITCompilerBase : public NonCopyable { virtual ~JITCompilerBase() = default; static void compileMIRToCgIR(MModule &Mod, MFunction &MFunc, - CgFunction &CgFunc, bool DisableGreedyRA); + CgFunction &CgFunc, bool DisableGreedyRA, + CompilerPassTimingSession *PassTiming = nullptr); static void emitObjectBuffer(CompileContext *Ctx); }; diff --git a/src/compiler/evm_compiler.cpp b/src/compiler/evm_compiler.cpp index 04d45ad60..eaf1ea846 100644 --- a/src/compiler/evm_compiler.cpp +++ b/src/compiler/evm_compiler.cpp @@ -4,6 +4,7 @@ #include "compiler/evm_compiler.h" #include "common/thread_pool.h" #include "compiler/cgir/cg_function.h" +#include "compiler/common/pass_timing.h" #include "compiler/mir/module.h" #include "compiler/target/x86/x86_mc_lowering.h" #include "platform/map.h" @@ -27,7 +28,8 @@ const size_t MPROTECT_CHUNK_SIZE = 0x1000; namespace COMPILER { void EVMJITCompiler::compileEVMToMC(EVMFrontendContext &Ctx, MModule &Mod, - uint32_t FuncIdx, bool DisableGreedyRA) { + uint32_t FuncIdx, bool DisableGreedyRA, + CompilerPassTimingSession *PassTiming) { if (Ctx.Inited) { // Release all memory allocated by previous function compilation Ctx.MemPool = CompileMemPool(); @@ -43,16 +45,22 @@ void EVMJITCompiler::compileEVMToMC(EVMFrontendContext &Ctx, MModule &Mod, CgFunction CgFunc(Ctx, MFunc); MFunc.setFunctionType(Mod.getFuncType(FuncIdx)); EVMMirBuilder MIRBuilder(Ctx, MFunc); - MIRBuilder.compile(&Ctx); + { + ScopedCompilerPassTimer Timer(PassTiming, "evm_mir_build"); + MIRBuilder.compile(&Ctx); + } #ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING MIRBuilder.dumpMemoryCompileStats(); #endif // ZEN_ENABLE_MULTIPASS_JIT_LOGGING // Apply MIR optimizations and generate machine code - compileMIRToCgIR(Mod, 
MFunc, CgFunc, DisableGreedyRA); + compileMIRToCgIR(Mod, MFunc, CgFunc, DisableGreedyRA, PassTiming); // Generate machine code - Ctx.getMCLowering().runOnCgFunction(CgFunc); + { + ScopedCompilerPassTimer Timer(PassTiming, "x86_mc_lowering"); + Ctx.getMCLowering().runOnCgFunction(CgFunc); + } } void EagerEVMJITCompiler::compile() { @@ -85,10 +93,15 @@ void EagerEVMJITCompiler::compile() { auto &CodeMPool = EVMMod->getJITCodeMemPool(); uint8_t *JITCode = const_cast(CodeMPool.getMemStart()); + CompilerPassTimingSession PassTiming("evm", 0); // EVM has only 1 function, use direct single-threaded compilation - compileEVMToMC(Ctx, Mod, 0, Config.DisableMultipassGreedyRA); - emitObjectBuffer(&Ctx); + compileEVMToMC(Ctx, Mod, 0, Config.DisableMultipassGreedyRA, &PassTiming); + { + ScopedCompilerPassTimer Timer(&PassTiming, "emit_object_buffer"); + emitObjectBuffer(&Ctx); + } + PassTiming.flush(); ZEN_ASSERT(Ctx.ExternRelocs.empty()); uint8_t *JITFuncPtr = Ctx.CodePtr + Ctx.FuncOffsetMap[0]; diff --git a/src/compiler/evm_compiler.h b/src/compiler/evm_compiler.h index 0dac7b84d..998add412 100644 --- a/src/compiler/evm_compiler.h +++ b/src/compiler/evm_compiler.h @@ -10,6 +10,8 @@ namespace COMPILER { +class CompilerPassTimingSession; + class EVMJITCompiler : public JITCompilerBase { protected: EVMJITCompiler(runtime::EVMModule *EVMMod) @@ -19,7 +21,8 @@ class EVMJITCompiler : public JITCompilerBase { ~EVMJITCompiler() override = default; void compileEVMToMC(EVMFrontendContext &Ctx, MModule &Mod, uint32_t FuncIdx, - bool DisableGreedyRA); + bool DisableGreedyRA, + CompilerPassTimingSession *PassTiming = nullptr); runtime::EVMModule *EVMMod; const runtime::RuntimeConfig &Config; diff --git a/src/compiler/evm_frontend/evm_mir_compiler.cpp b/src/compiler/evm_frontend/evm_mir_compiler.cpp index fa748f4c3..ac4043091 100644 --- a/src/compiler/evm_frontend/evm_mir_compiler.cpp +++ b/src/compiler/evm_frontend/evm_mir_compiler.cpp @@ -1782,10 +1782,13 @@ typename 
EVMMirBuilder::Operand EVMMirBuilder::handleMul(Operand MultiplicandOp, MInstruction *Term) -> SumCarryPair { MInstruction *NewSum = createInstruction( false, OP_add, I64Type, Sum, Term); - MInstruction *NewCarry = - createInstruction(false, I64Type, Carry, Zero, Zero); - return {protectUnsafeValue(NewSum, I64Type), - protectUnsafeValue(NewCarry, I64Type)}; + // NewCarry captures the carry-out of ADD(Sum, Term). Operand 2 points + // to NewSum (the carry-producing instruction) to make the dependency + // explicit for analysis passes. x86 lowering uses hardware CF. + MInstruction *ProtectedSum = protectUnsafeValue(NewSum, I64Type); + MInstruction *NewCarry = createInstruction( + false, I64Type, Carry, Zero, ProtectedSum); + return {ProtectedSum, protectUnsafeValue(NewCarry, I64Type)}; }; auto addTermNoCarry = [&](MInstruction *Sum, MInstruction *Term) { @@ -2558,31 +2561,23 @@ EVMMirBuilder::handleAddU64Const(const Operand &FullOp, U256Inst LHS = extractU256Operand(FullOp); MType *MirI64Type = EVMFrontendContext::getMIRTypeFromEVMType(EVMType::UINT64); - MInstruction *Carry = createIntConstInstruction(MirI64Type, 0); MInstruction *RHS0 = createIntConstInstruction(MirI64Type, U64ConstOp.getConstValue()[0]); MInstruction *RHSZero = createIntConstInstruction(MirI64Type, 0); - // Pre-materialize LHS operands for carry chain safety - for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) { - LHS[I] = protectUnsafeValue(LHS[I], MirI64Type); - } - RHS0 = protectUnsafeValue(RHS0, MirI64Type); - MInstruction *ProtectedZero = protectUnsafeValue(RHSZero, MirI64Type); - - U256Inst Result = {}; - // Limb 0: ADD with the actual u64 value - Result[0] = protectUnsafeValue(createInstruction( - false, OP_add, MirI64Type, LHS[0], RHS0), - MirI64Type); - // Limbs 1-3: ADC with shared zero (carry propagation only) - for (size_t I = 1; I < EVM_ELEMENTS_COUNT; ++I) { - Result[I] = - protectUnsafeValue(createInstruction( - false, MirI64Type, LHS[I], ProtectedZero, Carry), - MirI64Type); - } + 
MInstruction *AddInst = createInstruction( + false, MirI64Type, LHS[0], LHS[1], LHS[2], LHS[3], RHS0, RHSZero, RHSZero, + RHSZero); + U256Inst Result = { + AddInst, + createInstruction(false, MirI64Type, AddInst, + 1), + createInstruction(false, MirI64Type, AddInst, + 2), + createInstruction(false, MirI64Type, AddInst, + 3), + }; return Operand(Result, EVMType::UINT256); } @@ -3844,6 +3839,11 @@ EVMMirBuilder::handleMLoad(Operand AddrComponents) { return Result; } +// The old ordering hack (ValueDep = or(parts) & 0) was needed to prevent +// flag-clobbering interleaving when add/adc and sub/sbb chains were emitted +// as separate instructions. With the introduction of EvmU256Add/Sub pseudo-ops, +// the carry/borrow chain is atomic and cannot be interleaved, making the hack +// unnecessary. void EVMMirBuilder::handleMStore(Operand AddrComponents, Operand ValueComponents) { #ifdef ZEN_ENABLE_EVM_GAS_REGISTER @@ -3878,19 +3878,6 @@ void EVMMirBuilder::handleMStore(Operand AddrComponents, MInstruction *SizeConst = createIntConstInstruction(I64Type, 32); MInstruction *RequiredSize = createInstruction( false, OP_add, I64Type, Offset, SizeConst); - // Tie expansion ordering to the stored value to prevent reordering on the - // fallback path that still emits a per-op expand sequence. 
- MInstruction *Zero = createIntConstInstruction(I64Type, 0); - MInstruction *ValueDep = createInstruction( - false, OP_or, I64Type, ValueParts[0], ValueParts[1]); - ValueDep = createInstruction(false, OP_or, I64Type, - ValueDep, ValueParts[2]); - ValueDep = createInstruction(false, OP_or, I64Type, - ValueDep, ValueParts[3]); - ValueDep = createInstruction(false, OP_and, I64Type, - ValueDep, Zero); - RequiredSize = createInstruction(false, OP_add, I64Type, - RequiredSize, ValueDep); MInstruction *Overflow = createInstruction( false, CmpInstruction::Predicate::ICMP_ULT, I64Type, RequiredSize, Offset); diff --git a/src/compiler/evm_frontend/evm_mir_compiler.h b/src/compiler/evm_frontend/evm_mir_compiler.h index 34b88c1e9..d16630c6a 100644 --- a/src/compiler/evm_frontend/evm_mir_compiler.h +++ b/src/compiler/evm_frontend/evm_mir_compiler.h @@ -356,56 +356,31 @@ class EVMMirBuilder final { EVMFrontendContext::getMIRTypeFromEVMType(EVMType::UINT64); if constexpr (Operator == BinaryOperator::BO_ADD) { - MInstruction *Carry = createIntConstInstruction(MirI64Type, 0); - - // Pre-materialize all operand components into variables before the - // ADD/ADC carry chain to prevent flag-clobbering during x86 lowering. 
- for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) { - LHS[I] = protectUnsafeValue(LHS[I], MirI64Type); - RHS[I] = protectUnsafeValue(RHS[I], MirI64Type); - } - - for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) { - if (I == 0) { - MInstruction *LocalResult = createInstruction( - false, OP_add, MirI64Type, LHS[I], RHS[I]); - Result[I] = protectUnsafeValue(LocalResult, MirI64Type); - } else { - MInstruction *LocalResult = createInstruction( - false, MirI64Type, LHS[I], RHS[I], Carry); - Result[I] = protectUnsafeValue(LocalResult, MirI64Type); - } - } + MInstruction *AddInst = createInstruction( + false, MirI64Type, LHS[0], LHS[1], LHS[2], LHS[3], RHS[0], RHS[1], + RHS[2], RHS[3]); + Result = { + AddInst, + createInstruction(false, MirI64Type, + AddInst, 1), + createInstruction(false, MirI64Type, + AddInst, 2), + createInstruction(false, MirI64Type, + AddInst, 3), + }; } else if constexpr (Operator == BinaryOperator::BO_SUB) { - // The borrow here is only used for constructing the sbb instruction. - // We currently use sbb only in bo_sub, and since we can guarantee the - // instructions are consecutive, there's no need to compute the borrow - // in DMIR. - MInstruction *Borrow = createIntConstInstruction(MirI64Type, 0); - - // Pre-materialize all operand components into variables before the - // SUB/SBB borrow chain. This ensures that during x86 lowering, no - // flag-modifying instructions (e.g. ADD for address computation in - // BYTES32-to-U256 conversion) are emitted between the SUB and SBB - // instructions that form the borrow chain. Without this, lazy - // expression lowering of operands like BSWAP(LOAD(ADD(ptr, offset))) - // would emit x86 ADD instructions that clobber the carry flag (CF). 
- for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) { - LHS[I] = protectUnsafeValue(LHS[I], MirI64Type); - RHS[I] = protectUnsafeValue(RHS[I], MirI64Type); - } - - for (size_t I = 0; I < EVM_ELEMENTS_COUNT; ++I) { - if (I == 0) { - MInstruction *LocalResult = createInstruction( - false, OP_sub, MirI64Type, LHS[I], RHS[I]); - Result[I] = protectUnsafeValue(LocalResult, MirI64Type); - } else { - MInstruction *LocalResult = createInstruction( - false, MirI64Type, LHS[I], RHS[I], Borrow); - Result[I] = protectUnsafeValue(LocalResult, MirI64Type); - } - } + MInstruction *SubInst = createInstruction( + false, MirI64Type, LHS[0], LHS[1], LHS[2], LHS[3], RHS[0], RHS[1], + RHS[2], RHS[3]); + Result = { + SubInst, + createInstruction(false, MirI64Type, + SubInst, 1), + createInstruction(false, MirI64Type, + SubInst, 2), + createInstruction(false, MirI64Type, + SubInst, 3), + }; } else { ZEN_ASSERT_TODO(); } diff --git a/src/compiler/mir/dmir_rewrite_mining_bootstrap.json b/src/compiler/mir/dmir_rewrite_mining_bootstrap.json new file mode 100644 index 000000000..d9bc9a7cb --- /dev/null +++ b/src/compiler/mir/dmir_rewrite_mining_bootstrap.json @@ -0,0 +1,165 @@ +{ + "base_terms": [ + "x", + "y", + "cond", + "0:i64", + "1:i64", + "18446744073709551615:i64" + ], + "unary_not_terms": [ + "x", + "y", + "cond" + ], + "double_not_terms": [ + "x", + "y", + "cond" + ], + "binary_fixed_rhs": [ + { + "ops": [ + "add", + "sub", + "and", + "or", + "xor", + "shl", + "sshr", + "ushr" + ], + "lhs": [ + "x", + "y", + "cond" + ], + "rhs": "0:i64" + }, + { + "ops": [ + "and", + "or", + "xor" + ], + "lhs": [ + "x", + "y", + "cond", + "(not x)", + "(not y)" + ], + "rhs": "18446744073709551615:i64" + }, + { + "ops": [ + "mul" + ], + "lhs": [ + "x", + "y" + ], + "rhs": "0:i64" + }, + { + "ops": [ + "mul" + ], + "lhs": [ + "x", + "y" + ], + "rhs": "1:i64" + } + ], + "binary_self": [ + { + "ops": [ + "and", + "mul", + "or", + "xor" + ], + "terms": [ + "x", + "y", + "cond" + ] + } + ], + 
"select_same_arm": { + "conditions": [ + "cond", + "x", + "0:i64", + "1:i64" + ], + "values": [ + "x", + "y", + "(not x)" + ] + }, + "pair_binary_groups": [ + { + "ops": [ + "add", + "sub", + "and", + "or", + "xor" + ], + "lhs": [ + "x", + "y" + ], + "rhs": [ + "x", + "y", + "0:i64" + ] + }, + { + "ops": [ + "and", + "or", + "xor" + ], + "lhs": [ + "x", + "y", + "(and x y)", + "(or x y)", + "(xor x y)", + "(not x)", + "(not y)" + ], + "rhs": [ + "x", + "y", + "0:i64", + "(and x y)", + "(or x y)", + "(xor x y)", + "(not x)", + "(not y)" + ] + } + ], + "adc_sbb_zero": { + "ops": [ + "adc", + "sbb" + ], + "lhs": [ + "x", + "y" + ], + "rhs": [ + "x", + "y", + "0:i64" + ], + "carry": "0:i64" + } +} diff --git a/src/compiler/mir/dmir_rewrite_rules.json b/src/compiler/mir/dmir_rewrite_rules.json new file mode 100644 index 000000000..4eae7e563 --- /dev/null +++ b/src/compiler/mir/dmir_rewrite_rules.json @@ -0,0 +1,2641 @@ +{ + "version": 1, + "rules": [ + { + "name": "add-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(add x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAddZeroRewrite" + ] + } + }, + { + "name": "double-not", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(not (not x))", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + 
"DMirValidation.FuzzesDoubleNotRewrite" + ] + } + }, + { + "name": "sub-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(sub x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSubZeroRewrite" + ] + } + }, + { + "name": "and-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(and x 0:i64)", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndZeroRewrite" + ] + } + }, + { + "name": "and-allones", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(and x 18446744073709551615:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndAllOnesRewrite" + ] + } + }, + { + "name": "and-self", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(and x x)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + 
"runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndSelfRewrite" + ] + } + }, + { + "name": "and-not-self", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(and (not x) x)", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndNotSelfRewrite" + ] + } + }, + { + "name": "or-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(or x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrZeroRewrite" + ] + } + }, + { + "name": "or-allones", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(or x 18446744073709551615:i64)", + "rhs": "18446744073709551615:i64", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAllOnesRewrite" + ] + } + }, + { + "name": "or-self", + "status": "accepted", + "inputs": [ + 
"x" + ], + "lhs": "(or x x)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrSelfRewrite" + ] + } + }, + { + "name": "and-absorb-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (or x y) x)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndAbsorbOrRewrite" + ] + } + }, + { + "name": "and-factor-not-self", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (and x y) (not x))", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndFactorNotSelfRewrite" + ] + } + }, + { + "name": "and-factor-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (and x y) (or x y))", + "rhs": "(and x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + 
"select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndFactorOrRewrite" + ] + } + }, + { + "name": "and-factor-lhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (and x y) x)", + "rhs": "(and x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndFactorLhsRewrite" + ] + } + }, + { + "name": "and-factor-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (and x y) y)", + "rhs": "(and x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndFactorRhsRewrite" + ] + } + }, + { + "name": "and-factor-not-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (and x y) (not y))", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndFactorNotRhsRewrite" + ] + } + }, + { + "name": "and-and-xor-zero", + "status": "accepted", + "inputs": [ + 
"x", + "y" + ], + "lhs": "(and (and x y) (xor x y))", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndAndXorZeroRewrite" + ] + } + }, + { + "name": "and-not-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (not x) (or x y))", + "rhs": "(and (not x) y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndNotOrRewrite" + ] + } + }, + { + "name": "and-not-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (not x) (xor x y))", + "rhs": "(and (not x) y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndNotXorRewrite" + ] + } + }, + { + "name": "and-or-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (or x y) (xor x y))", + "rhs": "(xor x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + 
}, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndOrXorRewrite" + ] + } + }, + { + "name": "and-or-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(and (or x y) y)", + "rhs": "y", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAndOrRhsRewrite" + ] + } + }, + { + "name": "or-absorb-and", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (and x y) x)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAbsorbAndRewrite" + ] + } + }, + { + "name": "or-and-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (and x y) (or x y))", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAndOrRewrite" + ] + } + }, + { + "name": "or-and-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + 
"lhs": "(or (and x y) y)", + "rhs": "y", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAndRhsRewrite" + ] + } + }, + { + "name": "or-and-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (and x y) (xor x y))", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAndXorRewrite" + ] + } + }, + { + "name": "or-factor-lhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (or x y) x)", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrFactorLhsRewrite" + ] + } + }, + { + "name": "or-factor-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (or x y) y)", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + 
"adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrFactorRhsRewrite" + ] + } + }, + { + "name": "or-xor-lhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (xor x y) x)", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrXorLhsRewrite" + ] + } + }, + { + "name": "or-xor-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (xor x y) y)", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrXorRhsRewrite" + ] + } + }, + { + "name": "or-not-self", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(or (not x) x)", + "rhs": "18446744073709551615:i64", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrNotSelfRewrite" + ] + } + }, + { + "name": "or-and-not-lhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (and x y) (not x))", + 
"rhs": "(or (not x) y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAndNotLhsRewrite" + ] + } + }, + { + "name": "or-and-not-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (and x y) (not y))", + "rhs": "(or (not y) x)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrAndNotRhsRewrite" + ] + } + }, + { + "name": "or-or-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (or x y) (xor x y))", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrOrXorRewrite" + ] + } + }, + { + "name": "or-not-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(or (not x) (or x y))", + "rhs": "18446744073709551615:i64", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -3, + 
"select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesOrNotOrRewrite" + ] + } + }, + { + "name": "select-same-arm", + "status": "accepted", + "inputs": [ + "cond", + "x" + ], + "lhs": "(select cond x x)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 1, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": -1, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSelectSameArmRewrite", + "DMirValidation.FuzzesSelectSameArmRewriteI8", + "DMirValidation.FuzzesSelectSameArmRewriteI32" + ] + } + }, + { + "name": "select-false-cond", + "status": "accepted", + "inputs": [ + "t", + "f" + ], + "lhs": "(select 0:i64 t f)", + "rhs": "f", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 1, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": -1, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSelectFalseCondRewrite" + ] + } + }, + { + "name": "select-true-cond", + "status": "accepted", + "inputs": [ + "t", + "f" + ], + "lhs": "(select 1:i64 t f)", + "rhs": "t", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 1, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": -1, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSelectTrueCondRewrite" + ] + } 
+ }, + { + "name": "mul-pow2-to-shl", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(mul x 2:i64)", + "rhs": "(shl x 1:i64)", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesMulPow2ToShlRewrite" + ] + } + }, + { + "name": "xor-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(xor x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorZeroRewrite" + ] + } + }, + { + "name": "xor-self", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(xor x x)", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorSelfRewrite" + ] + } + }, + { + "name": "xor-cancel", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (xor x y) x)", + "rhs": "y", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + 
"dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorCancelRewrite" + ] + } + }, + { + "name": "xor-cancel-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (xor x y) y)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorCancelRhsRewrite" + ] + } + }, + { + "name": "xor-not-cancel", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (not x) (xor x y))", + "rhs": "(not y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorNotCancelRewrite" + ] + } + }, + { + "name": "xor-not-self", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(xor (not x) x)", + "rhs": "18446744073709551615:i64", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorNotSelfRewrite" + ] + } + }, + { + "name": "xor-not-not", + "status": "accepted", + "inputs": [ + 
"x", + "y" + ], + "lhs": "(xor (not x) (not y))", + "rhs": "(xor x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorNotNotRewrite" + ] + } + }, + { + "name": "xor-not-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (not x) (or x y))", + "rhs": "(or (not y) x)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorNotOrRewrite" + ] + } + }, + { + "name": "xor-not-allones", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(xor (not x) 18446744073709551615:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorNotAllOnesRewrite" + ] + } + }, + { + "name": "xor-and-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (and x y) (or x y))", + "rhs": "(xor x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + 
"delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorAndOrRewrite" + ] + } + }, + { + "name": "xor-and-not-lhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (and x y) (not x))", + "rhs": "(or (not x) y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorAndNotLhsRewrite" + ] + } + }, + { + "name": "xor-and-not-rhs", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (and x y) (not y))", + "rhs": "(or (not y) x)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorAndNotRhsRewrite" + ] + } + }, + { + "name": "xor-and-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (and x y) (xor x y))", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorAndXorRewrite" + ] + } + }, + { + "name": "xor-or-xor", + 
"status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(xor (or x y) (xor x y))", + "rhs": "(and x y)", + "cost": { + "lhs": { + "dmir_inst": 3, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesXorOrXorRewrite" + ] + } + }, + { + "name": "sub-self", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(sub x x)", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSubSelfRewrite" + ] + } + }, + { + "name": "shl-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(shl x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesShlZeroRewrite" + ] + } + }, + { + "name": "sshr-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(sshr x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + 
"adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSshrZeroRewrite" + ] + } + }, + { + "name": "ushr-zero", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(ushr x 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesUshrZeroRewrite" + ] + } + }, + { + "name": "adc-zero-carry", + "status": "seed", + "inputs": [ + "lhs", + "rhs" + ], + "lhs": "(adc lhs rhs 0:i64)", + "rhs": "(add lhs rhs)", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 1, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": -1, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAdcWithoutCarryRewrite" + ] + } + }, + { + "name": "adc-zero-operands", + "status": "seed", + "inputs": [ + "x" + ], + "lhs": "(adc x 0:i64 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 1, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": -1, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAdcZeroOperandsRewrite" + ] + } + }, + { + "name": "sbb-zero-borrow", + "status": "seed", + "inputs": [ + "lhs", + "rhs" + ], + "lhs": "(sbb lhs rhs 0:i64)", + "rhs": "(sub 
lhs rhs)", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 1, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": -1, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSbbWithoutBorrowRewrite" + ] + } + }, + { + "name": "sbb-zero-operands", + "status": "seed", + "inputs": [ + "x" + ], + "lhs": "(sbb x 0:i64 0:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 1, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": -1, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSbbZeroOperandsRewrite" + ] + } + }, + { + "name": "sbb-self-zero-borrow", + "status": "seed", + "inputs": [ + "x" + ], + "lhs": "(sbb x x 0:i64)", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 1, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": -1, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSbbSelfWithoutBorrowRewrite" + ] + } + }, + { + "name": "mul-zero-rhs", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(mul x 0:i64)", + "rhs": "0:i64", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + 
}, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesMulZeroRewrite" + ] + } + }, + { + "name": "mul-one-rhs", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(mul x 1:i64)", + "rhs": "x", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesMulOneRewrite" + ] + } + }, + { + "name": "add-self-to-shl1", + "status": "accepted", + "inputs": [ + "x" + ], + "lhs": "(add x x)", + "rhs": "(shl x 1:i64)", + "cost": { + "lhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": 0, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAddSelfToShl1Rewrite" + ] + } + }, + { + "name": "add-neg-x-y-to-sub-y-x", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(add (sub 0:i64 x) y)", + "rhs": "(sub y x)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAddNegToSubRewrite" + ] + } + }, + { + "name": "add-y-neg-x-to-sub-y-x", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(add y (sub 0:i64 x))", + "rhs": "(sub y x)", + "cost": { + "lhs": { + 
"dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAddNegToSubRewrite" + ] + } + }, + { + "name": "add-and-xor-to-or", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(add (and x y) (xor x y))", + "rhs": "(or x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAddAndXorToOrRewrite" + ] + } + }, + { + "name": "add-and-or-to-add", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(add (and x y) (or x y))", + "rhs": "(add x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesAddAndOrToAddRewrite" + ] + } + }, + { + "name": "sub-and-or-to-neg-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(sub (and x y) (or x y))", + "rhs": "(sub 0:i64 (xor x y))", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": 0, + "select_depth": 0, + 
"adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSubAndOrToNegXorRewrite" + ] + } + }, + { + "name": "sub-or-and-to-xor", + "status": "accepted", + "inputs": [ + "x", + "y" + ], + "lhs": "(sub (or x y) (and x y))", + "rhs": "(xor x y)", + "cost": { + "lhs": { + "dmir_inst": 2, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "rhs": { + "dmir_inst": 1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + }, + "delta": { + "dmir_inst": -1, + "select_depth": 0, + "adc_chain": 0, + "runtime_calls": 0 + } + }, + "validation": { + "modes": [ + "interpreter_fuzz" + ], + "coverage": [ + "DMirValidation.FuzzesSubOrAndToXorRewrite" + ] + } + } + ] +} diff --git a/src/compiler/mir/instruction.h b/src/compiler/mir/instruction.h index c01198741..083a0a93a 100644 --- a/src/compiler/mir/instruction.h +++ b/src/compiler/mir/instruction.h @@ -34,6 +34,10 @@ class MInstruction : public NonCopyable { EVM_UMUL128_HI, EVM_U256_MUL, EVM_U256_MUL_RESULT, + EVM_U256_ADD, + EVM_U256_ADD_RESULT, + EVM_U256_SUB, + EVM_U256_SUB_RESULT, EVM_UDIV128_BY64, EVM_UREM128_BY64, diff --git a/src/compiler/mir/instructions.cpp b/src/compiler/mir/instructions.cpp index a8749b500..c6e37cf25 100644 --- a/src/compiler/mir/instructions.cpp +++ b/src/compiler/mir/instructions.cpp @@ -224,7 +224,9 @@ void MInstruction::print(llvm::raw_ostream &OS) const { OS << getOpcodeString(_opcode) << " (" << getOperand<0>() << ')'; break; } - case EVM_U256_MUL: { + case EVM_U256_MUL: + case EVM_U256_ADD: + case EVM_U256_SUB: { OS << getOpcodeString(_opcode) << " ("; for (OperandNum I = 0; I < getNumOperands(); ++I) { OS << getOperand(I); @@ -241,6 +243,18 @@ void MInstruction::print(llvm::raw_ostream &OS) const { << ", idx = " << MulResult->getResultIdx() << ')'; break; } + case EVM_U256_ADD_RESULT: { + auto *AddResult = llvm::cast(this); + OS << getOpcodeString(_opcode) << " (" << 
AddResult->getAddInst() + << ", idx = " << AddResult->getResultIdx() << ')'; + break; + } + case EVM_U256_SUB_RESULT: { + auto *SubResult = llvm::cast(this); + OS << getOpcodeString(_opcode) << " (" << SubResult->getSubInst() + << ", idx = " << SubResult->getResultIdx() << ')'; + break; + } case EVM_UDIV128_BY64: { OS << getOpcodeString(_opcode) << " (" << getOperand<0>() << ", " << getOperand<1>() << ", " << getOperand<2>() << ')'; diff --git a/src/compiler/mir/instructions.h b/src/compiler/mir/instructions.h index 3891d2a91..2d9d1cb53 100644 --- a/src/compiler/mir/instructions.h +++ b/src/compiler/mir/instructions.h @@ -49,6 +49,10 @@ class BinaryInstruction : public FixedOperandInstruction<2> { std::forward(args)...); } + static bool classof(const MInstruction *Inst) { + return Inst->getKind() == MInstruction::BINARY; + } + protected: // Used for subclass BinaryInstruction(Kind kind, Opcode opcode, MType *type, MInstruction *lhs, @@ -860,6 +864,118 @@ class EvmU256MulResultInstruction : public UnaryInstruction { uint32_t ResultIdx = 0; }; +class EvmU256AddInstruction : public FixedOperandInstruction<8> { +public: + template + static EvmU256AddInstruction *create(Arguments &&...Args) { + return FixedOperandInstruction::create( + std::forward(Args)...); + } + + static bool classof(const MInstruction *Instr) { + return Instr->getKind() == EVM_U256_ADD; + } + +private: + friend class FixedOperandInstruction; + EvmU256AddInstruction(MType *Type, MInstruction *A0, MInstruction *A1, + MInstruction *A2, MInstruction *A3, MInstruction *B0, + MInstruction *B1, MInstruction *B2, MInstruction *B3) + : FixedOperandInstruction(MInstruction::EVM_U256_ADD, OP_evm_u256_add, 8, + Type) { + setOperand<0>(A0); + setOperand<1>(A1); + setOperand<2>(A2); + setOperand<3>(A3); + setOperand<4>(B0); + setOperand<5>(B1); + setOperand<6>(B2); + setOperand<7>(B3); + } +}; + +class EvmU256AddResultInstruction : public UnaryInstruction { +public: + template + static EvmU256AddResultInstruction 
*create(Arguments &&...Args) { + return FixedOperandInstruction::create( + std::forward(Args)...); + } + + static bool classof(const MInstruction *Instr) { + return Instr->getKind() == EVM_U256_ADD_RESULT; + } + + const MInstruction *getAddInst() const { return getOperand<0>(); } + uint32_t getResultIdx() const { return ResultIdx; } + +private: + friend class FixedOperandInstruction; + EvmU256AddResultInstruction(MType *Type, MInstruction *AddInst, + uint32_t ResultIdx) + : UnaryInstruction(MInstruction::EVM_U256_ADD_RESULT, + OP_evm_u256_add_result, Type, AddInst), + ResultIdx(ResultIdx) {} + + uint32_t ResultIdx = 0; +}; + +class EvmU256SubInstruction : public FixedOperandInstruction<8> { +public: + template + static EvmU256SubInstruction *create(Arguments &&...Args) { + return FixedOperandInstruction::create( + std::forward(Args)...); + } + + static bool classof(const MInstruction *Instr) { + return Instr->getKind() == EVM_U256_SUB; + } + +private: + friend class FixedOperandInstruction; + EvmU256SubInstruction(MType *Type, MInstruction *A0, MInstruction *A1, + MInstruction *A2, MInstruction *A3, MInstruction *B0, + MInstruction *B1, MInstruction *B2, MInstruction *B3) + : FixedOperandInstruction(MInstruction::EVM_U256_SUB, OP_evm_u256_sub, 8, + Type) { + setOperand<0>(A0); + setOperand<1>(A1); + setOperand<2>(A2); + setOperand<3>(A3); + setOperand<4>(B0); + setOperand<5>(B1); + setOperand<6>(B2); + setOperand<7>(B3); + } +}; + +class EvmU256SubResultInstruction : public UnaryInstruction { +public: + template + static EvmU256SubResultInstruction *create(Arguments &&...Args) { + return FixedOperandInstruction::create( + std::forward(Args)...); + } + + static bool classof(const MInstruction *Instr) { + return Instr->getKind() == EVM_U256_SUB_RESULT; + } + + const MInstruction *getSubInst() const { return getOperand<0>(); } + uint32_t getResultIdx() const { return ResultIdx; } + +private: + friend class FixedOperandInstruction; + EvmU256SubResultInstruction(MType 
*Type, MInstruction *SubInst, + uint32_t ResultIdx) + : UnaryInstruction(MInstruction::EVM_U256_SUB_RESULT, + OP_evm_u256_sub_result, Type, SubInst), + ResultIdx(ResultIdx) {} + + uint32_t ResultIdx = 0; +}; + // EVM 128-bit / 64-bit unsigned division: (hi:lo) / divisor -> quotient. class EvmUdiv128By64Instruction : public FixedOperandInstruction<3> { public: diff --git a/src/compiler/mir/opcodes.def b/src/compiler/mir/opcodes.def index 52851f89c..9057270d7 100644 --- a/src/compiler/mir/opcodes.def +++ b/src/compiler/mir/opcodes.def @@ -70,6 +70,10 @@ OPCODE(evm_umul128_lo) // 64x64->64 multiplication (low bits) OPCODE(evm_umul128_hi) // extract high 64 bits from evm_umul128_lo OPCODE(evm_u256_mul) // 256x256->256 multiplication pseudo op OPCODE(evm_u256_mul_result) // extract extra limb from evm_u256_mul +OPCODE(evm_u256_add) // 256+256->256 addition pseudo op +OPCODE(evm_u256_add_result) // extract extra limb from evm_u256_add +OPCODE(evm_u256_sub) // 256-256->256 subtraction pseudo op +OPCODE(evm_u256_sub_result) // extract extra limb from evm_u256_sub OPCODE(evm_udiv128_by64) // unsigned 128-bit (hi:lo) divided by 64-bit divisor -> 64-bit quotient OPCODE(evm_urem128_by64) // unsigned remainder from the same 128/64 division as evm_udiv128_by64 // OP_OTHER_EXPR_END diff --git a/src/compiler/mir/pass/dmir_rewrite.h b/src/compiler/mir/pass/dmir_rewrite.h new file mode 100644 index 000000000..660592956 --- /dev/null +++ b/src/compiler/mir/pass/dmir_rewrite.h @@ -0,0 +1,1017 @@ +// Copyright (C) 2025 the DTVM authors. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "compiler/mir/constants.h" +#include "compiler/mir/function.h" +#include "compiler/mir/instructions.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Casting.h" + +namespace COMPILER { + +class DMirRewritePass { +public: + bool runOnMFunction(MFunction &F) { + Func = &F; + Changed = false; + + for (MBasicBlock *BB : F) { + runOnBasicBlock(*BB); + } + +#ifdef ZEN_ENABLE_MULTIPASS_JIT_LOGGING + if (Changed) { + llvm::dbgs() << "\n########## MIR Dump After dMIR Rewrite ##########\n\n"; + F.dump(); + } +#endif + return Changed; + } + +private: + void runOnBasicBlock(MBasicBlock &BB) { + RewriteCache.clear(); + for (MInstruction *Inst : BB) { + rewriteOperands(*Inst, BB); + } + } + + void rewriteOperands(MInstruction &Inst, MBasicBlock &BB) { + for (uint32_t OperandIdx = 0; OperandIdx < Inst.getNumOperands(); + ++OperandIdx) { + MInstruction *Operand = Inst.getOperand(OperandIdx); + MInstruction *Rewritten = rewriteExprTree(Operand, BB); + if (Rewritten != Operand) { + Inst.setOperand(OperandIdx, Rewritten); + Changed = true; + } + } + } + + MInstruction *rewriteExprTree(MInstruction *Inst, MBasicBlock &BB, + uint32_t Depth = 0) { + if (Depth > 16) { + return Inst; + } + + auto CacheIt = RewriteCache.find(Inst); + if (CacheIt != RewriteCache.end()) { + return CacheIt->second; + } + + for (uint32_t OperandIdx = 0; OperandIdx < Inst->getNumOperands(); + ++OperandIdx) { + MInstruction *Operand = Inst->getOperand(OperandIdx); + MInstruction *Rewritten = rewriteExprTree(Operand, BB, Depth + 1); + if (Rewritten != Operand) { + Inst->setOperand(OperandIdx, Rewritten); + Changed = true; + } + } + + MInstruction *Result = Inst; + if (MInstruction *Replacement = tryRewrite(*Inst, BB)) { + if (Replacement != Inst) { + Changed = true; + Result = rewriteExprTree(Replacement, BB, Depth + 1); + } else { + Result = Replacement; + } + } + RewriteCache[Inst] = Result; + return Result; + } + + MInstruction 
*tryRewrite(MInstruction &Inst, MBasicBlock &BB) { + switch (Inst.getOpcode()) { + case OP_add: + return rewriteAdd(llvm::cast(Inst), BB); + case OP_sub: + return rewriteSub(llvm::cast(Inst), BB); + case OP_and: + return rewriteAnd(llvm::cast(Inst), BB); + case OP_or: + return rewriteOr(llvm::cast(Inst), BB); + case OP_xor: + return rewriteXor(llvm::cast(Inst), BB); + case OP_mul: + return rewriteMul(llvm::cast(Inst), BB); + case OP_shl: + case OP_sshr: + case OP_ushr: + return rewriteShift(llvm::cast(Inst)); + case OP_not: + return rewriteNot(llvm::cast(Inst)); + case OP_select: + return rewriteSelect(llvm::cast(Inst)); + case OP_adc: + return rewriteAdc(llvm::cast(Inst), BB); + case OP_sbb: + return rewriteSbb(llvm::cast(Inst), BB); + default: + return nullptr; + } + } + + MInstruction *rewriteAdd(BinaryInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + // Fold add(x, 0) -> x only when x is itself a constant (pure constant + // folding). For non-constant x, keeping the add node preserves a natural + // register-copy point that benefits downstream register allocation; the + // i64 ADD-with-immediate lowering path is more efficient with the node + // present than extending the live range of x across all uses. 
+ if (isZeroConst(*RHS) && isIntegerConst(*LHS)) { + return LHS; + } + if (isZeroConst(*LHS) && isIntegerConst(*RHS)) { + return RHS; + } + // (add x x) -> (shl x 1): doubling is a left shift by one + if (structurallyEqual(*LHS, *RHS)) { + return createBinaryInstruction(OP_shl, *Inst.getType(), LHS, + createOneConstant(*Inst.getType(), BB), + BB); + } + // (add (sub 0 x) y) -> (sub y x): negation folding + if (isNeg(*LHS)) { + return createBinaryInstruction(OP_sub, *Inst.getType(), RHS, + getNegOperand(*LHS), BB); + } + if (isNeg(*RHS)) { + return createBinaryInstruction(OP_sub, *Inst.getType(), LHS, + getNegOperand(*RHS), BB); + } + // (add (and x y) (xor x y)) -> (or x y) + if (const auto *AndInst = + matchBinaryOperandPair(*LHS, *RHS, OP_and, OP_xor)) { + return createBinaryInstruction(OP_or, *Inst.getType(), + AndInst->getOperand<0>(), + AndInst->getOperand<1>(), BB); + } + // (add (and x y) (or x y)) -> (add x y) + if (const auto *AndInst = + matchBinaryOperandPair(*LHS, *RHS, OP_and, OP_or)) { + return createBinaryInstruction(OP_add, *Inst.getType(), + AndInst->getOperand<0>(), + AndInst->getOperand<1>(), BB); + } + return nullptr; + } + + MInstruction *rewriteSub(BinaryInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + if (isZeroConst(*RHS)) { + return LHS; + } + if (structurallyEqual(*LHS, *RHS)) { + return createZeroConstant(*Inst.getType(), BB); + } + // (sub (and x y) (or x y)) -> (sub 0 (xor x y)) + if (const auto *AndInst = + matchBinaryOperandPair(*LHS, *RHS, OP_and, OP_or)) { + MInstruction *XorInst = createBinaryInstruction( + OP_xor, *Inst.getType(), AndInst->getOperand<0>(), + AndInst->getOperand<1>(), BB); + return createBinaryInstruction(OP_sub, *Inst.getType(), + createZeroConstant(*Inst.getType(), BB), + XorInst, BB); + } + // (sub (or x y) (and x y)) -> (xor x y) + if (const auto *OrInst = + matchBinaryOperandPair(*LHS, *RHS, OP_or, OP_and)) { + return 
createBinaryInstruction(OP_xor, *Inst.getType(), + OrInst->getOperand<0>(), + OrInst->getOperand<1>(), BB); + } + return nullptr; + } + + MInstruction *rewriteAnd(BinaryInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + if (isZeroConst(*LHS) || isZeroConst(*RHS)) { + return createZeroConstant(*Inst.getType(), BB); + } + if (isAllOnesConst(*LHS)) { + return RHS; + } + if (isAllOnesConst(*RHS)) { + return LHS; + } + if (structurallyEqual(*LHS, *RHS)) { + return LHS; + } + if (isNotOf(*LHS, *RHS) || isNotOf(*RHS, *LHS)) { + return createZeroConstant(*Inst.getType(), BB); + } + if (MInstruction *Replacement = rewriteAndWithNestedAnd(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteAndWithNestedAnd(*RHS, *LHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteAndWithNestedOr(*LHS, *RHS)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteAndWithNestedOr(*RHS, *LHS)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteAndWithNestedNot(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteAndWithNestedNot(*RHS, *LHS, BB)) { + return Replacement; + } + return nullptr; + } + + MInstruction *rewriteOr(BinaryInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + if (isZeroConst(*LHS)) { + return RHS; + } + if (isZeroConst(*RHS)) { + return LHS; + } + if (isAllOnesConst(*LHS) || isAllOnesConst(*RHS) || isNotOf(*LHS, *RHS) || + isNotOf(*RHS, *LHS)) { + return createAllOnesConstant(*Inst.getType(), BB); + } + if (structurallyEqual(*LHS, *RHS)) { + return LHS; + } + if (MInstruction *Replacement = rewriteOrWithNestedAnd(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteOrWithNestedAnd(*RHS, *LHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = 
rewriteOrWithNestedOr(*LHS, *RHS)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteOrWithNestedOr(*RHS, *LHS)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteOrWithNestedXor(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteOrWithNestedXor(*RHS, *LHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteOrWithNestedNot(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteOrWithNestedNot(*RHS, *LHS, BB)) { + return Replacement; + } + return nullptr; + } + + MInstruction *rewriteXor(BinaryInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + if (isZeroConst(*LHS)) { + return RHS; + } + if (isZeroConst(*RHS)) { + return LHS; + } + if (structurallyEqual(*LHS, *RHS)) { + return createZeroConstant(*Inst.getType(), BB); + } + if (isNotOf(*LHS, *RHS) || isNotOf(*RHS, *LHS)) { + return createAllOnesConstant(*Inst.getType(), BB); + } + if (MInstruction *Replacement = rewriteXorWithNestedXor(*LHS, *RHS)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithNestedXor(*RHS, *LHS)) { + return Replacement; + } + if (MInstruction *Replacement = + rewriteXorWithNestedNotAndAllOnes(*LHS, *RHS)) { + return Replacement; + } + if (MInstruction *Replacement = + rewriteXorWithNestedNotAndAllOnes(*RHS, *LHS)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithNestedNot(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithNestedNot(*RHS, *LHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithTwoNots(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithNestedAnd(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithNestedAnd(*RHS, *LHS, BB)) { + return Replacement; + } + if (MInstruction 
*Replacement = rewriteXorWithNestedOr(*LHS, *RHS, BB)) { + return Replacement; + } + if (MInstruction *Replacement = rewriteXorWithNestedOr(*RHS, *LHS, BB)) { + return Replacement; + } + return nullptr; + } + + MInstruction *rewriteMul(BinaryInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + if (isZeroConst(*LHS) || isZeroConst(*RHS)) { + return createZeroConstant(*Inst.getType(), BB); + } + if (isOneConst(*LHS)) { + return RHS; + } + if (isOneConst(*RHS)) { + return LHS; + } + // mul(x, 2^k) -> shl(x, k) for i64 types when k >= 1 + if (Inst.getType()->isInteger() && Inst.getType()->getBitWidth() == 64 && + isIntegerConst(*RHS)) { + uint64_t C = llvm::cast( + &llvm::cast(RHS)->getConstant()) + ->getValue() + .getZExtValue(); + if (C > 1 && (C & (C - 1)) == 0) { + uint64_t K = static_cast(__builtin_ctzll(C)); + return createBinaryInstruction( + OP_shl, *Inst.getType(), LHS, + createIntegerConstant(*Inst.getType(), llvm::APInt(64, K), BB), BB); + } + } + return nullptr; + } + + /// Carry-dead analysis: returns true when the carry/borrow output of the + /// instruction that feeds this ADC/SBB is provably zero. + /// + /// Handles: + /// 1. const(0): zero constant has no carry (chain-head sentinel) + /// 2. add(x, 0) / add(0, x): adding zero never overflows, carry = 0 + /// 3. adc(x, 0, prev) / adc(0, y, prev) where isCarryDead(prev): + /// x + 0 + 0 never overflows + /// 4. sub(x, 0): subtracting zero never borrows + /// 5. sbb(x, 0, prev) where isCarryDead(prev): x - 0 - 0 never borrows + /// 6. zext(icmp_ult(x, 0)): comparison with zero always false, zext + /// produces 0 + bool isCarryDead(const MInstruction &CarryProducer, + uint32_t Depth = 0) const { + if (Depth > 8) { + return false; // Conservative: assume carry is live + } + // A const(0) carry operand means "no incoming carry" (chain head). 
+ if (isZeroConst(CarryProducer)) { + return true; + } + // add(x, 0) or add(0, x): adding zero never produces a carry. + if (CarryProducer.getOpcode() == OP_add && + CarryProducer.getKind() == MInstruction::BINARY) { + const auto &Add = llvm::cast(CarryProducer); + if (isZeroConst(*Add.getOperand<0>()) || + isZeroConst(*Add.getOperand<1>())) { + return true; + } + } + // adc(x, 0, prev) where prev's carry is also dead: recursive chain. + if (CarryProducer.getOpcode() == OP_adc) { + const auto &Adc = llvm::cast(CarryProducer); + if ((isZeroConst(*Adc.getOperand<0>()) || + isZeroConst(*Adc.getOperand<1>())) && + isCarryDead(*Adc.getOperand<2>(), Depth + 1)) { + return true; + } + } + // sub(x, 0): subtracting zero never borrows. + if (CarryProducer.getOpcode() == OP_sub && + CarryProducer.getKind() == MInstruction::BINARY) { + const auto &Sub = llvm::cast(CarryProducer); + if (isZeroConst(*Sub.getOperand<1>())) { + return true; + } + } + // sbb(x, 0, prev) where prev's borrow is dead: recursive chain. + if (CarryProducer.getOpcode() == OP_sbb) { + const auto &Sbb = llvm::cast(CarryProducer); + if (isZeroConst(*Sbb.getOperand<1>()) && + isCarryDead(*Sbb.getOperand<2>(), Depth + 1)) { + return true; + } + } + // zext(icmp(ULT, x, 0)): no unsigned value is less than 0, always false. 
+ if (CarryProducer.getOpcode() == OP_uext && + CarryProducer.getKind() == MInstruction::UNARY) { + const MInstruction *Inner = CarryProducer.getOperand<0>(); + if (Inner->getOpcode() == OP_cmp && + llvm::cast(Inner)->getPredicate() == + CmpInstruction::ICMP_ULT) { + if (isZeroConst(*Inner->getOperand<1>())) { + return true; + } + } + } + return false; + } + + MInstruction *rewriteAdc(AdcInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + MInstruction *CarryIn = Inst.getOperand<2>(); + if (!isCarryDead(*CarryIn)) { + return nullptr; + } + // Carry is provably zero: adc(x, y, dead) → add(x, y) + if (isZeroConst(*RHS)) { + return LHS; // adc(x, 0, dead) → x + } + if (isZeroConst(*LHS)) { + return RHS; // adc(0, y, dead) → y + } + return createBinaryInstruction(OP_add, *Inst.getType(), LHS, RHS, BB); + } + + MInstruction *rewriteSbb(SbbInstruction &Inst, MBasicBlock &BB) { + MInstruction *LHS = Inst.getOperand<0>(); + MInstruction *RHS = Inst.getOperand<1>(); + MInstruction *BorrowIn = Inst.getOperand<2>(); + if (!isCarryDead(*BorrowIn)) { + return nullptr; + } + // Borrow is provably zero: sbb(x, y, dead) → sub(x, y) + if (isZeroConst(*RHS)) { + return LHS; // sbb(x, 0, dead) → x + } + if (structurallyEqual(*LHS, *RHS)) { + return createZeroConstant(*Inst.getType(), BB); // sbb(x, x, dead) → 0 + } + return createBinaryInstruction(OP_sub, *Inst.getType(), LHS, RHS, BB); + } + + MInstruction *rewriteShift(BinaryInstruction &Inst) const { + if (isZeroConst(*Inst.getOperand<1>())) { + return Inst.getOperand<0>(); + } + return nullptr; + } + + MInstruction *rewriteNot(NotInstruction &Inst) const { + MInstruction *Operand = Inst.getOperand<0>(); + if (Operand->getOpcode() == OP_not) { + return Operand->getOperand<0>(); + } + return nullptr; + } + + MInstruction *rewriteSelect(SelectInstruction &Inst) const { + MInstruction *Cond = Inst.getOperand<0>(); + MInstruction *TrueValue = 
Inst.getOperand<1>(); + MInstruction *FalseValue = Inst.getOperand<2>(); + // select(0, t, f) -> f: condition is always false + if (isZeroConst(*Cond)) { + return FalseValue; + } + // select(nonzero, t, f) -> t: condition is always true + if (isNonZeroIntConst(*Cond)) { + return TrueValue; + } + if (structurallyEqual(*TrueValue, *FalseValue)) { + return TrueValue; + } + return nullptr; + } + + MInstruction *rewriteAndWithNestedAnd(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + const BinaryInstruction *NestedAnd = + getBinaryWithOpcode(NestedCandidate, OP_and); + if (NestedAnd == nullptr) { + return nullptr; + } + + if (structurallyContains(*NestedAnd, Other)) { + return const_cast(NestedAnd); + } + if (isNotOf(Other, *NestedAnd->getOperand<0>()) || + isNotOf(Other, *NestedAnd->getOperand<1>())) { + return createZeroConstant(*NestedAnd->getType(), BB); + } + + const BinaryInstruction *OtherOr = getBinaryWithOpcode(Other, OP_or); + if (OtherOr != nullptr && hasSameUnorderedOperands(*NestedAnd, *OtherOr)) { + return const_cast(NestedAnd); + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr && + hasSameUnorderedOperands(*NestedAnd, *OtherXor)) { + return createZeroConstant(*NestedAnd->getType(), BB); + } + + return nullptr; + } + + MInstruction *rewriteAndWithNestedOr(MInstruction &NestedCandidate, + MInstruction &Other) const { + const BinaryInstruction *NestedOr = + getBinaryWithOpcode(NestedCandidate, OP_or); + if (NestedOr == nullptr) { + return nullptr; + } + + if (structurallyContains(*NestedOr, Other)) { + return &Other; + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr && hasSameUnorderedOperands(*NestedOr, *OtherXor)) { + return const_cast(OtherXor); + } + + return nullptr; + } + + MInstruction *rewriteAndWithNestedNot(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + if (NestedCandidate.getOpcode() 
!= OP_not) { + return nullptr; + } + + const BinaryInstruction *OtherOr = getBinaryWithOpcode(Other, OP_or); + if (OtherOr != nullptr) { + if (MInstruction *OtherValue = getOtherBinaryOperand( + *OtherOr, *NestedCandidate.getOperand<0>())) { + return createBinaryInstruction(OP_and, *OtherOr->getType(), + &NestedCandidate, OtherValue, BB); + } + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr) { + if (MInstruction *OtherValue = getOtherBinaryOperand( + *OtherXor, *NestedCandidate.getOperand<0>())) { + return createBinaryInstruction(OP_and, *OtherXor->getType(), + &NestedCandidate, OtherValue, BB); + } + } + + return nullptr; + } + + MInstruction *rewriteOrWithNestedAnd(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + const BinaryInstruction *NestedAnd = + getBinaryWithOpcode(NestedCandidate, OP_and); + if (NestedAnd == nullptr) { + return nullptr; + } + + if (structurallyContains(*NestedAnd, Other)) { + return &Other; + } + + const BinaryInstruction *OtherOr = getBinaryWithOpcode(Other, OP_or); + if (OtherOr != nullptr && hasSameUnorderedOperands(*NestedAnd, *OtherOr)) { + return const_cast(OtherOr); + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr && + hasSameUnorderedOperands(*NestedAnd, *OtherXor)) { + return createBinaryInstruction(OP_or, *NestedAnd->getType(), + NestedAnd->getOperand<0>(), + NestedAnd->getOperand<1>(), BB); + } + + if (Other.getOpcode() == OP_not) { + if (MInstruction *OtherValue = + getOtherBinaryOperand(*NestedAnd, *Other.getOperand<0>())) { + return createBinaryInstruction(OP_or, *NestedAnd->getType(), &Other, + OtherValue, BB); + } + } + + return nullptr; + } + + MInstruction *rewriteOrWithNestedOr(MInstruction &NestedCandidate, + MInstruction &Other) const { + const BinaryInstruction *NestedOr = + getBinaryWithOpcode(NestedCandidate, OP_or); + if (NestedOr == nullptr) { + return nullptr; + } + + if 
(structurallyContains(*NestedOr, Other)) { + return const_cast(NestedOr); + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr && hasSameUnorderedOperands(*NestedOr, *OtherXor)) { + return const_cast(NestedOr); + } + + return nullptr; + } + + MInstruction *rewriteOrWithNestedXor(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + const BinaryInstruction *NestedXor = + getBinaryWithOpcode(NestedCandidate, OP_xor); + if (NestedXor == nullptr) { + return nullptr; + } + + if (structurallyContains(*NestedXor, Other)) { + return createBinaryInstruction(OP_or, *NestedXor->getType(), + NestedXor->getOperand<0>(), + NestedXor->getOperand<1>(), BB); + } + + return nullptr; + } + + MInstruction *rewriteOrWithNestedNot(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + if (NestedCandidate.getOpcode() != OP_not) { + return nullptr; + } + + const BinaryInstruction *OtherOr = getBinaryWithOpcode(Other, OP_or); + if (OtherOr == nullptr) { + return nullptr; + } + + if (structurallyContains(*OtherOr, *NestedCandidate.getOperand<0>())) { + return createAllOnesConstant(*OtherOr->getType(), BB); + } + return nullptr; + } + + MInstruction *rewriteXorWithNestedXor(MInstruction &NestedCandidate, + MInstruction &Other) const { + const BinaryInstruction *NestedXor = + getBinaryWithOpcode(NestedCandidate, OP_xor); + if (NestedXor == nullptr) { + return nullptr; + } + + if (structurallyEqual(*NestedXor->getOperand<0>(), Other)) { + return const_cast(NestedXor->getOperand<1>()); + } + if (structurallyEqual(*NestedXor->getOperand<1>(), Other)) { + return const_cast(NestedXor->getOperand<0>()); + } + return nullptr; + } + + MInstruction *rewriteXorWithNestedNotAndAllOnes(MInstruction &NestedCandidate, + MInstruction &Other) const { + if (!isAllOnesConst(Other) || NestedCandidate.getOpcode() != OP_not) { + return nullptr; + } + return NestedCandidate.getOperand<0>(); + } + + MInstruction 
*rewriteXorWithNestedNot(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + if (NestedCandidate.getOpcode() != OP_not) { + return nullptr; + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr) { + if (MInstruction *OtherValue = getOtherBinaryOperand( + *OtherXor, *NestedCandidate.getOperand<0>())) { + return createNotInstruction(*OtherXor->getType(), OtherValue, BB); + } + } + + const BinaryInstruction *OtherOr = getBinaryWithOpcode(Other, OP_or); + if (OtherOr != nullptr) { + if (MInstruction *OtherValue = getOtherBinaryOperand( + *OtherOr, *NestedCandidate.getOperand<0>())) { + return createBinaryInstruction( + OP_or, *OtherOr->getType(), + createNotInstruction(*OtherOr->getType(), OtherValue, BB), + OtherOr->getOperand(0) == OtherValue ? OtherOr->getOperand(1) + : OtherOr->getOperand(0), + BB); + } + } + + return nullptr; + } + + MInstruction *rewriteXorWithTwoNots(MInstruction &LHS, MInstruction &RHS, + MBasicBlock &BB) { + if (LHS.getOpcode() != OP_not || RHS.getOpcode() != OP_not) { + return nullptr; + } + return createBinaryInstruction(OP_xor, *LHS.getType(), LHS.getOperand<0>(), + RHS.getOperand<0>(), BB); + } + + MInstruction *rewriteXorWithNestedAnd(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + const BinaryInstruction *NestedAnd = + getBinaryWithOpcode(NestedCandidate, OP_and); + if (NestedAnd == nullptr) { + return nullptr; + } + + const BinaryInstruction *OtherOr = getBinaryWithOpcode(Other, OP_or); + if (OtherOr != nullptr && hasSameUnorderedOperands(*NestedAnd, *OtherOr)) { + return createBinaryInstruction(OP_xor, *NestedAnd->getType(), + NestedAnd->getOperand<0>(), + NestedAnd->getOperand<1>(), BB); + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr && + hasSameUnorderedOperands(*NestedAnd, *OtherXor)) { + return createBinaryInstruction(OP_or, *NestedAnd->getType(), + 
NestedAnd->getOperand<0>(), + NestedAnd->getOperand<1>(), BB); + } + + if (Other.getOpcode() == OP_not) { + if (MInstruction *OtherValue = + getOtherBinaryOperand(*NestedAnd, *Other.getOperand<0>())) { + return createBinaryInstruction(OP_or, *NestedAnd->getType(), &Other, + OtherValue, BB); + } + } + + return nullptr; + } + + MInstruction *rewriteXorWithNestedOr(MInstruction &NestedCandidate, + MInstruction &Other, MBasicBlock &BB) { + const BinaryInstruction *NestedOr = + getBinaryWithOpcode(NestedCandidate, OP_or); + if (NestedOr == nullptr) { + return nullptr; + } + + const BinaryInstruction *OtherXor = getBinaryWithOpcode(Other, OP_xor); + if (OtherXor != nullptr && hasSameUnorderedOperands(*NestedOr, *OtherXor)) { + return createBinaryInstruction(OP_and, *NestedOr->getType(), + NestedOr->getOperand<0>(), + NestedOr->getOperand<1>(), BB); + } + + return nullptr; + } + + bool structurallyEqual(const MInstruction &LHS, + const MInstruction &RHS) const { + if (&LHS == &RHS) { + return true; + } + if (LHS.getOpcode() != RHS.getOpcode() || LHS.getKind() != RHS.getKind() || + LHS.getType() != RHS.getType() || + LHS.getNumOperands() != RHS.getNumOperands()) { + return false; + } + + switch (LHS.getOpcode()) { + case OP_const: { + const auto &LHSConst = llvm::cast(LHS).getConstant(); + const auto &RHSConst = llvm::cast(RHS).getConstant(); + if (!LHSConst.getType().isInteger() || !RHSConst.getType().isInteger()) { + return false; + } + return llvm::cast(&LHSConst)->getValue() == + llvm::cast(&RHSConst)->getValue(); + } + case OP_dread: + return llvm::cast(LHS).getVarIdx() == + llvm::cast(RHS).getVarIdx(); + case OP_cmp: + if (llvm::cast(LHS).getPredicate() != + llvm::cast(RHS).getPredicate()) { + return false; + } + break; + case OP_load: { + // NOTE: Load instructions are compared structurally (by address + // computation parameters). This assumes no intervening stores between the + // two loads. 
In the current EVM frontend, each load comes from + // extractU256Operand and produces a unique instruction, so pointer + // equality catches all real cases. If the frontend evolves to produce + // aliased loads, this must be revisited. + const auto &LHSLoad = llvm::cast(LHS); + const auto &RHSLoad = llvm::cast(RHS); + if (LHSLoad.getScale() != RHSLoad.getScale() || + LHSLoad.getOffset() != RHSLoad.getOffset() || + LHSLoad.getSrcType() != RHSLoad.getSrcType() || + LHSLoad.getDestType() != RHSLoad.getDestType() || + LHSLoad.getSext() != RHSLoad.getSext()) { + return false; + } + const MInstruction *LHSIndex = LHSLoad.getIndex(); + const MInstruction *RHSIndex = RHSLoad.getIndex(); + if (LHSIndex == nullptr || RHSIndex == nullptr) { + if (LHSIndex != RHSIndex) { + return false; + } + break; + } + if (!structurallyEqual(*LHSIndex, *RHSIndex)) { + return false; + } + break; + } + case OP_evm_u256_add_result: { + const auto &LHSRes = llvm::cast(LHS); + const auto &RHSRes = llvm::cast(RHS); + if (LHSRes.getResultIdx() != RHSRes.getResultIdx()) { + return false; + } + break; + } + case OP_evm_u256_sub_result: { + const auto &LHSRes = llvm::cast(LHS); + const auto &RHSRes = llvm::cast(RHS); + if (LHSRes.getResultIdx() != RHSRes.getResultIdx()) { + return false; + } + break; + } + default: + break; + } + + for (uint32_t OperandIdx = 0; OperandIdx < LHS.getNumOperands(); + ++OperandIdx) { + if (!structurallyEqual(*LHS.getOperand(OperandIdx), + *RHS.getOperand(OperandIdx))) { + return false; + } + } + return true; + } + + bool isNotOf(const MInstruction &MaybeNot, const MInstruction &Value) const { + return MaybeNot.getOpcode() == OP_not && + structurallyEqual(*MaybeNot.getOperand<0>(), Value); + } + + const BinaryInstruction *getBinaryWithOpcode(const MInstruction &Inst, + Opcode Opc) const { + if (Inst.getKind() != MInstruction::BINARY || Inst.getOpcode() != Opc) { + return nullptr; + } + return static_cast(&Inst); + } + + // Match a pair of binary operands where one has 
opcode OpcA and the other + // has opcode OpcB, and both share the same unordered operand set. + // Returns the OpcA instruction on success, nullptr otherwise. + const BinaryInstruction *matchBinaryOperandPair(const MInstruction &LHS, + const MInstruction &RHS, + Opcode OpcA, + Opcode OpcB) const { + if (const auto *A = getBinaryWithOpcode(LHS, OpcA)) + if (const auto *B = getBinaryWithOpcode(RHS, OpcB)) + if (hasSameUnorderedOperands(*A, *B)) + return A; + if (const auto *A = getBinaryWithOpcode(RHS, OpcA)) + if (const auto *B = getBinaryWithOpcode(LHS, OpcB)) + if (hasSameUnorderedOperands(*A, *B)) + return A; + return nullptr; + } + + bool structurallyContains(const BinaryInstruction &Inst, + const MInstruction &Value) const { + return structurallyEqual(*Inst.getOperand<0>(), Value) || + structurallyEqual(*Inst.getOperand<1>(), Value); + } + + MInstruction *getOtherBinaryOperand(const BinaryInstruction &Inst, + const MInstruction &Value) const { + if (structurallyEqual(*Inst.getOperand<0>(), Value)) { + return const_cast(Inst.getOperand<1>()); + } + if (structurallyEqual(*Inst.getOperand<1>(), Value)) { + return const_cast(Inst.getOperand<0>()); + } + return nullptr; + } + + bool hasSameUnorderedOperands(const BinaryInstruction &LHS, + const BinaryInstruction &RHS) const { + return (structurallyEqual(*LHS.getOperand<0>(), *RHS.getOperand<0>()) && + structurallyEqual(*LHS.getOperand<1>(), *RHS.getOperand<1>())) || + (structurallyEqual(*LHS.getOperand<0>(), *RHS.getOperand<1>()) && + structurallyEqual(*LHS.getOperand<1>(), *RHS.getOperand<0>())); + } + + static bool isIntegerConst(const MInstruction &Inst) { + return Inst.getOpcode() == OP_const && Inst.getType()->isInteger(); + } + + static bool isZeroConst(const MInstruction &Inst) { + if (!isIntegerConst(Inst)) { + return false; + } + return llvm::cast( + &llvm::cast(Inst).getConstant()) + ->getValue() + .isZero(); + } + + static bool isNonZeroIntConst(const MInstruction &Inst) { + if (!isIntegerConst(Inst)) { 
+ return false; + } + return !llvm::cast( + &llvm::cast(Inst).getConstant()) + ->getValue() + .isZero(); + } + + static bool isOneConst(const MInstruction &Inst) { + if (!isIntegerConst(Inst)) { + return false; + } + return llvm::cast( + &llvm::cast(Inst).getConstant()) + ->getValue() + .isOne(); + } + + static bool isAllOnesConst(const MInstruction &Inst) { + if (!isIntegerConst(Inst)) { + return false; + } + return llvm::cast( + &llvm::cast(Inst).getConstant()) + ->getValue() + .isAllOnes(); + } + + MInstruction *createZeroConstant(MType &Type, MBasicBlock &BB) { + return createIntegerConstant(Type, llvm::APInt(Type.getBitWidth(), 0), BB); + } + + MInstruction *createOneConstant(MType &Type, MBasicBlock &BB) { + return createIntegerConstant(Type, llvm::APInt(Type.getBitWidth(), 1), BB); + } + + // Returns true if Inst is (sub 0 x), i.e. a negation of x. + static bool isNeg(const MInstruction &Inst) { + if (Inst.getOpcode() != OP_sub) { + return false; + } + return isZeroConst(*Inst.getOperand<0>()); + } + + // Returns the negated operand x from (sub 0 x). Caller must check isNeg. 
+ static MInstruction *getNegOperand(MInstruction &Inst) { + return Inst.getOperand<1>(); + } + + MInstruction *createAllOnesConstant(MType &Type, MBasicBlock &BB) { + return createIntegerConstant( + Type, llvm::APInt::getAllOnes(Type.getBitWidth()), BB); + } + + MInstruction *createIntegerConstant(MType &Type, llvm::APInt Value, + MBasicBlock &BB) { + return Func->createInstruction( + false, BB, &Type, *MConstantInt::get(Func->getContext(), Type, Value)); + } + + MInstruction *createNotInstruction(MType &Type, const MInstruction *Operand, + MBasicBlock &BB) { + return Func->createInstruction( + false, BB, &Type, const_cast(Operand)); + } + + MInstruction *createBinaryInstruction(Opcode Opc, MType &Type, + const MInstruction *LHS, + const MInstruction *RHS, + MBasicBlock &BB) { + return Func->createInstruction( + false, BB, Opc, &Type, const_cast(LHS), + const_cast(RHS)); + } + + MFunction *Func = nullptr; + bool Changed = false; + llvm::DenseMap RewriteCache; +}; + +} // namespace COMPILER diff --git a/src/compiler/mir/pass/verifier.h b/src/compiler/mir/pass/verifier.h index 21358a25d..9a63a49e2 100644 --- a/src/compiler/mir/pass/verifier.h +++ b/src/compiler/mir/pass/verifier.h @@ -3,6 +3,7 @@ #pragma once #include "compiler/mir/pass/visitor.h" +#include "llvm/ADT/SmallPtrSet.h" namespace COMPILER { @@ -25,6 +26,7 @@ class MVerifier final : public MVisitor { } void visitBasicBlock(MBasicBlock &BB) override { + Visited.clear(); if (BB.empty()) { return; } @@ -51,6 +53,13 @@ class MVerifier final : public MVisitor { MVisitor::visitBasicBlock(BB); } + void visitInstruction(MInstruction &I) override { + if (!Visited.insert(&I).second) { + return; + } + MVisitor::visitInstruction(I); + } + void visitUnaryInstruction(UnaryInstruction &I) override; void visitBinaryInstruction(BinaryInstruction &I) override; void visitAdcInstruction(AdcInstruction &I) override; @@ -90,6 +99,7 @@ class MVerifier final : public MVisitor { bool Broken = false; llvm::raw_ostream &OS; 
 uint32_t FailedCount = 0; + llvm::SmallPtrSet Visited; }; } // namespace COMPILER diff --git a/src/compiler/mir/pass/visitor.h b/src/compiler/mir/pass/visitor.h index ff1794d5e..bc97bbe16 100644 --- a/src/compiler/mir/pass/visitor.h +++ b/src/compiler/mir/pass/visitor.h @@ -61,6 +61,20 @@ class MVisitor { visitEvmU256MulResultInstruction( static_cast(I)); break; + case MInstruction::EVM_U256_ADD: + visitEvmU256AddInstruction(static_cast(I)); + break; + case MInstruction::EVM_U256_ADD_RESULT: + visitEvmU256AddResultInstruction( + static_cast(I)); + break; + case MInstruction::EVM_U256_SUB: + visitEvmU256SubInstruction(static_cast(I)); + break; + case MInstruction::EVM_U256_SUB_RESULT: + visitEvmU256SubResultInstruction( + static_cast(I)); + break; case MInstruction::EVM_UDIV128_BY64: visitEvmUdiv128By64Instruction( static_cast(I)); @@ -212,6 +226,20 @@ class MVisitor { visitEvmU256MulResultInstruction(EvmU256MulResultInstruction &I) { VISIT_OPERAND_1 } + virtual void visitEvmU256AddInstruction(EvmU256AddInstruction &I) { + VISIT_OPERANDS + } + virtual void + visitEvmU256AddResultInstruction(EvmU256AddResultInstruction &I) { + VISIT_OPERAND_1 + } + virtual void visitEvmU256SubInstruction(EvmU256SubInstruction &I) { + VISIT_OPERANDS + } + virtual void + visitEvmU256SubResultInstruction(EvmU256SubResultInstruction &I) { + VISIT_OPERAND_1 + } virtual void visitEvmUdiv128By64Instruction(EvmUdiv128By64Instruction &I) { VISIT_OPERAND_3 } diff --git a/src/compiler/target/x86/x86_cg_peephole.cpp b/src/compiler/target/x86/x86_cg_peephole.cpp index bf7cb500c..91b44a70f 100644 --- a/src/compiler/target/x86/x86_cg_peephole.cpp +++ b/src/compiler/target/x86/x86_cg_peephole.cpp @@ -2,118 +2,74 @@ // SPDX-License-Identifier: Apache-2.0 #include "compiler/target/x86/x86_cg_peephole.h" #include "compiler/cgir/pass/cg_register_info.h" #include "compiler/llvm-prebuild/Target/X86/X86Subtarget.h" -#include "compiler/target/x86/x86_constants.h" using namespace llvm; namespace COMPILER {
+
+#include "target/x86/x86_cg_peephole_generated.inc"
+
+// Fold MOVZX32rr8 + SUBREG_TO_REG(0, GR32, sub_32bit) -> MOVZX64rr8.
+// On x86-64, writing a 32-bit register implicitly zeroes the upper 32 bits,
+// so SUBREG_TO_REG is a pure register-class annotation and can be eliminated.
+//
+// The fold only fires when the SUBREG_TO_REG immediately follows the
+// MOVZX32rr8 and is the sole non-debug use of its 32-bit def. On success the
+// SUBREG_TO_REG is erased, MII is moved to the instruction after it and true
+// is returned; otherwise nothing is modified and false is returned.
+static bool tryFoldMovzxSubregToReg(CgBasicBlock &MBB,
+                                    CgBasicBlock::iterator &MII) {
+  CgInstruction &Movzx = *MII;
+  if (Movzx.getOpcode() != X86::MOVZX32rr8)
+    return false;
+
+  auto NextMII = MII;
+  ++NextMII;
+  if (NextMII == MBB.end())
+    return false;
+
+  CgInstruction &Subreg = *NextMII;
+  if (!Subreg.isSubregToReg())
+    return false;
+
+  // SUBREG_TO_REG layout: op0=def(GR64), op1=imm(0), op2=src(GR32), op3=idx
+  if (Subreg.getNumOperands() < 4)
+    return false;
+  if (!Subreg.getOperand(1).isImm() || Subreg.getOperand(1).getImm() != 0)
+    return false;
+  if (!Subreg.getOperand(2).isReg())
+    return false;
+  if (!Subreg.getOperand(3).isImm() ||
+      Subreg.getOperand(3).getImm() != X86::sub_32bit)
+    return false;
+
+  // The src of SUBREG_TO_REG must be the def of MOVZX32rr8.
+  CgRegister Movzx32Def = Movzx.getOperand(0).getReg();
+  if (Subreg.getOperand(2).getReg() != Movzx32Def)
+    return false;
+
+  // The 32-bit def disappears in the rewrite below, so SUBREG_TO_REG must be
+  // its only consumer; any other use would be left without a definition.
+  const auto &RegInfo = MBB.getParent()->getRegInfo();
+  if (!RegInfo.hasOneNonDBGUse(Movzx32Def))
+    return false;
+
+  auto AfterSubreg = NextMII;
+  ++AfterSubreg;
+
+  // Rewrite: change MOVZX32rr8's def to the GR64 def from SUBREG_TO_REG and
+  // change the opcode to MOVZX64rr8, then erase SUBREG_TO_REG.
+  CgRegister SubregDef = Subreg.getOperand(0).getReg();
+  Movzx.getOperand(0).setReg(SubregDef);
+
+  const auto &TII = MBB.getParent()->getTargetInstrInfo();
+  Movzx.setDesc(TII.get(X86::MOVZX64rr8));
+
+  Subreg.eraseFromParent();
+  MII = AfterSubreg;
+  return true;
+}
+
 void X86CgPeephole::peepholeOptimizeBB(CgBasicBlock &MBB) {
-  if (MBB.empty()) {
-    return;
-  }
-
-  CgInstruction &LastMI = MBB.back();
-  if (LastMI.isUnconditionalBranch()) {
-    optimizeBranchInBlockEnd(MBB, LastMI);
-  }
+  (void)tryGeneratedBlockEndRules(MBB);
 }
 
-void X86CgPeephole::peepholeOptimize(CgBasicBlock &MBB,
+bool X86CgPeephole::peepholeOptimize(CgBasicBlock &MBB,
                                      CgBasicBlock::iterator &MII) {
-  auto &Inst = *MII;
-  if (Inst.isCompare()) {
-    optimizeCmp(MBB, MII);
-  }
+  if (tryFoldMovzxSubregToReg(MBB, MII))
+    return true;
+  return tryGeneratedInstructionRules(MBB, MII) ==
+         GeneratedInstructionRuleResult::Advanced;
 }
 
-void X86CgPeephole::optimizeCmp(CgBasicBlock &MBB,
-                                CgBasicBlock::iterator &MII) {
-  auto MIE = MBB.end();
-  // cmp/test -> setcc cond -> [movzx] -> test -> jne
-  // optimized to: cmp/test -> jcc cond
-  auto LocalMII = MII;
-  LocalMII++;
-  if (LocalMII == MIE)
-    return;
-  auto &Inst1 = *LocalMII;
-  if (Inst1.getOpcode() != X86::SETCCr)
-    return;
-  const auto &Op1 = Inst1.getOperand(0);
-  if (!Op1.isReg())
-    return;
-  auto CC = Inst1.getOperand(1).getImm();
-  unsigned TestReg = Op1.getReg();
-  CgInstruction *MovzxInst = nullptr;
-
-  LocalMII++;
-  if (LocalMII == MIE)
-    return;
-  auto &Inst2 = *LocalMII;
-  if (Inst2.getOpcode() == X86::MOVZX32rr8) {
-    const auto &MovzxDst = Inst2.getOperand(0);
-    const auto &MovzxSrc = Inst2.getOperand(1);
-    if (!MovzxDst.isReg() || !MovzxSrc.isReg() ||
-        MovzxSrc.getReg() != Op1.getReg())
-      return;
-    TestReg = MovzxDst.getReg();
-    MovzxInst = &Inst2;
-    LocalMII++;
-    if (LocalMII == MIE)
-      return;
-  }
-
-  auto &TestInst = *LocalMII;
-  switch (TestInst.getOpcode()) {
-  case X86::TEST8rr:
-  case X86::TEST16rr:
-  case X86::TEST32rr:
-  case X86::TEST64rr:
-    break;
-  default:
-    return;
-  }
-  const auto &TestOp0 = TestInst.getOperand(0);
-  const auto &TestOp1 = TestInst.getOperand(1);
-  if (!TestOp0.isReg() || !TestOp1.isReg() || TestOp0.getReg() != TestReg ||
-      TestOp1.getReg() != TestReg)
-    return;
-
-  LocalMII++;
-  if (LocalMII == MIE)
-    return;
-  auto &Inst3 = *LocalMII;
-  if (Inst3.getOpcode() != X86::JCC_1)
-    return;
-  if (Inst3.getOperand(1).getImm() != X86::CondCode::COND_NE)
-    return; // TODO, other optimization, use opposite condition code
-
-  // Ensure the SETCC/MOVZX registers have no uses beyond this chain.
-  // The lowering cache (_expr_reg_map) may share these virtual registers
-  // with other consumers; erasing them would leave dangling references.
-  const auto &RegInfo = MBB.getParent()->getRegInfo();
-  if (!RegInfo.hasOneNonDBGUse(Op1.getReg()))
-    return;
-  if (MovzxInst != nullptr && !RegInfo.hasOneNonDBGUse(TestReg))
-    return;
-
-  Inst1.eraseFromParent();
-  if (MovzxInst != nullptr) {
-    MovzxInst->eraseFromParent();
-  }
-  TestInst.eraseFromParent();
-  Inst3.getOperand(1).setImm(CC);
-}
-} // namespace COMPILER
-void X86CgPeephole::optimizeBranchInBlockEnd(CgBasicBlock &MBB,
-                                             CgInstruction &MI) {
-  ZEN_ASSERT(MI.getNumOperands() > 0);
-  CgOperand &MO = MI.getOperand(0);
-  ZEN_ASSERT(MO.isMBB());
-  CgBasicBlock *TargetMBB = MO.getMBB();
-  if (TargetMBB->getNumber() == MBB.getNumber() + 1) {
-    // remove the unconditional branch
-    MI.eraseFromParent();
-  }
-}
+} // namespace COMPILER
diff --git a/src/compiler/target/x86/x86_cg_peephole.h b/src/compiler/target/x86/x86_cg_peephole.h
index 631b31184..663f05693 100644
--- a/src/compiler/target/x86/x86_cg_peephole.h
+++ b/src/compiler/target/x86/x86_cg_peephole.h
@@ -10,12 +10,8 @@ class X86CgPeephole : public CgPeephole {
 public:
   using CgPeephole::CgPeephole;
   void peepholeOptimizeBB(CgBasicBlock &MBB);
-  // after this function, MII should be the processed instruction
-  void peepholeOptimize(CgBasicBlock &MBB, CgBasicBlock::iterator &MII);
-
-private: - void optimizeCmp(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); - void optimizeBranchInBlockEnd(CgBasicBlock &MBB, CgInstruction &MI); + // Returns true when the matcher has already advanced MII. + bool peepholeOptimize(CgBasicBlock &MBB, CgBasicBlock::iterator &MII); }; } // namespace COMPILER diff --git a/src/compiler/target/x86/x86_cg_peephole_rules.SCHEMA.md b/src/compiler/target/x86/x86_cg_peephole_rules.SCHEMA.md new file mode 100644 index 000000000..23078d0d4 --- /dev/null +++ b/src/compiler/target/x86/x86_cg_peephole_rules.SCHEMA.md @@ -0,0 +1,401 @@ +# x86 CgIR Peephole DSL Schema + +This document describes every field accepted by +`x86_cg_peephole_rules.json` and the constraints that must hold for the +generator (`tools/generate_x86_cg_peephole.py`) to accept the file and +produce valid C++ code. + +--- + +## 1. Top-level structure + +```json +{ + "version": 1, + "rules": [ /* array of rule objects */ ] +} +``` + +| Field | Type | Required | Notes | +|-----------|---------|----------|------------------------------------| +| `version` | integer | yes | Must be `1`. | +| `rules` | array | yes | Ordered list of rule objects. | + +--- + +## 2. Rule object + +```json +{ + "name": "my-rule", + "stage": "instruction", + "priority": 100, + "pattern": [ /* pattern entries */ ], + "when": [ /* optional conditions */ ], + "action": { /* action object */ }, + "validation": { /* validation object */ } +} +``` + +| Field | Type | Required | Notes | +|--------------|---------|----------|--------------------------------------------------------------| +| `name` | string | yes | Must be unique across all rules in the file. | +| `stage` | string | yes | `"instruction"` or `"block_end"`. | +| `priority` | integer | yes | Higher value fires first within the same stage. Two rules with the same priority and identical normalised pattern are a generator error. | +| `pattern` | array | yes | Sequence of pattern entries. See section 3. 
| +| `when` | array | no | Optional extra conditions. See section 6. Only used with `block_end` stage currently. | +| `action` | object | yes | Describes what to do when the pattern matches. See section 7. | +| `validation` | object | yes | Describes how the rule is validated. See section 8. | + +--- + +## 3. Pattern entry + +Each element of `pattern` describes one CgIR instruction that must match +in program order. + +```json +{ + "bind": "inst_name", + "opcode": "CMP64rr", + "capture": [ /* capture entries */ ], + "require": [ /* require entries */ ] +} +``` + +| Field | Type | Required | Notes | +|-------------|--------|----------|-------------------------------------------------------------| +| `bind` | string | yes | Local variable name for this instruction in the generated code. Used in `action` to refer to the instruction. | +| `opcode` | string | no* | Exact x86 opcode name (without the `X86::` prefix). Exactly one of `opcode`, `opcode_any`, or `predicate` must be present. | +| `opcode_any`| array | no* | List of opcode strings. The instruction matches if its opcode equals any element. | +| `predicate` | string | no* | A predicate method name called on the instruction object (e.g., `isCompare`, `isConditionalBranch`, `isUnconditionalBranch`). | +| `capture` | array | no | List of capture entries. See section 4. | +| `require` | array | no | List of requirement entries. See section 5. | + +\* Exactly one of `opcode`, `opcode_any`, or `predicate` must be present in each pattern entry. + +--- + +## 4. Capture entry + +A capture reads an operand field from the bound instruction into a named +variable that can be referenced in later `require` entries. + +```json +{ + "name": "dst_reg", + "operand": 0, + "field": "reg" +} +``` + +| Field | Type | Required | Notes | +|-----------|---------|----------|--------------------------------------------------------------| +| `name` | string | yes | Identifier used in `equals_capture` requirements and in `set_imm` actions. 
| +| `operand` | integer | yes | Zero-based operand index. A negative value `-N` selects the Nth-from-last explicit operand (counting from 1). | +| `field` | string | yes | `"reg"` to capture a register, `"imm"` to capture an immediate. | + +--- + +## 5. Require entry variants + +A require entry constrains an operand of the bound instruction. All +require entries for a given pattern element must hold for the match to +succeed. + +### 5.1 `equals_capture` + +The operand's register value must equal a previously captured register. + +```json +{ + "operand": 1, + "field": "reg", + "equals_capture": "dst_reg" +} +``` + +| Field | Type | Notes | +|------------------|---------|---------------------------------------------------------| +| `operand` | integer | Operand index (same semantics as in capture). | +| `field` | string | Must be `"reg"`. | +| `equals_capture` | string | Name of a previously declared capture. | + +### 5.2 `equals_int` + +The operand's immediate value must equal the given integer constant. + +```json +{ + "operand": -1, + "field": "imm", + "equals_int": 0 +} +``` + +| Field | Type | Notes | +|--------------|---------|---------------------------------------------------------| +| `operand` | integer | Operand index. | +| `field` | string | Must be `"imm"`. | +| `equals_int` | integer | The exact immediate value to match. | + +### 5.3 `equals_enum` + +The operand's immediate value must equal a named x86 `CondCode` constant. + +```json +{ + "operand": 1, + "field": "imm", + "equals_enum": "COND_NE" +} +``` + +| Field | Type | Notes | +|---------------|--------|-----------------------------------------------------------| +| `operand` | integer| Operand index. | +| `field` | string | Must be `"imm"`. | +| `equals_enum` | string | A constant name from `X86::CondCode` (without the `X86::CondCode::` prefix). | + +### 5.4 `equals_bool` + +The operand's `isMBB()` predicate must equal the given boolean. 
+ +```json +{ + "operand": 0, + "field": "is_mbb", + "equals_bool": true +} +``` + +| Field | Type | Notes | +|---------------|---------|--------------------------------------------------------| +| `operand` | integer | Operand index. | +| `field` | string | Must be `"is_mbb"`. | +| `equals_bool` | boolean | `true` requires `isMBB()` to hold; `false` requires it not to hold. | + +--- + +## 6. When conditions + +The `when` array holds conditions checked after the pattern has matched. +Currently only `block_end` rules use `when`. + +### 6.1 `target_is_next_block` + +The MBB operand at `operand` must point to the basic block that +immediately follows the current block in the function's block list. + +```json +{ + "kind": "target_is_next_block", + "inst": "jmp", + "operand": 0 +} +``` + +| Field | Type | Notes | +|-----------|---------|-----------------------------------------------------------| +| `kind` | string | Must be `"target_is_next_block"`. | +| `inst` | string | The `bind` name of the instruction whose operand to test. | +| `operand` | integer | Zero-based operand index holding the target MBB. | + +--- + +## 7. Action object + +The action object specifies what transformations to apply when all +pattern entries and when conditions have matched. Multiple primitives may +appear in the same action. + +```json +{ + "erase": [ "setcc", "test" ], + "set_imm": [ + { "inst": "jcc", "operand": 1, "from_capture": "setcc_cc" } + ] +} +``` + +### 7.1 `erase` + +A list of `bind` names. Each named instruction is erased from the basic +block. If the first instruction in the pattern (`pattern[0]`) is in the +erase list, the generator advances `MII` to the next instruction before +erasing so that the caller's iterator remains valid. + +### 7.2 `set_imm` + +A list of immediate-mutation entries. Each entry overwrites one immediate +operand of a bound instruction with the value stored in a named capture. 
+ +| Field | Type | Notes | +|----------------|---------|-------------------------------------------------------| +| `inst` | string | `bind` name of the instruction to mutate. | +| `operand` | integer | Zero-based operand index of the immediate to overwrite. | +| `from_capture` | string | Name of a previously declared `"imm"` capture. | + +### 7.3 `custom` (not yet implemented) + +Reserved for future use. When present, the action requires a hand-written +C++ helper function in `x86_cg_peephole.cpp`. The current generator does +not emit calls to custom handlers; rules that require transformations +beyond `erase` and `set_imm` (e.g., inverting a condition code) must wait +until generator support is added. + +--- + +## 8. Validation object + +Every rule must carry a `validation` block. + +```json +{ + "modes": [ "structural", "execution" ], + "coverage": [ "X86CgPeephole.MyTestName" ] +} +``` + +| Field | Type | Required | Notes | +|------------|--------|----------|-----------------------------------------------------------------| +| `modes` | array | yes | Non-empty list of mode strings. See 8.1. | +| `coverage` | array | yes | Non-empty list of gtest name strings. See 8.2. | + +### 8.1 Validation modes + +| Mode | Meaning | +|-------------------|-------------------------------------------------------------------------| +| `structural` | Pattern is verified to match or not match a manually-constructed CgIR fixture. | +| `execution` | Before/after instruction sequences are executed on hardware and compared. | +| `semantics_model` | A software model (e.g., flag evaluator) verifies semantic equivalence. | + +Rules with `stage: "instruction"` must include at least one of +`"execution"` or `"semantics_model"`. A `"structural"`-only instruction +rule is rejected by `check_x86_cg_peephole_validation.py`. `block_end` +rules may use `"structural"` and `"execution"` only.
+ +### 8.2 Coverage entries + +Each string must be a fully-qualified gtest name in the form +`Suite.TestName`. The checker (`check_x86_cg_peephole_validation.py`) +verifies that every coverage entry exists in the gtest binary when +`--gtest-binary` is supplied. + +--- + +## 9. Priority and conflict detection + +Rules are applied in descending priority order within each stage. The +generator normalises each rule to a canonical signature (stage, pattern +opcodes/predicates, require constraints, when conditions) and checks for +duplicate `(signature, priority)` pairs. If two rules produce the same +signature with the same priority the generator exits with a non-zero +status and prints a conflict report. + +Rules with the same priority but different normalised signatures are +legal and both are emitted into the dispatch function in the order they +appear in the `rules` array. + +--- + +## 10. DSL limits + +The following are intentionally outside the scope of the current DSL. +They require either a new DSL feature or a `custom` action with a C++ +helper. + +- **No operand creation.** Actions may only erase instructions or mutate + existing immediate values. Creating new instructions or new operands is + not supported. +- **No cross-block patterns.** All pattern entries must match within a + single basic block. `block_end` rules are a special case that look only + at the last instruction of a block and may inspect the successor list + via `target_is_next_block`. +- **No register-class or liveness reasoning.** The DSL has no access to + register class information or live-range data. Patterns that are only + safe when a specific register is dead (e.g., flag-liveness after AND or + ADD with an identity immediate) are not expressible and must be + implemented as hand-coded passes. +- **No arithmetic on captures.** The `set_imm` action copies a captured + value verbatim. 
Transformations such as inverting a condition code + (`cc ^ 1`) cannot be expressed; they require `custom`. +- **Contiguous match window.** Pattern entries are matched strictly in + sequential order with no gaps. Patterns that need to skip intervening + instructions are not supported. + +--- + +## 11. Examples + +### Instruction stage — remove redundant consecutive TEST64rr + +```json +{ + "name": "remove-redundant-test64rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "test1", + "opcode": "TEST64rr", + "capture": [ + { "name": "test1_op0", "operand": 0, "field": "reg" }, + { "name": "test1_op1", "operand": 1, "field": "reg" } + ] + }, + { + "bind": "test2", + "opcode": "TEST64rr", + "require": [ + { "operand": 0, "field": "reg", "equals_capture": "test1_op0" }, + { "operand": 1, "field": "reg", "equals_capture": "test1_op1" } + ] + } + ], + "action": { "erase": [ "test1" ] }, + "validation": { + "modes": [ "structural", "execution" ], + "coverage": [ + "X86CgPeephole.RemovesRedundantTest64rr", + "X86CgPeephole.KeepsNonRedundantTest64rr", + "X86CgPeephole.ExecutionHarnessRemoveRedundantTest64rr" + ] + } +} +``` + +Safety note: `TEST64rr` does not modify any register value; it only sets +flags. Two consecutive identical TEST instructions produce identical flag +state. Removing the first leaves the second to set the same flags, so the +transformation is correct without any liveness information.
+ +### Block-end stage — remove fallthrough unconditional jump + +```json +{ + "name": "remove-fallthrough-jump", + "stage": "block_end", + "priority": 100, + "pattern": [ + { + "bind": "jmp", + "predicate": "isUnconditionalBranch", + "require": [ + { "operand": 0, "field": "is_mbb", "equals_bool": true } + ] + } + ], + "when": [ + { "kind": "target_is_next_block", "inst": "jmp", "operand": 0 } + ], + "action": { "erase": [ "jmp" ] }, + "validation": { + "modes": [ "structural", "execution" ], + "coverage": [ + "X86CgPeephole.RemovesFallthroughJump", + "X86CgPeephole.ExecutionHarnessRemoveFallthroughJump" + ] + } +} +``` diff --git a/src/compiler/target/x86/x86_cg_peephole_rules.json b/src/compiler/target/x86/x86_cg_peephole_rules.json new file mode 100644 index 000000000..a7d332108 --- /dev/null +++ b/src/compiler/target/x86/x86_cg_peephole_rules.json @@ -0,0 +1,690 @@ +{ + "version": 1, + "rules": [ + { + "name": "remove-self-move", + "stage": "instruction", + "priority": 120, + "pattern": [ + { + "bind": "mov", + "opcode_any": [ + "MOV8rr", + "MOV16rr", + "MOV64rr" + ], + "capture": [ + { + "name": "mov_dst", + "operand": 0, + "field": "reg" + } + ], + "require": [ + { + "operand": 1, + "field": "reg", + "equals_capture": "mov_dst" + } + ] + } + ], + "action": { + "erase": [ + "mov" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesSelfMove64", + "X86CgPeephole.KeepsSelfMove32", + "X86CgPeephole.ExecutionHarnessRemoveSelfMove", + "X86CgPeephole.ExecutionHarnessSelfMove32ChangesUpperBits" + ] + } + }, + { + "name": "remove-zero-shift", + "stage": "instruction", + "priority": 115, + "pattern": [ + { + "bind": "shift", + "opcode_any": [ + "SHL8ri", + "SHL16ri", + "SHL64ri", + "SHR8ri", + "SHR16ri", + "SHR64ri", + "SAR8ri", + "SAR16ri", + "SAR64ri" + ], + "require": [ + { + "operand": -1, + "field": "imm", + "equals_int": 0 + } + ] + } + ], + "action": { + "erase": [ + "shift" + ] + }, + "validation": 
{ + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesZeroShift64", + "X86CgPeephole.KeepsZeroShift32", + "X86CgPeephole.ExecutionHarnessRemoveZeroShift" + ] + } + }, + { + "name": "fold-setcc-test-jne-to-jcc", + "stage": "instruction", + "priority": 100, + "pattern": [ + { + "bind": "cmp", + "predicate": "isCompare" + }, + { + "bind": "setcc", + "opcode": "SETCCr", + "capture": [ + { + "name": "setcc_dst", + "operand": 0, + "field": "reg", + "require_single_use": true + }, + { + "name": "setcc_cc", + "operand": 1, + "field": "imm" + } + ] + }, + { + "bind": "test", + "opcode": "TEST8rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "setcc_dst" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "setcc_dst" + } + ] + }, + { + "bind": "jcc", + "opcode": "JCC_1", + "require": [ + { + "operand": 1, + "field": "imm", + "equals_enum": "COND_NE" + } + ] + } + ], + "action": { + "erase": [ + "setcc", + "test" + ], + "set_imm": [ + { + "inst": "jcc", + "operand": 1, + "from_capture": "setcc_cc" + } + ] + }, + "validation": { + "modes": [ + "structural", + "semantics_model", + "execution" + ], + "coverage": [ + "X86CgPeephole.FoldsSetccTestJneChain", + "X86CgPeephole.FuzzFoldSetccTestJneToJccSemantics", + "X86CgPeephole.ExecutionHarnessFoldSetccTestJneToJcc" + ] + } + }, + { + "name": "remove-fallthrough-jcc", + "stage": "block_end", + "priority": 110, + "pattern": [ + { + "bind": "jcc", + "predicate": "isConditionalBranch", + "require": [ + { + "operand": 0, + "field": "is_mbb", + "equals_bool": true + } + ] + } + ], + "when": [ + { + "kind": "target_is_next_block", + "inst": "jcc", + "operand": 0 + } + ], + "action": { + "erase": [ + "jcc" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesFallthroughConditionalJump", + "X86CgPeephole.ExecutionHarnessRemoveFallthroughConditionalJump" + ] + } + }, + { + "name": 
"remove-redundant-test64rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "test1", + "opcode": "TEST64rr", + "capture": [ + { + "name": "test1_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "test1_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "test2", + "opcode": "TEST64rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "test1_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "test1_op1" + } + ] + } + ], + "action": { + "erase": [ + "test1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantTest64rr", + "X86CgPeephole.KeepsNonRedundantTest64rr", + "X86CgPeephole.ExecutionHarnessRemoveRedundantTest64rr" + ] + } + }, + { + "name": "remove-redundant-test32rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "test1", + "opcode": "TEST32rr", + "capture": [ + { + "name": "test32_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "test32_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "test2", + "opcode": "TEST32rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "test32_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "test32_op1" + } + ] + } + ], + "action": { + "erase": [ + "test1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantTest32rr", + "X86CgPeephole.KeepsNonRedundantTest32rr", + "X86CgPeephole.ExecutionHarnessRemoveRedundantTest32rr" + ] + } + }, + { + "name": "remove-redundant-test8rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "test1", + "opcode": "TEST8rr", + "capture": [ + { + "name": "test8_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "test8_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "test2", + "opcode": "TEST8rr", + "require": [ + { + "operand": 
0, + "field": "reg", + "equals_capture": "test8_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "test8_op1" + } + ] + } + ], + "action": { + "erase": [ + "test1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantTest8rr", + "X86CgPeephole.KeepsNonRedundantTest8rr", + "X86CgPeephole.ExecutionHarnessRemoveRedundantTestrr" + ] + } + }, + { + "name": "remove-redundant-cmp64rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "cmp1", + "opcode": "CMP64rr", + "capture": [ + { + "name": "cmp64_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "cmp64_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "cmp2", + "opcode": "CMP64rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "cmp64_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "cmp64_op1" + } + ] + } + ], + "action": { + "erase": [ + "cmp1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantCmp64rr", + "X86CgPeephole.KeepsNonRedundantCmp64rr" + ] + } + }, + { + "name": "remove-redundant-cmp32rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "cmp1", + "opcode": "CMP32rr", + "capture": [ + { + "name": "cmp32_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "cmp32_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "cmp2", + "opcode": "CMP32rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "cmp32_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "cmp32_op1" + } + ] + } + ], + "action": { + "erase": [ + "cmp1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantCmp32rr", + "X86CgPeephole.KeepsNonRedundantCmp32rr" + ] + } + }, + { + "name": "remove-redundant-cmp8rr", + "stage": "instruction", + 
"priority": 105, + "pattern": [ + { + "bind": "cmp1", + "opcode": "CMP8rr", + "capture": [ + { + "name": "cmp8_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "cmp8_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "cmp2", + "opcode": "CMP8rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "cmp8_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "cmp8_op1" + } + ] + } + ], + "action": { + "erase": [ + "cmp1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantCmp8rr", + "X86CgPeephole.KeepsNonRedundantCmp8rr" + ] + } + }, + { + "name": "remove-redundant-cmp16rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "cmp1", + "opcode": "CMP16rr", + "capture": [ + { + "name": "cmp16_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "cmp16_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "cmp2", + "opcode": "CMP16rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "cmp16_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "cmp16_op1" + } + ] + } + ], + "action": { + "erase": [ + "cmp1" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantCmp16rr", + "X86CgPeephole.KeepsNonRedundantCmp16rr" + ] + } + }, + { + "name": "remove-redundant-test16rr", + "stage": "instruction", + "priority": 105, + "pattern": [ + { + "bind": "test1", + "opcode": "TEST16rr", + "capture": [ + { + "name": "test16_op0", + "operand": 0, + "field": "reg" + }, + { + "name": "test16_op1", + "operand": 1, + "field": "reg" + } + ] + }, + { + "bind": "test2", + "opcode": "TEST16rr", + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "test16_op0" + }, + { + "operand": 1, + "field": "reg", + "equals_capture": "test16_op1" + } + ] + } + ], + "action": { + "erase": [ + "test1" + ] + }, + 
"validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesRedundantTest16rr", + "X86CgPeephole.KeepsNonRedundantTest16rr" + ] + } + }, + { + "name": "remove-fallthrough-jump", + "stage": "block_end", + "priority": 100, + "pattern": [ + { + "bind": "jmp", + "predicate": "isUnconditionalBranch", + "require": [ + { + "operand": 0, + "field": "is_mbb", + "equals_bool": true + } + ] + } + ], + "when": [ + { + "kind": "target_is_next_block", + "inst": "jmp", + "operand": 0 + } + ], + "action": { + "erase": [ + "jmp" + ] + }, + "validation": { + "modes": [ + "structural", + "execution" + ], + "coverage": [ + "X86CgPeephole.RemovesFallthroughJump", + "X86CgPeephole.ExecutionHarnessRemoveFallthroughJump" + ] + } + } + ] +} diff --git a/src/compiler/target/x86/x86lowering.cpp b/src/compiler/target/x86/x86lowering.cpp index e672eb938..c1557dd2d 100644 --- a/src/compiler/target/x86/x86lowering.cpp +++ b/src/compiler/target/x86/x86lowering.cpp @@ -1014,16 +1014,18 @@ CgRegister X86CgLowering::lowerAdcExpr(const AdcInstruction &Inst) { // Use x86 flags with direct ADC and rely on the existing carry chain. // The required invariant is that no flag-clobbering instruction is emitted // between the ADD/ADC instructions that produce and consume CF. + // + // Operand 2 is a chain link pointing to the carry-producing instruction and + // is metadata for analysis passes only. x86 lowering ignores it and relies + // on hardware CF. This is not a license to rewrite ADC into ADD: the + // carry chain is still live and must be preserved. 
const MInstruction *LHS = Inst.getOperand<0>(); const MInstruction *RHS = Inst.getOperand<1>(); - const MInstruction *Carry = Inst.getOperand<2>(); MVT VT = getMVT(*Inst.getType()); ZEN_ASSERT(VT.isInteger()); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - assertZeroFlagChainOperand(Carry); - CgRegister LHSReg = lowerExpr(*LHS); CgRegister RHSReg = lowerExpr(*RHS); @@ -1056,16 +1058,18 @@ CgRegister X86CgLowering::lowerSbbExpr(const SbbInstruction &Inst) { // Use x86 flags with direct SBB and rely on the existing borrow chain. // The required invariant is that no flag-clobbering instruction is emitted // between the SUB/SBB instructions that produce and consume CF. + // + // Operand 2 is a chain link pointing to the borrow-producing instruction and + // is metadata for analysis passes only. x86 lowering ignores it and relies + // on hardware CF. This is not a license to rewrite SBB into SUB: the + // borrow chain is still live and must be preserved. const MInstruction *LHS = Inst.getOperand<0>(); const MInstruction *RHS = Inst.getOperand<1>(); - const MInstruction *Borrow = Inst.getOperand<2>(); MVT VT = getMVT(*Inst.getType()); ZEN_ASSERT(VT.isInteger()); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - assertZeroFlagChainOperand(Borrow); - CgRegister LHSReg = lowerExpr(*LHS); CgRegister RHSReg = lowerExpr(*RHS); @@ -1328,6 +1332,86 @@ CgRegister X86CgLowering::lowerEvmU256MulResultExpr( return It->second[ResultIdx - 1]; } +CgRegister +X86CgLowering::lowerEvmU256AddExpr(const EvmU256AddInstruction &Inst) { + const TargetRegisterClass *RC = &X86::GR64RegClass; + + std::array L = {}; + std::array R = {}; + for (size_t I = 0; I < 4; ++I) { + L[I] = lowerExpr(*Inst.getOperand(I)); + R[I] = lowerExpr(*Inst.getOperand(4 + I)); + } + + // COPY (MOV) does not modify EFLAGS — safe between carry-chain steps + std::array Res = {}; + Res[0] = fastEmitCopy(RC, L[0]); + MF->createCgInstruction(*CurBB, TII.get(X86::ADD64rr), Res[0], R[0], Res[0]); + for 
(size_t I = 1; I < 4; ++I) { + Res[I] = fastEmitCopy(RC, L[I]); + MF->createCgInstruction(*CurBB, TII.get(X86::ADC64rr), Res[I], R[I], + Res[I]); + } + + U256AddResultRegs[&Inst] = {Res[1], Res[2], Res[3]}; + return Res[0]; +} + +CgRegister X86CgLowering::lowerEvmU256AddResultExpr( + const EvmU256AddResultInstruction &Inst) { + const MInstruction *AddInst = Inst.getAddInst(); + CgRegister LowReg = lowerExpr(*AddInst); + uint32_t ResultIdx = Inst.getResultIdx(); + if (ResultIdx == 0) { + return LowReg; + } + + auto It = U256AddResultRegs.find(AddInst); + ZEN_ASSERT(It != U256AddResultRegs.end()); + ZEN_ASSERT(ResultIdx <= It->second.size()); + return It->second[ResultIdx - 1]; +} + +CgRegister +X86CgLowering::lowerEvmU256SubExpr(const EvmU256SubInstruction &Inst) { + const TargetRegisterClass *RC = &X86::GR64RegClass; + + std::array L = {}; + std::array R = {}; + for (size_t I = 0; I < 4; ++I) { + L[I] = lowerExpr(*Inst.getOperand(I)); + R[I] = lowerExpr(*Inst.getOperand(4 + I)); + } + + // COPY (MOV) does not modify EFLAGS — safe between borrow-chain steps + std::array Res = {}; + Res[0] = fastEmitCopy(RC, L[0]); + MF->createCgInstruction(*CurBB, TII.get(X86::SUB64rr), Res[0], R[0], Res[0]); + for (size_t I = 1; I < 4; ++I) { + Res[I] = fastEmitCopy(RC, L[I]); + MF->createCgInstruction(*CurBB, TII.get(X86::SBB64rr), Res[I], R[I], + Res[I]); + } + + U256SubResultRegs[&Inst] = {Res[1], Res[2], Res[3]}; + return Res[0]; +} + +CgRegister X86CgLowering::lowerEvmU256SubResultExpr( + const EvmU256SubResultInstruction &Inst) { + const MInstruction *SubInst = Inst.getSubInst(); + CgRegister LowReg = lowerExpr(*SubInst); + uint32_t ResultIdx = Inst.getResultIdx(); + if (ResultIdx == 0) { + return LowReg; + } + + auto It = U256SubResultRegs.find(SubInst); + ZEN_ASSERT(It != U256SubResultRegs.end()); + ZEN_ASSERT(ResultIdx <= It->second.size()); + return It->second[ResultIdx - 1]; +} + CgRegister X86CgLowering::lowerEvmUdiv128By64Expr(const EvmUdiv128By64Instruction &Inst) { 
const MInstruction *Hi = Inst.getOperand<0>(); diff --git a/src/compiler/target/x86/x86lowering.h b/src/compiler/target/x86/x86lowering.h index b29bef3a8..70406620b 100644 --- a/src/compiler/target/x86/x86lowering.h +++ b/src/compiler/target/x86/x86lowering.h @@ -75,6 +75,10 @@ class X86CgLowering : public CgLowering { CgRegister lowerEvmUmul128HiExpr(const EvmUmul128HiInstruction &Inst); CgRegister lowerEvmU256MulExpr(const EvmU256MulInstruction &Inst); CgRegister lowerEvmU256MulResultExpr(const EvmU256MulResultInstruction &Inst); + CgRegister lowerEvmU256AddExpr(const EvmU256AddInstruction &Inst); + CgRegister lowerEvmU256AddResultExpr(const EvmU256AddResultInstruction &Inst); + CgRegister lowerEvmU256SubExpr(const EvmU256SubInstruction &Inst); + CgRegister lowerEvmU256SubResultExpr(const EvmU256SubResultInstruction &Inst); CgRegister lowerEvmUdiv128By64Expr(const EvmUdiv128By64Instruction &Inst); CgRegister lowerEvmUrem128By64Expr(const EvmUrem128By64Instruction &Inst); CgRegister lowerAdcExpr(const AdcInstruction &Inst); @@ -154,6 +158,10 @@ class X86CgLowering : public CgLowering { llvm::DenseSet Umul128NeedHi; llvm::DenseMap> U256MulResultRegs; + llvm::DenseMap> + U256AddResultRegs; + llvm::DenseMap> + U256SubResultRegs; llvm::DenseMap Udiv128RemRegs; }; diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 5d28aa60d..f4e1f5cdd 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -53,6 +53,10 @@ if(ZEN_ENABLE_SPEC_TEST) add_executable(mempoolTests mempool_tests.cpp) add_executable(cAPITests c_api_tests.cpp) + if(ZEN_ENABLE_MULTIPASS_JIT) + add_executable(x86CgPeepholeTests x86_cg_peephole_tests.cpp) + add_executable(dmirValidationTests dmir_validation_tests.cpp) + endif() target_include_directories( mempoolTests PRIVATE ${googletest_SOURCE_DIR}/googletest/include ) @@ -116,6 +120,13 @@ if(ZEN_ENABLE_SPEC_TEST) PRIVATE dtvmcore gtest_main -fsanitize=address PUBLIC ${GTEST_BOTH_LIBRARIES} ) + if(ZEN_ENABLE_MULTIPASS_JIT) + 
target_link_libraries( + x86CgPeepholeTests + PRIVATE compiler dtvmcore gtest_main -fsanitize=address + PUBLIC ${GTEST_BOTH_LIBRARIES} + ) + endif() if(ZEN_ENABLE_EVM) target_link_libraries( @@ -162,6 +173,20 @@ if(ZEN_ENABLE_SPEC_TEST) PRIVATE dtvmcore gtest_main -fsanitize=address -static-libasan PUBLIC ${GTEST_BOTH_LIBRARIES} ) + if(ZEN_ENABLE_MULTIPASS_JIT) + target_link_libraries( + x86CgPeepholeTests + PRIVATE compiler dtvmcore gtest_main -fsanitize=address + -static-libasan + PUBLIC ${GTEST_BOTH_LIBRARIES} + ) + target_link_libraries( + dmirValidationTests + PRIVATE compiler dtvmcore gtest_main -fsanitize=address + -static-libasan + PUBLIC ${GTEST_BOTH_LIBRARIES} + ) + endif() if(ZEN_ENABLE_EVM) target_link_libraries( @@ -227,6 +252,18 @@ if(ZEN_ENABLE_SPEC_TEST) PRIVATE dtvmcore gtest_main PUBLIC ${GTEST_BOTH_LIBRARIES} ) + if(ZEN_ENABLE_MULTIPASS_JIT) + target_link_libraries( + x86CgPeepholeTests + PRIVATE compiler dtvmcore gtest_main + PUBLIC ${GTEST_BOTH_LIBRARIES} + ) + target_link_libraries( + dmirValidationTests + PRIVATE compiler dtvmcore gtest_main + PUBLIC ${GTEST_BOTH_LIBRARIES} + ) + endif() if(ZEN_ENABLE_EVM) target_link_libraries( @@ -273,6 +310,89 @@ if(ZEN_ENABLE_SPEC_TEST) endif() add_test(NAME mempoolTests COMMAND mempoolTests) add_test(NAME cAPITests COMMAND cAPITests) + if(ZEN_ENABLE_MULTIPASS_JIT) + add_test(NAME x86CgPeepholeTests COMMAND x86CgPeepholeTests) + add_test(NAME dmirValidationTests COMMAND dmirValidationTests) + add_test( + NAME x86CgPeepholeRuleGen + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_x86_cg_peephole_generator.py + ${CMAKE_SOURCE_DIR} + ) + add_test( + NAME x86CgPeepholeValidationMeta + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_x86_cg_peephole_validation.py + ${CMAKE_SOURCE_DIR} $ + ) + add_test( + NAME x86CgPeepholeValidationReport + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_report_x86_cg_peephole_validation.py + ${CMAKE_SOURCE_DIR} $ + ) + 
add_test( + NAME dmirRewriteRuleMeta + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_check_dmir_rewrite_rules.py + ${CMAKE_SOURCE_DIR} $ + ) + add_test( + NAME dmirRewriteRuleReport + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_report_dmir_rewrite_rules.py + ${CMAKE_SOURCE_DIR} $ + ) + add_test( + NAME dmirSeedRuleMiner + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_mine_dmir_seed_rules.py + ${CMAKE_SOURCE_DIR} + ) + add_test( + NAME dmirBootstrapMinerConfig + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_mine_dmir_bootstrap_config.py + ${CMAKE_SOURCE_DIR} + ) + add_test( + NAME dmirNovelRuleMiner + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_mine_dmir_novel_rules.py + ${CMAKE_SOURCE_DIR} + ) + if(ZEN_ENABLE_EVM) + add_test( + NAME compilerPassTimingTool + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_collect_compiler_pass_timings.py + ${CMAKE_SOURCE_DIR} $ + ) + add_test( + NAME compilerPassTimingBudgetTool + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_check_compiler_pass_timing_budget.py + ${CMAKE_SOURCE_DIR} + ) + add_test( + NAME compilerPassTimingBudgetRefreshTool + COMMAND + ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/tools/test_update_compiler_pass_timing_budget.py + ${CMAKE_SOURCE_DIR} + ) + endif() + endif() if(ZEN_ENABLE_EVM) add_test(NAME evmInterpTests COMMAND evmInterpTests) diff --git a/src/tests/dmir_validation_tests.cpp b/src/tests/dmir_validation_tests.cpp new file mode 100644 index 000000000..521ad4582 --- /dev/null +++ b/src/tests/dmir_validation_tests.cpp @@ -0,0 +1,2349 @@ +// Copyright (C) 2025 the DTVM authors. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "compiler/context.h" +#include "compiler/mir/constants.h" +#include "compiler/mir/function.h" +#include "compiler/mir/instructions.h" +#include "compiler/mir/pass/dmir_rewrite.h" +#include "compiler/mir/pointer.h" +#include "intx/intx.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +using namespace COMPILER; +using namespace llvm; + +MFunctionType *createVoidFunctionType(CompileContext &Context) { + return MFunctionType::create(Context, Context.VoidType, {}); +} + +class DMirTestBuilder { +public: + DMirTestBuilder() : Func(Context, 0) { + Context.initialize(); + Func.setFunctionType(createVoidFunctionType(Context)); + BB = Func.createBasicBlock(); + Func.appendBlock(BB); + I64PtrType = MPointerType::create(Context, Context.I64Type); + } + + ConstantInstruction *createConstI8(uint64_t Value) { + return createConst(Context.I8Type, Value); + } + + ConstantInstruction *createConstI32(uint64_t Value) { + return createConst(Context.I32Type, Value); + } + + ConstantInstruction *createConstI64(uint64_t Value) { + return createConst(Context.I64Type, Value); + } + + template T *createExpr(Arguments &&...Args) { + return Func.createInstruction(false, *BB, + std::forward(Args)...); + } + + template T *createStmt(Arguments &&...Args) { + return Func.createInstruction(true, *BB, + std::forward(Args)...); + } + + Variable *createVariable(MType *Type) { return Func.createVariable(Type); } + + MBasicBlock &getBlock() { return *BB; } + + CompileContext Context; + MFunction Func; + MPointerType *I64PtrType = nullptr; + +private: + ConstantInstruction *createConst(MType &Type, uint64_t Value) { + return createExpr( + &Type, *MConstantInt::get(Context, Type, Value)); + } + + MBasicBlock *BB = nullptr; +}; + +class DMirFragmentInterpreter { +public: + void setVariableValue(VariableIdx VarIdx, const APInt &Value) { + Variables[VarIdx] = Value; + } + + APInt 
evaluate(const MInstruction *Inst) { + switch (Inst->getOpcode()) { + case OP_const: + return evaluateConstant(cast(Inst)); + case OP_dread: + return evaluateDread(cast(Inst)); + case OP_not: + return ~evaluate(Inst->getOperand<0>()); + case OP_clz: + return createScalarResult( + *Inst->getType(), + evaluate(Inst->getOperand<0>()).countLeadingZeros()); + case OP_ctz: + return createScalarResult( + *Inst->getType(), + evaluate(Inst->getOperand<0>()).countTrailingZeros()); + case OP_popcnt: + return createScalarResult( + *Inst->getType(), evaluate(Inst->getOperand<0>()).countPopulation()); + case OP_bswap: + return evaluate(Inst->getOperand<0>()).byteSwap(); + case OP_add: + return evaluate(Inst->getOperand<0>()) + evaluate(Inst->getOperand<1>()); + case OP_sub: + return evaluate(Inst->getOperand<0>()) - evaluate(Inst->getOperand<1>()); + case OP_mul: + return evaluate(Inst->getOperand<0>()) * evaluate(Inst->getOperand<1>()); + case OP_sdiv: + return evaluateDiv(Inst, true, false); + case OP_udiv: + return evaluateDiv(Inst, false, false); + case OP_srem: + return evaluateDiv(Inst, true, true); + case OP_urem: + return evaluateDiv(Inst, false, true); + case OP_and: + return evaluate(Inst->getOperand<0>()) & evaluate(Inst->getOperand<1>()); + case OP_or: + return evaluate(Inst->getOperand<0>()) | evaluate(Inst->getOperand<1>()); + case OP_xor: + return evaluate(Inst->getOperand<0>()) ^ evaluate(Inst->getOperand<1>()); + case OP_shl: + return evaluateShift(Inst, ShiftKind::Left); + case OP_sshr: + return evaluateShift(Inst, ShiftKind::ArithmeticRight); + case OP_ushr: + return evaluateShift(Inst, ShiftKind::LogicalRight); + case OP_rotl: + return evaluateRotate(Inst, true); + case OP_rotr: + return evaluateRotate(Inst, false); + case OP_trunc: + return evaluate(Inst->getOperand<0>()) + .trunc(getBitWidth(*Inst->getType())); + case OP_sext: + return evaluate(Inst->getOperand<0>()) + .sext(getBitWidth(*Inst->getType())); + case OP_uext: + return 
evaluate(Inst->getOperand<0>()) + .zext(getBitWidth(*Inst->getType())); + case OP_inttoptr: + case OP_ptrtoint: + case OP_bitcast: + return evaluate(Inst->getOperand<0>()) + .zextOrTrunc(getBitWidth(*Inst->getType())); + case OP_cmp: + return evaluateCmp(cast(Inst)); + case OP_select: + return evaluateSelect(cast(Inst)); + case OP_adc: + return evaluateAdc(cast(Inst)); + case OP_sbb: + return evaluateSbb(cast(Inst)); + case OP_evm_umul128_lo: + return createScalarResult( + *Inst->getType(), + evaluateUmul128(cast(Inst)).first); + case OP_evm_umul128_hi: + return createScalarResult( + *Inst->getType(), + evaluateUmul128Hi(cast(Inst))); + case OP_evm_udiv128_by64: + return createScalarResult( + *Inst->getType(), + evaluateUdiv128By64(cast(Inst)).first); + case OP_evm_urem128_by64: + return createScalarResult( + *Inst->getType(), + evaluateUrem128By64(cast(Inst))); + default: + throw std::runtime_error("unsupported dMIR opcode: " + + getOpcodeString(Inst->getOpcode())); + } + } + + std::optional execute(MBasicBlock &BB) { + for (auto *Inst : BB) { + switch (Inst->getOpcode()) { + case OP_dassign: { + auto *Dassign = cast(Inst); + Variables[Dassign->getVarIdx()] = evaluate(Dassign->getOperand<0>()); + break; + } + case OP_return: + if (Inst->getType()->isVoid()) { + return std::nullopt; + } + return evaluate(Inst->getOperand<0>()); + default: + throw std::runtime_error("unsupported dMIR statement: " + + getOpcodeString(Inst->getOpcode())); + } + } + return std::nullopt; + } + +private: + enum class ShiftKind : uint8_t { + Left, + ArithmeticRight, + LogicalRight, + }; + + static unsigned getBitWidth(const MType &Type) { + if (Type.isInteger()) { + return Type.getBitWidth(); + } + if (Type.isPointer()) { + return Type.getNumBytes() * 8; + } + throw std::runtime_error("unsupported dMIR value type"); + } + + static APInt createScalarResult(const MType &Type, uint64_t Value) { + return APInt(getBitWidth(Type), Value, Type.isInteger() && Type.isSigned()); + } + + APInt 
evaluateConstant(const ConstantInstruction *Inst) { + const auto &Constant = Inst->getConstant(); + if (!Constant.getType().isInteger()) { + throw std::runtime_error("unsupported non-integer dMIR constant"); + } + return cast(&Constant)->getValue(); + } + + APInt evaluateDread(const DreadInstruction *Inst) { + auto It = Variables.find(Inst->getVarIdx()); + if (It == Variables.end()) { + throw std::runtime_error("dMIR variable was read before assignment"); + } + return It->second; + } + + APInt evaluateDiv(const MInstruction *Inst, bool Signed, bool Remainder) { + APInt Lhs = evaluate(Inst->getOperand<0>()); + APInt Rhs = evaluate(Inst->getOperand<1>()); + if (Rhs.isZero()) { + throw std::runtime_error("division by zero in dMIR fragment"); + } + if (Signed) { + return Remainder ? Lhs.srem(Rhs) : Lhs.sdiv(Rhs); + } + return Remainder ? Lhs.urem(Rhs) : Lhs.udiv(Rhs); + } + + APInt evaluateShift(const MInstruction *Inst, ShiftKind Kind) { + APInt Value = evaluate(Inst->getOperand<0>()); + const unsigned BitWidth = Value.getBitWidth(); + const uint64_t Amount = evaluate(Inst->getOperand<1>()).getLimitedValue(); + if (Amount >= BitWidth) { + if (Kind == ShiftKind::ArithmeticRight && Value.isNegative()) { + return APInt::getAllOnes(BitWidth); + } + return APInt::getZero(BitWidth); + } + switch (Kind) { + case ShiftKind::Left: + return Value.shl(Amount); + case ShiftKind::ArithmeticRight: + return Value.ashr(Amount); + case ShiftKind::LogicalRight: + return Value.lshr(Amount); + } + llvm_unreachable("unknown shift kind"); + } + + APInt evaluateRotate(const MInstruction *Inst, bool Left) { + APInt Value = evaluate(Inst->getOperand<0>()); + const unsigned BitWidth = Value.getBitWidth(); + const uint64_t Amount = evaluate(Inst->getOperand<1>()).getLimitedValue(); + const unsigned EffectiveAmount = + BitWidth == 0 ? 0 : static_cast(Amount % BitWidth); + return Left ? 
Value.rotl(EffectiveAmount) : Value.rotr(EffectiveAmount); + } + + APInt evaluateCmp(const CmpInstruction *Inst) { + APInt Lhs = evaluate(Inst->getOperand<0>()); + APInt Rhs = evaluate(Inst->getOperand<1>()); + bool Result = false; + switch (Inst->getPredicate()) { + case CmpInstruction::ICMP_EQ: + Result = Lhs == Rhs; + break; + case CmpInstruction::ICMP_NE: + Result = Lhs != Rhs; + break; + case CmpInstruction::ICMP_UGT: + Result = Lhs.ugt(Rhs); + break; + case CmpInstruction::ICMP_UGE: + Result = Lhs.uge(Rhs); + break; + case CmpInstruction::ICMP_ULT: + Result = Lhs.ult(Rhs); + break; + case CmpInstruction::ICMP_ULE: + Result = Lhs.ule(Rhs); + break; + case CmpInstruction::ICMP_SGT: + Result = Lhs.sgt(Rhs); + break; + case CmpInstruction::ICMP_SGE: + Result = Lhs.sge(Rhs); + break; + case CmpInstruction::ICMP_SLT: + Result = Lhs.slt(Rhs); + break; + case CmpInstruction::ICMP_SLE: + Result = Lhs.sle(Rhs); + break; + default: + throw std::runtime_error("unsupported dMIR predicate"); + } + return createScalarResult(*Inst->getType(), Result ? 1 : 0); + } + + APInt evaluateSelect(const SelectInstruction *Inst) { + APInt Cond = evaluate(Inst->getOperand<0>()); + return evaluate(Cond.isZero() ? 
Inst->getOperand<2>() + : Inst->getOperand<1>()); + } + + APInt evaluateAdc(const AdcInstruction *Inst) { + APInt Lhs = evaluate(Inst->getOperand<0>()); + APInt Rhs = evaluate(Inst->getOperand<1>()); + APInt Carry = + evaluate(Inst->getOperand<2>()).zextOrTrunc(Lhs.getBitWidth()); + return Lhs + Rhs + Carry; + } + + APInt evaluateSbb(const SbbInstruction *Inst) { + APInt Lhs = evaluate(Inst->getOperand<0>()); + APInt Rhs = evaluate(Inst->getOperand<1>()); + APInt Borrow = + evaluate(Inst->getOperand<2>()).zextOrTrunc(Lhs.getBitWidth()); + return Lhs - Rhs - Borrow; + } + + std::pair + evaluateUmul128(const EvmUmul128Instruction *Inst) { + const uint64_t Lhs = evaluateUnsigned64(Inst->getOperand<0>()); + const uint64_t Rhs = evaluateUnsigned64(Inst->getOperand<1>()); + const unsigned __int128 Product = static_cast(Lhs) * + static_cast(Rhs); + return {static_cast(Product), + static_cast(Product >> 64)}; + } + + uint64_t evaluateUmul128Hi(const EvmUmul128HiInstruction *Inst) { + return evaluateUmul128(cast(Inst->getOperand<0>())) + .second; + } + + std::pair + evaluateUdiv128By64(const EvmUdiv128By64Instruction *Inst) { + const uint64_t Hi = evaluateUnsigned64(Inst->getOperand<0>()); + const uint64_t Lo = evaluateUnsigned64(Inst->getOperand<1>()); + const uint64_t Divisor = evaluateUnsigned64(Inst->getOperand<2>()); + if (Divisor == 0) { + throw std::runtime_error("128/64 division by zero in dMIR fragment"); + } + const unsigned __int128 Dividend = + (static_cast(Hi) << 64) | Lo; + return {static_cast(Dividend / Divisor), + static_cast(Dividend % Divisor)}; + } + + uint64_t evaluateUrem128By64(const EvmUrem128By64Instruction *Inst) { + return evaluateUdiv128By64( + cast(Inst->getOperand<0>())) + .second; + } + + uint64_t evaluateUnsigned64(const MInstruction *Inst) { + return evaluate(Inst).zextOrTrunc(64).getZExtValue(); + } + + std::unordered_map Variables; +}; + +intx::uint256 composeU256(const std::array &Limbs) { + intx::uint256 Value = Limbs[0]; + Value |= 
intx::uint256(Limbs[1]) << 64; + Value |= intx::uint256(Limbs[2]) << 128; + Value |= intx::uint256(Limbs[3]) << 192; + return Value; +} + +struct BinaryInputCase { + uint64_t Lhs = 0; + uint64_t Rhs = 0; +}; + +struct TernaryInputCase { + uint64_t First = 0; + uint64_t Second = 0; + uint64_t Third = 0; +}; + +const std::array &getBoundaryU64Values() { + static const std::array Values = { + 0ULL, + 1ULL, + 2ULL, + 3ULL, + 0x7fffffffULL, + 0x80000000ULL, + 0xffffffffULL, + 0x100000000ULL, + 0x7fffffffffffffffULL, + 0x8000000000000000ULL, + 0xfffffffffffffffeULL, + 0xffffffffffffffffULL, + }; + return Values; +} + +const std::vector &getInterestingU64Values() { + static const std::vector Values = []() { + std::vector Result = { + 0ULL, + 1ULL, + 2ULL, + 3ULL, + 7ULL, + 8ULL, + 15ULL, + 16ULL, + 31ULL, + 32ULL, + 63ULL, + 64ULL, + 65ULL, + 127ULL, + 128ULL, + 255ULL, + 256ULL, + 0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL, + 0x8000000000000000ULL, + 0x7fffffffffffffffULL, + 0xfffffffffffffffeULL, + 0xffffffffffffffffULL, + }; + + std::mt19937_64 Rng(0x44d7a5f3e219c8b1ULL); + for (size_t I = 0; I < 8; ++I) { + Result.push_back(Rng()); + } + return Result; + }(); + return Values; +} + +std::vector getInterestingBinaryInputCases() { + std::vector Cases; + for (uint64_t Lhs : getBoundaryU64Values()) { + for (uint64_t Rhs : getBoundaryU64Values()) { + Cases.push_back({Lhs, Rhs}); + } + } + + std::mt19937_64 Rng(0x93ad71b6ce204f55ULL); + for (size_t I = 0; I < 96; ++I) { + Cases.push_back({Rng(), Rng()}); + } + return Cases; +} + +std::vector getInterestingTernaryInputCases() { + std::vector Cases; + for (uint64_t First : getBoundaryU64Values()) { + for (uint64_t Second : getBoundaryU64Values()) { + for (uint64_t Third : getBoundaryU64Values()) { + Cases.push_back({First, Second, Third}); + } + } + } + + std::mt19937_64 Rng(0x7bf8c9ae1304d261ULL); + for (size_t I = 0; I < 128; ++I) { + Cases.push_back({Rng(), Rng(), Rng()}); + } + return Cases; +} + +void 
expectI64Equivalent(const APInt &Original, const APInt &Rewritten, + const std::string &Context) { + ASSERT_EQ(Original.getBitWidth(), 64U) << Context; + ASSERT_EQ(Rewritten.getBitWidth(), 64U) << Context; + EXPECT_TRUE(Original == Rewritten) + << Context << " original=" << Original.getZExtValue() + << " rewritten=" << Rewritten.getZExtValue(); +} + +template +void expectUnaryI64RewriteEquivalent(const std::vector &Values, + OriginalBuilder &&BuildOriginal, + RewrittenBuilder &&BuildRewritten) { + for (uint64_t Value : Values) { + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Original = BuildOriginal(Builder, Input); + auto *Rewritten = BuildRewritten(Builder, Input); + + DMirFragmentInterpreter Interpreter; + Interpreter.setVariableValue(InputVar->getVarIdx(), APInt(64, Value)); + expectI64Equivalent(Interpreter.evaluate(Original), + Interpreter.evaluate(Rewritten), + "value=" + std::to_string(Value)); + } +} + +template +void expectBinaryI64RewriteEquivalent(const std::vector &Cases, + OriginalBuilder &&BuildOriginal, + RewrittenBuilder &&BuildRewritten) { + for (const auto &InputCase : Cases) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *Original = BuildOriginal(Builder, Lhs, Rhs); + auto *Rewritten = BuildRewritten(Builder, Lhs, Rhs); + + DMirFragmentInterpreter Interpreter; + Interpreter.setVariableValue(LhsVar->getVarIdx(), APInt(64, InputCase.Lhs)); + Interpreter.setVariableValue(RhsVar->getVarIdx(), APInt(64, InputCase.Rhs)); + expectI64Equivalent(Interpreter.evaluate(Original), + Interpreter.evaluate(Rewritten), + 
"lhs=" + std::to_string(InputCase.Lhs) + + " rhs=" + std::to_string(InputCase.Rhs)); + } +} + +bool runDMirRewritePass(DMirTestBuilder &Builder) { + DMirRewritePass RewritePass; + return RewritePass.runOnMFunction(Builder.Func); +} + +MInstruction *rewriteReturnedValue(DMirTestBuilder &Builder, + MInstruction *ReturnedValue) { + auto *Return = Builder.createStmt(ReturnedValue->getType(), + ReturnedValue); + runDMirRewritePass(Builder); + return Return->getOperand<0>(); +} + +void expectBinaryOperandsMatch(MInstruction *Inst, Opcode Opc, MInstruction *A, + MInstruction *B) { + ASSERT_EQ(Inst->getOpcode(), Opc); + auto *Binary = llvm::cast(Inst); + const bool Matches = + (Binary->getOperand<0>() == A && Binary->getOperand<1>() == B) || + (Binary->getOperand<0>() == B && Binary->getOperand<1>() == A); + EXPECT_TRUE(Matches); +} + +TEST(DMirValidation, EvaluatesIntegerExpressionDag) { + DMirTestBuilder Builder; + auto *Value = Builder.createConstI64(0x0f0f0f0f0f0f0f0fULL); + auto *Mask = Builder.createConstI64(0xf0f0f0f0f0f0f0f0ULL); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Value, Mask); + auto *Shift = Builder.createExpr( + OP_ushr, &Builder.Context.I64Type, Xor, Builder.createConstI64(4)); + auto *Rot = Builder.createExpr( + OP_rotl, &Builder.Context.I64Type, Shift, Builder.createConstI64(8)); + auto *Popcnt = Builder.createExpr( + OP_popcnt, &Builder.Context.I64Type, Rot); + + DMirFragmentInterpreter Interpreter; + const APInt Result = Interpreter.evaluate(Popcnt); + EXPECT_EQ(Result.getZExtValue(), 60ULL); +} + +TEST(DMirValidation, FuzzesAddZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesSubZeroRewrite) { + expectUnaryI64RewriteEquivalent( + 
getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesAndZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesAndAllOnesRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Input, + Builder.createConstI64(0xffffffffffffffffULL)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesAndSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Input, Input); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesAndNotSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + auto *NotInput = + Builder.createExpr(&Builder.Context.I64Type, Input); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, NotInput, Input); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesOrZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + 
[](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesOrAllOnesRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Input, + Builder.createConstI64(0xffffffffffffffffULL)); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0xffffffffffffffffULL); + }); +} + +TEST(DMirValidation, FuzzesOrSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Input, Input); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesAndAbsorbOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Or, Lhs); + }, + [](DMirTestBuilder &, MInstruction *Lhs, MInstruction *) { return Lhs; }); +} + +TEST(DMirValidation, FuzzesAndFactorNotSelfRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, And, NotLhs); + }, + [](DMirTestBuilder &Builder, MInstruction *, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesAndFactorOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( 
+ OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, And, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesAndFactorLhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, And, Lhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesAndFactorRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, And, Rhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesAndFactorNotRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, And, NotRhs); + }, + [](DMirTestBuilder &Builder, MInstruction *, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesAndAndXorZeroRewrite) { + 
expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, And, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesAndOrXorRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Or, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesAndOrRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Or, Rhs); + }, + [](DMirTestBuilder &, MInstruction *, MInstruction *Rhs) { return Rhs; }); +} + +TEST(DMirValidation, FuzzesAndNotOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, NotLhs, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction 
*Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, NotLhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesAndNotXorRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, NotLhs, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, NotLhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrAbsorbAndRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, Lhs); + }, + [](DMirTestBuilder &, MInstruction *Lhs, MInstruction *) { return Lhs; }); +} + +TEST(DMirValidation, FuzzesOrAndOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrAndRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, 
MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, Rhs); + }, + [](DMirTestBuilder &, MInstruction *, MInstruction *Rhs) { return Rhs; }); +} + +TEST(DMirValidation, FuzzesOrAndXorRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrFactorLhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Or, Lhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrFactorRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Or, Rhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrXorLhsRewrite) { + expectBinaryI64RewriteEquivalent( + 
getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Xor, Lhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrXorRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Xor, Rhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrNotSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + auto *NotInput = + Builder.createExpr(&Builder.Context.I64Type, Input); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotInput, Input); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0xffffffffffffffffULL); + }); +} + +TEST(DMirValidation, FuzzesOrAndNotLhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, NotLhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_or, 
&Builder.Context.I64Type, NotLhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrAndNotRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, NotRhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotRhs, Lhs); + }); +} + +TEST(DMirValidation, FuzzesOrOrXorRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Or, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesOrNotOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotLhs, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *, MInstruction *) { + return Builder.createConstI64(0xffffffffffffffffULL); + }); +} + +TEST(DMirValidation, FuzzesDoubleNotRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, 
MInstruction *Input) { + auto *Inner = + Builder.createExpr(&Builder.Context.I64Type, Input); + return Builder.createExpr(&Builder.Context.I64Type, + Inner); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, ExecutesDassignCmpSelectAndConversions) { + DMirTestBuilder Builder; + Variable *Var = Builder.createVariable(&Builder.Context.I64Type); + auto *Assigned = Builder.createConstI64(0xfffffffffffffff0ULL); + Builder.createStmt(&Builder.Context.VoidType, Assigned, + Var->getVarIdx()); + + auto *Read = Builder.createExpr(&Builder.Context.I64Type, + Var->getVarIdx()); + auto *Cmp = Builder.createExpr(CmpInstruction::ICMP_SLT, + &Builder.Context.I64Type, Read, + Builder.createConstI64(0)); + auto *Truncated = Builder.createExpr( + OP_trunc, &Builder.Context.I32Type, Read); + auto *Extended = Builder.createExpr( + OP_sext, &Builder.Context.I64Type, Truncated); + auto *Pointer = Builder.createExpr( + OP_inttoptr, Builder.I64PtrType, Extended); + auto *RoundTrip = Builder.createExpr( + OP_ptrtoint, &Builder.Context.I64Type, Pointer); + auto *Selected = Builder.createExpr( + &Builder.Context.I64Type, Cmp, RoundTrip, Builder.createConstI64(0)); + Builder.createStmt(&Builder.Context.I64Type, Selected); + + DMirFragmentInterpreter Interpreter; + const auto Result = Interpreter.execute(Builder.getBlock()); + ASSERT_TRUE(Result.has_value()); + EXPECT_EQ(Result->getSExtValue(), -16); +} + +TEST(DMirValidation, FuzzesSelectSameArmRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Cond, MInstruction *Value) { + return Builder.createExpr(&Builder.Context.I64Type, + Cond, Value, Value); + }, + [](DMirTestBuilder &, MInstruction *, MInstruction *Value) { + return Value; + }); +} + +// Verify select-same-arm for i8 and i32 value types. The rule is structural +// (both arms are the same SSA value), so it must hold for any integer width. 
+template +void fuzzSelectSameArmNarrow(ValTypeSelector &&GetValType, + unsigned ExpectedWidth) { + for (const auto &InputCase : getInterestingBinaryInputCases()) { + DMirTestBuilder Builder; + MType *ValType = GetValType(Builder); + Variable *CondVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *ValVar = Builder.createVariable(ValType); + auto *Cond = Builder.createExpr(&Builder.Context.I64Type, + CondVar->getVarIdx()); + auto *Val = + Builder.createExpr(ValType, ValVar->getVarIdx()); + auto *Original = + Builder.createExpr(ValType, Cond, Val, Val); + + DMirFragmentInterpreter Interpreter; + Interpreter.setVariableValue(CondVar->getVarIdx(), + APInt(64, InputCase.Lhs)); + Interpreter.setVariableValue(ValVar->getVarIdx(), + APInt(ExpectedWidth, InputCase.Rhs)); + APInt OrigResult = Interpreter.evaluate(Original); + APInt ValResult = Interpreter.evaluate(Val); + ASSERT_EQ(OrigResult.getBitWidth(), ExpectedWidth); + EXPECT_TRUE(OrigResult == ValResult) + << "cond=" << InputCase.Lhs << " val=" << InputCase.Rhs + << " original=" << OrigResult.getZExtValue() + << " rewritten=" << ValResult.getZExtValue(); + } +} + +TEST(DMirValidation, FuzzesSelectSameArmRewriteI8) { + fuzzSelectSameArmNarrow( + [](DMirTestBuilder &B) -> MType * { return &B.Context.I8Type; }, 8U); +} + +TEST(DMirValidation, FuzzesSelectSameArmRewriteI32) { + fuzzSelectSameArmNarrow( + [](DMirTestBuilder &B) -> MType * { return &B.Context.I32Type; }, 32U); +} + +TEST(DMirRewritePass, RewritesReturnedAddZeroToInput) { + // add(non_const, 0) is intentionally NOT folded: keeping the add node + // preserves a register-copy point that benefits downstream register + // allocation for i64 operands. Only add(const, 0) folds to const. 
+ DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Add = Builder.createExpr( + OP_add, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Add); + EXPECT_EQ(Rewritten, Add); +} + +TEST(DMirRewritePass, RewritesNestedTreeBottomUp) { + // Bottom-up rewrites fire: not(not(x)) -> x, and(x, ~0) -> x. + // The final add(x, 0) is intentionally NOT folded for non-constant x + // (preserves register-copy point for register allocation). The result + // is the Add node itself, with its LHS simplified to Input. + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *DoubleNot = + Builder.createExpr(&Builder.Context.I64Type, Input); + DoubleNot = + Builder.createExpr(&Builder.Context.I64Type, DoubleNot); + auto *Masked = Builder.createExpr( + OP_and, &Builder.Context.I64Type, DoubleNot, + Builder.createConstI64(0xffffffffffffffffULL)); + auto *Add = Builder.createExpr( + OP_add, &Builder.Context.I64Type, Masked, Builder.createConstI64(0)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Add); + EXPECT_EQ(Rewritten, Add); + EXPECT_EQ(llvm::cast(Add)->getOperand<0>(), Input); +} + +TEST(DMirRewritePass, RewritesSelectSameArmByStructure) { + DMirTestBuilder Builder; + Variable *CondVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *ValueVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Cond = Builder.createExpr(&Builder.Context.I64Type, + CondVar->getVarIdx()); + auto *TrueValue = Builder.createExpr( + &Builder.Context.I64Type, ValueVar->getVarIdx()); + auto *FalseValue = Builder.createExpr( + &Builder.Context.I64Type, ValueVar->getVarIdx()); + auto *Select = Builder.createExpr( + 
&Builder.Context.I64Type, Cond, TrueValue, FalseValue); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Select); + EXPECT_EQ(Rewritten, TrueValue); +} + +TEST(DMirRewritePass, MaterializesTypedAllOnesForOrNotSelf) { + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I32Type); + auto *Input = Builder.createExpr(&Builder.Context.I32Type, + InputVar->getVarIdx()); + auto *NotInput = + Builder.createExpr(&Builder.Context.I32Type, Input); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I32Type, NotInput, Input); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Or); + ASSERT_EQ(Rewritten->getOpcode(), OP_const); + const auto &Constant = + llvm::cast(Rewritten)->getConstant(); + EXPECT_EQ(llvm::cast(&Constant)->getValue().getBitWidth(), 32U); + EXPECT_TRUE(llvm::cast(&Constant)->getValue() == + llvm::APInt(32, ~0U)); +} + +TEST(DMirRewritePass, RewritesAdcZeroCarryToAdd) { + // adc(lhs, rhs, const(0)) → add(lhs, rhs) when carry is dead + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *Adc = Builder.createExpr( + &Builder.Context.I64Type, Lhs, Rhs, Builder.createConstI64(0)); + auto *Return = + Builder.createStmt(&Builder.Context.I64Type, Adc); + + EXPECT_TRUE(runDMirRewritePass(Builder)); + auto *Result = Return->getOperand<0>(); + EXPECT_NE(Result, Adc); + EXPECT_EQ(Result->getOpcode(), OP_add); +} + +TEST(DMirRewritePass, RewritesAdcZeroOperandsToInput) { + // adc(input, 0, const(0)) → input when carry is dead and RHS is zero + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + 
auto *Adc = Builder.createExpr( + &Builder.Context.I64Type, Input, Builder.createConstI64(0), + Builder.createConstI64(0)); + auto *Return = + Builder.createStmt(&Builder.Context.I64Type, Adc); + + EXPECT_TRUE(runDMirRewritePass(Builder)); + EXPECT_EQ(Return->getOperand<0>(), Input); +} + +TEST(DMirRewritePass, RewritesSbbZeroOperandsToInput) { + // sbb(input, 0, const(0)) → input when borrow is dead and RHS is zero + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Sbb = Builder.createExpr( + &Builder.Context.I64Type, Input, Builder.createConstI64(0), + Builder.createConstI64(0)); + auto *Return = + Builder.createStmt(&Builder.Context.I64Type, Sbb); + + EXPECT_TRUE(runDMirRewritePass(Builder)); + EXPECT_EQ(Return->getOperand<0>(), Input); +} + +TEST(DMirRewritePass, RewritesSbbSelfZeroBorrowToZero) { + // sbb(input, input, const(0)) → 0 when borrow is dead and LHS==RHS + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Sbb = Builder.createExpr( + &Builder.Context.I64Type, Input, + Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()), + Builder.createConstI64(0)); + auto *Return = + Builder.createStmt(&Builder.Context.I64Type, Sbb); + + EXPECT_TRUE(runDMirRewritePass(Builder)); + auto *Result = Return->getOperand<0>(); + EXPECT_NE(Result, Sbb); + EXPECT_TRUE(llvm::isa(Result)); +} + +TEST(DMirRewritePass, RewritesAndAbsorbOrToExistingOperand) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + 
RhsVar->getVarIdx()); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Or, Lhs); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, And); + EXPECT_EQ(Rewritten, Lhs); +} + +TEST(DMirRewritePass, RewritesAndOrXorToExistingXorSubtree) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()), + Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx())); + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Or, Xor); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, And); + EXPECT_EQ(Rewritten, Xor); +} + +TEST(DMirRewritePass, RewritesOrNotOrToAllOnes) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()), + Rhs); + auto *Root = Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotLhs, Or); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + ASSERT_EQ(Rewritten->getOpcode(), OP_const); + const auto Value = + llvm::cast( + 
&llvm::cast(Rewritten)->getConstant()) + ->getValue(); + EXPECT_TRUE(Value.isAllOnes()); +} + +TEST(DMirRewritePass, RewritesXorCancelToSiblingOperand) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *NestedXor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + auto *Root = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NestedXor, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx())); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + EXPECT_EQ(Rewritten, Rhs); +} + +TEST(DMirRewritePass, RewritesXorNotAllOnesToOperand) { + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *NotInput = + Builder.createExpr(&Builder.Context.I64Type, Input); + auto *Root = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NotInput, + Builder.createConstI64(0xffffffffffffffffULL)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + EXPECT_EQ(Rewritten, Input); +} + +TEST(DMirRewritePass, RewritesAndNotOrToNewAndNode) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()), + Rhs); + auto 
*Root = Builder.createExpr( + OP_and, &Builder.Context.I64Type, NotLhs, Or); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + expectBinaryOperandsMatch(Rewritten, OP_and, NotLhs, Rhs); +} + +TEST(DMirRewritePass, RewritesOrXorLhsToNewOrNode) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *NestedXor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + auto *Root = Builder.createExpr( + OP_or, &Builder.Context.I64Type, NestedXor, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx())); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + expectBinaryOperandsMatch(Rewritten, OP_or, Lhs, Rhs); +} + +TEST(DMirRewritePass, RewritesOrAndNotToNewOrNode) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotLhs = Builder.createExpr( + &Builder.Context.I64Type, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx())); + auto *Root = Builder.createExpr( + OP_or, &Builder.Context.I64Type, And, NotLhs); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + expectBinaryOperandsMatch(Rewritten, OP_or, NotLhs, Rhs); +} + +TEST(DMirRewritePass, RewritesXorNotNotToNewXorNode) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = 
Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *Root = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, + Builder.createExpr(&Builder.Context.I64Type, Lhs), + Builder.createExpr(&Builder.Context.I64Type, Rhs)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + expectBinaryOperandsMatch(Rewritten, OP_xor, Lhs, Rhs); +} + +TEST(DMirRewritePass, RewritesXorAndXorToNewOrNode) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, + Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()), + Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx())); + auto *Root = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, And, Xor); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + expectBinaryOperandsMatch(Rewritten, OP_or, Lhs, Rhs); +} + +TEST(DMirRewritePass, RewritesXorOrXorToNewAndNode) { + DMirTestBuilder Builder; + Variable *LhsVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *RhsVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Lhs = Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()); + auto *Rhs = Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx()); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, + 
Builder.createExpr(&Builder.Context.I64Type, + LhsVar->getVarIdx()), + Builder.createExpr(&Builder.Context.I64Type, + RhsVar->getVarIdx())); + auto *Root = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Or, Xor); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Root); + expectBinaryOperandsMatch(Rewritten, OP_and, Lhs, Rhs); +} + +TEST(DMirValidation, FuzzesAdcWithoutCarryRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + &Builder.Context.I64Type, Lhs, Rhs, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesAdcZeroOperandsRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + &Builder.Context.I64Type, Input, Builder.createConstI64(0), + Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesSbbWithoutBorrowRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + &Builder.Context.I64Type, Lhs, Rhs, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesSbbZeroOperandsRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + &Builder.Context.I64Type, Input, Builder.createConstI64(0), + Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + 
+TEST(DMirValidation, FuzzesXorZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesXorSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Input, Input); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesXorCancelRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Inner = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Inner, Lhs); + }, + [](DMirTestBuilder &, MInstruction *, MInstruction *Rhs) { return Rhs; }); +} + +TEST(DMirValidation, FuzzesXorCancelRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Inner = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Inner, Rhs); + }, + [](DMirTestBuilder &, MInstruction *Lhs, MInstruction *) { return Lhs; }); +} + +TEST(DMirValidation, FuzzesXorNotCancelRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NotLhs, Xor); + }, + 
[](DMirTestBuilder &Builder, MInstruction *, MInstruction *Rhs) { + return Builder.createExpr(&Builder.Context.I64Type, + Rhs); + }); +} + +TEST(DMirValidation, FuzzesXorNotSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + auto *NotInput = + Builder.createExpr(&Builder.Context.I64Type, Input); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NotInput, Input); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0xffffffffffffffffULL); + }); +} + +TEST(DMirValidation, FuzzesXorNotNotRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NotLhs, NotRhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesXorNotOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NotLhs, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotRhs, Lhs); + }); +} + +TEST(DMirValidation, FuzzesXorNotAllOnesRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + auto *NotInput 
= + Builder.createExpr(&Builder.Context.I64Type, Input); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, NotInput, + Builder.createConstI64(0xffffffffffffffffULL)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesXorAndOrRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, And, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesXorAndNotLhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, And, NotLhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *NotLhs = + Builder.createExpr(&Builder.Context.I64Type, Lhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotLhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesXorAndNotRhsRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, And, NotRhs); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction 
*Rhs) { + auto *NotRhs = + Builder.createExpr(&Builder.Context.I64Type, Rhs); + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, NotRhs, Lhs); + }); +} + +TEST(DMirValidation, FuzzesXorAndXorRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, And, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesXorOrXorRewrite) { + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, Lhs, Rhs); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Lhs, Rhs); + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, Or, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *Lhs, MInstruction *Rhs) { + return Builder.createExpr( + OP_and, &Builder.Context.I64Type, Lhs, Rhs); + }); +} + +TEST(DMirValidation, FuzzesSubSelfRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Input, Input); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesShlZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_shl, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction 
*Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesSshrZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_sshr, &Builder.Context.I64Type, Input, + Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesUshrZeroRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_ushr, &Builder.Context.I64Type, Input, + Builder.createConstI64(0)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesSbbSelfWithoutBorrowRewrite) { + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + &Builder.Context.I64Type, Input, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, EvaluatesAdcAndSbbLimbChains) { + DMirTestBuilder Builder; + std::array LhsLimbs = { + 0xffffffffffffffffULL, + 0x0000000000000000ULL, + 0x1234567890abcdefULL, + 0x0fedcba987654321ULL, + }; + std::array RhsLimbs = { + 0x0000000000000002ULL, + 0xffffffffffffffffULL, + 0x1111111111111111ULL, + 0x2222222222222222ULL, + }; + + std::array Sum = {}; + std::array Diff = {}; + MInstruction *Carry = Builder.createConstI64(0); + MInstruction *Borrow = Builder.createConstI64(0); + for (size_t I = 0; I < LhsLimbs.size(); ++I) { + auto *Lhs = Builder.createConstI64(LhsLimbs[I]); + auto *Rhs = Builder.createConstI64(RhsLimbs[I]); + if (I == 0) { + Sum[I] = Builder.createExpr( + OP_add, &Builder.Context.I64Type, Lhs, Rhs); + Diff[I] = Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Lhs, Rhs); + } else { + Sum[I] = Builder.createExpr(&Builder.Context.I64Type, Lhs, + Rhs, 
Carry); + Diff[I] = Builder.createExpr(&Builder.Context.I64Type, + Lhs, Rhs, Borrow); + } + auto *CarryInNonZero = Builder.createExpr( + CmpInstruction::ICMP_NE, &Builder.Context.I64Type, Carry, + Builder.createConstI64(0)); + auto *BorrowInNonZero = Builder.createExpr( + CmpInstruction::ICMP_NE, &Builder.Context.I64Type, Borrow, + Builder.createConstI64(0)); + auto *SumCmp = Builder.createExpr( + CmpInstruction::ICMP_ULT, &Builder.Context.I64Type, Sum[I], Lhs); + auto *SumEq = Builder.createExpr( + CmpInstruction::ICMP_EQ, &Builder.Context.I64Type, Sum[I], Lhs); + auto *CarryInOverflow = Builder.createExpr( + OP_and, &Builder.Context.I64Type, CarryInNonZero, SumEq); + auto *DiffCmp = Builder.createExpr( + CmpInstruction::ICMP_UGT, &Builder.Context.I64Type, Diff[I], Lhs); + auto *DiffEq = Builder.createExpr( + CmpInstruction::ICMP_EQ, &Builder.Context.I64Type, Diff[I], Lhs); + auto *BorrowInOverflow = Builder.createExpr( + OP_and, &Builder.Context.I64Type, BorrowInNonZero, DiffEq); + Carry = Builder.createExpr( + OP_or, &Builder.Context.I64Type, SumCmp, CarryInOverflow); + Borrow = Builder.createExpr( + OP_or, &Builder.Context.I64Type, DiffCmp, BorrowInOverflow); + } + + DMirFragmentInterpreter Interpreter; + std::array SumLimbs = {}; + std::array DiffLimbs = {}; + for (size_t I = 0; I < Sum.size(); ++I) { + SumLimbs[I] = Interpreter.evaluate(Sum[I]).getZExtValue(); + DiffLimbs[I] = Interpreter.evaluate(Diff[I]).getZExtValue(); + } + + const intx::uint256 ExpectedSum = + composeU256(LhsLimbs) + composeU256(RhsLimbs); + const intx::uint256 ExpectedDiff = + composeU256(LhsLimbs) - composeU256(RhsLimbs); + EXPECT_EQ(composeU256(SumLimbs), ExpectedSum); + EXPECT_EQ(composeU256(DiffLimbs), ExpectedDiff); +} + +TEST(DMirValidation, EvaluatesEvm128Helpers) { + DMirTestBuilder Builder; + auto *MulLhs = Builder.createConstI64(0xffffffffffffffffULL); + auto *MulRhs = Builder.createConstI64(3ULL); + auto *MulLo = Builder.createExpr( + OP_evm_umul128_lo, 
&Builder.Context.I64Type, MulLhs, MulRhs); + auto *MulHi = Builder.createExpr( + &Builder.Context.I64Type, MulLo); + + auto *DividendHi = Builder.createConstI64(1ULL); + auto *DividendLo = Builder.createConstI64(0ULL); + auto *Divisor = Builder.createConstI64(3ULL); + auto *Quotient = Builder.createExpr( + OP_evm_udiv128_by64, &Builder.Context.I64Type, DividendHi, DividendLo, + Divisor); + auto *Remainder = Builder.createExpr( + &Builder.Context.I64Type, Quotient); + + DMirFragmentInterpreter Interpreter; + EXPECT_EQ(Interpreter.evaluate(MulLo).getZExtValue(), 0xfffffffffffffffdULL); + EXPECT_EQ(Interpreter.evaluate(MulHi).getZExtValue(), 2ULL); + EXPECT_EQ(Interpreter.evaluate(Quotient).getZExtValue(), + 0x5555555555555555ULL); + EXPECT_EQ(Interpreter.evaluate(Remainder).getZExtValue(), 1ULL); +} + +TEST(DMirValidation, FuzzesEvm128HelpersAgainstHostArithmetic) { + const auto Values = getInterestingU64Values(); + for (uint64_t Lhs : Values) { + for (uint64_t Rhs : Values) { + DMirTestBuilder Builder; + auto *MulLhs = Builder.createConstI64(Lhs); + auto *MulRhs = Builder.createConstI64(Rhs); + auto *MulLo = Builder.createExpr( + OP_evm_umul128_lo, &Builder.Context.I64Type, MulLhs, MulRhs); + auto *MulHi = Builder.createExpr( + &Builder.Context.I64Type, MulLo); + + const unsigned __int128 Product = static_cast(Lhs) * + static_cast(Rhs); + DMirFragmentInterpreter Interpreter; + EXPECT_EQ(Interpreter.evaluate(MulLo).getZExtValue(), + static_cast(Product)) + << "lhs=" << Lhs << " rhs=" << Rhs; + EXPECT_EQ(Interpreter.evaluate(MulHi).getZExtValue(), + static_cast(Product >> 64)) + << "lhs=" << Lhs << " rhs=" << Rhs; + } + } + + for (const auto &InputCase : getInterestingTernaryInputCases()) { + if (InputCase.Third == 0) { + continue; + } + DMirTestBuilder Builder; + auto *Quotient = Builder.createExpr( + OP_evm_udiv128_by64, &Builder.Context.I64Type, + Builder.createConstI64(InputCase.First), + Builder.createConstI64(InputCase.Second), + 
Builder.createConstI64(InputCase.Third)); + auto *Remainder = Builder.createExpr( + &Builder.Context.I64Type, Quotient); + + const unsigned __int128 Dividend = + (static_cast(InputCase.First) << 64) | + InputCase.Second; + DMirFragmentInterpreter Interpreter; + EXPECT_EQ(Interpreter.evaluate(Quotient).getZExtValue(), + static_cast(Dividend / InputCase.Third)) + << "hi=" << InputCase.First << " lo=" << InputCase.Second + << " divisor=" << InputCase.Third; + EXPECT_EQ(Interpreter.evaluate(Remainder).getZExtValue(), + static_cast(Dividend % InputCase.Third)) + << "hi=" << InputCase.First << " lo=" << InputCase.Second + << " divisor=" << InputCase.Third; + } +} + +TEST(DMirValidation, FuzzesMulZeroRewrite) { + // (mul x 0) -> 0 + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(0)); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); + // (mul 0 x) -> 0 + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Builder.createConstI64(0), Input); + }, + [](DMirTestBuilder &Builder, MInstruction *) { + return Builder.createConstI64(0); + }); +} + +TEST(DMirValidation, FuzzesMulOneRewrite) { + // (mul x 1) -> x + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(1)); + }, + [](DMirTestBuilder &, MInstruction *Input) { return Input; }); + // (mul 1 x) -> x + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Builder.createConstI64(1), Input); + }, + 
[](DMirTestBuilder &, MInstruction *Input) { return Input; }); +} + +TEST(DMirValidation, FuzzesAddSelfToShl1Rewrite) { + // (add x x) -> (shl x 1) + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, Input, Input); + }, + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_shl, &Builder.Context.I64Type, Input, Builder.createConstI64(1)); + }); +} + +TEST(DMirValidation, FuzzesAddNegToSubRewrite) { + // (add (sub 0 x) y) -> (sub y x) + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *NegX = Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Builder.createConstI64(0), X); + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, NegX, Y); + }, + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Y, X); + }); + // (add y (sub 0 x)) -> (sub y x) + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *NegX = Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Builder.createConstI64(0), X); + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, Y, NegX); + }, + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Y, X); + }); +} + +TEST(DMirValidation, FuzzesAddAndXorToOrRewrite) { + // (add (and x y) (xor x y)) -> (or x y) + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, X, Y); + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, X, Y); + return Builder.createExpr( + 
OP_add, &Builder.Context.I64Type, And, Xor); + }, + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + return Builder.createExpr( + OP_or, &Builder.Context.I64Type, X, Y); + }); +} + +TEST(DMirValidation, FuzzesAddAndOrToAddRewrite) { + // (add (and x y) (or x y)) -> (add x y) + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, X, Y); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, X, Y); + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, And, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + return Builder.createExpr( + OP_add, &Builder.Context.I64Type, X, Y); + }); +} + +TEST(DMirValidation, FuzzesSubAndOrToNegXorRewrite) { + // (sub (and x y) (or x y)) -> (sub 0 (xor x y)) + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, X, Y); + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, X, Y); + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, And, Or); + }, + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *Xor = Builder.createExpr( + OP_xor, &Builder.Context.I64Type, X, Y); + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Builder.createConstI64(0), Xor); + }); +} + +TEST(DMirValidation, FuzzesSubOrAndToXorRewrite) { + // (sub (or x y) (and x y)) -> (xor x y) + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + auto *Or = Builder.createExpr( + OP_or, &Builder.Context.I64Type, X, Y); + auto *And = Builder.createExpr( + OP_and, &Builder.Context.I64Type, X, Y); + return Builder.createExpr( + OP_sub, &Builder.Context.I64Type, Or, 
And); + }, + [](DMirTestBuilder &Builder, MInstruction *X, MInstruction *Y) { + return Builder.createExpr( + OP_xor, &Builder.Context.I64Type, X, Y); + }); +} + +// Optimization 1: select(0, t, f) -> f and select(nonzero, t, f) -> t + +TEST(DMirValidation, FuzzesSelectFalseCondRewrite) { + // select(0, t, f) -> f + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *TrueVal, + MInstruction *FalseVal) { + auto *Cond = Builder.createConstI64(0); + return Builder.createExpr(&Builder.Context.I64Type, + Cond, TrueVal, FalseVal); + }, + [](DMirTestBuilder &, MInstruction *, MInstruction *FalseVal) { + return FalseVal; + }); +} + +TEST(DMirValidation, FuzzesSelectTrueCondRewrite) { + // select(nonzero, t, f) -> t + expectBinaryI64RewriteEquivalent( + getInterestingBinaryInputCases(), + [](DMirTestBuilder &Builder, MInstruction *TrueVal, + MInstruction *FalseVal) { + auto *Cond = Builder.createConstI64(1); + return Builder.createExpr(&Builder.Context.I64Type, + Cond, TrueVal, FalseVal); + }, + [](DMirTestBuilder &, MInstruction *TrueVal, MInstruction *) { + return TrueVal; + }); +} + +TEST(DMirRewritePass, RewritesSelectFalseCondToFalseArm) { + // select(0, t, f) -> f + DMirTestBuilder Builder; + Variable *TrueVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *FalseVar = Builder.createVariable(&Builder.Context.I64Type); + auto *TrueValue = Builder.createExpr( + &Builder.Context.I64Type, TrueVar->getVarIdx()); + auto *FalseValue = Builder.createExpr( + &Builder.Context.I64Type, FalseVar->getVarIdx()); + auto *Select = Builder.createExpr( + &Builder.Context.I64Type, Builder.createConstI64(0), TrueValue, + FalseValue); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Select); + EXPECT_EQ(Rewritten, FalseValue); +} + +TEST(DMirRewritePass, RewritesSelectTrueCondToTrueArm) { + // select(1, t, f) -> t + DMirTestBuilder Builder; + Variable *TrueVar = 
Builder.createVariable(&Builder.Context.I64Type); + Variable *FalseVar = Builder.createVariable(&Builder.Context.I64Type); + auto *TrueValue = Builder.createExpr( + &Builder.Context.I64Type, TrueVar->getVarIdx()); + auto *FalseValue = Builder.createExpr( + &Builder.Context.I64Type, FalseVar->getVarIdx()); + auto *Select = Builder.createExpr( + &Builder.Context.I64Type, Builder.createConstI64(1), TrueValue, + FalseValue); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Select); + EXPECT_EQ(Rewritten, TrueValue); +} + +// Optimization 2: mul(x, 2^k) -> shl(x, k) + +TEST(DMirValidation, FuzzesMulPow2ToShlRewrite) { + // mul(x, 2) -> shl(x, 1) + expectUnaryI64RewriteEquivalent( + getInterestingU64Values(), + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(2)); + }, + [](DMirTestBuilder &Builder, MInstruction *Input) { + return Builder.createExpr( + OP_shl, &Builder.Context.I64Type, Input, Builder.createConstI64(1)); + }); +} + +TEST(DMirRewritePass, RewritesMulBy2ToShl1) { + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Mul = Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(2)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Mul); + ASSERT_EQ(Rewritten->getOpcode(), OP_shl); + auto *Shl = llvm::cast(Rewritten); + EXPECT_EQ(Shl->getOperand<0>(), Input); + ASSERT_EQ(Shl->getOperand<1>()->getOpcode(), OP_const); + EXPECT_EQ( + llvm::cast( + &llvm::cast(Shl->getOperand<1>())->getConstant()) + ->getValue() + .getZExtValue(), + 1ULL); +} + +TEST(DMirRewritePass, RewritesMulBy4ToShl2) { + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + 
auto *Mul = Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(4)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Mul); + ASSERT_EQ(Rewritten->getOpcode(), OP_shl); + auto *Shl = llvm::cast(Rewritten); + EXPECT_EQ(Shl->getOperand<0>(), Input); + EXPECT_EQ( + llvm::cast( + &llvm::cast(Shl->getOperand<1>())->getConstant()) + ->getValue() + .getZExtValue(), + 2ULL); +} + +TEST(DMirRewritePass, RewritesMulBy8ToShl3) { + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Mul = Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(8)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Mul); + ASSERT_EQ(Rewritten->getOpcode(), OP_shl); + EXPECT_EQ(llvm::cast( + &llvm::cast( + llvm::cast(Rewritten)->getOperand<1>()) + ->getConstant()) + ->getValue() + .getZExtValue(), + 3ULL); +} + +TEST(DMirRewritePass, DoesNotRewriteMulBy3) { + // mul(x, 3) should not be rewritten (not a power of two) + DMirTestBuilder Builder; + Variable *InputVar = Builder.createVariable(&Builder.Context.I64Type); + auto *Input = Builder.createExpr(&Builder.Context.I64Type, + InputVar->getVarIdx()); + auto *Mul = Builder.createExpr( + OP_mul, &Builder.Context.I64Type, Input, Builder.createConstI64(3)); + + MInstruction *Rewritten = rewriteReturnedValue(Builder, Mul); + EXPECT_EQ(Rewritten->getOpcode(), OP_mul); +} + +// Optimization 3: isCarryDead recognizes zext(icmp_ult(x, 0)) + +TEST(DMirRewritePass, RewritesSbbWithZextIcmpUltZeroBorrowToSub) { + // sbb(x, y, zext(icmp_ult(z, 0))) -> sub(x, y) since borrow is always dead + DMirTestBuilder Builder; + Variable *XVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *YVar = Builder.createVariable(&Builder.Context.I64Type); + Variable *ZVar = Builder.createVariable(&Builder.Context.I64Type); + auto *X = 
Builder.createExpr(&Builder.Context.I64Type, + XVar->getVarIdx()); + auto *Y = Builder.createExpr(&Builder.Context.I64Type, + YVar->getVarIdx()); + auto *Z = Builder.createExpr(&Builder.Context.I64Type, + ZVar->getVarIdx()); + // icmp_ult(z, 0): always false, always zero + auto *Cmp = Builder.createExpr(CmpInstruction::ICMP_ULT, + &Builder.Context.I64Type, Z, + Builder.createConstI64(0)); + // zext to i64 + auto *Zext = Builder.createExpr( + OP_uext, &Builder.Context.I64Type, Cmp); + auto *Sbb = + Builder.createExpr(&Builder.Context.I64Type, X, Y, Zext); + auto *Return = + Builder.createStmt(&Builder.Context.I64Type, Sbb); + + EXPECT_TRUE(runDMirRewritePass(Builder)); + auto *Result = Return->getOperand<0>(); + EXPECT_NE(Result, Sbb); + EXPECT_EQ(Result->getOpcode(), OP_sub); +} + +} // namespace diff --git a/src/tests/testdata/x86_cg_peephole_conflict_rules.json b/src/tests/testdata/x86_cg_peephole_conflict_rules.json new file mode 100644 index 000000000..fc441ff5e --- /dev/null +++ b/src/tests/testdata/x86_cg_peephole_conflict_rules.json @@ -0,0 +1,138 @@ +{ + "version": 1, + "rules": [ + { + "name": "fold-a", + "stage": "instruction", + "priority": 100, + "pattern": [ + { + "bind": "cmp", + "predicate": "isCompare" + }, + { + "bind": "setcc", + "opcode": "SETCCr", + "capture": [ + { + "name": "setcc_dst", + "operand": 0, + "field": "reg" + }, + { + "name": "setcc_cc", + "operand": 1, + "field": "imm" + } + ] + }, + { + "bind": "test", + "opcode_any": [ + "TEST8rr", + "TEST16rr", + "TEST32rr", + "TEST64rr" + ], + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "setcc_dst" + } + ] + }, + { + "bind": "jcc", + "opcode": "JCC_1", + "require": [ + { + "operand": 1, + "field": "imm", + "equals_enum": "COND_NE" + } + ] + } + ], + "action": { + "erase": [ + "setcc" + ], + "set_imm": [ + { + "inst": "jcc", + "operand": 1, + "from_capture": "setcc_cc" + } + ] + } + }, + { + "name": "fold-b", + "stage": "instruction", + "priority": 100, + "pattern": [ 
+ { + "bind": "cmp", + "predicate": "isCompare" + }, + { + "bind": "setcc", + "opcode": "SETCCr", + "capture": [ + { + "name": "setcc_dst", + "operand": 0, + "field": "reg" + }, + { + "name": "setcc_cc", + "operand": 1, + "field": "imm" + } + ] + }, + { + "bind": "test", + "opcode_any": [ + "TEST8rr", + "TEST16rr", + "TEST32rr", + "TEST64rr" + ], + "require": [ + { + "operand": 0, + "field": "reg", + "equals_capture": "setcc_dst" + } + ] + }, + { + "bind": "jcc", + "opcode": "JCC_1", + "require": [ + { + "operand": 1, + "field": "imm", + "equals_enum": "COND_NE" + } + ] + } + ], + "action": { + "erase": [ + "test" + ], + "set_imm": [ + { + "inst": "jcc", + "operand": 1, + "from_capture": "setcc_cc" + } + ] + } + } + ] +} + diff --git a/src/tests/x86_cg_peephole_tests.cpp b/src/tests/x86_cg_peephole_tests.cpp new file mode 100644 index 000000000..9b89a1738 --- /dev/null +++ b/src/tests/x86_cg_peephole_tests.cpp @@ -0,0 +1,2052 @@ +// Copyright (C) 2025 the DTVM authors. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "compiler/context.h" +#include "compiler/llvm-prebuild/Target/X86/X86Subtarget.h" +#include "compiler/mir/function.h" +#include "compiler/mir/module.h" +#include "compiler/target/x86/x86_cg_peephole.h" + +#include +#include +#include +#include + +namespace { + +using namespace COMPILER; +using namespace llvm; + +MFunctionType *createVoidFunctionType(CompileContext &Context) { + return MFunctionType::create(Context, Context.VoidType, {}); +} + +struct X86CmpFlags { + bool Overflow = false; + bool Sign = false; + bool Zero = false; + bool Carry = false; + bool Parity = false; +}; + +X86CmpFlags computeCmpFlags(uint64_t Lhs, uint64_t Rhs) { + const uint64_t Result = Lhs - Rhs; + X86CmpFlags Flags; + Flags.Overflow = ((Lhs ^ Rhs) & (Lhs ^ Result) & (1ULL << 63)) != 0; + Flags.Sign = (Result >> 63) != 0; + Flags.Zero = Result == 0; + Flags.Carry = Lhs < Rhs; + Flags.Parity = + (__builtin_popcount(static_cast(Result & 0xff)) % 2) == 0; + return Flags; +} + +bool evaluateCondCode(int64_t CondCode, const X86CmpFlags &Flags) { + switch (CondCode) { + case X86::COND_O: + return Flags.Overflow; + case X86::COND_NO: + return !Flags.Overflow; + case X86::COND_B: + return Flags.Carry; + case X86::COND_AE: + return !Flags.Carry; + case X86::COND_E: + return Flags.Zero; + case X86::COND_NE: + return !Flags.Zero; + case X86::COND_BE: + return Flags.Carry || Flags.Zero; + case X86::COND_A: + return !Flags.Carry && !Flags.Zero; + case X86::COND_S: + return Flags.Sign; + case X86::COND_NS: + return !Flags.Sign; + case X86::COND_P: + return Flags.Parity; + case X86::COND_NP: + return !Flags.Parity; + case X86::COND_L: + return Flags.Sign != Flags.Overflow; + case X86::COND_GE: + return Flags.Sign == Flags.Overflow; + case X86::COND_LE: + return Flags.Zero || (Flags.Sign != Flags.Overflow); + case X86::COND_G: + return !Flags.Zero && (Flags.Sign == Flags.Overflow); + default: + ADD_FAILURE() << "unexpected cond code " << CondCode; + 
return false; + } +} + +#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) +struct X86ExecutionHarnessCase { + const char *Name = nullptr; + int64_t CondCode = X86::COND_INVALID; + uint64_t (*Original)(uint64_t, uint64_t) = nullptr; + uint64_t (*Rewritten)(uint64_t, uint64_t) = nullptr; +}; + +struct X86ZeroShiftHarnessResult { + uint64_t Value = 0; + uint64_t Flags = 0; +}; + +struct X86ZeroShiftExecutionHarnessCase { + const char *Name = nullptr; + X86ZeroShiftHarnessResult (*Original)(uint64_t, uint64_t, uint64_t) = nullptr; + X86ZeroShiftHarnessResult (*Rewritten)(uint64_t, uint64_t, + uint64_t) = nullptr; + uint64_t ValueMask = 0; +}; + +struct X86SelfMoveExecutionHarnessCase { + const char *Name = nullptr; + X86ZeroShiftHarnessResult (*Original)(uint64_t, uint64_t, uint64_t) = nullptr; + X86ZeroShiftHarnessResult (*Rewritten)(uint64_t, uint64_t, + uint64_t) = nullptr; + uint64_t ValueMask = 0; +}; + +struct X86FallthroughJccExecutionHarnessCase { + const char *Name = nullptr; + int64_t CondCode = X86::COND_INVALID; + X86ZeroShiftHarnessResult (*Original)(uint64_t, uint64_t, uint64_t) = nullptr; + X86ZeroShiftHarnessResult (*Rewritten)(uint64_t, uint64_t, + uint64_t) = nullptr; +}; + +#define DEFINE_SETCC_TEST_JNE_EXEC_CASE(Name, CondCodeValue, SetccMnemonic, \ + JccMnemonic) \ + static uint64_t execOriginal_##Name(uint64_t Lhs, uint64_t Rhs) { \ + uint64_t Out; \ + asm volatile("cmpq %[rhs], %[lhs]\n\t" SetccMnemonic " %%al\n\t" \ + "testb %%al, %%al\n\t" \ + "jne 1f\n\t" \ + "xorq %[out], %[out]\n\t" \ + "jmp 2f\n\t" \ + "1:\n\t" \ + "movq $1, %[out]\n\t" \ + "2:\n\t" \ + : [out] "=&r"(Out) \ + : [lhs] "r"(Lhs), [rhs] "r"(Rhs) \ + : "cc", "rax"); \ + return Out; \ + } \ + static uint64_t execRewritten_##Name(uint64_t Lhs, uint64_t Rhs) { \ + uint64_t Out; \ + asm volatile("cmpq %[rhs], %[lhs]\n\t" JccMnemonic " 1f\n\t" \ + "xorq %[out], %[out]\n\t" \ + "jmp 2f\n\t" \ + "1:\n\t" \ + "movq $1, %[out]\n\t" \ + "2:\n\t" \ + : [out] "=&r"(Out) \ 
+ : [lhs] "r"(Lhs), [rhs] "r"(Rhs) \ + : "cc"); \ + return Out; \ + } \ + static constexpr X86ExecutionHarnessCase ExecCase_##Name = { \ + #Name, CondCodeValue, execOriginal_##Name, execRewritten_##Name} + +DEFINE_SETCC_TEST_JNE_EXEC_CASE(O, X86::COND_O, "seto", "jo"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(NO, X86::COND_NO, "setno", "jno"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(B, X86::COND_B, "setb", "jb"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(AE, X86::COND_AE, "setae", "jae"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(E, X86::COND_E, "sete", "je"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(NE, X86::COND_NE, "setne", "jne"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(BE, X86::COND_BE, "setbe", "jbe"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(A, X86::COND_A, "seta", "ja"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(S, X86::COND_S, "sets", "js"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(NS, X86::COND_NS, "setns", "jns"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(P, X86::COND_P, "setp", "jp"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(NP, X86::COND_NP, "setnp", "jnp"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(L, X86::COND_L, "setl", "jl"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(GE, X86::COND_GE, "setge", "jge"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(LE, X86::COND_LE, "setle", "jle"); +DEFINE_SETCC_TEST_JNE_EXEC_CASE(G, X86::COND_G, "setg", "jg"); + +const std::array ExecutionHarnessCases = { + ExecCase_O, ExecCase_NO, ExecCase_B, ExecCase_AE, ExecCase_E, ExecCase_NE, + ExecCase_BE, ExecCase_A, ExecCase_S, ExecCase_NS, ExecCase_P, ExecCase_NP, + ExecCase_L, ExecCase_GE, ExecCase_LE, ExecCase_G, +}; + +#define DEFINE_ZERO_SHIFT_EXEC_CASE_8(Name, Mnemonic) \ + static X86ZeroShiftHarnessResult execOriginal_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + const uint8_t Input = static_cast(Value); \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movb %[value], %%al\n\t" Mnemonic " $0, %%al\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + "movzbq %%al, %[out]\n\t" \ + : [out] "=&r"(Out), [flags] 
"=&r"(Flags) \ + : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc", "rax"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static X86ZeroShiftHarnessResult execRewritten_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + const uint8_t Input = static_cast(Value); \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movb %[value], %%al\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + "movzbq %%al, %[out]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc", "rax"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static constexpr X86ZeroShiftExecutionHarnessCase ZeroShiftCase_##Name = { \ + #Name, execOriginal_##Name, execRewritten_##Name, 0xffULL} + +#define DEFINE_ZERO_SHIFT_EXEC_CASE_16(Name, Mnemonic) \ + static X86ZeroShiftHarnessResult execOriginal_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + const uint16_t Input = static_cast(Value); \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movw %[value], %%ax\n\t" Mnemonic " $0, %%ax\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + "movzwq %%ax, %[out]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc", "rax"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static X86ZeroShiftHarnessResult execRewritten_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + const uint16_t Input = static_cast(Value); \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movw %[value], %%ax\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + "movzwq %%ax, %[out]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc", "rax"); \ + return {.Value 
= Out, .Flags = Flags}; \ + } \ + static constexpr X86ZeroShiftExecutionHarnessCase ZeroShiftCase_##Name = { \ + #Name, execOriginal_##Name, execRewritten_##Name, 0xffffULL} + +#define DEFINE_ZERO_SHIFT_EXEC_CASE_64(Name, Mnemonic) \ + static X86ZeroShiftHarnessResult execOriginal_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movq %[value], %%rax\n\t" Mnemonic " $0, %%rax\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + "movq %%rax, %[out]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc", "rax"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static X86ZeroShiftHarnessResult execRewritten_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movq %[value], %%rax\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + "movq %%rax, %[out]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc", "rax"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static constexpr X86ZeroShiftExecutionHarnessCase ZeroShiftCase_##Name = { \ + #Name, execOriginal_##Name, execRewritten_##Name, ~0ULL} + +DEFINE_ZERO_SHIFT_EXEC_CASE_8(SHL8, "shlb"); +DEFINE_ZERO_SHIFT_EXEC_CASE_16(SHL16, "shlw"); +DEFINE_ZERO_SHIFT_EXEC_CASE_64(SHL64, "shlq"); +DEFINE_ZERO_SHIFT_EXEC_CASE_8(SHR8, "shrb"); +DEFINE_ZERO_SHIFT_EXEC_CASE_16(SHR16, "shrw"); +DEFINE_ZERO_SHIFT_EXEC_CASE_64(SHR64, "shrq"); +DEFINE_ZERO_SHIFT_EXEC_CASE_8(SAR8, "sarb"); +DEFINE_ZERO_SHIFT_EXEC_CASE_16(SAR16, "sarw"); +DEFINE_ZERO_SHIFT_EXEC_CASE_64(SAR64, "sarq"); + +const std::array ZeroShiftHarnessCases = { + ZeroShiftCase_SHL8, ZeroShiftCase_SHL16, ZeroShiftCase_SHL64, + ZeroShiftCase_SHR8, ZeroShiftCase_SHR16, 
ZeroShiftCase_SHR64, + ZeroShiftCase_SAR8, ZeroShiftCase_SAR16, ZeroShiftCase_SAR64, +}; + +static X86ZeroShiftHarnessResult +execOriginalSelfMove8(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + const uint8_t Input = static_cast(Value); + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movb %[value], %%al\n\t" + "movb %%al, %%al\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movzbq %%al, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execRewrittenSelfMove8(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + const uint8_t Input = static_cast(Value); + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movb %[value], %%al\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movzbq %%al, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "q"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execOriginalSelfMove16(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + const uint16_t Input = static_cast(Value); + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movw %[value], %%ax\n\t" + "movw %%ax, %%ax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movzwq %%ax, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execRewrittenSelfMove16(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + const uint16_t Input = static_cast(Value); + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movw %[value], %%ax\n\t" + "pushfq\n\t" + "popq 
%[flags]\n\t" + "movzwq %%ax, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Input), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execOriginalSelfMove64(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movq %[value], %%rax\n\t" + "movq %%rax, %%rax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movq %%rax, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execRewrittenSelfMove64(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movq %[value], %%rax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movq %%rax, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execOriginalSelfMove32(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movq %[value], %%rax\n\t" + "movl %%eax, %%eax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movq %%rax, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execRewrittenSelfMove32(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movq %[value], %%rax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movq %%rax, %[out]\n\t" + : [out] 
"=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +const std::array SelfMoveHarnessCases = { + X86SelfMoveExecutionHarnessCase{"MOV8rr", execOriginalSelfMove8, + execRewrittenSelfMove8, 0xffULL}, + X86SelfMoveExecutionHarnessCase{"MOV16rr", execOriginalSelfMove16, + execRewrittenSelfMove16, 0xffffULL}, + X86SelfMoveExecutionHarnessCase{"MOV64rr", execOriginalSelfMove64, + execRewrittenSelfMove64, ~0ULL}, +}; + +#define DEFINE_FALLTHROUGH_JCC_EXEC_CASE(Name, CondCodeValue, JccMnemonic) \ + static X86ZeroShiftHarnessResult execOriginalFallthroughJcc_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" JccMnemonic " 1f\n\t" \ + "1:\n\t" \ + "movq %[value], %[out]\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static X86ZeroShiftHarnessResult execRewrittenFallthroughJcc_##Name( \ + uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { \ + uint64_t Out; \ + uint64_t Flags; \ + asm volatile( \ + "cmpq %[flag_rhs], %[flag_lhs]\n\t" \ + "movq %[value], %[out]\n\t" \ + "pushfq\n\t" \ + "popq %[flags]\n\t" \ + : [out] "=&r"(Out), [flags] "=&r"(Flags) \ + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) \ + : "cc"); \ + return {.Value = Out, .Flags = Flags}; \ + } \ + static constexpr X86FallthroughJccExecutionHarnessCase \ + FallthroughJccCase_##Name = {#Name, CondCodeValue, \ + execOriginalFallthroughJcc_##Name, \ + execRewrittenFallthroughJcc_##Name} + +DEFINE_FALLTHROUGH_JCC_EXEC_CASE(O, X86::COND_O, "jo"); +DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NO, X86::COND_NO, "jno"); +DEFINE_FALLTHROUGH_JCC_EXEC_CASE(B, X86::COND_B, "jb"); 
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(AE, X86::COND_AE, "jae"); // each invocation defines one execOriginal/execRewritten pair and its FallthroughJccCase_* entry
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(E, X86::COND_E, "je");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NE, X86::COND_NE, "jne");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(BE, X86::COND_BE, "jbe");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(A, X86::COND_A, "ja");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(S, X86::COND_S, "js");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NS, X86::COND_NS, "jns");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(P, X86::COND_P, "jp");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(NP, X86::COND_NP, "jnp");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(L, X86::COND_L, "jl");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(GE, X86::COND_GE, "jge");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(LE, X86::COND_LE, "jle");
+DEFINE_FALLTHROUGH_JCC_EXEC_CASE(G, X86::COND_G, "jg");
+/* Table gathering the sixteen FallthroughJccCase_* objects (O through G)
+ * so a test can iterate over every condition code in one loop. */
+const std::array // NOTE(review): template arguments appear to be missing here - confirm against the real file
+    FallthroughJccHarnessCases = {
+        FallthroughJccCase_O, FallthroughJccCase_NO, FallthroughJccCase_B,
+        FallthroughJccCase_AE, FallthroughJccCase_E, FallthroughJccCase_NE,
+        FallthroughJccCase_BE, FallthroughJccCase_A, FallthroughJccCase_S,
+        FallthroughJccCase_NS, FallthroughJccCase_P, FallthroughJccCase_NP,
+        FallthroughJccCase_L, FallthroughJccCase_GE, FallthroughJccCase_LE,
+        FallthroughJccCase_G,
+};
+/* Reference sequence for the redundant 64-bit TEST case: copies Value into
+ * %rax, executes `testq %rax, %rax` twice, snapshots RFLAGS with
+ * pushfq/popq and returns %rax together with that snapshot.
+ * FlagLhs and FlagRhs are bound as asm inputs, but the template never
+ * references them.
+ * NOTE(review): pushfq stores 8 bytes just below %rsp; presumably nothing
+ * live sits in the red zone at this point - confirm, or build this file
+ * with -mno-red-zone. */
+static X86ZeroShiftHarnessResult execOriginalRedundantTest64(uint64_t Value,
+                                                             uint64_t FlagLhs,
+                                                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "movq %[value], %%rax\n\t"
+      "testq %%rax, %%rax\n\t"
+      "testq %%rax, %%rax\n\t" // the second, identical TEST is the instruction under scrutiny
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out] "=&r"(Out), [flags] "=&r"(Flags)
+      : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs)
+      : "cc", "rax");
+  return {.Value = Out, .Flags = Flags};
+}
+// Rewritten counterpart: the same sequence with a single testq.
+static X86ZeroShiftHarnessResult
+execRewrittenRedundantTest64(uint64_t Value, uint64_t FlagLhs,
+                             uint64_t FlagRhs) {
+  uint64_t Out;
+  uint64_t Flags;
+  asm volatile(
+      "movq %[value], %%rax\n\t"
+      "testq %%rax, %%rax\n\t"
+      "pushfq\n\t"
+      "popq %[flags]\n\t"
+      "movq %%rax, %[out]\n\t"
+      : [out]
"=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult execOriginalRedundantTest32(uint64_t Value, + uint64_t FlagLhs, + uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + const uint32_t Input = static_cast(Value); + asm volatile("movq %[value], %%rax\n\t" + "testl %%eax, %%eax\n\t" + "testl %%eax, %%eax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movl %%eax, %k[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(static_cast(Input)), + [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execRewrittenRedundantTest32(uint64_t Value, uint64_t FlagLhs, + uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + const uint32_t Input = static_cast(Value); + asm volatile("movq %[value], %%rax\n\t" + "testl %%eax, %%eax\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movl %%eax, %k[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(static_cast(Input)), + [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execOriginalRedundantTest8(uint64_t Value, uint64_t FlagLhs, uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile("movb %[value8], %%al\n\t" + "testb %%al, %%al\n\t" + "testb %%al, %%al\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + "movzbq %%al, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value8] "q"(static_cast(Value)), + [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult execRewrittenRedundantTest8(uint64_t Value, + uint64_t FlagLhs, + uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile("movb %[value8], %%al\n\t" + "testb %%al, %%al\n\t" + "pushfq\n\t" + "popq 
%[flags]\n\t" + "movzbq %%al, %[out]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value8] "q"(static_cast(Value)), + [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc", "rax"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult execOriginalFallthroughJump(uint64_t Value, + uint64_t FlagLhs, + uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "jmp 1f\n\t" + "1:\n\t" + "movq %[value], %[out]\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc"); + return {.Value = Out, .Flags = Flags}; +} + +static X86ZeroShiftHarnessResult +execRewrittenFallthroughJump(uint64_t Value, uint64_t FlagLhs, + uint64_t FlagRhs) { + uint64_t Out; + uint64_t Flags; + asm volatile( + "cmpq %[flag_rhs], %[flag_lhs]\n\t" + "movq %[value], %[out]\n\t" + "pushfq\n\t" + "popq %[flags]\n\t" + : [out] "=&r"(Out), [flags] "=&r"(Flags) + : [value] "r"(Value), [flag_lhs] "r"(FlagLhs), [flag_rhs] "r"(FlagRhs) + : "cc"); + return {.Value = Out, .Flags = Flags}; +} +#endif + +TEST(X86CgPeephole, FoldsSetccTestJneChain) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + CgBasicBlock *TargetBB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + MF.appendCgBasicBlock(MF.createCgBasicBlock()); + MF.appendCgBasicBlock(TargetBB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps = { + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createRegOperand(X86::RBX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP64rr), CmpOps); + + std::array SetccOps = { + 
CgOperand::createRegOperand(X86::AL, true), + CgOperand::createImmOperand(X86::COND_E), + }; + MF.createCgInstruction(*BB, TII.get(X86::SETCCr), SetccOps); + + std::array TestOps = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST8rr), TestOps); + + std::array JccOps = { + CgOperand::createMBB(TargetBB), + CgOperand::createImmOperand(X86::COND_NE), + }; + MF.createCgInstruction(*BB, TII.get(X86::JCC_1), JccOps); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 2); + auto It = BB->begin(); + EXPECT_EQ(It->getOpcode(), X86::CMP64rr); + ++It; + ASSERT_NE(It, BB->end()); + EXPECT_EQ(It->getOpcode(), X86::JCC_1); + EXPECT_EQ(It->getOperand(1).getImm(), X86::COND_E); +} + +TEST(X86CgPeephole, RemovesSelfMove64) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array MoveOps = { + CgOperand::createRegOperand(X86::RAX, true), + CgOperand::createRegOperand(X86::RAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::MOV64rr), MoveOps); + + X86CgPeephole Peephole(MF); + + EXPECT_TRUE(BB->empty()); +} + +TEST(X86CgPeephole, KeepsSelfMove32) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array MoveOps = { + 
CgOperand::createRegOperand(X86::EAX, true), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::MOV32rr), MoveOps); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::MOV32rr); +} + +TEST(X86CgPeephole, RemovesZeroShift64) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array ShiftOps = { + CgOperand::createRegOperand(X86::RAX, true), + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createImmOperand(0), + }; + MF.createCgInstruction(*BB, TII.get(X86::SHL64ri), ShiftOps); + + X86CgPeephole Peephole(MF); + + EXPECT_TRUE(BB->empty()); +} + +TEST(X86CgPeephole, KeepsZeroShift32) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array ShiftOps = { + CgOperand::createRegOperand(X86::EAX, true), + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createImmOperand(0), + }; + MF.createCgInstruction(*BB, TII.get(X86::SHL32ri), ShiftOps); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::SHL32ri); +} + +TEST(X86CgPeephole, KeepsMixedOperandTestChain) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType 
*FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + CgBasicBlock *TargetBB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + MF.appendCgBasicBlock(MF.createCgBasicBlock()); + MF.appendCgBasicBlock(TargetBB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps = { + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createRegOperand(X86::RBX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP64rr), CmpOps); + + std::array SetccOps = { + CgOperand::createRegOperand(X86::AL, true), + CgOperand::createImmOperand(X86::COND_E), + }; + MF.createCgInstruction(*BB, TII.get(X86::SETCCr), SetccOps); + + std::array TestOps = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::BL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST8rr), TestOps); + + std::array JccOps = { + CgOperand::createMBB(TargetBB), + CgOperand::createImmOperand(X86::COND_NE), + }; + MF.createCgInstruction(*BB, TII.get(X86::JCC_1), JccOps); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 4); +} + +TEST(X86CgPeephole, FuzzFoldSetccTestJneToJccSemantics) { + const std::array CondCodes = { + X86::COND_O, X86::COND_NO, X86::COND_B, X86::COND_AE, + X86::COND_E, X86::COND_NE, X86::COND_BE, X86::COND_A, + X86::COND_S, X86::COND_NS, X86::COND_P, X86::COND_NP, + X86::COND_L, X86::COND_GE, X86::COND_LE, X86::COND_G, + }; + std::mt19937_64 Rng(0xD7A12025ULL); + + for (int64_t CondCode : CondCodes) { + for (int Iter = 0; Iter < 20000; ++Iter) { + const uint64_t Lhs = Rng(); + const uint64_t Rhs = Rng(); + const X86CmpFlags Flags = computeCmpFlags(Lhs, Rhs); + const uint8_t SetccResult = + evaluateCondCode(CondCode, Flags) ? 
uint8_t{1} : uint8_t{0}; // SetccResult: the condition reduced to a 0/1 byte
+      const bool OriginalBranches = SetccResult != 0; // original chain: branch when that byte is non-zero
+      const bool RewrittenBranches = evaluateCondCode(CondCode, Flags); // rewritten form: branch on the condition itself
+      EXPECT_EQ(OriginalBranches, RewrittenBranches) // NOTE(review): both sides come from the same evaluateCondCode(CondCode, Flags) result, so this holds by construction
+          << "cond=" << CondCode << " lhs=" << Lhs << " rhs=" << Rhs;
+    }
+  }
+}
+/* Runs every ExecutionHarnessCases entry natively and compares the original
+ * and rewritten sequences: first on the 12 x 12 grid of EdgeValues, where
+ * the result is also checked against evaluateCondCode(computeCmpFlags()),
+ * then on 10000 random operand pairs. Skipped unless built for x86_64 with
+ * GNU-style inline asm.
+ * NOTE(review): the random loop compares Original against Rewritten only;
+ * the model comparison is not repeated there. */
+TEST(X86CgPeephole, ExecutionHarnessFoldSetccTestJneToJcc) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array EdgeValues = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  std::mt19937_64 Rng(0xE8EC2025ULL); // fixed seed keeps the random operands reproducible
+// Exhaustive pass over every ordered pair of edge values.
+  for (const auto &HarnessCase : ExecutionHarnessCases) {
+    for (uint64_t Lhs : EdgeValues) {
+      for (uint64_t Rhs : EdgeValues) {
+        const bool Original = HarnessCase.Original(Lhs, Rhs) != 0;
+        const bool Rewritten = HarnessCase.Rewritten(Lhs, Rhs) != 0;
+        const bool Modeled =
+            evaluateCondCode(HarnessCase.CondCode, computeCmpFlags(Lhs, Rhs));
+        EXPECT_EQ(Original, Rewritten)
+            << "case=" << HarnessCase.Name << " lhs=" << Lhs << " rhs=" << Rhs;
+        EXPECT_EQ(Original, Modeled)
+            << "case=" << HarnessCase.Name << " lhs=" << Lhs << " rhs=" << Rhs;
+      }
+    }
+// Randomised pass; Rng is shared, so later cases see later draws.
+    for (int Iter = 0; Iter < 10000; ++Iter) {
+      const uint64_t Lhs = Rng();
+      const uint64_t Rhs = Rng();
+      const bool Original = HarnessCase.Original(Lhs, Rhs) != 0;
+      const bool Rewritten = HarnessCase.Rewritten(Lhs, Rhs) != 0;
+      EXPECT_EQ(Original, Rewritten)
+          << "case=" << HarnessCase.Name << " lhs=" << Lhs << " rhs=" << Rhs;
+    }
+  }
+#endif
+}
+
+TEST(X86CgPeephole, ExecutionHarnessRemoveZeroShift) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array EdgeValues = {
+      0ULL,
+      1ULL,
+      2ULL,
0x7fffffffffffffffULL, + 0x8000000000000000ULL, + 0x8000000000000001ULL, + 0xffffffffffffffffULL, + 0xfffffffffffffffeULL, + 0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL, + 0x00000000ffffffffULL, + 0xffffffff00000000ULL, + }; + const std::array, 6> FlagSeeds = { + std::pair{0ULL, 0ULL}, + std::pair{0ULL, 1ULL}, + std::pair{1ULL, 0ULL}, + std::pair{0x8000000000000000ULL, 1ULL}, + std::pair{0x7fffffffffffffffULL, + 0xffffffffffffffffULL}, + std::pair{0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL}, + }; + std::mt19937_64 Rng(0xA0C02026ULL); + + for (const auto &HarnessCase : ZeroShiftHarnessCases) { + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint64_t Value : EdgeValues) { + const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs); + const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value & HarnessCase.ValueMask, + Rewritten.Value & HarnessCase.ValueMask) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + } + } + + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint64_t Value = Rng(); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs); + const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value & HarnessCase.ValueMask, + Rewritten.Value & HarnessCase.ValueMask) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + } + } +#endif +} + +TEST(X86CgPeephole, ExecutionHarnessRemoveSelfMove) { +#if !defined(__x86_64__) || !(defined(__GNUC__) || 
defined(__clang__)) + GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm"; +#else + const std::array EdgeValues = { + 0ULL, + 1ULL, + 2ULL, + 0x7fffffffffffffffULL, + 0x8000000000000000ULL, + 0x8000000000000001ULL, + 0xffffffffffffffffULL, + 0xfffffffffffffffeULL, + 0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL, + 0x00000000ffffffffULL, + 0xffffffff00000000ULL, + }; + const std::array, 6> FlagSeeds = { + std::pair{0ULL, 0ULL}, + std::pair{0ULL, 1ULL}, + std::pair{1ULL, 0ULL}, + std::pair{0x8000000000000000ULL, 1ULL}, + std::pair{0x7fffffffffffffffULL, + 0xffffffffffffffffULL}, + std::pair{0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL}, + }; + std::mt19937_64 Rng(0x51F2026ULL); + + for (const auto &HarnessCase : SelfMoveHarnessCases) { + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint64_t Value : EdgeValues) { + const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs); + const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value & HarnessCase.ValueMask, + Rewritten.Value & HarnessCase.ValueMask) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + } + } + + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint64_t Value = Rng(); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs); + const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value & HarnessCase.ValueMask, + Rewritten.Value & HarnessCase.ValueMask) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << 
FlagLhs << " flag_rhs=" << FlagRhs;
+    }
+  }
+#endif
+}
+/* Negative case for self-move removal: for each input it asserts that
+ * execOriginalSelfMove32 returns Value & 0xffffffff while
+ * execRewrittenSelfMove32 returns Value unchanged, that the two results
+ * differ, and that the captured flags are equal. Every entry of Values has
+ * non-zero upper 32 bits, which is what makes the results differ.
+ * Skipped unless built for x86_64 with GNU-style inline asm. */
+TEST(X86CgPeephole, ExecutionHarnessSelfMove32ChangesUpperBits) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array Values = {
+      0xffffffff00000000ULL, 0xffffffff00000001ULL, 0xaaaaaaaa55555555ULL,
+      0x8000000000000001ULL, 0x7fffffff00000000ULL, 0x1234567800000000ULL,
+  };
+  const std::array, 4> FlagSeeds = { // NOTE(review): element type looks lost in extraction (presumably a pair of uint64_t) - confirm against the real file
+      std::pair{0ULL, 0ULL},
+      std::pair{0ULL, 1ULL},
+      std::pair{1ULL, 0ULL},
+      std::pair{0x8000000000000000ULL, 1ULL},
+  };
+// Each flag seed is passed to both sequences as (FlagLhs, FlagRhs).
+  for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) {
+    for (uint64_t Value : Values) {
+      const auto Original = execOriginalSelfMove32(Value, FlagLhs, FlagRhs);
+      const auto Rewritten = execRewrittenSelfMove32(Value, FlagLhs, FlagRhs);
+      EXPECT_NE(Original.Value, Rewritten.Value) // the two sequences must NOT agree on the value
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Value, Value & 0xffffffffULL) // with the 32-bit self-move: upper half cleared
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Rewritten.Value, Value) // without it: value passes through untouched
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+      EXPECT_EQ(Original.Flags, Rewritten.Flags)
+          << "value=" << Value << " flag_lhs=" << FlagLhs
+          << " flag_rhs=" << FlagRhs;
+    }
+  }
+#endif
+}
+
+TEST(X86CgPeephole, ExecutionHarnessRemoveFallthroughConditionalJump) {
+#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__))
+  GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm";
+#else
+  const std::array EdgeValues = {
+      0ULL,
+      1ULL,
+      2ULL,
+      0x7fffffffffffffffULL,
+      0x8000000000000000ULL,
+      0x8000000000000001ULL,
+      0xffffffffffffffffULL,
+      0xfffffffffffffffeULL,
+      0xaaaaaaaaaaaaaaaaULL,
+      0x5555555555555555ULL,
+      0x00000000ffffffffULL,
+      0xffffffff00000000ULL,
+  };
+  const std::array, 6> FlagSeeds = {
+      std::pair{0ULL, 0ULL},
+      std::pair{0ULL, 1ULL},
+      std::pair{1ULL, 0ULL},
std::pair{0x8000000000000000ULL, 1ULL}, + std::pair{0x7fffffffffffffffULL, + 0xffffffffffffffffULL}, + std::pair{0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL}, + }; + std::mt19937_64 Rng(0xF4112026ULL); + + for (const auto &HarnessCase : FallthroughJccHarnessCases) { + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint64_t Value : EdgeValues) { + const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs); + const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + } + } + + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint64_t Value = Rng(); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = HarnessCase.Original(Value, FlagLhs, FlagRhs); + const auto Rewritten = HarnessCase.Rewritten(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "case=" << HarnessCase.Name << " value=" << Value + << " flag_lhs=" << FlagLhs << " flag_rhs=" << FlagRhs; + } + } +#endif +} + +TEST(X86CgPeephole, ExecutionHarnessRemoveFallthroughJump) { +#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__)) + GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm"; +#else + const std::array EdgeValues = { + 0ULL, + 1ULL, + 2ULL, + 0x7fffffffffffffffULL, + 0x8000000000000000ULL, + 0x8000000000000001ULL, + 0xffffffffffffffffULL, + 0xfffffffffffffffeULL, + 0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL, + 0x00000000ffffffffULL, + 0xffffffff00000000ULL, + }; + const std::array, 6> FlagSeeds = { + 
std::pair{0ULL, 0ULL}, + std::pair{0ULL, 1ULL}, + std::pair{1ULL, 0ULL}, + std::pair{0x8000000000000000ULL, 1ULL}, + std::pair{0x7fffffffffffffffULL, + 0xffffffffffffffffULL}, + std::pair{0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL}, + }; + std::mt19937_64 Rng(0xF4122026ULL); + + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint64_t Value : EdgeValues) { + const auto Original = + execOriginalFallthroughJump(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenFallthroughJump(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } + } + + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint64_t Value = Rng(); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = execOriginalFallthroughJump(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenFallthroughJump(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } +#endif +} + +TEST(X86CgPeephole, RemovesFallthroughConditionalJump) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB0 = MF.createCgBasicBlock(); + CgBasicBlock *BB1 = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB0); + MF.appendCgBasicBlock(BB1); + + const auto &TII = MF.getTargetInstrInfo(); + std::array JccOps = { + CgOperand::createMBB(BB1), + CgOperand::createImmOperand(X86::COND_NE), + 
};
+  MF.createCgInstruction(*BB0, TII.get(X86::JCC_1), JccOps);
+
+  X86CgPeephole Peephole(MF); // no further call is made before the block is inspected; presumably the constructor runs the pass
+
+  EXPECT_TRUE(BB0->empty());
+}
+/* BB0 holds a single JMP_1 whose target is BB1, and BB1 is appended
+ * directly after BB0. The test expects BB0 to be empty once the peephole
+ * object has been constructed. */
+TEST(X86CgPeephole, RemovesFallthroughJump) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+// Two blocks in layout order: BB0 first, BB1 immediately after it.
+  CgBasicBlock *BB0 = MF.createCgBasicBlock();
+  CgBasicBlock *BB1 = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB0);
+  MF.appendCgBasicBlock(BB1);
+
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array JmpOps = {CgOperand::createMBB(BB1)};
+  MF.createCgInstruction(*BB0, TII.get(X86::JMP_1), JmpOps);
+
+  X86CgPeephole Peephole(MF);
+
+  EXPECT_TRUE(BB0->empty());
+}
+/* Two consecutive TEST64rr instructions, both on RAX, RAX, in one block.
+ * The test expects exactly one instruction to remain and checks that it is
+ * a TEST64rr. */
+TEST(X86CgPeephole, RemovesRedundantTest64rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+  Mod.addFuncType(FuncType);
+
+  MFunction MirFunc(Context, 0);
+  MirFunc.setFunctionType(FuncType);
+  CgFunction MF(Context, MirFunc);
+
+  CgBasicBlock *BB = MF.createCgBasicBlock();
+  MF.appendCgBasicBlock(BB);
+
+  const auto &TII = MF.getTargetInstrInfo();
+  std::array TestOps1 = {
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::TEST64rr), TestOps1);
+  std::array TestOps2 = { // same operands as TestOps1, so the second TEST repeats the first
+      CgOperand::createRegOperand(X86::RAX, false),
+      CgOperand::createRegOperand(X86::RAX, false),
+  };
+  MF.createCgInstruction(*BB, TII.get(X86::TEST64rr), TestOps2);
+
+  X86CgPeephole Peephole(MF);
+
+  ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); // ASSERT so begin() is not dereferenced on an unexpected count
+  EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST64rr);
+}
+// Counterpart case: the second TEST64rr reads RBX instead of RAX, and both instructions are expected to remain.
+TEST(X86CgPeephole, KeepsNonRedundantTest64rr) {
+  CompileContext Context;
+  Context.initialize();
+
+  MModule Mod(Context);
+  MFunctionType *FuncType = createVoidFunctionType(Context);
+ Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createRegOperand(X86::RAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST64rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::RBX, false), + CgOperand::createRegOperand(X86::RBX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST64rr), TestOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantTest32rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST32rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST32rr), TestOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST32rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantTest32rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + 
MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST32rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::ECX, false), + CgOperand::createRegOperand(X86::ECX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST32rr), TestOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantTest8rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST8rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST8rr), TestOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST8rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantTest8rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = 
MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST8rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::BL, false), + CgOperand::createRegOperand(X86::BL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST8rr), TestOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantCmp64rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createRegOperand(X86::RAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP64rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createRegOperand(X86::RAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP64rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP64rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantCmp64rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + 
std::array CmpOps1 = { + CgOperand::createRegOperand(X86::RAX, false), + CgOperand::createRegOperand(X86::RAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP64rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::RBX, false), + CgOperand::createRegOperand(X86::RAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP64rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantCmp32rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP32rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP32rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP32rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantCmp32rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createRegOperand(X86::EAX, 
false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP32rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::EBX, false), + CgOperand::createRegOperand(X86::EAX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP32rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantCmp8rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP8rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP8rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP8rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantCmp8rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::AL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP8rr), CmpOps1); + std::array CmpOps2 = { + 
CgOperand::createRegOperand(X86::BL, false), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP8rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantCmp16rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::AX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::AX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::CMP16rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantCmp16rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array CmpOps1 = { + CgOperand::createRegOperand(X86::AX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps1); + std::array CmpOps2 = { + CgOperand::createRegOperand(X86::BX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + 
MF.createCgInstruction(*BB, TII.get(X86::CMP16rr), CmpOps2); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, RemovesRedundantTest16rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::AX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::AX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps2); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + EXPECT_EQ(BB->begin()->getOpcode(), X86::TEST16rr); +} + +TEST(X86CgPeephole, KeepsNonRedundantTest16rr) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array TestOps1 = { + CgOperand::createRegOperand(X86::AX, false), + CgOperand::createRegOperand(X86::AX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps1); + std::array TestOps2 = { + CgOperand::createRegOperand(X86::BX, false), + CgOperand::createRegOperand(X86::BX, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::TEST16rr), TestOps2); + + X86CgPeephole Peephole(MF); + + 
EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, ExecutionHarnessRemoveRedundantTest64rr) { +#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__)) + GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm"; +#else + const std::array EdgeValues = { + 0ULL, + 1ULL, + 0x7fffffffffffffffULL, + 0x8000000000000000ULL, + 0xffffffffffffffffULL, + 0xaaaaaaaaaaaaaaaaULL, + 0x5555555555555555ULL, + 0x00000000ffffffffULL, + }; + const std::array, 4> FlagSeeds = { + std::pair{0ULL, 0ULL}, + std::pair{0ULL, 1ULL}, + std::pair{1ULL, 0ULL}, + std::pair{0x8000000000000000ULL, 1ULL}, + }; + std::mt19937_64 Rng(0xBB112026ULL); + + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint64_t Value : EdgeValues) { + const auto Original = + execOriginalRedundantTest64(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenRedundantTest64(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } + } + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint64_t Value = Rng(); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = execOriginalRedundantTest64(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenRedundantTest64(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } +#endif +} + +TEST(X86CgPeephole, ExecutionHarnessRemoveRedundantTest32rr) { +#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__)) + GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm"; +#else + const 
std::array EdgeValues = { + 0UL, 1UL, 0x7fffffffUL, 0x80000000UL, + 0xffffffffUL, 0xaaaaaaaaUL, 0x55555555UL, 0x0000ffffUL, + }; + const std::array, 4> FlagSeeds = { + std::pair{0ULL, 0ULL}, + std::pair{0ULL, 1ULL}, + std::pair{1ULL, 0ULL}, + std::pair{0x8000000000000000ULL, 1ULL}, + }; + std::mt19937_64 Rng(0xCC122026ULL); + + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint32_t Value : EdgeValues) { + const auto Original = + execOriginalRedundantTest32(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenRedundantTest32(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } + } + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint32_t Value = static_cast(Rng()); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = execOriginalRedundantTest32(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenRedundantTest32(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << Value << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } +#endif +} + +TEST(X86CgPeephole, ExecutionHarnessRemoveRedundantTestrr) { +#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__)) + GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm"; +#else + const std::array EdgeValues = { + 0, 1, 0x7f, 0x80, 0xff, 0xaa, + }; + const std::array, 4> FlagSeeds = { + std::pair{0ULL, 0ULL}, + std::pair{0ULL, 1ULL}, + std::pair{1ULL, 0ULL}, + std::pair{0x8000000000000000ULL, 1ULL}, + }; + std::mt19937_64 Rng(0xDD132026ULL); + + for (const auto &[FlagLhs, FlagRhs] : FlagSeeds) { + for (uint8_t Value : EdgeValues) { 
+ const auto Original = execOriginalRedundantTest8(Value, FlagLhs, FlagRhs); + const auto Rewritten = + execRewrittenRedundantTest8(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << static_cast(Value) << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << static_cast(Value) << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } + } + for (int Iter = 0; Iter < 4000; ++Iter) { + const uint8_t Value = static_cast(Rng()); + const uint64_t FlagLhs = Rng(); + const uint64_t FlagRhs = Rng(); + const auto Original = execOriginalRedundantTest8(Value, FlagLhs, FlagRhs); + const auto Rewritten = execRewrittenRedundantTest8(Value, FlagLhs, FlagRhs); + EXPECT_EQ(Original.Value, Rewritten.Value) + << "value=" << static_cast(Value) << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + EXPECT_EQ(Original.Flags, Rewritten.Flags) + << "value=" << static_cast(Value) << " flag_lhs=" << FlagLhs + << " flag_rhs=" << FlagRhs; + } +#endif +} + +#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) +static uint64_t execOriginalMovzxSubreg(uint64_t Input) { + uint64_t Out; + uint8_t In8 = static_cast(Input); + asm volatile("movzbl %[in], %%eax\n\t" + "movq %%rax, %[out]\n\t" + : [out] "=r"(Out) + : [in] "q"(In8) + : "rax"); + return Out; +} + +static uint64_t execRewrittenMovzxSubreg(uint64_t Input) { + uint64_t Out; + uint8_t In8 = static_cast(Input); + asm volatile("movzbq %[in], %%rax\n\t" + "movq %%rax, %[out]\n\t" + : [out] "=r"(Out) + : [in] "q"(In8) + : "rax"); + return Out; +} +#endif + +TEST(X86CgPeephole, FoldsMovzxSubregToReg) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + 
MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + std::array MovzxOps = { + CgOperand::createRegOperand(X86::EAX, true), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::MOVZX32rr8), MovzxOps); + + std::array SubregOps = { + CgOperand::createRegOperand(X86::RAX, true), + CgOperand::createImmOperand(0), + CgOperand::createRegOperand(X86::EAX, false), + CgOperand::createImmOperand(6), // sub_32bit + }; + MF.createCgInstruction(*BB, TII.get(TargetOpcode::SUBREG_TO_REG), SubregOps); + + X86CgPeephole Peephole(MF); + + ASSERT_EQ(std::distance(BB->begin(), BB->end()), 1); + auto It = BB->begin(); + EXPECT_EQ(It->getOpcode(), X86::MOVZX64rr8); + EXPECT_EQ(It->getOperand(0).getReg(), X86::RAX); +} + +TEST(X86CgPeephole, KeepsMovzxSubregToRegWhenMismatch) { + CompileContext Context; + Context.initialize(); + + MModule Mod(Context); + MFunctionType *FuncType = createVoidFunctionType(Context); + Mod.addFuncType(FuncType); + + MFunction MirFunc(Context, 0); + MirFunc.setFunctionType(FuncType); + CgFunction MF(Context, MirFunc); + + CgBasicBlock *BB = MF.createCgBasicBlock(); + MF.appendCgBasicBlock(BB); + + const auto &TII = MF.getTargetInstrInfo(); + // MOVZX32rr8 defines EAX, but SUBREG_TO_REG uses EBX - mismatch, no fold. 
+ std::array MovzxOps = { + CgOperand::createRegOperand(X86::EAX, true), + CgOperand::createRegOperand(X86::AL, false), + }; + MF.createCgInstruction(*BB, TII.get(X86::MOVZX32rr8), MovzxOps); + + std::array SubregOps = { + CgOperand::createRegOperand(X86::RBX, true), + CgOperand::createImmOperand(0), + CgOperand::createRegOperand(X86::EBX, false), + CgOperand::createImmOperand(6), // sub_32bit + }; + MF.createCgInstruction(*BB, TII.get(TargetOpcode::SUBREG_TO_REG), SubregOps); + + X86CgPeephole Peephole(MF); + + EXPECT_EQ(std::distance(BB->begin(), BB->end()), 2); +} + +TEST(X86CgPeephole, ExecutionHarnessFoldMovzxSubregToReg) { +#if !defined(__x86_64__) || !(defined(__GNUC__) || defined(__clang__)) + GTEST_SKIP() << "execution harness requires x86_64 GNU-style inline asm"; +#else + const std::array EdgeValues = {0, 1, 0x7f, 0x80, 0xff, 0xaa}; + std::mt19937_64 Rng(0xEE442026ULL); + + for (uint8_t Value : EdgeValues) { + EXPECT_EQ(execOriginalMovzxSubreg(Value), execRewrittenMovzxSubreg(Value)) + << "value=" << static_cast(Value); + } + for (int Iter = 0; Iter < 16; ++Iter) { + const uint8_t Value = static_cast(Rng()); + EXPECT_EQ(execOriginalMovzxSubreg(Value), execRewrittenMovzxSubreg(Value)) + << "value=" << static_cast(Value); + } +#endif +} + +} // namespace diff --git a/tests/evm_asm/bool_and_or_xor_not.easm b/tests/evm_asm/bool_and_or_xor_not.easm new file mode 100644 index 000000000..330f0e175 --- /dev/null +++ b/tests/evm_asm/bool_and_or_xor_not.easm @@ -0,0 +1,14 @@ +// Boolean chain: NOT(XOR(OR(AND(0xFF, 0x0F), 0xF0), 0x55)) = 0xFF...FF55 +PUSH1 0xFF +PUSH1 0x0F +AND +PUSH1 0xF0 +OR +PUSH1 0x55 +XOR +NOT +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/bool_and_or_xor_not.expected b/tests/evm_asm/bool_and_or_xor_not.expected new file mode 100644 index 000000000..9b3d2ca77 --- /dev/null +++ b/tests/evm_asm/bool_and_or_xor_not.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: 
'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF55' +storage: {} +transient_storage: {} +return: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF55' +events: [] diff --git a/tests/evm_asm/bool_xor_not_chain.easm b/tests/evm_asm/bool_xor_not_chain.easm new file mode 100644 index 000000000..463e829a9 --- /dev/null +++ b/tests/evm_asm/bool_xor_not_chain.easm @@ -0,0 +1,11 @@ +// Boolean chain: NOT(XOR(NOT(0xAA), 0x55)) = 0xFF +PUSH1 0xAA +NOT +PUSH1 0x55 +XOR +NOT +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/bool_xor_not_chain.expected b/tests/evm_asm/bool_xor_not_chain.expected new file mode 100644 index 000000000..3edad12fb --- /dev/null +++ b/tests/evm_asm/bool_xor_not_chain.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: '00000000000000000000000000000000000000000000000000000000000000FF' +storage: {} +transient_storage: {} +return: '00000000000000000000000000000000000000000000000000000000000000FF' +events: [] diff --git a/tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json b/tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json new file mode 100644 index 000000000..e0ee63d79 --- /dev/null +++ b/tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json @@ -0,0 +1,41 @@ +{ + "version": 1, + "target_pass": "dmir_rewrite", + "thresholds": { + "max_pass_share_p95_pct": 1.25, + "max_pass_time_p95_ms": 0.028, + "max_overall_total_time_regression_pct": 5.0, + "max_case_total_time_regression_pct": 20.0 + }, + "baseline": { + "overall_total_time_ms_median": 0.85175, + "case_total_time_ms_median": { + "add": 0.864481, + "mul": 0.903338, + "div": 0.854079, + "shl": 0.841816, + "shr": 0.820848, + "sar": 0.827417, + "byte": 0.880214, + "eq_true": 0.844935, + "lt_true": 0.870578, + "jump": 0.873635, + "u256_shl_add_mul": 0.839147, + "u256_mul_add_chain": 0.861729, + "u256_shr_add_shl": 0.844389, + "bool_and_or_xor_not": 0.848925, + "bool_xor_not_chain": 0.847343 + } + }, +
"metadata": { + "manifest": "tests/evm_asm/compiler_pass_timing_manifest.json", + "runs": 5, + "num_extra_compilations": 4, + "rule_count": 70, + "compile_mode": "compile-only", + "thresholds_status": "provisional", + "measured_p95_ms": 0.013796, + "measured_p95_share_pct": 0.5947, + "threshold_multiplier": 2.0 + } +} diff --git a/tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json b/tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json new file mode 100644 index 000000000..f8d750257 --- /dev/null +++ b/tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json @@ -0,0 +1,38 @@ +{ + "version": 1, + "target_pass": "x86_cg_peephole", + "thresholds": { + "max_overall_total_time_regression_pct": 5.0, + "max_case_total_time_regression_pct": 20.0, + "max_pass_share_p95_pct": 2.0, + "max_pass_time_p95_ms": 0.06 + }, + "baseline": { + "overall_total_time_ms_median": 0.815081, + "case_total_time_ms_median": { + "add": 0.90462, + "mul": 0.816997, + "div": 0.768571, + "shl": 0.758534, + "shr": 0.770613, + "sar": 0.750282, + "byte": 0.794542, + "eq_true": 0.839906, + "lt_true": 0.752717, + "jump": 0.854454, + "u256_shl_add_mul": 0.880501, + "u256_mul_add_chain": 0.818078, + "u256_shr_add_shl": 0.849664, + "bool_and_or_xor_not": 0.869076, + "bool_xor_not_chain": 0.823643 + } + }, + "metadata": { + "manifest": "tests/evm_asm/compiler_pass_timing_manifest.json", + "runs": 5, + "num_extra_compilations": 4, + "compile_mode": "compile-only", + "rule_count": 8, + "thresholds_status": "provisional" + } +} diff --git a/tests/evm_asm/compiler_pass_timing_manifest.json b/tests/evm_asm/compiler_pass_timing_manifest.json new file mode 100644 index 000000000..7e45d4865 --- /dev/null +++ b/tests/evm_asm/compiler_pass_timing_manifest.json @@ -0,0 +1,65 @@ +{ + "version": 1, + "cases": [ + { + "name": "add", + "input": "add.evm.hex" + }, + { + "name": "mul", + "input": "mul.evm.hex" + }, + { + "name": "div", + "input": "div.evm.hex" + }, + { + "name": "shl", + "input": 
"shl.evm.hex" + }, + { + "name": "shr", + "input": "shr.evm.hex" + }, + { + "name": "sar", + "input": "sar.evm.hex" + }, + { + "name": "byte", + "input": "byte.evm.hex" + }, + { + "name": "eq_true", + "input": "eq_true.evm.hex" + }, + { + "name": "lt_true", + "input": "lt_true.evm.hex" + }, + { + "name": "jump", + "input": "jump.evm.hex" + }, + { + "name": "u256_shl_add_mul", + "input": "u256_shl_add_mul.evm.hex" + }, + { + "name": "u256_mul_add_chain", + "input": "u256_mul_add_chain.evm.hex" + }, + { + "name": "u256_shr_add_shl", + "input": "u256_shr_add_shl.evm.hex" + }, + { + "name": "bool_and_or_xor_not", + "input": "bool_and_or_xor_not.evm.hex" + }, + { + "name": "bool_xor_not_chain", + "input": "bool_xor_not_chain.evm.hex" + } + ] +} diff --git a/tests/evm_asm/sar.easm b/tests/evm_asm/sar.easm new file mode 100644 index 000000000..688bc0744 --- /dev/null +++ b/tests/evm_asm/sar.easm @@ -0,0 +1,9 @@ +// SAR(shift=2, value=-8) = -2 (as U256: 0xFFF...FFE) +PUSH32 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF8 +PUSH1 0x02 +SAR +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/sar.expected b/tests/evm_asm/sar.expected new file mode 100644 index 000000000..ebacdd6cf --- /dev/null +++ b/tests/evm_asm/sar.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE' +storage: {} +transient_storage: {} +return: 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE' +events: [] diff --git a/tests/evm_asm/shl.easm b/tests/evm_asm/shl.easm new file mode 100644 index 000000000..959206447 --- /dev/null +++ b/tests/evm_asm/shl.easm @@ -0,0 +1,9 @@ +// SHL(shift=3, value=1) = 8 +PUSH1 0x01 +PUSH1 0x03 +SHL +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/shl.expected b/tests/evm_asm/shl.expected new file mode 100644 index 000000000..5fea867ed --- /dev/null +++ b/tests/evm_asm/shl.expected @@ -0,0 +1,8 @@ 
+status: success +error_code: 0 +stack: [] +memory: '0000000000000000000000000000000000000000000000000000000000000008' +storage: {} +transient_storage: {} +return: '0000000000000000000000000000000000000000000000000000000000000008' +events: [] diff --git a/tests/evm_asm/shr.easm b/tests/evm_asm/shr.easm new file mode 100644 index 000000000..676995db5 --- /dev/null +++ b/tests/evm_asm/shr.easm @@ -0,0 +1,9 @@ +// SHR(shift=8, value=2) = 0 +PUSH1 0x02 +PUSH1 0x08 +SHR +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/shr.expected b/tests/evm_asm/shr.expected new file mode 100644 index 000000000..481e245e2 --- /dev/null +++ b/tests/evm_asm/shr.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: '0000000000000000000000000000000000000000000000000000000000000000' +storage: {} +transient_storage: {} +return: '0000000000000000000000000000000000000000000000000000000000000000' +events: [] diff --git a/tests/evm_asm/u256_mul_add_chain.easm b/tests/evm_asm/u256_mul_add_chain.easm new file mode 100644 index 000000000..e22058292 --- /dev/null +++ b/tests/evm_asm/u256_mul_add_chain.easm @@ -0,0 +1,13 @@ +// U256 chain: (2 MUL 3) MUL 4 ADD 8 = 32 +PUSH1 0x03 +PUSH1 0x02 +MUL +PUSH1 0x04 +MUL +PUSH1 0x08 +ADD +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/u256_mul_add_chain.expected b/tests/evm_asm/u256_mul_add_chain.expected new file mode 100644 index 000000000..c667a6ef3 --- /dev/null +++ b/tests/evm_asm/u256_mul_add_chain.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: '0000000000000000000000000000000000000000000000000000000000000020' +storage: {} +transient_storage: {} +return: '0000000000000000000000000000000000000000000000000000000000000020' +events: [] diff --git a/tests/evm_asm/u256_shl_add_mul.easm b/tests/evm_asm/u256_shl_add_mul.easm new file mode 100644 index 000000000..ac8650751 --- /dev/null +++ b/tests/evm_asm/u256_shl_add_mul.easm @@ -0,0 +1,13 @@ +// 
U256 arithmetic chain: ((1 SHL 2) ADD 3) MUL 4 = 28 +PUSH1 0x02 +PUSH1 0x01 +SHL +PUSH1 0x03 +ADD +PUSH1 0x04 +MUL +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/u256_shl_add_mul.expected b/tests/evm_asm/u256_shl_add_mul.expected new file mode 100644 index 000000000..cb4508831 --- /dev/null +++ b/tests/evm_asm/u256_shl_add_mul.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: '000000000000000000000000000000000000000000000000000000000000001C' +storage: {} +transient_storage: {} +return: '000000000000000000000000000000000000000000000000000000000000001C' +events: [] diff --git a/tests/evm_asm/u256_shr_add_shl.easm b/tests/evm_asm/u256_shr_add_shl.easm new file mode 100644 index 000000000..eda45e6b7 --- /dev/null +++ b/tests/evm_asm/u256_shr_add_shl.easm @@ -0,0 +1,15 @@ +// U256 shift chain: 1 SHL ((2 SHR 8) ADD 3) = 8 +// SWAP1 before SHL moves the sum (3) to the top of the stack so it is the shift amount and 1 is the shifted value +PUSH1 0x02 +PUSH1 0x08 +SHR +PUSH1 0x03 +ADD +PUSH1 0x01 +SWAP1 +SHL +PUSH1 0x00 +MSTORE +PUSH1 0x20 +PUSH1 0x00 +RETURN diff --git a/tests/evm_asm/u256_shr_add_shl.expected b/tests/evm_asm/u256_shr_add_shl.expected new file mode 100644 index 000000000..5fea867ed --- /dev/null +++ b/tests/evm_asm/u256_shr_add_shl.expected @@ -0,0 +1,8 @@ +status: success +error_code: 0 +stack: [] +memory: '0000000000000000000000000000000000000000000000000000000000000008' +storage: {} +transient_storage: {} +return: '0000000000000000000000000000000000000000000000000000000000000008' +events: [] diff --git a/tools/check_compiler_pass_timing_budget.py b/tools/check_compiler_pass_timing_budget.py new file mode 100644 index 000000000..bf8c354e6 --- /dev/null +++ b/tools/check_compiler_pass_timing_budget.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 + +import argparse +import json +import pathlib +import sys + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Validate compiler pass timing output against a budget file."
def load_json(path):
    """Read the UTF-8 encoded JSON document at *path* and return the parsed value."""
    with pathlib.Path(path).open("r", encoding="utf-8") as f:
        return json.load(f)


def percent_regression(current, baseline):
    """Return how far *current* exceeds *baseline*, as a percentage of *baseline*.

    A non-positive baseline cannot be used as a divisor: in that case the
    result is 0.0 when *current* is also non-positive and ``inf`` otherwise.
    Negative results mean *current* is below the baseline.
    """
    if baseline <= 0:
        return 0.0 if current <= 0 else float("inf")
    return (current - baseline) * 100.0 / baseline


def get_report_scope(report):
    """Split a timing report into ``(overall_summary, per_case_summaries)``.

    Reports that carry an ``"overall"`` key are treated as multi-case reports:
    the overall summary is returned together with a ``name -> summary`` mapping
    built from the optional ``"cases"`` list.  Any other report is treated as
    a single summary with no per-case data.
    """
    if "overall" not in report:
        return report, {}
    per_case = {case["name"]: case["summary"] for case in report.get("cases", [])}
    return report["overall"], per_case


def get_threshold(thresholds, new_key, old_key):
    """Return ``thresholds[new_key]``, falling back to the legacy *old_key*.

    Raises:
        KeyError: if neither key is present.  The message names both keys so
            a broken budget file is easy to diagnose.
    """
    if new_key in thresholds:
        return thresholds[new_key]
    if old_key in thresholds:
        return thresholds[old_key]
    raise KeyError(f"budget thresholds define neither '{new_key}' nor '{old_key}'")


def _p95_or_max(stats):
    """Return ``stats["p95"]``, consulting ``stats["max"]`` only when p95 is absent."""
    # The fallback is looked up lazily: a report that provides p95 but no max
    # must not fail with KeyError.
    if "p95" in stats:
        return stats["p95"]
    return stats["max"]


def collect_budget_errors(budget, report, allow_missing_cases=False):
    """Compare a timing *report* against a *budget* and return the violations.

    Args:
        budget: parsed budget JSON; ``target_pass`` and ``thresholds`` are
            required, ``baseline`` is optional.
        report: parsed timing report JSON (single summary or multi-case).
        allow_missing_cases: when true, a baseline case that is absent from
            the report is skipped instead of being reported.

    Returns:
        A list of human-readable error strings; empty when the report is
        within budget.
    """
    summary, case_summaries = get_report_scope(report)
    target_pass = budget["target_pass"]
    thresholds = budget["thresholds"]
    baseline = budget.get("baseline", {})
    errors = []

    pass_summary = summary.get("phases", {}).get(target_pass)
    if pass_summary is None:
        errors.append(f"report is missing target pass '{target_pass}'")
    else:
        observed_share = _p95_or_max(pass_summary["share_of_total_pct"])
        max_share = get_threshold(
            thresholds, "max_pass_share_p95_pct", "max_pass_share_of_total_pct"
        )
        if observed_share > max_share:
            errors.append(
                f"{target_pass} share p95 {observed_share:.6f}% exceeds budget "
                f"{max_share:.6f}%"
            )

        observed_time = _p95_or_max(pass_summary)
        max_time = get_threshold(
            thresholds, "max_pass_time_p95_ms", "max_pass_time_ms"
        )
        if observed_time > max_time:
            errors.append(
                f"{target_pass} p95 time {observed_time:.6f} ms exceeds budget "
                f"{max_time:.6f} ms"
            )

    baseline_overall = baseline.get("overall_total_time_ms_median")
    if baseline_overall is not None:
        observed_overall = summary["total_time_ms"]["median"]
        regression = percent_regression(observed_overall, baseline_overall)
        max_regression = thresholds["max_overall_total_time_regression_pct"]
        if regression > max_regression:
            errors.append(
                "overall median compile time regression "
                f"{regression:.6f}% exceeds budget {max_regression:.6f}%"
            )

    # The per-case threshold is optional.  Without it there is nothing to
    # compare against, so only the "missing case" check still applies
    # (comparing a float with None would raise TypeError).
    max_case_regression = thresholds.get("max_case_total_time_regression_pct")
    for case_name, baseline_value in baseline.get(
        "case_total_time_ms_median", {}
    ).items():
        current_case = case_summaries.get(case_name)
        if current_case is None:
            if not allow_missing_cases:
                errors.append(f"report is missing baseline case '{case_name}'")
            continue
        if max_case_regression is None:
            continue
        regression = percent_regression(
            current_case["total_time_ms"]["median"], baseline_value
        )
        if regression > max_case_regression:
            errors.append(
                f"case '{case_name}' median compile time regression {regression:.6f}% "
                f"exceeds budget {max_case_regression:.6f}%"
            )

    return errors


def main():
    """Check the report named on the command line against the budget file.

    Prints every violation to stderr and returns 1 when the budget is
    exceeded; prints a confirmation and returns 0 otherwise.
    """
    args = parse_args()
    budget = load_json(args.budget)
    report = load_json(args.report)

    errors = collect_budget_errors(budget, report, args.allow_missing_cases)
    if errors:
        for error in errors:
            print(error, file=sys.stderr)
        return 1

    print("compiler pass timing budget check passed")
    return 0


if __name__ == "__main__":
    sys.exit(main())
def validate_cost(name, cost, errors, fields=None):
    """Validate the cost metadata of one rewrite rule.

    Args:
        name: Rule name, used only in error messages.
        cost: Value of the rule's ``cost`` entry; expected to be a dict with
            ``lhs``, ``rhs`` and ``delta`` sections.
        errors: List that receives one message per problem found.
        fields: Cost field names every section must provide as integers.
            Defaults to the module-level ``COST_FIELDS``.
    """
    if not isinstance(cost, dict):
        errors.append(f"rule '{name}' has invalid cost metadata")
        return

    if fields is None:
        fields = COST_FIELDS

    for section in ("lhs", "rhs", "delta"):
        section_cost = cost.get(section)
        if not isinstance(section_cost, dict):
            errors.append(f"rule '{name}' is missing cost section '{section}'")
            continue
        for field in fields:
            value = section_cost.get(field)
            # bool is a subclass of int, so JSON true/false would otherwise
            # be accepted as an integer cost.
            if isinstance(value, bool) or not isinstance(value, int):
                errors.append(
                    f"rule '{name}' has non-integer cost field '{section}.{field}'"
                )
'{name}'") + continue + seen_names.add(name) + + status = rule.get("status") + if status not in ALLOWED_RULE_STATUSES: + errors.append(f"rule '{name}' has invalid status '{status}'") + + inputs = rule.get("inputs") + if not isinstance(inputs, list) or not inputs or any( + not isinstance(item, str) or not item.strip() for item in inputs + ): + errors.append(f"rule '{name}' has invalid inputs metadata") + elif len(set(inputs)) != len(inputs): + errors.append(f"rule '{name}' repeats input bindings") + + for field in ("lhs", "rhs"): + value = rule.get(field) + if not isinstance(value, str) or not value.strip(): + errors.append(f"rule '{name}' is missing '{field}'") + + lhs = rule.get("lhs") + rhs = rule.get("rhs") + if isinstance(lhs, str) and lhs.strip() and isinstance(rhs, str) and rhs.strip(): + try: + canonical_key = build_candidate_key(parse_expr(lhs), parse_expr(rhs)) + except ValueError as exc: + errors.append(f"rule '{name}' has invalid expression syntax: {exc}") + else: + existing_name = seen_rule_keys.get(canonical_key) + if existing_name is not None: + errors.append( + "rule " + f"'{name}' duplicates canonical rewrite '{existing_name}'" + ) + else: + seen_rule_keys[canonical_key] = name + + validate_cost(name, rule.get("cost"), errors) + + validation = rule.get("validation") + if not isinstance(validation, dict): + errors.append(f"rule '{name}' is missing validation metadata") + continue + + modes = validation.get("modes") + if not isinstance(modes, list) or not modes: + errors.append(f"rule '{name}' has no validation modes") + else: + has_semantic_mode = False + for mode in modes: + if mode not in ALLOWED_VALIDATION_MODES: + errors.append( + f"rule '{name}' uses unknown validation mode '{mode}'" + ) + continue + mode_counts[mode] += 1 + if mode in {"interpreter_fuzz", "smt"}: + has_semantic_mode = True + if not has_semantic_mode: + errors.append( + f"rule '{name}' needs interpreter_fuzz or smt validation" + ) + + coverage = validation.get("coverage") + if 
not isinstance(coverage, list) or not coverage: + errors.append(f"rule '{name}' has no validation coverage entries") + else: + for entry in coverage: + if not isinstance(entry, str) or not entry.strip(): + errors.append(f"rule '{name}' has an invalid coverage entry") + elif gtest_names is not None and entry not in gtest_names: + errors.append( + f"rule '{name}' references missing gtest coverage '{entry}'" + ) + + if errors: + for error in errors: + print(error, file=sys.stderr) + return 1 + + print("dmir rewrite rule metadata is complete") + for mode in sorted(mode_counts): + print(f"{mode}: {mode_counts[mode]}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/check_x86_cg_peephole_validation.py b/tools/check_x86_cg_peephole_validation.py new file mode 100644 index 000000000..91c792258 --- /dev/null +++ b/tools/check_x86_cg_peephole_validation.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 + +import argparse +import json +import pathlib +import subprocess +import sys + + +ALLOWED_VALIDATION_MODES = { + "structural", + "semantics_model", + "execution", +} + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Validate x86 peephole rule validation metadata." 
def load_gtest_names(path):
    """Return the fully qualified test names a gtest binary reports.

    Runs ``<path> --gtest_list_tests`` and parses its listing, where suite
    headers start in column 0 and test names are indented beneath them.

    Args:
        path: Path to the gtest executable.

    Returns:
        A set of ``"Suite.Test"`` strings.

    Raises:
        RuntimeError: If the binary exits with a non-zero status.
    """
    proc = subprocess.run(
        [str(pathlib.Path(path).resolve()), "--gtest_list_tests"],
        capture_output=True,
        text=True,
        check=False,
    )
    if proc.returncode != 0:
        raise RuntimeError(f"failed to list gtests from {path}")

    names = set()
    suite_name = None
    for line in proc.stdout.splitlines():
        if not line.strip():
            continue
        # Both suite headers and test lines may carry a trailing
        # "# TypeParam = ..." / "# GetParam() = ..." comment; drop it before
        # interpreting the line so typed suites are named correctly.
        entry = line.split("#", 1)[0].strip()
        if not entry:
            continue
        if not line.startswith(" "):
            suite_name = entry.rstrip(".")
            continue
        if suite_name is None:
            continue
        test_name = entry.split()[0]
        names.add(f"{suite_name}.{test_name}")
    return names
+ coverage = validation.get("coverage") + if not isinstance(coverage, list) or not coverage: + errors.append(f"rule '{name}' has no validation coverage entries") + else: + for entry in coverage: + if not isinstance(entry, str) or not entry.strip(): + errors.append(f"rule '{name}' has an invalid coverage entry") + elif gtest_names is not None and entry not in gtest_names: + errors.append( + f"rule '{name}' references missing gtest coverage '{entry}'" + ) + + if errors: + for error in errors: + print(error, file=sys.stderr) + return 1 + + print("x86 cg peephole validation metadata is complete") + for mode in sorted(mode_counts): + print(f"{mode}: {mode_counts[mode]}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/collect_compiler_pass_timings.py b/tools/collect_compiler_pass_timings.py new file mode 100644 index 000000000..9d4b1dead --- /dev/null +++ b/tools/collect_compiler_pass_timings.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import pathlib +import statistics +import subprocess +import sys +import tempfile +from collections import defaultdict + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Run dtvm with compiler pass timing enabled and aggregate the JSON output." 
def build_stats(values):
    """Summarise a list of numeric samples.

    Returns a dict with ``mean``, ``median``, ``p95``, ``min`` and ``max``.
    The 95th percentile uses the nearest-rank method.  An empty input yields
    0.0 for every statistic.
    """
    stat_names = ("mean", "median", "p95", "min", "max")
    if not values:
        return {stat: 0.0 for stat in stat_names}

    ranked = sorted(values)
    # Nearest rank: ceil(0.95 * n), converted to a zero-based index.
    nearest_rank = (len(ranked) * 95 + 99) // 100
    p95_value = ranked[max(0, nearest_rank - 1)]

    stats = {}
    stats["mean"] = statistics.fmean(values)
    stats["median"] = statistics.median(values)
    stats["p95"] = p95_value
    stats["min"] = min(values)
    stats["max"] = max(values)
    return stats
def load_manifest(path):
    """Load the benchmark cases listed in a JSON manifest.

    Each entry of the manifest's ``cases`` list is either a string (an input
    path) or an object with an ``input`` path and an optional ``name``.
    Input paths are resolved relative to the manifest's directory.  When no
    name is given, the input file's stem is used, as for string entries.

    Args:
        path: Path to the manifest file.

    Returns:
        A list of ``{"name": str, "input": pathlib.Path}`` dicts in manifest
        order.
    """
    manifest_path = pathlib.Path(path).resolve()
    with manifest_path.open("r", encoding="utf-8") as f:
        data = json.load(f)

    cases = []
    for entry in data.get("cases", []):
        if isinstance(entry, str):
            relative_input = entry
            name = pathlib.Path(entry).stem
        else:
            relative_input = entry["input"]
            # Object entries without an explicit name used to raise KeyError;
            # fall back to the stem exactly like string entries do.
            if "name" in entry:
                name = entry["name"]
            else:
                name = pathlib.Path(relative_input).stem
        cases.append(
            {
                "name": name,
                "input": (manifest_path.parent / relative_input).resolve(),
            }
        )
    return cases
def main():
    """Collect compiler pass timings and emit the aggregated JSON summary.

    With ``--input`` a single file is measured and its summary is emitted
    directly.  With ``--manifest`` every selected case is measured, and the
    output holds one summary per case plus an ``overall`` summary pooled
    across all cases.  The JSON is printed to stdout and also written to
    ``--output`` when that option is given.

    Returns:
        0 on success.
    """
    args = parse_args()
    dtvm_path = pathlib.Path(args.dtvm).resolve()
    extra_args = normalize_dtvm_args(args.dtvm_args)

    if not args.input:
        selected_cases = filter_cases(load_manifest(args.manifest), args.cases)
        per_case = []
        pooled_runs = []
        for entry in selected_cases:
            runs_for_case = collect_records(
                dtvm_path,
                entry["input"],
                args.runs,
                args.allow_nonzero,
                extra_args,
            )
            pooled_runs += runs_for_case
            case_report = {
                "name": entry["name"],
                "input": str(entry["input"]),
                "summary": aggregate(runs_for_case),
            }
            per_case.append(case_report)

        output_data = {
            "manifest": str(pathlib.Path(args.manifest).resolve()),
            "case_count": len(per_case),
            "cases": per_case,
            "overall": aggregate(pooled_runs),
        }
    else:
        single = collect_single_case(
            dtvm_path,
            pathlib.Path(args.input).resolve(),
            args.runs,
            args.allow_nonzero,
            extra_args,
        )
        # Flatten: the summary itself is the report, tagged with its input.
        output_data = single["summary"]
        output_data["input"] = single["input"]

    rendered = json.dumps(output_data, indent=2)
    if args.output:
        pathlib.Path(args.output).write_text(rendered + "\n", encoding="utf-8")
    print(rendered)
    return 0
def normalize_rule(rule: Dict) -> str:
    """Build a canonical signature string for a peephole rule.

    The signature combines the rule's stage, the head of every pattern item
    (``opcode``, ``opcode_any`` or ``predicate``) with its operand
    requirements, and the rule's ``when`` conditions.  Two rules with equal
    signatures match the same instructions.

    ``opcode_any`` alternatives, ``require`` entries and ``when`` conditions
    are order-insensitive sets of constraints, so each is sorted before being
    rendered.  Otherwise two equivalent rules that merely list them in a
    different order would escape conflict detection.
    """
    stage = rule["stage"]
    pattern_parts: List[str] = []
    for item in rule["pattern"]:
        if "opcode" in item:
            head = f"opcode:{item['opcode']}"
        elif "opcode_any" in item:
            head = "opcode_any:" + ",".join(sorted(set(item["opcode_any"])))
        else:
            head = f"predicate:{item['predicate']}"
        require_key = ",".join(
            sorted(
                f"{req['operand']}:{req['field']}:{sorted(req.items())}"
                for req in item.get("require", [])
            )
        )
        pattern_parts.append(f"{head}[{require_key}]")
    when_parts = ",".join(
        sorted(str(sorted(item.items())) for item in rule.get("when", []))
    )
    return f"{stage}|{pattern_parts}|{when_parts}"
def resolve_operand_expr(bind: str, operand: int) -> Tuple[List[str], str]:
    """Produce bounds-check lines and the index expression for an operand.

    A non-negative ``operand`` is used as the index directly.  A negative
    ``operand`` counts back from the operand count reported by
    ``getDesc().getNumOperands()``.

    Returns:
        ``(guard_lines, index_expr)``.  The guard lines still contain a
        literal ``{miss_return}`` placeholder that the caller fills in via
        ``str.format``.
    """
    bail_out = " return {miss_return};"

    if operand < 0:
        from_end = -operand
        declared_count = f"{bind}.getDesc().getNumOperands()"
        index_expr = f"({declared_count} - {from_end})"
        guards = [
            f" if ({declared_count} < {from_end})",
            bail_out,
            f" if ({bind}.getNumOperands() <= {index_expr})",
            bail_out,
        ]
        return (guards, index_expr)

    guards = [f" if ({bind}.getNumOperands() <= {operand})", bail_out]
    return (guards, str(operand))
def emit_capture(bind: str, capture: Dict, miss_return: str) -> List[str]:
    """Emit C++ lines that bind one operand of ``bind`` to a local variable.

    Args:
        bind: Name of the matched instruction reference in generated code.
        capture: Capture spec with ``operand``, ``field`` (``"reg"`` or
            ``"imm"``) and ``name``; ``require_single_use`` is honoured for
            register captures.
        miss_return: Expression the generated code returns on a mismatch.

    Returns:
        The operand bounds guards followed by the type check and the
        ``auto <name> = ...`` declaration.

    Raises:
        ValueError: If ``field`` is neither ``"reg"`` nor ``"imm"``.
    """
    operand = capture["operand"]
    field = capture["field"]
    name = capture["name"]

    raw_guards, operand_expr = resolve_operand_expr(bind, operand)
    emitted = [guard.format(miss_return=miss_return) for guard in raw_guards]
    bail_out = f" return {miss_return};"

    if field == "imm":
        emitted.append(f" if (!{bind}.getOperand({operand_expr}).isImm())")
        emitted.append(bail_out)
        emitted.append(f" auto {name} = {bind}.getOperand({operand_expr}).getImm();")
        return emitted

    if field != "reg":
        raise ValueError(f"Unsupported capture: {capture}")

    emitted.append(f" if (!{bind}.getOperand({operand_expr}).isReg())")
    emitted.append(bail_out)
    emitted.append(f" auto {name} = {bind}.getOperand({operand_expr}).getReg();")
    if capture.get("require_single_use"):
        # Emitted check: a virtual register must have exactly one non-debug use.
        emitted.append(f" if ({name}.isVirtual() &&")
        emitted.append(f" !MBB.getParent()->getRegInfo().hasOneNonDBGUse({name}))")
        emitted.append(bail_out)
    return emitted
def emit_block_end_match(rule: Dict) -> List[str]:
    """Emit the C++ matcher function for a ``block_end`` rule.

    The generated ``bool match_<name>(CgBasicBlock &MBB)`` inspects the last
    instruction of the block, applies the head check of the rule's first
    pattern item (``predicate``, ``opcode`` or ``opcode_any``), its operand
    requirements and the rule's ``when`` conditions, then performs the erase
    actions and returns true.

    Only the first pattern item is used.  ``opcode`` and ``opcode_any`` heads
    used to be dropped silently, which produced a matcher with no opcode
    check; they are now emitted the same way as for instruction-stage rules.

    Raises:
        ValueError: If a ``when`` condition has an unsupported ``kind``.
    """
    name = rule["name"].replace("-", "_")
    pattern = rule["pattern"][0]
    bind = pattern["bind"]
    lines = [
        f"bool match_{name}(CgBasicBlock &MBB) {{",
        " if (MBB.empty())",
        " return false;",
        f" auto &{bind} = MBB.back();",
    ]
    if "predicate" in pattern:
        predicate = pattern["predicate"]
        lines.append(f" if (!{bind}.{predicate}())")
        lines.append(" return false;")
    elif "opcode" in pattern:
        lines.append(f" if ({bind}.getOpcode() != X86::{pattern['opcode']})")
        lines.append(" return false;")
    elif "opcode_any" in pattern:
        lines.append(f" switch ({bind}.getOpcode()) {{")
        for opcode in pattern["opcode_any"]:
            lines.append(f" case X86::{opcode}:")
        lines.append(" break;")
        lines.append(" default:")
        lines.append(" return false;")
        lines.append(" }")
    for req in pattern.get("require", []):
        lines.extend(emit_operand_check(bind, req, "false"))
    for item in rule.get("when", []):
        if item["kind"] == "target_is_next_block":
            inst = item["inst"]
            operand = item["operand"]
            lines.extend(
                [
                    f" CgBasicBlock *TargetBB = {inst}.getOperand({operand}).getMBB();",
                    " if (TargetBB->getNumber() != MBB.getNumber() + 1)",
                    " return false;",
                ]
            )
        else:
            raise ValueError(f"Unsupported rule condition: {item}")
    for action in rule["action"].get("erase", []):
        lines.append(f" {action}.eraseFromParent();")
    lines.append(" return true;")
    lines.append("}")
    lines.append("")
    return lines
argparse.ArgumentParser() + parser.add_argument("--rules", required=True) + parser.add_argument("--out-inc", required=True) + parser.add_argument("--out-report", required=True) + args = parser.parse_args() + + rules_path = pathlib.Path(args.rules) + data = load_rules(rules_path) + report_lines, conflicts = validate_rules(data) + + if conflicts: + write_text(pathlib.Path(args.out_report), "\n".join(report_lines) + "\n") + for item in conflicts: + print(item, file=sys.stderr) + return 1 + + inc_lines = emit_file_header() + emit_dispatch(data) + write_text(pathlib.Path(args.out_inc), "\n".join(inc_lines)) + write_text(pathlib.Path(args.out_report), "\n".join(report_lines) + "\n") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/mine_dmir_seed_rules.py b/tools/mine_dmir_seed_rules.py new file mode 100644 index 000000000..ec324cacf --- /dev/null +++ b/tools/mine_dmir_seed_rules.py @@ -0,0 +1,625 @@ +#!/usr/bin/env python3 + +import argparse +import json +import pathlib +import random +from dataclasses import dataclass + + +MASK64 = (1 << 64) - 1 +COMMUTATIVE_OPS = {"add", "and", "mul", "or", "xor"} +DEFAULT_SEARCH_CONFIG = { + "base_terms": [ + "x", + "y", + "cond", + "0:i64", + "1:i64", + "18446744073709551615:i64", + ], + "unary_not_terms": ["x", "y", "cond"], + "double_not_terms": ["x", "y", "cond"], + "binary_fixed_rhs": [ + { + "ops": ["add", "sub", "and", "or", "xor", "shl", "sshr", "ushr"], + "lhs": ["x", "y", "cond"], + "rhs": "0:i64", + }, + { + "ops": ["and", "or", "xor"], + "lhs": ["x", "y", "cond", "(not x)", "(not y)"], + "rhs": "18446744073709551615:i64", + }, + ], + "binary_self": [ + { + "ops": ["and", "or", "xor"], + "terms": ["x", "y", "cond"], + } + ], + "select_same_arm": { + "conditions": ["cond", "x", "0:i64", "1:i64"], + "values": ["x", "y", "(not x)"], + }, + "pair_binary_groups": [ + { + "ops": ["add", "sub", "and", "or", "xor"], + "lhs": ["x", "y"], + "rhs": ["x", "y", "0:i64"], + }, + { + "ops": ["and", "or", 
def parse_expr(text: str) -> Expr:
    """Parse an s-expression such as ``(add x 0:i64)`` into an ``Expr``.

    Tokens ending in ``:i64`` become decimal constants, ``(op arg ...)``
    becomes an operator node, and every other token becomes a variable.

    Raises:
        ValueError: On any malformed input: empty text, an unterminated or
            empty list, a stray ``)``, a non-decimal constant or trailing
            tokens.  Truncated input previously escaped as ``IndexError``,
            which callers that report syntax errors by catching
            ``ValueError`` did not handle.
    """
    tokens = text.replace("(", " ( ").replace(")", " ) ").split()
    index = 0

    def parse() -> Expr:
        nonlocal index
        if index >= len(tokens):
            raise ValueError(f"unexpected end of expression '{text}'")
        token = tokens[index]
        index += 1
        if token == ")":
            # Previously a stray ')' was silently accepted as a variable name.
            raise ValueError(f"unexpected ')' in expression '{text}'")
        if token == "(":
            if index >= len(tokens) or tokens[index] in ("(", ")"):
                raise ValueError(
                    f"missing operator after '(' in expression '{text}'"
                )
            op = tokens[index]
            index += 1
            args = []
            while True:
                if index >= len(tokens):
                    raise ValueError(f"missing ')' in expression '{text}'")
                if tokens[index] == ")":
                    break
                args.append(parse())
            index += 1  # consume the closing ')'
            return Expr(op, args=tuple(args))
        if token.endswith(":i64"):
            return const(int(token[:-4], 10))
        return var(token)

    expr = parse()
    if index != len(tokens):
        raise ValueError(f"unexpected trailing tokens in expression '{text}'")
    return expr
def _match_into(pattern: "Expr", expr: "Expr", trial: "dict[str, Expr]") -> bool:
    """Structurally match ``expr`` against ``pattern``, recording into ``trial``."""
    if pattern.op == "var":
        name = str(pattern.value)
        bound = trial.get(name)
        if bound is None:
            trial[name] = expr
            return True
        # A variable that is already bound must see the same sub-expression.
        return bound == expr
    if pattern.op == "const":
        return pattern == expr
    if pattern.op != expr.op or len(pattern.args) != len(expr.args):
        return False
    return all(
        _match_into(pattern_arg, expr_arg, trial)
        for pattern_arg, expr_arg in zip(pattern.args, expr.args)
    )


def match_pattern(
    pattern: "Expr", expr: "Expr", bindings: "dict[str, Expr]"
) -> bool:
    """Match ``expr`` against ``pattern``, extending ``bindings`` on success.

    Pattern variables bind to arbitrary sub-expressions, and a variable that
    occurs more than once must bind to equal sub-expressions each time.
    Constants must be equal, and operator nodes must agree in operator and
    argument count.

    Args:
        pattern: Pattern expression whose variables act as wildcards.
        expr: Expression to match.
        bindings: Existing variable bindings.  Updated in place only when the
            whole match succeeds; a failed match leaves it untouched, where
            it previously kept the bindings made before the mismatch.

    Returns:
        True if ``expr`` is an instance of ``pattern`` under ``bindings``.
    """
    # Match against a scratch copy so a mismatch cannot leak partial
    # bindings into the caller's dict.
    trial = dict(bindings)
    if not _match_into(pattern, expr, trial):
        return False
    bindings.update(trial)
    return True
def expr_cost(expr: "Expr") -> dict[str, int]:
    """Compute the static cost vector of an expression tree.

    Variables and constants cost nothing.  For every operator node:

    * ``dmir_inst`` counts operator nodes in the tree.
    * ``select_depth`` is the deepest nesting of ``select`` nodes.
    * ``adc_chain`` counts ``adc`` / ``sbb`` nodes in the tree.
    * ``runtime_calls`` is the sum over the children; no operator adds to it.

    An operator node with no arguments is costed as a single instruction.
    Previously it raised ``ValueError`` from ``max()`` on an empty sequence.
    """
    if expr.op in {"var", "const"}:
        return {
            "dmir_inst": 0,
            "select_depth": 0,
            "adc_chain": 0,
            "runtime_calls": 0,
        }

    child_costs = [expr_cost(arg) for arg in expr.args]
    # default=0 keeps zero-argument operators from crashing max().
    deepest_select = max((cost["select_depth"] for cost in child_costs), default=0)
    chained = sum(cost["adc_chain"] for cost in child_costs)
    return {
        "dmir_inst": 1 + sum(cost["dmir_inst"] for cost in child_costs),
        "select_depth": (
            1 + deepest_select if expr.op == "select" else deepest_select
        ),
        "adc_chain": 1 + chained if expr.op in {"adc", "sbb"} else chained,
        "runtime_calls": sum(cost["runtime_calls"] for cost in child_costs),
    }
def _pair_binary_groups(config: dict) -> list[dict]:
    """Return the ``pair_binary_groups`` entries of *config*.

    When that list is absent or empty, a single ``pair_binary`` entry (if
    present) is used instead.
    """
    groups = list(config.get("pair_binary_groups", []))
    if not groups and "pair_binary" in config:
        groups.append(config["pair_binary"])
    return groups


def build_term_map(config: dict) -> dict[str, Expr]:
    """Parse every term spec referenced by *config* exactly once.

    Returns a mapping from the spec string to its parsed expression, covering
    all sections that ``build_search_space`` reads terms from.
    """
    term_specs = set(config.get("base_terms", []))
    term_specs.update(config.get("unary_not_terms", []))
    term_specs.update(config.get("double_not_terms", []))
    for entry in config.get("binary_fixed_rhs", []):
        term_specs.update(entry.get("lhs", []))
        term_specs.add(entry.get("rhs"))
    for entry in config.get("binary_self", []):
        term_specs.update(entry.get("terms", []))
    select_same_arm = config.get("select_same_arm", {})
    term_specs.update(select_same_arm.get("conditions", []))
    term_specs.update(select_same_arm.get("values", []))
    for entry in _pair_binary_groups(config):
        term_specs.update(entry.get("lhs", []))
        term_specs.update(entry.get("rhs", []))
    adc_sbb_zero = config.get("adc_sbb_zero", {})
    term_specs.update(adc_sbb_zero.get("lhs", []))
    term_specs.update(adc_sbb_zero.get("rhs", []))
    if adc_sbb_zero.get("carry"):
        term_specs.add(adc_sbb_zero["carry"])

    return {spec: parse_expr(spec) for spec in term_specs}


def build_search_space(config: dict) -> list[Expr]:
    """Expand *config* into the list of expressions to mine.

    Each config section contributes one family of terms: the base terms,
    single and double negations, binary ops against a fixed right operand,
    binary ops of a term with itself, ``select`` with identical arms, binary
    ops over lhs/rhs cross products, and ``adc``/``sbb``-style ternary ops
    with a fixed carry operand.  The result is de-duplicated and sorted by
    rendered text so the output order is deterministic.
    """
    term_map = build_term_map(config)
    terms = {term_map[spec] for spec in config.get("base_terms", [])}

    for spec in config.get("unary_not_terms", []):
        terms.add(unary("not", term_map[spec]))

    for spec in config.get("double_not_terms", []):
        terms.add(unary("not", unary("not", term_map[spec])))

    for entry in config.get("binary_fixed_rhs", []):
        rhs = term_map[entry["rhs"]]
        for op in entry.get("ops", []):
            for lhs_spec in entry.get("lhs", []):
                terms.add(binary(op, term_map[lhs_spec], rhs))

    for entry in config.get("binary_self", []):
        for op in entry.get("ops", []):
            for spec in entry.get("terms", []):
                value = term_map[spec]
                terms.add(binary(op, value, value))

    select_same_arm = config.get("select_same_arm", {})
    for cond_spec in select_same_arm.get("conditions", []):
        for value_spec in select_same_arm.get("values", []):
            value = term_map[value_spec]
            terms.add(ternary("select", term_map[cond_spec], value, value))

    for entry in _pair_binary_groups(config):
        for op in entry.get("ops", []):
            for lhs_spec in entry.get("lhs", []):
                for rhs_spec in entry.get("rhs", []):
                    terms.add(binary(op, term_map[lhs_spec], term_map[rhs_spec]))

    adc_sbb_zero = config.get("adc_sbb_zero", {})
    carry_ops = adc_sbb_zero.get("ops", [])
    if carry_ops:
        # build_term_map only registers the carry spec when the config names
        # one, so the "0:i64" default may be missing from term_map.  Parse it
        # on demand instead of failing with KeyError, and skip the lookup
        # entirely when the section contributes no ops.
        carry_spec = adc_sbb_zero.get("carry", "0:i64")
        carry = term_map.get(carry_spec)
        if carry is None:
            carry = parse_expr(carry_spec)
        for op in carry_ops:
            for lhs_spec in adc_sbb_zero.get("lhs", []):
                for rhs_spec in adc_sbb_zero.get("rhs", []):
                    terms.add(
                        ternary(op, term_map[lhs_spec], term_map[rhs_spec], carry)
                    )

    return sorted(terms, key=lambda expr: expr.render())
def serialize_candidate(lhs: Expr, rhs: Expr, cost: dict[str, dict[str, int]],
                        variants: list[tuple[str, str]] | None = None,
                        covered: bool | None = None) -> dict:
    """Turn a candidate rewrite into a JSON-serializable dict.

    ``variant_count`` / ``variants`` are emitted only when *variants* is
    given, and ``covered_by_rule_repo`` only when *covered* is given.
    """
    entry = {
        "lhs": lhs.render(),
        "rhs": rhs.render(),
        "cost": cost,
    }
    if variants is not None:
        entry["variant_count"] = len(variants)
        entry["variants"] = [
            {"lhs": variant[0], "rhs": variant[1]} for variant in variants
        ]
    if covered is not None:
        entry["covered_by_rule_repo"] = covered
    return entry


def build_candidates(rules_path: str | None = None,
                     config_path: str | None = None) -> dict:
    """Mine rewrite candidates from the configured seed search space.

    Terms are grouped by their outputs on the sample environments.  Within
    each group the cheapest term becomes the rewrite target, and every other
    term it dominates on cost becomes a candidate ``lhs -> rhs``.  Candidates
    are then merged by canonical key (keeping the concrete variants), marked
    as covered or novel against the optional rule file, and returned with
    summary counts.

    Args:
        rules_path: Optional rule JSON used to mark already-covered candidates.
        config_path: Optional search-space config; the default is used if None.
    """
    envs = build_sample_envs()
    search_config = load_search_config(config_path)
    terms = build_search_space(search_config)

    classes: dict[tuple[int, ...], list[Expr]] = {}
    for expr in terms:
        signature = tuple(eval_expr(expr, env) for env in envs)
        classes.setdefault(signature, []).append(expr)

    raw_candidates = []
    for exprs in classes.values():
        # expr_cost walks the whole tree, so compute it once per expression
        # rather than once per sort-key field and again after sorting.
        costed = [(expr, expr_cost(expr)) for expr in exprs]
        costed.sort(
            key=lambda pair: (
                pair[1]["dmir_inst"],
                pair[1]["select_depth"],
                pair[1]["adc_chain"],
                pair[1]["runtime_calls"],
                pair[0].render(),
            )
        )
        best, best_cost = costed[0]
        for expr, expr_cost_value in costed[1:]:
            if not dominates(best_cost, expr_cost_value):
                continue
            raw_candidates.append(
                {
                    "lhs_expr": expr,
                    "rhs_expr": best,
                    "cost": {
                        "lhs": expr_cost_value,
                        "rhs": best_cost,
                        "delta": cost_delta(expr_cost_value, best_cost),
                    },
                }
            )

    raw_candidates.sort(
        key=lambda item: (item["lhs_expr"].render(), item["rhs_expr"].render())
    )

    curated: dict[tuple[str, str], dict[str, object]] = {}
    for candidate in raw_candidates:
        lhs_expr = candidate["lhs_expr"]
        rhs_expr = candidate["rhs_expr"]
        key = build_candidate_key(lhs_expr, rhs_expr)
        entry = curated.get(key)
        if entry is None:
            # Parse the canonical pair only the first time a key is seen; the
            # first variant's cost represents the whole canonical class.
            entry = {
                "lhs_expr": parse_expr(key[0]),
                "rhs_expr": parse_expr(key[1]),
                "cost": candidate["cost"],
                "variants": [],
            }
            curated[key] = entry
        entry["variants"].append((lhs_expr.render(), rhs_expr.render()))

    rule_patterns = load_rule_patterns(rules_path)
    rule_keys = build_rule_key_set(rule_patterns)
    curated_candidates = []
    novel_candidates = []
    covered_candidates = []
    for _, entry in sorted(curated.items(), key=lambda item: item[0]):
        covered = is_candidate_covered(
            entry["lhs_expr"], entry["rhs_expr"], rule_patterns, rule_keys
        )
        serialized = serialize_candidate(
            entry["lhs_expr"],
            entry["rhs_expr"],
            entry["cost"],
            variants=sorted(set(entry["variants"])),
            covered=covered,
        )
        curated_candidates.append(serialized)
        if covered:
            covered_candidates.append(serialized)
        else:
            novel_candidates.append(serialized)

    # Most negative delta first: biggest savings lead the novel list.
    novel_candidates.sort(
        key=lambda item: (
            item["cost"]["delta"]["runtime_calls"],
            item["cost"]["delta"]["dmir_inst"],
            item["cost"]["delta"]["select_depth"],
            item["cost"]["delta"]["adc_chain"],
            item["lhs"],
            item["rhs"],
        )
    )

    return {
        "summary": {
            "term_count": len(terms),
            "sample_count": len(envs),
            "candidate_count": len(raw_candidates),
            "curated_candidate_count": len(curated_candidates),
            "covered_candidate_count": len(covered_candidates),
            "novel_candidate_count": len(novel_candidates),
            "config_supplied": config_path is not None,
        },
        "candidates": [
            serialize_candidate(
                candidate["lhs_expr"], candidate["rhs_expr"], candidate["cost"]
            )
            for candidate in raw_candidates
        ],
        "curated_candidates": curated_candidates,
        "covered_candidates": covered_candidates,
        "novel_candidates": novel_candidates,
    }
def main():
    """Run the seed miner and emit its JSON report.

    The report goes to the ``--out`` path when one is given, otherwise to
    stdout.  Always returns exit status 0.
    """
    cli = parse_args()
    report_text = json.dumps(build_candidates(cli.rules, cli.config), indent=2) + "\n"
    if not cli.out:
        print(report_text, end="")
        return 0
    pathlib.Path(cli.out).write_text(report_text, encoding="utf-8")
    return 0
def build_rule_entry(rule, gtest_names):
    """Summarize one dMIR rewrite rule for the coverage report.

    Each name in the rule's ``validation.coverage`` list is marked present
    when it appears in *gtest_names*; when *gtest_names* is None (no gtest
    inventory supplied) every name is marked present.  ``coverage_complete``
    is True when no listed name is missing, which includes an empty list.
    """
    validation = rule.get("validation", {})
    coverage = [
        {
            "name": test_name,
            "present": gtest_names is None or test_name in gtest_names,
        }
        for test_name in validation.get("coverage", [])
    ]
    return {
        "name": rule.get("name"),
        "status": rule.get("status"),
        "inputs": list(rule.get("inputs", [])),
        "modes": list(validation.get("modes", [])),
        "cost_delta": dict(rule.get("cost", {}).get("delta", {})),
        "coverage": coverage,
        "coverage_complete": all(item["present"] for item in coverage),
    }
def build_rule_entry(rule, gtest_names):
    """Summarize one x86 peephole rule for the coverage report.

    Coverage names come from the rule's ``validation.coverage`` list.  With
    no gtest inventory (*gtest_names* is None) every name counts as present;
    otherwise a name is present only if the inventory contains it.
    ``coverage_complete`` is True when nothing is missing, which includes a
    rule that lists no coverage at all.
    """
    validation = rule.get("validation", {})
    inventory_known = gtest_names is not None

    coverage = []
    missing = 0
    for test_name in validation.get("coverage", []):
        found = (not inventory_known) or test_name in gtest_names
        if not found:
            missing += 1
        coverage.append({"name": test_name, "present": found})

    entry = {
        "name": rule.get("name"),
        "stage": rule.get("stage"),
        "priority": rule.get("priority"),
        "modes": list(validation.get("modes", [])),
        "coverage": coverage,
        "coverage_complete": missing == 0,
    }
    return entry
class ExprBank:
    """Pool of enumerated expressions, bucketed by depth and by eval signature.

    An expression's signature is the tuple of values it evaluates to on the
    bank's sample environments; expressions sharing a signature are treated
    as duplicates of each other.
    """

    def __init__(self, envs: list[dict[str, int]]):
        self.envs = envs
        # depth -> expressions accepted at that depth
        self.by_depth: dict[int, list[Expr]] = {}
        # every signature seen so far
        self.seen_sigs: set[tuple[int, ...]] = set()
        # signature -> expressions kept for that signature
        self.sig_to_exprs: dict[tuple[int, ...], list[Expr]] = {}
        self.total_added = 0
        self.total_deduped = 0

    def signature(self, expr: Expr) -> tuple[int, ...]:
        """Evaluate *expr* on every sample environment, in order."""
        values = [eval_expr(expr, env) for env in self.envs]
        return tuple(values)

    def add(self, expr: Expr, depth: int) -> bool:
        """Offer *expr* to the bank; return True only for a new signature.

        An expression whose signature is already known is counted as a
        duplicate and kept only if its ``dmir_inst`` cost is strictly lower
        than that of every expression already stored for the signature; any
        other duplicate is dropped from both indexes.

        NOTE(review): costlier equivalents are therefore never recorded in
        ``sig_to_exprs``, which is what ``extract_candidates`` iterates over;
        confirm this discard policy is intended.
        """
        sig = self.signature(expr)
        bucket = self.by_depth.setdefault(depth, [])

        if sig not in self.seen_sigs:
            self.seen_sigs.add(sig)
            self.sig_to_exprs.setdefault(sig, []).append(expr)
            bucket.append(expr)
            self.total_added += 1
            return True

        self.total_deduped += 1
        known = self.sig_to_exprs[sig]
        new_cost = expr_cost(expr)["dmir_inst"]
        cheapest_known = min(expr_cost(e)["dmir_inst"] for e in known)
        if new_cost < cheapest_known:
            known.append(expr)
            bucket.append(expr)
            self.total_added += 1
        return False

    def all_up_to(self, depth: int) -> list[Expr]:
        """Return all accepted expressions of depth 0..*depth*, shallowest first."""
        return [
            expr
            for level in range(depth + 1)
            for expr in self.by_depth.get(level, [])
        ]
def expr_to_z3(expr: Expr, z3_vars: dict[str, z3.BitVecRef]) -> z3.BitVecRef:
    """Encode *expr* as a 64-bit Z3 bit-vector term.

    Shift semantics mirror ``eval_expr``: ``shl`` / ``ushr`` by 64 or more
    yield 0 and ``sshr`` by 64 or more yields the sign fill.  ``select``
    picks its second operand when the first is non-zero, else the third.

    Args:
        expr: Expression tree to encode.
        z3_vars: Z3 term for every variable name that occurs in *expr*.

    Raises:
        KeyError: A variable is missing from *z3_vars*.
        ValueError: The op is unknown or has the wrong number of operands.
            Arity is checked up front so malformed nodes surface as
            ValueError (which callers already handle) rather than IndexError.
    """
    op = expr.op
    if op == "var":
        return z3_vars[str(expr.value)]
    if op == "const":
        return z3.BitVecVal(int(expr.value), 64)

    arity = {
        "not": 1,
        "add": 2, "sub": 2, "mul": 2, "and": 2, "or": 2, "xor": 2,
        "shl": 2, "ushr": 2, "sshr": 2,
        "adc": 3, "sbb": 3, "select": 3,
    }.get(op)
    if arity is None:
        raise ValueError(f"unsupported op: {op}")
    if len(expr.args) != arity:
        raise ValueError(
            f"op {op} expects {arity} operand(s), got {len(expr.args)}"
        )

    operands = [expr_to_z3(arg, z3_vars) for arg in expr.args]

    if op == "not":
        return ~operands[0]
    if op == "select":
        cond_z3, then_z3, else_z3 = operands
        return z3.If(cond_z3 != z3.BitVecVal(0, 64), then_z3, else_z3)

    lhs_z3, rhs_z3 = operands[0], operands[1]
    if op == "add":
        return lhs_z3 + rhs_z3
    if op == "sub":
        return lhs_z3 - rhs_z3
    if op == "mul":
        return lhs_z3 * rhs_z3
    if op == "and":
        return lhs_z3 & rhs_z3
    if op == "or":
        return lhs_z3 | rhs_z3
    if op == "xor":
        return lhs_z3 ^ rhs_z3

    if op in ("shl", "ushr", "sshr"):
        oversized = z3.UGE(rhs_z3, z3.BitVecVal(64, 64))
        if op == "shl":
            return z3.If(oversized, z3.BitVecVal(0, 64), lhs_z3 << rhs_z3)
        if op == "ushr":
            return z3.If(oversized, z3.BitVecVal(0, 64),
                         z3.LShR(lhs_z3, rhs_z3))
        # ">>" on Z3 bit-vectors is the arithmetic (sign-propagating) shift.
        return z3.If(oversized, lhs_z3 >> z3.BitVecVal(63, 64),
                     lhs_z3 >> rhs_z3)

    # Remaining ops are the ternary carry-chain ops.
    carry_z3 = operands[2]
    if op == "adc":
        return lhs_z3 + rhs_z3 + carry_z3
    return lhs_z3 - rhs_z3 - carry_z3
def verify_carry_rule(
    lhs: Expr, rhs: Expr, var_names: list[str],
    carry_mode: str = "carry_zero", timeout_ms: int = 10000,
) -> tuple[bool, str]:
    """
    Verify equivalence of a carry-chain rule under carry constraints.

    carry_mode:
      - "carry_zero": cf_in is 0 (safe at chain head or after non-carrying op)
      - "carry_any": cf_in is unconstrained {0, 1} (universally valid)
      - "result_and_carry": both result AND the outgoing flag must match.
        The flag is compared when both sides are ``adc`` (carry-out) or both
        are ``sbb`` (borrow-out); any other shape gets a result-only check.

    Returns ``(ok, status)``.  Status is "valid" / "invalid" for result-only
    checks, "valid_with_carry" / "invalid_carry_mismatch" when the flag was
    also compared, "timeout" when the solver gave no answer, and
    "encode_error: ..." when an expression could not be encoded.
    """
    z3_vars = {name: z3.BitVec(name, 64) for name in var_names if name != "cf"}

    # The incoming flag is a single bit widened to 64 bits, so it already
    # ranges over exactly {0, 1} and needs no extra solver constraint.
    cf_bit = z3.BitVec("cf_bit", 1)
    if carry_mode == "carry_zero":
        z3_vars["cf"] = z3.BitVecVal(0, 64)
    else:
        z3_vars["cf"] = z3.ZeroExt(63, cf_bit)

    # Pick the flag-out helper for same-op ternary pairs in result_and_carry.
    flag_out = None
    if (carry_mode == "result_and_carry" and lhs.op == rhs.op
            and len(lhs.args) == 3 and len(rhs.args) == 3):
        flag_out = {"adc": _carry_out_z3, "sbb": _borrow_out_z3}.get(lhs.op)

    lhs_flag = rhs_flag = None
    try:
        lhs_z3 = expr_to_z3(lhs, z3_vars)
        rhs_z3 = expr_to_z3(rhs, z3_vars)
        if flag_out is not None:
            lhs_flag = flag_out(*[expr_to_z3(arg, z3_vars) for arg in lhs.args])
            rhs_flag = flag_out(*[expr_to_z3(arg, z3_vars) for arg in rhs.args])
    except (ValueError, KeyError) as e:
        return False, f"encode_error: {e}"

    solver = z3.Solver()
    solver.set("timeout", timeout_ms)

    if flag_out is not None:
        # Counterexample: the results differ OR the outgoing flags differ.
        solver.add(z3.Or(lhs_z3 != rhs_z3, lhs_flag != rhs_flag))
        ok_status, bad_status = "valid_with_carry", "invalid_carry_mismatch"
    else:
        solver.add(lhs_z3 != rhs_z3)
        ok_status, bad_status = "valid", "invalid"

    result = solver.check()
    if result == z3.unsat:
        return True, ok_status
    if result == z3.sat:
        return False, bad_status
    return False, "timeout"
adc_forms: + for simple_expr, simple_name in simpler_forms: + candidates.append({ + "lhs": adc_expr, + "rhs": simple_expr, + "lhs_name": adc_name, + "rhs_name": simple_name, + "op": "adc", + }) + + # SBB candidates: sbb(x, y, cf) vs simpler forms + sbb_forms = [ + (ternary("sbb", var_x, var_y, cf), "sbb(x, y, cf)"), + (ternary("sbb", var_x, zero, cf), "sbb(x, 0, cf)"), + (ternary("sbb", zero, var_y, cf), "sbb(0, y, cf)"), + (ternary("sbb", var_x, var_x, cf), "sbb(x, x, cf)"), + ] + + sbb_simpler = [ + (binary("sub", var_x, var_y), "sub(x, y)"), + (var_x, "x"), + (binary("sub", zero, var_y), "sub(0, y)"), + (zero, "0"), + (binary("sub", var_x, cf), "sub(x, cf)"), + (binary("sub", zero, cf), "sub(0, cf)"), + (binary("sub", binary("sub", var_x, var_y), cf), "sub(sub(x,y), cf)"), + ] + + for sbb_expr, sbb_name in sbb_forms: + for simple_expr, simple_name in sbb_simpler: + candidates.append({ + "lhs": sbb_expr, + "rhs": simple_expr, + "lhs_name": sbb_name, + "rhs_name": simple_name, + "op": "sbb", + }) + + if verbose: + _log(f"carry-chain candidates: {len(candidates)}") + + # Test each candidate under different carry modes + for c in candidates: + lhs_e, rhs_e = c["lhs"], c["rhs"] + var_names = sorted(extract_var_names(lhs_e) | extract_var_names(rhs_e)) + + # Mode 1: universally valid (cf ∈ {0,1}) + valid_any, status_any = verify_carry_rule( + lhs_e, rhs_e, var_names, "carry_any") + + # Mode 2: valid when cf = 0 + valid_zero, status_zero = verify_carry_rule( + lhs_e, rhs_e, var_names, "carry_zero") + + if valid_any or valid_zero: + safety = "universal" if valid_any else "carry_zero_only" + results.append({ + "lhs": lhs_e.render(), + "rhs": rhs_e.render(), + "lhs_desc": c["lhs_name"], + "rhs_desc": c["rhs_name"], + "op": c["op"], + "safety": safety, + "z3_any": status_any, + "z3_zero": status_zero, + }) + if verbose: + _log(f" ✓ {c['lhs_name']} → {c['rhs_name']} [{safety}]") + + if verbose: + n_univ = sum(1 for r in results if r["safety"] == "universal") + n_zero = sum(1 
def extract_var_names(expr: Expr) -> set[str]:
    """Collect the names of all variables that occur in *expr*."""
    names: set[str] = set()
    pending = [expr]
    while pending:
        node = pending.pop()
        if node.op == "var":
            names.add(str(node.value))
        else:
            pending.extend(node.args)
    return names


def extract_candidates(bank: ExprBank) -> list[dict]:
    """Derive ``lhs -> rhs`` rewrite candidates from a populated bank.

    For every signature that kept at least two expressions, the cheapest one
    (ordered by ``dmir_inst``, ``select_depth``, ``adc_chain``, then rendered
    text) becomes the rewrite target; each other expression it dominates on
    cost becomes a candidate left-hand side.
    """
    found = []
    for group in bank.sig_to_exprs.values():
        if len(group) < 2:
            continue
        ranked = sorted(
            ((expr_cost(member), member) for member in group),
            key=lambda pair: (
                pair[0]["dmir_inst"],
                pair[0].get("select_depth", 0),
                pair[0].get("adc_chain", 0),
                pair[1].render(),
            ),
        )
        best_cost, best = ranked[0]
        for other_cost, other in ranked[1:]:
            if not dominates(best_cost, other_cost):
                continue
            found.append(
                {
                    "lhs_expr": other,
                    "rhs_expr": best,
                    "lhs": other.render(),
                    "rhs": best.render(),
                    "cost": {
                        "lhs": other_cost,
                        "rhs": best_cost,
                        "delta": cost_delta(other_cost, best_cost),
                    },
                }
            )
    return found


def filter_novel(
    candidates: list[dict],
    rule_patterns: list[tuple[Expr, Expr]],
    rule_keys: set[tuple[str, str]],
) -> list[dict]:
    """Keep only the candidates that the existing rules do not cover.

    A candidate is dropped when the key of its canonicalized pair is in
    *rule_keys*, or when ``is_candidate_covered`` reports it as covered.
    """

    def already_known(candidate: dict) -> bool:
        lhs_e = candidate["lhs_expr"]
        rhs_e = candidate["rhs_expr"]
        canon_lhs, canon_rhs = canonicalize_pair(lhs_e, rhs_e)
        if build_candidate_key(canon_lhs, canon_rhs) in rule_keys:
            return True
        return is_candidate_covered(lhs_e, rhs_e, rule_patterns, rule_keys)

    return [candidate for candidate in candidates if not already_known(candidate)]
"-".join(sorted(ops)[:3]) if ops else "identity" + return f"synth-{tag}-{index:03d}" + + +# --------------------------------------------------------------------------- +# Main pipeline +# --------------------------------------------------------------------------- + +def _log(msg: str): + sys.stderr.write(f"[synth] {msg}\n") + sys.stderr.flush() + + +def run_synthesis(args) -> dict: + t0 = time.time() + + # Step 1: Build test vectors + envs = build_sample_envs() + _log(f"sample envs: {len(envs)}") + + # Step 2: Enumerate + _log(f"enumerating expressions (depth={args.max_depth}, vars={args.num_vars}, " + f"max_cost={args.max_cost})...") + bank = enumerate_expressions( + max_depth=args.max_depth, + num_vars=args.num_vars, + envs=envs, + max_cost=args.max_cost, + verbose=True, + ) + _log(f"expression bank: {bank.total_added} terms, " + f"{len(bank.sig_to_exprs)} unique signatures") + + # Step 3: Extract candidates + raw = extract_candidates(bank) + _log(f"raw candidates: {len(raw)}") + + # Step 4: Filter against existing rules + rule_patterns = load_rule_patterns(args.rules) if args.rules else [] + rule_keys = build_rule_key_set(rule_patterns) + novel = filter_novel(raw, rule_patterns, rule_keys) + _log(f"novel candidates (not in existing rules): {len(novel)}") + + # Step 5: Z3 verification + verified = [] + rejected = [] + if not args.no_z3 and novel: + _log(f"verifying {len(novel)} candidates with Z3 (timeout={args.z3_timeout}ms)...") + for i, c in enumerate(novel): + var_names = sorted(extract_var_names(c["lhs_expr"]) | + extract_var_names(c["rhs_expr"])) + is_valid, status = verify_equivalence( + c["lhs_expr"], c["rhs_expr"], var_names, args.z3_timeout, + ) + if is_valid: + verified.append(c) + else: + c["z3_status"] = status + rejected.append(c) + if (i + 1) % 50 == 0: + _log(f" verified {i + 1}/{len(novel)} " + f"(valid={len(verified)}, rejected={len(rejected)})") + _log(f"Z3 done: {len(verified)} valid, {len(rejected)} rejected") + elif args.no_z3: + verified = 
def parse_args(argv=None):
    """Parse the synthesizer's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        argparse.Namespace with ``max_depth``, ``num_vars``, ``max_cost``,
        ``rules``, ``out``, ``no_z3``, ``z3_timeout`` and ``include_carry``.
    """
    parser = argparse.ArgumentParser(description="Synthesize dMIR rewrite rules")
    parser.add_argument("--max-depth", type=int, default=3)
    parser.add_argument("--num-vars", type=int, default=2)
    parser.add_argument("--max-cost", type=int, default=6)
    parser.add_argument("--rules", type=str, default=None,
                        help="Existing rules JSON to filter against")
    parser.add_argument("--out", type=str, default=None,
                        help="Output report path (default: stdout)")
    parser.add_argument("--no-z3", action="store_true",
                        help="Skip Z3 verification (sampling only)")
    parser.add_argument("--z3-timeout", type=int, default=5000,
                        help="Z3 timeout per query in ms")
    parser.add_argument("--include-carry", action="store_true",
                        help="Run carry-chain ADC/SBB synthesis (Phase 3)")
    return parser.parse_args(argv)


def _emit_report(report, out_path, label):
    """Serialize *report* as indented JSON to *out_path*, or to stdout.

    Args:
        report: JSON-serializable report dict.
        out_path: Destination file path, or a falsy value to print to stdout.
        label: Prefix of the "<label> written to <path>" log line.
    """
    output = json.dumps(report, indent=2)
    if out_path:
        pathlib.Path(out_path).write_text(output, encoding="utf-8")
        _log(f"{label} written to {out_path}")
    else:
        print(output)


def main():
    """Script entry point: run one synthesis mode and emit its JSON report."""
    args = parse_args()

    if args.include_carry:
        # Carry-chain synthesis is an alternative mode: it emits its own
        # report and the regular enumeration pipeline is not run.
        carry_rules = synthesize_carry_rules(verbose=True)
        _emit_report({"carry_rules": carry_rules}, args.out, "carry report")
        return

    _emit_report(run_synthesis(args), args.out, "report")


if __name__ == "__main__":
    main()
import json
import pathlib
import subprocess
import sys
import tempfile

BUDGET_FILES = [
    "tests/evm_asm/compiler_pass_timing_budget_x86_cg_peephole.json",
    "tests/evm_asm/compiler_pass_timing_budget_dmir_rewrite.json",
]

# Case names that appear in both budget baselines
CASE_NAMES = [
    "add",
    "mul",
    "div",
    "shl",
    "shr",
    "sar",
    "byte",
    "eq_true",
    "lt_true",
    "jump",
    "u256_shl_add_mul",
    "u256_mul_add_chain",
    "u256_shr_add_shl",
    "bool_and_or_xor_not",
    "bool_xor_not_chain",
]

# Statistic names emitted for each phase time and for its share of the total.
_STAT_KEYS = ("mean", "median", "p95", "min", "max")


def make_phase_stats(time_ms, share_pct):
    """Return a phase stats dict in which every statistic is the same value.

    Args:
        time_ms: Value used for each time statistic.
        share_pct: Value used for each ``share_of_total_pct`` statistic.
    """
    stats = {key: time_ms for key in _STAT_KEYS}
    stats["share_of_total_pct"] = {key: share_pct for key in _STAT_KEYS}
    return stats


def make_case_summary(total_time_ms, pass_name, pass_time_ms, pass_share_pct):
    """Return a one-run, one-record summary containing a single phase."""
    return {
        "total_time_ms": {"mean": total_time_ms, "median": total_time_ms},
        "phases": {
            pass_name: make_phase_stats(pass_time_ms, pass_share_pct),
        },
        "runs": 1,
        "record_count": 1,
    }


def build_synthetic_report(pass_name, total_time_ms, pass_time_ms,
                           pass_share_pct, case_names=None):
    """Build a manifest-style timing report with identical stats per case.

    Args:
        pass_name: Name of the single phase recorded in every summary.
        total_time_ms: Total compile time reported for each case and overall.
        pass_time_ms: Time reported for ``pass_name``.
        pass_share_pct: Share of total time reported for ``pass_name``.
        case_names: Case names to emit; defaults to ``CASE_NAMES``.

    Returns:
        Dict with ``manifest``, ``case_count``, ``cases`` and ``overall``.
    """
    names = CASE_NAMES if case_names is None else list(case_names)

    cases = [
        {
            "name": name,
            "input": f"/synthetic/{name}.evm.hex",
            "summary": make_case_summary(
                total_time_ms, pass_name, pass_time_ms, pass_share_pct
            ),
        }
        for name in names
    ]

    overall_summary = make_case_summary(
        total_time_ms, pass_name, pass_time_ms, pass_share_pct
    )
    # The overall summary aggregates one record per case ("runs" stays 1).
    overall_summary["record_count"] = len(names)

    return {
        "manifest": "/synthetic/manifest.json",
        "case_count": len(names),
        "cases": cases,
        "overall": overall_summary,
    }
def run_checker(checker, budget_path, report_path):
    """Run the budget checker script on one budget/report pair.

    The checker is invoked with ``--allow-missing-cases``. A non-zero exit is
    returned to the caller rather than raised (``check=False``).

    Returns:
        subprocess.CompletedProcess with captured text stdout/stderr.
    """
    cmd = [
        sys.executable,
        str(checker),
        "--budget",
        str(budget_path),
        "--report",
        str(report_path),
        "--allow-missing-cases",
    ]
    return subprocess.run(cmd, capture_output=True, text=True, check=False)


def _check_report(checker, budget_path, report, report_path):
    """Write *report* to *report_path* as JSON and run the checker on it."""
    report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
    return run_checker(checker, budget_path, report_path)


def main():
    """Check that the budget checker accepts an in-budget report and rejects
    an over-budget one, for every file in ``BUDGET_FILES``.

    Returns:
        0 on success; 1 on a usage error, a missing file, or a failed check.
    """
    if len(sys.argv) != 2:
        print(
            "usage: test_check_compiler_pass_timing_budget.py SOURCE_DIR",
            file=sys.stderr,
        )
        return 1

    source_dir = pathlib.Path(sys.argv[1]).resolve()
    checker = source_dir / "tools" / "check_compiler_pass_timing_budget.py"

    if not checker.exists():
        print(f"checker not found: {checker}", file=sys.stderr)
        return 1

    failures = []

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = pathlib.Path(tmp_dir)

        for rel_budget in BUDGET_FILES:
            budget_path = source_dir / rel_budget
            if not budget_path.exists():
                print(f"budget file not found: {budget_path}", file=sys.stderr)
                return 1

            budget = json.loads(budget_path.read_text(encoding="utf-8"))
            target_pass = budget["target_pass"]
            baseline_overall = budget["baseline"]["overall_total_time_ms_median"]
            tag = pathlib.Path(rel_budget).stem

            # Tiny pass time/share, with total time equal to the baseline
            # (0 % regression), so the report should be accepted.
            good_report = build_synthetic_report(
                pass_name=target_pass,
                total_time_ms=baseline_overall,
                pass_time_ms=0.001,
                pass_share_pct=0.1,
            )
            result = _check_report(
                checker, budget_path, good_report, tmp / f"report_{tag}.json"
            )
            if result.returncode != 0:
                # Fall back to stdout so the message is never empty when the
                # checker reports its findings there.
                details = (result.stderr or result.stdout).strip()
                failures.append(
                    f"checker failed for {tag} (exit {result.returncode}):\n"
                    f"{details}"
                )
                continue

            # Also verify that a clearly over-budget report is rejected
            bad_report = build_synthetic_report(
                pass_name=target_pass,
                total_time_ms=baseline_overall,
                pass_time_ms=999.0,   # massively over time budget
                pass_share_pct=99.0,  # massively over share budget
            )
            bad_result = _check_report(
                checker, budget_path, bad_report, tmp / f"bad_report_{tag}.json"
            )
            if bad_result.returncode == 0:
                failures.append(
                    f"checker INCORRECTLY passed an over-budget report for {tag}"
                )

    if failures:
        for msg in failures:
            print(msg, file=sys.stderr)
        print(
            "FAIL: test_check_compiler_pass_timing_budget",
            file=sys.stderr,
        )
        return 1

    print("PASS: test_check_compiler_pass_timing_budget")
    return 0


if __name__ == "__main__":
    sys.exit(main())
import copy
import json
import pathlib
import subprocess
import sys
import tempfile

VALID_RULE_TEMPLATE = {
    "name": "test-add-zero",
    "status": "accepted",
    "inputs": ["x"],
    "lhs": "(add x 0:i64)",
    "rhs": "x",
    "cost": {
        "lhs": {"dmir_inst": 1, "select_depth": 0, "adc_chain": 0, "runtime_calls": 0},
        "rhs": {"dmir_inst": 0, "select_depth": 0, "adc_chain": 0, "runtime_calls": 0},
        "delta": {"dmir_inst": -1, "select_depth": 0, "adc_chain": 0, "runtime_calls": 0},
    },
    "validation": {
        "modes": ["interpreter_fuzz"],
        "coverage": ["DMirValidation.FuzzesAddZeroRewrite"],
    },
}


def run_checker(source_dir, rules_path, gtest_binary=None):
    """Run tools/check_dmir_rewrite_rules.py on *rules_path*.

    ``--gtest-binary`` is passed only when *gtest_binary* is given.

    Returns:
        subprocess.CompletedProcess with captured text stdout/stderr.
    """
    script = pathlib.Path(source_dir) / "tools" / "check_dmir_rewrite_rules.py"
    cmd = [sys.executable, str(script), "--rules", str(rules_path)]
    if gtest_binary:
        cmd += ["--gtest-binary", str(gtest_binary)]
    return subprocess.run(cmd, capture_output=True, text=True)


def write_rules(path, rules):
    """Write *rules* to *path* wrapped as ``{"rules": [...]}`` JSON."""
    path.write_text(json.dumps({"rules": rules}), encoding="utf-8")


def expect_rejection(source_dir, rules_path, rules, description, needle,
                     gtest_binary=None):
    """Run the checker on *rules* and verify that it rejects them.

    Args:
        source_dir: Repository root containing the checker script.
        rules_path: Scratch file that receives the rules JSON.
        rules: Rule dicts expected to be rejected.
        description: What is wrong with the rules, for the failure message.
        needle: Substring that must appear in the checker's stderr.
        gtest_binary: Optional gtest binary forwarded to the checker.

    Returns:
        None when the checker failed with the expected message, otherwise a
        "FAIL: ..." line describing the mismatch.
    """
    write_rules(rules_path, rules)
    proc = run_checker(source_dir, rules_path, gtest_binary)
    if proc.returncode == 0:
        return f"FAIL: checker should fail on {description}"
    if needle not in proc.stderr:
        return f"FAIL: expected '{needle}' in error output"
    return None


def main():
    """Exercise the dmir rewrite rule checker on real and broken rule files.

    Returns:
        0 when every expectation holds, 1 otherwise.
    """
    if len(sys.argv) not in (2, 3):
        print(f"Usage: {sys.argv[0]} SOURCE_DIR [GTEST_BINARY]",
              file=sys.stderr)
        return 1

    source_dir = pathlib.Path(sys.argv[1])
    gtest_binary = pathlib.Path(sys.argv[2]) if len(sys.argv) == 3 else None
    rules_path = source_dir / "src/compiler/mir/dmir_rewrite_rules.json"

    if not rules_path.exists():
        print(f"Rules file not found: {rules_path}", file=sys.stderr)
        return 1

    # The committed rules must pass (with the binary when one was supplied).
    proc = run_checker(source_dir, rules_path, gtest_binary)
    if proc.returncode != 0:
        print("FAIL: checker failed on real dmir rules", file=sys.stderr)
        print(proc.stderr, file=sys.stderr)
        return 1
    if "dmir rewrite rule metadata is complete" not in proc.stdout:
        print("FAIL: expected success message not found", file=sys.stderr)
        return 1

    if gtest_binary:
        proc2 = run_checker(source_dir, rules_path, None)
        if proc2.returncode != 0:
            print("FAIL: checker failed on real dmir rules without binary",
                  file=sys.stderr)
            print(proc2.stderr, file=sys.stderr)
            return 1

    # Same name, different expression, so that only the name is duplicated.
    dup_b = copy.deepcopy(VALID_RULE_TEMPLATE)
    dup_b["lhs"] = "(add x 1:i64)"

    bad_status = copy.deepcopy(VALID_RULE_TEMPLATE)
    bad_status["name"] = "bad-status-rule"
    bad_status["status"] = "unknown_status"

    # (add 0:i64 x) is expected to normalize to the same canonical key as
    # (add x 0:i64) because add is commutative.
    commuted = copy.deepcopy(VALID_RULE_TEMPLATE)
    commuted["name"] = "test-add-zero-commuted"
    commuted["lhs"] = "(add 0:i64 x)"

    no_semantic = copy.deepcopy(VALID_RULE_TEMPLATE)
    no_semantic["name"] = "no-semantic-mode"
    no_semantic["validation"]["modes"] = ["interpreter_sample"]

    # (file name, rules, description, expected stderr substring, binary)
    negative_cases = [
        ("dup.json", [copy.deepcopy(VALID_RULE_TEMPLATE), dup_b],
         "duplicate rule name", "duplicate", None),
        ("bad_status.json", [bad_status],
         "invalid status", "invalid status", None),
        ("dup_canonical.json", [VALID_RULE_TEMPLATE, commuted],
         "duplicate canonical lhs/rhs", "duplicates canonical rewrite", None),
        ("no_semantic.json", [no_semantic],
         "rule with no semantic mode", "interpreter_fuzz or smt", None),
    ]

    if gtest_binary:
        # Coverage names can only be cross-checked against a real binary.
        missing_cov = copy.deepcopy(VALID_RULE_TEMPLATE)
        missing_cov["name"] = "missing-coverage-rule"
        missing_cov["lhs"] = "(sub x 0:i64)"
        missing_cov["validation"]["coverage"] = ["NonExistentSuite.NonExistentTest"]
        negative_cases.append(
            ("missing_cov.json", [missing_cov],
             "missing gtest coverage entry", "missing gtest coverage",
             gtest_binary)
        )

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        for file_name, rules, description, needle, binary in negative_cases:
            error = expect_rejection(
                source_dir, tmpdir / file_name, rules, description, needle,
                binary,
            )
            if error:
                print(error, file=sys.stderr)
                return 1

    print("PASS: test_check_dmir_rewrite_rules")
    return 0


if __name__ == "__main__":
    sys.exit(main())
+ cmd = [ + sys.executable, + str(collector), + "--dtvm", + str(dtvm_binary), + "--manifest", + str(manifest), + "--runs", + "1", + "--case", + "add", + "--output", + str(output_path), + "--", + "--format", + "evm", + "--mode", + "multipass", + "--compile-only", + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + ) + + if result.returncode != 0: + print(result.stderr, file=sys.stderr) + print( + f"FAIL: test_collect_compiler_pass_timings — collector exited with " + f"code {result.returncode}", + file=sys.stderr, + ) + return 1 + + if not output_path.exists(): + print( + "FAIL: test_collect_compiler_pass_timings — output JSON was not written", + file=sys.stderr, + ) + return 1 + + try: + report = json.loads(output_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + print( + f"FAIL: test_collect_compiler_pass_timings — invalid JSON: {exc}", + file=sys.stderr, + ) + return 1 + + # Required top-level fields for a manifest run + for field in ("manifest", "case_count", "cases", "overall"): + if field not in report: + print( + f"FAIL: test_collect_compiler_pass_timings — missing field '{field}'", + file=sys.stderr, + ) + return 1 + + overall = report["overall"] + for field in ("runs", "record_count", "total_time_ms", "phases"): + if field not in overall: + print( + f"FAIL: test_collect_compiler_pass_timings — overall missing " + f"field '{field}'", + file=sys.stderr, + ) + return 1 + + total_time = overall["total_time_ms"] + for stat in ("mean", "median"): + if stat not in total_time: + print( + f"FAIL: test_collect_compiler_pass_timings — " + f"total_time_ms missing stat '{stat}'", + file=sys.stderr, + ) + return 1 + + print("PASS: test_collect_compiler_pass_timings") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/test_mine_dmir_bootstrap_config.py b/tools/test_mine_dmir_bootstrap_config.py new file mode 100644 index 000000000..1017588f7 --- /dev/null +++ 
#!/usr/bin/env python3
# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import json
import pathlib
import subprocess
import sys
import tempfile

# Keys the miner's JSON output must contain.
TOP_LEVEL_KEYS = ("summary", "candidates", "curated_candidates",
                  "covered_candidates", "novel_candidates")
SUMMARY_KEYS = ("term_count", "sample_count", "candidate_count",
                "curated_candidate_count", "covered_candidate_count",
                "novel_candidate_count", "config_supplied")
# Candidates the bootstrap config is expected to add. A tuple keeps the order
# in which a missing candidate is reported deterministic.
BOOTSTRAP_EXPECTED_LHS = ("(mul x 0:i64)", "(mul x 1:i64)")


def run_miner(source_dir, extra_args=()):
    """Run tools/mine_dmir_seed_rules.py with *extra_args*.

    Returns:
        subprocess.CompletedProcess with captured text stdout/stderr.
    """
    script = pathlib.Path(source_dir) / "tools" / "mine_dmir_seed_rules.py"
    cmd = [sys.executable, str(script)] + list(extra_args)
    return subprocess.run(cmd, capture_output=True, text=True)


def load_report(path):
    """Load the miner's JSON output.

    Returns:
        ``(report, None)`` on success, or ``(None, message)`` where *message*
        is a "FAIL: ..." line when the file is missing or is not valid JSON.
    """
    if not path.exists():
        return None, "FAIL: output file not created"
    try:
        return json.loads(path.read_text(encoding="utf-8")), None
    except json.JSONDecodeError as exc:
        return None, f"FAIL: output is not valid JSON: {exc}"


def _fail(*lines):
    """Print each line to stderr and return the failing exit status (1)."""
    for line in lines:
        print(line, file=sys.stderr)
    return 1


def main():
    """Check that the miner honours --config with the bootstrap config.

    Returns:
        0 when every expectation holds, 1 otherwise.
    """
    if len(sys.argv) != 2:
        return _fail(f"Usage: {sys.argv[0]} SOURCE_DIR")

    source_dir = pathlib.Path(sys.argv[1])
    bootstrap_config = source_dir / "src/compiler/mir/dmir_rewrite_mining_bootstrap.json"

    if not bootstrap_config.exists():
        return _fail(f"Bootstrap config not found: {bootstrap_config}")

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        out_path = tmpdir / "bootstrap_candidates.json"

        proc = run_miner(source_dir, [
            "--config", str(bootstrap_config),
            "--out", str(out_path),
        ])
        if proc.returncode != 0:
            return _fail("FAIL: miner exited non-zero with bootstrap config",
                         proc.stderr)

        result, error = load_report(out_path)
        if error:
            return _fail(error)

        # Validate the shape first, so that a malformed report yields a FAIL
        # line rather than a KeyError traceback further down.
        for key in TOP_LEVEL_KEYS:
            if key not in result:
                return _fail(f"FAIL: output missing key '{key}'")
        summary = result["summary"]
        for key in SUMMARY_KEYS:
            if key not in summary:
                return _fail(f"FAIL: summary missing key '{key}'")

        if summary["config_supplied"] is not True:
            return _fail("FAIL: config_supplied should be true when --config is used")

        # Baseline run without --config, for the term-count comparison.
        default_out = tmpdir / "default_candidates.json"
        proc2 = run_miner(source_dir, ["--out", str(default_out)])
        if proc2.returncode != 0:
            return _fail("FAIL: default miner failed", proc2.stderr)

        default_result, error = load_report(default_out)
        if error:
            return _fail(error)
        default_summary = (
            default_result.get("summary", {})
            if isinstance(default_result, dict) else {}
        )
        if "term_count" not in default_summary:
            return _fail("FAIL: default output missing summary.term_count")

        if summary["term_count"] <= default_summary["term_count"]:
            return _fail("FAIL: bootstrap config should produce more terms than default")

        lhs_set = {entry["lhs"] for entry in result["curated_candidates"]}
        for expected_lhs in BOOTSTRAP_EXPECTED_LHS:
            if expected_lhs not in lhs_set:
                return _fail(
                    f"FAIL: expected bootstrap candidate '{expected_lhs}' not found"
                )

    print("PASS: test_mine_dmir_bootstrap_config")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# SPDX-License-Identifier: Apache-2.0

import json
import pathlib
import subprocess
import sys
import tempfile

# Keys this test reads from the miner's output.
REQUIRED_KEYS = ("summary", "covered_candidates", "novel_candidates")
REQUIRED_SUMMARY_KEYS = ("curated_candidate_count", "covered_candidate_count",
                         "novel_candidate_count")


def run_miner(source_dir, extra_args=()):
    """Run tools/mine_dmir_seed_rules.py with *extra_args*.

    Returns:
        subprocess.CompletedProcess with captured text stdout/stderr.
    """
    script = pathlib.Path(source_dir) / "tools" / "mine_dmir_seed_rules.py"
    cmd = [sys.executable, str(script)] + list(extra_args)
    return subprocess.run(cmd, capture_output=True, text=True)


def load_report(path):
    """Load the miner's JSON output.

    Returns:
        ``(report, None)`` on success, or ``(None, message)`` where *message*
        is a "FAIL: ..." line when the file is missing or is not valid JSON.
    """
    if not path.exists():
        return None, "FAIL: output file not created"
    try:
        return json.loads(path.read_text(encoding="utf-8")), None
    except json.JSONDecodeError as exc:
        return None, f"FAIL: output is not valid JSON: {exc}"


def _fail(*lines):
    """Print each line to stderr and return the failing exit status (1)."""
    for line in lines:
        print(line, file=sys.stderr)
    return 1


def main():
    """Check the miner's covered/novel split against the real rules file.

    Returns:
        0 when every expectation holds, 1 otherwise.
    """
    if len(sys.argv) != 2:
        return _fail(f"Usage: {sys.argv[0]} SOURCE_DIR")

    source_dir = pathlib.Path(sys.argv[1])
    rules_path = source_dir / "src/compiler/mir/dmir_rewrite_rules.json"
    bootstrap_config = source_dir / "src/compiler/mir/dmir_rewrite_mining_bootstrap.json"

    if not rules_path.exists():
        return _fail(f"Rules file not found: {rules_path}")

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        out_path = tmpdir / "novel_candidates.json"

        proc = run_miner(source_dir, ["--rules", str(rules_path), "--out", str(out_path)])
        if proc.returncode != 0:
            return _fail("FAIL: miner exited non-zero", proc.stderr)

        result, error = load_report(out_path)
        if error:
            return _fail(error)

        # Validate the shape first, so that a malformed report yields a FAIL
        # line rather than a KeyError traceback.
        for key in REQUIRED_KEYS:
            if key not in result:
                return _fail(f"FAIL: output missing key '{key}'")
        summary = result["summary"]
        for key in REQUIRED_SUMMARY_KEYS:
            if key not in summary:
                return _fail(f"FAIL: summary missing key '{key}'")

        covered = summary["covered_candidate_count"]
        novel = summary["novel_candidate_count"]
        curated = summary["curated_candidate_count"]

        if covered == 0:
            return _fail("FAIL: expected some candidates covered by the real rules file")
        if covered + novel != curated:
            return _fail("FAIL: covered + novel != curated")
        if novel >= curated:
            return _fail("FAIL: novel_candidate_count should be < curated_candidate_count")

        covered_lhs_set = {entry.get("lhs") for entry in result["covered_candidates"]}
        if "(add x 0:i64)" not in covered_lhs_set:
            return _fail("FAIL: '(add x 0:i64)' should appear in covered_candidates")

        # Every entry must carry the flag that matches the list it is in.
        for entry in result["novel_candidates"]:
            if entry.get("covered_by_rule_repo") is not False:
                return _fail(f"FAIL: novel candidate '{entry.get('lhs')}' has wrong "
                             "covered_by_rule_repo")
        for entry in result["covered_candidates"]:
            if entry.get("covered_by_rule_repo") is not True:
                return _fail(f"FAIL: covered candidate '{entry.get('lhs')}' has wrong "
                             "covered_by_rule_repo")

        if bootstrap_config.exists():
            out_path2 = tmpdir / "novel_bootstrap.json"
            proc2 = run_miner(source_dir, [
                "--rules", str(rules_path),
                "--config", str(bootstrap_config),
                "--out", str(out_path2),
            ])
            if proc2.returncode != 0:
                return _fail("FAIL: miner failed with --rules + --config", proc2.stderr)

            result2, error = load_report(out_path2)
            if error:
                return _fail(error)
            summary2 = result2.get("summary", {}) if isinstance(result2, dict) else {}
            if summary2.get("config_supplied") is not True:
                return _fail("FAIL: config_supplied should be true with --config")
            # A missing count is treated like zero: nothing was reported covered.
            if summary2.get("covered_candidate_count", 0) == 0:
                return _fail("FAIL: expected some covered candidates with bootstrap + rules")

    print("PASS: test_mine_dmir_novel_rules")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# SPDX-License-Identifier: Apache-2.0

import json
import pathlib
import subprocess
import sys
import tempfile


def run_miner(source_dir, extra_args=()):
    """Invoke tools/mine_dmir_seed_rules.py, appending *extra_args*.

    Returns the CompletedProcess with stdout/stderr captured as text.
    """
    miner = pathlib.Path(source_dir) / "tools" / "mine_dmir_seed_rules.py"
    return subprocess.run(
        [sys.executable, str(miner), *extra_args],
        capture_output=True,
        text=True,
    )


def main():
    """Smoke-test the seed-rule miner when run without --rules or --config.

    Returns 0 when the output has the expected shape and values, else 1.
    """

    def fail(*lines):
        # Report on stderr and hand back the failing exit status.
        for line in lines:
            print(line, file=sys.stderr)
        return 1

    if len(sys.argv) != 2:
        return fail(f"Usage: {sys.argv[0]} ")

    root = pathlib.Path(sys.argv[1])
    top_keys = ("summary", "candidates", "curated_candidates",
                "covered_candidates", "novel_candidates")
    summary_keys = ("term_count", "sample_count", "candidate_count",
                    "curated_candidate_count", "covered_candidate_count",
                    "novel_candidate_count", "config_supplied")

    with tempfile.TemporaryDirectory() as scratch:
        out_path = pathlib.Path(scratch) / "seed_candidates.json"

        proc = run_miner(root, ["--out", str(out_path)])
        if proc.returncode != 0:
            return fail("FAIL: miner exited non-zero", proc.stderr)
        if not out_path.exists():
            return fail("FAIL: output file not created")
        try:
            result = json.loads(out_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            return fail(f"FAIL: output is not valid JSON: {exc}")

        # Report the first absent key, in declaration order.
        absent = next((k for k in top_keys if k not in result), None)
        if absent is not None:
            return fail(f"FAIL: output missing top-level key '{absent}'")

        summary = result["summary"]
        absent = next((k for k in summary_keys if k not in summary), None)
        if absent is not None:
            return fail(f"FAIL: summary missing key '{absent}'")

        if summary["term_count"] <= 0:
            return fail("FAIL: term_count should be > 0")
        if summary["sample_count"] <= 0:
            return fail("FAIL: sample_count should be > 0")

        # No rule repository and no config were supplied to this run.
        if summary["covered_candidate_count"] != 0:
            return fail("FAIL: covered_candidate_count should be 0 without --rules")
        if summary["config_supplied"] is not False:
            return fail("FAIL: config_supplied should be false without --config")

        curated = result["curated_candidates"]
        absent = next(
            (name for item in curated
             for name in ("lhs", "rhs", "cost") if name not in item),
            None,
        )
        if absent is not None:
            return fail(f"FAIL: candidate entry missing field '{absent}'")

        seen_lhs = {item["lhs"] for item in curated}
        if "(add x 0:i64)" not in seen_lhs:
            return fail("FAIL: expected '(add x 0:i64)' in curated candidates")

        if summary["novel_candidate_count"] != summary["curated_candidate_count"]:
            return fail("FAIL: without --rules, novel count should equal curated count")

    print("PASS: test_mine_dmir_seed_rules")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# SPDX-License-Identifier: Apache-2.0

import json
import pathlib
import subprocess
import sys
import tempfile


def run_reporter(source_dir, rules_path, gtest_binary=None, out_path=None):
    """Invoke tools/report_dmir_rewrite_rules.py on *rules_path*.

    ``--gtest-binary`` and ``--out`` are passed only when the matching
    argument is truthy. Returns the CompletedProcess with stdout/stderr
    captured as text.
    """
    reporter = pathlib.Path(source_dir) / "tools" / "report_dmir_rewrite_rules.py"
    argv = [sys.executable, str(reporter), "--rules", str(rules_path)]
    if gtest_binary:
        argv.extend(["--gtest-binary", str(gtest_binary)])
    if out_path:
        argv.extend(["--out", str(out_path)])
    return subprocess.run(argv, capture_output=True, text=True)


def main():
    """Validate the dmir rewrite rule reporter's JSON output.

    Returns 0 when every expectation holds, else 1.
    """

    def fail(*lines):
        # Report on stderr and hand back the failing exit status.
        for line in lines:
            print(line, file=sys.stderr)
        return 1

    if len(sys.argv) not in (2, 3):
        return fail(f"Usage: {sys.argv[0]} []")

    root = pathlib.Path(sys.argv[1])
    gtest_binary = None if len(sys.argv) != 3 else pathlib.Path(sys.argv[2])
    rules_path = root / "src/compiler/mir/dmir_rewrite_rules.json"

    if not rules_path.exists():
        return fail(f"Rules file not found: {rules_path}")

    entry_fields = ("name", "status", "inputs", "modes", "cost_delta",
                    "coverage", "coverage_complete")
    cost_fields = ("dmir_inst", "select_depth", "adc_chain", "runtime_calls")

    with tempfile.TemporaryDirectory() as scratch:
        scratch = pathlib.Path(scratch)

        # Run 1: write the report to a file, with the binary if one was given.
        out_path = scratch / "report.json"
        proc = run_reporter(root, rules_path, gtest_binary, out_path)
        if proc.returncode != 0:
            return fail("FAIL: reporter exited non-zero", proc.stderr)
        if not out_path.exists():
            return fail("FAIL: output file not created")
        try:
            report = json.loads(out_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            return fail(f"FAIL: output is not valid JSON: {exc}")

        gap = next((k for k in ("summary", "rules") if k not in report), None)
        if gap is not None:
            return fail(f"FAIL: report missing top-level key '{gap}'")

        summary = report["summary"]
        summary_keys = ("rule_count", "status_counts", "mode_counts",
                        "rules_with_missing_coverage")
        gap = next((k for k in summary_keys if k not in summary), None)
        if gap is not None:
            return fail(f"FAIL: summary missing key '{gap}'")

        if summary["rule_count"] <= 0:
            return fail("FAIL: summary.rule_count must be > 0")

        for rule in report["rules"]:
            gap = next((f for f in entry_fields if f not in rule), None)
            if gap is not None:
                return fail(f"FAIL: rule entry missing field '{gap}'")
            gap = next((c for c in cost_fields if c not in rule["cost_delta"]), None)
            if gap is not None:
                return fail(f"FAIL: cost_delta missing field '{gap}'")

        if gtest_binary and summary["rules_with_missing_coverage"] != 0:
            return fail("FAIL: real dmir rules have missing coverage according to gtest binary")

        # Run 2: no binary, so no coverage entry may be marked absent.
        out_path2 = scratch / "report_no_binary.json"
        proc2 = run_reporter(root, rules_path, None, out_path2)
        if proc2.returncode != 0:
            return fail("FAIL: reporter failed without gtest binary")
        report2 = json.loads(out_path2.read_text(encoding="utf-8"))
        for rule in report2["rules"]:
            for cov in rule.get("coverage", []):
                if not cov.get("present", True):
                    return fail(f"FAIL: coverage entry marked absent without binary: {cov}")

        # Run 3: no --out, so the report must arrive on stdout as JSON.
        proc3 = run_reporter(root, rules_path, None, None)
        if proc3.returncode != 0:
            return fail("FAIL: reporter failed when writing to stdout")
        try:
            json.loads(proc3.stdout)
        except json.JSONDecodeError as exc:
            return fail(f"FAIL: stdout is not valid JSON: {exc}")

    print("PASS: test_report_dmir_rewrite_rules")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# SPDX-License-Identifier: Apache-2.0

import json
import pathlib
import subprocess
import sys
import tempfile


def run_reporter(source_dir, rules_path, gtest_binary=None, out_path=None):
    """Invoke tools/report_x86_cg_peephole_validation.py on *rules_path*.

    ``--gtest-binary`` and ``--out`` are passed only when the matching
    argument is truthy. Returns the CompletedProcess with stdout/stderr
    captured as text.
    """
    reporter = (
        pathlib.Path(source_dir) / "tools" / "report_x86_cg_peephole_validation.py"
    )
    argv = [sys.executable, str(reporter), "--rules", str(rules_path)]
    if gtest_binary:
        argv.extend(["--gtest-binary", str(gtest_binary)])
    if out_path:
        argv.extend(["--out", str(out_path)])
    return subprocess.run(argv, capture_output=True, text=True)


def main():
    """Validate the x86 CgIR peephole validation reporter's JSON output.

    Returns 0 when every expectation holds, else 1.
    """

    def fail(*lines):
        # Report on stderr and hand back the failing exit status.
        for line in lines:
            print(line, file=sys.stderr)
        return 1

    if len(sys.argv) not in (2, 3):
        return fail(f"Usage: {sys.argv[0]} []")

    root = pathlib.Path(sys.argv[1])
    gtest_binary = None if len(sys.argv) != 3 else pathlib.Path(sys.argv[2])
    rules_path = root / "src/compiler/target/x86/x86_cg_peephole_rules.json"

    if not rules_path.exists():
        return fail(f"Rules file not found: {rules_path}")

    entry_fields = ("name", "stage", "priority", "modes", "coverage",
                    "coverage_complete")

    with tempfile.TemporaryDirectory() as scratch:
        scratch = pathlib.Path(scratch)

        # Run 1: write the report to a file, with the binary if one was given.
        out_path = scratch / "report.json"
        proc = run_reporter(root, rules_path, gtest_binary, out_path)
        if proc.returncode != 0:
            return fail("FAIL: reporter exited non-zero", proc.stderr)
        if not out_path.exists():
            return fail("FAIL: output file not created")
        try:
            report = json.loads(out_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            return fail(f"FAIL: output is not valid JSON: {exc}")

        gap = next((k for k in ("summary", "rules") if k not in report), None)
        if gap is not None:
            return fail(f"FAIL: report missing top-level key '{gap}'")

        summary = report["summary"]
        summary_keys = ("rule_count", "stage_counts", "mode_counts",
                        "rules_with_missing_coverage")
        gap = next((k for k in summary_keys if k not in summary), None)
        if gap is not None:
            return fail(f"FAIL: summary missing key '{gap}'")

        if summary["rule_count"] <= 0:
            return fail("FAIL: summary.rule_count must be > 0")

        gap = next(
            (f for rule in report["rules"] for f in entry_fields if f not in rule),
            None,
        )
        if gap is not None:
            return fail(f"FAIL: rule entry missing field '{gap}'")

        # Run 2: no binary, so no coverage entry may be marked absent.
        out_path2 = scratch / "report_no_binary.json"
        proc2 = run_reporter(root, rules_path, None, out_path2)
        if proc2.returncode != 0:
            return fail("FAIL: reporter failed without gtest binary")
        report2 = json.loads(out_path2.read_text(encoding="utf-8"))
        for rule in report2["rules"]:
            for cov in rule.get("coverage", []):
                if not cov.get("present", True):
                    return fail(f"FAIL: coverage entry marked absent without binary: {cov}")

        # The coverage count of run 1 is judged only after run 2 has passed.
        if gtest_binary and report["summary"]["rules_with_missing_coverage"] != 0:
            return fail("FAIL: real rules have missing coverage according to gtest binary")

        # Run 3: no --out, so the report must arrive on stdout as JSON.
        proc3 = run_reporter(root, rules_path, None, None)
        if proc3.returncode != 0:
            return fail("FAIL: reporter failed when writing to stdout")
        try:
            json.loads(proc3.stdout)
        except json.JSONDecodeError as exc:
            return fail(f"FAIL: stdout is not valid JSON: {exc}")

    print("PASS: test_report_x86_cg_peephole_validation")
    return 0


if __name__ == "__main__":
    sys.exit(main())
"""Test wrapper for update_compiler_pass_timing_budget.py.

Called by CMakeLists.txt as:
    test_update_compiler_pass_timing_budget.py <source_dir>

Runs the updater with a synthetic timing report and verifies that the output
budget JSON has the required structure. No dtvm binary is needed.
"""

import json
import pathlib
import subprocess
import sys
import tempfile

CASE_NAMES = [
    "add",
    "mul",
    "div",
    "shl",
    "shr",
    "sar",
    "byte",
    "eq_true",
    "lt_true",
    "jump",
    "u256_shl_add_mul",
    "u256_mul_add_chain",
    "u256_shr_add_shl",
    "bool_and_or_xor_not",
    "bool_xor_not_chain",
]

PASS_NAME = "x86_cg_peephole"
TOTAL_TIME_MS = 1.0
PASS_TIME_MS = 0.002
PASS_SHARE_PCT = 0.2

_TEST_NAME = "test_update_compiler_pass_timing_budget"
_STAT_KEYS = ("mean", "median", "p95", "min", "max")


def make_phase_stats(time_ms, share_pct):
    """Return per-phase statistics in which every statistic is the same value."""
    stats = {key: time_ms for key in _STAT_KEYS}
    stats["share_of_total_pct"] = {key: share_pct for key in _STAT_KEYS}
    return stats


def make_case_summary(total_time_ms, pass_name, pass_time_ms, pass_share_pct):
    """Return a single-run case summary containing exactly one phase."""
    return {
        "total_time_ms": {"mean": total_time_ms, "median": total_time_ms},
        "phases": {
            pass_name: make_phase_stats(pass_time_ms, pass_share_pct),
        },
        "runs": 1,
        "record_count": 1,
    }


def build_synthetic_report(manifest_path):
    """Build a timing report with one identical entry per name in CASE_NAMES."""
    cases = [
        {
            "name": name,
            "input": f"/synthetic/{name}.evm.hex",
            "summary": make_case_summary(
                TOTAL_TIME_MS, PASS_NAME, PASS_TIME_MS, PASS_SHARE_PCT
            ),
        }
        for name in CASE_NAMES
    ]

    # make_case_summary already records runs == 1; only the record count
    # differs for the aggregate entry.
    overall_summary = make_case_summary(
        TOTAL_TIME_MS, PASS_NAME, PASS_TIME_MS, PASS_SHARE_PCT
    )
    overall_summary["record_count"] = len(CASE_NAMES)

    return {
        "manifest": str(manifest_path),
        "case_count": len(CASE_NAMES),
        "cases": cases,
        "overall": overall_summary,
    }


def _fail(detail):
    """Print a FAIL line for this test to stderr and return 1."""
    print(f"FAIL: {_TEST_NAME} — {detail}", file=sys.stderr)
    return 1


def _first_missing(container, keys):
    """Return the first of *keys* absent from *container*, or None."""
    for key in keys:
        if key not in container:
            return key
    return None


def _check_budget(budget):
    """Return a failure description for a malformed budget, else None."""
    missing = _first_missing(
        budget, ("version", "target_pass", "thresholds", "baseline", "metadata")
    )
    if missing is not None:
        return f"missing field '{missing}'"

    if budget["target_pass"] != PASS_NAME:
        return (
            f"target_pass mismatch: "
            f"expected '{PASS_NAME}', got '{budget['target_pass']}'"
        )

    baseline = budget["baseline"]
    missing = _first_missing(
        baseline, ("overall_total_time_ms_median", "case_total_time_ms_median")
    )
    if missing is not None:
        return f"baseline missing field '{missing}'"

    # Every synthetic case must have produced a per-case baseline.
    missing = _first_missing(baseline["case_total_time_ms_median"], CASE_NAMES)
    if missing is not None:
        return f"baseline missing case '{missing}'"

    # The overall baseline must equal the synthetic median that was fed in.
    expected_overall = TOTAL_TIME_MS
    if abs(baseline["overall_total_time_ms_median"] - expected_overall) > 1e-9:
        return (
            f"overall baseline "
            f"{baseline['overall_total_time_ms_median']} != expected {expected_overall}"
        )

    missing = _first_missing(
        budget["thresholds"],
        (
            "max_pass_share_p95_pct",
            "max_pass_time_p95_ms",
            "max_overall_total_time_regression_pct",
            "max_case_total_time_regression_pct",
        ),
    )
    if missing is not None:
        return f"thresholds missing key '{missing}'"

    missing = _first_missing(
        budget["metadata"], ("compile_mode", "thresholds_status", "runs")
    )
    if missing is not None:
        return f"metadata missing key '{missing}'"
    return None


def main():
    """Run the updater on a synthetic report; return 0 on success, 1 on failure."""
    if len(sys.argv) != 2:
        print(
            "usage: test_update_compiler_pass_timing_budget.py <source_dir>",
            file=sys.stderr,
        )
        return 1

    source_dir = pathlib.Path(sys.argv[1]).resolve()
    updater = source_dir / "tools" / "update_compiler_pass_timing_budget.py"

    if not updater.exists():
        print(f"updater not found: {updater}", file=sys.stderr)
        return 1

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = pathlib.Path(tmp_dir)

        # The manifest path is only recorded in the report; it is never read.
        manifest_path = (
            source_dir / "tests" / "evm_asm" / "compiler_pass_timing_manifest.json"
        )
        report_path = tmp / "timing_report.json"
        report_path.write_text(
            json.dumps(build_synthetic_report(manifest_path), indent=2),
            encoding="utf-8",
        )
        output_path = tmp / "budget_out.json"

        cmd = [
            sys.executable,
            str(updater),
            "--report",
            str(report_path),
            "--out",
            str(output_path),
            "--target-pass",
            PASS_NAME,
            "--runs",
            "1",
            "--compile-mode",
            "compile-only",
            "--threshold-status",
            "provisional",
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)

        if result.returncode != 0:
            print(result.stderr, file=sys.stderr)
            return _fail(f"updater exited with code {result.returncode}")

        if not output_path.exists():
            return _fail("output JSON was not written")

        try:
            budget = json.loads(output_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            return _fail(f"invalid JSON: {exc}")

        problem = _check_budget(budget)
        if problem is not None:
            return _fail(problem)

    print("PASS: test_update_compiler_pass_timing_budget")
    return 0


if __name__ == "__main__":
    sys.exit(main())
#!/usr/bin/env python3
# Copyright (C) 2025 the DTVM authors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test wrapper for tools/generate_x86_cg_peephole.py.

Usage:
    test_x86_cg_peephole_generator.py <source_dir>

Checks that the generator succeeds on the real rules file, emitting a
non-empty .inc and a conflict-free report, and that it exits non-zero on two
rules that differ only in name.
"""

import json
import pathlib
import subprocess
import sys
import tempfile

# Snippets the generated .inc must contain.
_INC_MARKERS = (
    "// Copyright (C) 2025 the DTVM authors",
    "GeneratedInstructionRuleResult",
    "tryGeneratedInstructionRules",
    "tryGeneratedBlockEndRules",
    "namespace {",
)


def run_generator(rules_path, out_inc, out_report, source_dir):
    """Run the generator script and return the CompletedProcess."""
    script = pathlib.Path(source_dir) / "tools" / "generate_x86_cg_peephole.py"
    cmd = [
        sys.executable,
        str(script),
        "--rules",
        str(rules_path),
        "--out-inc",
        str(out_inc),
        "--out-report",
        str(out_report),
    ]
    return subprocess.run(cmd, capture_output=True, text=True)


def _fail(message, detail=None):
    """Print a FAIL line (plus optional detail) to stderr and return 1."""
    print(f"FAIL: {message}", file=sys.stderr)
    if detail:
        print(detail, file=sys.stderr)
    return 1


def _conflicting_rules():
    """Return a rule set holding two rules that are identical except by name."""
    def rule(name):
        return {
            "name": name,
            "stage": "instruction",
            "priority": 100,
            "pattern": [{"bind": "I", "opcode": "MOV64rr"}],
            "action": {"erase": ["I"]},
        }

    return {"version": 1, "rules": [rule("rule-a"), rule("rule-b")]}


def main():
    """Run both generator scenarios; return 0 on success, 1 on failure."""
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <source_dir>", file=sys.stderr)
        return 1

    source_dir = pathlib.Path(sys.argv[1])
    rules_path = source_dir / "src/compiler/target/x86/x86_cg_peephole_rules.json"

    if not rules_path.exists():
        print(f"Rules file not found: {rules_path}", file=sys.stderr)
        return 1

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp = pathlib.Path(tmpdir)

        # Scenario 1: the real rules must generate cleanly.
        out_inc = tmp / "generated.inc"
        out_report = tmp / "report.txt"
        proc = run_generator(rules_path, out_inc, out_report, source_dir)
        if proc.returncode != 0:
            return _fail(
                f"generator exited {proc.returncode} on valid rules", proc.stderr
            )
        if not out_inc.exists() or out_inc.stat().st_size == 0:
            return _fail("generated .inc file is missing or empty")
        inc_text = out_inc.read_text(encoding="utf-8")
        for marker in _INC_MARKERS:
            if marker not in inc_text:
                return _fail(f"generated .inc missing expected marker: {marker!r}")
        if not out_report.exists():
            return _fail("report file was not created")
        report_text = out_report.read_text(encoding="utf-8")
        if "No conflicts detected." not in report_text:
            return _fail(
                "report does not say 'No conflicts detected.'", report_text
            )

        # Scenario 2: two same-priority rules on one pattern must be rejected.
        conflict_rules_path = tmp / "conflict_rules.json"
        conflict_rules_path.write_text(
            json.dumps(_conflicting_rules()), encoding="utf-8"
        )
        conflict_report = tmp / "report2.txt"
        proc = run_generator(
            conflict_rules_path, tmp / "generated2.inc", conflict_report, source_dir
        )
        if proc.returncode == 0:
            return _fail("generator should exit 1 for conflicting rules")
        # The report is optional on failure; if present it must name the conflict.
        if conflict_report.exists():
            if "Conflicts:" not in conflict_report.read_text(encoding="utf-8"):
                return _fail(
                    "conflict report does not mention 'Conflicts:'", proc.stderr
                )

    print("PASS: test_x86_cg_peephole_generator")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# SPDX-License-Identifier: Apache-2.0
"""Test wrapper for tools/check_x86_cg_peephole_validation.py.

Usage:
    test_x86_cg_peephole_validation.py <source_dir> [<gtest_binary>]

Checks that the checker accepts the real rules file and rejects two
synthetic rule sets: a rule with no validation metadata, and an instruction
rule whose only validation mode is "structural".
"""

import json
import pathlib
import subprocess
import sys
import tempfile


def run_checker(source_dir, rules_path, gtest_binary=None):
    """Run the checker script and return the CompletedProcess.

    ``--gtest-binary`` is only passed when *gtest_binary* is truthy.
    """
    script = pathlib.Path(source_dir) / "tools" / "check_x86_cg_peephole_validation.py"
    cmd = [sys.executable, str(script), "--rules", str(rules_path)]
    if gtest_binary:
        cmd += ["--gtest-binary", str(gtest_binary)]
    return subprocess.run(cmd, capture_output=True, text=True)


def _fail(message, detail=None):
    """Print a FAIL line (plus optional detail) to stderr and return 1."""
    print(f"FAIL: {message}", file=sys.stderr)
    if detail is not None:
        print(detail, file=sys.stderr)
    return 1


def _single_rule(name, validation=None):
    """Return a rule set with one instruction rule and optional validation."""
    rule = {
        "name": name,
        "stage": "instruction",
        "priority": 100,
        "pattern": [{"bind": "I", "opcode": "MOV64rr"}],
        "action": {"erase": ["I"]},
    }
    if validation is not None:
        rule["validation"] = validation
    return {"rules": [rule]}


def main():
    """Run all checker scenarios; return 0 on success, 1 on failure."""
    if len(sys.argv) not in (2, 3):
        print(
            f"Usage: {sys.argv[0]} <source_dir> [<gtest_binary>]",
            file=sys.stderr,
        )
        return 1

    source_dir = pathlib.Path(sys.argv[1])
    gtest_binary = pathlib.Path(sys.argv[2]) if len(sys.argv) == 3 else None
    rules_path = source_dir / "src/compiler/target/x86/x86_cg_peephole_rules.json"

    if not rules_path.exists():
        print(f"Rules file not found: {rules_path}", file=sys.stderr)
        return 1

    # The real rules must pass, with the gtest binary when one was given.
    proc = run_checker(source_dir, rules_path, gtest_binary)
    if proc.returncode != 0:
        return _fail("checker failed on real rules file", proc.stderr)
    if "x86 cg peephole validation metadata is complete" not in proc.stdout:
        return _fail("expected success message not in stdout", proc.stdout)

    # If a binary was supplied, the metadata-only path must also pass.
    if gtest_binary:
        proc = run_checker(source_dir, rules_path, None)
        if proc.returncode != 0:
            return _fail("checker failed without gtest binary", proc.stderr)

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp = pathlib.Path(tmpdir)

        # A rule without any validation block must be rejected.
        bad_path = tmp / "bad_rules.json"
        bad_path.write_text(
            json.dumps(_single_rule("no-validation-rule")), encoding="utf-8"
        )
        proc = run_checker(source_dir, bad_path, None)
        if proc.returncode == 0:
            return _fail("checker should fail on rule missing validation")
        if "missing validation metadata" not in proc.stderr:
            return _fail(
                "expected error about missing validation metadata", proc.stderr
            )

        # An instruction rule validated only structurally must be rejected.
        structural_path = tmp / "structural_only.json"
        structural_path.write_text(
            json.dumps(
                _single_rule(
                    "structural-only-rule",
                    {
                        "modes": ["structural"],
                        "coverage": ["SomeSuite.SomeTest"],
                    },
                )
            ),
            encoding="utf-8",
        )
        proc = run_checker(source_dir, structural_path, None)
        if proc.returncode == 0:
            return _fail(
                "checker should fail on instruction rule with only structural mode"
            )
        if "execution or semantics_model" not in proc.stderr:
            return _fail(
                "expected error about execution or semantics_model", proc.stderr
            )

    print("PASS: test_x86_cg_peephole_validation")
    return 0


if __name__ == "__main__":
    sys.exit(main())
"""Refresh a compiler pass timing budget file from a timing report.

Baselines are always taken from the report. Thresholds and metadata are
carried over from ``--budget-in`` when given; otherwise the default
thresholds are used.
"""

import argparse
import json
import pathlib


DEFAULT_THRESHOLDS = {
    "max_pass_share_p95_pct": 2.0,
    "max_pass_time_p95_ms": 0.05,
    "max_overall_total_time_regression_pct": 15.0,
    "max_case_total_time_regression_pct": 20.0,
}


def parse_args():
    """Parse and return the command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Refresh compiler pass timing budget baselines from a timing report."
    )
    parser.add_argument("--report", required=True, help="Timing report JSON path")
    parser.add_argument("--out", required=True, help="Budget JSON output path")
    parser.add_argument(
        "--budget-in",
        help="Existing budget JSON to preserve thresholds and metadata fields",
    )
    parser.add_argument(
        "--rules",
        help="Optional rule JSON path used to refresh the recorded rule count",
    )
    parser.add_argument(
        "--target-pass",
        default="x86_cg_peephole",
        help="Pass name recorded in the budget file",
    )
    parser.add_argument("--manifest", help="Manifest path to record in metadata")
    parser.add_argument("--runs", type=int, help="Run count to record in metadata")
    parser.add_argument(
        "--num-extra-compilations",
        type=int,
        help="Extra compilation count used during collection",
    )
    parser.add_argument(
        "--compile-mode",
        default="compile-only",
        help="Compile mode label recorded in metadata",
    )
    parser.add_argument(
        "--threshold-status",
        default="provisional",
        help="Threshold status label recorded in metadata",
    )
    return parser.parse_args()


def load_json(path):
    """Read *path* as UTF-8 and return the decoded JSON value."""
    with pathlib.Path(path).open("r", encoding="utf-8") as f:
        return json.load(f)


def count_rules(path):
    """Return the length of the top-level "rules" list in a rule file."""
    return len(load_json(path).get("rules", []))


def normalize_thresholds(thresholds):
    """Return a copy of *thresholds* using the current p95 key names.

    A falsy input yields a copy of DEFAULT_THRESHOLDS. Otherwise, when a p95
    key is absent its value is taken from the older key name (which is
    removed) or, failing that, from DEFAULT_THRESHOLDS. The input mapping is
    never modified.
    """
    if not thresholds:
        return dict(DEFAULT_THRESHOLDS)

    normalized = dict(thresholds)
    renames = (
        ("max_pass_share_p95_pct", "max_pass_share_of_total_pct"),
        ("max_pass_time_p95_ms", "max_pass_time_ms"),
    )
    for current_key, legacy_key in renames:
        if current_key not in normalized:
            normalized[current_key] = normalized.pop(
                legacy_key, DEFAULT_THRESHOLDS[current_key]
            )
    return normalized


def main():
    """Build the refreshed budget, write it to ``--out`` and echo it to stdout."""
    args = parse_args()
    report = load_json(args.report)
    prior_budget = load_json(args.budget_in) if args.budget_in else {}

    thresholds = normalize_thresholds(prior_budget.get("thresholds"))
    case_baselines = {
        case["name"]: case["summary"]["total_time_ms"]["median"]
        for case in report.get("cases", [])
    }

    # "or {}" also covers a prior budget that stores "metadata": null.
    metadata = dict(prior_budget.get("metadata") or {})
    if args.manifest:
        metadata["manifest"] = args.manifest
    elif "manifest" in report:
        metadata["manifest"] = report["manifest"]
    # Without --runs, a run count inherited from --budget-in is kept as is.
    if args.runs is not None:
        metadata["runs"] = args.runs
    if args.num_extra_compilations is not None:
        metadata["num_extra_compilations"] = args.num_extra_compilations
    if args.rules:
        metadata["rule_count"] = count_rules(args.rules)
    metadata["compile_mode"] = args.compile_mode
    metadata["thresholds_status"] = args.threshold_status

    budget = {
        "version": 1,
        "target_pass": args.target_pass,
        "thresholds": thresholds,
        "baseline": {
            "overall_total_time_ms_median": report["overall"]["total_time_ms"][
                "median"
            ],
            "case_total_time_ms_median": case_baselines,
        },
        "metadata": metadata,
    }

    rendered = json.dumps(budget, indent=2)
    out_path = pathlib.Path(args.out)
    # Create the destination directory so a fresh output location works.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(rendered + "\n", encoding="utf-8")
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())