From f9e92e81f9cb9a0acac8c0eca614f1768f1e1907 Mon Sep 17 00:00:00 2001 From: Abmcar Date: Sat, 16 May 2026 18:47:57 +0800 Subject: [PATCH 1/2] ci: speed up evmone perf regression checks --- .ci/run_test_suite.sh | 40 +++- .github/workflows/dtvm_evm_test_x86.yml | 33 ++- tools/check_performance_regression.py | 296 +++++++++++++++++++++--- 3 files changed, 327 insertions(+), 42 deletions(-) diff --git a/.ci/run_test_suite.sh b/.ci/run_test_suite.sh index dd253b0c4..bb03e1c70 100644 --- a/.ci/run_test_suite.sh +++ b/.ci/run_test_suite.sh @@ -305,16 +305,34 @@ for STACK_TYPE in ${STACK_TYPES[@]}; do ;; "benchmarksuite") # Clone evmone and run performance regression check - EVMONE_DIR="evmone" - if [ ! -d "$EVMONE_DIR" ]; then - git clone --depth 1 --recurse-submodules -b for_test https://github.com/DTVMStack/evmone.git $EVMONE_DIR + EVMONE_DIR=${EVMONE_DIR:-evmone} + EVMONE_REPO=${EVMONE_REPO:-https://github.com/DTVMStack/evmone.git} + EVMONE_REF=${EVMONE_REF:-for_test} + EVMONE_COMMIT=${EVMONE_COMMIT:-} + + if [ -z "$EVMONE_COMMIT" ]; then + EVMONE_COMMIT=$(git ls-remote "$EVMONE_REPO" "refs/heads/$EVMONE_REF" | awk '{print $1}') + fi + if [ -z "$EVMONE_COMMIT" ]; then + echo "Unable to resolve $EVMONE_REPO refs/heads/$EVMONE_REF" + exit 1 fi + if [ ! -d "$EVMONE_DIR/.git" ]; then + rm -rf "$EVMONE_DIR" + git clone --depth 1 --recurse-submodules -b "$EVMONE_REF" "$EVMONE_REPO" "$EVMONE_DIR" + fi + git -C "$EVMONE_DIR" remote set-url origin "$EVMONE_REPO" + git -C "$EVMONE_DIR" fetch --depth 1 origin "$EVMONE_REF" + git -C "$EVMONE_DIR" checkout --detach "$EVMONE_COMMIT" + git -C "$EVMONE_DIR" submodule update --init --recursive + BENCHMARK_THRESHOLD=${BENCHMARK_THRESHOLD:-0.15} BENCHMARK_MODE=${BENCHMARK_MODE:-multipass} BENCHMARK_SUMMARY_FILE=${BENCHMARK_SUMMARY_FILE:-/tmp/perf_summary.md} BENCHMARK_REPETITIONS=${BENCHMARK_REPETITIONS:-3} BENCHMARK_MIN_TIME=${BENCHMARK_MIN_TIME:-""} + BENCHMARK_JOBS=${BENCHMARK_JOBS:-1} PERF_ARGS="" if [ -n "$BENCHMARK_REPETITIONS" ]; then @@ -323,6 +341,9 @@ for STACK_TYPE in ${STACK_TYPES[@]}; do if [ -n "$BENCHMARK_MIN_TIME" ]; then PERF_ARGS="$PERF_ARGS --benchmark-min-time $BENCHMARK_MIN_TIME" fi + if [ -n "$BENCHMARK_JOBS" ]; then + PERF_ARGS="$PERF_ARGS --benchmark-jobs $BENCHMARK_JOBS" + fi cp build/lib/* $EVMONE_DIR/ @@ -331,7 +352,18 @@ for STACK_TYPE in ${STACK_TYPES[@]}; do cp ../tools/check_performance_regression.py ./ if [ ! -f "build/bin/evmone-bench" ]; then - "$SCRIPT_DIR/cmake_ci_build.sh" build -- -DEVMONE_TESTING=ON -DCMAKE_BUILD_TYPE=Release + EVMONE_CMAKE_ARGS=(-DEVMONE_TESTING=ON -DCMAKE_BUILD_TYPE=Release) + if [ "${DTVM_CI_USE_NINJA:-0}" = "1" ]; then + EVMONE_CMAKE_ARGS=(-G Ninja "${EVMONE_CMAKE_ARGS[@]}") + fi + if [ "${DTVM_CI_USE_SCCACHE:-0}" = "1" ]; then + EVMONE_CMAKE_ARGS+=( + -DCMAKE_C_COMPILER_LAUNCHER=sccache + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + ) + fi + cmake -S . -B build "${EVMONE_CMAKE_ARGS[@]}" + cmake --build build --target evmone-bench --parallel "${DTVM_CI_BUILD_JOBS:-16}" fi print_sccache_stats "before benchmarks" diff --git a/.github/workflows/dtvm_evm_test_x86.yml b/.github/workflows/dtvm_evm_test_x86.yml index b2815fbd0..d85dddb7f 100644 --- a/.github/workflows/dtvm_evm_test_x86.yml +++ b/.github/workflows/dtvm_evm_test_x86.yml @@ -16,6 +16,10 @@ on: permissions: contents: read +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + # Shared FetchContent cache root for all container jobs. The hook in # CMakeLists.txt (commit 96707a2 lines 8-18) picks this up as the base # dir for FetchContent populations. The cache key includes the generator @@ -28,6 +32,9 @@ env: SCCACHE_IDLE_TIMEOUT: "0" DTVM_CI_USE_NINJA: "1" DTVM_CI_USE_SCCACHE: "1" + EVMONE_REPO: https://github.com/DTVMStack/evmone.git + EVMONE_REF: for_test + EVMONE_BENCH_CACHE_VERSION: v1 jobs: build_test_evm_interpreter_x86_ctest: @@ -500,6 +507,23 @@ jobs: git checkout ${{ github.sha }} git stash pop || true + - name: Resolve evmone benchmark commit + id: evmone + run: | + EVMONE_COMMIT=$(git ls-remote "$EVMONE_REPO" "refs/heads/$EVMONE_REF" | awk '{print $1}') + if [ -z "$EVMONE_COMMIT" ]; then + echo "Unable to resolve $EVMONE_REPO refs/heads/$EVMONE_REF" >&2 + exit 1 + fi + echo "commit=$EVMONE_COMMIT" >> "$GITHUB_OUTPUT" + echo "Resolved evmone $EVMONE_REF to $EVMONE_COMMIT" + + - name: Cache evmone-bench + uses: actions/cache@v4 + with: + path: evmone + key: ${{ runner.os }}-evmone-bench-${{ env.EVMONE_BENCH_CACHE_VERSION }}-${{ steps.evmone.outputs.commit }}-${{ hashFiles('.ci/run_test_suite.sh', '.github/workflows/dtvm_evm_test_x86.yml') }} + - name: Build current PR and check regression id: perf-check run: | @@ -509,7 +533,7 @@ jobs: export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH - rm -rf build evmone + rm -rf build export CMAKE_BUILD_TARGET=Release export ENABLE_ASAN=false @@ -523,10 +547,17 @@ jobs: export BENCHMARK_BASELINE_LIB=/tmp/baseline_lib export BENCHMARK_SUMMARY_FILE=/tmp/perf_summary_${{ matrix.mode }}.md export BENCHMARK_REPETITIONS=5 + export BENCHMARK_JOBS=3 + export EVMONE_COMMIT=${{ steps.evmone.outputs.commit }} bash .ci/run_test_suite.sh continue-on-error: true + - name: Clean evmone cache inputs + if: always() + run: | + rm -f evmone/libdtvmapi.so evmone/libdtvmapi.so.* evmone/check_performance_regression.py + - name: Write Performance Summary if: always() run: | diff --git a/tools/check_performance_regression.py b/tools/check_performance_regression.py index 48d2d4e02..96a779a06 100755 --- a/tools/check_performance_regression.py +++ b/tools/check_performance_regression.py @@ -21,10 +21,12 @@ import argparse import json import os +import re import shutil import subprocess import sys import tempfile +from collections import Counter from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -37,33 +39,43 @@ class BenchmarkResult: iterations: int -def run_benchmark( - lib_path: str, - mode: str, - benchmark_dir: str, - extra_args: Optional[List[str]] = None, - repetitions: int = 3, - benchmark_min_time: Optional[str] = None, -) -> List[BenchmarkResult]: - """Run benchmark and parse JSON output. +def benchmark_env(lib_path: str, mode: str) -> Dict[str, str]: + return {**os.environ, "EVMONE_EXTERNAL_OPTIONS": f"{lib_path},mode={mode}"} - Uses --benchmark_out to write JSON results to a temporary file so that - the human-readable benchmark progress streams to stdout/stderr in real - time (important for CI visibility). - When *repetitions* > 1, each benchmark runs N times and only the - median aggregate is kept, significantly reducing noise from ASLR and - shared-runner contention. - """ - env = {"EVMONE_EXTERNAL_OPTIONS": f"{lib_path},mode={mode}"} +def available_cpus() -> List[int]: + if hasattr(os, "sched_getaffinity"): + return sorted(os.sched_getaffinity(0)) + cpu_count = os.cpu_count() or 1 + return list(range(cpu_count)) - fd, json_out_path = tempfile.mkstemp(suffix=".json") - os.close(fd) +def split_benchmark_args(extra_args: Optional[List[str]]) -> Tuple[str, List[str]]: + benchmark_filter = "external/total/*" + remaining_args: List[str] = [] + + for arg in extra_args or []: + if arg.startswith("--benchmark_filter="): + benchmark_filter = arg.split("=", 1)[1] + else: + remaining_args.append(arg) + + return benchmark_filter, remaining_args + + +def build_benchmark_cmd( + benchmark_dir: str, + json_out_path: str, + benchmark_filter: str, + repetitions: int, + benchmark_min_time: Optional[str], + extra_args: List[str], + cpu: Optional[int], +) -> List[str]: cmd: List[str] = [] - if shutil.which("taskset"): - cmd.extend(["taskset", "-c", "0"]) + if cpu is not None and shutil.which("taskset"): + cmd.extend(["taskset", "-c", str(cpu)]) cmd.extend([ "./build/bin/evmone-bench", @@ -80,39 +92,242 @@ def run_benchmark( if benchmark_min_time: cmd.append(f"--benchmark_min_time={benchmark_min_time}") - if extra_args: - cmd.extend(extra_args) + cmd.extend(extra_args) + cmd.append(f"--benchmark_filter={benchmark_filter}") + return cmd - if not any(arg.startswith("--benchmark_filter") for arg in cmd): - cmd.append("--benchmark_filter=external/total/*") - print(f"Running: {' '.join(cmd)}") - print(f"Environment: EVMONE_EXTERNAL_OPTIONS={env['EVMONE_EXTERNAL_OPTIONS']}") +def list_benchmark_names( + lib_path: str, + mode: str, + benchmark_dir: str, + benchmark_filter: str, + extra_args: List[str], +) -> List[str]: + cmd = [ + "./build/bin/evmone-bench", + benchmark_dir, + "--benchmark_list_tests", + f"--benchmark_filter={benchmark_filter}", + ] + cmd.extend(extra_args) + + print(f"Listing benchmarks: {' '.join(cmd)}") sys.stdout.flush() result = subprocess.run( cmd, - env={**subprocess.os.environ, **env}, + env=benchmark_env(lib_path, mode), + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) - if result.returncode != 0: - print(f"Benchmark execution failed with code {result.returncode}") - try: - os.unlink(json_out_path) - except OSError: - pass + print(result.stdout, end="") + print(f"Benchmark listing failed with code {result.returncode}") + sys.exit(2) + + names: List[str] = [] + for line in result.stdout.splitlines(): + name = line.strip() + if not name or any(ch.isspace() for ch in name): + continue + names.append(name) + + if not names: + print(f"::error::No benchmarks matched filter: {benchmark_filter}") sys.exit(2) + return names + + +def exact_filter(names: List[str]) -> str: + return "^(" + "|".join(re.escape(name) for name in names) + ")$" + + +def parse_benchmark_json_file(json_out_path: str) -> List[BenchmarkResult]: + with open(json_out_path, "r") as f: + json_data = f.read() + return parse_benchmark_json(json_data) + + +def run_benchmark_shard( + lib_path: str, + mode: str, + benchmark_dir: str, + benchmark_filter: str, + extra_args: List[str], + repetitions: int, + benchmark_min_time: Optional[str], + cpu: Optional[int], +) -> List[BenchmarkResult]: + fd, json_out_path = tempfile.mkstemp(suffix=".json") + os.close(fd) + try: - with open(json_out_path, "r") as f: - json_data = f.read() + cmd = build_benchmark_cmd( + benchmark_dir, + json_out_path, + benchmark_filter, + repetitions, + benchmark_min_time, + extra_args, + cpu, + ) + + print(f"Running: {' '.join(cmd)}") + print(f"Environment: EVMONE_EXTERNAL_OPTIONS={lib_path},mode={mode}") + sys.stdout.flush() + + result = subprocess.run(cmd, env=benchmark_env(lib_path, mode)) + + if result.returncode != 0: + print(f"Benchmark execution failed with code {result.returncode}") + sys.exit(2) + + return parse_benchmark_json_file(json_out_path) finally: try: os.unlink(json_out_path) except OSError: pass - return parse_benchmark_json(json_data) + +def run_benchmark_parallel( + lib_path: str, + mode: str, + benchmark_dir: str, + extra_args: Optional[List[str]], + repetitions: int, + benchmark_min_time: Optional[str], + benchmark_jobs: int, +) -> List[BenchmarkResult]: + benchmark_filter, remaining_args = split_benchmark_args(extra_args) + + if benchmark_jobs <= 1: + cpus = available_cpus() + cpu = cpus[0] if cpus else None + return run_benchmark_shard( + lib_path, + mode, + benchmark_dir, + benchmark_filter, + remaining_args, + repetitions, + benchmark_min_time, + cpu, + ) + + names = list_benchmark_names( + lib_path, mode, benchmark_dir, benchmark_filter, remaining_args + ) + cpus = available_cpus() + jobs = min(benchmark_jobs, len(names), len(cpus) if cpus else benchmark_jobs) + + if jobs < benchmark_jobs: + print(f"::notice::Reduced benchmark jobs from {benchmark_jobs} to {jobs}") + + shards = [names[i::jobs] for i in range(jobs)] + temp_paths: List[str] = [] + processes: List[subprocess.Popen] = [] + + try: + for index, shard_names in enumerate(shards): + fd, json_out_path = tempfile.mkstemp(suffix=".json") + os.close(fd) + temp_paths.append(json_out_path) + + cpu = cpus[index % len(cpus)] if cpus else None + cmd = build_benchmark_cmd( + benchmark_dir, + json_out_path, + exact_filter(shard_names), + repetitions, + benchmark_min_time, + remaining_args, + cpu, + ) + + cpu_msg = f" on CPU {cpu}" if cpu is not None else "" + print( + f"Starting benchmark shard {index + 1}/{jobs}" + f" ({len(shard_names)} benchmarks){cpu_msg}: {' '.join(cmd)}" + ) + sys.stdout.flush() + + processes.append( + subprocess.Popen(cmd, env=benchmark_env(lib_path, mode)) + ) + + failures = [] + for index, process in enumerate(processes): + return_code = process.wait() + if return_code != 0: + failures.append((index + 1, return_code)) + + if failures: + for shard, return_code in failures: + print( + f"Benchmark shard {shard} failed with code {return_code}" + ) + sys.exit(2) + + results: List[BenchmarkResult] = [] + for json_out_path in temp_paths: + results.extend(parse_benchmark_json_file(json_out_path)) + + name_counts = Counter(result.name for result in results) + duplicate_names = sorted( + name for name, count in name_counts.items() if count > 1 + ) + if duplicate_names: + print(f"::error::Duplicate benchmark results: {duplicate_names}") + sys.exit(2) + + return sorted(results, key=lambda result: result.name) + finally: + for process in processes: + if process.poll() is None: + process.terminate() + for json_out_path in temp_paths: + try: + os.unlink(json_out_path) + except OSError: + pass + + +def run_benchmark( + lib_path: str, + mode: str, + benchmark_dir: str, + extra_args: Optional[List[str]] = None, + repetitions: int = 3, + benchmark_min_time: Optional[str] = None, + benchmark_jobs: int = 1, +) -> List[BenchmarkResult]: + """Run benchmark and parse JSON output. + + Uses --benchmark_out to write JSON results to a temporary file so that + the human-readable benchmark progress streams to stdout/stderr in real + time (important for CI visibility). + + When *repetitions* > 1, each benchmark runs N times and only the + median aggregate is kept, significantly reducing noise from ASLR and + shared-runner contention. + """ + if benchmark_jobs < 1: + print("::error::--benchmark-jobs must be >= 1") + sys.exit(2) + + return run_benchmark_parallel( + lib_path, + mode, + benchmark_dir, + extra_args, + repetitions, + benchmark_min_time, + benchmark_jobs, + ) def parse_benchmark_json(json_output: str) -> List[BenchmarkResult]: @@ -483,6 +698,12 @@ def main(): default=None, help="Minimum execution time per benchmark (e.g., '1s' or '2.0'). Forwarded to evmone-bench.", ) + parser.add_argument( + "--benchmark-jobs", + type=int, + default=1, + help="Run benchmark shards in parallel across CPUs (default: 1).", + ) args = parser.parse_args() @@ -502,6 +723,7 @@ def main(): extra_args=bench_extra, repetitions=args.benchmark_repetitions, benchmark_min_time=args.benchmark_min_time, + benchmark_jobs=args.benchmark_jobs, ) except Exception as e: print(f"::error::Failed to run benchmarks: {e}") From 1f7965ba3b695f1916033e867588a3b19746002b Mon Sep 17 00:00:00 2001 From: Abmcar Date: Sat, 16 May 2026 22:11:59 +0800 Subject: [PATCH 2/2] ci: address evmone benchmark review comments --- .ci/run_test_suite.sh | 19 +++++++++--- tools/check_performance_regression.py | 42 +++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/.ci/run_test_suite.sh b/.ci/run_test_suite.sh index bb03e1c70..a44ac2638 100644 --- a/.ci/run_test_suite.sh +++ b/.ci/run_test_suite.sh @@ -322,10 +322,21 @@ for STACK_TYPE in ${STACK_TYPES[@]}; do rm -rf "$EVMONE_DIR" git clone --depth 1 --recurse-submodules -b "$EVMONE_REF" "$EVMONE_REPO" "$EVMONE_DIR" fi - git -C "$EVMONE_DIR" remote set-url origin "$EVMONE_REPO" - git -C "$EVMONE_DIR" fetch --depth 1 origin "$EVMONE_REF" - git -C "$EVMONE_DIR" checkout --detach "$EVMONE_COMMIT" - git -C "$EVMONE_DIR" submodule update --init --recursive + + EVMONE_HEAD=$(git -C "$EVMONE_DIR" rev-parse HEAD 2>/dev/null || true) + EVMONE_SUBMODULES_READY=1 + if git -C "$EVMONE_DIR" submodule status --recursive | grep -q '^-'; then + EVMONE_SUBMODULES_READY=0 + fi + + if [ "$EVMONE_HEAD" = "$EVMONE_COMMIT" ] && [ "$EVMONE_SUBMODULES_READY" = 1 ]; then + echo "Using cached evmone at $EVMONE_COMMIT" + else + git -C "$EVMONE_DIR" remote set-url origin "$EVMONE_REPO" + git -C "$EVMONE_DIR" fetch --depth 1 origin "$EVMONE_REF" + git -C "$EVMONE_DIR" checkout --detach "$EVMONE_COMMIT" + git -C "$EVMONE_DIR" submodule update --init --recursive + fi BENCHMARK_THRESHOLD=${BENCHMARK_THRESHOLD:-0.15} BENCHMARK_MODE=${BENCHMARK_MODE:-multipass} diff --git a/tools/check_performance_regression.py b/tools/check_performance_regression.py index 96a779a06..ae96eeba7 100755 --- a/tools/check_performance_regression.py +++ b/tools/check_performance_regression.py @@ -26,6 +26,7 @@ import subprocess import sys import tempfile +import time from collections import Counter from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -151,6 +152,43 @@ def parse_benchmark_json_file(json_out_path: str) -> List[BenchmarkResult]: return parse_benchmark_json(json_data) +def stop_processes(processes: List[subprocess.Popen], timeout: float = 5.0) -> None: + running = [process for process in processes if process.poll() is None] + if not running: + return + + for process in running: + try: + process.terminate() + except OSError: + pass + + deadline = time.monotonic() + timeout + for process in running: + remaining = deadline - time.monotonic() + if remaining <= 0: + break + try: + process.wait(timeout=remaining) + except subprocess.TimeoutExpired: + pass + + still_running = [process for process in running if process.poll() is None] + for process in still_running: + try: + process.kill() + except OSError: + pass + + for process in still_running: + try: + process.wait(timeout=1.0) + except subprocess.TimeoutExpired: + print( + f"::warning::Benchmark shard process {process.pid} did not exit after kill" + ) + + def run_benchmark_shard( lib_path: str, mode: str, @@ -286,9 +324,7 @@ def run_benchmark_parallel( return sorted(results, key=lambda result: result.name) finally: - for process in processes: - if process.poll() is None: - process.terminate() + stop_processes(processes) for json_out_path in temp_paths: try: os.unlink(json_out_path)