From 3b28d03a444644b66d8b16b4c42ffec8700be4c0 Mon Sep 17 00:00:00 2001 From: Partouf Date: Sat, 27 Jun 2026 15:53:15 +0200 Subject: [PATCH 1/5] Add objdump parser benchmark with aggregated JSON output Add a Catch2 micro-benchmark for the ObjDumpParser pipeline (fromStream + outputJson) over the pre-captured objdump resource files, plus tooling to run it repeatedly and aggregate run-to-run statistics. - src/test/objdump_benchmark.cpp: BENCHMARK over four inputs of increasing size, tagged [!benchmark] so it stays out of the default test run. - src/test/CMakeLists.txt: compile the benchmark and enable Catch2's CATCH_CONFIG_ENABLE_BENCHMARKING. - scripts/run-benchmarks.sh: run N times, emit one Catch2 XML per run. - scripts/aggregate_bench.py: combine the XML runs into a summary JSON (mean of per-run means, run-to-run stddev, CV, min/max, per-run means). - .github/workflows/benchmark.yml: manual workflow that builds Release, runs the benchmarks, and uploads the summary + raw XML as an artifact. - .gitignore: ignore build-release/ and bench-results/. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/benchmark.yml | 55 +++++++++++++++++ .gitignore | 2 + scripts/aggregate_bench.py | 102 ++++++++++++++++++++++++++++++++ scripts/run-benchmarks.sh | 52 ++++++++++++++++ src/test/CMakeLists.txt | 4 +- src/test/objdump_benchmark.cpp | 71 ++++++++++++++++++++++ 6 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/benchmark.yml create mode 100755 scripts/aggregate_bench.py create mode 100755 scripts/run-benchmarks.sh create mode 100644 src/test/objdump_benchmark.cpp diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..26a07e9 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,55 @@ +name: Benchmark + +# Manual trigger: run this to produce a benchmark summary you can download as an +# artifact (and promote to a baseline for future comparisons). 
+on: + workflow_dispatch: + inputs: + runs: + description: "Number of benchmark process runs to average" + required: false + default: "5" + +jobs: + benchmark: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v4 + + - name: Install Conan + id: conan + uses: turtlebrowser/get-conan@main + with: + version: 2.8.1 + + - name: Configure conan + run: | + conan profile detect + sed -i 's/compiler\.cppstd=.*/compiler.cppstd=20/g' ~/.conan2/profiles/default + conan install --build=fmt/11.0.0 . + + - name: Build (Release) + run: | + CURDIR=$PWD + mkdir -p build-release + cd build-release + cmake "-DCMAKE_PROJECT_TOP_LEVEL_INCLUDES=$CURDIR/conan_provider.cmake" -DCMAKE_C_COMPILER=/usr/bin/gcc-10 -DCMAKE_CXX_COMPILER=/usr/bin/g++-10 -DCMAKE_BUILD_TYPE=Release .. + cmake --build . + + - name: Run benchmarks + env: + BENCH_RUNS: ${{ github.event.inputs.runs }} + BENCH_LABEL: ${{ runner.os }}/${{ runner.arch }} + run: | + ./scripts/run-benchmarks.sh + + - name: Show summary + run: cat bench-results/bench-summary.json + + - uses: actions/upload-artifact@v4 + with: + name: bench-summary + path: | + bench-results/bench-summary.json + bench-results/run-*.xml diff --git a/.gitignore b/.gitignore index 6c2375d..ea0deb9 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ *.app build/ +build-release/ +bench-results/ CMakeUserPresets.json # CLion stuff diff --git a/scripts/aggregate_bench.py b/scripts/aggregate_bench.py new file mode 100755 index 0000000..815d5fd --- /dev/null +++ b/scripts/aggregate_bench.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +"""Aggregate one or more Catch2 (v2) XML benchmark runs into a summary JSON. + +Each input XML is one process run of the benchmark (which itself takes many +samples). This script combines the per-run means for each benchmark into +run-to-run statistics suitable for storing as a CI artifact / baseline. 
# Usage:
#   aggregate_bench.py run-*.xml -o bench-summary.json
#   aggregate_bench.py run-1.xml run-2.xml --commit "$(git rev-parse HEAD)"
#
# All timing values in the Catch2 XML are nanoseconds; the summary keeps raw
# nanoseconds and adds millisecond convenience values.
import argparse
import datetime
import json
import statistics
import sys
import xml.etree.ElementTree as ET

# Catch2 reports nanoseconds; the summary also exposes milliseconds.
NS_PER_MS = 1e6


def _value_attr(element):
    """Return the float ``value`` attribute of *element*, or None if absent."""
    if element is None:
        return None
    raw = element.attrib.get("value")
    return float(raw) if raw is not None else None


def parse_run(path):
    """Return {benchmark_name: {mean_ns, stddev_ns, samples}} for one XML run.

    Every ``<BenchmarkResults>`` element found anywhere in the document is
    recorded under its ``name`` attribute. A field is ``None`` when the
    corresponding child element or attribute is missing (for example when the
    element carries no ``<mean>``), so that one incomplete entry does not
    abort parsing of the whole run.

    Raises:
        xml.etree.ElementTree.ParseError: if *path* is not well-formed XML.
        KeyError: if a ``<BenchmarkResults>`` element has no ``name``.
    """
    root = ET.parse(path).getroot()
    out = {}
    for bench in root.iter("BenchmarkResults"):
        samples = bench.attrib.get("samples")
        out[bench.attrib["name"]] = {
            "mean_ns": _value_attr(bench.find("mean")),
            "stddev_ns": _value_attr(bench.find("standardDeviation")),
            "samples": int(samples) if samples is not None else None,
        }
    return out


def aggregate(paths):
    """Combine several per-run XML files into per-benchmark statistics.

    Args:
        paths: iterable of Catch2 XML file paths, one per process run.

    Returns:
        A list of dicts (one per benchmark, in first-seen order) holding the
        mean of the per-run means, the run-to-run standard deviation, the
        coefficient of variation, min/max, the average within-run standard
        deviation and the individual per-run means.

    Runs in which a benchmark has no mean are ignored for that benchmark;
    ``runs`` therefore counts only the runs that produced a measurement. A
    benchmark without any measurement is skipped with a warning on stderr.
    """
    runs = [parse_run(p) for p in paths]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    names = list(dict.fromkeys(name for run in runs for name in run))

    benchmarks = []
    for name in names:
        entries = [run[name] for run in runs if name in run]
        means = [e["mean_ns"] for e in entries if e["mean_ns"] is not None]
        if not means:
            print(f"warning: no measurement for benchmark '{name}'; skipping",
                  file=sys.stderr)
            continue
        within = [e["stddev_ns"] for e in entries if e["stddev_ns"] is not None]
        samples = next(
            (e["samples"] for e in entries if e["samples"] is not None), None)

        mean_ns = statistics.fmean(means)
        # stdev() needs at least two data points.
        across_ns = statistics.stdev(means) if len(means) > 1 else 0.0
        benchmarks.append({
            "name": name,
            "runs": len(means),
            "samples_per_run": samples,
            "mean_ns": mean_ns,
            "mean_ms": mean_ns / NS_PER_MS,
            "stddev_across_runs_ns": across_ns,
            "stddev_across_runs_ms": across_ns / NS_PER_MS,
            "cv_percent": (across_ns / mean_ns * 100.0) if mean_ns else 0.0,
            "min_ms": min(means) / NS_PER_MS,
            "max_ms": max(means) / NS_PER_MS,
            "mean_within_run_stddev_ms":
                (statistics.fmean(within) / NS_PER_MS) if within else None,
            "per_run_mean_ms": [m / NS_PER_MS for m in means],
        })
    return benchmarks


def main(argv=None):
    """Parse the command line, aggregate the runs and emit the summary JSON.

    Args:
        argv: argument list to parse; ``None`` (the default) uses
            ``sys.argv[1:]``, which is what the script entry point relies on.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("xml", nargs="+", help="Catch2 XML run file(s)")
    ap.add_argument("-o", "--output", help="JSON output file (default: stdout)")
    ap.add_argument("--commit", default="", help="git commit SHA to record")
    ap.add_argument("--ref", default="", help="git ref/branch to record")
    ap.add_argument("--label", default="", help="free-form label (e.g. runner/build)")
    args = ap.parse_args(argv)

    summary = {
        "schema": "asm-parser-bench/1",
        "generated_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "commit": args.commit,
        "ref": args.ref,
        "label": args.label,
        "run_count": len(args.xml),
        "benchmarks": aggregate(args.xml),
    }
    text = json.dumps(summary, indent=2)

    if args.output:
        # Explicit encoding so the artifact does not depend on the locale.
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(text + "\n")
        print(f"Wrote {args.output}", file=sys.stderr)
    else:
        print(text)


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
#
# Run the objdump benchmark N times and aggregate the per-run Catch2 XML into a
# single summary JSON (bench-results/bench-summary.json by default).
#
# Env overrides:
#   BENCH_RUNS      number of process runs (default: 5)
#   BENCH_FILTER    Catch2 test filter (default: [objdump])
#   BENCH_TEST_BIN  path to asm-parser-test (default: build-release/src/test/asm-parser-test)
#   BENCH_OUTDIR    where to write XML + summary (default: bench-results)
#   BENCH_LABEL     free-form label recorded in the summary (default: empty)
#
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$REPO_ROOT"

# ":-" also covers variables that are set but empty.
RUNS="${BENCH_RUNS:-5}"
FILTER="${BENCH_FILTER:-[objdump]}"
TEST_BIN="${BENCH_TEST_BIN:-build-release/src/test/asm-parser-test}"
OUTDIR="${BENCH_OUTDIR:-bench-results}"

# Reject anything that is not a positive integer up front; otherwise `seq`
# fails obscurely or the aggregation step is handed an empty file list.
case "$RUNS" in
    ''|*[!0-9]*)
        echo "error: BENCH_RUNS must be a positive integer, got '$RUNS'" >&2
        exit 1
        ;;
esac
if [ "$RUNS" -lt 1 ]; then
    echo "error: BENCH_RUNS must be at least 1, got '$RUNS'" >&2
    exit 1
fi

if [ ! -x "$TEST_BIN" ]; then
    echo "error: test binary not found/executable at '$TEST_BIN'" >&2
    echo "build a Release first, e.g.:" >&2
    echo "  cmake -B build-release -G Ninja -S . -DCMAKE_BUILD_TYPE=Release && cmake --build build-release" >&2
    exit 1
fi

TEST_DIR="$(cd "$(dirname "$TEST_BIN")" && pwd)"
TEST_EXE="$TEST_DIR/$(basename "$TEST_BIN")"

mkdir -p "$OUTDIR"
ABS_OUTDIR="$(cd "$OUTDIR" && pwd)"

# Drop output of earlier invocations so a previous, larger run count does not
# leave stale run-*.xml files (or an outdated summary) next to the new results.
rm -f "$ABS_OUTDIR"/run-*.xml "$ABS_OUTDIR/bench-summary.json"

xmls=()
for i in $(seq 1 "$RUNS"); do
    xml="$ABS_OUTDIR/run-$i.xml"
    echo "Benchmark run $i/$RUNS -> $OUTDIR/run-$i.xml"
    # The test resolves resource paths relative to its own working dir.
    ( cd "$TEST_DIR" && "$TEST_EXE" "$FILTER" -r xml -o "$xml" )
    xmls+=("$xml")
done

# Outside a git checkout these fall back to empty strings.
COMMIT="$(git -C "$REPO_ROOT" rev-parse HEAD 2>/dev/null || echo "")"
REF="$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")"

python3 "$SCRIPT_DIR/aggregate_bench.py" "${xmls[@]}" \
    --commit "$COMMIT" --ref "$REF" --label "${BENCH_LABEL:-}" \
    -o "$ABS_OUTDIR/bench-summary.json"

echo "Summary written to $OUTDIR/bench-summary.json"
+target_compile_definitions(asm-parser-test PRIVATE CATCH_CONFIG_ENABLE_BENCHMARKING) add_test(NAME asm-parser-test COMMAND $) diff --git a/src/test/objdump_benchmark.cpp b/src/test/objdump_benchmark.cpp new file mode 100644 index 0000000..9c4da41 --- /dev/null +++ b/src/test/objdump_benchmark.cpp @@ -0,0 +1,71 @@ +#include + +#include "../objdump/parser.hpp" +#include "../types/filter.hpp" + +#include +#include +#include +#include + +namespace +{ + +std::string resourcePath(const std::string &name) +{ + if (std::filesystem::current_path().string().ends_with("test")) + return "../../../resources/" + name; + return "../../resources/" + name; +} + +std::string readResource(const std::string &name) +{ + std::ifstream fs(resourcePath(name), std::ios::binary); + REQUIRE(fs.is_open() == true); + std::stringstream ss; + ss << fs.rdbuf(); + return ss.str(); +} + +AsmParser::Filter defaultBinaryFilter() +{ + AsmParser::Filter filter; + filter.binary = true; + filter.plt = true; + filter.library_functions = true; + filter.unused_labels = true; + return filter; +} + +// Parse a buffer with a fresh parser and produce the JSON output, exercising the +// full objdump pipeline the way main.cpp does for a -binary run. Returns the +// output size so Catch keeps the result and the work isn't optimised away. +size_t parseObjDump(const std::string &input, const AsmParser::Filter &filter) +{ + AsmParser::ObjDumpParser parser(filter); + std::istringstream in(input); + parser.fromStream(in); + + std::ostringstream out; + parser.outputJson(out); + return out.str().size(); +} + +} // namespace + +TEST_CASE("objdump parser benchmark", "[!benchmark][objdump]") +{ + const auto filter = defaultBinaryFilter(); + + // The asm files here are pre-captured objdump output of various sizes. 
+ for (const auto &name : {"example.asm", "example_intel.asm", "gcc12_sort_object_reloc.asm", + "gcc12_bin_fmt_O2_flto.asm"}) + { + const std::string input = readResource(name); + + BENCHMARK(std::string("parse ") + name) + { + return parseObjDump(input, filter); + }; + } +} From 7c6b98147baf84e0e3e8ab0f3938422553182925 Mon Sep 17 00:00:00 2001 From: Partouf Date: Sat, 27 Jun 2026 15:57:41 +0200 Subject: [PATCH 2/5] Run benchmark workflow automatically on push and PR Trigger the benchmark workflow on push to main and on pull requests (matching build.yml), instead of only on manual dispatch. Keep workflow_dispatch for ad-hoc reruns. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/benchmark.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 26a07e9..544b088 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,8 +1,13 @@ name: Benchmark -# Manual trigger: run this to produce a benchmark summary you can download as an -# artifact (and promote to a baseline for future comparisons). +# Runs automatically on every push to main and on pull requests, producing a +# benchmark summary artifact (the main-branch runs can be promoted to a baseline +# for future comparisons). Also exposes a manual trigger for ad-hoc reruns. on: + push: + branches: [ main ] + pull_request: + branches: [ main ] workflow_dispatch: inputs: runs: From 5f5c2c28a30ef58e02d71509cb2e24ed33a4cdf5 Mon Sep 17 00:00:00 2001 From: Partouf Date: Sat, 27 Jun 2026 15:58:02 +0200 Subject: [PATCH 3/5] Run benchmark workflow on all branches Drop the branch filters so the benchmark runs on push to any branch. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/benchmark.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 544b088..3c8b505 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,13 +1,11 @@ name: Benchmark -# Runs automatically on every push to main and on pull requests, producing a -# benchmark summary artifact (the main-branch runs can be promoted to a baseline -# for future comparisons). Also exposes a manual trigger for ad-hoc reruns. +# Runs automatically on every push to any branch and on pull requests, producing +# a benchmark summary artifact (the main-branch runs can be promoted to a +# baseline for future comparisons). Also exposes a manual trigger for ad-hoc reruns. on: push: - branches: [ main ] pull_request: - branches: [ main ] workflow_dispatch: inputs: runs: From f8c43454198ce729bdf152b26f15d023e9cc434d Mon Sep 17 00:00:00 2001 From: Partouf Date: Sat, 27 Jun 2026 16:05:38 +0200 Subject: [PATCH 4/5] Update GitHub Actions to latest versions Bump all actions to their latest releases to clear the Node.js 20 deprecation warnings (runners now default to Node 24): - actions/checkout@v4 -> @v7 (Node 24) - actions/upload-artifact@v4 -> @v7 (Node 24) - turtlebrowser/get-conan@main -> @v1.2 (pin to release) - fnkr/github-action-ghr@v1 -> @v1.3 Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/benchmark.yml | 6 +++--- .github/workflows/build.yml | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 3c8b505..80da0b2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -18,11 +18,11 @@ jobs: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - name: Install Conan id: conan - uses: turtlebrowser/get-conan@main + uses: 
turtlebrowser/get-conan@v1.2 with: version: 2.8.1 @@ -50,7 +50,7 @@ jobs: - name: Show summary run: cat bench-results/bench-summary.json - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: bench-summary path: | diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6f85850..f9bb428 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,11 +12,11 @@ jobs: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - name: Install Conan id: conan - uses: turtlebrowser/get-conan@main + uses: turtlebrowser/get-conan@v1.2 with: version: 2.8.1 @@ -42,12 +42,12 @@ jobs: run: | $GITHUB_WORKSPACE/build/src/asm-parser $GITHUB_WORKSPACE/resources/example_intel.asm > ./example_intel.json - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: path: build/src/asm-parser - name: Release - uses: fnkr/github-action-ghr@v1 + uses: fnkr/github-action-ghr@v1.3 if: startsWith(github.ref, 'refs/tags/') env: GHR_COMPRESS: xz From a912400b6e88af7cb5f0f4aaf4840b3bc2d6e285 Mon Sep 17 00:00:00 2001 From: Partouf Date: Sat, 27 Jun 2026 16:13:38 +0200 Subject: [PATCH 5/5] Add benchmark baseline and regression comparison in CI Capture a baseline benchmark summary (5 runs, GitHub runner) and compare each new run against it in the workflow. - bench-results/baseline.json: committed baseline; .gitignore keeps it tracked while ignoring other generated benchmark output. - scripts/compare_bench.py: compares a summary against the baseline, prints a per-benchmark delta table, writes a Markdown table to the GitHub step summary, and exits non-zero on a regression beyond the threshold (default 10%). - benchmark.yml: run the comparison after generating the summary; upload the artifact even on failure. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/benchmark.yml | 10 ++++ .gitignore | 4 +- bench-results/baseline.json | 90 ++++++++++++++++++++++++++++ scripts/compare_bench.py | 101 ++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 bench-results/baseline.json create mode 100755 scripts/compare_bench.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 80da0b2..6a1239c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -50,7 +50,17 @@ jobs: - name: Show summary run: cat bench-results/bench-summary.json + - name: Compare against baseline + run: | + python3 scripts/compare_bench.py \ + --baseline bench-results/baseline.json \ + --current bench-results/bench-summary.json \ + --threshold "${BENCH_THRESHOLD:-10}" + env: + BENCH_THRESHOLD: "10" + - uses: actions/upload-artifact@v7 + if: always() with: name: bench-summary path: | diff --git a/.gitignore b/.gitignore index ea0deb9..8b314ae 100644 --- a/.gitignore +++ b/.gitignore @@ -33,7 +33,9 @@ build/ build-release/ -bench-results/ +# ignore generated benchmark output, but keep the committed baseline +/bench-results/* +!/bench-results/baseline.json CMakeUserPresets.json # CLion stuff diff --git a/bench-results/baseline.json b/bench-results/baseline.json new file mode 100644 index 0000000..5e49968 --- /dev/null +++ b/bench-results/baseline.json @@ -0,0 +1,90 @@ +{ + "schema": "asm-parser-bench/1", + "generated_utc": "2026-06-27T14:08:48.248475+00:00", + "commit": "c3440213b27ecae1583337c4b446991da21341a0", + "ref": "HEAD", + "label": "Linux/X64", + "run_count": 5, + "benchmarks": [ + { + "name": "parse example.asm", + "runs": 5, + "samples_per_run": 100, + "mean_ns": 155782.8, + "mean_ms": 0.1557828, + "stddev_across_runs_ns": 1852.4474891343075, + "stddev_across_runs_ms": 0.0018524474891343074, + "cv_percent": 1.1891219628446192, + "min_ms": 0.154184, + "max_ms": 0.158972, + 
"mean_within_run_stddev_ms": 0.016918420000000003, + "per_run_mean_ms": [ + 0.155571, + 0.155049, + 0.154184, + 0.155138, + 0.158972 + ] + }, + { + "name": "parse example_intel.asm", + "runs": 5, + "samples_per_run": 100, + "mean_ns": 7681760.0, + "mean_ms": 7.68176, + "stddev_across_runs_ns": 8631.949374272303, + "stddev_across_runs_ms": 0.008631949374272302, + "cv_percent": 0.11236942281810812, + "min_ms": 7.67117, + "max_ms": 7.69446, + "mean_within_run_stddev_ms": 0.08032122, + "per_run_mean_ms": [ + 7.67117, + 7.69446, + 7.67717, + 7.68252, + 7.68348 + ] + }, + { + "name": "parse gcc12_sort_object_reloc.asm", + "runs": 5, + "samples_per_run": 100, + "mean_ns": 6399328.0, + "mean_ms": 6.399328, + "stddev_across_runs_ns": 95071.93550149276, + "stddev_across_runs_ms": 0.09507193550149276, + "cv_percent": 1.4856549859843526, + "min_ms": 6.33027, + "max_ms": 6.56389, + "mean_within_run_stddev_ms": 0.33537957999999995, + "per_run_mean_ms": [ + 6.56389, + 6.37586, + 6.38739, + 6.33027, + 6.33923 + ] + }, + { + "name": "parse gcc12_bin_fmt_O2_flto.asm", + "runs": 5, + "samples_per_run": 100, + "mean_ns": 117524800.0, + "mean_ms": 117.5248, + "stddev_across_runs_ns": 272434.76283323317, + "stddev_across_runs_ms": 0.27243476283323315, + "cv_percent": 0.231810445823548, + "min_ms": 117.144, + "max_ms": 117.791, + "mean_within_run_stddev_ms": 0.9706758000000001, + "per_run_mean_ms": [ + 117.604, + 117.791, + 117.735, + 117.35, + 117.144 + ] + } + ] +} diff --git a/scripts/compare_bench.py b/scripts/compare_bench.py new file mode 100755 index 0000000..9912014 --- /dev/null +++ b/scripts/compare_bench.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Compare a benchmark summary against a committed baseline. + +Reads two summary JSON files produced by aggregate_bench.py and reports the +per-benchmark delta. Exits non-zero if any benchmark regressed (got slower) by +more than the threshold, so it can gate a CI job. 
# Usage:
#   compare_bench.py --baseline bench-results/baseline.json \
#                    --current bench-results/bench-summary.json \
#                    --threshold 10
#
# A Markdown table is written to $GITHUB_STEP_SUMMARY when that env var is set.
import argparse
import json
import os
import sys

_STATUS_EMOJI = {"REGRESSION": "🔴", "improved": "🟢", "ok": "⚪",
                 "NEW": "🆕", "MISSING": "⚠️"}


def load(path):
    """Load a summary JSON file.

    Returns:
        A ``(by_name, data)`` tuple: the entries of ``data["benchmarks"]``
        keyed by their ``name`` (empty if the key is absent) and the complete
        parsed document.
    """
    # Explicit encoding: do not depend on the locale of the runner.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return {b["name"]: b for b in data.get("benchmarks", [])}, data


def _build_rows(base, cur, threshold):
    """Return ``(rows, regressions)`` for the current vs. baseline benchmarks.

    Each row is ``(name, base_ms, cur_ms, delta_percent, status)``; values
    that do not exist for a row (NEW / MISSING benchmarks) are ``None``.
    ``regressions`` lists ``(name, delta_percent)`` for every benchmark that
    got slower by more than *threshold* percent.
    """
    rows = []
    regressions = []
    for name, c in cur.items():
        cur_ms = c["mean_ms"]
        b = base.get(name)
        if b is None:
            rows.append((name, None, cur_ms, None, "NEW"))
            continue
        base_ms = b["mean_ms"]
        # A zero baseline cannot yield a relative change; report no delta.
        delta = (cur_ms - base_ms) / base_ms * 100.0 if base_ms else 0.0
        status = "ok"
        if delta > threshold:
            status = "REGRESSION"
            regressions.append((name, delta))
        elif delta < -threshold:
            status = "improved"
        rows.append((name, base_ms, cur_ms, delta, status))

    # Benchmarks that exist in the baseline only are reported, not failed.
    rows.extend((name, b["mean_ms"], None, None, "MISSING")
                for name, b in base.items() if name not in cur)
    return rows, regressions


def _format_cells(base_ms, cur_ms, delta):
    """Format the three numeric cells of a row, using an em dash for None."""
    b = f"{base_ms:.4f} ms" if base_ms is not None else "—"
    c = f"{cur_ms:.4f} ms" if cur_ms is not None else "—"
    d = f"{delta:+.2f}%" if delta is not None else "—"
    return b, c, d


def _write_step_summary(path, rows, threshold, baseline_commit):
    """Append the comparison as a Markdown table to the file at *path*."""
    # The table contains emoji and "±", so the encoding must be UTF-8
    # regardless of the locale.
    with open(path, "a", encoding="utf-8") as f:
        f.write("## Benchmark vs baseline\n\n")
        f.write(f"Threshold: ±{threshold:.1f}% · "
                f"baseline `{baseline_commit[:12]}`\n\n")
        f.write("| benchmark | baseline | current | delta | status |\n")
        f.write("|---|--:|--:|--:|:--|\n")
        for name, base_ms, cur_ms, delta, status in rows:
            b, c, d = _format_cells(base_ms, cur_ms, delta)
            emoji = _STATUS_EMOJI.get(status, "")
            f.write(f"| {name} | {b} | {c} | {d} | {emoji} {status} |\n")


def main(argv=None):
    """Compare two summaries and report per-benchmark deltas.

    Args:
        argv: argument list to parse; ``None`` (the default) uses
            ``sys.argv[1:]``.

    Returns:
        1 if at least one benchmark regressed beyond the threshold, else 0.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--baseline", required=True)
    ap.add_argument("--current", required=True)
    ap.add_argument("--threshold", type=float, default=10.0,
                    help="max allowed regression in percent (default: 10)")
    args = ap.parse_args(argv)

    base, base_meta = load(args.baseline)
    cur, cur_meta = load(args.current)
    rows, regressions = _build_rows(base, cur, args.threshold)

    # A missing or null commit is shown as "?" instead of crashing on slicing.
    base_commit = str(base_meta.get("commit") or "?")
    cur_commit = str(cur_meta.get("commit") or "?")

    # Console report
    print(f"Baseline commit: {base_commit}")
    print(f"Current commit:  {cur_commit}")
    print(f"Threshold:       ±{args.threshold:.1f}%\n")
    header = f"{'benchmark':<36} {'baseline':>12} {'current':>12} {'delta':>9}  status"
    print(header)
    print("-" * len(header))
    for name, base_ms, cur_ms, delta, status in rows:
        b, c, d = _format_cells(base_ms, cur_ms, delta)
        print(f"{name:<36} {b:>12} {c:>12} {d:>9}  {status}")

    # GitHub step summary (Markdown)
    step_summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if step_summary:
        _write_step_summary(step_summary, rows, args.threshold, base_commit)

    if regressions:
        print(f"\n{len(regressions)} benchmark(s) regressed beyond ±{args.threshold:.1f}%:")
        for name, delta in regressions:
            print(f"  - {name}: {delta:+.2f}%")
        return 1
    print("\nNo regressions beyond threshold.")
    return 0


if __name__ == "__main__":
    sys.exit(main())