From 12bdc107aad9b5fcb559a7f6fbc28d3b1d05d181 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 21 Apr 2026 19:36:25 +0200 Subject: [PATCH] =?UTF-8?q?bench:=20=C2=A70.1=20=E2=80=94=20COMPARE=3Dperl?= =?UTF-8?q?,=20life=5Fbitpacked,=20markdown=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prerequisite for the §0 parity work (see dev/design/next_steps.md). Without side-by-side `jperl` vs `perl` numbers, perf changes are hard to evaluate and regressions (like the one currently shipped in PR #526) slip in unnoticed. Changes to dev/bench/run_baseline.sh: - COMPARE=perl: when set, each bench runs under both jperl and system perl (configurable via PERL=/path/to/perl); ratio + parity marker are emitted in the markdown summary. - life_bitpacked: if examples/life_bitpacked.pl exists, run it with `-r none -g 500` and parse "Cell updates per second" from stdout. Tracked as Mcells/s rather than wallclock seconds, so higher = faster (ratio is inverted: perl / jperl). - Dual output: baseline-<sha>.json (machine-readable) AND baseline-<sha>.md (human-readable table). - SKIP_LIFE=1 to opt out of the life bench (e.g. in CI where startup cost dominates short runs). - bash 3.2 compatible (macOS default) — no associative arrays. Add dev/bench/README.md documenting the harness, workloads, and link to the parity plan. Current baseline (PR #526 tip, BENCH_RUNS=1 sanity run, macOS M-series, perl 5.42): lexical 0.40× ✅ string 0.62× ✅ closure 1.39× ❌ regex 1.36× ❌ method 1.78× ❌ global 1.92× ❌ refcount_anon 3.82× ❌ eval_string 4.68× ❌ anon_simple 5.43× ❌ refcount_bless 6.67× ❌ life_bitpacked ~3.17× ❌ (6.47 Mcells/s vs 20.52) 3 runs per bench (default) amortizes JVM startup better than the single-run shown above. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/bench/README.md | 54 +++++++++ dev/bench/run_baseline.sh | 241 ++++++++++++++++++++++++++++++++------ 2 files changed, 257 insertions(+), 38 deletions(-) create mode 100644 dev/bench/README.md diff --git a/dev/bench/README.md b/dev/bench/README.md new file mode 100644 index 000000000..8a65ac356 --- /dev/null +++ b/dev/bench/README.md @@ -0,0 +1,54 @@ +# PerlOnJava microbenchmarks + +Tiny workloads to catch per-call / per-op regressions that CPAN test +suites don't surface cleanly. The headline goal is **parity with +system `perl`** on every benchmark here (≤ 1.0× wallclock). + +## Run + +```bash +# Fast: jperl only (default 3 runs per bench) +dev/bench/run_baseline.sh + +# Side-by-side with system perl (gives ratio + parity marker) +COMPARE=perl dev/bench/run_baseline.sh + +# Other knobs +BENCH_RUNS=5 dev/bench/run_baseline.sh +PERL=/opt/homebrew/bin/perl COMPARE=perl dev/bench/run_baseline.sh +SKIP_LIFE=1 dev/bench/run_baseline.sh +``` + +Outputs for `<sha>` (the short git SHA of `HEAD`): + +- `results/baseline-<sha>.json` — machine-readable +- `results/baseline-<sha>.md` — human-readable markdown table + +With `COMPARE=perl` the markdown has a `ratio` column and a parity +marker: **✅** (≤1.0×), **≈** (≤1.2×), **❌** (>1.2×). 
+ +## Workloads + +| File | Measures | +|---|---| +| `benchmark_anon_simple.pl` | anon-sub creation churn (no blessing) | +| `benchmark_closure.pl` | closure capture + invoke | +| `benchmark_eval_string.pl` | `eval "..."` compile+run overhead | +| `benchmark_global.pl` | package-global variable access | +| `benchmark_lexical.pl` | `my` variable access | +| `benchmark_method.pl` | OO method dispatch (inline cache hot) | +| `benchmark_refcount_anon.pl` | anon-sub + refcount traffic (plain refs) | +| `benchmark_refcount_bless.pl` | anon-sub + blessed refs (walker / DESTROY machinery) | +| `benchmark_regex.pl` | regex compile+match on hot path | +| `benchmark_string.pl` | concat / substr / index | +| `benchmark_memory*.pl` | memory footprint (not in the baseline loop) | +| `examples/life_bitpacked.pl` | real workload (Conway bit-packed) — reports Mcells/s instead of wallclock seconds | + +## Historical baselines + +`results/` keeps per-sha snapshots. Treat anything before 2026-04-21 +(PR #526 merge) as the old single-column format (jperl-only, no +`perl` comparison, no markdown). + +See [`dev/design/next_steps.md`](../design/next_steps.md) §0 for the +parity plan and current gap analysis. diff --git a/dev/bench/run_baseline.sh b/dev/bench/run_baseline.sh index 4bdf22381..0203160e7 100755 --- a/dev/bench/run_baseline.sh +++ b/dev/bench/run_baseline.sh @@ -1,14 +1,20 @@ #!/usr/bin/env bash # run_baseline.sh -# Captures timings for every benchmark_*.pl under dev/bench and writes a -# JSON-ish summary to dev/bench/results/.json. +# Capture wallclock timings for every benchmark_*.pl under dev/bench and +# (optionally) the life_bitpacked example, for both jperl and — if +# COMPARE=perl is set — system perl. 
Results are written to: +# dev/bench/results/baseline-<sha>.json (machine-readable) +# dev/bench/results/baseline-<sha>.md (human-readable table) # # Usage: -# dev/bench/run_baseline.sh # runs against jperl -# BENCH_RUNS=5 dev/bench/run_baseline.sh # repeat each bench 5 times -# COMPARE=perl dev/bench/run_baseline.sh # also run each with system perl +# dev/bench/run_baseline.sh # jperl only +# COMPARE=perl dev/bench/run_baseline.sh # side-by-side with system perl +# BENCH_RUNS=5 dev/bench/run_baseline.sh # 5 runs per bench (default 3) +# PERL=/path/to/perl COMPARE=perl dev/bench/run_baseline.sh # specific perl +# SKIP_LIFE=1 dev/bench/run_baseline.sh # skip examples/life_bitpacked # -# The output is intentionally hand-written JSON (no deps) so it's stable in diffs. +# The JSON output is hand-written so it's stable in diffs (no jq dep). +# Written for bash 3.2+ (macOS default) — no associative arrays. set -u @@ -16,56 +22,215 @@ REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" cd "$REPO_ROOT" JPERL="${JPERL:-$REPO_ROOT/jperl}" +PERL="${PERL:-perl}" SHA="$(git rev-parse --short HEAD 2>/dev/null || echo unknown)" BENCH_RUNS="${BENCH_RUNS:-3}" +COMPARE="${COMPARE:-}" +SKIP_LIFE="${SKIP_LIFE:-}" + OUT_DIR="$REPO_ROOT/dev/bench/results" mkdir -p "$OUT_DIR" -OUT_FILE="$OUT_DIR/baseline-$SHA.json" +OUT_JSON="$OUT_DIR/baseline-$SHA.json" +OUT_MD="$OUT_DIR/baseline-$SHA.md" if [ ! -x "$JPERL" ]; then echo "ERROR: $JPERL not found or not executable — run 'make dev' first" >&2 exit 1 fi -echo "Writing results to: $OUT_FILE" +PERL_VERSION="" +if [ -n "$COMPARE" ]; then + if ! 
command -v "$PERL" >/dev/null 2>&1; then + echo "ERROR: COMPARE=$COMPARE was requested but '$PERL' is not on PATH" >&2 + exit 1 + fi + PERL_VERSION="$("$PERL" -e 'print $]' 2>/dev/null || echo unknown)" +fi + +echo "Writing JSON to: $OUT_JSON" +echo "Writing MD to: $OUT_MD" echo "Runs per benchmark: $BENCH_RUNS" +[ -n "$COMPARE" ] && echo "Comparing against: $PERL (version $PERL_VERSION)" echo +# -- Benchmark runners --------------------------------------------------- +# run_times <bin> <bench> +# echoes BENCH_RUNS wallclock seconds, comma-separated. +run_times() { + local bin="$1" bench="$2" + local times=() t i + for i in $(seq 1 "$BENCH_RUNS"); do + t=$({ TIMEFORMAT='%R'; time "$bin" "$bench" >/dev/null 2>&1; } 2>&1) + times+=("$t") + done + (IFS=,; echo "${times[*]}") +} + +# life_mcells <bin> +# Runs life_bitpacked.pl with fixed args and extracts Mcells/s values. +life_mcells() { + local bin="$1" + local values=() v i + for i in $(seq 1 "$BENCH_RUNS"); do + v=$("$bin" examples/life_bitpacked.pl -r none -g 500 2>/dev/null \ + | grep -oE 'Cell updates per second: [0-9.]+ Mcells/s' \ + | grep -oE '[0-9.]+' | head -1) + [ -z "$v" ] && v="0" + values+=("$v") + done + (IFS=,; echo "${values[*]}") +} + +# -- Parallel indexed arrays (bash 3.2 compatible) ----------------------- +BENCH_NAMES=() +BENCH_UNITS=() +BENCH_JPERL=() +BENCH_PERL=() + +# push_result <name> <unit> <jperl_csv> <perl_csv> +push_result() { + BENCH_NAMES+=("$1") + BENCH_UNITS+=("$2") + BENCH_JPERL+=("$3") + BENCH_PERL+=("$4") +} + +for bench in dev/bench/benchmark_*.pl; do + name="$(basename "$bench" .pl)" + case "$name" in + benchmark_memory|benchmark_memory_delta) continue ;; + esac + + echo " [jperl] $name" >&2 + jtimes="$(run_times "$JPERL" "$bench")" + + ptimes="" + if [ -n "$COMPARE" ]; then + echo " [perl] $name" >&2 + ptimes="$(run_times "$PERL" "$bench")" + fi + + push_result "$name" "s" "$jtimes" "$ptimes" +done + +if [ -z "$SKIP_LIFE" ] && [ -f "examples/life_bitpacked.pl" ]; then + name="life_bitpacked" + echo " [jperl] $name" >&2 + 
jvals="$(life_mcells "$JPERL")" + pvals="" + if [ -n "$COMPARE" ]; then + echo " [perl] $name" >&2 + pvals="$(life_mcells "$PERL")" + fi + push_result "$name" "Mcells/s" "$jvals" "$pvals" +fi + +# -- Helpers -------------------------------------------------------------- +# avg_csv "1.0,2.0,3.0" -> "2.000" +avg_csv() { + awk -v s="$1" 'BEGIN{ + n = split(s, a, ","); sum = 0; + for (i = 1; i <= n; i++) sum += a[i]; + if (n == 0) { print "0.000"; exit } + printf "%.3f", sum / n; + }' +} + +# ratio "a" "b" [higher_better] -> printed ratio +ratio() { + awk -v a="$1" -v b="$2" -v h="${3:-0}" 'BEGIN{ + if (a == 0 || b == 0) { print "inf"; exit } + r = (h == "1") ? b / a : a / b; + printf "%.2f", r; + }' +} + +# -- Emit JSON ------------------------------------------------------------ { echo "{" - echo " \"git_sha\": \"$SHA\"," - echo " \"date\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"," - echo " \"runs\": $BENCH_RUNS," - echo " \"jperl\": \"$JPERL\"," - echo " \"benchmarks\": {" - - first=1 - for bench in dev/bench/benchmark_*.pl; do - name="$(basename "$bench" .pl)" - # Skip memory benches from baseline loop (they are slow + already - # write their own output files). - case "$name" in - benchmark_memory|benchmark_memory_delta) continue ;; - esac - - echo " -> $name" >&2 - times=() - for i in $(seq 1 "$BENCH_RUNS"); do - # Use Bash's builtin time for wallclock, captured via redirection. - # Some benches print to stdout; discard it. 
- t=$({ TIMEFORMAT='%R'; time "$JPERL" "$bench" >/dev/null 2>&1; } 2>&1) - times+=("$t") - done - - [ $first -eq 0 ] && echo "," - first=0 - printf ' "%s": [%s]' "$name" "$(IFS=,; echo "${times[*]}")" + echo " \"git_sha\": \"$SHA\"," + echo " \"date\": \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"," + echo " \"runs\": $BENCH_RUNS," + echo " \"jperl\": \"$JPERL\"," + if [ -n "$COMPARE" ]; then + echo " \"perl\": \"$PERL\"," + echo " \"perl_version\": \"$PERL_VERSION\"," + fi + echo " \"benchmarks\": {" + + n=${#BENCH_NAMES[@]} + for i in $(seq 0 $((n - 1))); do + [ $i -gt 0 ] && echo "," + name="${BENCH_NAMES[$i]}" + unit="${BENCH_UNITS[$i]}" + jv="${BENCH_JPERL[$i]}" + pv="${BENCH_PERL[$i]}" + if [ -n "$COMPARE" ]; then + printf ' "%s": { "unit": "%s", "jperl": [%s], "perl": [%s] }' \ + "$name" "$unit" "$jv" "$pv" + else + printf ' "%s": { "unit": "%s", "jperl": [%s] }' "$name" "$unit" "$jv" + fi done echo echo " }" echo "}" -} > "$OUT_FILE" +} > "$OUT_JSON" + +# -- Emit Markdown -------------------------------------------------------- +{ + echo "# Benchmark baseline — $SHA" + echo + echo "**Date:** $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "**Runs per benchmark:** $BENCH_RUNS" + echo "**jperl:** \`$JPERL\`" + if [ -n "$COMPARE" ]; then + echo "**perl:** \`$PERL\` ($PERL_VERSION)" + echo + echo "For \"time\" benches lower = faster; ratio is \`jperl / perl\`." + echo "For \"Mcells/s\" (life_bitpacked) higher = faster; ratio is \`perl / jperl\`." + echo + echo "| Benchmark | unit | jperl | perl | ratio | parity? 
|" + echo "|---|---|---:|---:|---:|:---:|" + else + echo + echo "| Benchmark | unit | jperl |" + echo "|---|---|---:|" + fi + n=${#BENCH_NAMES[@]} + for i in $(seq 0 $((n - 1))); do + name="${BENCH_NAMES[$i]}" + unit="${BENCH_UNITS[$i]}" + jperl_avg="$(avg_csv "${BENCH_JPERL[$i]}")" + + if [ -n "$COMPARE" ]; then + perl_avg="$(avg_csv "${BENCH_PERL[$i]}")" + higher_is_better=0 + [ "$unit" = "Mcells/s" ] && higher_is_better=1 + + r="$(ratio "$jperl_avg" "$perl_avg" "$higher_is_better")" + + # Parity marker: + # ✅ ratio ≤ 1.00× (at or faster than perl) + # ≈ ratio ≤ 1.20× (within 20%) + # ❌ ratio > 1.20× (slower) + marker="❌" + if awk -v r="$r" 'BEGIN{ exit !(r <= 1.00) }'; then + marker="✅" + elif awk -v r="$r" 'BEGIN{ exit !(r <= 1.20) }'; then + marker="≈" + fi + + printf "| \`%s\` | %s | %s | %s | **%s×** | %s |\n" \ + "$name" "$unit" "$jperl_avg" "$perl_avg" "$r" "$marker" + else + printf "| \`%s\` | %s | %s |\n" "$name" "$unit" "$jperl_avg" + fi + done +} > "$OUT_MD" + +echo +echo "Done. Markdown summary:" echo -echo "Done. Summary:" -cat "$OUT_FILE" +cat "$OUT_MD"