diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index f889d3f..b499386 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,14 +1,14 @@ style = "default" indent = 4 -margin = 92 +margin = 96 always_for_in = true whitespace_typedefs = true whitespace_ops_in_indices = true remove_extra_newlines = true short_to_long_function_def = false -long_to_short_function_def = false +long_to_short_function_def = true always_use_return = false -whitespace_in_kwargs = true +whitespace_in_kwargs = false annotate_untyped_fields_with_any = false format_docstrings = false align_assignment = false @@ -16,4 +16,4 @@ align_struct_field = false align_conditional = false align_pair_arrow = false trailing_comma = true -join_lines_based_on_source = true +join_lines_based_on_source = false diff --git a/.github/README.md b/.github/README.md new file mode 100644 index 0000000..5307183 --- /dev/null +++ b/.github/README.md @@ -0,0 +1,12 @@ +# GitHub Configuration + +This directory holds GitHub-specific repository automation and metadata. + +## Contents + +- `workflows/`: GitHub Actions CI, docs, release, and maintenance workflows. + +## Notes + +- Workflow behavior is documented in [`../docs/src/ci-testing.md`](../docs/src/ci-testing.md). +- Generated macOS metadata files such as `.DS_Store` are not meaningful project content. diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml.disabled similarity index 100% rename from .github/workflows/CompatHelper.yml rename to .github/workflows/CompatHelper.yml.disabled diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..c58ba2d --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,18 @@ +# GitHub Actions Workflows + +This folder contains the repository's GitHub Actions workflows. + +## Files + +- `ci.yml`: fast Linux CI for unit tests, notebooks, and coverage upload. 
+- `ci-full.yml`: broader matrix CI for pull requests to `develop` and `master`. +- `docs.yml`: Documenter build and deployment workflow. +- `TagBot.yml`: release-tag automation. + +### Disabled + +- `format.yml.disabled`: disabled formatter workflow kept for reference. +- `CompatHelper.yml.disabled`: dependency update automation. +- `nightly.yml.disabled`: scheduled regression sweep on non-Linux platforms. + +See [`../../docs/src/ci-testing.md`](../../docs/src/ci-testing.md) for the user-facing workflow overview. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c4f6b6..40ecf2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,17 +38,22 @@ jobs: - name: Install dependencies run: julia --project=. -e 'using Pkg; Pkg.instantiate()' - - name: Run unit tests - run: julia --project=. -e 'using Pkg; Pkg.test()' + - name: Run unit tests with coverage + run: julia --project=. -e 'using Pkg; Pkg.test(coverage=true)' - name: Process coverage - if: matrix.julia-version == '1.11' uses: julia-actions/julia-processcoverage@v1 with: directories: src + - name: Upload LCOV artifact + uses: actions/upload-artifact@v4 + with: + name: coverage-julia-${{ matrix.julia-version }}-linux + path: lcov.info + if-no-files-found: warn + - name: Upload coverage to Codecov - if: matrix.julia-version == '1.11' uses: codecov/codecov-action@v4 with: files: lcov.info diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml.disabled similarity index 100% rename from .github/workflows/nightly.yml rename to .github/workflows/nightly.yml.disabled diff --git a/.gitignore b/.gitignore index 9a74e17..16c4bc5 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ benchmark/results/ # Developer-specific files sc_* +lcov.info diff --git a/Makefile b/Makefile index a8c9f30..c76e49e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ -.PHONY: test doc format format-check lint clean bench bench-compare bench-compare-py bench-compare-py-label 
bench-all-label bench-compare-labels check-qmcpy-python +.PHONY: test coverage doc format format-check lint clean bench bench-compare bench-compare-py bench-compare-py-label bench-all-label bench-compare-labels check-qmcpy-python FORMATTER_PROJECT=devtools/formatter +DOC_DEPOT ?= $(if $(TMPDIR),$(patsubst %/,%,$(TMPDIR)),/tmp)/qmcju-doc-depot QMCPY_PYTHON_AUTO := $(shell \ for py in python python3 "$(HOME)/miniconda3/bin/python" "$(HOME)/miniconda3/envs/qmcpy/bin/python" "$(HOME)/miniconda3/envs/qmcpy-leadership/bin/python"; do \ if { [ -x "$$py" ] || command -v "$$py" >/dev/null 2>&1; } && "$$py" -c "import qmcpy" >/dev/null 2>&1; then \ @@ -45,27 +46,35 @@ update: test: julia --project=. -e 'using Pkg; Pkg.instantiate(); Pkg.test()' +# Run tests with Julia coverage instrumentation +coverage: + find src test -name '*.cov' -delete + rm -f lcov.info + julia --project=. -e 'using Pkg; Pkg.instantiate(); Pkg.test(coverage=true)' + julia --project=. devtools/process_coverage.jl + # Run specific test file test-%: julia --project=. 
-e 'include("test/$*.jl")' # Build documentation doc: - julia --project=docs -e 'using Pkg; Pkg.instantiate(); Pkg.resolve()' - julia --project=docs docs/make.jl + rm -rf docs/build + JULIA_DEPOT_PATH="$(DOC_DEPOT):$(HOME)/.julia" julia --project=docs -e 'using Pkg; Pkg.instantiate(); Pkg.resolve()' + JULIA_DEPOT_PATH="$(DOC_DEPOT):$(HOME)/.julia" julia --project=docs docs/make.jl -# Format code with JuliaFormatter +# Format code with JuliaFormatter (uses the repo .JuliaFormatter.toml for all paths) format: - julia --project=$(FORMATTER_PROJECT) -e 'using Pkg; Pkg.instantiate(); using JuliaFormatter; format("src/"); format("test/")' + julia --project=$(FORMATTER_PROJECT) -e 'using Pkg; Pkg.instantiate(); using JuliaFormatter; format(["src/", "test/", "benchmark/"])' # Check formatting (CI-friendly, fails if changes needed) format-check: - julia --project=$(FORMATTER_PROJECT) -e 'using Pkg; Pkg.instantiate(); using JuliaFormatter; @assert format("src/", overwrite=false); @assert format("test/", overwrite=false)' + julia --project=$(FORMATTER_PROJECT) -e 'using Pkg; Pkg.instantiate(); using JuliaFormatter; @assert format(["src/", "test/", "benchmark/"], overwrite=false)' # Clean build artifacts clean: rm -rf docs/build - rm -rf *.jl.cov *.jl.*.cov *.jl.mem + rm -rf *.jl.cov *.jl.*.cov *.jl.mem lcov.info # Instantiate project dependencies (includes Plots and all other deps). Download what Manifest.toml says. setup: diff --git a/README.md b/README.md index fffac59..37a9c3c 100644 --- a/README.md +++ b/README.md @@ -96,10 +96,23 @@ julia -e 'using IJulia; notebook(dir="demos")' Run the benchmark suite: ```bash -julia --project=. benchmark/benchmarks.jl +make bench +# or +julia benchmark/runbenchmarks.jl ``` -This benchmarks sampling, transforms, integrand evaluation, and end-to-end integration across all DD types. Results are saved to `benchmark/results/latest.json`. 
+This benchmarks sampling, transforms, integrand evaluation, and end-to-end integration across all DD types. Results are saved under `benchmark/results/`. See [`benchmark/README.md`](benchmark/README.md) for the full workflow, comparison scripts, and the Julia-vs-QMCPy accuracy sidecars. + +## Repository Layout + +Most top-level and source subdirectories now include a local `README.md` describing +their purpose and the files they contain. Useful starting points: + +- [`benchmark/README.md`](benchmark/README.md) — standalone benchmarking and comparison tooling +- [`demos/README.md`](demos/README.md) — notebook demos and how to run them +- [`docs/README.md`](docs/README.md) — Documenter build/deploy layout +- [`src/README.md`](src/README.md) — package source tree and component folders +- [`test/README.md`](test/README.md) — unit tests, notebook tests, and coverage commands ## Documentation @@ -119,6 +132,11 @@ using QMC # Unit tests julia --project=. -e 'using Pkg; Pkg.test()' +# Unit tests with coverage instrumentation +julia --project=. -e 'using Pkg; Pkg.test(coverage=true)' +# or +make coverage + # Demo notebooks julia --project=. test/run_notebooks.jl @@ -126,7 +144,9 @@ julia --project=. test/run_notebooks.jl julia --project=docs docs/make.jl ``` -See [CI/CD Testing](https://qmcsoftware.github.io/QMC.jl/ci-testing/) for workflow details. +CI uploads LCOV coverage reports to Codecov and stores the generated `lcov.info` +as a workflow artifact. See [`test/README.md`](test/README.md) and +[CI/CD Testing](https://qmcsoftware.github.io/QMC.jl/ci-testing/) for details. ## Citation diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index a9c8810..285b52e 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -53,16 +53,14 @@ const SUITE = BenchmarkGroup() # and Kronecker are pure Julia. 
SUITE["gen_samples"] = BenchmarkGroup() for dim in DIMS, n in SAMPLES - SUITE["gen_samples"]["IIDStdUniform d=$dim n=$n"] = - bench_gen_samples(IIDStdUniform, dim, n) + SUITE["gen_samples"]["IIDStdUniform d=$dim n=$n"] = bench_gen_samples(IIDStdUniform, dim, n) SUITE["gen_samples"]["Lattice d=$dim n=$n"] = bench_gen_samples(Lattice, dim, n; randomize=true) SUITE["gen_samples"]["DigitalNetB2 d=$dim n=$n"] = bench_gen_samples(DigitalNetB2, dim, n; randomize="LMS_DS") SUITE["gen_samples"]["Halton d=$dim n=$n"] = bench_gen_samples(Halton, dim, n; randomize=true) - SUITE["gen_samples"]["Kronecker d=$dim n=$n"] = - bench_gen_samples(Kronecker, dim, n) + SUITE["gen_samples"]["Kronecker d=$dim n=$n"] = bench_gen_samples(Kronecker, dim, n) end # 2. Transform (pure Julia: inverse-CDF + covariance factor) @@ -200,25 +198,35 @@ end function _int_cubmcclt_asian() dd = IIDStdUniform(50; seed=42) - tm = GeometricBrownianMotion(dd; volatility=0.2, start_price=100.0, - interest_rate=0.05, t_final=1.0) + tm = GeometricBrownianMotion( + dd; + volatility=0.2, + start_price=100.0, + interest_rate=0.05, + t_final=1.0, + ) f = FinancialOption(tm; option_type=:asian, strike_price=100.0) CubMCCLT(f; abs_tol=0.5) end function _int_cubqmcnetg_european() dd = DigitalNetB2(50; seed=42, randomize="LMS_DS") - tm = GeometricBrownianMotion(dd; volatility=0.2, start_price=100.0, - interest_rate=0.05, t_final=1.0) + tm = GeometricBrownianMotion( + dd; + volatility=0.2, + start_price=100.0, + interest_rate=0.05, + t_final=1.0, + ) f = FinancialOption(tm; option_type=:european, strike_price=100.0) CubQMCNetG(f; abs_tol=0.5) end -const INTEGRATE_CASES = Pair{String,Function}[ - "CubMCCLT Keister" => _int_cubmcclt_keister, - "CubQMCLatticeG Keister" => _int_cubqmclatticeg_keister, - "CubQMCNetG Keister" => _int_cubqmcnetg_keister, - "CubMCCLT AsianOption" => _int_cubmcclt_asian, +const INTEGRATE_CASES = Pair{String, Function}[ + "CubMCCLT Keister" => _int_cubmcclt_keister, + "CubQMCLatticeG Keister" => 
_int_cubqmclatticeg_keister, + "CubQMCNetG Keister" => _int_cubqmcnetg_keister, + "CubMCCLT AsianOption" => _int_cubmcclt_asian, "CubQMCNetG EuropeanOption" => _int_cubqmcnetg_european, ] diff --git a/benchmark/compare.jl b/benchmark/compare.jl index 07ffa3e..1111ed4 100644 --- a/benchmark/compare.jl +++ b/benchmark/compare.jl @@ -24,7 +24,7 @@ using Pkg Pkg.activate(@__DIR__) let deps = keys(Pkg.project().dependencies) - "QMC" in deps || Pkg.develop(; path = dirname(@__DIR__)) + "QMC" in deps || Pkg.develop(; path=dirname(@__DIR__)) "BenchmarkTools" in deps || Pkg.add("BenchmarkTools") "PkgBenchmark" in deps || Pkg.add("PkgBenchmark") end @@ -53,14 +53,17 @@ function collect_comparison_rows(target_result, baseline_result) haskey(bg[group], name) || continue local_trial = median(tg[group][name]) ref_trial = median(bg[group][name]) - push!(rows, ( - group = group, - name = name, - local_ms = local_trial.time / 1e6, - ref_ms = ref_trial.time / 1e6, - local_kib = local_trial.memory / 1024, - ref_kib = ref_trial.memory / 1024, - )) + push!( + rows, + ( + group=group, + name=name, + local_ms=(local_trial.time / 1e6), + ref_ms=(ref_trial.time / 1e6), + local_kib=(local_trial.memory / 1024), + ref_kib=(ref_trial.memory / 1024), + ), + ) end end return rows @@ -73,18 +76,18 @@ function summary_metrics(rows) total_local_kib = sum(row.local_kib for row in rows) total_ref_kib = sum(row.ref_kib for row in rows) return ( - matched = length(rows), - local_ms = total_local_ms, - ref_ms = total_ref_ms, - time_ratio = total_local_ms > 0 ? total_ref_ms / total_local_ms : NaN, - local_kib = total_local_kib, - ref_kib = total_ref_kib, - memory_ratio = total_local_kib > 0 ? total_ref_kib / total_local_kib : NaN, + matched=length(rows), + local_ms=total_local_ms, + ref_ms=total_ref_ms, + time_ratio=total_local_ms > 0 ? total_ref_ms / total_local_ms : NaN, + local_kib=total_local_kib, + ref_kib=total_ref_kib, + memory_ratio=total_local_kib > 0 ? 
total_ref_kib / total_local_kib : NaN, ) end "Write a sanitized single-run benchmark summary without host or path metadata." -function write_single_run_md(outfile, result; label = "local") +function write_single_run_md(outfile, result; label="local") groupdata = result.benchmarkgroup open(outfile, "w") do io println(io, "# Benchmark: `$(label)`\n") @@ -93,8 +96,15 @@ function write_single_run_md(outfile, result; label = "local") for group in sort(collect(keys(groupdata))) for name in sort(collect(keys(groupdata[group]))) trial = median(groupdata[group][name]) - @printf(io, "| `[\"%s\", \"%s\"]` | %.3f | %d | %.1f |\n", - group, name, trial.time / 1e6, trial.allocs, trial.memory / 1024) + @printf( + io, + "| `[\"%s\", \"%s\"]` | %.3f | %d | %.1f |\n", + group, + name, + trial.time / 1e6, + trial.allocs, + trial.memory / 1024 + ) end end end @@ -109,8 +119,13 @@ Ratio = reference (`baseline_label`) time ÷ local (`target_label`) time. - ratio < 1 → local is **slower** than reference ❌ - ratio > 1 → local is **faster** than reference ✅ """ -function write_comparison_md(outfile, target_result, baseline_result, - target_label, baseline_label) +function write_comparison_md( + outfile, + target_result, + baseline_result, + target_label, + baseline_label, +) rows = collect_comparison_rows(target_result, baseline_result) summary = summary_metrics(rows) open(outfile, "w") do io @@ -121,30 +136,66 @@ function write_comparison_md(outfile, target_result, baseline_result, println(io, "| date | $(target_result.date) | $(baseline_result.date) |") println(io, "") println(io, "**`ratio = reference time ÷ local time`** ") - println(io, "ratio `< 1` → local is **slower** ❌ | ratio `> 1` → local is **faster** ✅") + println( + io, + "ratio `< 1` → local is **slower** ❌ | ratio `> 1` → local is **faster** ✅", + ) println(io, "") println(io, "## Aggregate Summary\n") println(io, "| metric | ratio | local total | reference total |") println(io, 
"|:-------|------:|------------:|----------------:|") println(io, "| matched benchmarks | $(summary.matched) | — | — |") - @printf(io, "| weighted time ratio | %.3f | %.3f ms | %.3f ms |\n", - summary.time_ratio, summary.local_ms, summary.ref_ms) - @printf(io, "| weighted memory ratio | %.3f | %.1f KiB | %.1f KiB |\n\n", - summary.memory_ratio, summary.local_kib, summary.ref_kib) + @printf( + io, + "| weighted time ratio | %.3f | %.3f ms | %.3f ms |\n", + summary.time_ratio, + summary.local_ms, + summary.ref_ms + ) + @printf( + io, + "| weighted memory ratio | %.3f | %.1f KiB | %.1f KiB |\n\n", + summary.memory_ratio, + summary.local_kib, + summary.ref_kib + ) println(io, "| benchmark | ratio | verdict | local (ms) | reference (ms) |") println(io, "|:----------|------:|:-------:|----------:|---------------:|") for row in rows ratio = row.ref_ms / row.local_ms verdict = ratio < 0.95 ? "❌" : ratio > 1.05 ? "✅" : "–" - @printf(io, "| `[\"%s\", \"%s\"]` | %.3f | %s | %.3f | %.3f |\n", - row.group, row.name, ratio, verdict, row.local_ms, row.ref_ms) + @printf( + io, + "| `[\"%s\", \"%s\"]` | %.3f | %s | %.3f | %.3f |\n", + row.group, + row.name, + ratio, + verdict, + row.local_ms, + row.ref_ms + ) end end return summary end "Benchmark the current working tree in place (uncommitted changes included)." -bench_worktree() = benchmarkpkg(PKG; verbose = false) +bench_worktree() = benchmarkpkg(PKG; verbose=false) + +"Run `f()` with `project_dir` active and `pkgdir` developed as `QMC`." +function with_benchmark_env(project_dir::AbstractString, pkgdir::AbstractString, f::Function) + original_project = Base.active_project() + try + Pkg.activate(project_dir; io=devnull) + Pkg.develop(; path=pkgdir, io=devnull) + Pkg.instantiate(; io=devnull) + return f() + finally + if original_project !== nothing + Pkg.activate(dirname(original_project); io=devnull) + end + end +end "Benchmark a committed `rev` in a throwaway git worktree, leaving PKG untouched. 
The current benchmark/benchmarks.jl is copied into the worktree first, so the @@ -152,9 +203,11 @@ revision's own (possibly old or broken) benchmarks.jl is never used — both sid run today's suite against their respective package source." function bench_revision(rev::AbstractString) if !success(`git -C $PKG rev-parse --verify --quiet $rev`) - error("git revision \"$rev\" not found in this repository. Pass a valid " * - "branch/tag/commit, e.g. `make bench-compare REV=HEAD` (compare " * - "uncommitted changes against the last commit) or `REV=master`.") + error( + "git revision \"$rev\" not found in this repository. Pass a valid " * + "branch/tag/commit, e.g. `make bench-compare REV=HEAD` (compare " * + "uncommitted changes against the last commit) or `REV=master`.", + ) end parent = mktempdir() wt = joinpath(parent, "wt") # must not pre-exist; `git worktree add` creates it @@ -163,15 +216,42 @@ function bench_revision(rev::AbstractString) # Run the CURRENT benchmark suite against the revision's package source, so # an old/broken benchmarks.jl committed at `rev` doesn't break the run and # both sides measure the same suite. - cp(joinpath(PKG, "benchmark", "benchmarks.jl"), - joinpath(wt, "benchmark", "benchmarks.jl"); force = true) - return benchmarkpkg(wt; verbose = false) + cp( + joinpath(PKG, "benchmark", "benchmarks.jl"), + joinpath(wt, "benchmark", "benchmarks.jl"); + force=true, + ) + # Commit the synced suite inside the throwaway worktree so its working tree + # is clean. PkgBenchmark checks out the commit it benchmarks and then tries + # to restore the original sha; on a *dirty* detached worktree that restore + # fails and emits "Failed to return back to original sha …". A clean tree + # avoids the stash/checkout dance. This only moves the worktree's detached + # HEAD (the main repo and `rev` are untouched), and the worktree is removed + # below regardless. 
+ try + run(`git -C $wt add -A`) + run( + pipeline( + `git -C $wt -c user.email=bench@localhost -c user.name=bench commit -q --allow-empty -m bench-sync-suite`; + stdout=devnull, + stderr=devnull, + ), + ) + catch + # If committing fails (e.g. git identity unavailable), benchmarking + # still works; PkgBenchmark may just re-emit its restore warning. + end + return with_benchmark_env( + joinpath(wt, "benchmark"), + wt, + () -> benchmarkpkg(wt; verbose=false), + ) finally try run(`git -C $PKG worktree remove --force $wt`) catch end - rm(parent; force = true, recursive = true) + rm(parent; force=true, recursive=true) end end @@ -182,40 +262,80 @@ if length(ARGS) == 0 println("\nWrote benchmark/results/bench_local.md (single-run summary, no comparison)") elseif length(ARGS) == 1 baseline_rev = ARGS[1] - target = bench_worktree() # current tree (dirty OK) + target = bench_worktree() # current tree (dirty OK) baseline = bench_revision(baseline_rev) - outfile = comparison_outfile("") + outfile = comparison_outfile("") summary = write_comparison_md(outfile, target, baseline, "local", baseline_rev) println("\nWrote benchmark/results/$(basename(outfile)) (local vs $(baseline_rev))") println(" ratio = $(baseline_rev) ÷ local → < 1: local slower | > 1: local faster") - @printf(" weighted time ratio = %.3f (%s total %.3f ms vs %s total %.3f ms)\n", - summary.time_ratio, baseline_rev, summary.ref_ms, "local", summary.local_ms) - @printf(" weighted memory ratio = %.3f (%s total %.1f KiB vs %s total %.1f KiB)\n", - summary.memory_ratio, baseline_rev, summary.ref_kib, "local", summary.local_kib) + @printf( + " weighted time ratio = %.3f (%s total %.3f ms vs %s total %.3f ms)\n", + summary.time_ratio, + baseline_rev, + summary.ref_ms, + "local", + summary.local_ms + ) + @printf( + " weighted memory ratio = %.3f (%s total %.1f KiB vs %s total %.1f KiB)\n", + summary.memory_ratio, + baseline_rev, + summary.ref_kib, + "local", + summary.local_kib + ) elseif length(ARGS) == 2 
baseline_rev, out_label = ARGS[1], ARGS[2] - target = bench_worktree() # current tree (dirty OK) + target = bench_worktree() # current tree (dirty OK) baseline = bench_revision(baseline_rev) - outfile = comparison_outfile(out_label) + outfile = comparison_outfile(out_label) summary = write_comparison_md(outfile, target, baseline, "local", baseline_rev) println("\nWrote benchmark/results/$(basename(outfile)) (local vs $(baseline_rev))") println(" ratio = $(baseline_rev) ÷ local → < 1: local slower | > 1: local faster") - @printf(" weighted time ratio = %.3f (%s total %.3f ms vs %s total %.3f ms)\n", - summary.time_ratio, baseline_rev, summary.ref_ms, "local", summary.local_ms) - @printf(" weighted memory ratio = %.3f (%s total %.1f KiB vs %s total %.1f KiB)\n", - summary.memory_ratio, baseline_rev, summary.ref_kib, "local", summary.local_kib) + @printf( + " weighted time ratio = %.3f (%s total %.3f ms vs %s total %.3f ms)\n", + summary.time_ratio, + baseline_rev, + summary.ref_ms, + "local", + summary.local_ms + ) + @printf( + " weighted memory ratio = %.3f (%s total %.1f KiB vs %s total %.1f KiB)\n", + summary.memory_ratio, + baseline_rev, + summary.ref_kib, + "local", + summary.local_kib + ) elseif length(ARGS) == 3 target_rev, baseline_rev = ARGS[1], ARGS[2] - target = bench_revision(target_rev) + target = bench_revision(target_rev) baseline = bench_revision(baseline_rev) - outfile = comparison_outfile(ARGS[3]) + outfile = comparison_outfile(ARGS[3]) summary = write_comparison_md(outfile, target, baseline, target_rev, baseline_rev) println("\nWrote benchmark/results/$(basename(outfile)) ($(target_rev) vs $(baseline_rev))") - println(" ratio = $(baseline_rev) ÷ $(target_rev) → < 1: $(target_rev) slower | > 1: $(target_rev) faster") - @printf(" weighted time ratio = %.3f (%s total %.3f ms vs %s total %.3f ms)\n", - summary.time_ratio, baseline_rev, summary.ref_ms, target_rev, summary.local_ms) - @printf(" weighted memory ratio = %.3f (%s total %.1f KiB vs %s total 
%.1f KiB)\n", - summary.memory_ratio, baseline_rev, summary.ref_kib, target_rev, summary.local_kib) + println( + " ratio = $(baseline_rev) ÷ $(target_rev) → < 1: $(target_rev) slower | > 1: $(target_rev) faster", + ) + @printf( + " weighted time ratio = %.3f (%s total %.3f ms vs %s total %.3f ms)\n", + summary.time_ratio, + baseline_rev, + summary.ref_ms, + target_rev, + summary.local_ms + ) + @printf( + " weighted memory ratio = %.3f (%s total %.1f KiB vs %s total %.1f KiB)\n", + summary.memory_ratio, + baseline_rev, + summary.ref_kib, + target_rev, + summary.local_kib + ) else - error("Usage: julia benchmark/compare.jl [baseline_rev [output_label]] or julia benchmark/compare.jl target_rev baseline_rev output_label") + error( + "Usage: julia benchmark/compare.jl [baseline_rev [output_label]] or julia benchmark/compare.jl target_rev baseline_rev output_label", + ) end diff --git a/benchmark/compare_labels.jl b/benchmark/compare_labels.jl index 39a2559..755ee19 100644 --- a/benchmark/compare_labels.jl +++ b/benchmark/compare_labels.jl @@ -32,7 +32,11 @@ using Printf const RESDIR = joinpath(@__DIR__, "results") const TOL = 0.05 -compare_labels_outfile(label_a::AbstractString, label_b::AbstractString, out_label::AbstractString) = +compare_labels_outfile( + label_a::AbstractString, + label_b::AbstractString, + out_label::AbstractString, +) = isempty(out_label) ? 
joinpath(RESDIR, "compare_labels_$(label_a)_vs_$(label_b).md") : joinpath(RESDIR, "compare_labels_$(out_label).md") @@ -44,14 +48,17 @@ function collect_label_rows(results_a, results_b) haskey(results_b[group], name) || continue trial_a = median(results_a[group][name]) trial_b = median(results_b[group][name]) - push!(rows, ( - group = group, - name = name, - a_ms = trial_a.time / 1e6, - b_ms = trial_b.time / 1e6, - a_kib = trial_a.memory / 1024, - b_kib = trial_b.memory / 1024, - )) + push!( + rows, + ( + group=group, + name=name, + a_ms=(trial_a.time / 1e6), + b_ms=(trial_b.time / 1e6), + a_kib=(trial_a.memory / 1024), + b_kib=(trial_b.memory / 1024), + ), + ) end end return rows @@ -63,15 +70,15 @@ function summary_metrics(rows) total_a_kib = sum(row.a_kib for row in rows) total_b_kib = sum(row.b_kib for row in rows) return ( - matched = length(rows), - a_ms = total_a_ms, - b_ms = total_b_ms, - time_ratio = total_a_ms > 0 ? total_b_ms / total_a_ms : NaN, - a_kib = total_a_kib, - b_kib = total_b_kib, - memory_ratio = total_a_kib > 0 ? total_b_kib / total_a_kib : NaN, - combined_ratio = (total_a_ms > 0 && total_a_kib > 0) ? - sqrt((total_b_ms / total_a_ms) * (total_b_kib / total_a_kib)) : NaN, + matched=length(rows), + a_ms=total_a_ms, + b_ms=total_b_ms, + time_ratio=total_a_ms > 0 ? total_b_ms / total_a_ms : NaN, + a_kib=total_a_kib, + b_kib=total_b_kib, + memory_ratio=total_a_kib > 0 ? total_b_kib / total_a_kib : NaN, + combined_ratio=(total_a_ms > 0 && total_a_kib > 0) ? 
+ sqrt((total_b_ms / total_a_ms) * (total_b_kib / total_a_kib)) : NaN, ) end @@ -84,38 +91,38 @@ function overall_decision(summary, label_a::AbstractString, label_b::AbstractStr if time_winner == label_a && memory_winner == label_a return ( - winner = label_a, - reason = "$(label_a) is faster and uses less memory overall.", - time_winner = time_winner, - memory_winner = memory_winner, - combined_winner = label_a, + winner=label_a, + reason="$(label_a) is faster and uses less memory overall.", + time_winner=time_winner, + memory_winner=memory_winner, + combined_winner=label_a, ) elseif time_winner == label_b && memory_winner == label_b return ( - winner = label_b, - reason = "$(label_b) is faster and uses less memory overall.", - time_winner = time_winner, - memory_winner = memory_winner, - combined_winner = label_b, + winner=label_b, + reason="$(label_b) is faster and uses less memory overall.", + time_winner=time_winner, + memory_winner=memory_winner, + combined_winner=label_b, ) end combined_winner = ratio_winner(summary.combined_ratio, label_a, label_b) if combined_winner == "tie" return ( - winner = "tie", - reason = "No clear overall winner: time and memory trade off within the 5% tolerance.", - time_winner = time_winner, - memory_winner = memory_winner, - combined_winner = combined_winner, + winner="tie", + reason="No clear overall winner: time and memory trade off within the 5% tolerance.", + time_winner=time_winner, + memory_winner=memory_winner, + combined_winner=combined_winner, ) else return ( - winner = combined_winner, - reason = "$(combined_winner) wins the combined time/memory score, but time and memory trade off.", - time_winner = time_winner, - memory_winner = memory_winner, - combined_winner = combined_winner, + winner=combined_winner, + reason="$(combined_winner) wins the combined time/memory score, but time and memory trade off.", + time_winner=time_winner, + memory_winner=memory_winner, + combined_winner=combined_winner, ) end end @@ -127,7 
+134,10 @@ function write_comparison_md(outfile, rows, summary, decision, label_a, label_b) open(outfile, "w") do io println(io, "# Benchmark Labels: `$(label_a)` vs `$(label_b)`\n") println(io, "**`ratio = $(label_b) ÷ $(label_a)`** ") - println(io, "ratio `> 1` → `$(label_a)` is better | ratio `< 1` → `$(label_b)` is better") + println( + io, + "ratio `> 1` → `$(label_a)` is better | ratio `< 1` → `$(label_b)` is better", + ) println(io, "") println(io, "## Decision\n") if decision.winner == "tie" @@ -141,27 +151,59 @@ function write_comparison_md(outfile, rows, summary, decision, label_a, label_b) println(io, "| metric | ratio | better | $(label_a) total | $(label_b) total |") println(io, "|:-------|------:|:------:|-----------------:|-----------------:|") println(io, "| matched benchmarks | $(summary.matched) | — | — | — |") - @printf(io, "| weighted time ratio | %.3f | %s | %.3f ms | %.3f ms |\n", - summary.time_ratio, decision.time_winner, summary.a_ms, summary.b_ms) - @printf(io, "| weighted memory ratio | %.3f | %s | %.1f KiB | %.1f KiB |\n", - summary.memory_ratio, decision.memory_winner, summary.a_kib, summary.b_kib) - @printf(io, "| combined score | %.3f | %s | — | — |\n\n", - summary.combined_ratio, decision.combined_winner) - println(io, "| benchmark | time ratio | time better | memory ratio | memory better | $(label_a) (ms) | $(label_b) (ms) | $(label_a) (KiB) | $(label_b) (KiB) |") - println(io, "|:----------|-----------:|:-----------:|-------------:|:-------------:|----------------:|----------------:|-----------------:|-----------------:|") + @printf( + io, + "| weighted time ratio | %.3f | %s | %.3f ms | %.3f ms |\n", + summary.time_ratio, + decision.time_winner, + summary.a_ms, + summary.b_ms + ) + @printf( + io, + "| weighted memory ratio | %.3f | %s | %.1f KiB | %.1f KiB |\n", + summary.memory_ratio, + decision.memory_winner, + summary.a_kib, + summary.b_kib + ) + @printf( + io, + "| combined score | %.3f | %s | — | — |\n\n", + 
summary.combined_ratio, + decision.combined_winner + ) + println( + io, + "| benchmark | time ratio | time better | memory ratio | memory better | $(label_a) (ms) | $(label_b) (ms) | $(label_a) (KiB) | $(label_b) (KiB) |", + ) + println( + io, + "|:----------|-----------:|:-----------:|-------------:|:-------------:|----------------:|----------------:|-----------------:|-----------------:|", + ) for row in rows time_ratio = row.b_ms / row.a_ms memory_ratio = row.b_kib / row.a_kib - @printf(io, "| `[\"%s\", \"%s\"]` | %.3f | %s | %.3f | %s | %.3f | %.3f | %.1f | %.1f |\n", - row.group, row.name, - time_ratio, row_verdict(time_ratio, label_a, label_b), - memory_ratio, row_verdict(memory_ratio, label_a, label_b), - row.a_ms, row.b_ms, row.a_kib, row.b_kib) + @printf( + io, + "| `[\"%s\", \"%s\"]` | %.3f | %s | %.3f | %s | %.3f | %.3f | %.1f | %.1f |\n", + row.group, + row.name, + time_ratio, + row_verdict(time_ratio, label_a, label_b), + memory_ratio, + row_verdict(memory_ratio, label_a, label_b), + row.a_ms, + row.b_ms, + row.a_kib, + row.b_kib + ) end end end -length(ARGS) >= 2 || error("Usage: julia benchmark/compare_labels.jl label_a label_b [output_label]") +length(ARGS) >= 2 || + error("Usage: julia benchmark/compare_labels.jl label_a label_b [output_label]") label_a = ARGS[1] label_b = ARGS[2] @@ -177,7 +219,8 @@ results_a = load(file_a)[1] results_b = load(file_b)[1] rows = collect_label_rows(results_a, results_b) -isempty(rows) && error("No overlapping benchmarks found between labels \"$(label_a)\" and \"$(label_b)\".") +isempty(rows) && + error("No overlapping benchmarks found between labels \"$(label_a)\" and \"$(label_b)\".") summary = summary_metrics(rows) decision = overall_decision(summary, label_a, label_b) diff --git a/benchmark/compare_py.jl b/benchmark/compare_py.jl index 2b4cd2b..2afefaa 100644 --- a/benchmark/compare_py.jl +++ b/benchmark/compare_py.jl @@ -25,7 +25,7 @@ using Pkg Pkg.activate(@__DIR__) let deps = keys(Pkg.project().dependencies) - 
"QMC" in deps || Pkg.develop(; path = dirname(@__DIR__)) + "QMC" in deps || Pkg.develop(; path=dirname(@__DIR__)) "BenchmarkTools" in deps || Pkg.add("BenchmarkTools") "JSON3" in deps || Pkg.add("JSON3") end @@ -46,7 +46,11 @@ is_c_kernel_row(name::AbstractString) = occursin(r"Lattice|DigitalNetB2|Halton", function lookup_py_entry(py_results, group, name) py_group = get(py_results, Symbol(group), nothing) py_group === nothing && return nothing - return get(py_group, name, get(py_group, replace(name, r" d=\d+" => s -> " [C]" * s), nothing)) + return get( + py_group, + name, + get(py_group, replace(name, r" d=\d+" => s -> " [C]" * s), nothing), + ) end function lookup_jl_memory_entry(jl_memory_results, group, name) @@ -66,35 +70,44 @@ function collect_comparison_rows(jl_results, jl_memory_results, py_results) jl_mem_entry = lookup_jl_memory_entry(jl_memory_results, group, name) py_entry = lookup_py_entry(py_results, group, name) if py_entry !== nothing && !haskey(py_entry, "error") - push!(rows, ( - group = group, - name = name, - jl_ms = jl_ms, - jl_kib = jl_kib, - jl_rss_delta_kib = jl_mem_entry !== nothing && haskey(jl_mem_entry, "rss_delta_kib") ? - Float64(jl_mem_entry["rss_delta_kib"]) : nothing, - py_ms = Float64(py_entry["median_ms"]), - py_peak_kib = haskey(py_entry, "tracemalloc_peak_kib") ? - Float64(py_entry["tracemalloc_peak_kib"]) : nothing, - py_rss_delta_kib = haskey(py_entry, "rss_delta_kib") ? - Float64(py_entry["rss_delta_kib"]) : nothing, - py_error = nothing, - c_kernel = is_c_kernel_row(name), - )) + push!( + rows, + ( + group=group, + name=name, + jl_ms=jl_ms, + jl_kib=jl_kib, + jl_rss_delta_kib=jl_mem_entry !== nothing && + haskey(jl_mem_entry, "rss_delta_kib") ? + Float64(jl_mem_entry["rss_delta_kib"]) : nothing, + py_ms=Float64(py_entry["median_ms"]), + py_peak_kib=haskey(py_entry, "tracemalloc_peak_kib") ? + Float64(py_entry["tracemalloc_peak_kib"]) : nothing, + py_rss_delta_kib=haskey(py_entry, "rss_delta_kib") ? 
+ Float64(py_entry["rss_delta_kib"]) : nothing, + py_error=nothing, + c_kernel=is_c_kernel_row(name), + ), + ) else - push!(rows, ( - group = group, - name = name, - jl_ms = jl_ms, - jl_kib = jl_kib, - jl_rss_delta_kib = jl_mem_entry !== nothing && haskey(jl_mem_entry, "rss_delta_kib") ? - Float64(jl_mem_entry["rss_delta_kib"]) : nothing, - py_ms = nothing, - py_peak_kib = nothing, - py_rss_delta_kib = nothing, - py_error = py_entry === nothing ? "no Python data" : string(py_entry["error"]), - c_kernel = is_c_kernel_row(name), - )) + push!( + rows, + ( + group=group, + name=name, + jl_ms=jl_ms, + jl_kib=jl_kib, + jl_rss_delta_kib=jl_mem_entry !== nothing && + haskey(jl_mem_entry, "rss_delta_kib") ? + Float64(jl_mem_entry["rss_delta_kib"]) : nothing, + py_ms=nothing, + py_peak_kib=nothing, + py_rss_delta_kib=nothing, + py_error=py_entry === nothing ? "no Python data" : + string(py_entry["error"]), + c_kernel=is_c_kernel_row(name), + ), + ) end end end @@ -103,28 +116,31 @@ end function summary_metrics(rows) matched = filter(row -> row.py_ms !== nothing, rows) - total_jl_ms = sum((row.jl_ms for row in matched); init = 0.0) - total_py_ms = sum((row.py_ms for row in matched); init = 0.0) + total_jl_ms = sum((row.jl_ms for row in matched); init=0.0) + total_py_ms = sum((row.py_ms for row in matched); init=0.0) peak_rows = filter(row -> row.py_peak_kib !== nothing, matched) - rss_rows = filter(row -> row.py_rss_delta_kib !== nothing && row.jl_rss_delta_kib !== nothing, matched) - total_jl_peak_kib = sum((row.jl_kib for row in peak_rows); init = 0.0) - total_py_peak_kib = sum((row.py_peak_kib for row in peak_rows); init = 0.0) - total_jl_rss_kib = sum((row.jl_rss_delta_kib for row in rss_rows); init = 0.0) - total_py_rss_delta_kib = sum((row.py_rss_delta_kib for row in rss_rows); init = 0.0) + rss_rows = filter( + row -> row.py_rss_delta_kib !== nothing && row.jl_rss_delta_kib !== nothing, + matched, + ) + total_jl_peak_kib = sum((row.jl_kib for row in peak_rows); 
init=0.0) + total_py_peak_kib = sum((row.py_peak_kib for row in peak_rows); init=0.0) + total_jl_rss_kib = sum((row.jl_rss_delta_kib for row in rss_rows); init=0.0) + total_py_rss_delta_kib = sum((row.py_rss_delta_kib for row in rss_rows); init=0.0) return ( - matched = length(matched), - total = length(rows), - jl_ms = total_jl_ms, - py_ms = total_py_ms, - time_ratio = total_jl_ms > 0 ? total_py_ms / total_jl_ms : NaN, - peak_rows = length(peak_rows), - jl_peak_kib = total_jl_peak_kib, - py_peak_kib = total_py_peak_kib, - peak_ratio = total_jl_peak_kib > 0 ? total_py_peak_kib / total_jl_peak_kib : NaN, - rss_rows = length(rss_rows), - jl_rss_kib = total_jl_rss_kib, - py_rss_delta_kib = total_py_rss_delta_kib, - rss_ratio = total_jl_rss_kib > 0 ? total_py_rss_delta_kib / total_jl_rss_kib : NaN, + matched=length(matched), + total=length(rows), + jl_ms=total_jl_ms, + py_ms=total_py_ms, + time_ratio=total_jl_ms > 0 ? total_py_ms / total_jl_ms : NaN, + peak_rows=length(peak_rows), + jl_peak_kib=total_jl_peak_kib, + py_peak_kib=total_py_peak_kib, + peak_ratio=total_jl_peak_kib > 0 ? total_py_peak_kib / total_jl_peak_kib : NaN, + rss_rows=length(rss_rows), + jl_rss_kib=total_jl_rss_kib, + py_rss_delta_kib=total_py_rss_delta_kib, + rss_ratio=total_jl_rss_kib > 0 ? total_py_rss_delta_kib / total_jl_rss_kib : NaN, ) end @@ -141,8 +157,14 @@ function accuracy_check(jl_sol, py_sol, abs_tol, rel_tol) allowed = 2 * eff_tol mode = (rel_tol > 0 && rel_tol * abs(py_sol) >= abs_tol) ? "rel" : "abs" rel_diff = py_sol != 0 ? 
diff / abs(py_sol) : NaN - return (diff = diff, rel_diff = rel_diff, eff_tol = eff_tol, - allowed = allowed, mode = mode, flagged = diff > allowed) + return ( + diff=diff, + rel_diff=rel_diff, + eff_tol=eff_tol, + allowed=allowed, + mode=mode, + flagged=diff > allowed, + ) end function collect_accuracy_rows(jl_solutions, py_results) @@ -156,14 +178,23 @@ function collect_accuracy_rows(jl_solutions, py_results) abs_tol = Float64(jl["abs_tol"]) rel_tol = Float64(jl["rel_tol"]) py_entry = py_group === nothing ? nothing : get(py_group, namestr, nothing) - py_sol = (py_entry !== nothing && haskey(py_entry, "solution")) ? - Float64(py_entry["solution"]) : nothing - check = py_sol === nothing ? nothing : - accuracy_check(jl_sol, py_sol, abs_tol, rel_tol) - push!(rows, (name = namestr, jl_sol = jl_sol, py_sol = py_sol, - abs_tol = abs_tol, rel_tol = rel_tol, check = check)) + py_sol = + (py_entry !== nothing && haskey(py_entry, "solution")) ? + Float64(py_entry["solution"]) : nothing + check = py_sol === nothing ? nothing : accuracy_check(jl_sol, py_sol, abs_tol, rel_tol) + push!( + rows, + ( + name=namestr, + jl_sol=jl_sol, + py_sol=py_sol, + abs_tol=abs_tol, + rel_tol=rel_tol, + check=check, + ), + ) end - return sort(rows; by = r -> r.name) + return sort(rows; by=r -> r.name) end jl_label = length(ARGS) >= 1 ? ARGS[1] : "latest" @@ -174,14 +205,18 @@ jl_file = joinpath(resdir, "$(jl_label).json") jl_mem_file = joinpath(resdir, "$(jl_label)_memory.json") py_file = joinpath(resdir, "qmcpy_$(py_label).json") -isfile(jl_file) || error("Julia results not found: $jl_file\nRun: make bench" * - (jl_label == "latest" ? "" : " then julia benchmark/runbenchmarks.jl $jl_label")) -isfile(py_file) || error("QMCPy results not found: $py_file\n" * - "Run: python benchmark/benchmark_qmcpy.py $py_label") +isfile(jl_file) || error( + "Julia results not found: $jl_file\nRun: make bench" * + (jl_label == "latest" ? 
"" : " then julia benchmark/runbenchmarks.jl $jl_label"), +) +isfile(py_file) || error( + "QMCPy results not found: $py_file\n" * + "Run: python benchmark/benchmark_qmcpy.py $py_label", +) jl_results = BenchmarkTools.load(jl_file)[1] jl_mem_data = isfile(jl_mem_file) ? JSON3.read(read(jl_mem_file, String)) : nothing -py_data = JSON3.read(read(py_file, String)) +py_data = JSON3.read(read(py_file, String)) jl_memory_results = jl_mem_data === nothing ? nothing : jl_mem_data["results"] py_results = py_data["results"] @@ -214,7 +249,14 @@ for group in sort(unique(row.group for row in rows)) if row.py_ms !== nothing ratio = row.py_ms / row.jl_ms tag = row.c_kernel ? " [C]" : " " - @printf(" %s %-44s %11.3f %11.3f %6.2fx\n", tag, row.name, row.jl_ms, row.py_ms, ratio) + @printf( + " %s %-44s %11.3f %11.3f %6.2fx\n", + tag, + row.name, + row.jl_ms, + row.py_ms, + ratio + ) else # missing/unavailable Python data @printf(" ??? %-44s %11.3f %11s %7s\n", row.name, row.jl_ms, "n/a", "n/a") end @@ -223,18 +265,40 @@ end println() println("="^length(header)) -println("ratio > 1: local (Julia) faster than Python | ratio < 1: local (Julia) slower than Python") -@printf("weighted time ratio = %.3f (Python total %.3f ms vs Julia total %.3f ms across %d/%d matched rows)\n", - summary.time_ratio, summary.py_ms, summary.jl_ms, summary.matched, summary.total) +println( + "ratio > 1: local (Julia) faster than Python | ratio < 1: local (Julia) slower than Python", +) +@printf( + "weighted time ratio = %.3f (Python total %.3f ms vs Julia total %.3f ms across %d/%d matched rows)\n", + summary.time_ratio, + summary.py_ms, + summary.jl_ms, + summary.matched, + summary.total +) if summary.peak_rows > 0 - @printf("weighted tracemalloc ratio = %.3f (Python peak total %.1f KiB vs Julia alloc total %.1f KiB across %d/%d rows)\n", - summary.peak_ratio, summary.py_peak_kib, summary.jl_peak_kib, summary.peak_rows, summary.total) + @printf( + "weighted tracemalloc ratio = %.3f (Python peak total %.1f 
KiB vs Julia alloc total %.1f KiB across %d/%d rows)\n", + summary.peak_ratio, + summary.py_peak_kib, + summary.jl_peak_kib, + summary.peak_rows, + summary.total + ) else - println("weighted tracemalloc ratio = n/a (QMCPy results do not record Python memory metrics)") + println( + "weighted tracemalloc ratio = n/a (QMCPy results do not record Python memory metrics)", + ) end if summary.rss_rows > 0 - @printf("weighted RSS delta ratio = %.3f (Python RSS Δ total %.1f KiB vs Julia RSS Δ total %.1f KiB across %d/%d rows)\n", - summary.rss_ratio, summary.py_rss_delta_kib, summary.jl_rss_kib, summary.rss_rows, summary.total) + @printf( + "weighted RSS delta ratio = %.3f (Python RSS Δ total %.1f KiB vs Julia RSS Δ total %.1f KiB across %d/%d rows)\n", + summary.rss_ratio, + summary.py_rss_delta_kib, + summary.jl_rss_kib, + summary.rss_rows, + summary.total + ) else println("weighted RSS delta ratio = n/a (missing Julia or QMCPy RSS delta sidecar data)") end @@ -242,8 +306,10 @@ end # ── Accuracy (integrate): Julia vs Python solution values ─────────────────────── if isempty(accuracy_rows) println() - println("accuracy (integrate): no solution data " * - "(need Julia `$(jl_label)_solutions.json` + QMCPy solutions in $(basename(py_file)))") + println( + "accuracy (integrate): no solution data " * + "(need Julia `$(jl_label)_solutions.json` + QMCPy solutions in $(basename(py_file)))", + ) else println() println("="^length(header)) @@ -251,16 +317,27 @@ else println("-"^length(header)) for r in accuracy_rows if r.check === nothing - @printf(" ??? %-30s Julia=%- 12.6g Python=n/a\n", r.name, r.jl_sol) + @printf(" ??? %-40s Julia=%- 12.6g Python=n/a\n", r.name, r.jl_sol) else c = r.check mark = c.flagged ? 
"❌ DIFF" : "✅ ok " - @printf(" %s %-30s Julia=%- 12.6g Python=%- 12.6g |Δ|=%.3g allowed(2·%s)=%.3g\n", - mark, r.name, r.jl_sol, r.py_sol, c.diff, c.mode, c.allowed) + @printf( + " %s %-40s Julia=%- 12.6g Python=%- 12.6g |Δ|=%.3g allowed(2·%s)=%.3g\n", + mark, + r.name, + r.jl_sol, + r.py_sol, + c.diff, + c.mode, + c.allowed + ) end end - @printf("%d of %d integrate case(s) exceed 2×tolerance\n", - n_flagged, count(r -> r.check !== nothing, accuracy_rows)) + @printf( + "%d of %d integrate case(s) exceed 2×tolerance\n", + n_flagged, + count(r -> r.check !== nothing, accuracy_rows) + ) end # ── Save markdown file ────────────────────────────────────────────────────────── @@ -274,50 +351,120 @@ open(outfile, "w") do io println(io, "| qmcpy version | — | $(py_version) |") println(io, "") println(io, "**`ratio = Python time ÷ Julia time`** ") - println(io, "ratio `< 1` → local (Julia) is **slower** ❌ | ratio `> 1` → local (Julia) is **faster** ✅ ") + println( + io, + "ratio `< 1` → local (Julia) is **slower** ❌ | ratio `> 1` → local (Julia) is **faster** ✅ ", + ) println(io, "") - println(io, "> ⚠️ C-kernel rows `[C]` (Lattice/DigitalNetB2/Halton `gen_samples`) call the same") - println(io, "> `qmctoolscl` library on both sides and are **not** a Julia vs Python comparison.") - println(io, "> Julia `alloc KiB` is allocated bytes from BenchmarkTools. Julia `RSS Δ` and Python") - println(io, "> `RSS Δ` are coarse retained-memory signals from one warmed call. Python `tracemalloc`") - println(io, "> peak is Python-managed temporary memory. These are related but not interchangeable.") + println( + io, + "> ⚠️ C-kernel rows `[C]` (Lattice/DigitalNetB2/Halton `gen_samples`) call the same", + ) + println( + io, + "> `qmctoolscl` library on both sides and are **not** a Julia vs Python comparison.", + ) + println( + io, + "> Julia `alloc KiB` is allocated bytes from BenchmarkTools. 
Julia `RSS Δ` and Python", + ) + println( + io, + "> `RSS Δ` are coarse retained-memory signals from one warmed call. Python `tracemalloc`", + ) + println( + io, + "> peak is Python-managed temporary memory. These are related but not interchangeable.", + ) println(io, "") println(io, "## Aggregate Summary\n") println(io, "| metric | ratio | Julia total | Python total | rows |") println(io, "|:-------|------:|------------:|-------------:|-----:|") - println(io, "| matched benchmarks | $(summary.matched)/$(summary.total) | — | — | $(summary.matched) |") - @printf(io, "| weighted time ratio | %.3f | %.3f ms | %.3f ms | %d |\n", - summary.time_ratio, summary.jl_ms, summary.py_ms, summary.matched) + println( + io, + "| matched benchmarks | $(summary.matched)/$(summary.total) | — | — | $(summary.matched) |", + ) + @printf( + io, + "| weighted time ratio | %.3f | %.3f ms | %.3f ms | %d |\n", + summary.time_ratio, + summary.jl_ms, + summary.py_ms, + summary.matched + ) if summary.peak_rows > 0 - @printf(io, "| weighted tracemalloc ratio | %.3f | %.1f KiB | %.1f KiB | %d |\n", - summary.peak_ratio, summary.jl_peak_kib, summary.py_peak_kib, summary.peak_rows) + @printf( + io, + "| weighted tracemalloc ratio | %.3f | %.1f KiB | %.1f KiB | %d |\n", + summary.peak_ratio, + summary.jl_peak_kib, + summary.py_peak_kib, + summary.peak_rows + ) else println(io, "| weighted tracemalloc ratio | n/a | n/a | n/a | 0 |") end if summary.rss_rows > 0 - @printf(io, "| weighted RSS delta ratio | %.3f | %.1f KiB | %.1f KiB | %d |\n\n", - summary.rss_ratio, summary.jl_rss_kib, summary.py_rss_delta_kib, summary.rss_rows) + @printf( + io, + "| weighted RSS delta ratio | %.3f | %.1f KiB | %.1f KiB | %d |\n\n", + summary.rss_ratio, + summary.jl_rss_kib, + summary.py_rss_delta_kib, + summary.rss_rows + ) else println(io, "| weighted RSS delta ratio | n/a | n/a | n/a | 0 |\n") end println(io, "") - println(io, "| benchmark | time ratio | verdict | Julia (ms) | Python (ms) | Julia alloc (KiB) | Julia 
RSS Δ (KiB) | Python peak (KiB) | Python RSS Δ (KiB) |") - println(io, "|:----------|-----------:|:-------:|----------:|------------:|-------------------:|------------------:|------------------:|-------------------:|") + println( + io, + "| benchmark | time ratio | verdict | Julia (ms) | Python (ms) | Julia alloc (KiB) | Julia RSS Δ (KiB) | Python peak (KiB) | Python RSS Δ (KiB) |", + ) + println( + io, + "|:----------|-----------:|:-------:|----------:|------------:|-------------------:|------------------:|------------------:|-------------------:|", + ) for row in rows c_note = row.c_kernel ? " `[C]`" : "" if row.py_ms !== nothing - ratio = row.py_ms / row.jl_ms - verdict = ratio < 0.95 ? "❌" : ratio > 1.05 ? "✅" : "–" - jl_rss_txt = row.jl_rss_delta_kib === nothing ? "n/a" : @sprintf("%.1f", row.jl_rss_delta_kib) - peak_txt = row.py_peak_kib === nothing ? "n/a" : @sprintf("%.1f", row.py_peak_kib) - rss_txt = row.py_rss_delta_kib === nothing ? "n/a" : @sprintf("%.1f", row.py_rss_delta_kib) - @printf(io, "| `[\"%s\", \"%s\"]`%s | %.3f | %s | %.3f | %.3f | %.1f | %s | %s | %s |\n", - row.group, row.name, c_note, ratio, verdict, row.jl_ms, row.py_ms, - row.jl_kib, jl_rss_txt, peak_txt, rss_txt) + ratio = row.py_ms / row.jl_ms + verdict = ratio < 0.95 ? "❌" : ratio > 1.05 ? "✅" : "–" + jl_rss_txt = + row.jl_rss_delta_kib === nothing ? "n/a" : + @sprintf("%.1f", row.jl_rss_delta_kib) + peak_txt = row.py_peak_kib === nothing ? "n/a" : @sprintf("%.1f", row.py_peak_kib) + rss_txt = + row.py_rss_delta_kib === nothing ? "n/a" : + @sprintf("%.1f", row.py_rss_delta_kib) + @printf( + io, + "| `[\"%s\", \"%s\"]`%s | %.3f | %s | %.3f | %.3f | %.1f | %s | %s | %s |\n", + row.group, + row.name, + c_note, + ratio, + verdict, + row.jl_ms, + row.py_ms, + row.jl_kib, + jl_rss_txt, + peak_txt, + rss_txt + ) else - jl_rss_txt = row.jl_rss_delta_kib === nothing ? 
"n/a" : @sprintf("%.1f", row.jl_rss_delta_kib) - @printf(io, "| `[\"%s\", \"%s\"]` | n/a | — | %.3f | n/a | %.1f | %s | n/a | n/a |\n", - row.group, row.name, row.jl_ms, row.jl_kib, jl_rss_txt) + jl_rss_txt = + row.jl_rss_delta_kib === nothing ? "n/a" : + @sprintf("%.1f", row.jl_rss_delta_kib) + @printf( + io, + "| `[\"%s\", \"%s\"]` | n/a | — | %.3f | n/a | %.1f | %s | n/a | n/a |\n", + row.group, + row.name, + row.jl_ms, + row.jl_kib, + jl_rss_txt + ) end end @@ -325,26 +472,60 @@ open(outfile, "w") do io println(io, "") println(io, "## Accuracy (integrate): solution agreement\n") println(io, "Each criterion converges to within `tol` of the true value, so the Julia and") - println(io, "Python solutions should agree within **`2·tol`** (effective `tol = max(abs_tol,") - println(io, "rel_tol·|value|)`). Rows whose `|Julia − Python|` exceeds that bound are flagged ❌.\n") + println( + io, + "Python solutions should agree within **`2·tol`** (effective `tol = max(abs_tol,", + ) + println( + io, + "rel_tol·|value|)`). Rows whose `|Julia − Python|` exceeds that bound are flagged ❌.\n", + ) if isempty(accuracy_rows) println(io, "_No solution data found. 
Re-run `make bench` (writes the Julia ") - println(io, "`$(jl_label)_solutions.json` sidecar) and a QMCPy harness recent enough to ") + println( + io, + "`$(jl_label)_solutions.json` sidecar) and a QMCPy harness recent enough to ", + ) println(io, "record `solution`/`abs_tol`/`rel_tol`._") else - @printf(io, "**%d of %d** matched integrate case(s) exceed 2×tolerance.\n\n", - n_flagged, count(r -> r.check !== nothing, accuracy_rows)) - println(io, "| integrate case | Julia | Python | abs Δ | rel Δ | tol mode | allowed 2·tol | verdict |") - println(io, "|:---------------|------:|-------:|------:|------:|:--------:|--------------:|:-------:|") + @printf( + io, + "**%d of %d** matched integrate case(s) exceed 2×tolerance.\n\n", + n_flagged, + count(r -> r.check !== nothing, accuracy_rows) + ) + println( + io, + "| integrate case | Julia | Python | abs Δ | rel Δ | tol mode | allowed 2·tol | verdict |", + ) + println( + io, + "|:---------------|------:|-------:|------:|------:|:--------:|--------------:|:-------:|", + ) for r in accuracy_rows if r.check === nothing - @printf(io, "| `%s` | %.6g | n/a | n/a | n/a | n/a | n/a | — |\n", r.name, r.jl_sol) + @printf( + io, + "| `%s` | %.6g | n/a | n/a | n/a | n/a | n/a | — |\n", + r.name, + r.jl_sol + ) else c = r.check verdict = c.flagged ? "❌" : "✅" reld = isnan(c.rel_diff) ? "n/a" : @sprintf("%.2e", c.rel_diff) - @printf(io, "| `%s` | %.6g | %.6g | %.3e | %s | %s | %.3e | %s |\n", - r.name, r.jl_sol, r.py_sol, c.diff, reld, c.mode, c.allowed, verdict) + @printf( + io, + "| `%s` | %.6g | %.6g | %.3e | %s | %s | %.3e | %s |\n", + r.name, + r.jl_sol, + r.py_sol, + c.diff, + reld, + c.mode, + c.allowed, + verdict + ) end end end diff --git a/benchmark/results/README.md b/benchmark/results/README.md new file mode 100644 index 0000000..15a6607 --- /dev/null +++ b/benchmark/results/README.md @@ -0,0 +1,14 @@ +# Benchmark Results + +This directory stores generated benchmark outputs and comparison reports. + +## Typical files + +- `