diff --git a/.github/workflows/python-bench.yml b/.github/workflows/python-bench.yml index 6e62e20..986cdf6 100644 --- a/.github/workflows/python-bench.yml +++ b/.github/workflows/python-bench.yml @@ -74,7 +74,7 @@ jobs: - name: Run parse benchmark run: | - poetry run python -m tests.benchmarks.bench_parse \ + poetry run python -m pathable.benchmarks.bench_parse \ --output "reports/bench-parse.${{ inputs.suffix }}.json" \ ${{ inputs.quick && '--quick' || '' }} \ --repeats "${{ inputs.repeats }}" \ @@ -82,7 +82,7 @@ jobs: - name: Run lookup benchmark run: | - poetry run python -m tests.benchmarks.bench_lookup \ + poetry run python -m pathable.benchmarks.bench_lookup \ --output "reports/bench-lookup.${{ inputs.suffix }}.json" \ ${{ inputs.quick && '--quick' || '' }} \ --repeats "${{ inputs.repeats }}" \ diff --git a/README.md b/README.md index bc1b830..0559570 100644 --- a/README.md +++ b/README.md @@ -144,32 +144,49 @@ pip install -e git+https://github.com/p1c2u/pathable.git#egg=pathable ## Benchmarks -Benchmarks live in `tests/benchmarks/` and produce JSON reports. +Benchmark tooling is shipped in the package and exposed as `pathable-bench`. -Local run (recommended as modules): +Install (core package): ```console -poetry run python -m tests.benchmarks.bench_parse --output reports/bench-parse.json -poetry run python -m tests.benchmarks.bench_lookup --output reports/bench-lookup.json +pip install pathable +``` + +Optional benchmark extra (reserved for benchmark-specific deps): + +```console +pip install "pathable[bench]" +``` + +Run all benchmark scenarios for an implementation: + +```console +poetry run pathable-bench run --impl pathable.LookupPath --output reports/bench-lookup.json ``` Quick sanity run: ```console -poetry run python -m tests.benchmarks.bench_parse --quick --output reports/bench-parse.quick.json -poetry run python -m tests.benchmarks.bench_lookup --quick --output reports/bench-lookup.quick.json +poetry run pathable-bench run --impl pathable.LookupPath --quick --output reports/bench-lookup.quick.json ``` -Compare two results (fails if candidate is >20% slower in any scenario): +Compare two results (compares overlapping scenarios only; fails if candidate is >20% slower in any compared scenario): ```console -poetry run python -m tests.benchmarks.compare_results \ +poetry run pathable-bench compare \ --baseline reports/bench-before.json \ --candidate reports/bench-after.json \ --tolerance 0.20 ``` +Deprecated compatibility wrappers still exist for now: + +```console +poetry run python -m tests.benchmarks.bench_lookup --output reports/bench-lookup.json +poetry run python -m tests.benchmarks.bench_parse --output reports/bench-parse.json +poetry run python -m tests.benchmarks.compare_results --baseline a.json --candidate b.json +``` + CI (on-demand): - GitHub Actions workflow `Benchmarks` runs via `workflow_dispatch` and uploads the JSON artifacts. 
- diff --git a/pathable/benchmarks/__init__.py b/pathable/benchmarks/__init__.py new file mode 100644 index 0000000..5ef569a --- /dev/null +++ b/pathable/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Public benchmark toolkit for pathable.""" diff --git a/pathable/benchmarks/bench_lookup.py b/pathable/benchmarks/bench_lookup.py new file mode 100644 index 0000000..bec84a1 --- /dev/null +++ b/pathable/benchmarks/bench_lookup.py @@ -0,0 +1,49 @@ +"""Standalone lookup benchmark command.""" + +import argparse +from typing import Iterable +from typing import cast + +from pathable.benchmarks.core import add_common_args +from pathable.benchmarks.core import default_meta +from pathable.benchmarks.core import results_to_json +from pathable.benchmarks.core import write_json +from pathable.benchmarks.registry import resolve_impl +from pathable.benchmarks.scenarios.lookup import run_lookup_scenarios +from pathable.paths import AccessorPath + + +def main(argv: Iterable[str] | None = None) -> int: + parser = argparse.ArgumentParser() + add_common_args(parser) + parser.add_argument( + "--impl", + default="pathable.LookupPath", + help="AccessorPath implementation target (default: pathable.LookupPath).", + ) + args = parser.parse_args(list(argv) if argv is not None else None) + + impl = resolve_impl(args.impl) + if not issubclass(impl, AccessorPath): + raise TypeError( + "lookup benchmark requires an AccessorPath implementation" + ) + accessor_impl = cast(type[AccessorPath[object, object, object]], impl) + + results = run_lookup_scenarios( + accessor_impl, + quick=args.quick, + repeats=args.repeats, + warmup_loops=args.warmup_loops, + ) + + meta = default_meta() + meta["impl"] = f"{impl.__module__}.{impl.__qualname__}" + + payload = results_to_json(results=results, meta=meta) + write_json(args.output, payload) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pathable/benchmarks/bench_parse.py b/pathable/benchmarks/bench_parse.py new file mode 100644 index 0000000..37c569e --- /dev/null +++ b/pathable/benchmarks/bench_parse.py @@ -0,0 +1,29 @@ +"""Standalone parse benchmark command.""" + +import argparse +from typing import Iterable + +from pathable.benchmarks.core import add_common_args +from pathable.benchmarks.core import results_to_json +from pathable.benchmarks.core import write_json +from pathable.benchmarks.scenarios.parse import run_parse_scenarios + + +def main(argv: Iterable[str] | None = None) -> int: + parser = argparse.ArgumentParser() + add_common_args(parser) + args = parser.parse_args(list(argv) if argv is not None else None) + + results = run_parse_scenarios( + quick=args.quick, + repeats=args.repeats, + warmup_loops=args.warmup_loops, + ) + + payload = results_to_json(results=results) + write_json(args.output, payload) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pathable/benchmarks/cli.py b/pathable/benchmarks/cli.py new file mode 100644 index 0000000..3f1c1da --- /dev/null +++ b/pathable/benchmarks/cli.py @@ -0,0 +1,80 @@ +"""CLI entrypoint for pathable benchmarks.""" + +import argparse +from typing import Iterable + +from pathable.benchmarks.compare import main as compare_main +from pathable.benchmarks.core import write_json +from pathable.benchmarks.run import run_all + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="pathable-bench") + subparsers = parser.add_subparsers(dest="command", required=True) + + run_parser = subparsers.add_parser("run", help="Run benchmark scenarios") + 
run_parser.add_argument( + "--impl", + required=True, + help="Implementation target, e.g. pathable.LookupPath", + ) + run_parser.add_argument("--output", required=True) + run_parser.add_argument("--quick", action="store_true") + run_parser.add_argument("--repeats", type=int, default=5) + run_parser.add_argument("--warmup-loops", type=int, default=1) + run_parser.add_argument( + "--scenario", + action="append", + choices=["parse", "lookup"], + help=( + "Run only selected scenario groups. Repeat flag to select multiple; " + "defaults to both parse and lookup." + ), + ) + + compare_parser = subparsers.add_parser( + "compare", + help="Compare benchmark JSON reports", + ) + compare_parser.add_argument("--baseline", required=True) + compare_parser.add_argument("--candidate", required=True) + compare_parser.add_argument("--tolerance", type=float, default=0.20) + return parser + + +def main(argv: Iterable[str] | None = None) -> int: + parser = _build_parser() + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.command == "run": + scenarios = ( + tuple(args.scenario) if args.scenario else ("parse", "lookup") + ) + payload = run_all( + impl_target=args.impl, + quick=args.quick, + repeats=args.repeats, + warmup_loops=args.warmup_loops, + scenarios=scenarios, + ) + write_json(args.output, payload) + return 0 + + if args.command == "compare": + return compare_main( + [ + "--baseline", + args.baseline, + "--candidate", + args.candidate, + "--tolerance", + str(args.tolerance), + ] + ) + + parser.error("unknown command") + return 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pathable/benchmarks/compare.py b/pathable/benchmarks/compare.py new file mode 100644 index 0000000..d7eedb7 --- /dev/null +++ b/pathable/benchmarks/compare.py @@ -0,0 +1,149 @@ +"""Compare benchmark JSON results.""" + +import argparse +import json +from dataclasses import dataclass +from typing import Any +from typing import Iterable +from typing import Mapping +from typing import cast + + +@dataclass(frozen=True) +class ScenarioComparison: + name: str + baseline_ops: float + candidate_ops: float + ratio: float + + +@dataclass(frozen=True) +class CompareResult: + comparisons: list[ScenarioComparison] + regressions: list[ScenarioComparison] + baseline_only: list[str] + candidate_only: list[str] + + +def _load(path: str) -> Mapping[str, Any]: + with open(path, "r", encoding="utf-8") as f: + data_any = json.load(f) + if not isinstance(data_any, dict): + raise ValueError("Invalid report: expected top-level JSON object") + return cast(dict[str, Any], data_any) + + +def _extract_ops(report: Mapping[str, Any]) -> dict[str, float]: + benchmarks = report.get("benchmarks") + if not isinstance(benchmarks, dict): + raise ValueError("Invalid report: missing 'benchmarks' dict") + + benchmarks_d = cast(dict[str, Any], benchmarks) + + out: dict[str, float] = {} + for name, payload in benchmarks_d.items(): + if not isinstance(payload, dict): + continue + payload_d = cast(dict[str, Any], payload) + ops_any = payload_d.get("median_ops_per_sec") + ops = ops_any if isinstance(ops_any, (int, float)) else None + if ops is not None: + out[name] = float(ops) + return out + + +def compare( + *, + baseline: Mapping[str, Any], + candidate: Mapping[str, Any], + tolerance: float, +) -> CompareResult: + if tolerance < 0: + raise ValueError("tolerance must be >= 0") + + b = _extract_ops(baseline) + c = _extract_ops(candidate) + + b_names = set(b) + c_names = set(c) + common_names = sorted(b_names & c_names) + 
+ comparisons: list[ScenarioComparison] = [] + for name in common_names: + bops = b[name] + cops = c[name] + ratio = cops / bops if bops > 0 else float("inf") + comparisons.append( + ScenarioComparison( + name=name, + baseline_ops=bops, + candidate_ops=cops, + ratio=ratio, + ) + ) + + floor_ratio = 1.0 - tolerance + regressions = [x for x in comparisons if x.ratio < floor_ratio] + + return CompareResult( + comparisons=comparisons, + regressions=regressions, + baseline_only=sorted(b_names - c_names), + candidate_only=sorted(c_names - b_names), + ) + + +def main(argv: Iterable[str] | None = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("--baseline", required=True) + parser.add_argument("--candidate", required=True) + parser.add_argument( + "--tolerance", + type=float, + default=0.20, + help="Allowed slowdown (e.g. 0.20 means 20% slower allowed).", + ) + args = parser.parse_args(list(argv) if argv is not None else None) + + baseline = _load(args.baseline) + candidate = _load(args.candidate) + + result = compare( + baseline=baseline, + candidate=candidate, + tolerance=args.tolerance, + ) + + common_count = len(result.comparisons) + print( + "scenarios: " + f"common={common_count} " + f"baseline_only={len(result.baseline_only)} " + f"candidate_only={len(result.candidate_only)}" + ) + + if common_count == 0: + print("ERROR: no overlapping scenarios between reports") + return 1 + + print("scenario\tbaseline_ops/s\tcandidate_ops/s\tratio") + for row in result.comparisons: + print( + f"{row.name}\t{row.baseline_ops:.2f}\t{row.candidate_ops:.2f}\t{row.ratio:.3f}" + ) + + if result.regressions: + print("\nREGRESSIONS:") + for row in result.regressions: + print( + f"- {row.name}: {row.ratio:.3f}x " + f"(baseline {row.baseline_ops:.2f} ops/s, " + f"candidate {row.candidate_ops:.2f} ops/s)" + ) + return 1 + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pathable/benchmarks/core.py b/pathable/benchmarks/core.py new file mode 100644 index 0000000..8649dc4 --- /dev/null +++ b/pathable/benchmarks/core.py @@ -0,0 +1,159 @@ +"""Minimal benchmark utilities (dependency-free).""" + +import argparse +import json +import os +import platform +import statistics +import sys +import time +from dataclasses import dataclass +from typing import Any +from typing import Callable +from typing import Iterable +from typing import Mapping +from typing import MutableMapping + + +@dataclass(frozen=True) +class BenchmarkResult: + name: str + loops: int + repeats: int + warmup_loops: int + times_s: tuple[float, ...] 
+ + @property + def total_s_median(self) -> float: + return statistics.median(self.times_s) + + @property + def per_loop_s_median(self) -> float: + if self.loops <= 0: + return float("inf") + return self.total_s_median / self.loops + + @property + def ops_per_sec_median(self) -> float: + per = self.per_loop_s_median + if per <= 0: + return float("inf") + return 1.0 / per + + +def _safe_int_env(name: str) -> int | None: + value = os.environ.get(name) + if value is None: + return None + try: + return int(value) + except ValueError: + return None + + +def default_meta() -> dict[str, Any]: + return { + "python": sys.version, + "python_implementation": platform.python_implementation(), + "platform": platform.platform(), + "machine": platform.machine(), + "processor": platform.processor(), + "pythondotorg": platform.python_build(), + "py_hash_seed": os.environ.get("PYTHONHASHSEED"), + "github_sha": os.environ.get("GITHUB_SHA"), + "github_ref": os.environ.get("GITHUB_REF"), + "ci": os.environ.get("CI"), + } + + +def run_benchmark( + name: str, + func: Callable[[], Any], + *, + loops: int, + repeats: int = 5, + warmup_loops: int = 1, +) -> BenchmarkResult: + if loops <= 0: + raise ValueError("loops must be > 0") + if repeats <= 0: + raise ValueError("repeats must be > 0") + if warmup_loops < 0: + raise ValueError("warmup_loops must be >= 0") + + for _ in range(warmup_loops): + for __ in range(loops): + func() + + times: list[float] = [] + for _ in range(repeats): + start = time.perf_counter() + for __ in range(loops): + func() + end = time.perf_counter() + times.append(end - start) + + return BenchmarkResult( + name=name, + loops=loops, + repeats=repeats, + warmup_loops=warmup_loops, + times_s=tuple(times), + ) + + +def results_to_json( + *, + results: Iterable[BenchmarkResult], + meta: Mapping[str, Any] | None = None, +) -> dict[str, Any]: + out: dict[str, Any] = { + "meta": dict(meta or default_meta()), + "benchmarks": {}, + } + + bench: MutableMapping[str, Any] = out["benchmarks"] + for r in results: + bench[r.name] = { + "loops": r.loops, + "repeats": r.repeats, + "warmup_loops": r.warmup_loops, + "times_s": list(r.times_s), + "median_total_s": r.total_s_median, + "median_per_loop_s": r.per_loop_s_median, + "median_ops_per_sec": r.ops_per_sec_median, + } + + return out + + +def add_common_args(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--output", + required=True, + help="Write JSON results to this file.", + ) + parser.add_argument( + "--quick", + action="store_true", + help="Run fewer iterations for a fast sanity check.", + ) + parser.add_argument( + "--repeats", + type=int, + default=_safe_int_env("PATHABLE_BENCH_REPEATS") or 5, + help="Number of repeats per scenario (median is reported).", + ) + parser.add_argument( + "--warmup-loops", + type=int, + default=_safe_int_env("PATHABLE_BENCH_WARMUP") or 1, + help="Warmup passes before timing.", + ) + + +def write_json(path: str, payload: Mapping[str, Any]) -> None: + os.makedirs(os.path.dirname(path) or ".", exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(payload, f, indent=2, sort_keys=True) + f.write("\n") diff --git a/pathable/benchmarks/registry.py b/pathable/benchmarks/registry.py new file mode 100644 index 0000000..07532f6 --- /dev/null +++ b/pathable/benchmarks/registry.py @@ -0,0 +1,48 @@ +"""Implementation resolution for benchmark CLI.""" + +import importlib +from typing import Any + +from pathable.paths import BasePath + + +def _resolve_qualname(module_name: str, qualname: str) -> Any: 
+ module = importlib.import_module(module_name) + obj: Any = module + for part in qualname.split("."): + obj = getattr(obj, part) + return obj + + +def resolve_impl(target: str) -> type[BasePath]: + """Resolve an implementation target into a BasePath subclass. + + Supported formats: + - ``module.path.ClassName`` + - ``module.path:ClassName`` + """ + if not target: + raise ValueError("implementation target must be non-empty") + + obj: Any + if ":" in target: + module_name, qualname = target.split(":", 1) + obj = _resolve_qualname(module_name, qualname) + else: + if "." not in target: + raise ValueError( + "implementation target must be dotted path or module:qualname" + ) + module_name, qualname = target.rsplit(".", 1) + obj = _resolve_qualname(module_name, qualname) + + if not isinstance(obj, type): + raise TypeError( + f"implementation target must resolve to a class: {target}" + ) + if not issubclass(obj, BasePath): + raise TypeError( + "implementation target must resolve to BasePath subclass: " + f"{target}" + ) + return obj diff --git a/pathable/benchmarks/run.py b/pathable/benchmarks/run.py new file mode 100644 index 0000000..f5096ff --- /dev/null +++ b/pathable/benchmarks/run.py @@ -0,0 +1,64 @@ +"""Benchmark runner orchestration.""" + +from typing import Any + +from pathable.benchmarks.core import BenchmarkResult +from pathable.benchmarks.core import default_meta +from pathable.benchmarks.core import results_to_json +from pathable.benchmarks.registry import resolve_impl +from pathable.benchmarks.scenarios.lookup import run_lookup_scenarios +from pathable.benchmarks.scenarios.parse import run_parse_scenarios +from pathable.paths import AccessorPath + + +def run_all( + *, + impl_target: str, + quick: bool, + repeats: int, + warmup_loops: int, + scenarios: tuple[str, ...] 
= ("parse", "lookup"), +) -> dict[str, Any]: + impl = resolve_impl(impl_target) + + results: list[BenchmarkResult] = [] + skipped: list[str] = [] + + wanted = set(scenarios) + valid = {"parse", "lookup"} + unknown = sorted(wanted - valid) + if unknown: + raise ValueError("unknown scenarios requested: " + ", ".join(unknown)) + + if "parse" in wanted: + results.extend( + run_parse_scenarios( + quick=quick, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + if "lookup" in wanted: + if issubclass(impl, AccessorPath): + results.extend( + run_lookup_scenarios( + impl, + quick=quick, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + else: + skipped.append("lookup") + + meta = default_meta() + meta["impl"] = f"{impl.__module__}.{impl.__qualname__}" + meta["quick"] = quick + meta["repeats"] = repeats + meta["warmup_loops"] = warmup_loops + meta["requested_scenarios"] = sorted(wanted) + if skipped: + meta["skipped_scenarios"] = skipped + + return results_to_json(results=results, meta=meta) diff --git a/pathable/benchmarks/scenarios/__init__.py b/pathable/benchmarks/scenarios/__init__.py new file mode 100644 index 0000000..32729f5 --- /dev/null +++ b/pathable/benchmarks/scenarios/__init__.py @@ -0,0 +1 @@ +"""Benchmark scenarios.""" diff --git a/pathable/benchmarks/scenarios/lookup.py b/pathable/benchmarks/scenarios/lookup.py new file mode 100644 index 0000000..e7834c8 --- /dev/null +++ b/pathable/benchmarks/scenarios/lookup.py @@ -0,0 +1,282 @@ +"""Benchmarks for AccessorPath lookup-style operations.""" + +from typing import Any + +from pathable.accessors import LookupAccessor +from pathable.benchmarks.core import BenchmarkResult +from pathable.benchmarks.core import run_benchmark +from pathable.paths import AccessorPath + + +def _build_deep_tree(depth: int) -> dict[str, Any]: + node: dict[str, Any] = {"value": 1} + for i in range(depth - 1, -1, -1): + node = {f"k{i}": node} + return node + + +def _deep_keys(depth: int) -> tuple[str, ...]: + return tuple(f"k{i}" for i in range(depth)) + + +def _make_deep_path(root: AccessorPath[Any, Any, Any], depth: int) -> Any: + p: Any = root + for k in _deep_keys(depth): + p = p / k + return p + + +def _build_mapping(size: int) -> dict[str, int]: + return {f"k{i}": i for i in range(size)} + + +def _from_lookup( + impl: type[AccessorPath[Any, Any, Any]], lookup: Any, *parts: Any +) -> AccessorPath[Any, Any, Any]: + ctor = getattr(impl, "from_lookup", None) + if ctor is None or not callable(ctor): + raise TypeError( + f"{impl.__module__}.{impl.__qualname__} does not provide from_lookup()" + ) + path = ctor(lookup, *parts) + if not isinstance(path, AccessorPath): + raise TypeError("from_lookup() must return AccessorPath instance") + return path + + +def _benchmark_lookup( + impl: type[AccessorPath[Any, Any, Any]], + *, + quick: bool, + repeats: int, + warmup_loops: int, +) -> list[BenchmarkResult]: + results: list[BenchmarkResult] = [] + depth = 25 if not quick else 10 + loops_hit = 200_000 if not quick else 20_000 + loops_miss = 80_000 if not quick else 10_000 + + data = _build_deep_tree(depth) + root = _from_lookup(impl, data) + deep = _make_deep_path(root, depth) + + results.append( + run_benchmark( + f"lookup.read_value.cache_hit.depth{depth}", + deep.read_value, + loops=loops_hit, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + leaf_parent = _from_lookup( + impl, + {"root": {"branch": {"leaf": "value"}}}, + "root", + "branch", + ) + + def getitem_leaf(_p: AccessorPath[Any, Any, Any] = leaf_parent) -> None: + _ = _p["leaf"] + + 
loops_getitem_leaf = 200_000 if not quick else 20_000 + results.append( + run_benchmark( + "lookup.getitem.leaf", + getitem_leaf, + loops=loops_getitem_leaf, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + branch_parent = _from_lookup( + impl, + {"root": {"branch": {"child": {"x": 1}}}}, + "root", + "branch", + ) + + def getitem_branch( + _p: AccessorPath[Any, Any, Any] = branch_parent, + ) -> None: + _ = _p["child"] + + loops_getitem_branch = 200_000 if not quick else 20_000 + results.append( + run_benchmark( + "lookup.getitem.branch", + getitem_branch, + loops=loops_getitem_branch, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + deep_accessor = deep.accessor + if not isinstance(deep_accessor, LookupAccessor): + raise TypeError( + "lookup scenarios require LookupAccessor-backed implementation" + ) + deep_accessor.disable_cache() + results.append( + run_benchmark( + f"lookup.read_value.cache_disabled.depth{depth}", + deep.read_value, + loops=loops_miss, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + data2 = { + "a": _build_deep_tree(depth), + "x": _build_deep_tree(depth), + } + root2 = _from_lookup(impl, data2) + a_path = _make_deep_path(root2 / "a", depth) + x_path = _make_deep_path(root2 / "x", depth) + + root2_accessor = root2.accessor + if not isinstance(root2_accessor, LookupAccessor): + raise TypeError( + "lookup scenarios require LookupAccessor-backed implementation" + ) + root2_accessor.enable_cache(maxsize=1) + + toggle = {"i": 0} + + def read_alternating() -> None: + if toggle["i"] & 1: + x_path.read_value() + else: + a_path.read_value() + toggle["i"] += 1 + + loops_eviction = 120_000 if not quick else 15_000 + results.append( + run_benchmark( + f"lookup.read_value.eviction_alternate.maxsize1.depth{depth}", + read_alternating, + loops=loops_eviction, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + sizes = [10, 1_000, 50_000] if not quick else [10, 1_000] + for size in sizes: + mapping = _build_mapping(size) + p = _from_lookup(impl, {"root": mapping}) / "root" + + loops_keys = 5_000 if size <= 1_000 else 200 + if quick: + loops_keys = min(loops_keys, 500) + + results.append( + run_benchmark( + f"lookup.keys.mapping.size{size}", + p.keys, + loops=loops_keys, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + probe_key = f"k{size - 1}" if size else "k0" + loops_contains = 20_000 if size <= 1_000 else 500 + if quick: + loops_contains = min(loops_contains, 2_000) + + def contains_probe( + _p: AccessorPath[Any, Any, Any] = p, + _key: str = probe_key, + ) -> None: + _ = _key in _p + + results.append( + run_benchmark( + f"lookup.contains.mapping.size{size}", + contains_probe, + loops=loops_contains, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + loops_floordiv = 20_000 if size <= 1_000 else 500 + if quick: + loops_floordiv = min(loops_floordiv, 2_000) + + def floordiv_probe( + _p: AccessorPath[Any, Any, Any] = p, + _key: str = probe_key, + ) -> None: + _ = _p // _key + + results.append( + run_benchmark( + f"lookup.floordiv.mapping.size{size}", + floordiv_probe, + loops=loops_floordiv, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + missing_key = "missing" + + def floordiv_missing_probe( + _p: AccessorPath[Any, Any, Any] = p, + _key: str = missing_key, + ) -> None: + try: + _ = _p // _key + except KeyError: + return + + results.append( + run_benchmark( + f"lookup.floordiv_missing.mapping.size{size}", + floordiv_missing_probe, + loops=loops_floordiv, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + 
+ loops_iter = 500 if size <= 1_000 else 3 + if quick: + loops_iter = min(loops_iter, 50) + + def iter_children(_p: AccessorPath[Any, Any, Any] = p) -> None: + for _ in _p: + pass + + results.append( + run_benchmark( + f"lookup.iter_children.mapping.size{size}", + iter_children, + loops=loops_iter, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + return results + + +def run_lookup_scenarios( + impl: type[AccessorPath[Any, Any, Any]], + *, + quick: bool, + repeats: int, + warmup_loops: int, +) -> list[BenchmarkResult]: + return _benchmark_lookup( + impl, + quick=quick, + repeats=repeats, + warmup_loops=warmup_loops, + ) diff --git a/pathable/benchmarks/scenarios/parse.py b/pathable/benchmarks/scenarios/parse.py new file mode 100644 index 0000000..cecb283 --- /dev/null +++ b/pathable/benchmarks/scenarios/parse.py @@ -0,0 +1,68 @@ +"""Benchmarks for parsing and BasePath construction.""" + +from pathable.benchmarks.core import BenchmarkResult +from pathable.benchmarks.core import run_benchmark +from pathable.paths import BasePath + + +def _build_args(n: int) -> list[object]: + out: list[object] = [] + for i in range(n): + if i % 11 == 0: + out.append(".") + elif i % 11 == 1: + out.append(b"bytes") + elif i % 11 == 2: + out.append(i) + elif i % 11 == 3: + out.append(f"a/{i}/b") + else: + out.append(f"seg{i}") + return out + + +def run_parse_scenarios( + *, quick: bool, repeats: int, warmup_loops: int +) -> list[BenchmarkResult]: + results: list[BenchmarkResult] = [] + sizes = [10, 100, 1_000] if not quick else [10, 100] + + for n in sizes: + inputs = _build_args(n) + inputs_t = tuple(inputs) + + loops_parse = 80_000 if n <= 100 else 10_000 + if quick: + loops_parse = min(loops_parse, 10_000) + + def do_parse(_inputs: tuple[object, ...] = inputs_t) -> None: + BasePath._parse_args(_inputs) + + results.append( + run_benchmark( + f"parse.BasePath._parse_args.size{n}", + do_parse, + loops=loops_parse, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + loops_basepath = 60_000 if n <= 100 else 3_000 + if quick: + loops_basepath = min(loops_basepath, 5_000) + + def do_basepath(_inputs: tuple[object, ...] 
= inputs_t) -> None: + BasePath(*_inputs) + + results.append( + run_benchmark( + f"paths.BasePath.constructor.size{n}", + do_basepath, + loops=loops_basepath, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + return results diff --git a/poetry.lock b/poetry.lock index 23d7ac6..2e4cb44 100644 --- a/poetry.lock +++ b/poetry.lock @@ -936,7 +936,10 @@ six = ">=1.9.0,<2" docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"] testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "packaging (>=20.0) ; python_version > \"3.4\"", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)"] +[extras] +bench = [] + [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "fef5732c8b443ab4a7d7f997abb08cc050886e42b3e5c882df3f5b3ecf3e7314" +content-hash = "5ee48c773806e8267b1f69c691f987f8a5e3ff9798638a244687ecd8ac4a593f" diff --git a/pyproject.toml b/pyproject.toml index ef91984..e09a65e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,12 @@ classifiers = [ [tool.poetry.dependencies] python = ">=3.10,<4.0" +[tool.poetry.extras] +bench = [] + +[tool.poetry.scripts] +pathable-bench = "pathable.benchmarks.cli:main" + [tool.poetry.group.dev.dependencies] tbump = "^6.11.0" pre-commit = "*" diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py index 8b67c9f..51d3ac7 100644 --- a/tests/benchmarks/__init__.py +++ b/tests/benchmarks/__init__.py @@ -1,7 +1,7 @@ -"""Benchmark scripts for pathable. +"""Deprecated compatibility wrappers for pathable benchmarks. -Run as modules for the most reliable import behavior: +Prefer: -- `python -m tests.benchmarks.bench_lookup --output bench-lookup.json` -- `python -m tests.benchmarks.bench_parse --output bench-parse.json` +- `pathable-bench run --impl pathable.LookupPath --output bench.json` +- `pathable-bench compare --baseline a.json --candidate b.json` """ diff --git a/tests/benchmarks/bench_lookup.py b/tests/benchmarks/bench_lookup.py index 294f0b3..b10f2a2 100644 --- a/tests/benchmarks/bench_lookup.py +++ b/tests/benchmarks/bench_lookup.py @@ -1,276 +1,28 @@ -"""Benchmarks for LookupPath / LookupAccessor hot paths. 
+"""Compatibility wrapper for deprecated tests.benchmarks command.""" -These benchmarks avoid filesystem I/O (too noisy for CI) and focus on: -- traversal cost (cache disabled) -- cache hit speed -- LRU eviction patterns -- keys/contains/iter overhead on large mappings -""" - -import argparse -from typing import Any +import sys +import warnings +from pathlib import Path from typing import Iterable -from pathable.accessors import LookupAccessor -from pathable.paths import LookupPath - try: - # Prefer module execution: `python -m tests.benchmarks.bench_lookup ...` - from .bench_utils import BenchmarkResult - from .bench_utils import add_common_args - from .bench_utils import results_to_json - from .bench_utils import run_benchmark - from .bench_utils import write_json -except ImportError: # pragma: no cover - # Allow direct execution: `python tests/benchmarks/bench_lookup.py ...` - from bench_utils import BenchmarkResult # type: ignore[no-redef] - from bench_utils import add_common_args # type: ignore[no-redef] - from bench_utils import results_to_json # type: ignore[no-redef] - from bench_utils import run_benchmark # type: ignore[no-redef] - from bench_utils import write_json # type: ignore[no-redef] - - -def _build_deep_tree(depth: int) -> dict[str, Any]: - node: dict[str, Any] = {"value": 1} - for i in range(depth - 1, -1, -1): - node = {f"k{i}": node} - return node - - -def _deep_keys(depth: int) -> tuple[str, ...]: - return tuple(f"k{i}" for i in range(depth)) + from pathable.benchmarks.bench_lookup import main as _main +except ModuleNotFoundError as exc: + if exc.name != "pathable": + raise + sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + from pathable.benchmarks.bench_lookup import main as _main - -def _make_deep_path(root: LookupPath, depth: int) -> LookupPath: - p = root - for k in _deep_keys(depth): - p = p / k - return p - - -def _build_mapping(size: int) -> dict[str, int]: - return {f"k{i}": i for i in range(size)} +_MESSAGE = ( + "tests.benchmarks.bench_lookup is deprecated and will be removed in a " + "future release; use `pathable-bench run --impl pathable.LookupPath ...` " + "or `python -m pathable.benchmarks.bench_lookup ...` instead." +) def main(argv: Iterable[str] | None = None) -> int: - parser = argparse.ArgumentParser() - add_common_args(parser) - args = parser.parse_args(list(argv) if argv is not None else None) - - repeats: int = args.repeats - warmup_loops: int = args.warmup_loops - - results: list[BenchmarkResult] = [] - - # --- Lookup read benchmarks --- - depth = 25 if not args.quick else 10 - loops_hit = 200_000 if not args.quick else 20_000 - loops_miss = 80_000 if not args.quick else 10_000 - - data = _build_deep_tree(depth) - root = LookupPath.from_lookup(data) - deep = _make_deep_path(root, depth) - - # Cache hit: repeated reads of the same path. - results.append( - run_benchmark( - f"lookup.read_value.cache_hit.depth{depth}", - deep.read_value, - loops=loops_hit, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # __getitem__ leaf read: should return value for non-traversable child. 
- leaf_parent = LookupPath.from_lookup( - {"root": {"branch": {"leaf": "value"}}}, "root", "branch" - ) - - def getitem_leaf(_p: LookupPath = leaf_parent) -> None: - _ = _p["leaf"] - - loops_getitem_leaf = 200_000 if not args.quick else 20_000 - results.append( - run_benchmark( - "lookup.getitem.leaf", - getitem_leaf, - loops=loops_getitem_leaf, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # __getitem__ branch read: should return child path for traversable child. - branch_parent = LookupPath.from_lookup( - {"root": {"branch": {"child": {"x": 1}}}}, "root", "branch" - ) - - def getitem_branch(_p: LookupPath = branch_parent) -> None: - _ = _p["child"] - - loops_getitem_branch = 200_000 if not args.quick else 20_000 - results.append( - run_benchmark( - "lookup.getitem.branch", - getitem_branch, - loops=loops_getitem_branch, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # Cache miss cost: disable cache and repeatedly read. - deep_accessor = deep.accessor - if not isinstance(deep_accessor, LookupAccessor): - raise TypeError("Expected LookupPath.accessor to be LookupAccessor") - deep_accessor.disable_cache() - results.append( - run_benchmark( - f"lookup.read_value.cache_disabled.depth{depth}", - deep.read_value, - loops=loops_miss, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # LRU eviction: alternate two distinct deep paths with maxsize=1. - data2 = { - "a": _build_deep_tree(depth), - "x": _build_deep_tree(depth), - } - root2 = LookupPath.from_lookup(data2) - a_path = _make_deep_path(root2 / "a", depth) - x_path = _make_deep_path(root2 / "x", depth) - - root2_accessor = root2.accessor - if not isinstance(root2_accessor, LookupAccessor): - raise TypeError("Expected LookupPath.accessor to be LookupAccessor") - root2_accessor.enable_cache(maxsize=1) - - toggle = {"i": 0} - - def read_alternating() -> None: - if toggle["i"] & 1: - x_path.read_value() - else: - a_path.read_value() - toggle["i"] += 1 - - loops_eviction = 120_000 if not args.quick else 15_000 - results.append( - run_benchmark( - f"lookup.read_value.eviction_alternate.maxsize1.depth{depth}", - read_alternating, - loops=loops_eviction, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # --- Large mapping operations --- - sizes = [10, 1_000, 50_000] if not args.quick else [10, 1_000] - for size in sizes: - mapping = _build_mapping(size) - p = LookupPath.from_lookup({"root": mapping}) / "root" - - # keys() materializes a list in LookupAccessor.keys for mappings. - loops_keys = 5_000 if size <= 1_000 else 200 - if args.quick: - loops_keys = min(loops_keys, 500) - - results.append( - run_benchmark( - f"lookup.keys.mapping.size{size}", - p.keys, - loops=loops_keys, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # contains: AccessorPath.__contains__ calls keys() then `in`. - probe_key = f"k{size - 1}" if size else "k0" - loops_contains = 20_000 if size <= 1_000 else 500 - if args.quick: - loops_contains = min(loops_contains, 2_000) - - def contains_probe(_p: LookupPath = p, _key: str = probe_key) -> None: - _ = _key in _p - - results.append( - run_benchmark( - f"lookup.contains.mapping.size{size}", - contains_probe, - loops=loops_contains, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # floordiv (`//`): strict child assertion. - # Previously this was implemented via keys()+membership, which - # materializes all keys for mappings. 
- loops_floordiv = 20_000 if size <= 1_000 else 500 - if args.quick: - loops_floordiv = min(loops_floordiv, 2_000) - - def floordiv_probe(_p: LookupPath = p, _key: str = probe_key) -> None: - _ = _p // _key - - results.append( - run_benchmark( - f"lookup.floordiv.mapping.size{size}", - floordiv_probe, - loops=loops_floordiv, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - missing_key = "missing" - - def floordiv_missing_probe( - _p: LookupPath = p, _key: str = missing_key - ) -> None: - try: - _ = _p // _key - except KeyError: - return - - results.append( - run_benchmark( - f"lookup.floordiv_missing.mapping.size{size}", - floordiv_missing_probe, - loops=loops_floordiv, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - # iterating children: should call keys() once and yield child paths. - loops_iter = 500 if size <= 1_000 else 3 - if args.quick: - loops_iter = min(loops_iter, 50) - - def iter_children(_p: LookupPath = p) -> None: - for _ in _p: - pass - - results.append( - run_benchmark( - f"lookup.iter_children.mapping.size{size}", - iter_children, - loops=loops_iter, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - payload = results_to_json(results=results) - write_json(args.output, payload) - return 0 + warnings.warn(_MESSAGE, DeprecationWarning, stacklevel=2) + return _main(argv) if __name__ == "__main__": diff --git a/tests/benchmarks/bench_parse.py b/tests/benchmarks/bench_parse.py index 89e4967..b5921d9 100644 --- a/tests/benchmarks/bench_parse.py +++ b/tests/benchmarks/bench_parse.py @@ -1,96 +1,28 @@ -"""Benchmarks for parsing and BasePath construction.""" +"""Compatibility wrapper for deprecated tests.benchmarks command.""" -import argparse +import sys +import warnings +from pathlib import Path from typing import Iterable -from pathable.paths import BasePath - try: - # Prefer module execution: `python -m tests.benchmarks.bench_parse ...` - from .bench_utils import BenchmarkResult - from .bench_utils import add_common_args - from .bench_utils import results_to_json - from .bench_utils import run_benchmark - from .bench_utils import write_json -except ImportError: # pragma: no cover - # Allow direct execution: `python tests/benchmarks/bench_parse.py ...` - from bench_utils import BenchmarkResult # type: ignore[no-redef] - from bench_utils import add_common_args # type: ignore[no-redef] - from bench_utils import results_to_json # type: ignore[no-redef] - from bench_utils import run_benchmark # type: ignore[no-redef] - from bench_utils import write_json # type: ignore[no-redef] - + from pathable.benchmarks.bench_parse import main as _main +except ModuleNotFoundError as exc: + if exc.name != "pathable": + raise + sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + from pathable.benchmarks.bench_parse import main as _main -def _build_args(n: int) -> list[object]: - # Mix in segments that exercise splitting, filtering, bytes decode, and ints. - out: list[object] = [] - for i in range(n): - if i % 11 == 0: - out.append(".") - elif i % 11 == 1: - out.append(b"bytes") - elif i % 11 == 2: - out.append(i) - elif i % 11 == 3: - out.append(f"a/{i}/b") - else: - out.append(f"seg{i}") - return out +_MESSAGE = ( + "tests.benchmarks.bench_parse is deprecated and will be removed in a " + "future release; use `pathable-bench run --impl pathable.LookupPath ...` " + "or `python -m pathable.benchmarks.bench_parse ...` instead." 
+) def main(argv: Iterable[str] | None = None) -> int: - parser = argparse.ArgumentParser() - add_common_args(parser) - args = parser.parse_args(list(argv) if argv is not None else None) - - repeats: int = args.repeats - warmup_loops: int = args.warmup_loops - - results: list[BenchmarkResult] = [] - - sizes = [10, 100, 1_000] if not args.quick else [10, 100] - - for n in sizes: - inputs = _build_args(n) - inputs_t = tuple(inputs) - - loops_parse = 80_000 if n <= 100 else 10_000 - if args.quick: - loops_parse = min(loops_parse, 10_000) - - def do_parse(_inputs: tuple[object, ...] = inputs_t) -> None: - BasePath._parse_args(_inputs) - - results.append( - run_benchmark( - f"parse.BasePath._parse_args.size{n}", - do_parse, - loops=loops_parse, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - loops_basepath = 60_000 if n <= 100 else 3_000 - if args.quick: - loops_basepath = min(loops_basepath, 5_000) - - def do_basepath(_inputs: tuple[object, ...] = inputs_t) -> None: - BasePath(*_inputs) - - results.append( - run_benchmark( - f"paths.BasePath.constructor.size{n}", - do_basepath, - loops=loops_basepath, - repeats=repeats, - warmup_loops=warmup_loops, - ) - ) - - payload = results_to_json(results=results) - write_json(args.output, payload) - return 0 + warnings.warn(_MESSAGE, DeprecationWarning, stacklevel=2) + return _main(argv) if __name__ == "__main__": diff --git a/tests/benchmarks/bench_utils.py b/tests/benchmarks/bench_utils.py index 5942187..9101685 100644 --- a/tests/benchmarks/bench_utils.py +++ b/tests/benchmarks/bench_utils.py @@ -1,163 +1,39 @@ -"""Minimal benchmark utilities (dependency-free). +"""Compatibility exports for deprecated benchmark utility module.""" -This module is intentionally simple to keep benchmarks stable and easy to run -locally and in CI. -""" - -import argparse -import json -import os -import platform -import statistics import sys -import time -from dataclasses import dataclass -from typing import Any -from typing import Callable -from typing import Iterable -from typing import Mapping -from typing import MutableMapping - - -@dataclass(frozen=True) -class BenchmarkResult: - name: str - loops: int - repeats: int - warmup_loops: int - times_s: tuple[float, ...] 
- - @property - def total_s_median(self) -> float: - return statistics.median(self.times_s) - - @property - def per_loop_s_median(self) -> float: - if self.loops <= 0: - return float("inf") - return self.total_s_median / self.loops - - @property - def ops_per_sec_median(self) -> float: - per = self.per_loop_s_median - if per <= 0: - return float("inf") - return 1.0 / per - - -def _safe_int_env(name: str) -> int | None: - value = os.environ.get(name) - if value is None: - return None - try: - return int(value) - except ValueError: - return None - - -def default_meta() -> dict[str, Any]: - return { - "python": sys.version, - "python_implementation": platform.python_implementation(), - "platform": platform.platform(), - "machine": platform.machine(), - "processor": platform.processor(), - "pythondotorg": platform.python_build(), - "py_hash_seed": os.environ.get("PYTHONHASHSEED"), - "github_sha": os.environ.get("GITHUB_SHA"), - "github_ref": os.environ.get("GITHUB_REF"), - "ci": os.environ.get("CI"), - } - - -def run_benchmark( - name: str, - func: Callable[[], Any], - *, - loops: int, - repeats: int = 5, - warmup_loops: int = 1, -) -> BenchmarkResult: - if loops <= 0: - raise ValueError("loops must be > 0") - if repeats <= 0: - raise ValueError("repeats must be > 0") - if warmup_loops < 0: - raise ValueError("warmup_loops must be >= 0") - - for _ in range(warmup_loops): - for __ in range(loops): - func() - - times: list[float] = [] - for _ in range(repeats): - start = time.perf_counter() - for __ in range(loops): - func() - end = time.perf_counter() - times.append(end - start) - - return BenchmarkResult( - name=name, - loops=loops, - repeats=repeats, - warmup_loops=warmup_loops, - times_s=tuple(times), - ) - - -def results_to_json( - *, - results: Iterable[BenchmarkResult], - meta: Mapping[str, Any] | None = None, -) -> dict[str, Any]: - out: dict[str, Any] = { - "meta": dict(meta or default_meta()), - "benchmarks": {}, - } - - bench: MutableMapping[str, Any] = out["benchmarks"] - for r in results: - bench[r.name] = { - "loops": r.loops, - "repeats": r.repeats, - "warmup_loops": r.warmup_loops, - "times_s": list(r.times_s), - "median_total_s": r.total_s_median, - "median_per_loop_s": r.per_loop_s_median, - "median_ops_per_sec": r.ops_per_sec_median, - } - - return out - - -def add_common_args(parser: argparse.ArgumentParser) -> None: - parser.add_argument( - "--output", - required=True, - help="Write JSON results to this file.", - ) - parser.add_argument( - "--quick", - action="store_true", - help="Run fewer iterations for a fast sanity check.", - ) - parser.add_argument( - "--repeats", - type=int, - default=_safe_int_env("PATHABLE_BENCH_REPEATS") or 5, - help="Number of repeats per scenario (median is reported).", - ) - parser.add_argument( - "--warmup-loops", - type=int, - default=_safe_int_env("PATHABLE_BENCH_WARMUP") or 1, - help="Warmup passes before timing.", - ) - - -def write_json(path: str, payload: Mapping[str, Any]) -> None: - os.makedirs(os.path.dirname(path) or ".", exist_ok=True) - with open(path, "w", encoding="utf-8") as f: - json.dump(payload, f, indent=2, sort_keys=True) - f.write("\n") +import warnings +from pathlib import Path + +try: + from pathable.benchmarks.core import BenchmarkResult + from pathable.benchmarks.core import add_common_args + from pathable.benchmarks.core import default_meta + from pathable.benchmarks.core import results_to_json + from pathable.benchmarks.core import run_benchmark + from pathable.benchmarks.core import write_json +except 
ModuleNotFoundError as exc: + if exc.name != "pathable": + raise + sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + from pathable.benchmarks.core import BenchmarkResult + from pathable.benchmarks.core import add_common_args + from pathable.benchmarks.core import default_meta + from pathable.benchmarks.core import results_to_json + from pathable.benchmarks.core import run_benchmark + from pathable.benchmarks.core import write_json + +warnings.warn( + "tests.benchmarks.bench_utils is deprecated and will be removed in a " + "future release; use pathable.benchmarks.core instead.", + DeprecationWarning, + stacklevel=2, +) + +__all__ = [ + "BenchmarkResult", + "add_common_args", + "default_meta", + "results_to_json", + "run_benchmark", + "write_json", +] diff --git a/tests/benchmarks/compare_results.py b/tests/benchmarks/compare_results.py index 08529ee..05a73ae 100644 --- a/tests/benchmarks/compare_results.py +++ b/tests/benchmarks/compare_results.py @@ -1,157 +1,28 @@ -"""Compare two pathable benchmark JSON results. +"""Compatibility wrapper for deprecated tests.benchmarks command.""" -Exits non-zero if candidate regresses beyond the configured tolerance. - -This is meant for local regression checking and optional CI gating. -""" - -import argparse -import json -from dataclasses import dataclass -from typing import Any +import sys +import warnings +from pathlib import Path from typing import Iterable -from typing import Mapping -from typing import cast - - -@dataclass(frozen=True) -class ScenarioComparison: - name: str - baseline_ops: float - candidate_ops: float - ratio: float - baseline_scenario: str - candidate_scenario: str - - -def _canonicalize_scenario_name(name: str) -> str: - """Return a stable scenario identifier across benchmark renames. - - This keeps `compare_results.py` compatible with older JSON reports. - """ - aliases: tuple[tuple[str, str], ...] = ( - # Historical rename in bench_parse: - # parse.parse_args.sizeN -> parse.BasePath._parse_args.sizeN - # Canonicalize both to parse.args.sizeN. 
- ("parse.parse_args.", "parse.args."), - ("parse.BasePath._parse_args.", "parse.args."), - ) - - for prefix, replacement in aliases: - if name.startswith(prefix): - return replacement + name[len(prefix) :] - return name - - -def _load(path: str) -> Mapping[str, Any]: - with open(path, "r", encoding="utf-8") as f: - data_any = json.load(f) - if not isinstance(data_any, dict): - raise ValueError("Invalid report: expected top-level JSON object") - return cast(dict[str, Any], data_any) - - -def _extract_ops(report: Mapping[str, Any]) -> dict[str, float]: - benchmarks = report.get("benchmarks") - if not isinstance(benchmarks, dict): - raise ValueError("Invalid report: missing 'benchmarks' dict") - - benchmarks_d = cast(dict[str, Any], benchmarks) - out: dict[str, float] = {} - for name, payload in benchmarks_d.items(): - if not isinstance(payload, dict): - continue - payload_d = cast(dict[str, Any], payload) - ops_any = payload_d.get("median_ops_per_sec") - ops = ops_any if isinstance(ops_any, (int, float)) else None - if ops is not None: - out[name] = float(ops) - return out +try: + from pathable.benchmarks.compare import main as _main +except ModuleNotFoundError as exc: + if exc.name != "pathable": + raise + sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + from pathable.benchmarks.compare import main as _main - -def compare( - *, - baseline: Mapping[str, Any], - candidate: Mapping[str, Any], - tolerance: float, -) -> tuple[list[ScenarioComparison], list[ScenarioComparison]]: - if tolerance < 0: - raise ValueError("tolerance must be >= 0") - - b_raw = _extract_ops(baseline) - c_raw = _extract_ops(candidate) - - b: dict[str, tuple[str, float]] = {} - c: dict[str, tuple[str, float]] = {} - - for name, ops in b_raw.items(): - canon = _canonicalize_scenario_name(name) - b.setdefault(canon, (name, ops)) - - for name, ops in c_raw.items(): - canon = _canonicalize_scenario_name(name) - c.setdefault(canon, (name, ops)) - - comparisons: list[ScenarioComparison] = [] - for name in sorted(set(b) & set(c)): - b_name, bops = b[name] - c_name, cops = c[name] - ratio = cops / bops if bops > 0 else float("inf") - comparisons.append( - ScenarioComparison( - name=name, - baseline_ops=bops, - candidate_ops=cops, - ratio=ratio, - baseline_scenario=b_name, - candidate_scenario=c_name, - ) - ) - - # Regression if candidate is slower by more than tolerance: - # candidate_ops < baseline_ops * (1 - tolerance) - floor_ratio = 1.0 - tolerance - regressions = [x for x in comparisons if x.ratio < floor_ratio] - return comparisons, regressions +_MESSAGE = ( + "tests.benchmarks.compare_results is deprecated and will be removed in a " + "future release; use `pathable-bench compare ...` or " + "`python -m pathable.benchmarks.compare ...` instead." +) def main(argv: Iterable[str] | None = None) -> int: - parser = argparse.ArgumentParser() - parser.add_argument("--baseline", required=True) - parser.add_argument("--candidate", required=True) - parser.add_argument( - "--tolerance", - type=float, - default=0.20, - help="Allowed slowdown (e.g. 
0.20 means 20% slower allowed).", - ) - args = parser.parse_args(list(argv) if argv is not None else None) - - baseline = _load(args.baseline) - candidate = _load(args.candidate) - - comparisons, regressions = compare( - baseline=baseline, - candidate=candidate, - tolerance=args.tolerance, - ) - - print("scenario\tbaseline_ops/s\tcandidate_ops/s\tratio") - for c in comparisons: - print( - f"{c.name}\t{c.baseline_ops:.2f}\t{c.candidate_ops:.2f}\t{c.ratio:.3f}" - ) - - if regressions: - print("\nREGRESSIONS:") - for r in regressions: - print( - f"- {r.name}: {r.ratio:.3f}x (baseline {r.baseline_ops:.2f} ops/s, candidate {r.candidate_ops:.2f} ops/s)" - ) - return 1 - - return 0 + warnings.warn(_MESSAGE, DeprecationWarning, stacklevel=2) + return _main(argv) if __name__ == "__main__": diff --git a/tests/unit/test_benchmarks.py b/tests/unit/test_benchmarks.py new file mode 100644 index 0000000..c457b37 --- /dev/null +++ b/tests/unit/test_benchmarks.py @@ -0,0 +1,93 @@ +import json +from pathlib import Path + +import pytest + +from pathable.benchmarks.compare import compare +from pathable.benchmarks.compare import main as compare_main +from pathable.benchmarks.registry import resolve_impl +from pathable.benchmarks.run import run_all +from tests.benchmarks import bench_lookup +from tests.benchmarks import bench_parse +from tests.benchmarks import compare_results + + +def test_resolve_impl_by_dotted_name() -> None: + impl = resolve_impl("pathable.LookupPath") + assert impl.__name__ == "LookupPath" + + +def test_compare_uses_only_overlapping_scenarios() -> None: + baseline = { + "benchmarks": { + "same": {"median_ops_per_sec": 100.0}, + "baseline_only": {"median_ops_per_sec": 100.0}, + } + } + candidate = { + "benchmarks": { + "same": {"median_ops_per_sec": 85.0}, + "candidate_only": {"median_ops_per_sec": 200.0}, + } + } + + result = compare( + baseline=baseline, + candidate=candidate, + tolerance=0.10, + ) + + assert [x.name for x in result.comparisons] == ["same"] + assert [x.name for x in result.regressions] == ["same"] + assert result.baseline_only == ["baseline_only"] + assert result.candidate_only == ["candidate_only"] + + +def test_compare_main_fails_on_no_overlap(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.json" + candidate_path = tmp_path / "candidate.json" + + baseline_path.write_text( + json.dumps({"benchmarks": {"a": {"median_ops_per_sec": 1.0}}}), + encoding="utf-8", + ) + candidate_path.write_text( + json.dumps({"benchmarks": {"b": {"median_ops_per_sec": 1.0}}}), + encoding="utf-8", + ) + + code = compare_main( + [ + "--baseline", + str(baseline_path), + "--candidate", + str(candidate_path), + ] + ) + assert code == 1 + + +def test_run_all_can_limit_to_lookup_scenarios() -> None: + payload = run_all( + impl_target="pathable.LookupPath", + quick=True, + repeats=1, + warmup_loops=0, + scenarios=("lookup",), + ) + benchmarks = payload["benchmarks"] + assert benchmarks + assert all(name.startswith("lookup.") for name in benchmarks) + + +@pytest.mark.parametrize( + "module", + [bench_lookup, bench_parse, compare_results], +) +def test_compat_wrapper_emits_deprecation_warning(module: object) -> None: + with pytest.warns(DeprecationWarning): + # Delegate may fail due to missing required args; warning is what matters. + try: + getattr(module, "main")([]) + except SystemExit: + pass