diff --git a/.gitignore b/.gitignore index 3839201..24a96b9 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ dist/ .coverage.* htmlcov/ site/ +.benchmarks/ .venv/ venv/
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/bench_construction.py b/benchmarks/bench_construction.py
new file mode 100644
index 0000000..9b7e5a3
--- /dev/null
+++ b/benchmarks/bench_construction.py
@@ -0,0 +1,61 @@
+"""Benchmarks for the cost of constructing a ``URLPattern``.
+
+Every pattern is compiled exactly once, yet that one-off cost dominates for
+callers that never cache the compiled object (throwaway URL checks in
+scripts, or request handlers that rebuild the pattern on every call and
+have not been profiled yet). The cases below range from a literal-only
+pathname through to MDN's five-component dict pattern; each benchmark
+times one complete constructor call.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from yarlpattern import URLPattern
+
+if TYPE_CHECKING:
+    from pytest_benchmark.fixture import BenchmarkFixture
+
+
+def test_construct_from_pathname_string(
+    benchmark: BenchmarkFixture,
+    pattern_source: str,
+) -> None:
+    """Time building a pathname-only pattern for each parametrized shape."""
+
+    def _build() -> URLPattern:
+        return URLPattern({"pathname": pattern_source})
+
+    benchmark(_build)
+
+
+def test_construct_from_full_url_string(benchmark: BenchmarkFixture) -> None:
+    """Time building a pattern from a single full-URL pattern string."""
+    full_url_pattern = "https://example.com/users/:id(\\d+)/posts/:slug"
+
+    def _build() -> URLPattern:
+        return URLPattern(full_url_pattern)
+
+    benchmark(_build)
+
+
+def test_construct_from_multi_component_dict(benchmark: BenchmarkFixture) -> None:
+    """Time building the five-component MDN kitchen-sink pattern from a dict."""
+    kitchen_sink = {
+        "protocol": "http{s}?",
+        "username": ":user?",
+        "password": ":pass?",
+        "hostname": "{:subdomain.}*example.com",
+        "pathname": "/product/:action*",
+    }
+
+    def _build() -> URLPattern:
+        return URLPattern(kitchen_sink)
+
+    benchmark(_build)
+
+
+def test_construct_with_base_url(benchmark: BenchmarkFixture) -> None:
+    """Compile a relative-form pattern resolved against a baseURL."""
+    benchmark(lambda: URLPattern("/users/:id", "https://example.com/"))
diff --git a/benchmarks/bench_exec.py b/benchmarks/bench_exec.py
new file mode 100644
index 0000000..e8a7d71
--- /dev/null
+++ b/benchmarks/bench_exec.py
@@ -0,0 +1,58 @@
+"""Benchmarks: ``URLPattern.exec()`` throughput.
+
+``exec()`` extends ``test()`` by building a :class:`URLPatternResult` —
+one dict per component containing the input substring and a ``groups``
+sub-dict of named-group captures. That extra allocation is the entire
+delta versus :file:`bench_test.py`; comparing the two measurements
+reveals the overhead of result construction in isolation.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pytest_benchmark.fixture import BenchmarkFixture
+
+    from yarlpattern import URLPattern
+
+
+def test_exec_hit_pathname_only(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    pattern_source: str,  # noqa: ARG001 — included only to parametrize over shapes
+    matching_urls: dict[str, str],
+    request: object,
+) -> None:
+    """exec-hit on each pattern shape, using a URL that the shape matches."""
+    shape = request.node.callspec.id  # type: ignore[attr-defined]
+    pat = compiled_patterns[shape]
+    url = matching_urls[shape]
+    # Guard the "hit" label: a silent miss would benchmark the ``None`` path.
+    assert pat.exec(url) is not None, f"{shape!r} does not match {url!r}"
+    benchmark(pat.exec, url)
+
+
+def test_exec_miss_returns_none(
+    benchmark: BenchmarkFixture,
+    kitchen_sink_pattern: URLPattern,
+    url_nomatch_str: str,
+) -> None:
+    """exec-miss returns ``None`` without allocating a result — should match ``test`` miss cost."""
+    benchmark(kitchen_sink_pattern.exec, url_nomatch_str)
+
+
+def test_exec_hit_extract_named_groups(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    url_match_str: str,
+) -> None:
+    """exec-hit and immediately read out the captured ``:id`` and ``:slug`` groups."""
+    pat = compiled_patterns["regex-constrained"]
+
+    def _hit_and_extract() -> tuple[str | None, str | None]:
+        result = pat.exec(url_match_str)
+        assert result is not None
+        return result.pathname["groups"].get("id"), result.pathname["groups"].get("slug")
+
+    benchmark(_hit_and_extract)
diff --git a/benchmarks/bench_test.py b/benchmarks/bench_test.py
new file mode 100644
index 0000000..375591e
--- /dev/null
+++ b/benchmarks/bench_test.py
@@ -0,0 +1,56 @@
+"""Benchmarks: ``URLPattern.test()`` throughput.
+
+``test()`` is the boolean-only fast path: it short-circuits on the first
+failing component and never builds the ``URLPatternResult`` envelope.
+The benchmarks cover both the "hit" case (every component matches) and
+the "miss" case (a wrong protocol fails fast at the first component).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pytest_benchmark.fixture import BenchmarkFixture
+
+    from yarlpattern import URLPattern
+
+
+def test_test_hit_pathname_only(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    pattern_source: str,  # noqa: ARG001 — included only to parametrize over shapes
+    matching_urls: dict[str, str],
+    request: object,
+) -> None:
+    """Match each single-component pattern against a URL that the shape matches."""
+    shape = request.node.callspec.id  # type: ignore[attr-defined]
+    pat = compiled_patterns[shape]
+    url = matching_urls[shape]
+    # Guard the "hit" label: a silent miss would benchmark the fail-fast path.
+    assert pat.test(url), f"{shape!r} does not match {url!r}"
+    benchmark(pat.test, url)
+
+
+def test_test_miss_protocol_fail_fast(
+    benchmark: BenchmarkFixture,
+    kitchen_sink_pattern: URLPattern,
+    url_nomatch_str: str,
+) -> None:
+    """Match-miss with the protocol failing first — exercises short-circuit cost."""
+    benchmark(kitchen_sink_pattern.test, url_nomatch_str)
+
+
+def test_test_hit_kitchen_sink_dict(
+    benchmark: BenchmarkFixture,
+    kitchen_sink_pattern: URLPattern,
+) -> None:
+    """Match-hit on a pre-parsed component dict input — skips URL parsing."""
+    components = {
+        "protocol": "https",
+        "username": "foo",
+        "password": "bar",
+        "hostname": "sub.example.com",
+        "pathname": "/product/view",
+    }
+    benchmark(kitchen_sink_pattern.test, components)
diff --git a/benchmarks/bench_yarl_fast_path.py b/benchmarks/bench_yarl_fast_path.py
new file mode 100644
index 0000000..9428c75
--- /dev/null
+++ b/benchmarks/bench_yarl_fast_path.py
@@ -0,0 +1,62 @@
+"""Benchmarks: the :class:`yarl.URL` input fast path.
+
+When the caller passes an already-parsed :class:`yarl.URL`, yarlpattern
+skips ``yarl.URL(url_str)`` construction and reads the URL components
+directly off the yarl instance. For ``aiohttp`` / yarl-based applications
+(where every request already holds a parsed URL) this avoids re-parsing
+the same string on every match.
+
+These benchmarks expose the delta by running the same match through the
+string-input path (parse-then-match) and the yarl-input path (match
+only) side by side.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pytest_benchmark.fixture import BenchmarkFixture
+    from yarl import URL
+
+    from yarlpattern import URLPattern
+
+
+def test_test_with_string_input(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    url_match_str: str,
+) -> None:
+    """Baseline: ``test()`` on a string input — includes ``yarl.URL`` parsing."""
+    pat = compiled_patterns["regex-constrained"]
+    benchmark(pat.test, url_match_str)
+
+
+def test_test_with_yarl_input(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    url_match_yarl: URL,
+) -> None:
+    """Fast path: ``test()`` on a pre-built :class:`yarl.URL` — no reparse."""
+    pat = compiled_patterns["regex-constrained"]
+    benchmark(pat.test, url_match_yarl)
+
+
+def test_exec_with_string_input(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    url_match_str: str,
+) -> None:
+    """Baseline: ``exec()`` on a string input — includes ``yarl.URL`` parsing."""
+    pat = compiled_patterns["regex-constrained"]
+    benchmark(pat.exec, url_match_str)
+
+
+def test_exec_with_yarl_input(
+    benchmark: BenchmarkFixture,
+    compiled_patterns: dict[str, URLPattern],
+    url_match_yarl: URL,
+) -> None:
+    """Fast path: ``exec()`` on a pre-built :class:`yarl.URL` — no reparse."""
+    pat = compiled_patterns["regex-constrained"]
+    benchmark(pat.exec, url_match_yarl)
diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py
new file mode 100644
index 0000000..7a01e49
--- /dev/null
+++ b/benchmarks/conftest.py
@@ -0,0 +1,110 @@
+"""Shared fixtures for the yarlpattern benchmark suite.
+
+The benchmark tree lives outside ``tests/`` so it is *not* picked up by the
+default ``pytest`` invocation (see ``[tool.pytest.ini_options].testpaths``
+in ``pyproject.toml``). Run with::
+
+    just bench
+    # or
+    uv run --group bench pytest benchmarks/ -o python_files='bench_*.py'
+
+These benchmarks measure yarlpattern against itself across the hot paths
+that real callers exercise: constructor cost, ``test()``, ``exec()``, and
+the ``yarl.URL`` fast path. They deliberately do *not* compare against
+other URLPattern implementations — apples-to-apples cross-library
+comparison is a different exercise with its own methodological pitfalls.
+"""
+
+from __future__ import annotations
+
+import pytest
+from yarl import URL
+
+from yarlpattern import URLPattern
+
+# A representative cross-section of pattern shapes callers actually use.
+# Picked to cover:
+#   - literal-only (fastest path; no groups, no regex back-references)
+#   - one named group with default segment-wildcard
+#   - one named group with an explicit ECMAScript-style regex constraint
+#   - wildcard-heavy catch-all routing pattern
+# The multi-component "kitchen sink" used in the MDN guide is not
+# pathname-only, so it lives in the ``kitchen_sink_pattern`` fixture instead.
+PATTERN_DEFINITIONS = {
+    "literal-pathname": "/foo/bar/baz",
+    "named-group": "/users/:id",
+    "regex-constrained": r"/users/:id(\d+)/posts/:slug",
+    "wildcard-tail": "/api/:version/*",
+}
+
+# One URL per shape whose pathname that shape's pattern matches, so the
+# per-shape "hit" benchmarks really measure hits. Keys mirror
+# ``PATTERN_DEFINITIONS``.
+MATCHING_URLS = {
+    "literal-pathname": "https://example.com/foo/bar/baz",
+    "named-group": "https://example.com/users/123",
+    "regex-constrained": "https://example.com/users/123/posts/hello-world",
+    "wildcard-tail": "https://example.com/api/v1/users/123",
+}
+
+
+@pytest.fixture(scope="session")
+def url_match_str() -> str:
+    """A URL string matched by the ``regex-constrained`` shape (not by the other shapes)."""
+    return MATCHING_URLS["regex-constrained"]
+
+
+@pytest.fixture(scope="session")
+def url_match_yarl(url_match_str: str) -> URL:
+    """The matching URL pre-parsed into a :class:`yarl.URL` — the fast-path input."""
+    return URL(url_match_str)
+
+
+@pytest.fixture(scope="session")
+def url_nomatch_str() -> str:
+    """A URL that fails pattern-matching early at the protocol or path stage."""
+    return "ftp://example.com/sessions/abc"
+
+
+@pytest.fixture(scope="session")
+def kitchen_sink_pattern() -> URLPattern:
+    """Multi-component pattern from MDN's "Using multiple components" guide.
+
+    Held as a session-scoped instance so the construction cost is amortised
+    across benchmarks that only care about ``test`` / ``exec`` throughput.
+    """
+    return URLPattern(
+        {
+            "protocol": "http{s}?",
+            "username": ":user?",
+            "password": ":pass?",
+            "hostname": "{:subdomain.}*example.com",
+            "pathname": "/product/:action*",
+        }
+    )
+
+
+@pytest.fixture(
+    scope="session",
+    params=sorted(PATTERN_DEFINITIONS.keys()),
+    ids=sorted(PATTERN_DEFINITIONS.keys()),
+)
+def pattern_source(request: pytest.FixtureRequest) -> str:
+    """The raw pathname-only pattern string, parametrized across all shapes."""
+    return PATTERN_DEFINITIONS[request.param]
+
+
+@pytest.fixture(scope="session")
+def compiled_patterns() -> dict[str, URLPattern]:
+    """One pre-compiled :class:`URLPattern` per pattern shape, keyed by id.
+
+    Constructors are not free — keeping these in a session-scoped dict lets
+    ``test`` / ``exec`` benchmarks isolate match cost from compile cost.
+    """
+    return {name: URLPattern({"pathname": src}) for name, src in PATTERN_DEFINITIONS.items()}
+
+
+@pytest.fixture(scope="session")
+def matching_urls() -> dict[str, str]:
+    """One matching URL string per pattern shape, keyed by shape id."""
+    return dict(MATCHING_URLS)
diff --git a/justfile b/justfile
index 0a37611..9331737 100644
--- a/justfile
+++ b/justfile
@@ -126,6 +126,28 @@ cov-open: test-cov
 [group('quality')]
 check: lint test
 
+[doc('Run the self-benchmark suite under benchmarks/')]
+[group('quality')]
+bench:
+    uv run --group bench pytest benchmarks/ \
+        -o python_files=bench_*.py \
+        --benchmark-only \
+        --benchmark-columns=min,mean,median,stddev,ops,rounds
+
+[doc('Run the benchmark suite and save a baseline named NAME (default: HEAD short SHA)')]
+[group('quality')]
+bench-save name='':
+    #!/usr/bin/env bash
+    set -euo pipefail
+    name="{{ name }}"
+    if [ -z "$name" ]; then
+        name=$(git rev-parse --short HEAD)
+    fi
+    uv run --group bench pytest benchmarks/ \
+        -o python_files=bench_*.py \
+        --benchmark-only \
+        --benchmark-save="$name"
+
 # --- Build ---
 
 [group('build')]
diff --git a/pyproject.toml b/pyproject.toml
index 22b7927..a38861a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -114,6 +114,13 @@ docs = [
 dev = [
     "pyclean>=3.6.0",
 ]
+# Self-benchmark suite under ``benchmarks/``.
Pinned via dependency-group (not +# a published extra) because the suite is for contributors / maintainers; end +# users have no need for ``pytest-benchmark``. +bench = [ + "pytest>=9.0.3", + "pytest-benchmark>=5.1.0", +] maintainer = [ { include-group = "dev" }, "python-semantic-release>=10.5.3", @@ -294,6 +301,14 @@ force-single-line = false "SLF001", # tests legitimately reach into ``_private`` attributes to # assert internal wiring (e.g. ``pat._engine is X``). ] +"benchmarks/**/*.py" = [ + "S101", # ``assert`` validates fixture invariants inside benchmark bodies + "PLR2004", # magic literals are unavoidable in benchmark payloads + "ANN", # benchmarks don't require full annotations + "D", # benchmarks document at module level, not per-function + "S105", # ``password = ...`` is dict-key naming, not a hardcoded credential + "S106", # same — ``password="bar"`` in kitchen-sink benchmark payloads +] "scripts/**/*.py" = [ "T201", # print is fine in CLI scripts "INP001", diff --git a/uv.lock b/uv.lock index 2bcc736..a466eb1 100644 --- a/uv.lock +++ b/uv.lock @@ -1476,6 +1476,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378", size = 98708, upload-time = "2021-11-04T17:17:00.152Z" }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = 
"sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, +] + [[package]] name = "pyclean" version = "3.6.0" @@ -1663,6 +1672,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] +[[package]] +name = "pytest-benchmark" +version = "5.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py-cpuinfo" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" }, +] + [[package]] name = "pytest-cov" version = "7.1.0" @@ -2678,6 +2700,10 @@ regex = [ ] [package.dev-dependencies] +bench = [ + { name = "pytest" }, + { name = "pytest-benchmark" }, +] dev = [ { name = "pyclean" }, ] @@ -2722,6 +2748,10 @@ requires-dist = [ provides-extras = ["regex"] [package.metadata.requires-dev] +bench = [ + { name = "pytest", specifier = ">=9.0.3" }, + { name = "pytest-benchmark", specifier = ">=5.1.0" }, +] dev = [{ name = "pyclean", specifier = ">=3.6.0" }] docs = [ { name = "mkdocs-code-validator", specifier = ">=0.2.1" },