Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dist/
.coverage.*
htmlcov/
site/
.benchmarks/
.venv/
venv/

Expand Down
Empty file added benchmarks/__init__.py
Empty file.
49 changes: 49 additions & 0 deletions benchmarks/bench_construction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Benchmarks: ``URLPattern(...)`` construction cost.

Construction is paid once per pattern but is the load-bearing cost for any
caller that does *not* memoise the compiled pattern (e.g. ad-hoc one-shot
URL checks in scripts, or per-request reconstruction in code that has not
been profiled yet). The four pattern shapes here cover the spectrum from
"literal-only, no regex compile" to "multi-component dict with five regex
backings".
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from yarlpattern import URLPattern

if TYPE_CHECKING:
from pytest_benchmark.fixture import BenchmarkFixture


def test_construct_from_pathname_string(
    benchmark: BenchmarkFixture,
    pattern_source: str,
) -> None:
    """Time building a pathname-only ``URLPattern`` from ``pattern_source``.

    The init dict is created inside the timed callable, so every round pays
    for both the dict literal and the ``URLPattern`` constructor.
    """

    def _compile() -> URLPattern:
        return URLPattern({"pathname": pattern_source})

    benchmark(_compile)


def test_construct_from_full_url_string(benchmark: BenchmarkFixture) -> None:
    """Time building a ``URLPattern`` from a single full-URL pattern string."""
    full_url_pattern = "https://example.com/users/:id(\\d+)/posts/:slug"

    def _compile() -> URLPattern:
        return URLPattern(full_url_pattern)

    benchmark(_compile)


def test_construct_from_multi_component_dict(benchmark: BenchmarkFixture) -> None:
    """Time building a ``URLPattern`` from a five-component init dict.

    The dict is created once, outside the timed callable; each round passes
    the same dict object to the constructor.
    """
    init = {
        "protocol": "http{s}?",
        "username": ":user?",
        "password": ":pass?",
        "hostname": "{:subdomain.}*example.com",
        "pathname": "/product/:action*",
    }

    def _compile() -> URLPattern:
        return URLPattern(init)

    benchmark(_compile)


def test_construct_with_base_url(benchmark: BenchmarkFixture) -> None:
    """Time building a ``URLPattern`` from a relative pattern plus a base URL."""

    def _compile() -> URLPattern:
        return URLPattern("/users/:id", "https://example.com/")

    benchmark(_compile)
55 changes: 55 additions & 0 deletions benchmarks/bench_exec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Benchmarks: ``URLPattern.exec()`` throughput.

``exec()`` extends ``test()`` by building a :class:`URLPatternResult` —
one dict per component containing the input substring and a ``groups``
sub-dict of named-group captures. That extra allocation is the entire
delta versus :file:`bench_test.py`; comparing the two measurements
reveals the overhead of result construction in isolation.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pytest_benchmark.fixture import BenchmarkFixture

from yarlpattern import URLPattern


def test_exec_hit_pathname_only(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    pattern_source: str,  # noqa: ARG001 — included only to parametrize over shapes
    url_match_str: str,
    request: object,
) -> None:
    """Time ``exec()`` for every pathname-only shape against the shared URL.

    The shape key is read from this test's ``pattern_source`` parameter rather
    than from the whole node id, so the lookup keeps working if a second
    parametrized fixture is ever added (pytest joins the ids with ``-``).

    NOTE(review): ``url_match_str`` looks shaped for the ``regex-constrained``
    pattern only; the other shapes probably time the ``exec()`` miss path —
    confirm, or give each shape its own matching URL.
    """
    # ``callspec.params`` maps each parametrized argument name to its value;
    # for ``pattern_source`` that value is the shape key itself.
    shape = request.node.callspec.params["pattern_source"]  # type: ignore[attr-defined]
    benchmark(compiled_patterns[shape].exec, url_match_str)


def test_exec_miss_returns_none(
    benchmark: BenchmarkFixture,
    kitchen_sink_pattern: URLPattern,
    url_nomatch_str: str,
) -> None:
    """Time ``exec()`` on a URL the kitchen-sink pattern is expected to reject.

    The value handed back by ``benchmark`` is checked afterwards so that a
    fixture change which turns this into a hit fails loudly instead of
    silently changing what is being measured.
    """
    outcome = benchmark(kitchen_sink_pattern.exec, url_nomatch_str)
    # ``benchmark(...)`` returns whatever the timed callable returned.
    assert outcome is None


def test_exec_hit_extract_named_groups(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    url_match_str: str,
) -> None:
    """Time ``exec()`` plus reading the ``id`` and ``slug`` pathname groups."""
    pattern = compiled_patterns["regex-constrained"]

    def _exec_then_read() -> tuple[str | None, str | None]:
        match = pattern.exec(url_match_str)
        # A miss here would mean the fixtures no longer line up.
        assert match is not None
        id_value = match.pathname["groups"].get("id")
        slug_value = match.pathname["groups"].get("slug")
        return id_value, slug_value

    benchmark(_exec_then_read)
53 changes: 53 additions & 0 deletions benchmarks/bench_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Benchmarks: ``URLPattern.test()`` throughput.

``test()`` is the boolean-only fast path: it short-circuits on the first
failing component and never builds the ``URLPatternResult`` envelope.
The benchmarks cover both the "hit" case (every component matches) and
the "miss" case (a wrong protocol fails fast at the first component).
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pytest_benchmark.fixture import BenchmarkFixture

from yarlpattern import URLPattern


def test_test_hit_pathname_only(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    pattern_source: str,  # noqa: ARG001 — included only to parametrize over shapes
    url_match_str: str,
    request: object,
) -> None:
    """Time ``test()`` for every pathname-only shape against the shared URL.

    The shape key is read from this test's ``pattern_source`` parameter rather
    than from the whole node id, so the lookup keeps working if a second
    parametrized fixture is ever added (pytest joins the ids with ``-``).

    NOTE(review): ``url_match_str`` looks shaped for the ``regex-constrained``
    pattern only; the other shapes probably time the ``test()`` miss path —
    confirm, or give each shape its own matching URL.
    """
    # ``callspec.params`` maps each parametrized argument name to its value;
    # for ``pattern_source`` that value is the shape key itself.
    shape = request.node.callspec.params["pattern_source"]  # type: ignore[attr-defined]
    benchmark(compiled_patterns[shape].test, url_match_str)


def test_test_miss_protocol_fail_fast(
    benchmark: BenchmarkFixture,
    kitchen_sink_pattern: URLPattern,
    url_nomatch_str: str,
) -> None:
    """Time ``test()`` on a URL whose protocol the kitchen-sink pattern rejects.

    The value handed back by ``benchmark`` is checked afterwards so that a
    fixture change which turns this into a hit fails loudly instead of
    silently changing what is being measured.
    """
    matched = benchmark(kitchen_sink_pattern.test, url_nomatch_str)
    # ``benchmark(...)`` returns whatever the timed callable returned.
    assert not matched


def test_test_hit_kitchen_sink_dict(
    benchmark: BenchmarkFixture,
    kitchen_sink_pattern: URLPattern,
) -> None:
    """Time ``test()`` on a component-dict input that is expected to match.

    The value handed back by ``benchmark`` is checked afterwards so that a
    fixture change which turns this into a miss fails loudly instead of
    silently changing what is being measured.
    """
    components = {
        "protocol": "https",
        "username": "foo",
        "password": "bar",
        "hostname": "sub.example.com",
        "pathname": "/product/view",
    }
    matched = benchmark(kitchen_sink_pattern.test, components)
    # ``benchmark(...)`` returns whatever the timed callable returned.
    assert matched
62 changes: 62 additions & 0 deletions benchmarks/bench_yarl_fast_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Benchmarks: the :class:`yarl.URL` input fast path.

When the caller passes an already-parsed :class:`yarl.URL`, yarlpattern
skips ``yarl.URL(url_str)`` construction and reads the URL components
directly off the yarl instance. For ``aiohttp`` / yarl-based applications
(where every request already holds a parsed URL) this avoids re-parsing
the same string on every match.

These benchmarks expose the delta by running the same match through the
string-input path (parse-then-match) and the yarl-input path (match
only) side by side.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pytest_benchmark.fixture import BenchmarkFixture
from yarl import URL

from yarlpattern import URLPattern


def test_test_with_string_input(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    url_match_str: str,
) -> None:
    """Baseline: time ``test()`` when the input URL is passed as a plain string."""
    benchmark(compiled_patterns["regex-constrained"].test, url_match_str)


def test_test_with_yarl_input(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    url_match_yarl: URL,
) -> None:
    """Fast path: time ``test()`` when the input is an already-built ``yarl.URL``."""
    benchmark(compiled_patterns["regex-constrained"].test, url_match_yarl)


def test_exec_with_string_input(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    url_match_str: str,
) -> None:
    """Baseline: time ``exec()`` when the input URL is passed as a plain string."""
    benchmark(compiled_patterns["regex-constrained"].exec, url_match_str)


def test_exec_with_yarl_input(
    benchmark: BenchmarkFixture,
    compiled_patterns: dict[str, URLPattern],
    url_match_yarl: URL,
) -> None:
    """Fast path: time ``exec()`` when the input is an already-built ``yarl.URL``."""
    benchmark(compiled_patterns["regex-constrained"].exec, url_match_yarl)
93 changes: 93 additions & 0 deletions benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Shared fixtures for the yarlpattern benchmark suite.

The benchmark tree lives outside ``tests/`` so it is *not* picked up by the
default ``pytest`` invocation (see ``[tool.pytest.ini_options].testpaths``
in ``pyproject.toml``). Run with::

just bench
# or
uv run --group bench pytest benchmarks/

These benchmarks measure yarlpattern against itself across the hot paths
that real callers exercise: constructor cost, ``test()``, ``exec()``, and
the ``yarl.URL`` fast path. They deliberately do *not* compare against
other URLPattern implementations — apples-to-apples cross-library
comparison is a different exercise with its own methodological pitfalls.
"""

from __future__ import annotations

import pytest
from yarl import URL

from yarlpattern import URLPattern

# A representative cross-section of pathname-only pattern shapes callers
# actually use. Picked to cover:
#   - literal-only (fastest path; no groups, no regex back-references)
#   - one named group with default segment-wildcard
#   - two named groups, one with an explicit ECMAScript-style regex constraint
#   - a named group followed by a trailing wildcard (catch-all routing)
# The multi-component "kitchen sink" from the MDN guide is not a pathname-only
# shape, so it is provided separately by the ``kitchen_sink_pattern`` fixture.
PATTERN_DEFINITIONS = {
    "literal-pathname": "/foo/bar/baz",
    "named-group": "/users/:id",
    "regex-constrained": r"/users/:id(\d+)/posts/:slug",
    "wildcard-tail": "/api/:version/*",
}


@pytest.fixture(scope="session")
def url_match_str() -> str:
    """Return the canonical benchmark URL as a plain string.

    NOTE(review): the pathname is shaped for the ``regex-constrained`` entry
    of ``PATTERN_DEFINITIONS``; it does not look like a match for the other
    pathname shapes — confirm before relying on it as a universal "hit" input.
    """
    canonical_url = "https://example.com/users/123/posts/hello-world"
    return canonical_url


@pytest.fixture(scope="session")
def url_match_yarl(url_match_str: str) -> URL:
    """Return ``url_match_str`` wrapped in a :class:`yarl.URL`, built once per session."""
    parsed = URL(url_match_str)
    return parsed


@pytest.fixture(scope="session")
def url_nomatch_str() -> str:
    """Return an ``ftp://`` URL string used as the non-matching benchmark input."""
    rejected_url = "ftp://example.com/sessions/abc"
    return rejected_url


@pytest.fixture(scope="session")
def kitchen_sink_pattern() -> URLPattern:
    """Return the five-component "kitchen sink" pattern, built once per session.

    Session scope keeps the constructor call out of the ``test`` / ``exec``
    benchmarks that receive this fixture.
    """
    components = {
        "protocol": "http{s}?",
        "username": ":user?",
        "password": ":pass?",
        "hostname": "{:subdomain.}*example.com",
        "pathname": "/product/:action*",
    }
    return URLPattern(components)


@pytest.fixture(
    scope="session",
    params=sorted(PATTERN_DEFINITIONS),
    ids=sorted(PATTERN_DEFINITIONS),
)
def pattern_source(request: pytest.FixtureRequest) -> str:
    """Return the raw pathname pattern for the current shape.

    Parametrized over the sorted keys of ``PATTERN_DEFINITIONS``; each key is
    used both as the parameter value and as the test id.
    """
    shape = request.param
    return PATTERN_DEFINITIONS[shape]


@pytest.fixture(scope="session")
def compiled_patterns() -> dict[str, URLPattern]:
    """Return one pathname-only :class:`URLPattern` per shape, keyed by shape name.

    Built once per session so that ``test`` / ``exec`` benchmarks receive
    ready-made patterns instead of constructing them inside the timed call.
    """
    compiled: dict[str, URLPattern] = {}
    for shape, source in PATTERN_DEFINITIONS.items():
        compiled[shape] = URLPattern({"pathname": source})
    return compiled
23 changes: 23 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,29 @@ cov-open: test-cov
[group('quality')]
check: lint test

# The python_files override is quoted so the shell never treats it as a glob.
[doc('Run the self-benchmark suite under benchmarks/')]
[group('quality')]
bench:
    uv run --group bench pytest benchmarks/ \
        -o 'python_files=bench_*.py' \
        --benchmark-only \
        --benchmark-columns=min,mean,median,stddev,ops,rounds

[doc('Run the benchmark suite and save a baseline named NAME (default: HEAD short SHA)')]
[group('quality')]
bench-save name='':
    #!/usr/bin/env bash
    set -euo pipefail
    # Fall back to the current commit's short SHA when no name is supplied.
    name="{{ name }}"
    if [ -z "$name" ]; then
        name=$(git rev-parse --short HEAD)
    fi
    # --benchmark-save already names the saved run, so --benchmark-autosave
    # (which only supplies an auto-generated name) is not passed as well.
    # The python_files override is quoted so the shell never globs it.
    uv run --group bench pytest benchmarks/ \
        -o 'python_files=bench_*.py' \
        --benchmark-only \
        --benchmark-save="$name"

# --- Build ---

[group('build')]
Expand Down
15 changes: 15 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ docs = [
dev = [
"pyclean>=3.6.0",
]
# Self-benchmark suite under ``benchmarks/``. Declared as a dependency-group
# (not a published extra) because the suite is for contributors / maintainers;
# end users have no need for ``pytest-benchmark``.
bench = [
"pytest>=9.0.3",
"pytest-benchmark>=5.1.0",
]
maintainer = [
{ include-group = "dev" },
"python-semantic-release>=10.5.3",
Expand Down Expand Up @@ -294,6 +301,14 @@ force-single-line = false
"SLF001", # tests legitimately reach into ``_private`` attributes to
# assert internal wiring (e.g. ``pat._engine is X``).
]
"benchmarks/**/*.py" = [
"S101", # ``assert`` validates fixture invariants inside benchmark bodies
"PLR2004", # magic literals are unavoidable in benchmark payloads
"ANN", # benchmarks don't require full annotations
"D", # benchmarks document at module level, not per-function
"S105", # ``password = ...`` is dict-key naming, not a hardcoded credential
"S106", # same — ``password="bar"`` in kitchen-sink benchmark payloads
]
"scripts/**/*.py" = [
"T201", # print is fine in CLI scripts
"INP001",
Expand Down
Loading
Loading