From 62f91c9ef5865a67e56f6d3b7ea854c2816bb9a2 Mon Sep 17 00:00:00 2001 From: chad-loder <26261238+chad-loder@users.noreply.github.com> Date: Tue, 12 May 2026 19:47:02 -0700 Subject: [PATCH] test: hand-coded MDN + WHATWG spec example tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two new test files that exercise the canonical examples readers encounter first when learning URLPattern: tests/test_spec_examples.py - WHATWG URLPattern Standard §1.2 shop example: http{s}?://{:subdomain.}?shop.example/products/:id([0-9]+)#reviews 3 positive matches + 4 negative matches (missing fragment, port disallowed, search disallowed, non-digit id). tests/test_mdn_examples.py - "Using multiple components": kitchen-sink protocol+username+ password+hostname+pathname pattern with full and zero-optional input matches. - "Custom prefix and suffix modifiers": {:subdomain.}*example.com hostname wildcard with named-group expansion across 0–3 labels. - "Pattern syntax — regex groups": /books/:id(\d+) digit-only constraint, plus negative cases for letters, trailing junk, empty id, and signed integers. 23 new test cases, all passing against yarlpattern as-is. The WPT corpus carries the conformance load; these files document the pedagogical surface so spec / MDN regressions surface as named failures. Part of v0.2.0 roadmap. --- tests/test_mdn_examples.py | 142 ++++++++++++++++++++++++++++++++++++ tests/test_spec_examples.py | 88 ++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 tests/test_mdn_examples.py create mode 100644 tests/test_spec_examples.py diff --git a/tests/test_mdn_examples.py b/tests/test_mdn_examples.py new file mode 100644 index 0000000..a048dbf --- /dev/null +++ b/tests/test_mdn_examples.py @@ -0,0 +1,142 @@ +"""Hand-coded tests for the canonical examples on MDN's ``URLPattern`` page. 
+ +These mirror the worked examples shown on +[developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API](https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API) +and its companion guides. They are deliberately a small, narrated set — +the WPT corpus carries the conformance load; this file documents the +*pedagogical* surface readers actually encounter first. +""" + +from __future__ import annotations + +import pytest + +from yarlpattern import URLPattern + + +class TestMdnMultipleComponents: + """MDN guide: "Using multiple components". + + Constructs a URLPattern from a dict literal covering protocol, username, + password, hostname, and pathname components simultaneously, then exec's it + against a fully-populated URL. + """ + + @pytest.fixture(scope="class") + def pattern(self) -> URLPattern: + return URLPattern( + { + "protocol": "http{s}?", + "username": ":user?", + "password": ":pass?", + "hostname": "{:subdomain.}*example.com", + "pathname": "/product/:action*", + } + ) + + def test_match_extracts_every_component_group(self, pattern: URLPattern) -> None: + url = "http://foo:bar@sub.example.com/product/view?q=12345" + result = pattern.exec(url) + assert result is not None + assert result.username["groups"]["user"] == "foo" + assert result.password["groups"]["pass"] == "bar" + assert result.hostname["groups"]["subdomain"] == "sub" + assert result.pathname["groups"]["action"] == "view" + + def test_match_with_no_optional_components_present(self, pattern: URLPattern) -> None: + # Per the MDN walkthrough, the optional username/password/subdomain + # are absent here and the pathname's ``:action*`` matches zero + # segments. Note: the ``*`` modifier in path-to-regexp absorbs its + # preceding ``/`` separator, so the zero-match form is ``/product`` + # — not ``/product/``. 
+ url = "https://example.com/product" + result = pattern.exec(url) + assert result is not None + # An optional named group that did not participate in the match may + # surface as a missing key, ``None``, or ``""``. ``.get()`` combined + # with ``in (None, "")`` accepts all three, so these assertions do + # not pin which representation yarlpattern uses. + assert result.username["groups"].get("user") in (None, "") + assert result.password["groups"].get("pass") in (None, "") + assert result.hostname["groups"].get("subdomain") in (None, "") + assert result.pathname["groups"].get("action") in (None, "") + + +class TestMdnSubdomainWildcard: + """MDN guide: "Custom prefix and suffix modifiers". + + ``{:subdomain.}*example.com`` matches any number of dotted subdomain + labels, including zero. The named group inside the explicit ``{ ... }`` + grouping captures whatever subdomain prefix was matched. + """ + + @pytest.fixture(scope="class") + def pattern(self) -> URLPattern: + return URLPattern({"hostname": "{:subdomain.}*example.com"}) + + @pytest.mark.parametrize( + ("hostname", "expected_subdomain"), + [ + ("example.com", None), + ("foo.example.com", "foo"), + ("foo.bar.example.com", "foo.bar"), + ("a.b.c.example.com", "a.b.c"), + ], + ) + def test_positive( + self, + pattern: URLPattern, + hostname: str, + expected_subdomain: str | None, + ) -> None: + result = pattern.exec({"hostname": hostname}) + assert result is not None, f"expected match for {hostname!r}" + assert result.hostname["groups"].get("subdomain") == expected_subdomain + + @pytest.mark.parametrize( + "hostname", + [ + ".example.com", # leading dot — empty label + "example.org", # wrong eTLD + "example.com.", # trailing dot + ], + ) + def test_negative(self, pattern: URLPattern, hostname: str) -> None: + assert not pattern.test({"hostname": hostname}), f"expected no match for {hostname!r}" + + +class TestMdnRegexConstrainedGroup: + r"""MDN guide: "Pattern syntax — regex groups".
+ + The pathname pattern ``/books/:id(\d+)`` constrains the ``:id`` group to + one or more decimal digits using an inline regex tail. + """ + + @pytest.fixture(scope="class") + def pattern(self) -> URLPattern: + return URLPattern({"pathname": r"/books/:id(\d+)"}) + + @pytest.mark.parametrize( + ("url", "expected_id"), + [ + ("https://x.example/books/42", "42"), + ("https://x.example/books/0", "0"), + ("https://x.example/books/9876543210", "9876543210"), + ], + ) + def test_positive(self, pattern: URLPattern, url: str, expected_id: str) -> None: + result = pattern.exec(url) + assert result is not None + assert result.pathname["groups"]["id"] == expected_id + + @pytest.mark.parametrize( + "url", + [ + "https://x.example/books/abc", # letters disallowed + "https://x.example/books/42a", # trailing letter + "https://x.example/books/", # empty id rejected by + quantifier + "https://x.example/books/-1", # leading sign is not matched by \d + ], + ) + def test_negative(self, pattern: URLPattern, url: str) -> None: + assert pattern.exec(url) is None, f"expected no match: {url!r}" diff --git a/tests/test_spec_examples.py b/tests/test_spec_examples.py new file mode 100644 index 0000000..ebdc36a --- /dev/null +++ b/tests/test_spec_examples.py @@ -0,0 +1,88 @@ +"""Hand-coded tests for the canonical examples in the WHATWG URLPattern Standard. + +The WPT corpus exercises 469 conformance cases by construction, but those entries +are terse JSON tuples. The examples *written into the prose* of the +[WHATWG URLPattern Standard](https://urlpattern.spec.whatwg.org/) — the ones a +human reads when they first land on the spec — deserve a separate, narrated +test surface so that: + + 1. Regressions on the spec's headline examples surface as named failures. + 2. Readers can land in this file and see exactly the patterns and inputs + the standard authors used to illustrate the feature. + +Each example is annotated with its section number and a one-line description.
+""" + +from __future__ import annotations + +import pytest + +from yarlpattern import URLPattern + + +class TestSpecExample12Shop: + """WHATWG URLPattern Standard §1.2 example. + + Pattern: ``http{s}?://{:subdomain.}?shop.example/products/:id([0-9]+)#reviews`` + + The spec uses this example to illustrate: + - optional protocol suffix (``http{s}?``) + - optional named hostname segment with trailing dot (``{:subdomain.}?``) + - regex-constrained named pathname group (``:id([0-9]+)``) + - literal-required hash component (``#reviews``) + """ + + PATTERN = "http{s}?://{:subdomain.}?shop.example/products/:id([0-9]+)#reviews" + + @pytest.fixture(scope="class") + def pattern(self) -> URLPattern: + return URLPattern(self.PATTERN) + + @pytest.mark.parametrize( + ("url", "subdomain", "id_"), + [ + ("https://shop.example/products/74205#reviews", None, "74205"), + ("https://kathryn@voyager.shop.example/products/74656#reviews", "voyager", "74656"), + ("http://insecure.shop.example/products/1701#reviews", "insecure", "1701"), + ], + ) + def test_positive_matches( + self, + pattern: URLPattern, + url: str, + subdomain: str | None, + id_: str, + ) -> None: + result = pattern.exec(url) + assert result is not None, f"expected match for {url!r}" + assert result.pathname["groups"]["id"] == id_ + assert result.hostname["groups"].get("subdomain") == subdomain + + @pytest.mark.parametrize( + ("url", "reason"), + [ + ( + "https://shop.example/products/74205", + "missing #reviews fragment", + ), + ( + "https://shop.example:8443/products/74205#reviews", + "explicit port disallowed", + ), + ( + "https://shop.example/products/74205?ref=hn#reviews", + "search component disallowed", + ), + ( + "https://shop.example/products/abc#reviews", + "non-digit id rejected by [0-9]+", + ), + ], + ) + def test_negative_matches( + self, + pattern: URLPattern, + url: str, + reason: str, + ) -> None: + assert pattern.exec(url) is None, f"expected no match ({reason}): {url!r}"