From 62f91c9ef5865a67e56f6d3b7ea854c2816bb9a2 Mon Sep 17 00:00:00 2001
From: chad-loder <26261238+chad-loder@users.noreply.github.com>
Date: Tue, 12 May 2026 19:47:02 -0700
Subject: [PATCH] test: hand-coded MDN + WHATWG spec example tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add two new test files that exercise the canonical examples readers
encounter first when learning URLPattern:

  tests/test_spec_examples.py
    - WHATWG URLPattern Standard §1.2 shop example:
      http{s}?://{:subdomain.}?shop.example/products/:id([0-9]+)#reviews
      3 positive matches + 4 negative matches (missing fragment, port
      disallowed, search disallowed, non-digit id).

  tests/test_mdn_examples.py
    - "Using multiple components": kitchen-sink protocol+username+
      password+hostname+pathname pattern, matched against a fully
      populated URL and a URL with every optional part omitted.
    - "Custom prefix and suffix modifiers": {:subdomain.}*example.com
      hostname wildcard with named-group expansion across 0–3 labels.
    - "Pattern syntax — regex groups": /books/:id(\d+) digit-only
      constraint, plus negative cases for letters, trailing junk,
      empty id, and signed integers.

23 new test cases, all passing against yarlpattern as-is. The WPT
corpus carries the conformance load; these files document the
pedagogical surface so spec / MDN regressions surface as named
failures.

Part of v0.2.0 roadmap.
---
 tests/test_mdn_examples.py  | 142 ++++++++++++++++++++++++++++++++++++
 tests/test_spec_examples.py |  88 ++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 tests/test_mdn_examples.py
 create mode 100644 tests/test_spec_examples.py

diff --git a/tests/test_mdn_examples.py b/tests/test_mdn_examples.py
new file mode 100644
index 0000000..a048dbf
--- /dev/null
+++ b/tests/test_mdn_examples.py
@@ -0,0 +1,142 @@
+"""Hand-coded tests for the canonical examples on MDN's ``URLPattern`` page.
+
+These mirror the worked examples shown on
+[developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API](https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API)
+and its companion guides. They are deliberately a small, narrated set —
+the WPT corpus carries the conformance load; this file documents the
+*pedagogical* surface readers actually encounter first.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from yarlpattern import URLPattern
+
+
+class TestMdnMultipleComponents:
+    """MDN guide: "Using multiple components".
+
+    Constructs a URLPattern from a dict literal covering the protocol,
+    username, password, hostname, and pathname components at once, then calls
+    ``exec()`` on it with a fully populated URL and with a minimal one.
+    """
+
+    @pytest.fixture(scope="class")
+    def pattern(self) -> URLPattern:
+        return URLPattern(
+            {
+                "protocol": "http{s}?",
+                "username": ":user?",
+                "password": ":pass?",
+                "hostname": "{:subdomain.}*example.com",
+                "pathname": "/product/:action*",
+            }
+        )
+
+    def test_match_extracts_every_component_group(self, pattern: URLPattern) -> None:
+        url = "http://foo:bar@sub.example.com/product/view?q=12345"
+        result = pattern.exec(url)
+        assert result is not None
+        assert result.username["groups"]["user"] == "foo"
+        assert result.password["groups"]["pass"] == "bar"
+        assert result.hostname["groups"]["subdomain"] == "sub"
+        assert result.pathname["groups"]["action"] == "view"
+
+    def test_match_with_no_optional_components_present(self, pattern: URLPattern) -> None:
+        # Per the MDN walkthrough, the optional username/password/subdomain
+        # are absent here and the pathname's ``:action*`` matches zero
+        # segments. Note: the ``*`` modifier in path-to-regexp absorbs its
+        # preceding ``/`` separator, so the zero-match form is ``/product``
+        # — not ``/product/``.
+        url = "https://example.com/product"
+        result = pattern.exec(url)
+        assert result is not None
+        # When an optional named group does not participate in the match,
+        # the WHATWG algorithm leaves its value ``undefined``; a Python port
+        # can surface that as a missing key, ``None``, or ``""``. ``.get()``
+        # with the ``in (None, "")`` check accepts all three.
+        assert result.username["groups"].get("user") in (None, "")
+        assert result.password["groups"].get("pass") in (None, "")
+        assert result.hostname["groups"].get("subdomain") in (None, "")
+        assert result.pathname["groups"].get("action") in (None, "")
+
+
+class TestMdnSubdomainWildcard:
+    """MDN guide: "Custom prefix and suffix modifiers".
+
+    ``{:subdomain.}*example.com`` matches any number of dotted subdomain
+    labels, including zero. The named group inside the explicit ``{ ... }``
+    grouping captures whatever subdomain prefix was matched.
+    """
+
+    @pytest.fixture(scope="class")
+    def pattern(self) -> URLPattern:
+        return URLPattern({"hostname": "{:subdomain.}*example.com"})
+
+    @pytest.mark.parametrize(
+        ("hostname", "expected_subdomain"),
+        [
+            ("example.com", None),
+            ("foo.example.com", "foo"),
+            ("foo.bar.example.com", "foo.bar"),
+            ("a.b.c.example.com", "a.b.c"),
+        ],
+    )
+    def test_positive(
+        self,
+        pattern: URLPattern,
+        hostname: str,
+        expected_subdomain: str | None,
+    ) -> None:
+        result = pattern.exec({"hostname": hostname})
+        assert result is not None, f"expected match for {hostname!r}"
+        assert result.hostname["groups"].get("subdomain") == expected_subdomain
+
+    @pytest.mark.parametrize(
+        "hostname",
+        [
+            ".example.com",  # leading dot — empty label
+            "example.org",  # wrong eTLD
+            "example.com.",  # trailing dot
+        ],
+    )
+    def test_negative(self, pattern: URLPattern, hostname: str) -> None:
+        assert not pattern.test({"hostname": hostname}), f"expected no match for {hostname!r}"
+
+
+class TestMdnRegexConstrainedGroup:
+    r"""MDN guide: "Pattern syntax — regex groups".
+
+    The pathname pattern ``/books/:id(\d+)`` constrains the ``:id`` group to
+    one or more decimal digits via the regex in parentheses after the name.
+    """
+
+    @pytest.fixture(scope="class")
+    def pattern(self) -> URLPattern:
+        return URLPattern({"pathname": r"/books/:id(\d+)"})
+
+    @pytest.mark.parametrize(
+        ("url", "expected_id"),
+        [
+            ("https://x.example/books/42", "42"),
+            ("https://x.example/books/0", "0"),
+            ("https://x.example/books/9876543210", "9876543210"),
+        ],
+    )
+    def test_positive(self, pattern: URLPattern, url: str, expected_id: str) -> None:
+        result = pattern.exec(url)
+        assert result is not None
+        assert result.pathname["groups"]["id"] == expected_id
+
+    @pytest.mark.parametrize(
+        "url",
+        [
+            "https://x.example/books/abc",  # letters disallowed
+            "https://x.example/books/42a",  # trailing letter
+            "https://x.example/books/",  # empty id rejected by + quantifier
+            "https://x.example/books/-1",  # a sign is not matched by \d
+        ],
+    )
+    def test_negative(self, pattern: URLPattern, url: str) -> None:
+        assert pattern.exec(url) is None, f"expected no match: {url!r}"
diff --git a/tests/test_spec_examples.py b/tests/test_spec_examples.py
new file mode 100644
index 0000000..ebdc36a
--- /dev/null
+++ b/tests/test_spec_examples.py
@@ -0,0 +1,88 @@
+"""Hand-coded tests for the canonical examples in the WHATWG URLPattern Standard.
+
+The WPT corpus exercises 469 conformance cases by construction, but those entries
+are terse JSON tuples. The examples *written into the prose* of the
+[WHATWG URLPattern Standard](https://urlpattern.spec.whatwg.org/) — the ones a
+human reads when they first land on the spec — deserve a separate, narrated
+test surface so that:
+
+  1. Regressions on the spec's headline examples surface as named failures.
+  2. Readers can land in this file and see exactly the patterns and inputs
+     the standard authors used to illustrate the feature.
+
+Each example is annotated with its section number and a one-line description.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from yarlpattern import URLPattern
+
+
+class TestSpecExample12Shop:
+    """WHATWG URLPattern Standard §1.2 example.
+
+    Pattern: ``http{s}?://{:subdomain.}?shop.example/products/:id([0-9]+)#reviews``
+
+    The spec uses this example to illustrate:
+      - optional protocol suffix (``http{s}?``)
+      - optional named hostname segment with trailing dot (``{:subdomain.}?``)
+      - regex-constrained named pathname group (``:id([0-9]+)``)
+      - literal-required hash component (``#reviews``)
+    """
+
+    PATTERN = "http{s}?://{:subdomain.}?shop.example/products/:id([0-9]+)#reviews"
+
+    @pytest.fixture(scope="class")
+    def pattern(self) -> URLPattern:
+        return URLPattern(self.PATTERN)
+
+    @pytest.mark.parametrize(
+        ("url", "subdomain", "id_"),
+        [
+            ("https://shop.example/products/74205#reviews", None, "74205"),
+            ("https://kathryn@voyager.shop.example/products/74656#reviews", "voyager", "74656"),
+            ("http://insecure.shop.example/products/1701#reviews", "insecure", "1701"),
+        ],
+    )
+    def test_positive_matches(
+        self,
+        pattern: URLPattern,
+        url: str,
+        subdomain: str | None,
+        id_: str,
+    ) -> None:
+        result = pattern.exec(url)
+        assert result is not None, f"expected match for {url!r}"
+        assert result.pathname["groups"]["id"] == id_
+        assert result.hostname["groups"].get("subdomain") == subdomain
+
+    @pytest.mark.parametrize(
+        ("url", "reason"),
+        [
+            (
+                "https://shop.example/products/74205",
+                "missing #reviews fragment",
+            ),
+            (
+                "https://shop.example:8443/products/74205#reviews",
+                "explicit port disallowed",
+            ),
+            (
+                "https://shop.example/products/74205?ref=hn#reviews",
+                "search component disallowed",
+            ),
+            (
+                "https://shop.example/products/abc#reviews",
+                "non-digit id rejected by [0-9]+",
+            ),
+        ],
+    )
+    def test_negative_matches(
+        self,
+        pattern: URLPattern,
+        url: str,
+        reason: str,
+    ) -> None:
+        assert pattern.exec(url) is None, f"expected no match ({reason}): {url!r}"