Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion scripts/src/scverse_template_scripts/cruft_prs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
import math
import os
import re
import sys
from collections.abc import Iterable
from dataclasses import KW_ONLY, InitVar, dataclass, field
Expand Down Expand Up @@ -72,6 +73,39 @@
]


def _escape_github_mentions(text: str) -> str:
"""Escape GitHub @mentions with backticks to prevent notifications.

Wraps ``@username`` patterns in backticks so that GitHub doesn't treat them as
real mentions when the release notes are embedded in template-update PRs.
Otherwise every contributor named in the release notes would be subscribed to
the ~150 template-update PRs that are opened on every release.

Already-escaped mentions and email addresses are left unchanged.

Note
----
This is a simple regex that comes with certain limitations,
e.g., a mention that sits *inside* an inline code span but is preceded by whitespace
(e.g. ``\\`see @bar here\\```) would be re-escaped incorrectly.
This does not occur in GitHub's auto-generated release notes (a flat bullet list of `… by @user in <url>`).

At the time of writing, we couldn't identify a library providing a markdown parser
that reliably identifies github usernames.
"""
# A GitHub @mention, e.g. `@grst`. The username pattern matches GitHub's own rules:
# alphanumeric or single non-leading/non-trailing/non-consecutive hyphens, max 39 chars.
# See https://github.com/shinnn/github-username-regex.
# The negative lookbehind skips email addresses (e.g. `bot@example.com`) and
# already-escaped mentions (e.g. `` `@grst` ``).
github_username_regex = re.compile(
r"(?<![`\w])@([a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})",
re.IGNORECASE,
)

return github_username_regex.sub(r"`@\1`", text)


@dataclass
class GitHubConnection:
"""API connection to a GitHub user (e.g. scverse-bot)"""
Expand Down Expand Up @@ -138,10 +172,11 @@ def namespaced_head(self) -> str:

@property
def body(self) -> str:
    """Return the pull-request description with ``@mentions`` escaped.

    ``PR_BODY_TEMPLATE`` is filled in with this object's ``release`` and the
    link to the template-usage documentation. The rendered text is then passed
    through ``_escape_github_mentions`` so that any ``@username`` it contains
    is wrapped in backticks instead of being left as a live GitHub mention.
    """
    rendered = PR_BODY_TEMPLATE.format(
        release=self.release,
        template_usage="https://cookiecutter-scverse-instance.readthedocs.io/en/latest/template_usage.html",
    )
    return _escape_github_mentions(rendered)

def matches_prefix(self, pr: PullRequest) -> bool:
"""Check if `pr` is either a current or previous template update PR by matching the branch name"""
Expand Down
54 changes: 54 additions & 0 deletions scripts/tests/test_cruft.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
_apply_update,
_clone_and_prepare_repo,
_commit_update,
_escape_github_mentions,
_get_cruft_config_from_upstream,
get_repo_urls,
get_template_release,
Expand Down Expand Up @@ -143,3 +144,56 @@ def test_commit_update(clone: Repo, exclude_files: list[str], expected_untracked

def test_commit_update_no_files(clone: Repo) -> None:
    """``_commit_update`` must return ``False`` for the plain ``clone`` fixture.

    NOTE(review): presumably the fixture repository has no pending changes, so
    there is nothing to commit - confirm against the ``clone`` fixture.
    """
    committed = _commit_update(clone, commit_msg="foo", commit_author="scverse-bot")
    assert committed is False


@pytest.mark.parametrize(
    ("input_text", "expected"),
    [
        # A plain mention is wrapped in backticks.
        ("by @grst in", "by `@grst` in"),
        # Every mention in the text is wrapped, not just the first one.
        ("@alice and @bob", "`@alice` and `@bob`"),
        # A mention that already follows a backtick is left alone.
        ("`@grst`", "`@grst`"),
        # The `@` of an email address follows a word character: left alone.
        ("user@example.com", "user@example.com"),
        # Single inner hyphens are part of the username.
        ("by @some-user in", "by `@some-user` in"),
        # A mention at the very start of the text.
        ("@grst made changes", "`@grst` made changes"),
        # Text without any `@` passes through unchanged.
        ("no mentions here", "no mentions here"),
        # The shortest possible username: one character.
        ("@a contributed", "`@a` contributed"),
        # Two bullet points shaped like GitHub's auto-generated release notes.
        (
            "* Fix bug by @grst in https://github.com/scverse/cookiecutter-scverse/pull/1\n"
            "* Add feature by @some-user in https://github.com/scverse/cookiecutter-scverse/pull/2",
            "* Fix bug by `@grst` in https://github.com/scverse/cookiecutter-scverse/pull/1\n"
            "* Add feature by `@some-user` in https://github.com/scverse/cookiecutter-scverse/pull/2",
        ),
        # The bot's noreply email address must not be touched.
        ("108668866+scverse-bot@users.noreply.github.com", "108668866+scverse-bot@users.noreply.github.com"),
        # A trailing hyphen is not part of the username and stays outside.
        ("ping @user- now", "ping `@user`- now"),
        # Consecutive hyphens are invalid: only the valid prefix is wrapped.
        ("@a--b", "`@a`--b"),
        # Usernames stop at 39 characters; the 40th character stays outside.
        (f"@{'a' * 40}", f"`@{'a' * 39}`a"),
    ],
)
def test_escape_github_mentions(input_text: str, expected: str) -> None:
    """Check that ``_escape_github_mentions`` turns *input_text* into *expected*."""
    actual = _escape_github_mentions(input_text)
    assert actual == expected


@pytest.mark.xfail(
    reason="regex approach has no github-flavored markdown context; mentions inside code spans are wrongly escaped",
    strict=True,
)
def test_escape_github_mentions_inside_code_span() -> None:
    """Text that is already an inline code span should come back unchanged.

    Marked as a strict expected failure: the regex-based implementation does no
    full GFM parse, so it wraps the mention inside the span a second time. Such
    spans do not occur in GitHub's auto-generated release notes. See
    ``_escape_github_mentions``.
    """
    code_span = "`see @bar here`"
    assert _escape_github_mentions(code_span) == code_span
Loading