From 580722b5d804a345a63c616d217ace6633dbe4da Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 30 Mar 2026 05:54:05 +0200 Subject: [PATCH 1/2] allowlist-check: print gh command to create allowlist PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the check finds action refs not on the allowlist, print a ready-to-run shell script that forks apache/infrastructure-actions, appends wildcard entries to actions.yml, and opens a pull request — all via the gh CLI with no manual file editing required. Also add verbose output showing each action ref being checked, its status (allowed/not allowed), and the reason (trusted owner, matches allowlist, or not on allowlist). --- allowlist-check/check_asf_allowlist.py | 78 +++++++++++++- allowlist-check/test_check_asf_allowlist.py | 114 ++++++++++++++++++++ 2 files changed, 191 insertions(+), 1 deletion(-) diff --git a/allowlist-check/check_asf_allowlist.py b/allowlist-check/check_asf_allowlist.py index f0c02c9a..9a6df827 100644 --- a/allowlist-check/check_asf_allowlist.py +++ b/allowlist-check/check_asf_allowlist.py @@ -32,6 +32,7 @@ import fnmatch import glob import os +import shlex import sys from typing import Any, Generator @@ -148,6 +149,63 @@ def is_allowed(action_ref: str, allowlist: list[str]) -> bool: return any(fnmatch.fnmatch(action_ref, pattern) for pattern in allowlist) +def build_gh_pr_command(missing_refs: list[str], repo_name: str) -> str: + """Build a shell command that creates a PR adding missing actions to the allowlist. + + The generated script forks ``apache/infrastructure-actions``, appends + wildcard entries to ``actions.yml``, and opens a pull request — all via + the ``gh`` CLI with no manual file editing required. + + Args: + missing_refs: Action refs not on the allowlist (e.g. ``["owner/act@sha"]``). + repo_name: Value of ``$GITHUB_REPOSITORY`` (may be empty). + + Returns: + str: A copy-pasteable shell script. + """ + action_names = sorted({ref.split("@")[0] for ref in missing_refs}) + + # Branch name from first action, sanitised for git + sanitized = action_names[0].replace("/", "-") + if len(action_names) > 1: + sanitized += f"-and-{len(action_names) - 1}-more" + branch = f"allowlist-add-{sanitized}" + + # YAML entries to append (wildcard — maintainers can pin later) + yaml_lines: list[str] = [] + for name in action_names: + yaml_lines.append(f"{name}:") + yaml_lines.append(" '*':") + yaml_lines.append(" keep: true") + yaml_block = "\n".join(yaml_lines) + + summary = ", ".join(action_names) + title = f"Add {summary} to the GitHub Actions allowlist" + + body_lines = ["Add the following action(s) to the allowlist:", ""] + for name in action_names: + body_lines.append(f"- `{name}`") + if repo_name: + body_lines.extend(["", f"Needed by: `{repo_name}`"]) + body = "\n".join(body_lines) + + return ( + f"( set -e; _d=$(mktemp -d); trap 'rm -rf \"$_d\"' EXIT; cd \"$_d\"\n" + f" gh repo clone apache/infrastructure-actions . -- --depth=1\n" + f" gh repo fork --remote\n" + f" git checkout -b {shlex.quote(branch)}\n" + f" cat >> actions.yml << 'ALLOWLIST_YAML'\n" + f"{yaml_block}\n" + f"ALLOWLIST_YAML\n" + f" git add actions.yml\n" + f" git commit -m {shlex.quote(f'Add {summary} to allowlist')}\n" + f" git push -u origin {shlex.quote(branch)}\n" + f" gh pr create --repo apache/infrastructure-actions" + f" --title {shlex.quote(title)}" + f" --body {shlex.quote(body)} )\n" + ) + + def main(): if len(sys.argv) != 2: print(f"Usage: {sys.argv[0]} ", file=sys.stderr) @@ -158,9 +216,21 @@ def main(): scan_glob = os.environ.get("GITHUB_YAML_GLOB", DEFAULT_GITHUB_YAML_GLOB) action_refs = collect_action_refs(scan_glob) + print(f"Checking {len(action_refs)} unique action ref(s) against the ASF allowlist:\n") violations = [] for action_ref, filepaths in sorted(action_refs.items()): - if not is_allowed(action_ref, allowlist): + allowed = is_allowed(action_ref, allowlist) + owner = action_ref.split("/")[0] + if owner in TRUSTED_OWNERS: + reason = f"trusted owner ({owner})" + elif allowed: + reason = "matches allowlist" + else: + reason = "NOT ON ALLOWLIST" + status = "✅" if allowed else "❌" + files_str = ", ".join(filepaths) + print(f" {status} {action_ref} — {reason} ({files_str})") + if not allowed: for filepath in filepaths: violations.append((filepath, action_ref)) @@ -175,6 +245,12 @@ def main(): " the action or version to the allowlist:" " https://github.com/apache/infrastructure-actions#adding-a-new-action-to-the-allow-list" ) + + missing_refs = sorted({ref for _, ref in violations}) + repo_name = os.environ.get("GITHUB_REPOSITORY", "") + script = build_gh_pr_command(missing_refs, repo_name) + print(f"\n::notice::You can create a PR to add the missing entries by running:\n{script}") + sys.exit(1) else: print(f"All {len(action_refs)} unique action refs are on the ASF allowlist") diff --git a/allowlist-check/test_check_asf_allowlist.py b/allowlist-check/test_check_asf_allowlist.py index 03754133..13ec3cdd 100644 --- a/allowlist-check/test_check_asf_allowlist.py +++ b/allowlist-check/test_check_asf_allowlist.py @@ -20,12 +20,15 @@ import tempfile import textwrap import unittest +from unittest.mock import patch from check_asf_allowlist import ( + build_gh_pr_command, collect_action_refs, find_action_refs, is_allowed, load_allowlist, + main, ) @@ -303,5 +306,116 @@ def test_no_matching_files(self): self.assertEqual(refs, {}) +class TestBuildGhPrCommand(unittest.TestCase): + """Tests for the generated gh PR command.""" + + def test_single_action(self): + script = build_gh_pr_command( + ["evil-org/evil-action@abc123"], "apache/test-repo" + ) + self.assertIn("gh repo clone apache/infrastructure-actions", script) + self.assertIn("gh repo fork --remote", script) + self.assertIn("allowlist-add-evil-org-evil-action", script) + self.assertIn("evil-org/evil-action:", script) + self.assertIn(" '*':", script) + self.assertIn(" keep: true", script) + self.assertIn("gh pr create --repo apache/infrastructure-actions", script) + self.assertIn("apache/test-repo", script) + + def test_multiple_actions(self): + script = build_gh_pr_command( + ["b-org/b-action@sha1", "a-org/a-action@sha2"], "" + ) + # Actions should be sorted + self.assertIn("a-org/a-action:", script) + self.assertIn("b-org/b-action:", script) + # Branch name mentions first action + "and more" + self.assertIn("allowlist-add-a-org-a-action-and-1-more", script) + # No repo reference when GITHUB_REPOSITORY is empty + self.assertNotIn("Needed by:", script) + + def test_deduplicates_same_action_different_shas(self): + script = build_gh_pr_command( + ["org/action@sha1", "org/action@sha2"], "" + ) + # Should appear only once in the YAML block + self.assertEqual(script.count("org/action:"), 1) + + +class TestMainGhPrCommand(unittest.TestCase): + """Tests that main() prints a gh PR command on violations.""" + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.github_dir = os.path.join(self.tmpdir, ".github", "workflows") + os.makedirs(self.github_dir) + + filepath = os.path.join(self.github_dir, "ci.yml") + with open(filepath, "w") as f: + f.write( + textwrap.dedent( + """\ + name: CI + on: push + jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: evil-org/evil-action@abc123 + """ + ) + ) + + self.allowlist_path = os.path.join(self.tmpdir, "allowlist.yml") + with open(self.allowlist_path, "w") as f: + f.write("") + + def tearDown(self): + shutil.rmtree(self.tmpdir) + + @patch.dict(os.environ, {"GITHUB_REPOSITORY": "apache/test-repo"}) + def test_main_prints_pr_command(self): + scan_glob = os.path.join(self.tmpdir, ".github/**/*.yml") + with ( + patch.dict(os.environ, {"GITHUB_YAML_GLOB": scan_glob}), + patch("sys.argv", ["check_asf_allowlist.py", self.allowlist_path]), + patch("sys.stdout") as mock_stdout, + self.assertRaises(SystemExit) as cm, + ): + main() + + self.assertEqual(cm.exception.code, 1) + output = "".join( + call.args[0] for call in mock_stdout.write.call_args_list + ) + self.assertIn("gh pr create --repo apache/infrastructure-actions", output) + self.assertIn("evil-org/evil-action", output) + self.assertIn("apache/test-repo", output) + + @patch.dict(os.environ, {"GITHUB_REPOSITORY": "apache/test-repo"}) + def test_main_prints_verbose_check_output(self): + scan_glob = os.path.join(self.tmpdir, ".github/**/*.yml") + with ( + patch.dict(os.environ, {"GITHUB_YAML_GLOB": scan_glob}), + patch("sys.argv", ["check_asf_allowlist.py", self.allowlist_path]), + patch("sys.stdout") as mock_stdout, + self.assertRaises(SystemExit), + ): + main() + + output = "".join( + call.args[0] for call in mock_stdout.write.call_args_list + ) + # Trusted action should show as allowed with reason + self.assertIn("actions/checkout@v4", output) + self.assertIn("trusted owner", output) + # Violation should show as not allowed + self.assertIn("evil-org/evil-action@abc123", output) + self.assertIn("NOT ON ALLOWLIST", output) + # Header line + self.assertIn("Checking 2 unique action ref(s)", output) + + if __name__ == "__main__": unittest.main() From d4c18fd2e03fd1879819e736942250b67fad741b Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 4 Apr 2026 11:37:46 +0200 Subject: [PATCH 2/2] fixup! allowlist-check: print gh command to create allowlist PR fixup! allowlist-check: print gh command to create allowlist PR - Use pinned versions instead of wildcards in generated entries - Insert entries alphabetically via separate insert_actions.py script - Download insert_actions.py from raw GitHub instead of inlining - Push to fork and open PR from fork - Generate one PR command per action, encourage one PR per action --- allowlist-check/check_asf_allowlist.py | 77 +++++++++--------- allowlist-check/insert_actions.py | 74 +++++++++++++++++ allowlist-check/test_check_asf_allowlist.py | 88 ++++++++++++++++----- 3 files changed, 184 insertions(+), 55 deletions(-) create mode 100644 allowlist-check/insert_actions.py diff --git a/allowlist-check/check_asf_allowlist.py b/allowlist-check/check_asf_allowlist.py index 9a6df827..22201633 100644 --- a/allowlist-check/check_asf_allowlist.py +++ b/allowlist-check/check_asf_allowlist.py @@ -149,58 +149,49 @@ def is_allowed(action_ref: str, allowlist: list[str]) -> bool: return any(fnmatch.fnmatch(action_ref, pattern) for pattern in allowlist) -def build_gh_pr_command(missing_refs: list[str], repo_name: str) -> str: - """Build a shell command that creates a PR adding missing actions to the allowlist. +def build_gh_pr_command(action_name: str, refs: list[str], repo_name: str) -> str: + """Build a shell command that creates a PR adding one action to the allowlist. - The generated script forks ``apache/infrastructure-actions``, appends - wildcard entries to ``actions.yml``, and opens a pull request — all via - the ``gh`` CLI with no manual file editing required. + The generated script forks ``apache/infrastructure-actions``, inserts + pinned version entries into ``actions.yml`` in alphabetical order, and + opens a pull request — all via the ``gh`` CLI with no manual file editing + required. Args: - missing_refs: Action refs not on the allowlist (e.g. ``["owner/act@sha"]``). + action_name: The action name (e.g. ``"owner/action"``). + refs: Full action refs for this action (e.g. ``["owner/action@sha"]``). repo_name: Value of ``$GITHUB_REPOSITORY`` (may be empty). Returns: str: A copy-pasteable shell script. """ - action_names = sorted({ref.split("@")[0] for ref in missing_refs}) - - # Branch name from first action, sanitised for git - sanitized = action_names[0].replace("/", "-") - if len(action_names) > 1: - sanitized += f"-and-{len(action_names) - 1}-more" - branch = f"allowlist-add-{sanitized}" - - # YAML entries to append (wildcard — maintainers can pin later) - yaml_lines: list[str] = [] - for name in action_names: - yaml_lines.append(f"{name}:") - yaml_lines.append(" '*':") - yaml_lines.append(" keep: true") - yaml_block = "\n".join(yaml_lines) - - summary = ", ".join(action_names) - title = f"Add {summary} to the GitHub Actions allowlist" - - body_lines = ["Add the following action(s) to the allowlist:", ""] - for name in action_names: - body_lines.append(f"- `{name}`") + branch = f"allowlist-add-{action_name.replace('/', '-')}" + title = f"Add {action_name} to the GitHub Actions allowlist" + + body_lines = [f"Add `{action_name}` to the allowlist:", ""] + for ref in sorted(refs): + body_lines.append(f"- `{ref}`") if repo_name: body_lines.extend(["", f"Needed by: `{repo_name}`"]) body = "\n".join(body_lines) + ref_args = " ".join(shlex.quote(r) for r in sorted(refs)) + + inserter_url = ( + "https://raw.githubusercontent.com/apache/infrastructure-actions/" + "main/allowlist-check/insert_actions.py" + ) + return ( f"( set -e; _d=$(mktemp -d); trap 'rm -rf \"$_d\"' EXIT; cd \"$_d\"\n" - f" gh repo clone apache/infrastructure-actions . -- --depth=1\n" - f" gh repo fork --remote\n" + f" gh repo fork apache/infrastructure-actions --clone -- --depth=1\n" + f" cd infrastructure-actions\n" f" git checkout -b {shlex.quote(branch)}\n" - f" cat >> actions.yml << 'ALLOWLIST_YAML'\n" - f"{yaml_block}\n" - f"ALLOWLIST_YAML\n" + f" curl -fsSL {shlex.quote(inserter_url)} | python3 - actions.yml {ref_args}\n" f" git add actions.yml\n" - f" git commit -m {shlex.quote(f'Add {summary} to allowlist')}\n" + f" git commit -m {shlex.quote(f'Add {action_name} to allowlist')}\n" f" git push -u origin {shlex.quote(branch)}\n" - f" gh pr create --repo apache/infrastructure-actions" + f" gh pr create --repo apache/infrastructure-actions --head \"$(gh api user -q .login):{shlex.quote(branch)}\"" f" --title {shlex.quote(title)}" f" --body {shlex.quote(body)} )\n" ) @@ -248,8 +239,20 @@ def main(): missing_refs = sorted({ref for _, ref in violations}) repo_name = os.environ.get("GITHUB_REPOSITORY", "") - script = build_gh_pr_command(missing_refs, repo_name) - print(f"\n::notice::You can create a PR to add the missing entries by running:\n{script}") + + # Group by action name so we can suggest one PR per action + by_action: dict[str, list[str]] = {} + for ref in missing_refs: + name = ref.split("@")[0] + by_action.setdefault(name, []).append(ref) + + print( + "\n::notice::Please create one PR per action." + " You can create the PRs by running the commands below:" + ) + for action_name in sorted(by_action): + script = build_gh_pr_command(action_name, by_action[action_name], repo_name) + print(f"\n# {action_name}\n{script}") sys.exit(1) else: diff --git a/allowlist-check/insert_actions.py b/allowlist-check/insert_actions.py new file mode 100644 index 00000000..61f6ff18 --- /dev/null +++ b/allowlist-check/insert_actions.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Insert action entries into actions.yml in alphabetical order. + +Usage: + python3 insert_actions.py [ ...] + +Each ``ref`` is an action reference in ``owner/action@version`` format. +New entries are inserted so that the top-level keys in ``actions.yml`` +remain sorted case-insensitively. If an action already exists in the +file, the entry is skipped (it will *not* be overwritten). +""" + +import re +import sys + + +def insert_actions(actions_yml_path: str, refs: list[str]) -> None: + """Insert *refs* into *actions_yml_path* in alphabetical order.""" + # Group refs by action name: {name: [version, ...]} + by_action: dict[str, list[str]] = {} + for ref in refs: + name, _, version = ref.partition("@") + by_action.setdefault(name, []).append(version or "*") + + # Build YAML blocks for the new entries + new_entries: dict[str, str] = {} + for name in sorted(by_action): + lines = [f"{name}:"] + for version in sorted(by_action[name]): + lines.append(f" '{version}':") + lines.append(" keep: true") + new_entries[name] = "\n".join(lines) + + # Parse existing top-level blocks + text = open(actions_yml_path).read() + blocks = re.split(r"(?m)(?=^\S)", text) + by_key: dict[str, str] = {} + for block in blocks: + if block.strip(): + by_key[block.split(":", 1)[0].strip()] = block.rstrip() + + # Merge — setdefault keeps existing entries untouched + for key, value in new_entries.items(): + by_key.setdefault(key, value) + + # Write back sorted + with open(actions_yml_path, "w") as f: + f.write( + "\n".join(by_key[k] for k in sorted(by_key, key=str.casefold)) + + "\n" + ) + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} [ ...]", file=sys.stderr) + sys.exit(2) + insert_actions(sys.argv[1], sys.argv[2:]) diff --git a/allowlist-check/test_check_asf_allowlist.py b/allowlist-check/test_check_asf_allowlist.py index 13ec3cdd..d7ba1f9a 100644 --- a/allowlist-check/test_check_asf_allowlist.py +++ b/allowlist-check/test_check_asf_allowlist.py @@ -30,6 +30,7 @@ load_allowlist, main, ) +from insert_actions import insert_actions class TestFindActionRefs(unittest.TestCase): @@ -306,40 +307,90 @@ def test_no_matching_files(self): self.assertEqual(refs, {}) +class TestInsertActions(unittest.TestCase): + """Tests for the insert_actions helper script.""" + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.actions_yml = os.path.join(self.tmpdir, "actions.yml") + + def tearDown(self): + shutil.rmtree(self.tmpdir) + + def test_inserts_alphabetically(self): + with open(self.actions_yml, "w") as f: + f.write("aaa/action:\n 'sha1':\n keep: true\nzzz/action:\n 'sha2':\n keep: true\n") + insert_actions(self.actions_yml, ["mmm/middle@sha3"]) + content = open(self.actions_yml).read() + lines = content.splitlines() + top_keys = [l for l in lines if not l.startswith(" ") and l.endswith(":")] + self.assertEqual(top_keys, ["aaa/action:", "mmm/middle:", "zzz/action:"]) + + def test_does_not_overwrite_existing(self): + with open(self.actions_yml, "w") as f: + f.write("org/action:\n 'existing-sha':\n keep: true\n") + insert_actions(self.actions_yml, ["org/action@new-sha"]) + content = open(self.actions_yml).read() + self.assertIn("existing-sha", content) + self.assertNotIn("new-sha", content) + + def test_multiple_refs_same_action(self): + with open(self.actions_yml, "w") as f: + f.write("") + insert_actions(self.actions_yml, ["org/act@sha1", "org/act@sha2"]) + content = open(self.actions_yml).read() + self.assertIn("sha1", content) + self.assertIn("sha2", content) + self.assertEqual(content.count("org/act:"), 1) + + def test_case_insensitive_sort(self): + with open(self.actions_yml, "w") as f: + f.write("Bbb/action:\n 'sha1':\n keep: true\n") + insert_actions(self.actions_yml, ["aaa/action@sha2"]) + content = open(self.actions_yml).read() + self.assertTrue(content.index("aaa/action:") < content.index("Bbb/action:")) + + class TestBuildGhPrCommand(unittest.TestCase): """Tests for the generated gh PR command.""" def test_single_action(self): script = build_gh_pr_command( - ["evil-org/evil-action@abc123"], "apache/test-repo" + "evil-org/evil-action", ["evil-org/evil-action@abc123"], "apache/test-repo" ) - self.assertIn("gh repo clone apache/infrastructure-actions", script) - self.assertIn("gh repo fork --remote", script) + self.assertIn("gh repo fork apache/infrastructure-actions --clone", script) self.assertIn("allowlist-add-evil-org-evil-action", script) - self.assertIn("evil-org/evil-action:", script) - self.assertIn(" '*':", script) - self.assertIn(" keep: true", script) + self.assertIn("insert_actions.py", script) + self.assertIn("evil-org/evil-action@abc123", script) self.assertIn("gh pr create --repo apache/infrastructure-actions", script) self.assertIn("apache/test-repo", script) - def test_multiple_actions(self): + def test_no_repo_name(self): script = build_gh_pr_command( - ["b-org/b-action@sha1", "a-org/a-action@sha2"], "" + "some-org/some-action", ["some-org/some-action@sha1"], "" ) - # Actions should be sorted - self.assertIn("a-org/a-action:", script) - self.assertIn("b-org/b-action:", script) - # Branch name mentions first action + "and more" - self.assertIn("allowlist-add-a-org-a-action-and-1-more", script) - # No repo reference when GITHUB_REPOSITORY is empty self.assertNotIn("Needed by:", script) - def test_deduplicates_same_action_different_shas(self): + def test_multiple_shas_same_action(self): script = build_gh_pr_command( - ["org/action@sha1", "org/action@sha2"], "" + "org/action", ["org/action@sha1", "org/action@sha2"], "" + ) + self.assertIn("org/action@sha1", script) + self.assertIn("org/action@sha2", script) + self.assertIn("allowlist-add-org-action", script) + + def test_downloads_inserter_from_raw_github(self): + """The generated script must download insert_actions.py.""" + script = build_gh_pr_command( + "zoo/action", ["zoo/action@abc123"], "" + ) + self.assertIn( + "https://raw.githubusercontent.com/apache/infrastructure-actions/" + "main/allowlist-check/insert_actions.py", + script, ) - # Should appear only once in the YAML block - self.assertEqual(script.count("org/action:"), 1) + self.assertIn("curl -fsSL", script) + self.assertIn("python3 -", script) class TestMainGhPrCommand(unittest.TestCase): @@ -392,6 +443,7 @@ def test_main_prints_pr_command(self): self.assertIn("gh pr create --repo apache/infrastructure-actions", output) self.assertIn("evil-org/evil-action", output) self.assertIn("apache/test-repo", output) + self.assertIn("Please create one PR per action", output) @patch.dict(os.environ, {"GITHUB_REPOSITORY": "apache/test-repo"}) def test_main_prints_verbose_check_output(self):