diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 023b36a..d6e2ca4 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -37,3 +37,17 @@ jobs: run: python -m pip install --disable-pip-version-check pyyaml - name: README project table matches repos.yml run: python scripts/generate_project_table.py --check README.md + + project-table-unit-tests: + name: Project table generator unit tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install PyYAML + run: python -m pip install --disable-pip-version-check pyyaml + - name: Run generator unit tests + run: python3 -m unittest discover -s tests diff --git a/.gitignore b/.gitignore index fafff2e..ebe4d7f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,7 @@ .DS_Store Thumbs.db + +# Python bytecode +__pycache__/ +*.py[cod] +*.pyo diff --git a/scripts/generate_project_table.py b/scripts/generate_project_table.py index 4cf463a..d8b1298 100755 --- a/scripts/generate_project_table.py +++ b/scripts/generate_project_table.py @@ -58,9 +58,36 @@ def load_repositories(manifest_path: Path = MANIFEST) -> list[dict]: def public_repositories(manifest_path: Path = MANIFEST) -> list[dict]: - """Return only repositories that should appear in the public table.""" + """Return only repositories that should appear in the public table. + + Repositories are first filtered by the ``public_product`` flag, then two + defence-in-depth guards keep internal tooling out of the public table even + when that flag is misconfigured: + + 1. Category-based: a repository whose ``category`` is ``internal-tool`` is + never surfaced. ``category`` is the org-wide metadata that distinguishes + internal tools from public products, so a contradictory + ``public_product: true`` on an internal-tool repo is treated as a hard + error rather than silently rendered as a public product row. + 2. 
Name-based: ``FORBIDDEN_PUBLIC`` hard-codes known-sensitive internal + repos that must never appear regardless of any other field. + """ repos = load_repositories(manifest_path) surfaced = [r for r in repos if r.get("public_product") is True] + + # Guard 1 (category-based): internal tooling must never be surfaced as a + # public product, even if public_product was set to true by mistake. This + # is self-maintaining -- new internal tools are excluded by their category + # without needing to be added to FORBIDDEN_PUBLIC. + for repo in surfaced: + if repo.get("category") == "internal-tool": + raise ValueError( + f"{repo.get('name')!r} has category 'internal-tool' but is " + f"flagged public_product: true in {manifest_path.name}; " + f"internal tooling must set public_product: false" + ) + + # Guard 2 (name-based defence-in-depth) for known-sensitive repos. names = {r["name"] for r in surfaced} for forbidden in FORBIDDEN_PUBLIC: if forbidden in names: diff --git a/tests/test_generate_project_table.py b/tests/test_generate_project_table.py new file mode 100644 index 0000000..d974648 --- /dev/null +++ b/tests/test_generate_project_table.py @@ -0,0 +1,137 @@ +"""Tests for the WasmAgent public project-table generator. + +Stdlib-only (``unittest``) so no extra dependencies are required. Run with:: + + python3 -m unittest discover -s tests + # or + python3 tests/test_generate_project_table.py +""" +from __future__ import annotations + +import sys +import tempfile +import unittest +from pathlib import Path + +# Make the generator importable regardless of the current working directory. 
+SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts"
+sys.path.insert(0, str(SCRIPTS_DIR))
+
+import generate_project_table as gpt  # noqa: E402
+
+
+def _write_manifest(text: str) -> Path:
+    """Write ``text`` to a temporary ``.yml`` manifest and return its path.
+
+    The file is kept on close (``delete=False``) so it can be re-opened by
+    path, and is unlinked at interpreter exit so runs leave nothing behind.
+    """
+    import atexit  # function-scope: only this helper needs it
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".yml", delete=False, encoding="utf-8"
+    ) as handle:
+        handle.write(text)
+    path = Path(handle.name)
+    atexit.register(path.unlink, missing_ok=True)
+    return path
+
+
+PUBLIC = """\
+schema_version: 1
+repositories:
+  - name: wasmagent-js
+    url: https://github.com/WasmAgent/wasmagent-js
+    category: public-product
+    public_product: true
+    purpose: Core JS/TS runtime and MCP server
+"""
+
+# Manifest entry for an internal tool that is correctly flagged non-public.
+PRIVATE_OPS = """\
+  - name: wasmagent-ops
+    url: https://github.com/WasmAgent/wasmagent-ops
+    category: internal-tool
+    public_product: false
+    purpose: Internal operations and automation tooling
+"""
+
+
+class PublicRepositoriesTests(unittest.TestCase):
+    """Filtering and guard behaviour of ``gpt.public_repositories``."""
+
+    def test_surfaces_public_products(self):
+        """A repo flagged public_product: true is surfaced."""
+        path = _write_manifest(PUBLIC)
+        surfaced = gpt.public_repositories(path)
+        self.assertEqual([r["name"] for r in surfaced], ["wasmagent-js"])
+
+    def test_excludes_internal_tool_with_public_product_false(self):
+        """An internal-tool repo with public_product: false is omitted."""
+        path = _write_manifest(PUBLIC + PRIVATE_OPS)
+        surfaced = gpt.public_repositories(path)
+        self.assertEqual([r["name"] for r in surfaced], ["wasmagent-js"])
+
+    def test_rejects_internal_tool_misflagged_as_public(self):
+        """A category=internal-tool repo with public_product: true is a hard
+        error (category-based defence-in-depth, wasmagent#53)."""
+        path = _write_manifest(
+            PUBLIC
+            + """\
+  - name: wasmagent-ops
+    url: https://github.com/WasmAgent/wasmagent-ops
+    category: internal-tool
+    public_product: true
+    purpose: Internal operations and automation tooling
+"""
+        )
+        with self.assertRaises(ValueError) as ctx:
+            gpt.public_repositories(path)
+        self.assertIn("internal-tool", str(ctx.exception))
+        self.assertIn("wasmagent-ops", str(ctx.exception))
+
+    def test_rejects_unknown_internal_tool_misflagged_as_public(self):
+        """The category guard is self-maintaining: an internal tool that is
+        NOT in FORBIDDEN_PUBLIC is still rejected by its category."""
+        path = _write_manifest(
+            PUBLIC
+            + """\
+  - name: some-future-ops-bot
+    url: https://github.com/WasmAgent/some-future-ops-bot
+    category: internal-tool
+    public_product: true
+    purpose: Another internal tool
+"""
+        )
+        with self.assertRaises(ValueError):
+            gpt.public_repositories(path)
+
+    def test_rejects_forbidden_public_name_even_without_category(self):
+        """FORBIDDEN_PUBLIC is a name-based backstop independent of category."""
+        path = _write_manifest(
+            PUBLIC
+            + """\
+  - name: claude-bot
+    url: https://github.com/WasmAgent/claude-bot
+    category: public-product
+    public_product: true
+    purpose: Should not be surfaced
+"""
+        )
+        with self.assertRaises(ValueError) as ctx:
+            gpt.public_repositories(path)
+        self.assertIn("claude-bot", str(ctx.exception))
+
+
+class RenderTableTests(unittest.TestCase):
+    """End-to-end check of ``gpt.render_table`` on filtered repositories."""
+
+    def test_render_excludes_internal_tools(self):
+        """The rendered table names public repos and omits internal tools."""
+        repos = gpt.public_repositories(_write_manifest(PUBLIC + PRIVATE_OPS))
+        table = gpt.render_table(repos)
+        self.assertIn("wasmagent-js", table)
+        self.assertNotIn("wasmagent-ops", table)
+
+
+if __name__ == "__main__":
+    unittest.main()