-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_project_table.py
More file actions
executable file
·220 lines (183 loc) · 7.85 KB
/
Copy pathgenerate_project_table.py
File metadata and controls
executable file
·220 lines (183 loc) · 7.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#!/usr/bin/env python3
"""Generate the WasmAgent public project table from the repository manifest.
The project table in ``README.md`` is derived from the canonical repository
manifest (``repos.yml``). Only repositories with ``public_product: true`` are
surfaced; internal tooling such as ``claude-bot`` and ``wasmagent-ops`` is
omitted. This keeps the README table in lock-step with the public WasmAgent org
profile and prevents the table from silently drifting out of sync.
Usage::
scripts/generate_project_table.py --check README.md
Verify the README project table matches repos.yml. Exits non-zero and
prints a diff on drift.
scripts/generate_project_table.py --write README.md
Rewrite the README project table to match repos.yml.
The managed table in README.md is delimited by the markers
``<!-- BEGIN PROJECT TABLE -->`` / ``<!-- END PROJECT TABLE -->``.
"""
from __future__ import annotations
import argparse
import difflib
import re
import sys
from pathlib import Path
try:
import yaml
except ImportError: # pragma: no cover - dependency declared in CI
sys.stderr.write("PyYAML is required: pip install pyyaml\n")
sys.exit(2)
# Repository root: the parent of the directory that holds this script.
ROOT = Path(__file__).resolve().parent.parent
# Canonical repository manifest and the README that carries the managed table.
MANIFEST = ROOT.joinpath("repos.yml")
README = ROOT.joinpath("README.md")

# HTML comment markers that delimit the managed table inside the README.
BEGIN_MARKER = "<!-- BEGIN PROJECT TABLE -->"
END_MARKER = "<!-- END PROJECT TABLE -->"

# Names that may never be rendered as public products, whatever the manifest
# says. Acts as a second line of defence against accidental reclassification.
FORBIDDEN_PUBLIC = (
    "claude-bot",
    "wasmagent-ops",
)

# Non-greedy match of one managed block, both markers included. DOTALL lets
# ``.`` span the newlines between the markers.
_BLOCK_RE = re.compile(
    f"{re.escape(BEGIN_MARKER)}.*?{re.escape(END_MARKER)}",
    flags=re.DOTALL,
)
def load_repositories(manifest_path: Path = MANIFEST) -> list[dict]:
    """Return the full repository list from the manifest.

    Args:
        manifest_path: YAML manifest to read. Defaults to ``MANIFEST``.

    Returns:
        A new list with the entries found under the top-level
        ``repositories`` key. The list is empty when the document is empty,
        when the key is absent, or when the key is present without a value.
    """
    with manifest_path.open("r", encoding="utf-8") as handle:
        # An empty document parses to None; treat it as an empty mapping.
        data = yaml.safe_load(handle) or {}
    # A bare ``repositories:`` line parses to None. ``.get(key, [])`` would
    # hand that None back (the key exists) and ``list(None)`` raises
    # TypeError, so fall back with ``or`` rather than the dict default.
    return list(data.get("repositories") or [])
def public_repositories(manifest_path: Path = MANIFEST) -> list[dict]:
    """Select the manifest entries that belong in the public project table.

    An entry is surfaced only when its ``public_product`` value is exactly
    ``True``. Two sanity checks then run over the surfaced entries, in this
    order, so that a misconfigured flag fails loudly instead of leaking
    internal tooling into the README:

    1. Category check: a surfaced entry whose ``category`` equals
       ``internal-tool`` is contradictory and is rejected.
    2. Name check: a surfaced entry whose name is listed in
       ``FORBIDDEN_PUBLIC`` is rejected regardless of its other fields.

    Args:
        manifest_path: Manifest to load. Defaults to ``MANIFEST``.

    Returns:
        The surfaced entries, in manifest order.

    Raises:
        ValueError: If either check above fails.
        KeyError: If a surfaced entry has no ``name`` key (the name check
            indexes it directly).
    """
    surfaced = [
        entry
        for entry in load_repositories(manifest_path)
        if entry.get("public_product") is True
    ]

    # Check 1: the first surfaced entry categorised as internal tooling, if
    # any. Driven by category, so new internal tools are covered without
    # having to be listed in FORBIDDEN_PUBLIC.
    repo = next(
        (entry for entry in surfaced if entry.get("category") == "internal-tool"),
        None,
    )
    if repo is not None:
        raise ValueError(
            f"{repo.get('name')!r} has category 'internal-tool' but is "
            f"flagged public_product: true in {manifest_path.name}; "
            f"internal tooling must set public_product: false"
        )

    # Check 2: the first hard-coded sensitive name that was surfaced, if any.
    names = {entry["name"] for entry in surfaced}
    forbidden = next(
        (candidate for candidate in FORBIDDEN_PUBLIC if candidate in names),
        None,
    )
    if forbidden is not None:
        raise ValueError(
            f"{forbidden!r} is classified as a public product in "
            f"{manifest_path.name}; it is internal tooling and must set "
            f"public_product: false"
        )

    return surfaced
def render_table(repos: list[dict]) -> str:
    """Render the markdown table (header + rows) for the given repositories.

    Args:
        repos: Repository mappings. Each must contain ``name``; ``url`` and
            ``purpose`` are optional.

    Returns:
        The table as a single string with rows joined by newlines and no
        trailing newline. With no repositories only the header and the
        separator row are returned.

    Raises:
        KeyError: If an entry has no ``name`` key.
    """
    lines = ["| Repository | Purpose |", "|---|---|"]
    for repo in repos:
        name = repo["name"]
        # A missing or empty ``url`` falls back to the org-level GitHub URL.
        url = repo.get("url") or f"https://github.com/WasmAgent/{name}"
        # ``purpose:`` with no value parses to None; render it as an empty
        # cell instead of the literal text "None".
        raw_purpose = repo.get("purpose")
        purpose = "" if raw_purpose is None else str(raw_purpose)
        # A table row must stay on one line.
        purpose = purpose.strip().replace("\n", " ")
        # A bare "|" would end the cell early and corrupt the row, so escape
        # it. Pipes that are already backslash-escaped are left untouched.
        purpose = re.sub(r"(?<!\\)\|", r"\\|", purpose)
        lines.append(f"| [{name}]({url}) | {purpose} |")
    return "\n".join(lines)
def expected_block(repos: list[dict]) -> str:
    """Build the managed README block, markers included, for ``repos``.

    The begin marker, the rendered table and the end marker are separated
    from each other by one blank line.
    """
    table = render_table(repos)
    return "\n\n".join((BEGIN_MARKER, table, END_MARKER))
def current_block(text: str) -> str | None:
    """Extract the first managed block (markers included) found in ``text``.

    Returns ``None`` when ``text`` contains no begin/end marker pair.
    """
    found = _BLOCK_RE.search(text)
    if found is None:
        return None
    return found.group(0)
def cmd_check(readme_path: Path) -> int:
    """Check that the README's managed table matches the manifest.

    Args:
        readme_path: README file to inspect.

    Returns:
        ``0`` when the managed block equals the block generated from the
        manifest. ``1`` when the manifest fails validation, the markers are
        missing, or the block differs; the reason (and, on drift, a unified
        diff plus a fix hint) is written to stderr.
    """
    try:
        repos = public_repositories()
    except ValueError as exc:
        sys.stderr.write(f"error: {exc}\n")
        return 1

    existing = current_block(readme_path.read_text(encoding="utf-8"))
    if existing is None:
        sys.stderr.write(
            f"error: managed table markers not found in {readme_path.name} "
            f"(expected `{BEGIN_MARKER}` ... `{END_MARKER}`)\n"
        )
        return 1

    expected = expected_block(repos)
    if existing == expected:
        return 0

    # Drift: report it, show what would change, and say how to fix it.
    sys.stderr.write(
        f"error: README project table is out of sync with "
        f"{MANIFEST.relative_to(ROOT)}\n"
    )
    delta = difflib.unified_diff(
        existing.splitlines(keepends=True),
        expected.splitlines(keepends=True),
        fromfile=f"{readme_path.name} (current)",
        tofile=f"{readme_path.name} (expected)",
        n=1,
    )
    for diff_line in delta:
        sys.stderr.write(diff_line)
    sys.stderr.write(
        "\nFix with: python3 scripts/generate_project_table.py --write "
        "README.md\n"
    )
    return 1
def cmd_write(readme_path: Path) -> int:
    """Regenerate the managed table in README.md. Returns shell exit code.

    When the markers are already present, every managed block in the file is
    replaced with the freshly generated one. Otherwise the block is inserted
    directly after the first occurrence of the ``## Repositories`` heading.

    Args:
        readme_path: README file to rewrite in place.

    Returns:
        ``0`` after the file has been written. ``1`` (with a message on
        stderr and the file left untouched) when the manifest fails
        validation, or when neither the markers nor the heading are found.
    """
    # Report manifest validation failures the same way cmd_check does rather
    # than letting the ValueError escape as a traceback.
    try:
        repos = public_repositories()
    except ValueError as exc:
        sys.stderr.write(f"error: {exc}\n")
        return 1
    block = expected_block(repos)
    text = readme_path.read_text(encoding="utf-8")
    if current_block(text) is not None:
        # A callable replacement keeps backslashes in the generated block
        # from being interpreted as regex replacement escapes.
        new_text = _BLOCK_RE.sub(lambda _: block, text)
    else:
        # Insert the managed block right after the "## Repositories" heading.
        heading = "## Repositories"
        if heading not in text:
            sys.stderr.write(
                f"error: could not find `{heading}` heading in {readme_path.name} "
                f"and no managed markers are present\n"
            )
            return 1
        new_text = text.replace(
            heading, f"{heading}\n\n{block}", 1
        )
    readme_path.write_text(new_text, encoding="utf-8")
    return 0
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the requested mode.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``2`` when the README path does not exist, otherwise the exit code
        returned by ``cmd_check`` or ``cmd_write``.
    """
    parser = argparse.ArgumentParser(
        description="Generate the WasmAgent public project table from repos.yml.",
    )
    parser.add_argument(
        "readme",
        nargs="?",
        type=Path,
        default=README,
        help="Path to README.md (default: %(default)s)",
    )

    # Exactly one of --check / --write must be supplied.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode_flags = (
        ("--check", "verify the README table matches repos.yml"),
        ("--write", "rewrite the README table to match repos.yml"),
    )
    for flag, help_text in mode_flags:
        mode.add_argument(flag, action="store_true", help=help_text)

    args = parser.parse_args(argv)
    readme_path = args.readme
    if not readme_path.exists():
        sys.stderr.write(f"error: {readme_path} does not exist\n")
        return 2

    handler = cmd_check if args.check else cmd_write
    return handler(readme_path)
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())