diff --git a/scripts/markdown/governance.mjs b/scripts/markdown/governance.mjs
index 305fb45..4e89d7b 100644
--- a/scripts/markdown/governance.mjs
+++ b/scripts/markdown/governance.mjs
@@ -1,6 +1,7 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fetchWithRetry } from '../utils/fetch.mjs';
+import { rewriteRelativeLinks } from './sanitize.mjs';
const { GH_TOKEN } = process.env;
@@ -33,18 +34,6 @@ const LINK_REWRITE_MAP = Object.fromEntries(
])
);
-// Rewrites relative cross-references between governance docs.
-// Covers both inline [text](./FILE.md) and reference-style [label]: ./FILE.md.
-// Negative lookaheads prevent rewriting absolute URLs that happen to end in a known filename.
-const rewriteLinks = content =>
- content.replace(
- /(\]\(|\]:\s*)(?!https?:\/\/)(?!\/)(\.\/)?([A-Z_]+\.md)/g,
- (match, prefix, _dot, filename) =>
- LINK_REWRITE_MAP[filename]
- ? `${prefix}${LINK_REWRITE_MAP[filename]}`
- : match
- );
-
const outputDir = join(
import.meta.dirname,
'..',
@@ -65,7 +54,10 @@ const results = await Promise.all(
return null;
}
- let body = rewriteLinks(await res.text());
+ let body = rewriteRelativeLinks(
+ await res.text(),
+ file => LINK_REWRITE_MAP[file] ?? null
+ );
// Some governance docs (e.g. MEMBER_EXPECTATIONS.md) have no H1, which the
// site derives the page title from — fall back to the sidebar label.
diff --git a/scripts/markdown/readmes.mjs b/scripts/markdown/readmes.mjs
index 2d516af..b374989 100644
--- a/scripts/markdown/readmes.mjs
+++ b/scripts/markdown/readmes.mjs
@@ -1,6 +1,7 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fetchWithRetry } from '../utils/fetch.mjs';
+import cleanupMarkdown from './sanitize.mjs';
const { GH_TOKEN } = process.env;
@@ -33,19 +34,12 @@ const discoverRepos = async () => {
return { loaders, plugins };
};
-const stripLeadingDiv = content =>
- content.replace(/^\s*
\n*/i, '');
-
-// Remove badge-only lines: [![...][ref]][ref] or [](url)
-const stripBadges = content =>
- content
- .replace(
- /^(\[!\[[^\]]*\](?:\[[^\]]*\]|\([^)]*\))\]\s*(?:\[[^\]]*\]|\([^)]*\))\s*)+$/gm,
- ''
- )
- .replace(/\n{3,}/g, '\n\n');
-
-const processContent = content => stripBadges(stripLeadingDiv(content));
+// Run the shared Markdown cleanup on a README and resolve every relative link
+// target to its blob/HEAD URL in the `fullName` repository on GitHub.
+const cleanReadme = (content, fullName) => {
+  const toGitHubUrl = target =>
+    `https://github.com/${fullName}/blob/HEAD/${target}`;
+  return cleanupMarkdown(content, toGitHubUrl);
+};
const repoName = fullName => fullName.split('/')[1];
@@ -65,7 +59,7 @@ const processRepos = async (repos, { label, basePath, outputDir }) => {
const result = await fetchReadme(fullName);
await writeFile(
join(outputDir, `${name}.md`),
- processContent(result),
+ cleanReadme(result, fullName),
'utf8'
);
return name;
diff --git a/scripts/markdown/sanitize.mjs b/scripts/markdown/sanitize.mjs
new file mode 100644
index 0000000..407d505
--- /dev/null
+++ b/scripts/markdown/sanitize.mjs
@@ -0,0 +1,39 @@
+// Cleanup for Markdown fetched from other repos (READMEs, governance docs).
+
+const LEADING_HTML = /^\s*
\n*/i;
+// One or more linked badge images filling a line from start to end, in either
+// form: [![alt][img-ref]][link-ref] or [![alt](img-url)](link-url).
+const BADGES =
+ /^(\[!\[[^\]]*\](?:\[[^\]]*\]|\([^)]*\))\]\s*(?:\[[^\]]*\]|\([^)]*\))\s*)+$/gm;
+// Three or more consecutive newlines, i.e. more than one blank line in a row.
+const EXTRA_BLANK_LINES = /\n{3,}/g;
+// A heading line (1-6 leading #) whose text begins with the word "Contributing"
+// or "License", case-insensitive. Not global, so only the first hit is reported.
+// NOTE(review): this also matches headings that merely start with those words
+// (e.g. a title like "# License Plugin") — confirm that is acceptable.
+const BOILERPLATE = /^#{1,6}\s*(?:Contributing|License)\b.*$/im;
+// Captures (1) the link prefix and (2) the relative target of a Markdown link.
+// The prefix is either the `](` of an inline link or a whole reference
+// definition opener (`[label]: `). Definitions are only recognised at the start
+// of a line (up to three spaces of indent), so bracketed prose or code such as
+// `{ [key: string]: any }` is not mistaken for one; footnotes (`[^1]: ...`) are
+// excluded as well.
+// Skipped entirely: targets with a URI scheme (matched case-insensitively,
+// hence the `i` flag), root-relative paths and bare #anchors.
+// One leading ./ or ../ is dropped before the target is captured, and the
+// capture stops at whitespace, `)` or `#`, so a trailing fragment is preserved.
+// NOTE(review): dropping `../` makes a parent-relative link resolve like a
+// sibling path — confirm that is intended.
+const RELATIVE_LINK =
+  /(\]\(|^ {0,3}\[(?!\^)[^\]\n]+\]:\s*)(?![a-z][\w+.-]*:)(?!\/)(?!#)(?:\.{1,2}\/)?([^)\s#]+)/gim;
+
+// Drop the leading HTML logo banner (whatever LEADING_HTML matches at the very
+// start of the document); content without one is returned unchanged.
+export const stripLeadingHtml = content => {
+  const withoutBanner = content.replace(LEADING_HTML, '');
+  return withoutBanner;
+};
+
+// Remove badge-only lines, then squeeze every run of three or more newlines
+// down to a single blank line.
+export const stripBadges = content => {
+  const withoutBadges = content.replace(BADGES, '');
+  return withoutBadges.replace(EXTRA_BLANK_LINES, '\n\n');
+};
+
+// Truncate at the first Contributing/License heading: that heading and
+// everything after it are dropped, and the kept text ends in one newline.
+// Content with no such heading is returned unchanged.
+export const stripBoilerplate = content => {
+  const heading = content.match(BOILERPLATE);
+  if (!heading) {
+    return content;
+  }
+  const kept = content.slice(0, heading.index).trimEnd();
+  return `${kept}\n`;
+};
+
+// Hand each target matched by RELATIVE_LINK to resolve() and substitute the URL
+// it returns; a falsy result leaves that link exactly as written. Full URLs,
+// root-relative paths and anchors are skipped by the pattern itself.
+export const rewriteRelativeLinks = (content, resolve) => {
+  const substitute = (original, prefix, target) => {
+    const resolved = resolve(target);
+    if (!resolved) {
+      return original;
+    }
+    return `${prefix}${resolved}`;
+  };
+  return content.replace(RELATIVE_LINK, substitute);
+};
+
+// Cleanup passes, applied in this order.
+const STEPS = [stripLeadingHtml, stripBadges, stripBoilerplate];
+
+// Apply every cleanup step in turn; when a resolve callback is supplied, also
+// rewrite the relative links of the cleaned result through it.
+const cleanupMarkdown = (content, resolve) => {
+  let cleaned = content;
+  for (const step of STEPS) {
+    cleaned = step(cleaned);
+  }
+  if (!resolve) {
+    return cleaned;
+  }
+  return rewriteRelativeLinks(cleaned, resolve);
+};
+
+export default cleanupMarkdown;