Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions scripts/markdown/governance.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fetchWithRetry } from '../utils/fetch.mjs';
import { rewriteRelativeLinks } from './sanitize.mjs';

const { GH_TOKEN } = process.env;

Expand Down Expand Up @@ -33,18 +34,6 @@ const LINK_REWRITE_MAP = Object.fromEntries(
])
);

// Point relative cross-references between governance docs at their mapped
// targets. Handles inline links ([text](./FILE.md)) as well as reference-style
// definitions ([label]: ./FILE.md); an optional leading "./" is dropped.
// Filenames without an entry in LINK_REWRITE_MAP are left exactly as written.
const rewriteLinks = content => {
  const swapTarget = (whole, lead, _dotSlash, docName) => {
    const mapped = LINK_REWRITE_MAP[docName];
    if (!mapped) {
      return whole;
    }
    return `${lead}${mapped}`;
  };

  return content.replace(
    /(\]\(|\]:\s*)(?!https?:\/\/)(?!\/)(\.\/)?([A-Z_]+\.md)/g,
    swapTarget
  );
};

const outputDir = join(
import.meta.dirname,
'..',
Expand All @@ -65,7 +54,10 @@ const results = await Promise.all(
return null;
}

let body = rewriteLinks(await res.text());
let body = rewriteRelativeLinks(
await res.text(),
file => LINK_REWRITE_MAP[file] ?? null
);

// Some governance docs (e.g. MEMBER_EXPECTATIONS.md) have no H1, which the
// site derives the page title from — fall back to the sidebar label.
Expand Down
22 changes: 8 additions & 14 deletions scripts/markdown/readmes.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fetchWithRetry } from '../utils/fetch.mjs';
import cleanupMarkdown from './sanitize.mjs';

const { GH_TOKEN } = process.env;

Expand Down Expand Up @@ -33,19 +34,12 @@ const discoverRepos = async () => {
return { loaders, plugins };
};

// Remove a <div>…</div> block that opens the document (leading whitespace
// allowed), together with the newlines that directly follow it. The match is
// lazy, so it ends at the first closing </div>.
const stripLeadingDiv = content => {
  const leadingDiv = /^\s*<div[\s\S]*?<\/div>\n*/i;
  return content.replace(leadingDiv, '');
};

// Delete lines made up solely of linked badges — [![alt][ref]][ref] or
// [![alt](url)](url) — then squeeze any run of three or more newlines down to
// a single blank line.
const stripBadges = content => {
  const withoutBadges = content.replace(
    /^(\[!\[[^\]]*\](?:\[[^\]]*\]|\([^)]*\))\]\s*(?:\[[^\]]*\]|\([^)]*\))\s*)+$/gm,
    ''
  );
  return withoutBadges.replace(/\n{3,}/g, '\n\n');
};

// README cleanup: drop the leading <div> first, then the badge-only lines.
const processContent = content => {
  const withoutLeadingDiv = stripLeadingDiv(content);
  return stripBadges(withoutLeadingDiv);
};
// Run the shared Markdown cleanup on a README and resolve every relative link
// against the repo's default-branch tree on GitHub (blob/HEAD/<target>).
const cleanReadme = (content, fullName) => {
  const toGitHubUrl = target =>
    `https://github.com/${fullName}/blob/HEAD/${target}`;

  return cleanupMarkdown(content, toGitHubUrl);
};

// Second "/"-separated segment of an "owner/repo" string (undefined when
// there is no slash).
const repoName = fullName => {
  const [, name] = fullName.split('/');
  return name;
};

Expand All @@ -65,7 +59,7 @@ const processRepos = async (repos, { label, basePath, outputDir }) => {
const result = await fetchReadme(fullName);
await writeFile(
join(outputDir, `${name}.md`),
processContent(result),
cleanReadme(result, fullName),
'utf8'
);
return name;
Expand Down
39 changes: 39 additions & 0 deletions scripts/markdown/sanitize.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Cleanup for Markdown fetched from other repos (READMEs, governance docs).

// A <div>…</div> block at the very start of the document (leading whitespace
// allowed) plus the newlines right after it. Lazy: stops at the first </div>.
const LEADING_HTML = /^\s*<div[\s\S]*?<\/div>\n*/i;

// One or more linked badges filling whole lines:
// [![alt][ref]][ref] or [![alt](url)](url).
const BADGES =
  /^(\[!\[[^\]]*\](?:\[[^\]]*\]|\([^)]*\))\]\s*(?:\[[^\]]*\]|\([^)]*\))\s*)+$/gm;

// Three or more consecutive newlines.
const EXTRA_BLANK_LINES = /\n{3,}/g;

// First heading line (levels 1-6) whose text starts with "Contributing" or
// "License", case-insensitively.
const BOILERPLATE = /^#{1,6}\s*(?:Contributing|License)\b.*$/im;

// Target of an inline link `](target)` or reference definition `]: target`.
//   group 1: the `](` or `]:` prefix (with any whitespace after the colon)
//   group 2: the target, without a leading "./" or "../" and without any
//            "#fragment" or whitespace-separated title that follows it
// Skipped entirely: targets that start with a URI scheme ("https:", "mailto:",
// …), with "/" (root-relative or protocol-relative) or with "#" (anchors).
// The scheme test accepts both letter cases because URI schemes are
// case-insensitive — "HTTPS://…" must not be mistaken for a relative path.
const RELATIVE_LINK =
  /(\]\(|\]:\s*)(?![a-zA-Z][\w+.-]*:)(?!\/)(?!#)(?:\.{1,2}\/)?([^)\s#]+)/g;

// Remove whatever LEADING_HTML matches at the top of the document (the
// <div> logo banner); content without such a banner is returned unchanged.
export const stripLeadingHtml = content => {
  return content.replace(LEADING_HTML, '');
};

// Remove every BADGES match (badge-only lines), then replace each
// EXTRA_BLANK_LINES run with a single blank line.
export const stripBadges = content => {
  const withoutBadges = content.replace(BADGES, '');
  return withoutBadges.replace(EXTRA_BLANK_LINES, '\n\n');
};

// Truncate the document at the first BOILERPLATE heading (Contributing or
// License), discarding that heading and everything after it. The kept part
// has trailing whitespace trimmed and ends with exactly one newline. Content
// without such a heading is returned unchanged.
export const stripBoilerplate = content => {
  const cutAt = content.search(BOILERPLATE);
  if (cutAt === -1) {
    return content;
  }

  const kept = content.slice(0, cutAt).trimEnd();
  return `${kept}\n`;
};

// Replace the target of each RELATIVE_LINK match with resolve(target).
// resolve() receives the captured target and returns the replacement URL; a
// falsy return (null, undefined, '') leaves that link exactly as written.
export const rewriteRelativeLinks = (content, resolve) => {
  const substitute = (whole, lead, target) => {
    const resolved = resolve(target);
    if (!resolved) {
      return whole;
    }
    return `${lead}${resolved}`;
  };

  return content.replace(RELATIVE_LINK, substitute);
};

// Cleanup passes, applied in this order.
const STEPS = [stripLeadingHtml, stripBadges, stripBoilerplate];

// Apply every cleanup pass in sequence. When a resolve callback is supplied,
// relative links in the cleaned text are rewritten through it as a final step;
// without one the cleaned text is returned as is.
const cleanupMarkdown = (content, resolve) => {
  let cleaned = content;
  for (const step of STEPS) {
    cleaned = step(cleaned);
  }

  if (!resolve) {
    return cleaned;
  }
  return rewriteRelativeLinks(cleaned, resolve);
};

export default cleanupMarkdown;