Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@
"documentation",
"testing",
"llms-txt",
"agent-friendly"
"agent-friendly",
"agent score",
"agent friendly docs"
],
"files": [
"dist/",
Expand Down
20 changes: 14 additions & 6 deletions src/checks/content-discoverability/llms-txt-exists.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,25 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise<CheckResult> {
details.redirectedOrigins = redirectedOrigins;
}

// Set effectiveOrigin for downstream checks when content lives at a different host.
// Derive from redirect URLs on discovered files, or from the fallback redirectedOrigins.
// Set effectiveOrigin for downstream checks when content lives at a different origin.
// This covers both true cross-host redirects (e.g. example.com → docs.other.com)
// and www canonicalization (e.g. mongodb.com → www.mongodb.com). Downstream checks
// need to know the actual origin so sitemap scoping and link classification work.
if (!ctx.effectiveOrigin) {
const crossHostFile = discovered.find((f) => f.crossHostRedirect && f.redirectUrl);
if (crossHostFile?.redirectUrl) {
// First try: a discovered file that redirected to a different origin
const redirectedFile = discovered.find((f) => f.redirectUrl);
if (redirectedFile?.redirectUrl) {
try {
ctx.effectiveOrigin = new URL(crossHostFile.redirectUrl).origin;
const redirectedOrigin = new URL(redirectedFile.redirectUrl).origin;
if (redirectedOrigin !== ctx.origin) {
ctx.effectiveOrigin = redirectedOrigin;
}
} catch {
/* ignore malformed */
}
} else if (redirectedOrigins.length > 0) {
}
// Second try: origins discovered from cross-host redirect fallback probing
if (!ctx.effectiveOrigin && redirectedOrigins.length > 0) {
ctx.effectiveOrigin = redirectedOrigins[0];
}
}
Expand Down
9 changes: 4 additions & 5 deletions src/checks/url-stability/redirect-behavior.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { registerCheck } from '../registry.js';
import { discoverAndSamplePages } from '../../helpers/get-page-urls.js';
import { isCrossHostRedirect } from '../../helpers/to-md-urls.js';
import type { CheckContext, CheckResult } from '../../types.js';

interface RedirectResult {
Expand Down Expand Up @@ -56,13 +57,11 @@ async function check(ctx: CheckContext): Promise<CheckResult> {
}

const resolvedTarget = new URL(location, url).toString();
const sourceOrigin = new URL(url).origin;
const targetOrigin = new URL(resolvedTarget).origin;

if (sourceOrigin === targetOrigin) {
return { url, status, classification: 'same-host', redirectTarget: resolvedTarget };
if (isCrossHostRedirect(url, resolvedTarget)) {
return { url, status, classification: 'cross-host', redirectTarget: resolvedTarget };
}
return { url, status, classification: 'cross-host', redirectTarget: resolvedTarget };
return { url, status, classification: 'same-host', redirectTarget: resolvedTarget };
} catch (err) {
return {
url,
Expand Down
8 changes: 6 additions & 2 deletions src/cli/commands/check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,12 @@ export function registerCheckCommand(program: Command): void {
}

if (format !== 'json') {
const domain = new URL(url).hostname;
process.stderr.write(`Running checks on ${domain}...\n`);
const parsed = new URL(url);
const target =
parsed.pathname && parsed.pathname !== '/'
? `${parsed.hostname}${parsed.pathname}`
: parsed.hostname;
process.stderr.write(`Running checks on ${target}...\n`);
}

const report = await runChecks(url, {
Expand Down
16 changes: 13 additions & 3 deletions src/cli/formatters/scorecard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ function gradeColor(grade: string): (s: string) => string {
return GRADE_COLORS[grade] ?? ((s: string) => s);
}

/**
 * Render an ISO timestamp using the runtime's local date/time format.
 *
 * @param iso - Timestamp string to display.
 * @returns The localized representation, or `iso` unchanged when it does not
 *   parse to a valid date.
 */
function formatLocalTime(iso: string): string {
  const parsed = new Date(iso);
  // An unparseable string yields an "Invalid Date" whose time value is NaN.
  const isValid = !Number.isNaN(parsed.getTime());
  return isValid ? parsed.toLocaleString() : iso;
}

function formatCategoryLine(name: string, score: number, grade: string): string {
const paddedName = name.padEnd(36);
const scoreStr = `${score} / 100`;
Expand All @@ -49,9 +55,13 @@ function formatCategoryLine(name: string, score: number, grade: string): string
function formatDiagnostic(diag: Diagnostic): string[] {
const icon = SEVERITY_ICONS[diag.severity] ?? '[?]';
const lines: string[] = [];
lines.push(` ${icon} ${chalk.bold(diag.message.split('.')[0])}`);
// Extract first sentence for heading. Split on ". " (period + space) rather than
// bare "." to avoid breaking on file extensions like .md or llms.txt.
const firstSentenceEnd = diag.message.indexOf('. ');
const heading = firstSentenceEnd !== -1 ? diag.message.slice(0, firstSentenceEnd) : diag.message;
lines.push(` ${icon} ${chalk.bold(heading)}`);

// Wrap message (skip the first sentence already used as heading)
// Full message as detail text
const fullMsg = diag.message;
lines.push(` ${chalk.dim(fullMsg)}`);
lines.push('');
Expand All @@ -69,7 +79,7 @@ export function formatScorecard(report: ReportResult, scoreResult?: ScoreResult)
lines.push(chalk.bold('Agent-Friendly Docs Scorecard'));
lines.push(chalk.bold('=============================='));
lines.push('');
lines.push(chalk.gray(`${report.url} · ${report.timestamp}`));
lines.push(chalk.gray(`${report.url} · ${formatLocalTime(report.timestamp)}`));
lines.push('');

// Overall score
Expand Down
6 changes: 5 additions & 1 deletion src/cli/formatters/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,11 @@ export function formatText(report: ReportResult, options?: FormatTextOptions): s

lines.push('');
lines.push(chalk.bold(`Agent-Friendly Docs Check: ${report.url}`));
lines.push(chalk.gray(`Timestamp: ${report.timestamp}`));
const localTime = (() => {
const d = new Date(report.timestamp);
return isNaN(d.getTime()) ? report.timestamp : d.toLocaleString();
})();
lines.push(chalk.gray(`Timestamp: ${localTime}`));
lines.push('');

// Group by category
Expand Down
5 changes: 4 additions & 1 deletion src/helpers/get-page-urls.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,10 @@ async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise<st
// Only keep same-origin page URLs (skip further .txt nesting)
try {
const parsed = new URL(subUrl);
if (parsed.origin === ctx.origin && !isNonPageUrl(subUrl)) {
if (
(parsed.origin === ctx.origin || parsed.origin === siteOrigin) &&
!isNonPageUrl(subUrl)
) {
pageUrls.push(subUrl);
}
} catch {
Expand Down
13 changes: 12 additions & 1 deletion src/helpers/to-md-urls.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
/**
 * Drop a leading "www." label from a hostname.
 * Hostnames that do not begin with "www." are returned unchanged.
 */
function stripWww(host: string): string {
  const prefix = 'www.';
  if (!host.startsWith(prefix)) {
    return host;
  }
  return host.slice(prefix.length);
}

/**
* Returns true if the two URLs have different hosts (i.e. a cross-host redirect).
* A www ↔ bare-domain redirect (e.g. mongodb.com → www.mongodb.com) is NOT
* considered cross-host because every HTTP client and agent follows it.
*/
export function isCrossHostRedirect(originalUrl: string, finalUrl: string): boolean {
try {
const original = new URL(originalUrl);
const final_ = new URL(finalUrl);
return original.host !== final_.host;
if (original.host === final_.host) return false;
// www ↔ bare-domain is same-site, not cross-host
return stripWww(original.host) !== stripWww(final_.host);
} catch {
return false;
}
Expand Down
10 changes: 9 additions & 1 deletion test/unit/cli/formatters.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ describe('formatText', () => {
it('includes the URL and timestamp', () => {
const output = formatText(makeReport());
expect(output).toContain('http://example.com');
expect(output).toContain('2026-01-01T00:00:00.000Z');
// Timestamp is displayed in local time, not raw ISO
const expected = new Date('2026-01-01T00:00:00.000Z').toLocaleString();
expect(output).toContain(expected);
});

it('groups results by category', () => {
Expand Down Expand Up @@ -599,6 +601,12 @@ describe('formatJson', () => {
expect(parsed.summary.total).toBe(5);
});

it('preserves raw ISO timestamp', () => {
  // The JSON output should carry the report timestamp as the original ISO string.
  const { timestamp } = JSON.parse(formatJson(makeReport()));
  expect(timestamp).toBe('2026-01-01T00:00:00.000Z');
});

it('is pretty-printed', () => {
const output = formatJson(makeReport());
expect(output).toContain('\n');
Expand Down
27 changes: 27 additions & 0 deletions test/unit/cli/scorecard-formatter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,33 @@ describe('formatScorecard', () => {
expect(output).toContain('Weird status');
});

it('does not split diagnostic heading on periods in file extensions', () => {
  // Both messages contain a "." that is not a sentence boundary (".md", "llms.txt").
  const scoreResult = makeScoreResult({
    diagnostics: [
      {
        id: 'markdown-undiscoverable',
        severity: 'warning',
        message:
          'Your site serves markdown at .md URLs, but agents have no way to discover this. Without content negotiation, an llms.txt directive on your pages, most agents will default to the HTML path.',
        resolution: 'Add a blockquote directive.',
      },
      {
        id: 'llms-txt-oversized',
        severity: 'warning',
        message:
          'Your llms.txt is 4,561,591 characters. Agents see roughly the first 100,000 characters.',
        resolution: 'Split into section-level files.',
      },
    ],
  });
  const rendered = formatScorecard(makeReport(), scoreResult);

  // The heading should include the full first sentence, not split on ".md" or "llms.txt"
  const expectedHeadings = [
    'Your site serves markdown at .md URLs, but agents have no way to discover this',
    'Your llms.txt is 4,561,591 characters',
  ];
  for (const heading of expectedHeadings) {
    expect(rendered).toContain(heading);
  }
});

it('handles diagnostic with unknown severity gracefully', () => {
const score = makeScoreResult({
diagnostics: [
Expand Down
35 changes: 34 additions & 1 deletion test/unit/helpers/to-md-urls.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect } from 'vitest';
import { toMdUrls } from '../../../src/helpers/to-md-urls.js';
import { isCrossHostRedirect, toMdUrls } from '../../../src/helpers/to-md-urls.js';

describe('toMdUrls', () => {
it('returns URL as-is when it already ends in .md', () => {
Expand Down Expand Up @@ -64,3 +64,36 @@ describe('toMdUrls', () => {
expect(toMdUrls('https://example.com/sitemap.xml')).toEqual([]);
});
});

// Covers host comparison in isCrossHostRedirect: identical hosts and
// www ↔ bare-domain pairs are not cross-host; other host changes are.
describe('isCrossHostRedirect', () => {
  it('returns false for same host', () => {
    const result = isCrossHostRedirect('https://example.com/a', 'https://example.com/b');
    expect(result).toBe(false);
  });

  it('returns false for www to bare domain', () => {
    const result = isCrossHostRedirect('https://www.example.com/a', 'https://example.com/a');
    expect(result).toBe(false);
  });

  it('returns false for bare domain to www', () => {
    const from = 'https://mongodb.com/docs/llms.txt';
    const to = 'https://www.mongodb.com/docs/llms.txt';
    expect(isCrossHostRedirect(from, to)).toBe(false);
  });

  it('returns true for genuinely different hosts', () => {
    const result = isCrossHostRedirect('https://example.com/a', 'https://other.com/a');
    expect(result).toBe(true);
  });

  it('returns true for different subdomains (not www)', () => {
    const from = 'https://docs.example.com/a';
    const to = 'https://api.example.com/a';
    expect(isCrossHostRedirect(from, to)).toBe(true);
  });

  it('returns false for malformed URLs', () => {
    // The first argument is not parseable as a URL.
    const result = isCrossHostRedirect('not-a-url', 'https://example.com');
    expect(result).toBe(false);
  });
});
Loading