diff --git a/package.json b/package.json index 57ecc65..0915002 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,9 @@ "documentation", "testing", "llms-txt", - "agent-friendly" + "agent-friendly", + "agent score", + "agent friendly docs" ], "files": [ "dist/", diff --git a/src/checks/content-discoverability/llms-txt-exists.ts b/src/checks/content-discoverability/llms-txt-exists.ts index 6311fd9..c6b4104 100644 --- a/src/checks/content-discoverability/llms-txt-exists.ts +++ b/src/checks/content-discoverability/llms-txt-exists.ts @@ -146,17 +146,25 @@ async function checkLlmsTxtExists(ctx: CheckContext): Promise { details.redirectedOrigins = redirectedOrigins; } - // Set effectiveOrigin for downstream checks when content lives at a different host. - // Derive from redirect URLs on discovered files, or from the fallback redirectedOrigins. + // Set effectiveOrigin for downstream checks when content lives at a different origin. + // This covers both true cross-host redirects (e.g. example.com → docs.other.com) + // and www canonicalization (e.g. mongodb.com → www.mongodb.com). Downstream checks + // need to know the actual origin so sitemap scoping and link classification work. if (!ctx.effectiveOrigin) { - const crossHostFile = discovered.find((f) => f.crossHostRedirect && f.redirectUrl); - if (crossHostFile?.redirectUrl) { + // First try: a discovered file that redirected to a different origin + const redirectedFile = discovered.find((f) => f.redirectUrl); + if (redirectedFile?.redirectUrl) { try { - ctx.effectiveOrigin = new URL(crossHostFile.redirectUrl).origin; + const redirectedOrigin = new URL(redirectedFile.redirectUrl).origin; + if (redirectedOrigin !== ctx.origin) { + ctx.effectiveOrigin = redirectedOrigin; + } } catch { /* ignore malformed */ } - } else if (redirectedOrigins.length > 0) { + } + // Second try: origins discovered from cross-host redirect fallback probing + if (!ctx.effectiveOrigin && redirectedOrigins.length > 0) { ctx.effectiveOrigin = redirectedOrigins[0]; } } diff --git a/src/checks/url-stability/redirect-behavior.ts b/src/checks/url-stability/redirect-behavior.ts index 9b46e8f..f45916e 100644 --- a/src/checks/url-stability/redirect-behavior.ts +++ b/src/checks/url-stability/redirect-behavior.ts @@ -1,5 +1,6 @@ import { registerCheck } from '../registry.js'; import { discoverAndSamplePages } from '../../helpers/get-page-urls.js'; +import { isCrossHostRedirect } from '../../helpers/to-md-urls.js'; import type { CheckContext, CheckResult } from '../../types.js'; interface RedirectResult { @@ -56,13 +57,11 @@ async function check(ctx: CheckContext): Promise { } const resolvedTarget = new URL(location, url).toString(); - const sourceOrigin = new URL(url).origin; - const targetOrigin = new URL(resolvedTarget).origin; - if (sourceOrigin === targetOrigin) { - return { url, status, classification: 'same-host', redirectTarget: resolvedTarget }; + if (isCrossHostRedirect(url, resolvedTarget)) { + return { url, status, classification: 'cross-host', redirectTarget: resolvedTarget }; } - return { url, status, classification: 'cross-host', redirectTarget: resolvedTarget }; + return { url, status, classification: 'same-host', redirectTarget: resolvedTarget }; } catch (err) { return { url, diff --git a/src/cli/commands/check.ts b/src/cli/commands/check.ts index 3d11ec3..70439b9 100644 --- a/src/cli/commands/check.ts +++ b/src/cli/commands/check.ts @@ -53,8 +53,12 @@ export function registerCheckCommand(program: Command): void { } if (format !== 'json') { - const domain = new URL(url).hostname; - process.stderr.write(`Running checks on ${domain}...\n`); + const parsed = new URL(url); + const target = + parsed.pathname && parsed.pathname !== '/' + ? `${parsed.hostname}${parsed.pathname}` + : parsed.hostname; + process.stderr.write(`Running checks on ${target}...\n`); } const report = await runChecks(url, { diff --git a/src/cli/formatters/scorecard.ts b/src/cli/formatters/scorecard.ts index 3f8e676..98d43f7 100644 --- a/src/cli/formatters/scorecard.ts +++ b/src/cli/formatters/scorecard.ts @@ -39,6 +39,12 @@ function gradeColor(grade: string): (s: string) => string { return GRADE_COLORS[grade] ?? ((s: string) => s); } +function formatLocalTime(iso: string): string { + const d = new Date(iso); + if (isNaN(d.getTime())) return iso; + return d.toLocaleString(); +} + function formatCategoryLine(name: string, score: number, grade: string): string { const paddedName = name.padEnd(36); const scoreStr = `${score} / 100`; @@ -49,9 +55,13 @@ function formatCategoryLine(name: string, score: number, grade: string): string function formatDiagnostic(diag: Diagnostic): string[] { const icon = SEVERITY_ICONS[diag.severity] ?? '[?]'; const lines: string[] = []; - lines.push(` ${icon} ${chalk.bold(diag.message.split('.')[0])}`); + // Extract first sentence for heading. Split on ". " (period + space) rather than + // bare "." to avoid breaking on file extensions like .md or llms.txt. + const firstSentenceEnd = diag.message.indexOf('. '); + const heading = firstSentenceEnd !== -1 ? diag.message.slice(0, firstSentenceEnd) : diag.message; + lines.push(` ${icon} ${chalk.bold(heading)}`); - // Wrap message (skip the first sentence already used as heading) + // Full message as detail text const fullMsg = diag.message; lines.push(` ${chalk.dim(fullMsg)}`); lines.push(''); @@ -69,7 +79,7 @@ export function formatScorecard(report: ReportResult, scoreResult?: ScoreResult) lines.push(chalk.bold('Agent-Friendly Docs Scorecard')); lines.push(chalk.bold('==============================')); lines.push(''); - lines.push(chalk.gray(`${report.url} · ${report.timestamp}`)); + lines.push(chalk.gray(`${report.url} · ${formatLocalTime(report.timestamp)}`)); lines.push(''); // Overall score diff --git a/src/cli/formatters/text.ts b/src/cli/formatters/text.ts index d2aab21..f63bf49 100644 --- a/src/cli/formatters/text.ts +++ b/src/cli/formatters/text.ts @@ -186,7 +186,11 @@ export function formatText(report: ReportResult, options?: FormatTextOptions): s lines.push(''); lines.push(chalk.bold(`Agent-Friendly Docs Check: ${report.url}`)); - lines.push(chalk.gray(`Timestamp: ${report.timestamp}`)); + const localTime = (() => { + const d = new Date(report.timestamp); + return isNaN(d.getTime()) ? report.timestamp : d.toLocaleString(); + })(); + lines.push(chalk.gray(`Timestamp: ${localTime}`)); lines.push(''); // Group by category diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts index 13651df..f19081d 100644 --- a/src/helpers/get-page-urls.ts +++ b/src/helpers/get-page-urls.ts @@ -124,7 +124,10 @@ async function walkAggregateLinks(ctx: CheckContext, urls: string[]): Promise { it('includes the URL and timestamp', () => { const output = formatText(makeReport()); expect(output).toContain('http://example.com'); - expect(output).toContain('2026-01-01T00:00:00.000Z'); + // Timestamp is displayed in local time, not raw ISO + const expected = new Date('2026-01-01T00:00:00.000Z').toLocaleString(); + expect(output).toContain(expected); }); it('groups results by category', () => { @@ -599,6 +601,12 @@ describe('formatJson', () => { expect(parsed.summary.total).toBe(5); }); + it('preserves raw ISO timestamp', () => { + const output = formatJson(makeReport()); + const parsed = JSON.parse(output); + expect(parsed.timestamp).toBe('2026-01-01T00:00:00.000Z'); + }); + it('is pretty-printed', () => { const output = formatJson(makeReport()); expect(output).toContain('\n'); diff --git a/test/unit/cli/scorecard-formatter.test.ts b/test/unit/cli/scorecard-formatter.test.ts index c7bd144..542fc50 100644 --- a/test/unit/cli/scorecard-formatter.test.ts +++ b/test/unit/cli/scorecard-formatter.test.ts @@ -306,6 +306,33 @@ describe('formatScorecard', () => { expect(output).toContain('Weird status'); }); + it('does not split diagnostic heading on periods in file extensions', () => { + const score = makeScoreResult({ + diagnostics: [ + { + id: 'markdown-undiscoverable', + severity: 'warning', + message: + 'Your site serves markdown at .md URLs, but agents have no way to discover this. Without content negotiation, an llms.txt directive on your pages, most agents will default to the HTML path.', + resolution: 'Add a blockquote directive.', + }, + { + id: 'llms-txt-oversized', + severity: 'warning', + message: + 'Your llms.txt is 4,561,591 characters. Agents see roughly the first 100,000 characters.', + resolution: 'Split into section-level files.', + }, + ], + }); + const output = formatScorecard(makeReport(), score); + // The heading should include the full first sentence, not split on ".md" or "llms.txt" + expect(output).toContain( + 'Your site serves markdown at .md URLs, but agents have no way to discover this', + ); + expect(output).toContain('Your llms.txt is 4,561,591 characters'); + }); + it('handles diagnostic with unknown severity gracefully', () => { const score = makeScoreResult({ diagnostics: [ diff --git a/test/unit/helpers/to-md-urls.test.ts b/test/unit/helpers/to-md-urls.test.ts index 21fd42b..02a319c 100644 --- a/test/unit/helpers/to-md-urls.test.ts +++ b/test/unit/helpers/to-md-urls.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from 'vitest'; -import { toMdUrls } from '../../../src/helpers/to-md-urls.js'; +import { isCrossHostRedirect, toMdUrls } from '../../../src/helpers/to-md-urls.js'; describe('toMdUrls', () => { it('returns URL as-is when it already ends in .md', () => { @@ -64,3 +64,36 @@ describe('toMdUrls', () => { expect(toMdUrls('https://example.com/sitemap.xml')).toEqual([]); }); }); + +describe('isCrossHostRedirect', () => { + it('returns false for same host', () => { + expect(isCrossHostRedirect('https://example.com/a', 'https://example.com/b')).toBe(false); + }); + + it('returns false for www to bare domain', () => { + expect(isCrossHostRedirect('https://www.example.com/a', 'https://example.com/a')).toBe(false); + }); + + it('returns false for bare domain to www', () => { + expect( + isCrossHostRedirect( + 'https://mongodb.com/docs/llms.txt', + 'https://www.mongodb.com/docs/llms.txt', + ), + ).toBe(false); + }); + + it('returns true for genuinely different hosts', () => { + expect(isCrossHostRedirect('https://example.com/a', 'https://other.com/a')).toBe(true); + }); + + it('returns true for different subdomains (not www)', () => { + expect(isCrossHostRedirect('https://docs.example.com/a', 'https://api.example.com/a')).toBe( + true, + ); + }); + + it('returns false for malformed URLs', () => { + expect(isCrossHostRedirect('not-a-url', 'https://example.com')).toBe(false); + }); +});