From 669f1866b1fbfc02c9eeb5058da620c62c8e7022 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 26 Apr 2026 04:00:58 -0600
Subject: [PATCH 1/2] fix(discover/wayback): use CDX collapse=timestamp:6
 instead of limit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A wayback-source job with since=2012, until=2024, snapshotsPerUrl=4 against a
popular site returned all four snapshots clustered in 2012-2013, not spread
across 2012-2024. CDX's `limit` (previously max(count*4, 50)) truncates the
capture list *before* we sample, so for sites with thousands of captures the
first 50 in chronological order all fall at the start of the window.

Drop `limit` and use `collapse=timestamp:6` (one capture per month) instead.
The row count is now bounded by the window length in months, and the captures
are spread across the whole window.

Verified: stripe.com 2012-2024 with count=5 → 2012-02, 2015-03, 2018-03,
2021-02, 2024-01.
---
 .changeset/wayback-collapse-fix.md | 13 +++++++++++++
 src/discover/wayback.ts            |  7 ++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/wayback-collapse-fix.md

diff --git a/.changeset/wayback-collapse-fix.md b/.changeset/wayback-collapse-fix.md
new file mode 100644
index 0000000..f5e21e5
--- /dev/null
+++ b/.changeset/wayback-collapse-fix.md
@@ -0,0 +1,13 @@
+---
+'@tangle-network/browser-agent-driver': patch
+---
+
+fix(discover/wayback): use CDX `collapse=timestamp:6` instead of `limit` so longitudinal jobs span the requested window
+
+Symptom: a job with `since: 2012-01-01, until: 2024-01-01, snapshotsPerUrl: 4` against a popular site returned four snapshots all clustered in 2012-2013 instead of evenly across 2012-2024.
+
+Cause: the CDX call passed `limit: max(count*4, 50)`, which caps how many captures CDX returns *before* `sampleEvenly` runs. For sites with thousands of captures (Stripe, Linear, GitHub, etc.) the first 50 in chronological order are all from the start of the window, so even sampling could only produce early-window snapshots.
+
+Fix: drop `limit`, use `collapse=timestamp:6` (one capture per month). The row count is now bounded by the window length in months, which keeps payloads sane while ensuring captures are spread across the whole window.
+
+Verified: `discoverWaybackSnapshots('https://stripe.com/', { count: 5, since: '2012-01-01', until: '2024-01-01' })` now returns snapshots at 2012-02, 2015-03, 2018-03, 2021-02, 2024-01.
diff --git a/src/discover/wayback.ts b/src/discover/wayback.ts
index d779690..ddf8ad7 100644
--- a/src/discover/wayback.ts
+++ b/src/discover/wayback.ts
@@ -111,7 +111,12 @@ export async function discoverWaybackSnapshots(url: string, opts: WaybackOptions
   const params = new URLSearchParams({
     url,
     output: 'json',
-    limit: String(Math.max(count * 4, 50)), // overcollect, then sample evenly
+    // collapse=timestamp:6 dedupes to one capture per month (yyyymm = 6 chars).
+    // Without this, CDX returns every capture in the window — which for popular
+    // sites is tens of thousands and silently skews `sampleEvenly` if combined
+    // with a `limit`. With the collapse, the row count is bounded by the
+    // window length in months, so we don't need a limit.
+    collapse: 'timestamp:6',
   })
   if (opts.since) params.set('from', isoToCdxStamp(opts.since))
   if (opts.until) params.set('to', isoToCdxStamp(opts.until, true))

From 3b3839d21fc1a48e559a201d5a3a02521b8870aa Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 26 Apr 2026 04:14:40 -0600
Subject: [PATCH 2/2] feat(jobs+reports): brand-kit extraction at every target
 + brand-evolution template
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds AuditOptions.extractTokens. When true, every target in a comparative-audit
job runs the existing deterministic extractDesignTokens after the audit and
persists tokens.json under the per-target output dir (in a tokens/
subdirectory). The runner threads tokensPath through JobResultEntry.

src/reports/tokens.ts — aggregateTokens / diffTokens / groupByUrl. Pure
functions over on-disk data. Same contract as aggregate.ts.

renderBrandEvolution renders per-URL chronological evolution with
snapshot-to-snapshot deltas (colors added/removed, font family swaps,
brand-meta changes, library adoption).

AI SDK tools: fetchTokens, diffTokens. renderTemplate gains
template: 'brand-evolution'.

Verified on stripe.com 2014→2024: Whitney → Camphor → sohne-var
progression, primary color #008cdd → #6772e5 → #635bff, type-scale
grew 25 → 38 → 61 entries.

+12 tests. Total: 1460 passing.
---
 .changeset/brand-evolution.md |  19 ++++
 src/cli-jobs.ts               |  20 ++++-
 src/cli-reports.ts            |   5 +-
 src/jobs/queue.ts             |   2 +
 src/jobs/types.ts             |   8 ++
 src/reports/index.ts          |   4 +
 src/reports/templates.ts      |  68 ++++++++++++++
 src/reports/tokens.ts         | 132 +++++++++++++++++++++++++++
 src/reports/tools.ts          |  56 +++++++++++-
 src/reports/types.ts          |   2 +-
 tests/jobs-queue.test.ts      |  10 +++
 tests/reports-tokens.test.ts  | 163 ++++++++++++++++++++++++++++++++++
 tests/reports-tools.test.ts   |   2 +-
 13 files changed, 484 insertions(+), 7 deletions(-)
 create mode 100644 .changeset/brand-evolution.md
 create mode 100644 src/reports/tokens.ts
 create mode 100644 tests/reports-tokens.test.ts

diff --git a/.changeset/brand-evolution.md b/.changeset/brand-evolution.md
new file mode 100644
index 0000000..542dedb
--- /dev/null
+++ b/.changeset/brand-evolution.md
@@ -0,0 +1,19 @@
+---
+'@tangle-network/browser-agent-driver': minor
+---
+
+feat(jobs+reports): brand-kit / design-system extraction at every audit target
+
+Comparative-audit jobs can now extract the full deterministic design-token bundle (colors, font families, type scale, logos, font files, brand metadata, detected libraries) at every target — including every wayback snapshot. New `brand-evolution` report template renders a per-URL chronological view of palette and typography drift, with snapshot-to-snapshot deltas (colors added/removed, font family swaps, brand-meta changes, library adoption).
+
+**Spec:** add `audit.extractTokens: true` to a `JobSpec`. Each per-target output dir gets a `tokens/tokens.json` in addition to `report.json`; its path is surfaced as `tokensPath` on the result entry.
+
+**CLI:** `bad reports generate --template brand-evolution --job <id>`
+
+**AI SDK tools:** two new tools — `fetchTokens` (returns the per-target token summaries, optionally filtered to one URL's chronological series) and `diffTokens` (deterministic delta between two token summaries in the same job). `renderTemplate` now accepts `template: 'brand-evolution'`.
+
+The token extractor is the existing `extractDesignTokens` (no LLM, ~10s per target). Same deterministic-data / LLM-narrates contract as the rest of the reports surface — every callout in the brand-evolution report comes from a pure function of `tokens.json`.
+
+Verified end-to-end on `https://stripe.com/` 2014 → 2019 → 2024 wayback snapshots: pulled out the Whitney → Camphor → sohne-var typeface progression and the matching primary-color shifts (`#008cdd` → `#6772e5` → `#635bff`).
+
++12 new tests across `reports-tokens` and the queue/tools touch-ups. Total: 1460 passing.
diff --git a/src/cli-jobs.ts b/src/cli-jobs.ts
index ea5b6af..f2c0f96 100644
--- a/src/cli-jobs.ts
+++ b/src/cli-jobs.ts
@@ -152,7 +152,7 @@ async function cmdCreate(opts: ParsedArgs): Promise<void> {
  * we can deterministically locate `report.json` after the audit returns.
  */
 async function buildAuditFn(_spec: JobSpec): Promise<AuditFn> {
-  const { runDesignAudit } = await import('./cli-design-audit.js')
+  const { runDesignAudit, extractDesignTokens } = await import('./cli-design-audit.js')
   let counter = 0
   return async (target, opts) => {
     const url = target.snapshotUrl ?? target.url
@@ -186,11 +186,29 @@ async function buildAuditFn(_spec: JobSpec): Promise<AuditFn> {
     const page = data.pages?.[0]
     const rollupScore = page?.auditResultV2?.rollup?.score ?? page?.rollup?.score ?? page?.score
     const pageType = page?.auditResultV2?.classification?.type ?? page?.classification?.type
+
+    let tokensPath: string | undefined
+    if (opts?.extractTokens) {
+      try {
+        const tokensDir = path.join(outputDir, 'tokens')
+        const { tokens } = await extractDesignTokens({ url, headless: opts?.headless ?? true, outputDir: tokensDir })
+        tokensPath = path.resolve(tokensDir, 'tokens.json')
+        // extractDesignTokens persists its own files; ensure tokens.json exists at the canonical path.
+        if (!fs.existsSync(tokensPath)) {
+          fs.writeFileSync(tokensPath, JSON.stringify(tokens, null, 2))
+        }
+      } catch (err) {
+        // Token extraction is additive — never let it fail the parent audit.
+        console.warn(`  ${chalk.dim('tokens:')} extraction failed for ${url}: ${(err as Error).message}`)
+      }
+    }
+
     return {
       runId: outputDir, // The output dir is the de-facto runId for jobs.
       resultPath: reportJson,
       rollupScore,
       pageType,
+      tokensPath,
     }
   }
 }
diff --git a/src/cli-reports.ts b/src/cli-reports.ts
index 0902575..f1a53a6 100644
--- a/src/cli-reports.ts
+++ b/src/cli-reports.ts
@@ -23,6 +23,7 @@ import {
   renderLeaderboard,
   renderLongitudinal,
   renderBatchComparison,
+  renderBrandEvolution,
   renderJobHeader,
   narrateReport,
 } from './reports/index.js'
@@ -56,7 +57,7 @@ function parseArgs(argv: string[]): ReportArgs {
   return out
 }
 
-const TEMPLATES = new Set(['leaderboard', 'longitudinal', 'batch-comparison'])
+const TEMPLATES = new Set(['leaderboard', 'longitudinal', 'batch-comparison', 'brand-evolution'])
 
 export async function runReportsCli(args: string[]): Promise<void> {
   const sub = args[0]
@@ -76,6 +77,8 @@ export async function runReportsCli(args: string[]): Promise<void> {
     body = renderLeaderboard(rows, { topN: opts.top, byType: opts.byType, buckets: opts.buckets })
   } else if (opts.template === 'longitudinal') {
     body = renderLongitudinal(rows)
+  } else if (opts.template === 'brand-evolution') {
+    body = renderBrandEvolution(job)
   } else {
     body = renderBatchComparison(rows)
   }
diff --git a/src/jobs/queue.ts b/src/jobs/queue.ts
index 5633790..6359a5d 100644
--- a/src/jobs/queue.ts
+++ b/src/jobs/queue.ts
@@ -21,6 +21,7 @@ export interface AuditFn {
     rollupScore?: number
     pageType?: string
     costUSD?: number
+    tokensPath?: string
   }>
 }
 
@@ -87,6 +88,7 @@ async function runOne(
       rollupScore: out.rollupScore,
       pageType: out.pageType,
       costUSD: out.costUSD,
+      tokensPath: out.tokensPath,
     }
   } catch (err) {
     const error = err as Error
diff --git a/src/jobs/types.ts b/src/jobs/types.ts
index 9f02750..5446585 100644
--- a/src/jobs/types.ts
+++ b/src/jobs/types.ts
@@ -41,6 +41,12 @@ export interface AuditOptions {
   regulatoryContext?: RegulatoryContextTag[]
   headless?: boolean
   skipEthics?: boolean
+  /**
+   * Layer 8 add-on: also run the deterministic brand/design-token extractor at
+   * every target. Adds ~10s/target (no LLM). Output lands at
+   * `<per-target output dir>/tokens/tokens.json` and is surfaced via `JobResultEntry.tokensPath`.
+   */
+  extractTokens?: boolean
 }
 
 export interface JobSpec {
@@ -81,6 +87,8 @@ export interface JobResultEntry extends JobTarget {
   rollupScore?: number
   /** Page-type classification. */
   pageType?: string
+  /** Path to the tokens.json from the brand-kit extractor (when extractTokens=true). */
+  tokensPath?: string
 }
 
 export interface Job {
diff --git a/src/reports/index.ts b/src/reports/index.ts
index d85ec50..63632b0 100644
--- a/src/reports/index.ts
+++ b/src/reports/index.ts
@@ -15,13 +15,17 @@ export {
   renderLeaderboard,
   renderLongitudinal,
   renderBatchComparison,
+  renderBrandEvolution,
   renderJobHeader,
 } from './templates.js'
 export type {
   LeaderboardRenderOpts,
   LongitudinalRenderOpts,
   BatchComparisonRenderOpts,
+  BrandEvolutionRenderOpts,
 } from './templates.js'
+export { aggregateTokens, diffTokens, groupByUrl } from './tokens.js'
+export type { TokenSummary, TokenDiff, TokenSeries } from './tokens.js'
 export { buildReportTools } from './tools.js'
 export type { ReportToolsContext, ReportToolSet } from './tools.js'
 export { narrateReport } from './narrate.js'
diff --git a/src/reports/templates.ts b/src/reports/templates.ts
index 1f94a68..86a7d5d 100644
--- a/src/reports/templates.ts
+++ b/src/reports/templates.ts
@@ -9,6 +9,7 @@
 import type { Job } from '../jobs/types.js'
 import type { AggregateRow, LongitudinalRow } from './types.js'
 import { leaderboard, longitudinalFor, tierBuckets, compareRuns } from './aggregate.js'
+import { aggregateTokens, diffTokens, groupByUrl, type TokenSummary } from './tokens.js'
 
 export interface LeaderboardRenderOpts {
   title?: string
@@ -136,6 +137,73 @@ export function renderBatchComparison(rows: AggregateRow[], opts: BatchCompariso
   return lines.join('\n')
 }
 
+export interface BrandEvolutionRenderOpts {
+  title?: string
+  /** Max colors to render per snapshot (top by usage count). Default 6. */
+  topColors?: number
+}
+
+/**
+ * Render a per-URL brand-kit evolution: for each URL, list the snapshots in
+ * chronological order with their distinct color palette, font families, and
+ * detected libraries. Between consecutive snapshots, surface the delta
+ * (colors added/removed, families added/removed, brand-meta changes).
+ */
+export function renderBrandEvolution(job: Job, opts: BrandEvolutionRenderOpts = {}): string {
+  const summaries = aggregateTokens(job)
+  const lines: string[] = []
+  lines.push(`# ${opts.title ?? 'Brand & Design-System Evolution'}`)
+  lines.push('')
+  lines.push(`Generated: ${new Date().toISOString()}`)
+  lines.push('')
+
+  if (summaries.length === 0) {
+    lines.push(`_No tokens.json files were produced. Run the job with \`audit.extractTokens: true\` to enable._`)
+    return lines.join('\n')
+  }
+
+  const topColors = opts.topColors ?? 6
+  const series = groupByUrl(summaries)
+  for (const { url, snapshots } of series) {
+    lines.push(`## ${escapeMd(url)}`)
+    lines.push('')
+    for (let i = 0; i < snapshots.length; i++) {
+      const s = snapshots[i]
+      const captured = s.capturedAt ? s.capturedAt.slice(0, 10) : 'live'
+      lines.push(`### ${captured}`)
+      lines.push('')
+      const swatches = s.colors.slice(0, topColors).map(c => `\`${c.hex}\` ×${c.count}`).join(' · ')
+      lines.push(`**Top colors**: ${swatches || '_none extracted_'}`)
+      const families = s.fontFamilies.map(f => `${f.family} (${f.classification})`).join(', ')
+      lines.push(`**Font families**: ${families || '_none_'}`)
+      lines.push(`**Type-scale entries**: ${s.typeScaleEntries}`)
+      if (s.detectedLibraries.length > 0) lines.push(`**Detected libraries**: ${s.detectedLibraries.join(', ')}`)
+      if (s.brand?.themeColor) lines.push(`**Theme color**: \`${s.brand.themeColor}\``)
+      if (s.logos.length > 0) lines.push(`**Logos**: ${s.logos.length}`)
+      lines.push('')
+
+      // Snapshot-to-snapshot delta against the previous snapshot in the series.
+      const prev = snapshots[i - 1]
+      if (prev) {
+        const d = diffTokens(prev, s)
+        const callouts: string[] = []
+        if (d.colorsAdded.length > 0) callouts.push(`+${d.colorsAdded.length} new colors`)
+        if (d.colorsRemoved.length > 0) callouts.push(`−${d.colorsRemoved.length} removed`)
+        if (d.familiesAdded.length > 0) callouts.push(`+ ${d.familiesAdded.join(', ')}`)
+        if (d.familiesRemoved.length > 0) callouts.push(`− ${d.familiesRemoved.join(', ')}`)
+        if (d.librariesAdded.length > 0) callouts.push(`adopted ${d.librariesAdded.join(', ')}`)
+        if (d.librariesRemoved.length > 0) callouts.push(`dropped ${d.librariesRemoved.join(', ')}`)
+        if (d.brandChanges.length > 0) callouts.push(`brand meta: ${d.brandChanges.map(c => c.field).join(', ')} changed`)
+        if (callouts.length > 0) {
+          lines.push(`_Δ vs ${prev.capturedAt?.slice(0, 10) ?? 'previous'}: ${callouts.join(' · ')}_`)
+          lines.push('')
+        }
+      }
+    }
+  }
+  return lines.join('\n')
+}
+
 export function renderJobHeader(job: Job): string {
   const ok = job.results.filter(r => r.status === 'ok').length
   const fail = job.results.filter(r => r.status === 'failed').length
diff --git a/src/reports/tokens.ts b/src/reports/tokens.ts
new file mode 100644
index 0000000..be242f5
--- /dev/null
+++ b/src/reports/tokens.ts
@@ -0,0 +1,132 @@
+/**
+ * Brand-kit / design-token aggregation across a job's targets.
+ *
+ * Reads each per-target `tokens.json` (produced when AuditOptions.extractTokens
+ * was true) and projects to a flat row shape so longitudinal evolution and
+ * batch comparison templates can render without re-implementing extraction.
+ *
+ * No LLM. Pure function of on-disk data — same contract as aggregate.ts.
+ */
+
+import * as fs from 'node:fs'
+import type { Job } from '../jobs/types.js'
+import type { DesignTokens, ColorToken, FontFamily } from '../types.js'
+
+export interface TokenSummary {
+  /** Seed URL (groups snapshots of the same site). */
+  url: string
+  /** Snapshot URL when wayback. */
+  snapshotUrl?: string
+  /** ISO datetime of capture. */
+  capturedAt?: string
+  /** Per-target runId (== outputDir for jobs). */
+  runId: string
+  /** Resolved on-disk path to tokens.json. */
+  tokensPath?: string
+  /** Top-level brand metadata (title, theme color, favicon, og image). */
+  brand: DesignTokens['brand']
+  /** All distinct colors, sorted desc by usage count. */
+  colors: ColorToken[]
+  /** Distinct typography families with classification + weight set. */
+  fontFamilies: FontFamily[]
+  /** Type-scale entry count (proxy for typographic complexity). */
+  typeScaleEntries: number
+  /** Logo asset URLs (svg + raster). */
+  logos: string[]
+  /** Loaded font-file URLs. */
+  fontFiles: string[]
+  /** Detected libraries (e.g. ['tailwind','radix-ui']). */
+  detectedLibraries: string[]
+}
+
+/** Read each ok result's tokens.json and project to TokenSummary. */
+export function aggregateTokens(job: Job): TokenSummary[] {
+  const out: TokenSummary[] = []
+  for (const r of job.results) {
+    if (r.status !== 'ok' || !r.tokensPath || !fs.existsSync(r.tokensPath)) continue
+    try {
+      const tokens = JSON.parse(fs.readFileSync(r.tokensPath, 'utf-8')) as DesignTokens
+      out.push({
+        url: r.url,
+        snapshotUrl: r.snapshotUrl,
+        capturedAt: r.capturedAt,
+        runId: r.runId ?? '',
+        tokensPath: r.tokensPath,
+        brand: tokens.brand ?? {},
+        colors: (tokens.colors ?? []).slice().sort((a, b) => (b.count ?? 0) - (a.count ?? 0)),
+        fontFamilies: tokens.typography?.families ?? [],
+        typeScaleEntries: tokens.typography?.scale?.length ?? 0,
+        logos: (tokens.logos ?? []).map(l => l.src ?? '').filter(Boolean),
+        fontFiles: (tokens.fontFiles ?? []).map(f => f.src).filter(Boolean),
+        detectedLibraries: tokens.detectedLibraries ?? [],
+      })
+    } catch {
+      // Skip corrupted token files.
+    }
+  }
+  return out
+}
+
+/**
+ * Diff between two TokenSummary records. Useful for "this URL evolved from
+ * 4 colors → 12 colors and dropped Helvetica for Inter" callouts.
+ */
+export interface TokenDiff {
+  colorsAdded: string[]
+  colorsRemoved: string[]
+  colorsCommon: number
+  familiesAdded: string[]
+  familiesRemoved: string[]
+  brandChanges: Array<{ field: keyof DesignTokens['brand']; before: string | undefined; after: string | undefined }>
+  librariesAdded: string[]
+  librariesRemoved: string[]
+}
+
+export function diffTokens(a: TokenSummary, b: TokenSummary): TokenDiff {
+  const aHex = new Set(a.colors.map(c => c.hex.toLowerCase()))
+  const bHex = new Set(b.colors.map(c => c.hex.toLowerCase()))
+  const colorsAdded = [...bHex].filter(h => !aHex.has(h))
+  const colorsRemoved = [...aHex].filter(h => !bHex.has(h))
+  const colorsCommon = [...aHex].filter(h => bHex.has(h)).length
+
+  const aFam = new Set(a.fontFamilies.map(f => f.family))
+  const bFam = new Set(b.fontFamilies.map(f => f.family))
+  const familiesAdded = [...bFam].filter(f => !aFam.has(f))
+  const familiesRemoved = [...aFam].filter(f => !bFam.has(f))
+
+  const brandFields: Array<keyof DesignTokens['brand']> = ['title', 'description', 'themeColor', 'favicon', 'ogImage']
+  const brandChanges = brandFields
+    .filter(f => (a.brand?.[f] ?? '') !== (b.brand?.[f] ?? ''))
+    .map(f => ({ field: f, before: a.brand?.[f], after: b.brand?.[f] }))
+
+  const aLib = new Set(a.detectedLibraries)
+  const bLib = new Set(b.detectedLibraries)
+  const librariesAdded = [...bLib].filter(l => !aLib.has(l))
+  const librariesRemoved = [...aLib].filter(l => !bLib.has(l))
+
+  return { colorsAdded, colorsRemoved, colorsCommon, familiesAdded, familiesRemoved, brandChanges, librariesAdded, librariesRemoved }
+}
+
+/**
+ * Group token summaries by URL and return a chronological evolution series.
+ * Returns one entry per URL; each carries the sequence of TokenSummary rows
+ * sorted ascending by capturedAt (rows without capturedAt sort first, in insertion order).
+ */
+export interface TokenSeries {
+  url: string
+  snapshots: TokenSummary[]
+}
+
+export function groupByUrl(summaries: TokenSummary[]): TokenSeries[] {
+  const map = new Map<string, TokenSummary[]>()
+  for (const s of summaries) {
+    if (!map.has(s.url)) map.set(s.url, [])
+    map.get(s.url)!.push(s)
+  }
+  const out: TokenSeries[] = []
+  for (const [url, snapshots] of map.entries()) {
+    snapshots.sort((a, b) => (a.capturedAt ?? '').localeCompare(b.capturedAt ?? ''))
+    out.push({ url, snapshots })
+  }
+  return out
+}
diff --git a/src/reports/tools.ts b/src/reports/tools.ts
index ad1f86b..fb92ddf 100644
--- a/src/reports/tools.ts
+++ b/src/reports/tools.ts
@@ -15,7 +15,8 @@ import { tool, jsonSchema } from 'ai'
 import * as fs from 'node:fs'
 import { loadJob } from '../jobs/store.js'
 import { aggregateJob, leaderboard, longitudinalFor, compareRuns, tierBuckets } from './aggregate.js'
-import { renderLeaderboard, renderLongitudinal, renderBatchComparison } from './templates.js'
+import { aggregateTokens, diffTokens as diffTokensFn, groupByUrl } from './tokens.js'
+import { renderLeaderboard, renderLongitudinal, renderBatchComparison, renderBrandEvolution } from './templates.js'
 import type { AggregateRow } from './types.js'
 
 export interface ReportToolsContext {
@@ -128,7 +129,7 @@ export function buildReportTools(ctx: ReportToolsContext = {}) {
       description: 'Render a deterministic markdown report from a job. Use this when the user wants a shareable artifact, not a free-form answer.',
       inputSchema: jsonSchema<{
         jobId: string
-        template: 'leaderboard' | 'longitudinal' | 'batch-comparison'
+        template: 'leaderboard' | 'longitudinal' | 'batch-comparison' | 'brand-evolution'
         title?: string
         topN?: number
         byType?: string
@@ -137,7 +138,7 @@ export function buildReportTools(ctx: ReportToolsContext = {}) {
         type: 'object',
         properties: {
           jobId: { type: 'string' },
-          template: { type: 'string', enum: ['leaderboard', 'longitudinal', 'batch-comparison'] },
+          template: { type: 'string', enum: ['leaderboard', 'longitudinal', 'batch-comparison', 'brand-evolution'] },
           title: { type: 'string' },
           topN: { type: 'integer', minimum: 1 },
           byType: { type: 'string' },
@@ -146,6 +147,11 @@ export function buildReportTools(ctx: ReportToolsContext = {}) {
         required: ['jobId', 'template'],
       }),
       execute: async ({ jobId, template, title, topN, byType, buckets }) => {
+        if (template === 'brand-evolution') {
+          const job = loadJob(jobId, ctx.jobsDir)
+          if (!job) throw new Error(`job not found: ${jobId}`)
+          return { markdown: renderBrandEvolution(job, { title }) }
+        }
         const rows = rowsForJob(jobId, ctx.jobsDir)
         if (template === 'leaderboard') return { markdown: renderLeaderboard(rows, { title, topN, byType, buckets }) }
         if (template === 'longitudinal') return { markdown: renderLongitudinal(rows, { title }) }
@@ -153,6 +159,50 @@ export function buildReportTools(ctx: ReportToolsContext = {}) {
       },
     }),
 
+    fetchTokens: tool({
+      description: 'Return aggregated brand-kit token summaries (colors, fonts, libraries, brand metadata) for every audited target that had token extraction enabled. Use for "what colors does X use" or "how has the design system evolved" questions.',
+      inputSchema: jsonSchema<{ jobId: string; url?: string }>({
+        type: 'object',
+        properties: {
+          jobId: { type: 'string' },
+          url: { type: 'string', description: 'Filter to one URL (returns the chronological series).' },
+        },
+        required: ['jobId'],
+      }),
+      execute: async ({ jobId, url }) => {
+        const job = loadJob(jobId, ctx.jobsDir)
+        if (!job) throw new Error(`job not found: ${jobId}`)
+        const summaries = aggregateTokens(job)
+        if (url) {
+          const series = groupByUrl(summaries).find(s => s.url === url)
+          return series ? series.snapshots : []
+        }
+        return summaries
+      },
+    }),
+
+    diffTokens: tool({
+      description: 'Compute the token delta (colors added/removed, font families added/removed, brand-meta changes, library swaps) between two audited targets in the same job.',
+      inputSchema: jsonSchema<{ jobId: string; runIdA: string; runIdB: string }>({
+        type: 'object',
+        properties: {
+          jobId: { type: 'string' },
+          runIdA: { type: 'string' },
+          runIdB: { type: 'string' },
+        },
+        required: ['jobId', 'runIdA', 'runIdB'],
+      }),
+      execute: async ({ jobId, runIdA, runIdB }) => {
+        const job = loadJob(jobId, ctx.jobsDir)
+        if (!job) throw new Error(`job not found: ${jobId}`)
+        const summaries = aggregateTokens(job)
+        const a = summaries.find(s => s.runId === runIdA)
+        const b = summaries.find(s => s.runId === runIdB)
+        if (!a || !b) throw new Error(`token summary not found for ${!a ? runIdA : runIdB} — was extractTokens enabled?`)
+        return diffTokensFn(a, b)
+      },
+    }),
+
     runFreshAudit: tool({
       description: 'Kick off a NEW single-page audit when the agent needs current data not in the job. Cost-bearing. Use sparingly.',
       inputSchema: jsonSchema<{ url: string }>({
diff --git a/src/reports/types.ts b/src/reports/types.ts
index 3465aac..2972633 100644
--- a/src/reports/types.ts
+++ b/src/reports/types.ts
@@ -53,4 +53,4 @@ export interface LongitudinalRow {
   pageType?: string
 }
 
-export type ReportTemplate = 'leaderboard' | 'longitudinal' | 'batch-comparison'
+export type ReportTemplate = 'leaderboard' | 'longitudinal' | 'batch-comparison' | 'brand-evolution'
diff --git a/tests/jobs-queue.test.ts b/tests/jobs-queue.test.ts
index d7a9dac..fd1aec7 100644
--- a/tests/jobs-queue.test.ts
+++ b/tests/jobs-queue.test.ts
@@ -17,6 +17,16 @@ describe('runJob', () => {
   let dir: string
   afterEach(() => { if (dir) rmSync(dir, { recursive: true, force: true }) })
 
+  it('persists tokensPath when auditFn returns one', async () => {
+    dir = mkdtempSync(join(tmpdir(), 'bad-q-'))
+    const job = createJob(SPEC, [{ url: 'https://a.test' }], dir)
+    const auditFn: AuditFn = async () => ({
+      runId: 'run-a', resultPath: '/tmp/x/report.json', rollupScore: 7, tokensPath: '/tmp/x/tokens.json',
+    })
+    const final = await runJob(job, { auditFn, dir })
+    expect(final.results[0].tokensPath).toBe('/tmp/x/tokens.json')
+  })
+
   it('runs every target and marks the job completed when all succeed', async () => {
     dir = mkdtempSync(join(tmpdir(), 'bad-q-'))
     const job = createJob(SPEC, SPEC.discover.urls.map(url => ({ url })), dir)
diff --git a/tests/reports-tokens.test.ts b/tests/reports-tokens.test.ts
new file mode 100644
index 0000000..1932cab
--- /dev/null
+++ b/tests/reports-tokens.test.ts
@@ -0,0 +1,163 @@
+import { describe, it, expect, afterEach } from 'vitest'
+import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { aggregateTokens, diffTokens, groupByUrl } from '../src/reports/tokens.js'
+import { renderBrandEvolution } from '../src/reports/templates.js'
+import type { Job } from '../src/jobs/types.js'
+import type { DesignTokens } from '../src/types.js'
+
+function writeTokens(dir: string, name: string, tokens: Partial<DesignTokens>): string {
+  const p = join(dir, `${name}.json`)
+  writeFileSync(p, JSON.stringify({
+    url: tokens.url ?? 'https://x/',
+    extractedAt: new Date().toISOString(),
+    viewportsAudited: ['1440'],
+    customProperties: {},
+    colors: [], typography: { families: [], scale: [] },
+    brand: {}, logos: [], icons: [], fontFiles: [], images: [], videos: [],
+    stylesheets: [], responsive: {}, detectedLibraries: [],
+    ...tokens,
+  }))
+  return p
+}
+
+function makeJob(results: Job['results']): Job {
+  return {
+    jobId: 'tj',
+    spec: { kind: 'comparative-audit', discover: { source: 'wayback', urls: ['https://x/'] } },
+    status: 'completed',
+    createdAt: new Date().toISOString(),
+    targets: results.map(r => ({ url: r.url, snapshotUrl: r.snapshotUrl, capturedAt: r.capturedAt })),
+    results,
+    totalCostUSD: 0,
+  }
+}
+
+describe('aggregateTokens', () => {
+  let dir: string
+  afterEach(() => { if (dir) rmSync(dir, { recursive: true, force: true }) })
+
+  it('reads each ok result\'s tokens.json and projects to TokenSummary', () => {
+    dir = mkdtempSync(join(tmpdir(), 'bad-tok-'))
+    const t1 = writeTokens(dir, 't1', {
+      colors: [{ value: 'rgb(0,113,227)', hex: '#0071e3', count: 50, properties: ['color'] }],
+      typography: { families: [{ family: 'Inter', weights: [400, 600], classification: 'body' }], scale: [{ fontSize: '16px', fontWeight: '400', lineHeight: '24px', letterSpacing: '0', fontFamily: 'Inter', tag: 'body', count: 12 }] },
+      brand: { themeColor: '#0071e3', title: 'Stripe' },
+      detectedLibraries: ['tailwind'],
+    })
+    const job = makeJob([
+      { url: 'https://stripe.com/', status: 'ok', runId: 'run-1', tokensPath: t1 },
+      { url: 'https://stripe.com/', status: 'failed', error: 'x' },
+    ])
+    const summaries = aggregateTokens(job)
+    expect(summaries).toHaveLength(1)
+    expect(summaries[0].colors[0].hex).toBe('#0071e3')
+    expect(summaries[0].fontFamilies[0].family).toBe('Inter')
+    expect(summaries[0].brand.themeColor).toBe('#0071e3')
+    expect(summaries[0].typeScaleEntries).toBe(1)
+    expect(summaries[0].detectedLibraries).toEqual(['tailwind'])
+  })
+
+  it('skips results without tokensPath', () => {
+    const job = makeJob([{ url: 'https://x/', status: 'ok', runId: 'r' }])
+    expect(aggregateTokens(job)).toEqual([])
+  })
+
+  it('skips corrupt token files', () => {
+    dir = mkdtempSync(join(tmpdir(), 'bad-tok-'))
+    const bad = join(dir, 'bad.json')
+    writeFileSync(bad, '{ not valid json')
+    const job = makeJob([{ url: 'https://x/', status: 'ok', runId: 'r', tokensPath: bad }])
+    expect(aggregateTokens(job)).toEqual([])
+  })
+
+  it('sorts colors by usage count descending', () => {
+    dir = mkdtempSync(join(tmpdir(), 'tok-sort-'))
+    const tokensPath = writeTokens(dir, 't', {
+      colors: [ // intentionally listed out of count order: 10, 100, 50
+        { value: 'a', hex: '#aaa', count: 10, properties: [] },
+        { value: 'b', hex: '#bbb', count: 100, properties: [] },
+        { value: 'c', hex: '#ccc', count: 50, properties: [] },
+      ],
+    })
+    const summaries = aggregateTokens(makeJob([{ url: 'x', status: 'ok', runId: 'r', tokensPath }]))
+    expect(summaries[0].colors.map(color => color.hex)).toEqual(['#bbb', '#ccc', '#aaa']) // 100, 50, 10
+  })
+})
+
+describe('diffTokens', () => { // each case diffs a `before` summary against an `after` summary
+  it('produces colorsAdded / colorsRemoved / colorsCommon', () => {
+    const before = { url: 'x', runId: '1', brand: {}, fontFamilies: [], colors: [{ value: '', hex: '#aaa', count: 1, properties: [] }, { value: '', hex: '#bbb', count: 1, properties: [] }], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] }
+    const after = { url: 'x', runId: '2', brand: {}, fontFamilies: [], colors: [{ value: '', hex: '#bbb', count: 1, properties: [] }, { value: '', hex: '#ccc', count: 1, properties: [] }], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] }
+    const delta = diffTokens(before, after)
+    expect(delta.colorsAdded).toEqual(['#ccc']) // only in `after`
+    expect(delta.colorsRemoved).toEqual(['#aaa']) // only in `before`
+    expect(delta.colorsCommon).toBe(1) // '#bbb' appears in both
+  })
+
+  it('detects font family swaps', () => {
+    const before = { url: 'x', runId: '1', brand: {}, fontFamilies: [{ family: 'Helvetica', weights: [400], classification: 'body' as const }], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] }
+    const after = { url: 'x', runId: '2', brand: {}, fontFamilies: [{ family: 'Inter', weights: [400], classification: 'body' as const }], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] }
+    const delta = diffTokens(before, after)
+    expect(delta.familiesAdded).toEqual(['Inter'])
+    expect(delta.familiesRemoved).toEqual(['Helvetica'])
+  })
+
+  it('flags brand metadata changes', () => {
+    const before = { url: 'x', runId: '1', brand: { themeColor: '#000', title: 'Old' }, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] }
+    const after = { url: 'x', runId: '2', brand: { themeColor: '#fff', title: 'New' }, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] }
+    const delta = diffTokens(before, after)
+    expect(delta.brandChanges.map(change => change.field).sort()).toEqual(['themeColor', 'title']) // sorted so result order is irrelevant
+  })
+
+  it('detects library adoption / drop', () => {
+    const before = { url: 'x', runId: '1', brand: {}, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: ['bootstrap'] }
+    const after = { url: 'x', runId: '2', brand: {}, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: ['tailwind'] }
+    const delta = diffTokens(before, after)
+    expect(delta.librariesAdded).toEqual(['tailwind'])
+    expect(delta.librariesRemoved).toEqual(['bootstrap'])
+  })
+})
+
+describe('groupByUrl', () => {
+  it('groups summaries by URL and sorts by capturedAt', () => {
+    const unordered = [ // the two 'a' entries are given newest-first
+      { url: 'a', runId: '1', capturedAt: '2020-01-01T00:00:00Z', brand: {}, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] },
+      { url: 'a', runId: '2', capturedAt: '2010-01-01T00:00:00Z', brand: {}, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] },
+      { url: 'b', runId: '3', capturedAt: '2024-01-01T00:00:00Z', brand: {}, fontFamilies: [], colors: [], typeScaleEntries: 0, logos: [], fontFiles: [], detectedLibraries: [] },
+    ]
+    const grouped = groupByUrl(unordered)
+    const seriesForA = grouped.find(entry => entry.url === 'a')!
+    expect(seriesForA.snapshots[0].capturedAt).toBe('2010-01-01T00:00:00Z') // oldest capture first
+    expect(seriesForA.snapshots[1].capturedAt).toBe('2020-01-01T00:00:00Z')
+  })
+})
+
+describe('renderBrandEvolution', () => { // assertions below match against the returned markdown string
+  let dir: string
+  afterEach(() => { if (dir) rmSync(dir, { recursive: true, force: true }) }) // remove the temp dir if a test created one
+
+  it('returns a placeholder when no tokens are in the job', () => {
+    const emptyJob = makeJob([{ url: 'https://x/', status: 'ok', runId: 'r' }]) // no tokensPath on the result
+    const report = renderBrandEvolution(emptyJob)
+    expect(report).toMatch(/No tokens\.json files were produced/) // dot escaped so it only matches a literal '.'
+  })
+
+  it('renders one section per URL with snapshots in order', () => {
+    dir = mkdtempSync(join(tmpdir(), 'brand-evo-'))
+    const olderTokens = writeTokens(dir, 't1', { colors: [{ value: '', hex: '#000', count: 1, properties: [] }] })
+    const newerTokens = writeTokens(dir, 't2', { colors: [{ value: '', hex: '#fff', count: 1, properties: [] }] })
+    const job = makeJob([
+      { url: 'https://x/', status: 'ok', runId: 'r1', tokensPath: olderTokens, capturedAt: '2010-01-01T00:00:00Z' },
+      { url: 'https://x/', status: 'ok', runId: 'r2', tokensPath: newerTokens, capturedAt: '2020-01-01T00:00:00Z' },
+    ])
+    const report = renderBrandEvolution(job)
+    expect(report).toMatch(/## https:\/\/x\//) // URL heading
+    expect(report).toMatch(/### 2010-01-01/) // one sub-heading per snapshot date
+    expect(report).toMatch(/### 2020-01-01/)
+    expect(report).toMatch(/Δ vs 2010-01-01/) // later snapshot is diffed against the earlier one
+    expect(report).toMatch(/\+1 new colors/) // '#fff' added
+    expect(report).toMatch(/−1 removed/) // '#000' removed; the pattern uses U+2212 MINUS SIGN, not '-'
+  })
+})
diff --git a/tests/reports-tools.test.ts b/tests/reports-tools.test.ts
index d011337..54ddf96 100644
--- a/tests/reports-tools.test.ts
+++ b/tests/reports-tools.test.ts
@@ -71,7 +71,7 @@ describe('buildReportTools', () => {
   it('exposes the documented tool surface', () => {
     const tools = buildReportTools()
     const names = Object.keys(tools).sort()
-    expect(names).toEqual(['compareRuns', 'fetchAudit', 'longitudinal', 'queryJob', 'renderTemplate', 'runFreshAudit', 'tierBuckets'].sort())
+    expect(names).toEqual(['compareRuns', 'diffTokens', 'fetchAudit', 'fetchTokens', 'longitudinal', 'queryJob', 'renderTemplate', 'runFreshAudit', 'tierBuckets'].sort())
   })
 
   it('queryJob returns ranked rows', async () => {