diff --git a/apps/insights/src/generation.ts b/apps/insights/src/generation.ts index 198c2575f..427115689 100644 --- a/apps/insights/src/generation.ts +++ b/apps/insights/src/generation.ts @@ -49,7 +49,7 @@ import { buildSystemPrompt, fetchDismissedPatterns, fetchRecentAnnotations, - fetchRecentInsightsForPrompt, + fetchInsightHistory, formatOrgWebsitesContext, type OrgWebsiteRow, } from "./prompts"; @@ -295,10 +295,10 @@ async function analyzeWebsite(params: { const investigationMode = enrichedSignals.length > 0; - const [annotationContext, recentInsightsBlock, siteContext, dismissedBlock] = + const [annotationContext, historyBlock, siteContext, dismissedBlock] = await Promise.all([ fetchRecentAnnotations(params.websiteId, params.config), - fetchRecentInsightsForPrompt( + fetchInsightHistory( params.organizationId, params.websiteId, params.config @@ -321,14 +321,14 @@ async function analyzeWebsite(params: { githubRepo: params.githubRepo, period: params.period, timezone: params.config.timezone, - recentInsightsBlock, + historyBlock, annotationContext, dismissedBlock, orgContext, siteContext: siteBlock, }) : `Analyze ${params.domain} (${currentRange.from} to ${currentRange.to} vs ${previousRange.from} to ${previousRange.to}, ${params.config.timezone}). Use web_metrics with period="both" to compare periods efficiently.${siteBlock} -${orgContext}${annotationContext}${recentInsightsBlock}${dismissedBlock}`; +${orgContext}${annotationContext}${historyBlock}${dismissedBlock}`; const { tools: analyticsTools } = createInsightsAgentTools({ websiteId: params.websiteId, diff --git a/apps/insights/src/prompts.ts b/apps/insights/src/prompts.ts index ceb7c309c..23d1a3458 100644 --- a/apps/insights/src/prompts.ts +++ b/apps/insights/src/prompts.ts @@ -86,7 +86,7 @@ export async function fetchDismissedPatterns( return `\n\nInsights users marked as NOT helpful (avoid similar narratives):\n${lines.join("\n")}`; } -export async function fetchRecentInsightsForPrompt( +export async function fetchInsightHistory( organizationId: string, websiteId: string, config: InsightGenerationConfigSnapshot @@ -95,8 +95,14 @@ export async function fetchRecentInsightsForPrompt( const rows = await db .select({ title: analyticsInsights.title, + description: analyticsInsights.description, type: analyticsInsights.type, + severity: analyticsInsights.severity, + rootCause: analyticsInsights.rootCause, + changePercent: analyticsInsights.changePercent, + subjectKey: analyticsInsights.subjectKey, createdAt: analyticsInsights.createdAt, + runId: analyticsInsights.runId, }) .from(analyticsInsights) .where( @@ -113,12 +119,43 @@ export async function fetchRecentInsightsForPrompt( return ""; } - const lines = rows.map( - (row) => - `- [${row.type}] ${row.title} (${dayjs(row.createdAt).format("YYYY-MM-DD")})` - ); + const subjectCounts = new Map(); + for (const row of rows) { + subjectCounts.set( + row.subjectKey, + (subjectCounts.get(row.subjectKey) ?? 0) + 1 + ); + } + + const seen = new Set(); + const lines: string[] = []; + for (const row of rows) { + if (seen.has(row.subjectKey)) { + continue; + } + seen.add(row.subjectKey); + + const date = dayjs(row.createdAt).format("YYYY-MM-DD"); + const recurrence = subjectCounts.get(row.subjectKey) ?? 1; + const recurring = recurrence > 1 ? ` (reported ${recurrence}x)` : ""; + const change = + row.changePercent === null + ? "" + : ` ${row.changePercent > 0 ? "+" : ""}${Math.round(row.changePercent)}%`; + + lines.push( + `- [${row.severity}] ${row.title}${change}${recurring} (${date})` + ); + if (row.description) { + lines.push(` ${row.description.slice(0, 150)}`); + } + if (row.rootCause) { + lines.push(` Cause: ${row.rootCause.slice(0, 100)}`); + } + } - return `\n\nRecently reported (avoid repeating unless materially changed):\n${lines.join("\n")}`; + return `\n\nPrevious findings for this site (compare against current data — note what resolved, worsened, or persists): +${lines.join("\n")}`; } export interface OrgWebsiteRow { @@ -252,7 +289,7 @@ export function buildInvestigationPrompt( githubRepo?: { owner: string; repo: string }; orgContext: string; period: WeekOverWeekPeriod; - recentInsightsBlock: string; + historyBlock: string; siteContext: string; timezone: string; } @@ -285,5 +322,5 @@ ${githubInstruction} 8. Emit findings via emit_insight as you go. summary_metrics is the canonical source for headline numbers. -${params.orgContext}${params.annotationContext}${params.recentInsightsBlock}${params.dismissedBlock}`; +${params.orgContext}${params.annotationContext}${params.historyBlock}${params.dismissedBlock}`; } diff --git a/packages/evals/ui/index.html b/packages/evals/ui/index.html index fb2e6efb6..38dd3c06a 100644 --- a/packages/evals/ui/index.html +++ b/packages/evals/ui/index.html @@ -794,16 +794,16 @@

Latest model board

const id = escapeHtml(c.id); const cost = (c.metrics?.costUsd || 0) + (c.metrics?.judgeCostUsd || 0); return ` -
${id}
- ${escapeHtml(c.category || "case")} - ${c.passed ? "Pass" : "Fail"} - ${c.scores?.tool_routing ?? "--"} - ${c.scores?.quality ?? "--"} - ${((c.metrics?.latencyMs || 0) / 1000).toFixed(1)}s - ${c.metrics?.steps ?? "--"} - ${money(cost)} - -
${detail(c)}
`; +
${id}
+ ${escapeHtml(c.category || "case")} + ${c.passed ? "Pass" : "Fail"} + ${c.scores?.tool_routing ?? "--"} + ${c.scores?.quality ?? "--"} + ${((c.metrics?.latencyMs || 0) / 1000).toFixed(1)}s + ${c.metrics?.steps ?? "--"} + ${money(cost)} + +
${detail(c)}
`; } function detail(c) { @@ -815,7 +815,7 @@

Latest model board

.map((t) => `${escapeHtml(t)}`) .join("") || 'No tools called'; return `

Response

${escapeHtml(c.response || "No response captured.")}
-

Failures

    ${failures}

Tools

${tools}
`; +

Failures

    ${failures}

Tools

${tools}
`; } function toggle(id) {