diff --git a/.agents/skills/databuddy-internal/SKILL.md b/.agents/skills/databuddy-internal/SKILL.md index 38dc9779f..121a2fec4 100644 --- a/.agents/skills/databuddy-internal/SKILL.md +++ b/.agents/skills/databuddy-internal/SKILL.md @@ -21,6 +21,7 @@ Keep additions **minimal**: one bullet, a new `rg` hint, or a routing note—eno - Never use production/customer data as tests, fixtures, snapshots, examples, or copied output. Tests must use placeholders/mocks only (example.com, example IDs). If production ClickHouse is queried for investigation, summarize anonymized aggregates and do not paste customer domains, client IDs, emails, or other identifiers into code or responses. - `apps/dashboard`: Next.js app on port `3000` (per-website **agent** chat: `@ai-sdk/react` `useChat` via `contexts/chat-context.tsx` — not the separate `chat-sdk` package; overlapping sends while streaming are queued client-side to mirror a “queue latest” strategy.) - Dashboard Playwright webServer commands run under CI PATH from setup-bun; avoid `bash -lc` because login shells can drop Bun from PATH. Build dist-only workspace packages such as `@databuddy/sdk` and `@databuddy/devtools` before starting the API/dashboard. Client `NEXT_PUBLIC_*` flags must use direct env access so Next can inline them. `readBooleanEnv` only treats the literal string `"true"` as enabled, so CI E2E booleans must use `"true"`/`"false"`, not `"1"`/`"0"`. +- Local E2E dashboard smokes that need `/api/test/e2e/*` should start the API/dashboard directly (or through Playwright's webServer command), not via `bun run dev:dashboard`; Turbo runs in strict env mode and drops `DATABUDDY_E2E_MODE`/`DATABUDDY_E2E_TEST_KEY` unless they are added to `turbo.json` `globalEnv`. - Dashboard Playwright public/demo analytics specs call API `/v1/query` anonymously from the browser; keep `DATABUDDY_E2E_MODE` query behavior isolated from production rate limits so CI retries do not exhaust `anon:unknown`. - `apps/api`: Elysia API on port `3001` - `apps/slack`: Slack agent adapter; Slack installs must resolve through org-scoped DB integration records, not a single env bot token/default website. Agent calls must use an encrypted per-integration Databuddy API key secret as a normal bearer token, never a global internal secret. diff --git a/apps/api/package.json b/apps/api/package.json index 7a489d0fd..ae2578b9e 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -34,7 +34,7 @@ "@orpc/openapi": "^1.14.0", "@orpc/server": "^1.14.0", "@orpc/zod": "^1.14.0", - "ai": "^6.0.154", + "ai": "^6.0.188", "autumn-js": "catalog:", "bullmq": "^5.66.5", "dayjs": "^1.11.19", diff --git a/apps/api/src/routes/agent.ts b/apps/api/src/routes/agent.ts index 999c03d23..5e6082305 100644 --- a/apps/api/src/routes/agent.ts +++ b/apps/api/src/routes/agent.ts @@ -804,6 +804,7 @@ export const agent = new Elysia({ prefix: "/v1/agent" }) const config = createAgentConfig( { userId, + organizationId: organizationId ?? undefined, websiteId: body.websiteId, websiteDomain: domain, timezone, diff --git a/apps/dashboard/app/(main)/insights/_components/insight-card.tsx b/apps/dashboard/app/(main)/insights/_components/insight-card.tsx index 784de8a5d..02d29c194 100644 --- a/apps/dashboard/app/(main)/insights/_components/insight-card.tsx +++ b/apps/dashboard/app/(main)/insights/_components/insight-card.tsx @@ -22,7 +22,7 @@ import { changePercentChipClassName, formatSignedChangePercent, } from "@/lib/insight-signal-key"; -import type { Insight, InsightType } from "@/lib/insight-types"; +import type { Insight, InsightAction, InsightType } from "@/lib/insight-types"; import { cn } from "@/lib/utils"; import { ArrowRightIcon, @@ -375,6 +375,52 @@ function InsightCardPanel({ ); } +const ACTION_ICONS: Record = { + fix_goal: , + create_funnel: , + add_custom_event: , + create_annotation: ( + + ), + update_config: , + add_tracking: , + investigate_further: , + code_fix: , +}; + +function InsightActionPill({ action }: { action: InsightAction }) { + const handleClick = async () => { + if ( + (action.type === "code_fix" || action.type === "investigate_further") && + action.params.prompt + ) { + try { + await navigator.clipboard.writeText(action.params.prompt); + toast.success( + action.type === "code_fix" + ? "Copied to clipboard -- paste in Cursor or Claude Code" + : "Copied investigation prompt" + ); + } catch { + toast.error("Could not copy to clipboard"); + } + return; + } + toast.info(`${action.label}`); + }; + + return ( + + ); +} + function InsightCopy({ view }: { view: InsightCardViewModel }) { return ( <> @@ -387,6 +433,38 @@ function InsightCopy({ view }: { view: InsightCardViewModel }) {

+ {view.rootCause && ( +
+

+ Root cause +

+

+ {view.rootCause} +

+
+ )} + + {view.investigationEvidence.length > 0 && ( +
+

+ Evidence +

+
    + {view.investigationEvidence.map((e, i) => ( +
  • + + • + + {e.description} +
  • + ))} +
+
+ )} + {view.nextStep && (
@@ -401,23 +479,30 @@ function InsightCopy({ view }: { view: InsightCardViewModel }) {

{view.nextStep}

+ {view.actions.length > 0 && ( +
+ {view.actions.map((action, i) => ( + + ))} +
+ )}
)} ); } -function InsightEvidence({ view }: { view: InsightCardViewModel }) { - if (view.evidence.length === 0) { +function InsightMetricsSection({ view }: { view: InsightCardViewModel }) { + if (view.metrics.length === 0) { return null; } return (

- Evidence + Metrics

- +
); } @@ -645,7 +730,7 @@ export function InsightCard({ - {!isCompact && } + {!isCompact && } { expect(view.headline).toBe("Interactions got slower"); expect(view.metaLabel).toBe("Marketing"); expect(view.primaryActionLabel).toBe("Review speed"); - expect(view.evidence[0]?.label).toBe("Interaction delay"); + expect(view.metrics[0]?.label).toBe("Interaction delay"); }); it("falls back to domain and default action when needed", () => { @@ -41,4 +41,28 @@ describe("insight card view model", () => { expect(view.metaLabel).toBe("databuddy.cc"); expect(view.primaryActionLabel).toBe("Open analytics"); }); + + it("keeps investigation evidence separate from metric evidence", () => { + const view = toInsightCardViewModel({ + ...baseInsight, + rootCause: "The homepage script bundle delayed hydration.", + evidence: [ + { + description: "LCP moved after the new checkout banner shipped.", + type: "deploy_correlation", + }, + ], + }); + + expect(view.rootCause).toBe( + "The homepage script bundle delayed hydration." + ); + expect(view.investigationEvidence).toEqual([ + { + description: "LCP moved after the new checkout banner shipped.", + type: "deploy_correlation", + }, + ]); + expect(view.metrics[0]?.label).toBe("Interaction delay"); + }); }); diff --git a/apps/dashboard/app/(main)/insights/lib/insight-card-view-model.ts b/apps/dashboard/app/(main)/insights/lib/insight-card-view-model.ts index 0222a5c40..c1209eb10 100644 --- a/apps/dashboard/app/(main)/insights/lib/insight-card-view-model.ts +++ b/apps/dashboard/app/(main)/insights/lib/insight-card-view-model.ts @@ -1,4 +1,10 @@ -import type { Insight, InsightMetric, InsightType } from "@/lib/insight-types"; +import type { + Insight, + InsightAction, + InsightEvidence, + InsightMetric, + InsightType, +} from "@/lib/insight-types"; const DEFAULT_PRIMARY_ACTION_LABEL = "Open analytics"; @@ -25,22 +31,28 @@ const PRIMARY_ACTION_LABELS: Partial> = { }; export interface InsightCardViewModel { - evidence: InsightMetric[]; + actions: InsightAction[]; headline: string; + investigationEvidence: InsightEvidence[]; metaLabel: string; + metrics: InsightMetric[]; nextStep: string; primaryActionLabel: string; + rootCause: string | null; whyItMatters: string; } export function toInsightCardViewModel(insight: Insight): InsightCardViewModel { return { - evidence: insight.metrics ?? [], + actions: insight.actions ?? [], headline: insight.title, + investigationEvidence: insight.evidence ?? [], metaLabel: insight.websiteName ?? insight.websiteDomain, + metrics: insight.metrics ?? [], nextStep: insight.suggestion, primaryActionLabel: PRIMARY_ACTION_LABELS[insight.type] ?? DEFAULT_PRIMARY_ACTION_LABEL, + rootCause: insight.rootCause ?? null, whyItMatters: insight.description, }; } diff --git a/apps/dashboard/app/(main)/organizations/components/integrations-settings.tsx b/apps/dashboard/app/(main)/organizations/components/integrations-settings.tsx index 7aef6d150..9a6b8b7c7 100644 --- a/apps/dashboard/app/(main)/organizations/components/integrations-settings.tsx +++ b/apps/dashboard/app/(main)/organizations/components/integrations-settings.tsx @@ -55,6 +55,8 @@ const SIMPLE_ICONS = { "M20.317 4.3698a19.7913 19.7913 0 00-4.8851-1.5152.0741.0741 0 00-.0785.0371c-.211.3753-.4447.8648-.6083 1.2495-1.8447-.2762-3.68-.2762-5.4868 0-.1636-.3933-.4058-.8742-.6177-1.2495a.077.077 0 00-.0785-.037 19.7363 19.7363 0 00-4.8852 1.515.0699.0699 0 00-.0321.0277C.5334 9.0458-.319 13.5799.0992 18.0578a.0824.0824 0 00.0312.0561c2.0528 1.5076 4.0413 2.4228 5.9929 3.0294a.0777.0777 0 00.0842-.0276c.4616-.6304.8731-1.2952 1.226-1.9942a.076.076 0 00-.0416-.1057c-.6528-.2476-1.2743-.5495-1.8722-.8923a.077.077 0 01-.0076-.1277c.1258-.0943.2517-.1923.3718-.2914a.0743.0743 0 01.0776-.0105c3.9278 1.7933 8.18 1.7933 12.0614 0a.0739.0739 0 01.0785.0095c.1202.099.246.1981.3728.2924a.077.077 0 01-.0066.1276 12.2986 12.2986 0 01-1.873.8914.0766.0766 0 00-.0407.1067c.3604.698.7719 1.3628 1.225 1.9932a.076.076 0 00.0842.0286c1.961-.6067 3.9495-1.5219 6.0023-3.0294a.077.077 0 00.0313-.0552c.5004-5.177-.8382-9.6739-3.5485-13.6604a.061.061 0 00-.0312-.0286zM8.02 15.3312c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9555-2.4189 2.157-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.9555 2.4189-2.1569 2.4189zm7.9748 0c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9554-2.4189 2.1569-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.946 2.4189-2.1568 2.4189Z", cloudflare: "M16.5088 16.8447c.1475-.5068.0908-.9707-.1553-1.3154-.2246-.3164-.6045-.499-1.0615-.5205l-8.6592-.1123a.1559.1559 0 0 1-.1333-.0713c-.0283-.042-.0351-.0986-.021-.1553.0278-.084.1123-.1484.2036-.1562l8.7359-.1123c1.0351-.0489 2.1601-.8868 2.5537-1.9136l.499-1.3013c.0215-.0561.0293-.1128.0147-.168-.5625-2.5463-2.835-4.4453-5.5499-4.4453-2.5039 0-4.6284 1.6177-5.3876 3.8614-.4927-.3658-1.1187-.5625-1.794-.499-1.2026.119-2.1665 1.083-2.2861 2.2856-.0283.31-.0069.6128.0635.894C1.5683 13.171 0 14.7754 0 16.752c0 .1748.0142.3515.0352.5273.0141.083.0844.1475.1689.1475h15.9814c.0909 0 .1758-.0645.2032-.1553l.12-.4268zm2.7568-5.5634c-.0771 0-.1611 0-.2383.0112-.0566 0-.1054.0415-.127.0976l-.3378 1.1744c-.1475.5068-.0918.9707.1543 1.3164.2256.3164.6055.498 1.0625.5195l1.8437.1133c.0557 0 .1055.0263.1329.0703.0283.043.0351.1074.0214.1562-.0283.084-.1132.1485-.204.1553l-1.921.1123c-1.041.0488-2.1582.8867-2.5527 1.914l-.1406.3585c-.0283.0713.0215.1416.0986.1416h6.5977c.0771 0 .1474-.0489.169-.126.1122-.4082.1757-.837.1757-1.2803 0-2.6025-2.125-4.727-4.7344-4.727", + googlesearchconsole: + "M8.548 1.156L6.832 2.872v1.682h1.716zm0 3.398v.035H6.832v-.035H3.386L0 7.844v3.577h2.826V8.94c0-.525.429-.954.954-.954h16.476c.525 0 .954.43.954.954v2.48h2.754V7.844l-3.386-3.29H17.3v.035h-1.717v-.035zm7.035 0H17.3V2.872l-1.717-1.716zM8.679 1.188V2.84h6.773V1.188zm11.471 7.07a.834.834 0 00-.132.01l-.543.002c-5.216.014-10.432-.008-15.648.01-.435-.063-.794.436-.716.883v2.264h17.812c-.016-.888.045-1.782-.034-2.666-.104-.342-.427-.502-.739-.502zm-15.422.634a.689.698 0 01.689.698.689.698 0 01-.689.697.689.698 0 01-.688-.697.689.698 0 01.688-.698zm2.134 0a.689.698 0 01.689.698.689.698 0 01-.689.697.689.698 0 01-.688-.697.689.698 0 01.688-.698zM.036 11.645v9.156c0 1.05.858 1.908 1.907 1.908h.883V11.645zm21.174 0v11.064h.882c1.05 0 1.908-.858 1.908-1.908v-9.156zM4.057 13.133v6.85h6.137v-6.85zm13.243.021v3.777l-1.708.977-1.708-.977v-3.758a4.006 4.006 0 000 7.23v2.441h3.457v-2.442a4.006 4.006 0 00-.041-7.248zm-13.243 8.26v1.43h7.925v-1.43z", googleAnalytics: "M22.84 2.9982v17.9987c.0086 1.6473-1.3197 2.9897-2.967 2.9984a2.9808 2.9808 0 01-.3677-.0208c-1.528-.226-2.6477-1.5558-2.6105-3.1V3.1204c-.0369-1.5458 1.0856-2.8762 2.6157-3.1 1.6361-.1915 3.1178.9796 3.3093 2.6158.014.1201.0208.241.0202.3619zM4.1326 18.0548c-1.6417 0-2.9726 1.331-2.9726 2.9726C1.16 22.6691 2.4909 24 4.1326 24s2.9726-1.3309 2.9726-2.9726-1.331-2.9726-2.9726-2.9726zm7.8728-9.0098c-.0171 0-.0342 0-.0513.0003-1.6495.0904-2.9293 1.474-2.891 3.1256v7.9846c0 2.167.9535 3.4825 2.3505 3.763 1.6118.3266 3.1832-.7152 3.5098-2.327.04-.1974.06-.3983.0593-.5998v-8.9585c.003-1.6474-1.33-2.9852-2.9773-2.9882z", notion: @@ -90,6 +92,18 @@ const GITHUB_ITEM: IntegrationCatalogItem = { const GITHUB_SCOPES = ["repo:status", "read:org"]; +const GSC_ITEM: IntegrationCatalogItem = { + accent: "#4285F4", + category: "Intelligence", + description: + "Surface keyword ranking changes, impression drops, and CTR shifts in investigations.", + iconPath: SIMPLE_ICONS.googlesearchconsole, + id: "google-search-console", + name: "Google Search Console", +}; + +const GSC_SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]; + const COMING_SOON_INTEGRATIONS: IntegrationCatalogItem[] = [ { accent: "#5E6AD2", @@ -195,6 +209,25 @@ function useLinkedAccounts() { }); } +function useOAuthConnect(provider: string, scopes: string[], label: string) { + return useMutation({ + mutationFn: async () => { + const result = await authClient.linkSocial({ + provider, + scopes, + callbackURL: window.location.href, + }); + if (result.error) { + throw new Error(result.error.message); + } + return result; + }, + onError: (err) => { + toast.error(err.message || `Could not connect ${label}`); + }, + }); +} + function ConnectionBadge({ connected, loading, @@ -337,6 +370,8 @@ export function IntegrationsSettings({ + + {COMING_SOON_INTEGRATIONS.map((item) => ( a.providerId === "google"); + + const gscCheck = useQuery({ + ...orpc.integrations.checkSearchConsoleAccess.queryOptions({ + input: {}, + }), + enabled: Boolean(googleAccount), + }); + + const hasGscAccess = gscCheck.data?.hasAccess === true; + const connect = useOAuthConnect( + "google", + GSC_SCOPES, + "Google Search Console" + ); + + let action: React.ReactNode; + if (accounts.isLoading || gscCheck.isLoading) { + action = ; + } else if (hasGscAccess) { + action = ( + + ); + } else { + action = ( + + ); + } + + return ( + + } + item={GSC_ITEM} + /> + ); +} + function GitHubIntegrationRow({ organizationId }: { organizationId: string }) { const queryClient = useQueryClient(); const accounts = useLinkedAccounts(); @@ -392,22 +484,7 @@ function GitHubIntegrationRow({ organizationId }: { organizationId: string }) { enabled: Boolean(githubAccount), }); - const connect = useMutation({ - mutationFn: async () => { - const result = await authClient.linkSocial({ - provider: "github", - scopes: GITHUB_SCOPES, - callbackURL: window.location.href, - }); - if (result.error) { - throw new Error(result.error.message); - } - return result; - }, - onError: (err) => { - toast.error(err.message || "Could not connect GitHub"); - }, - }); + const connect = useOAuthConnect("github", GITHUB_SCOPES, "GitHub"); const disconnect = useMutation({ mutationFn: async () => { diff --git a/apps/dashboard/lib/insight-types.ts b/apps/dashboard/lib/insight-types.ts index a1d53ba9b..f75041d01 100644 --- a/apps/dashboard/lib/insight-types.ts +++ b/apps/dashboard/lib/insight-types.ts @@ -44,19 +44,46 @@ export interface InsightMetric { previous?: number; } +export interface InsightEvidence { + description: string; + type: string; +} + +export type InvestigationDepth = "surface" | "investigated" | "deep"; + +export type InsightActionType = + | "fix_goal" + | "create_funnel" + | "add_custom_event" + | "create_annotation" + | "update_config" + | "add_tracking" + | "investigate_further" + | "code_fix"; + +export interface InsightAction { + label: string; + params: Record; + type: InsightActionType; +} + export interface Insight { + actions?: InsightAction[] | null; changePercent?: number; createdAt?: string; currentPeriodFrom?: string | null; currentPeriodTo?: string | null; description: string; + evidence?: InsightEvidence[] | null; id: string; insightSource?: InsightSource; + investigationDepth?: InvestigationDepth | null; link: string; metrics?: InsightMetric[]; previousPeriodFrom?: string | null; previousPeriodTo?: string | null; priority: number; + rootCause?: string | null; sentiment: InsightSentiment; severity: InsightSeverity; suggestion: string; @@ -69,23 +96,27 @@ export interface Insight { } export interface HistoryInsightRow { + actions?: InsightAction[] | null; changePercent?: number | null; createdAt?: string; currentPeriodFrom?: string | null; currentPeriodTo?: string | null; description: string; + evidence?: InsightEvidence[] | null; id: string; + investigationDepth?: InvestigationDepth | null; link: string; metrics?: InsightMetric[]; previousPeriodFrom?: string | null; previousPeriodTo?: string | null; priority: number; - sentiment: string; - severity: string; + rootCause?: string | null; + sentiment: InsightSentiment; + severity: InsightSeverity; suggestion: string; timezone?: string | null; title: string; - type: string; + type: InsightType; websiteDomain: string; websiteId: string; websiteName: string | null; @@ -94,9 +125,9 @@ export interface HistoryInsightRow { export function mapHistoryRowToInsight(row: HistoryInsightRow): Insight { return { id: row.id, - type: row.type as InsightType, - severity: row.severity as InsightSeverity, - sentiment: row.sentiment as InsightSentiment, + type: row.type, + severity: row.severity, + sentiment: row.sentiment, priority: row.priority, websiteId: row.websiteId, websiteName: row.websiteName, @@ -106,6 +137,10 @@ export function mapHistoryRowToInsight(row: HistoryInsightRow): Insight { suggestion: row.suggestion, metrics: row.metrics ?? [], changePercent: row.changePercent ?? undefined, + rootCause: row.rootCause, + evidence: row.evidence, + investigationDepth: row.investigationDepth, + actions: row.actions, link: row.link, insightSource: "history", createdAt: row.createdAt ?? undefined, diff --git a/apps/dashboard/package.json b/apps/dashboard/package.json index 7d664b516..b6b143b00 100644 --- a/apps/dashboard/package.json +++ b/apps/dashboard/package.json @@ -60,7 +60,7 @@ "@types/leaflet": "^1.9.21", "@types/react-grid-layout": "^2.1.0", "@xyflow/react": "^12.10.1", - "ai": "^6.0.116", + "ai": "^6.0.188", "atmn": "^1.1.8", "autumn-js": "catalog:", "babel-plugin-react-compiler": "^19.1.0-rc.1-rc-af1b7da-20250421", diff --git a/apps/insights/package.json b/apps/insights/package.json index a93ccb66a..484f25eb9 100644 --- a/apps/insights/package.json +++ b/apps/insights/package.json @@ -15,7 +15,7 @@ "@databuddy/env": "workspace:*", "@databuddy/redis": "workspace:*", "@databuddy/rpc": "workspace:*", - "ai": "^6.0.154", + "ai": "^6.0.188", "bullmq": "^5.66.5", "dayjs": "^1.11.19", "elysia": "catalog:", diff --git a/apps/insights/src/detection.test.ts b/apps/insights/src/detection.test.ts index 2f4e6dd79..82720e78f 100644 --- a/apps/insights/src/detection.test.ts +++ b/apps/insights/src/detection.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, mock } from "bun:test"; +import { describe, expect, it } from "bun:test"; import dayjs from "dayjs"; import { type DetectSignalsParams, @@ -59,24 +59,27 @@ const BASE_PARAMS: DetectSignalsParams = { function createMockQueryFn( dailyRows: Record[], summaryCurrentRow?: Record, - summaryPreviousRow?: Record + summaryPreviousRow?: Record, + extras?: Record?, Record?]> ): QueryFn { - let summaryCallIndex = 0; - return mock( - (request: { type: string }) => { - if (request.type === "events_by_date") { - return Promise.resolve(dailyRows); - } - if (request.type === "summary_metrics") { - summaryCallIndex++; - if (summaryCallIndex === 1) { - return Promise.resolve([summaryCurrentRow ?? {}]); - } - return Promise.resolve([summaryPreviousRow ?? {}]); - } - return Promise.resolve([]); + const callCounts = new Map(); + return async (request: { type: string }) => { + if (request.type === "events_by_date") { + return dailyRows; + } + const count = (callCounts.get(request.type) ?? 0) + 1; + callCounts.set(request.type, count); + if (request.type === "summary_metrics") { + return [ + count === 1 ? (summaryCurrentRow ?? {}) : (summaryPreviousRow ?? {}), + ]; } - ) as unknown as QueryFn; + const extra = extras?.[request.type]; + if (extra) { + return [count === 1 ? (extra[0] ?? {}) : (extra[1] ?? {})]; + } + return []; + }; } describe("median", () => { @@ -688,6 +691,63 @@ describe("detectSignals", () => { }); }); + describe("error detection", () => { + it("flags error count spike above 40%", async () => { + const queryFn = createMockQueryFn([], {}, {}, { + error_summary: [{ totalErrors: 50 }, { totalErrors: 20 }], + }); + + const signals = await detectSignals(BASE_PARAMS, queryFn); + const errorSignal = signals.find((s) => s.metric === "error_count"); + expect(errorSignal).toBeDefined(); + expect(errorSignal!.direction).toBe("up"); + expect(errorSignal!.deltaPercent).toBe(150); + }); + + it("skips errors below absolute threshold", async () => { + const queryFn = createMockQueryFn([], {}, {}, { + error_summary: [{ totalErrors: 3 }, { totalErrors: 1 }], + }); + + const signals = await detectSignals(BASE_PARAMS, queryFn); + expect(signals.find((s) => s.metric === "error_count")).toBeUndefined(); + }); + }); + + describe("revenue detection", () => { + it("flags new revenue appearing", async () => { + const queryFn = createMockQueryFn([], {}, {}, { + revenue_overview: [{ total_revenue: 100 }, { total_revenue: 0 }], + }); + + const signals = await detectSignals(BASE_PARAMS, queryFn); + const revSignal = signals.find((s) => s.metric === "revenue"); + expect(revSignal).toBeDefined(); + expect(revSignal!.direction).toBe("up"); + }); + + it("flags revenue drop above 30%", async () => { + const queryFn = createMockQueryFn([], {}, {}, { + revenue_overview: [{ total_revenue: 50 }, { total_revenue: 100 }], + }); + + const signals = await detectSignals(BASE_PARAMS, queryFn); + const revSignal = signals.find((s) => s.metric === "revenue"); + expect(revSignal).toBeDefined(); + expect(revSignal!.direction).toBe("down"); + expect(revSignal!.deltaPercent).toBe(-50); + }); + + it("skips small revenue changes", async () => { + const queryFn = createMockQueryFn([], {}, {}, { + revenue_overview: [{ total_revenue: 110 }, { total_revenue: 100 }], + }); + + const signals = await detectSignals(BASE_PARAMS, queryFn); + expect(signals.find((s) => s.metric === "revenue")).toBeUndefined(); + }); + }); + describe("correlated signal collapsing", () => { it("collapses 2+ same-direction traffic metrics to the strongest", async () => { const queryFn = createMockQueryFn( diff --git a/apps/insights/src/detection.ts b/apps/insights/src/detection.ts index de93abce4..e0b0039fd 100644 --- a/apps/insights/src/detection.ts +++ b/apps/insights/src/detection.ts @@ -60,15 +60,6 @@ const ANOMALY_METRICS: AnomalyMetric[] = [ }, ]; -interface DailyRow { - bounce_rate?: unknown; - date?: unknown; - median_session_duration?: unknown; - pageviews?: unknown; - sessions?: unknown; - visitors?: unknown; -} - export function median(values: number[]): number { if (values.length === 0) { return 0; @@ -90,6 +81,77 @@ export function mad(values: number[]): number { } const MAD_SCALE = 1.4826; +const ZSCORE_THRESHOLD = 2.5; +const ZSCORE_MIN_BASELINE = 6; +const WOW_TRAFFIC_THRESHOLD = 40; +const WOW_ERROR_THRESHOLD = 40; +const WOW_REVENUE_THRESHOLD = 30; +const WOW_VITALS_THRESHOLD = 30; +const WOW_CUSTOM_EVENT_THRESHOLD = 40; +const FILTER_SESSION_DURATION_MIN_DELTA = 60; +const FILTER_SESSION_DURATION_MIN_PEAK = 20; +const FILTER_BOUNCE_MIN_DELTA = 10; +const FILTER_ERROR_MIN_DELTA = 5; +const FILTER_ERROR_MIN_PEAK = 10; +const FILTER_TRAFFIC_MIN_PEAK = 80; +const FILTER_TRAFFIC_MIN_DELTA = 50; +const CUSTOM_EVENT_MIN_COUNT = 5; +const CUSTOM_EVENT_NEW_THRESHOLD = 10; +const CUSTOM_EVENT_DISAPPEARED_THRESHOLD = 10; + +const VITALS_METRICS: Record = { + LCP: "Page load time (LCP)", + INP: "Interaction speed (INP)", +}; + +type SignalFilter = (signal: DetectedSignal) => boolean; + +const METRIC_FILTERS: Record = { + session_duration: (s) => + Math.abs(s.current - s.baseline) >= FILTER_SESSION_DURATION_MIN_DELTA && + Math.max(s.current, s.baseline) >= FILTER_SESSION_DURATION_MIN_PEAK, + bounce_rate: (s) => + Math.abs(s.current - s.baseline) >= FILTER_BOUNCE_MIN_DELTA, + error_count: (s) => + Math.abs(s.current - s.baseline) >= FILTER_ERROR_MIN_DELTA && + Math.max(s.current, s.baseline) >= FILTER_ERROR_MIN_PEAK, + revenue: () => true, + lcp: () => true, + inp: () => true, +}; + +const DEFAULT_TRAFFIC_FILTER: SignalFilter = (s) => + Math.max(s.current, s.baseline) >= FILTER_TRAFFIC_MIN_PEAK && + Math.abs(s.current - s.baseline) >= FILTER_TRAFFIC_MIN_DELTA; + +function makeWowSignal( + metric: string, + label: string, + current: number, + baseline: number, + detectedAt: string +): DetectedSignal { + const pct = baseline === 0 ? 100 : safeDeltaPercent(current, baseline); + return { + metric, + label, + method: "wow", + direction: current > baseline ? "up" : "down", + current, + baseline, + deltaPercent: Number(pct.toFixed(2)), + severity: assignSeverity(undefined, pct), + detectedAt, + }; +} + +function passesImpactFilter(signal: DetectedSignal): boolean { + if (signal.metric.startsWith("custom_event:")) { + return true; + } + const filter = METRIC_FILTERS[signal.metric]; + return filter ? filter(signal) : DEFAULT_TRAFFIC_FILTER(signal); +} export function safeDeltaPercent(current: number, previous: number): number { if (previous === 0) { @@ -103,6 +165,36 @@ function isWeekend(dateStr: string): boolean { return day === 0 || day === 6; } +function numberField( + row: Record | undefined, + key: string +): number { + const value = Number(row?.[key] ?? 0); + return Number.isFinite(value) ? value : 0; +} + +function stringField( + row: Record | undefined, + key: string +): string | null { + const value = row?.[key]; + return typeof value === "string" && value ? value : null; +} + +function mapRowsByStringField( + rows: Record[], + key: string +): Map> { + const mapped = new Map>(); + for (const row of rows) { + const value = stringField(row, key); + if (value) { + mapped.set(value, row); + } + } + return mapped; +} + export function assignSeverity( zScore: number | undefined, deltaPercent: number @@ -132,7 +224,7 @@ export async function detectSignals( .format("YYYY-MM-DD"); const dailyTo = today.format("YYYY-MM-DD"); - const rows = (await queryFn( + const rows = await queryFn( { projectId: websiteId, type: "events_by_date", @@ -144,7 +236,7 @@ export async function detectSignals( }, undefined, timezone - )) as DailyRow[]; + ); const sorted = [...rows].sort((a, b) => String(a.date ?? "").localeCompare(String(b.date ?? "")) @@ -173,20 +265,7 @@ export async function detectSignals( } } - const filtered = [...byMetric.values()].filter((signal) => { - const absDelta = Math.abs(signal.current - signal.baseline); - if (signal.metric === "session_duration") { - return absDelta >= 60 && Math.max(signal.current, signal.baseline) >= 20; - } - if (signal.metric === "bounce_rate") { - return absDelta >= 10; - } - const peak = Math.max(signal.current, signal.baseline); - if (peak < 80) { - return false; - } - return absDelta >= 50; - }); + const filtered = [...byMetric.values()].filter(passesImpactFilter); const collapsed = collapseCorrelated(filtered); @@ -218,7 +297,7 @@ function collapseCorrelated(signals: DetectedSignal[]): DetectedSignal[] { return [...collapsedUp, ...collapsedDown]; } -function detectZscore(sorted: DailyRow[]): DetectedSignal[] { +function detectZscore(sorted: Record[]): DetectedSignal[] { if (sorted.length < 7) { return []; } @@ -237,7 +316,7 @@ function detectZscore(sorted: DailyRow[]): DetectedSignal[] { return latestIsWeekend === rowIsWeekend; }); - if (baseline.length < 6) { + if (baseline.length < ZSCORE_MIN_BASELINE) { return []; } @@ -245,10 +324,10 @@ function detectZscore(sorted: DailyRow[]): DetectedSignal[] { for (const metric of ANOMALY_METRICS) { const baselineValues = baseline - .map((r) => Number(r[metric.dailyField as keyof DailyRow] ?? 0)) + .map((row) => numberField(row, metric.dailyField)) .filter((v) => Number.isFinite(v)); - if (baselineValues.length < 6) { + if (baselineValues.length < ZSCORE_MIN_BASELINE) { continue; } @@ -259,11 +338,9 @@ function detectZscore(sorted: DailyRow[]): DetectedSignal[] { continue; } - const currentValue = Number( - latest[metric.dailyField as keyof DailyRow] ?? 0 - ); + const currentValue = numberField(latest, metric.dailyField); const zScore = (currentValue - baselineMedian) / scaledMad; - if (Math.abs(zScore) < 2.5) { + if (Math.abs(zScore) < ZSCORE_THRESHOLD) { continue; } @@ -305,60 +382,189 @@ async function detectWow( .format("YYYY-MM-DD"); const previousTo = today.subtract(windowDays, "day").format("YYYY-MM-DD"); - const [currentRows, previousRows] = await Promise.all([ - queryFn( - { - projectId: websiteId, - type: "summary_metrics", - from: currentFrom, - to: currentTo, - timezone, - }, - undefined, - timezone - ), - queryFn( - { - projectId: websiteId, - type: "summary_metrics", - from: previousFrom, - to: previousTo, - timezone, - }, + function query(type: string, from: string, to: string) { + return queryFn( + { projectId: websiteId, type, from, to, timezone }, undefined, timezone - ), + ); + } + + const [ + currentSummary, + previousSummary, + currentErrors, + previousErrors, + currentRevenue, + previousRevenue, + currentVitals, + previousVitals, + currentCustom, + previousCustom, + ] = await Promise.all([ + query("summary_metrics", currentFrom, currentTo), + query("summary_metrics", previousFrom, previousTo), + query("error_summary", currentFrom, currentTo), + query("error_summary", previousFrom, previousTo), + query("revenue_overview", currentFrom, currentTo), + query("revenue_overview", previousFrom, previousTo), + query("vitals_overview", currentFrom, currentTo), + query("vitals_overview", previousFrom, previousTo), + query("custom_events_discovery", currentFrom, currentTo), + query("custom_events_discovery", previousFrom, previousTo), ]); - const currentRow = (currentRows[0] ?? {}) as Record; - const previousRow = (previousRows[0] ?? {}) as Record; const signals: DetectedSignal[] = []; for (const metric of ANOMALY_METRICS) { - const currentValue = Number(currentRow[metric.summaryField] ?? 0); - const previousValue = Number(previousRow[metric.summaryField] ?? 0); + const currentValue = numberField(currentSummary[0], metric.summaryField); + const previousValue = numberField(previousSummary[0], metric.summaryField); if (previousValue === 0 || currentValue === 0) { continue; } - const pct = safeDeltaPercent(currentValue, previousValue); - if (Math.abs(pct) < 40) { + if ( + Math.abs(safeDeltaPercent(currentValue, previousValue)) < + WOW_TRAFFIC_THRESHOLD + ) { continue; } + signals.push( + makeWowSignal( + metric.key, + metric.label, + currentValue, + previousValue, + currentTo + ) + ); + } + + const errNow = numberField(currentErrors[0], "totalErrors"); + const errPrev = numberField(previousErrors[0], "totalErrors"); + if (errPrev === 0 && errNow >= FILTER_ERROR_MIN_PEAK) { + signals.push(makeWowSignal("error_count", "Errors", errNow, 0, currentTo)); + } else if ( + errNow > 0 && + errPrev > 0 && + Math.abs(safeDeltaPercent(errNow, errPrev)) >= WOW_ERROR_THRESHOLD + ) { + signals.push( + makeWowSignal("error_count", "Errors", errNow, errPrev, currentTo) + ); + } + + const revNow = numberField(currentRevenue[0], "total_revenue"); + const revPrev = numberField(previousRevenue[0], "total_revenue"); + if ((revNow > 0 || revPrev > 0) && Math.abs(revNow - revPrev) > 0) { + const pct = revPrev === 0 ? 100 : safeDeltaPercent(revNow, revPrev); + if ( + Math.abs(pct) >= WOW_REVENUE_THRESHOLD || + (revPrev === 0 && revNow > 0) + ) { + signals.push( + makeWowSignal("revenue", "Revenue", revNow, revPrev, currentTo) + ); + } + } - const direction: "up" | "down" = - currentValue > previousValue ? "up" : "down"; + const vitalsCurrentMap = mapRowsByStringField(currentVitals, "metric_name"); + const vitalsPreviousMap = mapRowsByStringField(previousVitals, "metric_name"); + + for (const [metricName, label] of Object.entries(VITALS_METRICS)) { + const cur = vitalsCurrentMap.get(metricName); + const prev = vitalsPreviousMap.get(metricName); + const curVal = numberField(cur, "p75"); + const prevVal = numberField(prev, "p75"); + const curSamples = numberField(cur, "samples"); + if (curSamples < 10 || prevVal === 0 || curVal === 0) { + continue; + } + + const pct = safeDeltaPercent(curVal, prevVal); + if (Math.abs(pct) < WOW_VITALS_THRESHOLD) { + continue; + } + + signals.push( + makeWowSignal(metricName.toLowerCase(), label, curVal, prevVal, currentTo) + ); + } + + const prevEventsMap = new Map(); + for (const row of previousCustom) { + const name = stringField(row, "event_name"); + if (name) { + prevEventsMap.set(name, numberField(row, "total_events")); + } + } + + const curEventNames = new Set(); + for (const row of currentCustom) { + const name = stringField(row, "event_name"); + const curCount = numberField(row, "total_events"); + if (!name) { + continue; + } + curEventNames.add(name); + if (curCount < CUSTOM_EVENT_MIN_COUNT) { + continue; + } + + const prevCount = prevEventsMap.get(name) ?? 0; + if (prevCount === 0 && curCount >= CUSTOM_EVENT_NEW_THRESHOLD) { + signals.push( + makeWowSignal( + `custom_event:${name}`, + `Custom event "${name}"`, + curCount, + 0, + currentTo + ) + ); + continue; + } + if (prevCount === 0) { + continue; + } + if ( + Math.abs(safeDeltaPercent(curCount, prevCount)) < + WOW_CUSTOM_EVENT_THRESHOLD + ) { + continue; + } + if (Math.abs(curCount - prevCount) < CUSTOM_EVENT_MIN_COUNT) { + continue; + } + signals.push( + makeWowSignal( + `custom_event:${name}`, + `Custom event "${name}"`, + curCount, + prevCount, + currentTo + ) + ); + } + + for (const [name, prevCount] of prevEventsMap) { + if (prevCount < CUSTOM_EVENT_DISAPPEARED_THRESHOLD) { + continue; + } + if (curEventNames.has(name)) { + continue; + } signals.push({ - metric: metric.key, - label: metric.label, - method: "wow", - direction, - current: currentValue, - baseline: previousValue, - deltaPercent: Number(pct.toFixed(2)), - severity: assignSeverity(undefined, pct), + ...makeWowSignal( + `custom_event:${name}`, + `Custom event "${name}"`, + 0, + prevCount, + currentTo + ), + severity: "warning", detectedAt: currentTo, }); } diff --git a/apps/insights/src/enrichment.test.ts b/apps/insights/src/enrichment.test.ts index 25853eb9f..628a9afc6 100644 --- a/apps/insights/src/enrichment.test.ts +++ b/apps/insights/src/enrichment.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, mock } from "bun:test"; +import { describe, expect, it } from "bun:test"; import type { DetectedSignal, QueryFn } from "./detection"; import { type AnnotationContext, @@ -32,18 +32,18 @@ const BASE_PARAMS = { function createMockQueryFn( responses: Record> ): QueryFn { - return mock((request: { type: string; from: string; to: string }) => { + return async (request: { type: string; from: string; to: string }) => { const byType = responses[request.type]; - if (!byType) return Promise.resolve([]); + if (!byType) return []; const key = `${request.from}:${request.to}`; - return Promise.resolve(byType[key] ?? byType["*"] ?? []); - }) as unknown as QueryFn; + return byType[key] ?? byType["*"] ?? []; + }; } function createMockAnnotationFn( result: AnnotationContext[] = [] ): AnnotationQueryFn { - return mock(() => Promise.resolve(result)) as unknown as AnnotationQueryFn; + return async () => result; } describe("enrichSignals", () => { @@ -358,16 +358,18 @@ describe("enrichSignals", () => { const signal = makeSignal({ method: "zscore", detectedAt: "2026-05-20" }); const calls: { type: string; from: string; to: string }[] = []; - const queryFn = mock( - (request: { type: string; from: string; to: string }) => { - calls.push({ - type: request.type, - from: request.from, - to: request.to, - }); - return Promise.resolve([]); - } - ) as unknown as QueryFn; + const queryFn: QueryFn = async (request: { + type: string; + from: string; + to: string; + }) => { + calls.push({ + type: request.type, + from: request.from, + to: request.to, + }); + return []; + }; await enrichSignals( [signal], @@ -383,20 +385,22 @@ describe("enrichSignals", () => { expect(pagesCurrent!.to).toBe("2026-05-20"); }); - it("uses half-lookback windows for wow signals", async () => { + it("uses full-lookback windows for wow signals", async () => { const signal = makeSignal({ method: "wow", detectedAt: "2026-05-20" }); const calls: { type: string; from: string; to: string }[] = []; - const queryFn = mock( - (request: { type: string; from: string; to: string }) => { - calls.push({ - type: request.type, - from: request.from, - to: request.to, - }); - return Promise.resolve([]); - } - ) as unknown as QueryFn; + const queryFn: QueryFn = async (request: { + type: string; + from: string; + to: string; + }) => { + calls.push({ + type: request.type, + from: request.from, + to: request.to, + }); + return []; + }; await enrichSignals( [signal], @@ -410,11 +414,11 @@ describe("enrichSignals", () => { const currentCall = pagesCalls.find((c) => c.to === "2026-05-20"); expect(currentCall).toBeDefined(); - expect(currentCall!.from).toBe("2026-05-14"); + expect(currentCall!.from).toBe("2026-05-07"); const previousCall = pagesCalls.find((c) => c.to !== "2026-05-20"); expect(previousCall).toBeDefined(); - expect(previousCall!.to).toBe("2026-05-13"); + expect(previousCall!.to).toBe("2026-05-06"); }); }); }); diff --git a/apps/insights/src/enrichment.ts b/apps/insights/src/enrichment.ts index 19e37115b..602da8b6e 100644 --- a/apps/insights/src/enrichment.ts +++ b/apps/insights/src/enrichment.ts @@ -45,11 +45,21 @@ export interface GitHubContext { repo: string; } +export interface VitalsContext { + metrics: Array<{ + name: string; + currentP75: number; + previousP75: number; + deltaPercent: number; + }>; +} + export interface EnrichedSignal extends DetectedSignal { annotations: AnnotationContext[]; errorContext?: ErrorContext; githubContext?: GitHubContext; segments: SegmentBreakdown[]; + vitalsContext?: VitalsContext; } export interface EnrichSignalsParams { @@ -96,6 +106,7 @@ const SEGMENT_TOP_MOVERS = 3; const SEGMENT_FETCH_LIMIT = 100; const ERROR_MIN_DELTA_PERCENT = 20; const ERROR_TOP_LIMIT = 5; +const ERROR_WORD_SPLIT_RE = /[\s:()]+/; function computeWindow( signal: DetectedSignal, @@ -114,7 +125,7 @@ function computeWindow( }; } - const windowDays = Math.max(3, Math.floor(lookbackDays / 2)); + const windowDays = Math.max(3, lookbackDays); return { currentFrom: detectedDay .subtract(windowDays - 1, "day") @@ -127,6 +138,41 @@ function computeWindow( }; } +function queryPeriodPair( + websiteId: string, + timezone: string, + window: SignalWindow, + queryFn: QueryFn +) { + return (type: string, limit?: number) => + Promise.all([ + queryFn( + { + projectId: websiteId, + type, + from: window.currentFrom, + to: window.currentTo, + timezone, + ...(limit ? { limit } : {}), + }, + undefined, + timezone + ), + queryFn( + { + projectId: websiteId, + type, + from: window.previousFrom, + to: window.previousTo, + timezone, + ...(limit ? { limit } : {}), + }, + undefined, + timezone + ), + ]); +} + function computeSegmentMovers( currentRows: DimensionRow[], previousRows: DimensionRow[] @@ -177,44 +223,20 @@ async function enrichSegments( window: SignalWindow, queryFn: QueryFn ): Promise { + const query = queryPeriodPair(websiteId, timezone, window, queryFn); const results = await Promise.all( DIMENSION_CONFIGS.map(async ({ dimension, queryType }) => { - const [currentRows, previousRows] = await Promise.all([ - queryFn( - { - projectId: websiteId, - type: queryType, - from: window.currentFrom, - to: window.currentTo, - timezone, - limit: SEGMENT_FETCH_LIMIT, - }, - undefined, - timezone - ), - queryFn( - { - projectId: websiteId, - type: queryType, - from: window.previousFrom, - to: window.previousTo, - timezone, - limit: SEGMENT_FETCH_LIMIT, - }, - undefined, - timezone - ), - ]); - + const [currentRows, previousRows] = await query( + queryType, + SEGMENT_FETCH_LIMIT + ); const topMovers = computeSegmentMovers( currentRows as DimensionRow[], previousRows as DimensionRow[] ); - return { dimension, topMovers }; }) ); - return results.filter((r) => r.topMovers.length > 0); } @@ -224,54 +246,11 @@ async function enrichErrors( window: SignalWindow, queryFn: QueryFn ): Promise { - const [currentSummary, previousSummary, currentTypes, previousTypes] = + const query = queryPeriodPair(websiteId, timezone, window, queryFn); + const [[currentSummary, previousSummary], [currentTypes, previousTypes]] = await Promise.all([ - queryFn( - { - projectId: websiteId, - type: "error_summary", - from: window.currentFrom, - to: window.currentTo, - timezone, - }, - undefined, - timezone - ), - queryFn( - { - projectId: websiteId, - type: "error_summary", - from: window.previousFrom, - to: window.previousTo, - timezone, - }, - undefined, - timezone - ), - queryFn( - { - projectId: websiteId, - type: "error_types", - from: window.currentFrom, - to: window.currentTo, - timezone, - limit: SEGMENT_FETCH_LIMIT, - }, - undefined, - timezone - ), - queryFn( - { - projectId: websiteId, - type: "error_types", - from: window.previousFrom, - to: window.previousTo, - timezone, - limit: SEGMENT_FETCH_LIMIT, - }, - undefined, - timezone - ), + query("error_summary"), + query("error_types", SEGMENT_FETCH_LIMIT), ]); const currentRow = (currentSummary[0] ?? {}) as ErrorSummaryRow; @@ -373,8 +352,54 @@ async function enrichAnnotations( return await annotationQueryFn(websiteId, from, to); } -const LEADING_SLASH_RE = /^\//; -const WORD_SPLIT_RE = /[\s:()]+/; +async function enrichVitals( + websiteId: string, + timezone: string, + window: SignalWindow, + queryFn: QueryFn +): Promise { + const query = queryPeriodPair(websiteId, timezone, window, queryFn); + const [currentVitals, previousVitals] = await query("vitals_overview"); + + interface VitalsRow { + metric_name?: string; + p75?: number; + samples?: number; + } + const currentMap = new Map( + (currentVitals as VitalsRow[]).map((r) => [r.metric_name, r]) + ); + const previousMap = new Map( + (previousVitals as VitalsRow[]).map((r) => [r.metric_name, r]) + ); + + const metrics: VitalsContext["metrics"] = []; + for (const name of ["LCP", "INP", "CLS", "FCP", "TTFB"]) { + const cur = currentMap.get(name); + const prev = previousMap.get(name); + const curVal = cur?.p75 ?? 0; + const prevVal = prev?.p75 ?? 0; + if ( + curVal === 0 || + prevVal === 0 || + (cur?.samples ?? 0) < 5 || + (prev?.samples ?? 0) < 5 + ) { + continue; + } + const pct = safeDeltaPercent(curVal, prevVal); + if (Math.abs(pct) >= 15) { + metrics.push({ + name, + currentP75: curVal, + previousP75: prevVal, + deltaPercent: Number(pct.toFixed(1)), + }); + } + } + + return metrics.length > 0 ? { metrics } : undefined; +} function extractSignalKeywords(signals: EnrichedSignal[]): string[] { const keywords = new Set(); @@ -382,15 +407,17 @@ function extractSignalKeywords(signals: EnrichedSignal[]): string[] { keywords.add(s.metric); for (const seg of s.segments) { for (const m of seg.topMovers) { - const path = m.name.replace(LEADING_SLASH_RE, "").split("/")[0]; - if (path && path.length > 2) { - keywords.add(path.toLowerCase()); + const segment = m.name.split("/").find((p) => p.length > 2); + if (segment) { + keywords.add(segment.toLowerCase()); } } } if (s.errorContext) { for (const err of s.errorContext.topNewErrors) { - const words = err.split(WORD_SPLIT_RE).filter((w) => w.length > 3); + const words = err + .split(ERROR_WORD_SPLIT_RE) + .filter((w) => w.length > 3); for (const w of words.slice(0, 3)) { keywords.add(w.toLowerCase()); } @@ -426,16 +453,16 @@ async function enrichGitHub( } interface GHCommit { - sha?: string; commit?: { message?: string; author?: { name?: string; date?: string }; }; + sha?: string; } interface GHPR { + merged_at?: string | null; number?: number; title?: string; - merged_at?: string | null; user?: { login?: string }; } @@ -467,9 +494,8 @@ async function enrichGitHub( if (!detail || typeof detail !== "object" || "error" in detail) { return null; } - const files = ( - (detail as { files?: Array<{ filename: string }> }).files ?? [] - ); + const files = + (detail as { files?: Array<{ filename: string }> }).files ?? []; const changedFiles = files.map((f) => f.filename); const relevant = signalKeywords.some( (kw) => @@ -514,16 +540,19 @@ export async function enrichSignals( signals.map(async (signal) => { const window = computeWindow(signal, lookbackDays); - const [segments, errorContext, signalAnnotations] = await Promise.all([ - enrichSegments(websiteId, timezone, window, queryFn), - enrichErrors(websiteId, timezone, window, queryFn), - enrichAnnotations(websiteId, window, annotationQueryFn), - ]); + const [segments, errorContext, vitalsContext, signalAnnotations] = + await Promise.all([ + enrichSegments(websiteId, timezone, window, queryFn), + enrichErrors(websiteId, timezone, window, queryFn), + enrichVitals(websiteId, timezone, window, queryFn), + enrichAnnotations(websiteId, window, annotationQueryFn), + ]); return { ...signal, segments, errorContext, + vitalsContext, annotations: signalAnnotations, } as EnrichedSignal; }) diff --git a/apps/insights/src/generation.ts b/apps/insights/src/generation.ts index c67e7d090..198c2575f 100644 --- a/apps/insights/src/generation.ts +++ b/apps/insights/src/generation.ts @@ -1,5 +1,8 @@ import type { AppContext } from "@databuddy/ai/config/context"; -import { ANTHROPIC_CACHE_1H, models } from "@databuddy/ai/config/models"; +import { + ANTHROPIC_CACHE_1H, + createModelFromId, +} from "@databuddy/ai/config/models"; import { insightDedupeKey } from "@databuddy/ai/insights/dedupe"; import { hasWebInsightData } from "@databuddy/ai/insights/fetch-context"; import type { @@ -11,6 +14,7 @@ import { getAILogger } from "@databuddy/ai/lib/ai-logger"; import { storeAnalyticsSummary } from "@databuddy/ai/lib/supermemory"; import type { ParsedInsight } from "@databuddy/ai/schemas/smart-insights-output"; import { insightSchema } from "@databuddy/ai/schemas/smart-insights-output"; +import { createToolkit } from "@databuddy/ai/tools/toolkit"; import { createInsightsAgentTools } from "@databuddy/ai/tools/insights-agent-tools"; import { and, @@ -24,32 +28,37 @@ import { sql, } from "@databuddy/db"; import { - account, analyticsInsights, - annotations, type InsightGenerationConfigSnapshot, type InsightGenerationTool, - member, websites, } from "@databuddy/db/schema"; import { invalidateAgentContextSnapshotsForWebsite, invalidateInsightsCachesForOrganization, } from "@databuddy/redis"; -import { createGitHubTools } from "@databuddy/ai/tools/github-tools"; -import { createScrapeTools } from "@databuddy/ai/tools/scrape-page"; +import { getCachedSiteContext } from "@databuddy/ai/tools/scrape-page"; +import { getOAuthToken } from "@databuddy/ai/tools/utils/oauth-token"; import { stepCountIs, tool, ToolLoopAgent } from "ai"; import { randomUUIDv7 } from "bun"; import dayjs from "dayjs"; import { detectSignals } from "./detection"; import { enrichSignals, type EnrichedSignal } from "./enrichment"; +import { + buildInvestigationPrompt, + buildSystemPrompt, + fetchDismissedPatterns, + fetchRecentAnnotations, + fetchRecentInsightsForPrompt, + formatOrgWebsitesContext, + type OrgWebsiteRow, +} from "./prompts"; import { captureInsightsError, emitInsightsEvent, setInsightsLog, } from "./lib/evlog-insights"; -const RECENT_INSIGHTS_PROMPT_LIMIT = 12; const DEFAULT_MAX_INSIGHTS = 2; const TOOL_NAMES = [ "web_metrics", @@ -58,11 +67,15 @@ const TOOL_NAMES = [ "business_context", ] as const satisfies readonly InsightGenerationTool[]; -interface OrgWebsiteRow { - domain: string; - id: string; - name: string | null; -} +const ALWAYS_ON_TOOLS = new Set([ + "execute_sql", + "scrape_page", + "search_console", + "create_annotation", + "update_goal", + "create_funnel", + "create_goal", +]); interface GeneratedWebsiteInsight extends ParsedInsight { id: string; @@ -94,10 +107,6 @@ function maxInsights(config: InsightGenerationConfigSnapshot): number { ); } -function promptLookbackDays(config: InsightGenerationConfigSnapshot): number { - return Math.max(14, Math.min(180, config.lookbackDays * 2)); -} - function getComparisonPeriod(lookbackDays: number): WeekOverWeekPeriod { const days = Math.max(1, Math.min(90, lookbackDays)); const now = dayjs(); @@ -113,26 +122,38 @@ function getComparisonPeriod(lookbackDays: number): WeekOverWeekPeriod { }; } -function modelForTier(tier: InsightGenerationConfigSnapshot["modelTier"]) { +const INSIGHTS_MODELS = { + quick: createModelFromId("openai/gpt-5.4-mini"), + balanced: createModelFromId("anthropic/claude-sonnet-4.6"), + deep: createModelFromId("anthropic/claude-opus-4.7"), +}; + +function modelForTier( + tier: InsightGenerationConfigSnapshot["modelTier"], + hasCriticalSignals?: boolean +) { if (tier === "fast") { - return models.quick; + return INSIGHTS_MODELS.quick; } if (tier === "deep") { - return models.deep; + return INSIGHTS_MODELS.deep; } - return models.balanced; + if (tier === "balanced" && hasCriticalSignals) { + return INSIGHTS_MODELS.deep; + } + return INSIGHTS_MODELS.balanced; } function normalizeAllowedTools( tools: InsightGenerationConfigSnapshot["allowedTools"] ): InsightGenerationTool[] { const allowed = new Set( - tools.filter((tool): tool is InsightGenerationTool => - (TOOL_NAMES as readonly string[]).includes(tool) + tools.filter((t): t is InsightGenerationTool => + (TOOL_NAMES as readonly string[]).includes(t) ) ); allowed.add("web_metrics"); - return TOOL_NAMES.filter((tool) => allowed.has(tool)); + return TOOL_NAMES.filter((t) => allowed.has(t)); } function dedupeKeyFor(insight: GeneratedWebsiteInsight): string { @@ -186,225 +207,6 @@ async function fetchInsightDedupeKeyToIdMap( return map; } -async function fetchRecentAnnotations( - websiteId: string, - config: InsightGenerationConfigSnapshot -): Promise { - const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); - const rows = await db - .select({ - text: annotations.text, - xValue: annotations.xValue, - tags: annotations.tags, - }) - .from(annotations) - .where( - and( - eq(annotations.websiteId, websiteId), - gte(annotations.xValue, since), - isNull(annotations.deletedAt) - ) - ) - .orderBy(annotations.xValue) - .limit(20); - - if (rows.length === 0) { - return ""; - } - - const lines = rows.map((row) => { - const date = dayjs(row.xValue).format("YYYY-MM-DD"); - const tags = row.tags?.length ? ` [${row.tags.join(", ")}]` : ""; - return `- ${date}: ${row.text}${tags}`; - }); - - return `\n\nUser annotations (known events that may explain changes):\n${lines.join("\n")}`; -} - -async function fetchRecentInsightsForPrompt( - organizationId: string, - websiteId: string, - config: InsightGenerationConfigSnapshot -): Promise { - const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); - const rows = await db - .select({ - title: analyticsInsights.title, - type: analyticsInsights.type, - createdAt: analyticsInsights.createdAt, - }) - .from(analyticsInsights) - .where( - and( - eq(analyticsInsights.organizationId, organizationId), - eq(analyticsInsights.websiteId, websiteId), - gte(analyticsInsights.createdAt, since) - ) - ) - .orderBy(desc(analyticsInsights.createdAt)) - .limit(RECENT_INSIGHTS_PROMPT_LIMIT); - - if (rows.length === 0) { - return ""; - } - - const lines = rows.map( - (row) => - `- [${row.type}] ${row.title} (${dayjs(row.createdAt).format("YYYY-MM-DD")})` - ); - - return `\n\n## Recently reported insights for this website (avoid repeating the same narrative unless something materially changed)\n${lines.join("\n")}`; -} - -function formatOrgWebsitesContext( - orgSites: OrgWebsiteRow[], - currentWebsiteId: string -): string { - if (orgSites.length <= 1) { - return ""; - } - const sorted = [...orgSites].sort((a, b) => - a.domain.localeCompare(b.domain, "en") - ); - const lines = sorted.map((site) => { - const label = site.name?.trim() ? site.name.trim() : site.domain; - const marker = - site.id === currentWebsiteId - ? " - metrics below are for this site only" - : ""; - return `- ${label} (${site.domain})${marker}`; - }); - return `## Organization websites (same account, separate analytics) -Each row is a different tracked property (e.g. marketing site vs app vs docs). The period metrics in this message apply only to the site marked "metrics below". Do not blend numbers across rows. If referrers include another domain from this list, treat it as cross-property traffic and name both sides clearly. - -${lines.join("\n")} - -`; -} - -function buildSystemPrompt( - config: InsightGenerationConfigSnapshot, - options?: { investigationMode?: boolean } -): string { - const targetCount = maxInsights(config); - const depthInstruction = - config.depth === "light" - ? "Use the smallest useful tool set. Prefer 1-2 high-confidence insights and skip speculative cross-domain analysis." - : config.depth === "deep" - ? "Actively cross-check web, product, ops, and business context when those tools are enabled. Prefer a fuller ranked set, but only when signals are distinct and data-backed." - : "Explore enough context to produce concise, distinct, high-confidence insights without over-querying."; - - return `You are an analytics investigator. Return up to ${targetCount} insights ranked by business impact. ${depthInstruction} - -RULES: -- Write titles a founder can scan in 2 seconds. Lead with the outcome: "Checkout errors tripled after deploy" not "Error rate shows concerning trend". -- Titles must use plain language. Never use technical acronyms (INP, LCP, FCP, TTFB, CLS, p75). Say "page load time" not "LCP", "interaction speed" not "INP", "layout stability" not "CLS". -- Only report signals that would change what someone does today. Silence over noise. -- Never use hedging words in titles (concerning, softened, slightly, worth watching). -- Never say "monitor" or "watch" in suggestions. Name the exact page, error, or component to fix. -- Do not invent causality. Cite evidence. Confidence > 0.7 requires segment isolation or temporal correlation. -- Call emit_insight for each finding. Include rootCause, evidence array, and investigationDepth. -- Metrics array: only include numbers you queried and verified. The primary metric must be a real measured value, not estimated or extrapolated. If the metric is segment-specific (e.g. "Google sessions"), label it clearly — do not put segment values where total values are expected. -- CRITICAL: your title's direction words MUST match the primary metric. If current > previous, use "rose/surged/jumped/up". If current < previous, use "fell/dropped/declined/down". A title saying "dropped" when the metric went up will be rejected. -- Keep copy tight: title ≤80 chars, description ≤480 chars, suggestion ≤400 chars. Insights exceeding these limits are dropped. -- Low-traffic sites (<50 sessions/week): do not claim percentage changes on segments with fewer than 10 absolute values. Focus on structural issues (missing tracking, unconfigured goals) instead of noisy metric movements. -- When you suspect a code change caused an issue, use github_search_code to check if the relevant code exists, github_commit_diff to see what changed, or github_read_file to inspect the current state. -- Use scrape_page on "/" early to understand the product: what it does, pricing, CTAs. Scrape specific pages that appear in anomalies. Product context makes insights actionable — reference specific pages, features, and CTAs by name.${ - options?.investigationMode - ? "\n- Investigate the detected signals using tools. Call emit_insight for each real finding. Drop noise." - : "" - }`; -} - -function formatSignalBlock(signal: EnrichedSignal, index: number): string { - const dir = signal.direction === "up" ? "+" : "-"; - const scope = - signal.method === "zscore" - ? `z=${signal.zScore}, latest day vs baseline mean` - : "WoW period total"; - const parts = [ - `${index + 1}. ${signal.label} ${dir}${Math.abs(signal.deltaPercent).toFixed(0)}% (${scope}, ${signal.severity}) — ${signal.current.toLocaleString()} vs ${signal.baseline.toLocaleString()}`, - ]; - - for (const seg of signal.segments) { - parts.push( - ` ${seg.dimension}: ${seg.topMovers.map((m) => `${m.name} ${m.deltaPercent > 0 ? "+" : ""}${m.deltaPercent}%`).join(", ")}` - ); - } - - if (signal.errorContext) { - const ec = signal.errorContext; - parts.push( - ` errors: ${ec.totalErrorsPrevious}->${ec.totalErrorsCurrent} (${ec.deltaPercent > 0 ? "+" : ""}${ec.deltaPercent}%)` - ); - if (ec.topNewErrors.length > 0) { - parts.push(` new: ${ec.topNewErrors.join(", ")}`); - } - } - - for (const a of signal.annotations) { - parts.push(` [${a.date}] ${a.title}`); - } - - if (signal.githubContext) { - const gc = signal.githubContext; - for (const c of gc.commits.slice(0, 3)) { - parts.push(` ${c.sha} ${c.message} (${c.date?.slice(0, 10)})`); - } - for (const pr of gc.recentPRs.slice(0, 3)) { - parts.push( - ` PR#${pr.number} ${pr.title} (${pr.mergedAt?.slice(0, 10)})` - ); - } - } - - return parts.join("\n"); -} - -function buildInvestigationPrompt( - enrichedSignals: EnrichedSignal[], - params: { - annotationContext: string; - config: InsightGenerationConfigSnapshot; - domain: string; - githubRepo?: { owner: string; repo: string }; - orgContext: string; - period: WeekOverWeekPeriod; - recentInsightsBlock: string; - timezone: string; - } -): string { - const { domain, period, timezone } = params; - const signalBlocks = enrichedSignals - .map((signal, i) => formatSignalBlock(signal, i)) - .join("\n\n"); - - const githubInstruction = params.githubRepo - ? `2. Call github_commits for ${params.githubRepo.owner}/${params.githubRepo.repo} with since/until dates matching the anomaly window. If commits correlate temporally with a metric change, check github_pull_requests for what shipped.` - : "2. If GitHub tools are available, call github_repos first, then github_commits with since/until dates matching the anomaly window."; - - return `Investigating ${enrichedSignals.length} statistical anomalies detected on ${domain}. -Period: ${period.current.from} to ${period.current.to} vs ${period.previous.from} to ${period.previous.to} -Timezone: ${timezone} - -Start by scraping "/" to understand the product, then investigate the signals below. - -SIGNALS: - -${signalBlocks} - -1. Scrape "/" for product context, then use web_metrics (period="both") and execute_sql to investigate. -${githubInstruction} -3. If a signal involves a specific page (path), scrape that page to see what's on it. - -Z-score signals compare ONE DAY against the baseline mean — they are NOT period totals. Always query summary_metrics with period="both" to get actual period totals before citing WoW changes. - -When you emit_insight: title direction MUST match the primary metric. If sessions went up, say "surged/jumped/rose" not "dropped/fell". Every number you cite must match its direction word — not just the title. If TTFB went from 2500 to 1500, say "improved" or "fell", not "rose". Use summary_metrics as the canonical source for headline numbers, execute_sql only for segment breakdowns. - -Low-traffic sites (<50 sessions/week): do not report percentage changes on segments with <10 absolute values. A 3-visitor change is not "43% decline" — it's noise. Focus on structural observations (missing tracking, unconfigured goals) rather than metric movements. -${params.orgContext}${params.annotationContext}${params.recentInsightsBlock}`; -} - function validateCollectedInsights( insights: ParsedInsight[], context: { @@ -434,7 +236,7 @@ async function analyzeWebsite(params: { organizationId: string; orgSites: OrgWebsiteRow[]; period: WeekOverWeekPeriod; - userId: string; + userId?: string; websiteId: string; }): Promise { const startedAt = performance.now(); @@ -473,21 +275,9 @@ async function analyzeWebsite(params: { timezone: params.config.timezone, }); if (signals.length > 0) { - let githubToken: string | null = null; - if (params.githubRepo) { - const [ghAccount] = await db - .select({ accessToken: account.accessToken }) - .from(account) - .innerJoin(member, eq(member.userId, account.userId)) - .where( - and( - eq(member.organizationId, params.organizationId), - eq(account.providerId, "github") - ) - ) - .limit(1); - githubToken = ghAccount?.accessToken ?? null; - } + const githubToken = params.githubRepo + ? await getOAuthToken("github", params.organizationId, params.userId) + : null; enrichedSignals = await enrichSignals(signals, { websiteId: params.websiteId, @@ -505,33 +295,40 @@ async function analyzeWebsite(params: { const investigationMode = enrichedSignals.length > 0; - const [annotationContext, recentInsightsBlock] = await Promise.all([ - fetchRecentAnnotations(params.websiteId, params.config), - fetchRecentInsightsForPrompt( - params.organizationId, - params.websiteId, - params.config - ), - ]); + const [annotationContext, recentInsightsBlock, siteContext, dismissedBlock] = + await Promise.all([ + fetchRecentAnnotations(params.websiteId, params.config), + fetchRecentInsightsForPrompt( + params.organizationId, + params.websiteId, + params.config + ), + getCachedSiteContext(params.domain), + fetchDismissedPatterns(params.organizationId, params.websiteId), + ]); const allowedTools = normalizeAllowedTools(params.config.allowedTools); const orgContext = formatOrgWebsitesContext( params.orgSites, params.websiteId ); + const siteBlock = siteContext + ? `\n\nProduct context (cached from homepage):\n${siteContext}` + : '\nScrape "/" first to understand the product.'; const userPrompt = investigationMode ? buildInvestigationPrompt(enrichedSignals, { domain: params.domain, githubRepo: params.githubRepo, period: params.period, - config: params.config, timezone: params.config.timezone, recentInsightsBlock, annotationContext, + dismissedBlock, orgContext, + siteContext: siteBlock, }) - : `Analyze ${params.domain} (${currentRange.from} to ${currentRange.to} vs ${previousRange.from} to ${previousRange.to}, ${params.config.timezone}). Start by scraping "/" to understand what the product does, then use web_metrics with period="both" to compare periods efficiently. -${orgContext}${annotationContext}${recentInsightsBlock}`; + : `Analyze ${params.domain} (${currentRange.from} to ${currentRange.to} vs ${previousRange.from} to ${previousRange.to}, ${params.config.timezone}). Use web_metrics with period="both" to compare periods efficiently.${siteBlock} +${orgContext}${annotationContext}${recentInsightsBlock}${dismissedBlock}`; const { tools: analyticsTools } = createInsightsAgentTools({ websiteId: params.websiteId, @@ -539,27 +336,24 @@ ${orgContext}${annotationContext}${recentInsightsBlock}`; timezone: params.config.timezone, periodBounds: { current: currentRange, previous: previousRange }, }); - const githubTools = investigationMode - ? createGitHubTools({ - organizationId: params.organizationId, - userId: params.userId, - }) - : {}; - const scrapeTools = createScrapeTools(params.domain); - const allTools = { ...analyticsTools, ...githubTools, ...scrapeTools }; + const investigationTools = createToolkit({ + capabilities: ["investigation", "mutations"], + domain: params.domain, + organizationId: params.organizationId, + userId: params.userId, + }); + const allTools = { ...analyticsTools, ...investigationTools }; + const isEnabled = (name: string) => + allowedTools.includes(name as InsightGenerationTool) || + name.startsWith("github_") || + ALWAYS_ON_TOOLS.has(name); const availableTools = Object.fromEntries( - Object.entries(allTools).filter( - ([name]) => - allowedTools.includes(name as InsightGenerationTool) || - name.startsWith("github_") || - name === "execute_sql" || - name === "scrape_page" - ) + Object.entries(allTools).filter(([name]) => isEnabled(name)) ) as typeof allTools; try { const appContext: AppContext = { - userId: params.userId, + userId: params.userId ?? "system", organizationId: params.organizationId, websiteId: params.websiteId, websiteDomain: params.domain, @@ -577,7 +371,7 @@ ${orgContext}${annotationContext}${recentInsightsBlock}`; description: "Call this when you have a finding worth reporting. Each call produces one insight. Call multiple times for multiple findings.", inputSchema: insightSchema, - execute: (insight) => { + execute: (insight: ParsedInsight) => { collected.push(insight); return `Insight recorded: "${insight.title}"`; }, @@ -590,14 +384,32 @@ ${orgContext}${annotationContext}${recentInsightsBlock}`; emit_insight: emitInsightTool, }; const agent = new ToolLoopAgent({ - model: ai.wrap(modelForTier(params.config.modelTier)), + model: ai.wrap( + modelForTier( + params.config.modelTier, + enrichedSignals.some((s) => s.severity === "critical") + ) + ), instructions: { role: "system", content: buildSystemPrompt(params.config, { investigationMode }), providerOptions: ANTHROPIC_CACHE_1H, }, tools: allToolsWithEmit, - stopWhen: stepCountIs(params.config.maxSteps), + stopWhen: (event) => { + if (stepCountIs(params.config.maxSteps)(event)) { + return true; + } + if ( + collected.length >= maxInsights(params.config) && + event.steps + .at(-1) + ?.toolCalls.some((tc) => tc?.toolName === "emit_insight") + ) { + return true; + } + return false; + }, onStepFinish: ({ usage, finishReason, toolCalls }) => { toolCallCount += toolCalls.length; emitInsightsEvent("info", "generation.agent.step_finished", { @@ -620,7 +432,7 @@ ${orgContext}${annotationContext}${recentInsightsBlock}`; source: "insights_worker", feature: "smart_insights", organizationId: params.organizationId, - userId: params.userId, + userId: params.userId ?? "system", websiteId: params.websiteId, websiteDomain: params.domain, timezone: params.config.timezone, @@ -717,26 +529,8 @@ async function persistWebsiteInsights(params: { return []; } - const updatePayload = { - runId: params.runId, - timezone: params.config.timezone, - currentPeriodFrom: params.period.current.from, - currentPeriodTo: params.period.current.to, - previousPeriodFrom: params.period.previous.from, - previousPeriodTo: params.period.previous.to, - createdAt: new Date(), - }; - - const insightsWithKeys = finalInsights.map((insight) => { - const key = dedupeKeyFor(insight); - const existingId = dedupeKeyToId.get(key); - const isRefresh = existingId !== undefined && insight.id === existingId; - return { insight, key, isRefresh }; - }); - - const toInsert = insightsWithKeys - .filter((i) => !i.isRefresh) - .map(({ insight, key }) => ({ + function insightRow(insight: GeneratedWebsiteInsight, key: string) { + return { id: insight.id, organizationId: params.organizationId, websiteId: insight.websiteId, @@ -757,6 +551,7 @@ async function persistWebsiteInsights(params: { rootCause: insight.rootCause ?? null, evidence: insight.evidence ?? null, investigationDepth: insight.investigationDepth ?? null, + actions: insight.actions ?? null, metrics: insight.metrics.length > 0 ? (insight.metrics as InsightMetricRow[]) @@ -766,11 +561,26 @@ async function persistWebsiteInsights(params: { currentPeriodTo: params.period.current.to, previousPeriodFrom: params.period.previous.from, previousPeriodTo: params.period.previous.to, - })); + }; + } + + const insightsWithKeys = finalInsights.map((insight) => { + const key = dedupeKeyFor(insight); + const existingId = dedupeKeyToId.get(key); + const isRefresh = existingId !== undefined && insight.id === existingId; + return { insight, key, isRefresh }; + }); + + const toInsert = insightsWithKeys + .filter((i) => !i.isRefresh) + .map(({ insight, key }) => insightRow(insight, key)); const toRefresh = insightsWithKeys .filter((i) => i.isRefresh) - .map((i) => i.insight); + .map(({ insight, key }) => ({ + id: insight.id, + row: insightRow(insight, key), + })); if (toInsert.length > 0) { await db @@ -802,38 +612,14 @@ async function persistWebsiteInsights(params: { rootCause: sql.raw("excluded.root_cause"), evidence: sql.raw("excluded.evidence"), investigationDepth: sql.raw("excluded.investigation_depth"), + actions: sql.raw("excluded.actions"), metrics: sql.raw("excluded.metrics"), }, }); } await Promise.all( - toRefresh.map((insight) => - db - .update(analyticsInsights) - .set({ - ...updatePayload, - title: insight.title, - description: insight.description, - suggestion: insight.suggestion, - severity: insight.severity, - sentiment: insight.sentiment, - type: insight.type, - priority: insight.priority, - changePercent: insight.changePercent ?? null, - dedupeKey: dedupeKeyFor(insight), - subjectKey: insight.subjectKey, - sources: insight.sources, - confidence: insight.confidence, - impactSummary: insight.impactSummary ?? null, - rootCause: insight.rootCause ?? null, - evidence: insight.evidence ?? null, - investigationDepth: insight.investigationDepth ?? null, - metrics: - insight.metrics.length > 0 - ? (insight.metrics as InsightMetricRow[]) - : null, - }) - .where(eq(analyticsInsights.id, insight.id)) + toRefresh.map(({ id, row }) => + db.update(analyticsInsights).set(row).where(eq(analyticsInsights.id, id)) ) ); @@ -968,7 +754,7 @@ export async function generateWebsiteInsights( }); const period = getComparisonPeriod(input.config.lookbackDays); - const userId = input.requestedByUserId ?? "insights-worker"; + const userId = input.requestedByUserId ?? undefined; const ghIntegration = site.integrations?.github as | { owner: string; repo: string } | undefined; diff --git a/apps/insights/src/prompts.ts b/apps/insights/src/prompts.ts new file mode 100644 index 000000000..ceb7c309c --- /dev/null +++ b/apps/insights/src/prompts.ts @@ -0,0 +1,289 @@ +import type { WeekOverWeekPeriod } from "@databuddy/ai/insights/types"; +import { and, db, desc, eq, gte, isNull } from "@databuddy/db"; +import { + analyticsInsights, + annotations, + type InsightGenerationConfigSnapshot, + insightUserFeedback, +} from "@databuddy/db/schema"; +import dayjs from "dayjs"; +import type { EnrichedSignal } from "./enrichment"; + +const RECENT_INSIGHTS_PROMPT_LIMIT = 12; + +export function promptLookbackDays( + config: InsightGenerationConfigSnapshot +): number { + return Math.max(14, Math.min(180, config.lookbackDays * 2)); +} + +export async function fetchRecentAnnotations( + websiteId: string, + config: InsightGenerationConfigSnapshot +): Promise { + const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); + const rows = await db + .select({ + text: annotations.text, + xValue: annotations.xValue, + tags: annotations.tags, + }) + .from(annotations) + .where( + and( + eq(annotations.websiteId, websiteId), + gte(annotations.xValue, since), + isNull(annotations.deletedAt) + ) + ) + .orderBy(annotations.xValue) + .limit(20); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map((row) => { + const date = dayjs(row.xValue).format("YYYY-MM-DD"); + const tags = row.tags?.length ? ` [${row.tags.join(", ")}]` : ""; + return `- ${date}: ${row.text}${tags}`; + }); + + return `\n\nUser annotations (known events that may explain changes):\n${lines.join("\n")}`; +} + +export async function fetchDismissedPatterns( + organizationId: string, + websiteId: string +): Promise { + const since = dayjs().subtract(30, "day").toDate(); + const rows = await db + .select({ + title: analyticsInsights.title, + type: analyticsInsights.type, + }) + .from(insightUserFeedback) + .innerJoin( + analyticsInsights, + eq(insightUserFeedback.insightId, analyticsInsights.id) + ) + .where( + and( + eq(insightUserFeedback.organizationId, organizationId), + eq(analyticsInsights.websiteId, websiteId), + eq(insightUserFeedback.vote, "down"), + gte(insightUserFeedback.createdAt, since) + ) + ) + .orderBy(desc(insightUserFeedback.createdAt)) + .limit(10); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map((r) => `- [${r.type}] ${r.title}`); + return `\n\nInsights users marked as NOT helpful (avoid similar narratives):\n${lines.join("\n")}`; +} + +export async function fetchRecentInsightsForPrompt( + organizationId: string, + websiteId: string, + config: InsightGenerationConfigSnapshot +): Promise { + const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); + const rows = await db + .select({ + title: analyticsInsights.title, + type: analyticsInsights.type, + createdAt: analyticsInsights.createdAt, + }) + .from(analyticsInsights) + .where( + and( + eq(analyticsInsights.organizationId, organizationId), + eq(analyticsInsights.websiteId, websiteId), + gte(analyticsInsights.createdAt, since) + ) + ) + .orderBy(desc(analyticsInsights.createdAt)) + .limit(RECENT_INSIGHTS_PROMPT_LIMIT); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map( + (row) => + `- [${row.type}] ${row.title} (${dayjs(row.createdAt).format("YYYY-MM-DD")})` + ); + + return `\n\nRecently reported (avoid repeating unless materially changed):\n${lines.join("\n")}`; +} + +export interface OrgWebsiteRow { + domain: string; + id: string; + name: string | null; +} + +export function formatOrgWebsitesContext( + orgSites: OrgWebsiteRow[], + currentWebsiteId: string +): string { + if (orgSites.length <= 1) { + return ""; + } + + const sorted = [...orgSites].sort((a, b) => + a.domain.localeCompare(b.domain, "en") + ); + const lines = sorted.map((site) => { + const label = site.name?.trim() ? site.name.trim() : site.domain; + const marker = + site.id === currentWebsiteId + ? " - metrics below are for this site only" + : ""; + return `- ${label} (${site.domain})${marker}`; + }); + return `## Organization websites\n${lines.join("\n")}\n\n`; +} + +export function buildSystemPrompt( + config: InsightGenerationConfigSnapshot, + options?: { investigationMode?: boolean } +): string { + const targetCount = Math.max( + 1, + Math.min(10, config.maxInsightsPerWebsite ?? 2) + ); + const depthInstruction = + config.depth === "light" + ? "Use the smallest useful tool set. Prefer 1-2 high-confidence insights." + : config.depth === "deep" + ? "Actively cross-check web, product, ops, and business context." + : "Explore enough context for concise, distinct, high-confidence insights."; + + return `You are an analytics investigator. Return up to ${targetCount} insights ranked by business impact. ${depthInstruction} + +RULES: +- Titles: outcome-first, plain language, ≤80 chars. No hedging, no jargon (INP, LCP, TTFB, CLS, p75). +- Title direction MUST match the primary metric. Mismatches are rejected. +- Only report signals that change what someone does today. Silence > noise. +- Suggestions: name the exact page, button, or query. Never say "monitor" or "watch". +- ZERO REPETITION: title = what. description = so what (≤300 chars). rootCause = why. evidence = new facts only. suggestion = one action (≤300 chars). +- Metrics: only verified numbers. Label segment-specific values clearly. +- Low traffic (<50 sessions/week): no percentage claims on <10 absolute values. +- Tools: batch queries in web_metrics (up to 8). search_console for keywords. summary_metrics for headline numbers. +- Confidence > 0.7 requires segment isolation or temporal correlation. +- Actions: include when fixable (fix_goal, add_custom_event, create_annotation, create_funnel, add_tracking, investigate_further, code_fix). +- code_fix: when you find a bug with a clear fix, emit a code_fix action with params {prompt, file_hint, error_message}. The prompt should be paste-ready for Cursor or Claude Code — include the exact file to change, what to change, and why. +- You have mutation tools: call create_annotation directly to mark deploys or incidents on the timeline. Call update_goal to fix goal target mismatches. Use confirmed=true to execute.${ + options?.investigationMode + ? "\n- Investigate detected signals using tools. Call emit_insight for each finding. Drop noise." + : "" + }`; +} + +export function formatSignalBlock( + signal: EnrichedSignal, + index: number +): string { + const dir = signal.direction === "up" ? "+" : "-"; + const scope = + signal.method === "zscore" + ? `z=${signal.zScore}, latest day vs baseline` + : "WoW"; + const parts = [ + `${index + 1}. ${signal.label} ${dir}${Math.abs(signal.deltaPercent).toFixed(0)}% (${scope}, ${signal.severity}) — ${signal.current.toLocaleString()} vs ${signal.baseline.toLocaleString()}`, + ]; + + for (const seg of signal.segments) { + parts.push( + ` ${seg.dimension}: ${seg.topMovers.map((m) => `${m.name} ${m.deltaPercent > 0 ? "+" : ""}${m.deltaPercent}%`).join(", ")}` + ); + } + + if (signal.errorContext) { + const ec = signal.errorContext; + parts.push( + ` errors: ${ec.totalErrorsPrevious}->${ec.totalErrorsCurrent} (${ec.deltaPercent > 0 ? "+" : ""}${ec.deltaPercent}%)` + ); + if (ec.topNewErrors.length > 0) { + parts.push(` new: ${ec.topNewErrors.join(", ")}`); + } + } + + if (signal.vitalsContext) { + const vitals = signal.vitalsContext.metrics + .map( + (m) => + `${m.name} p75: ${m.previousP75}→${m.currentP75} (${m.deltaPercent > 0 ? "+" : ""}${m.deltaPercent}%)` + ) + .join(", "); + parts.push(` vitals: ${vitals}`); + } + + for (const a of signal.annotations) { + parts.push(` [${a.date}] ${a.title}`); + } + + if (signal.githubContext) { + const gc = signal.githubContext; + for (const c of gc.commits.slice(0, 3)) { + parts.push(` ${c.sha} ${c.message} (${c.date?.slice(0, 10)})`); + } + for (const pr of gc.recentPRs.slice(0, 3)) { + parts.push( + ` PR#${pr.number} ${pr.title} (${pr.mergedAt?.slice(0, 10)})` + ); + } + } + + return parts.join("\n"); +} + +export function buildInvestigationPrompt( + enrichedSignals: EnrichedSignal[], + params: { + annotationContext: string; + dismissedBlock: string; + domain: string; + githubRepo?: { owner: string; repo: string }; + orgContext: string; + period: WeekOverWeekPeriod; + recentInsightsBlock: string; + siteContext: string; + timezone: string; + } +): string { + const { domain, period, timezone } = params; + const signalBlocks = enrichedSignals + .map((signal, i) => formatSignalBlock(signal, i)) + .join("\n\n"); + + const githubInstruction = params.githubRepo + ? `2. github_commits for ${params.githubRepo.owner}/${params.githubRepo.repo} with dates matching the anomaly window.` + : "2. If GitHub tools are available, check commits matching the anomaly window."; + + return `Investigating ${enrichedSignals.length} anomalies on ${domain}. +Period: ${period.current.from} to ${period.current.to} vs ${period.previous.from} to ${period.previous.to} (${timezone}) +${params.siteContext} + +SIGNALS: + +${signalBlocks} + +STRATEGY: +1. web_metrics period="both" to confirm signals and get segment breakdowns. Batch queries. +${githubInstruction} +3. search_console for keyword/page changes between periods. +4. For errors: get messages and affected pages. Scrape the page if needed. +5. For conversion/funnel changes: check product_metrics for funnel/goal data. +6. For user behavior: use interesting_sessions or session_list to find specific sessions that dropped off, then session_events to see what they did. +7. When you find something fixable, execute it: call create_annotation to mark deploys, update_goal to fix targets. Use confirmed=true. +8. Emit findings via emit_insight as you go. + +summary_metrics is the canonical source for headline numbers. +${params.orgContext}${params.annotationContext}${params.recentInsightsBlock}${params.dismissedBlock}`; +} diff --git a/apps/slack/src/slack/respond.test.ts b/apps/slack/src/slack/respond.test.ts index 90d6dea10..61795c9e2 100644 --- a/apps/slack/src/slack/respond.test.ts +++ b/apps/slack/src/slack/respond.test.ts @@ -5,7 +5,7 @@ import { SLACK_COPY } from "@/slack/messages"; import { streamAgentToSlack } from "@/slack/respond"; import type { SlackAgentClient } from "@/slack/types"; -function createStreamClient(startTs = "stream_ts") { +function createStreamClient(startTs: string | null = "stream_ts") { const calls: Array<{ method: string; options: unknown }> = []; const client: Pick = { chat: { @@ -15,6 +15,7 @@ function createStreamClient(startTs = "stream_ts") { }, startStream: async (options) => { calls.push({ method: "chat.startStream", options }); + if (startTs === null) return { ok: false, error: "not_allowed" }; return { ok: true, ts: startTs }; }, stopStream: async (options) => { @@ -23,14 +24,25 @@ function createStreamClient(startTs = "stream_ts") { }, }, }; + return { calls, client }; +} + +const silentLogger = { error: () => {}, warn: () => {} }; + +function baseRun() { return { - calls, - client, + channelId: "C123", + messageTs: "171234.567", + teamId: "T123", + text: "What changed?", + threadTs: "171234.567", + trigger: "app_mention" as const, + userId: "U123", }; } describe("Databuddy Slack response streaming", () => { - it("starts streams with answer text, not a loading placeholder", async () => { + it("shows a thinking indicator then streams the answer", async () => { const originalDateNow = Date.now; let now = 0; const { calls, client } = createStreamClient(); @@ -47,19 +59,8 @@ describe("Databuddy Slack response streaming", () => { result = await streamAgentToSlack({ agent, client, - logger: { - error: () => {}, - warn: () => {}, - }, - run: { - channelId: "C123", - messageTs: "171234.567", - teamId: "T123", - text: "What changed?", - threadTs: "171234.567", - trigger: "app_mention", - userId: "U123", - }, + logger: silentLogger, + run: baseRun(), say: async () => {}, }); } finally { @@ -71,17 +72,36 @@ describe("Databuddy Slack response streaming", () => { responseTs: "stream_ts", streamed: true, }); + expect(calls[0]).toEqual({ method: "chat.startStream", + options: expect.objectContaining({ + chunks: [ + expect.objectContaining({ + type: "task_update", + status: "in_progress", + }), + ], + task_display_mode: "plan", + }), + }); + + expect(calls[1]).toEqual({ + method: "chat.appendStream", options: expect.objectContaining({ markdown_text: "Traffic is up 12%.", + chunks: [ + expect.objectContaining({ + type: "task_update", + status: "complete", + }), + ], }), }); - expect(calls[0]?.options).not.toEqual( - expect.objectContaining({ markdown_text: SLACK_COPY.streamOpening }) - ); - expect(calls.map((call) => call.method)).toEqual([ + + expect(calls.map((c) => c.method)).toEqual([ "chat.startStream", + "chat.appendStream", "chat.stopStream", ]); }); @@ -98,34 +118,20 @@ describe("Databuddy Slack response streaming", () => { const result = await streamAgentToSlack({ agent, client, - logger: { - error: () => {}, - warn: () => {}, - }, - run: { - channelId: "C123", - messageTs: "171234.567", - teamId: "T123", - text: "say something nice", - threadTs: "171234.567", - trigger: "app_mention", - userId: "U123", - }, + logger: silentLogger, + run: baseRun(), say: async () => {}, }); expect(result).toMatchObject({ ok: false, streamed: true }); - expect(calls.map((call) => call.method)).toEqual([ - "chat.startStream", - "chat.stopStream", - ]); - expect(calls.at(-1)?.options).not.toHaveProperty("markdown_text"); + + const stopCall = calls.find((c) => c.method === "chat.stopStream"); + expect(stopCall?.options).not.toHaveProperty("markdown_text"); expect(JSON.stringify(calls)).not.toContain(SLACK_COPY.agentFailure); }); - it("surfaces user-facing agent errors instead of the generic failure copy", async () => { + it("surfaces user-facing agent errors in the stream", async () => { const { calls, client } = createStreamClient(); - const sayCalls: Array<{ text: string; thread_ts?: string }> = []; const agent: Pick = { async *stream() { throw new DatabuddyAgentUserError({ @@ -139,19 +145,47 @@ describe("Databuddy Slack response streaming", () => { const result = await streamAgentToSlack({ agent, client, - logger: { - error: () => {}, - warn: () => {}, - }, - run: { - channelId: "C123", - messageTs: "171234.567", - teamId: "T123", - text: "top pages", - threadTs: "171234.567", - trigger: "app_mention", - userId: "U123", + logger: silentLogger, + run: baseRun(), + say: async () => {}, + }); + + expect(result).toMatchObject({ + ok: false, + responseTs: "stream_ts", + streamed: true, + }); + + const thinkingResolve = calls.find( + (c) => + c.method === "chat.appendStream" && + JSON.stringify(c.options).includes('"error"'), + ); + expect(thinkingResolve).toBeDefined(); + + const stopCall = calls.find((c) => c.method === "chat.stopStream"); + expect(getStringOption(stopCall?.options, "markdown_text")).toBe( + "You're out of Databunny credits this month. Upgrade or wait for the monthly reset.", + ); + }); + + it("falls back to say when streaming is unavailable", async () => { + const { client } = createStreamClient(null); + const sayCalls: Array<{ text: string; thread_ts?: string }> = []; + const agent: Pick = { + async *stream() { + throw new DatabuddyAgentUserError({ + code: "agent_credits_exhausted", + message: "No credits left.", + }); }, + }; + + const result = await streamAgentToSlack({ + agent, + client, + logger: silentLogger, + run: baseRun(), say: async (message) => { sayCalls.push(message); return { ok: true, ts: "say_ts" }; @@ -163,14 +197,7 @@ describe("Databuddy Slack response streaming", () => { responseTs: "say_ts", streamed: false, }); - expect(calls).toEqual([]); - expect(sayCalls).toEqual([ - { - text: "You're out of Databunny credits this month. Upgrade or wait for the monthly reset.", - thread_ts: "171234.567", - }, - ]); - expect(sayCalls[0]?.text).not.toBe(SLACK_COPY.agentFailure); + expect(sayCalls[0]?.text).toBe("No credits left."); }); it("does not start a new Slack response when the run is already aborted", async () => { @@ -193,19 +220,8 @@ describe("Databuddy Slack response streaming", () => { abortSignal: controller.signal, agent, client, - logger: { - error: () => {}, - warn: () => {}, - }, - run: { - channelId: "C123", - messageTs: "171234.567", - teamId: "T123", - text: "say something nice", - threadTs: "171234.567", - trigger: "app_mention", - userId: "U123", - }, + logger: silentLogger, + run: baseRun(), say: async (message) => { sayCalls.push(message); }, @@ -233,19 +249,8 @@ describe("Databuddy Slack response streaming", () => { await streamAgentToSlack({ agent, client, - logger: { - error: () => {}, - warn: () => {}, - }, - run: { - channelId: "C123", - messageTs: "171234.567", - teamId: "T123", - text: "top pages", - threadTs: "171234.567", - trigger: "app_mention", - userId: "U123", - }, + logger: silentLogger, + run: baseRun(), say: async () => {}, }); diff --git a/apps/slack/src/slack/respond.ts b/apps/slack/src/slack/respond.ts index 319c3c3a6..d02700683 100644 --- a/apps/slack/src/slack/respond.ts +++ b/apps/slack/src/slack/respond.ts @@ -9,6 +9,7 @@ import type { SlackAgentClient } from "@/slack/types"; const STREAM_FLUSH_INTERVAL_MS = 900; const STREAM_FLUSH_CHARS = 1200; const STREAM_APPEND_LIMIT_CHARS = 3500; +const THINKING_TASK_ID = "thinking"; const SLACK_USER_CANCELLED_CODES = new Set([ "message_not_found", @@ -60,88 +61,67 @@ export async function streamAgentToSlack({ run, say, }: StreamAgentToSlackOptions): Promise { - let streamTs: string | null = null; - let streamStartAttempted = false; + if (abortSignal?.aborted) { + return { + aborted: true, + answerChars: 0, + chunks: 0, + ok: false, + streamed: false, + }; + } + + const startedAt = performance.now(); + + const streamTs = run.threadTs + ? await startThinkingStream(client, run, logger, run.threadTs) + : null; + setSlackLog(eventLog, { slack_stream_started: Boolean(streamTs) }); + let pending = ""; let fullText = ""; let safeMarkdown = ""; - let chunks = 0; - let convertedComponentCount = 0; - let droppedComponentCount = 0; + let chunkCount = 0; + let convertedComponents = 0; + let droppedComponents = 0; let lastFlushAt = Date.now(); - const startedAt = performance.now(); - - const streamThreadTs = run.threadTs; - const shouldStream = Boolean(streamThreadTs); + let thinkingResolved = false; const flush = async (force = false) => { - if (!pending) { + if (!(pending && streamTs)) { return; } - - if (shouldStream && !streamTs && !streamStartAttempted) { - await tryStartStream(); - } - - if (!streamTs) { - return; - } - if (!pending) { - return; - } - - const shouldFlush = - force || - pending.length >= STREAM_FLUSH_CHARS || - Date.now() - lastFlushAt >= STREAM_FLUSH_INTERVAL_MS; - - if (!shouldFlush) { + if ( + !force && + pending.length < STREAM_FLUSH_CHARS && + Date.now() - lastFlushAt < STREAM_FLUSH_INTERVAL_MS + ) { return; } do { - const chunk = pending.slice(0, STREAM_APPEND_LIMIT_CHARS); - pending = pending.slice(chunk.length); + const text = pending.slice(0, STREAM_APPEND_LIMIT_CHARS); + pending = pending.slice(text.length); lastFlushAt = Date.now(); - if (streamTs && chunk.trim()) { + if (text.trim()) { await client.chat.appendStream({ channel: run.channelId, - markdown_text: chunk, + chunks: thinkingResolved + ? undefined + : [thinkingTaskChunk("complete")], + markdown_text: text, ts: streamTs, }); + thinkingResolved = true; } } while (force && pending); }; - const tryStartStream = async () => { - const initialText = pending.slice(0, STREAM_APPEND_LIMIT_CHARS); - if (!initialText.trim()) { - return; - } - if (!streamThreadTs) { - return; - } - - streamStartAttempted = true; - pending = pending.slice(initialText.length); - streamTs = await startSlackStream( - client, - run, - logger, - initialText, - streamThreadTs - ); - if (!streamTs) { - pending = initialText + pending; - } - setSlackLog(eventLog, { slack_stream_started: Boolean(streamTs) }); - }; - - const appendSafeSlackMarkdown = (streaming: boolean) => { + const renderIncremental = (streaming: boolean) => { const rendered = renderAgentOutputForSlack(fullText, { streaming }); - convertedComponentCount = rendered.convertedComponents; - droppedComponentCount = rendered.droppedComponents; + convertedComponents = rendered.convertedComponents; + droppedComponents = rendered.droppedComponents; if (rendered.markdown.startsWith(safeMarkdown)) { pending += rendered.markdown.slice(safeMarkdown.length); safeMarkdown = rendered.markdown; @@ -150,53 +130,57 @@ export async function streamAgentToSlack({ try { for await (const chunk of agent.stream(run, { abortSignal })) { - chunks++; + chunkCount++; fullText += chunk; - appendSafeSlackMarkdown(true); + renderIncremental(true); await flush(false); } - appendSafeSlackMarkdown(false); + renderIncremental(false); await flush(true); const finalText = safeMarkdown.trim(); - return streamTs - ? finishStreamedResponse({ - client, - convertedComponentCount, - droppedComponentCount, - eventLog, - finalText, - run, - chunks, - startedAt, - streamTs, - }) - : sendFinalSlackMessage({ - convertedComponentCount, - droppedComponentCount, - eventLog, - finalText, - run, - say, - chunks, - startedAt, - }); + if (streamTs) { + if (!thinkingResolved) { + await resolveThinking(client, run.channelId, streamTs, "complete"); + } + return finishStreamedResponse({ + client, + convertedComponents, + droppedComponents, + eventLog, + finalText, + run, + chunkCount, + startedAt, + streamTs, + }); + } + return sendFinalMessage({ + convertedComponents, + droppedComponents, + eventLog, + finalText, + run, + say, + chunkCount, + startedAt, + }); } catch (error) { + if (streamTs && !thinkingResolved) { + const status = + abortSignal?.aborted || + isAbortError(error) || + isSlackUserCancellation(error) + ? "complete" + : "error"; + await resolveThinking(client, run.channelId, streamTs, status); + } + if (abortSignal?.aborted || isAbortError(error)) { if (streamTs) { - await flush(true).catch((flushError) => - logger.warn("Failed to flush partial Slack stream", flushError) - ); - await client.chat - .stopStream({ - channel: run.channelId, - ts: streamTs, - }) - .catch((stopError) => - logger.warn("Failed to stop aborted Slack stream", stopError) - ); + await flushAndStop(client, run.channelId, streamTs, pending, logger); } - return abortedStreamResult(safeMarkdown, chunks, streamTs); + return abortedResult(safeMarkdown, chunkCount, streamTs); } if (isSlackUserCancellation(error)) { @@ -204,150 +188,126 @@ export async function streamAgentToSlack({ slack_stream_cancelled: true, slack_stream_cancelled_code: getSlackApiErrorCode(error), }); - return abortedStreamResult(safeMarkdown, chunks, streamTs); + return abortedResult(safeMarkdown, chunkCount, streamTs); } - const userFacingError = isDatabuddyAgentUserError(error) ? error : null; - const err = toError(error); - const slackApiCode = getSlackApiErrorCode(error); - setSlackLog(eventLog, { - slack_agent_error_code: userFacingError?.code, - slack_agent_error_message: err.message, - slack_agent_error_name: err.name, - slack_agent_error_user_facing: Boolean(userFacingError), - slack_api_error_code: slackApiCode, - }); - if (userFacingError) { - logger.warn("Slack agent returned a user-facing error", err); - eventLog?.warn(err.message, { - agent_error_code: userFacingError.code, - error_step: "agent_response", - }); - } else if (slackApiCode) { - logger.warn("Slack API rejected stream payload", err); - eventLog?.warn(err.message, { - error_step: "slack_api", - slack_api_error_code: slackApiCode, - }); - } else { - logger.error("Slack agent response failed", err); - eventLog?.error(err, { error_step: "agent_response" }); - } - appendSafeSlackMarkdown(false); + logStreamError(error, eventLog, logger); + renderIncremental(false); + const partialText = safeMarkdown.trim(); - const failureText = userFacingError?.message ?? SLACK_COPY.agentFailure; - if (partialText) { - await flush(true).catch((flushError) => - logger.warn("Failed to flush partial Slack stream", flushError) - ); - if (streamTs) { - await client.chat - .stopStream({ - channel: run.channelId, - ts: streamTs, - }) - .catch((stopError) => - logger.warn("Failed to stop Slack stream", stopError) - ); - return { - answerChars: partialText.length, - chunks, - ok: false, - responseTs: streamTs, - streamed: true, - }; - } - const response = await say({ - text: partialText, - thread_ts: run.threadTs, - }); - return { - answerChars: partialText.length, - chunks, - ok: false, - responseTs: getSlackMessageTs(response), - streamed: false, - }; - } - if (streamTs) { - await client.chat - .stopStream({ - channel: run.channelId, - markdown_text: failureText, - ts: streamTs, - }) - .catch((stopError) => - logger.warn("Failed to stop Slack stream", stopError) - ); - return { - answerChars: 0, - chunks, - ok: false, - responseTs: streamTs, - streamed: true, - }; - } - const response = await say({ - text: failureText, - thread_ts: run.threadTs, + const failureText = isDatabuddyAgentUserError(error) + ? error.message + : SLACK_COPY.agentFailure; + + return recoverFromError({ + client, + chunkCount, + failureText, + logger, + partialText, + pending, + run, + say, + streamTs, }); - return { - answerChars: 0, - chunks, - ok: false, - responseTs: getSlackMessageTs(response), - streamed: false, - }; } } -function abortedStreamResult( - safeMarkdown: string, - chunks: number, - streamTs: string | null -): StreamAgentToSlackResult { +function thinkingTaskChunk(status: "complete" | "error" | "in_progress") { return { - answerChars: safeMarkdown.trim().length, - aborted: true, - chunks, - ok: false, - responseTs: streamTs ?? undefined, - streamed: Boolean(streamTs), + id: THINKING_TASK_ID, + status, + title: SLACK_COPY.streamOpening, + type: "task_update" as const, }; } -interface SlackSuccessLogOptions { - chunks: number; - convertedComponentCount: number; - droppedComponentCount: number; +async function startThinkingStream( + client: Pick, + run: SlackAgentRun, + logger: LoggerLike, + threadTs: string +): Promise { + try { + const result = await client.chat.startStream({ + channel: run.channelId, + chunks: [thinkingTaskChunk("in_progress")], + recipient_team_id: run.teamId, + recipient_user_id: run.userId, + task_display_mode: "plan", + thread_ts: threadTs, + }); + + if ( + isRecord(result) && + result.ok === true && + typeof result.ts === "string" + ) { + return result.ts; + } + + logger.warn( + "Slack streaming unavailable", + isRecord(result) && typeof result.error === "string" + ? result.error + : undefined + ); + return null; + } catch (error) { + logger.warn("Slack streaming failed to start", error); + return null; + } +} + +async function resolveThinking( + client: Pick, + channelId: string, + streamTs: string, + status: "complete" | "error" +): Promise { + try { + await client.chat.appendStream({ + channel: channelId, + chunks: [thinkingTaskChunk(status)], + ts: streamTs, + }); + } catch { + // Non-critical — thinking card stays unresolved + } +} + +interface SuccessLogOptions { + chunkCount: number; + convertedComponents: number; + droppedComponents: number; eventLog?: RequestLogger; finalText: string; startedAt: number; } -function logSlackSuccess( +function logSuccess( { - chunks, - convertedComponentCount, - droppedComponentCount, + chunkCount, + convertedComponents, + droppedComponents, eventLog, finalText, startedAt, - }: SlackSuccessLogOptions, + }: SuccessLogOptions, extra: Record ) { setSlackLog(eventLog, { slack_answer_chars: finalText.length, - slack_components_converted: convertedComponentCount, - slack_components_dropped: droppedComponentCount, - slack_stream_chunks: chunks, + slack_components_converted: convertedComponents, + slack_components_dropped: droppedComponents, + slack_stream_chunks: chunkCount, "timing.slack_agent_response_ms": Math.round(performance.now() - startedAt), ...extra, }); } async function finishStreamedResponse( - options: SlackSuccessLogOptions & { + options: SuccessLogOptions & { client: Pick; run: SlackAgentRun; streamTs: string; @@ -358,85 +318,175 @@ async function finishStreamedResponse( markdown_text: options.finalText ? undefined : SLACK_COPY.noAnswer, ts: options.streamTs, }); - logSlackSuccess(options, { slack_streamed: true }); + logSuccess(options, { slack_streamed: true }); return { answerChars: options.finalText.length, - chunks: options.chunks, + chunks: options.chunkCount, ok: true, responseTs: options.streamTs, streamed: true, }; } -async function sendFinalSlackMessage( - options: SlackSuccessLogOptions & { run: SlackAgentRun; say: SayFn } +async function sendFinalMessage( + options: SuccessLogOptions & { run: SlackAgentRun; say: SayFn } ): Promise { const response = await options.say({ text: options.finalText || SLACK_COPY.noAnswer, thread_ts: options.run.threadTs, }); - const responseTs = getSlackMessageTs(response); - logSlackSuccess(options, { + const responseTs = getMessageTs(response); + logSuccess(options, { slack_response_ts: responseTs, slack_streamed: false, }); return { answerChars: options.finalText.length, - chunks: options.chunks, + chunks: options.chunkCount, ok: true, responseTs, streamed: false, }; } -async function startSlackStream( +async function flushAndStop( client: Pick, - run: SlackAgentRun, + channelId: string, + streamTs: string, + pending: string, logger: LoggerLike, - openingText: string, - threadTs: string -): Promise { - try { - const result = await client.chat.startStream({ - channel: run.channelId, - markdown_text: openingText, - recipient_team_id: run.teamId, - recipient_user_id: run.userId, - thread_ts: threadTs, - task_display_mode: "plan", - }); - - if ( - isRecord(result) && - result.ok === true && - typeof result.ts === "string" - ) { - return result.ts; - } + stopText?: string +): Promise { + if (pending.trim()) { + await client.chat + .appendStream({ + channel: channelId, + markdown_text: pending.slice(0, STREAM_APPEND_LIMIT_CHARS), + ts: streamTs, + }) + .catch((e) => logger.warn("Failed to flush partial Slack stream", e)); + } + await client.chat + .stopStream({ + channel: channelId, + ts: streamTs, + ...(stopText ? { markdown_text: stopText } : {}), + }) + .catch((e) => logger.warn("Failed to stop Slack stream", e)); +} - logger.warn( - "Slack streaming unavailable", - isRecord(result) && typeof result.error === "string" - ? result.error - : undefined +async function recoverFromError({ + client, + chunkCount, + failureText, + logger, + partialText, + pending, + run, + say, + streamTs, +}: { + client: Pick; + chunkCount: number; + failureText: string; + logger: LoggerLike; + partialText: string; + pending: string; + run: SlackAgentRun; + say: SayFn; + streamTs: string | null; +}): Promise { + if (streamTs) { + await flushAndStop( + client, + run.channelId, + streamTs, + pending, + logger, + partialText ? undefined : failureText ); - return null; - } catch (error) { - logger.warn("Slack streaming failed to start", error); - return null; + return { + answerChars: partialText.length, + chunks: chunkCount, + ok: false, + responseTs: streamTs, + streamed: true, + }; } + + const response = await say({ + text: partialText || failureText, + thread_ts: run.threadTs, + }); + return { + answerChars: partialText.length, + chunks: chunkCount, + ok: false, + responseTs: getMessageTs(response), + streamed: false, + }; } -function isRecord(value: unknown): value is Record { - return Boolean(value && typeof value === "object" && !Array.isArray(value)); +function logStreamError( + error: unknown, + eventLog: RequestLogger | undefined, + logger: LoggerLike +): void { + const userFacingError = isDatabuddyAgentUserError(error) ? error : null; + const err = toError(error); + const slackApiCode = getSlackApiErrorCode(error); + + setSlackLog(eventLog, { + slack_agent_error_code: userFacingError?.code, + slack_agent_error_message: err.message, + slack_agent_error_name: err.name, + slack_agent_error_user_facing: Boolean(userFacingError), + slack_api_error_code: slackApiCode, + }); + + if (userFacingError) { + logger.warn("Slack agent returned a user-facing error", err); + eventLog?.warn(err.message, { + agent_error_code: userFacingError.code, + error_step: "agent_response", + }); + } else if (slackApiCode) { + logger.warn("Slack API rejected stream payload", err); + eventLog?.warn(err.message, { + error_step: "slack_api", + slack_api_error_code: slackApiCode, + }); + } else { + logger.error("Slack agent response failed", err); + eventLog?.error(err, { error_step: "agent_response" }); + } +} + +function abortedResult( + safeMarkdown: string, + chunkCount: number, + streamTs: string | null +): StreamAgentToSlackResult { + return { + aborted: true, + answerChars: safeMarkdown.trim().length, + chunks: chunkCount, + ok: false, + responseTs: streamTs ?? undefined, + streamed: Boolean(streamTs), + }; } -function getSlackMessageTs(response: unknown): string | undefined { +function getMessageTs(response: unknown): string | undefined { return isRecord(response) && typeof response.ts === "string" ? response.ts : undefined; } +function isRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + function isAbortError(error: unknown): boolean { return ( (error instanceof DOMException && error.name === "AbortError") || diff --git a/bun.lock b/bun.lock index f23381797..eb20a89d5 100644 --- a/bun.lock +++ b/bun.lock @@ -64,7 +64,7 @@ "@orpc/openapi": "^1.14.0", "@orpc/server": "^1.14.0", "@orpc/zod": "^1.14.0", - "ai": "^6.0.154", + "ai": "^6.0.188", "autumn-js": "catalog:", "bullmq": "^5.66.5", "dayjs": "^1.11.19", @@ -162,7 +162,7 @@ "@types/leaflet": "^1.9.21", "@types/react-grid-layout": "^2.1.0", "@xyflow/react": "^12.10.1", - "ai": "^6.0.116", + "ai": "^6.0.188", "atmn": "^1.1.8", "autumn-js": "catalog:", "babel-plugin-react-compiler": "^19.1.0-rc.1-rc-af1b7da-20250421", @@ -338,7 +338,7 @@ "@databuddy/env": "workspace:*", "@databuddy/redis": "workspace:*", "@databuddy/rpc": "workspace:*", - "ai": "^6.0.154", + "ai": "^6.0.188", "bullmq": "^5.66.5", "dayjs": "^1.11.19", "elysia": "catalog:", @@ -441,7 +441,7 @@ "@modelcontextprotocol/sdk": "^1.26.0", "@orpc/server": "^1.14.0", "@tokenlens/models": "catalog:", - "ai": "^6.0.154", + "ai": "^6.0.188", "autumn-js": "catalog:", "dayjs": "^1.11.19", "drizzle-orm": "catalog:", @@ -582,7 +582,7 @@ "dependencies": { "@databuddy/ai": "workspace:*", "@databuddy/env": "workspace:*", - "ai": "^6.0.78", + "ai": "^6.0.188", }, }, "packages/mapper": { @@ -622,7 +622,7 @@ }, "packages/nuxt": { "name": "@databuddy/nuxt", - "version": "1.0.0", + "version": "2.4.20", "dependencies": { "@databuddy/sdk": "^2.4.20", "@nuxt/kit": "^4.4.5", @@ -852,13 +852,13 @@ "zod": "4.1.12", }, "packages": { - "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.115", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@vercel/oidc": "3.2.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-xonmGfN9pt54WdKqMzWe68BRYS3rsYvraBzioyA0gfNcecHs8Ir5qk/X8grJSyZ95hghjWiOphrK6bAc11E6SA=="], + "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.120", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@vercel/oidc": "3.2.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-MYKAeD2q7/sa1ZdqtL2tw0Me0B8Tok6Q/fhkJDhJl39dG8u+VBlWO9yk9lcdm784bM418o1EKObo4aOxs6+18Q=="], "@ai-sdk/provider": ["@ai-sdk/provider@3.0.10", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="], "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.27", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="], - "@ai-sdk/react": ["@ai-sdk/react@3.0.186", "", { "dependencies": { "@ai-sdk/provider-utils": "4.0.27", "ai": "6.0.184", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ~19.0.1 || ~19.1.2 || ^19.2.1" } }, "sha512-fy8wuy8pBghYD1ECw/M5vAsGsZp2D3y/oSTp1iOlAnJqRXzvz4rWLBz1n+rjL+aHZNgJK3kR3NHlnifoKYERfA=="], + "@ai-sdk/react": ["@ai-sdk/react@3.0.193", "", { "dependencies": { "@ai-sdk/provider-utils": "4.0.27", "ai": "6.0.191", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ~19.0.1 || ~19.1.2 || ^19.2.1" } }, "sha512-El0jUZ/B7mvBHAD5rfSDqOAhWxutVTq7BCNhfGuwfDPT9SO0TMHybh2bMkieJQI7YOfl+qNBoWrRAOHHaFb99Q=="], "@alcalzone/ansi-tokenize": ["@alcalzone/ansi-tokenize@0.2.5", "", { "dependencies": { "ansi-styles": "^6.2.1", "is-fullwidth-code-point": "^5.0.0" } }, "sha512-3NX/MpTdroi0aKz134A6RC2Gb2iXVECN4QaAXnvCIxxIm3C3AVB1mkUe8NaaiyvOpDfsrqWhYtj+Q6a62RrTsw=="], @@ -2466,7 +2466,7 @@ "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], - "ai": ["ai@6.0.184", "", { "dependencies": { "@ai-sdk/gateway": "3.0.115", "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@opentelemetry/api": "^1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-j//zHkKvj5ra27l8izHco8cj1g1Pr7vx1ZK+hrzrkHvndgIRmdfZKOb6+RAPpvbk42qGIsuYvlYbGlVAu3erNQ=="], + "ai": ["ai@6.0.191", "", { "dependencies": { "@ai-sdk/gateway": "3.0.120", "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@opentelemetry/api": "^1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zAxvjKebQE7YkSyyNIl0OM7i6/zygnKeF+yNUjD4nWOelYrG+LpDd6RnH6mjySI4zUpZ7o4wbnmAy8jc6u98vQ=="], "ajv": ["ajv@8.20.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA=="], @@ -4134,7 +4134,7 @@ "react-draggable": ["react-draggable@4.5.0", "", { "dependencies": { "clsx": "^2.1.1", "prop-types": "^15.8.1" }, "peerDependencies": { "react": ">= 16.3.0", "react-dom": ">= 16.3.0" } }, "sha512-VC+HBLEZ0XJxnOxVAZsdRi8rD04Iz3SiiKOoYzamjylUcju/hP9np/aZdLHf/7WOD268WMoNJMvYfB5yAK45cw=="], - "react-email": ["react-email@6.1.4", "", { "dependencies": { "@babel/parser": "7.27.0", "@babel/traverse": "7.27.0", "@react-email/render": ">=2.0.8", "chokidar": "^4.0.3", "commander": "^13.0.0", "conf": "^15.0.2", "css-tree": "3.2.1", "debounce": "^2.0.0", "esbuild": "^0.28.0", "glob": "^13.0.6", "jiti": "2.4.2", "log-symbols": "^7.0.0", "marked": "^15.0.12", "mime-types": "^3.0.0", "normalize-path": "^3.0.0", "nypm": "0.6.6", "picospinner": "^3.0.0", "prismjs": "^1.30.0", "prompts": "2.4.2", "socket.io": "^4.8.1", "tailwindcss": "^4.1.18", "tsconfig-paths": "4.2.0" }, "peerDependencies": { "react": "^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^18.0 || ^19.0 || ^19.0.0-rc" }, "bin": { "email": "./dist/cli/index.mjs" } }, "sha512-UKCfry4W7zkAWoJX1ngaWgPrUazOebxI8IYrO8TBEqgFmmz97VqZ84ell2x36Fdvtzd/UI5e4ZOywlsXeydwgQ=="], + "react-email": ["react-email@6.3.2", "", { "dependencies": { "@babel/parser": "7.27.0", "@babel/traverse": "7.27.0", "@react-email/render": ">=2.0.8", "chokidar": "^4.0.3", "commander": "^13.0.0", "conf": "^15.0.2", "css-tree": "3.2.1", "debounce": "^2.0.0", "esbuild": "^0.28.0", "glob": "^13.0.6", "jiti": "2.4.2", "log-symbols": "^7.0.0", "marked": "^15.0.12", "mime-types": "^3.0.0", "normalize-path": "^3.0.0", "nypm": "0.6.6", "picospinner": "^3.0.0", "prismjs": "^1.30.0", "prompts": "2.4.2", "socket.io": "^4.8.1", "tailwindcss": "^4.1.18", "tsconfig-paths": "4.2.0" }, "peerDependencies": { "react": "^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^18.0 || ^19.0 || ^19.0.0-rc" }, "bin": { "email": "./dist/cli/index.mjs" } }, "sha512-ZzmrwM+QLzfs/EZBnFZRMZwT3Kfvp46zIMCLsGn/rtRBh9ocRJDKHcnV0JWJyc0AVJTdPDHeFNBWap6N/3Dnhg=="], "react-fast-marquee": ["react-fast-marquee@1.6.5", "", { "peerDependencies": { "react": ">= 16.8.0 || ^18.0.0", "react-dom": ">= 16.8.0 || ^18.0.0" } }, "sha512-swDnPqrT2XISAih0o74zQVE2wQJFMvkx+9VZXYYNSLb/CUcAzU9pNj637Ar2+hyRw6b4tP6xh4GQZip2ZCpQpg=="], @@ -4874,8 +4874,6 @@ "@databuddy/env/@types/node": ["@types/node@22.19.19", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-dyh/xO2Fh5bYrfWaaqGrRQQGkNdmYw6AmaAUvYeUMNTWQtvb796ikLdmTchRmOlOiIJ1TDXfWgVx1QkUlQ6Hew=="], - "@databuddy/nuxt/@databuddy/sdk": ["@databuddy/sdk@2.4.20", "", { "peerDependencies": { "react": ">=18", "vue": ">=3" }, "optionalPeers": ["react", "vue"] }, "sha512-gp8PLX7ZlVP7nKey0OuVtSKAdmDm5RcZbcpW5cmUqRoDcKQ6CnF0U3vi9fh7GwtXABIACq8FWunJthT1NmcTfQ=="], - "@databuddy/redis/@types/node": ["@types/node@20.19.41", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ECymXOukMnOoVkC2bb1Vc/w/836DXncOg5m8Xj1RH7xSHZJWNYY6Zh7EH477vcnD5egKNNfy2RpNOmuChhFPgQ=="], "@databuddy/sdk/@types/node": ["@types/node@20.19.41", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ECymXOukMnOoVkC2bb1Vc/w/836DXncOg5m8Xj1RH7xSHZJWNYY6Zh7EH477vcnD5egKNNfy2RpNOmuChhFPgQ=="], diff --git a/packages/ai/package.json b/packages/ai/package.json index 5f42a5008..14ff772ca 100644 --- a/packages/ai/package.json +++ b/packages/ai/package.json @@ -31,7 +31,11 @@ "./schemas/smart-insights-output": "./src/ai/schemas/smart-insights-output.ts", "./tools/github-tools": "./src/ai/tools/github-tools.ts", "./tools/insights-agent-tools": "./src/ai/tools/insights-agent-tools.ts", + "./tools/investigation-tools": "./src/ai/tools/investigation-tools.ts", + "./tools/toolkit": "./src/ai/tools/toolkit.ts", "./tools/scrape-page": "./src/ai/tools/scrape-page.ts", + "./tools/search-console": "./src/ai/tools/search-console.ts", + "./tools/utils/oauth-token": "./src/ai/tools/utils/oauth-token.ts", "./tools/workspace": "./src/ai/tools/workspace.ts", "./query": "./src/query/index.ts", "./query/analytics-tables": "./src/query/analytics-tables.ts", @@ -60,7 +64,7 @@ "@modelcontextprotocol/sdk": "^1.26.0", "@orpc/server": "^1.14.0", "@tokenlens/models": "catalog:", - "ai": "^6.0.154", + "ai": "^6.0.188", "autumn-js": "catalog:", "dayjs": "^1.11.19", "drizzle-orm": "catalog:", diff --git a/packages/ai/src/ai/agents/analytics.ts b/packages/ai/src/ai/agents/analytics.ts index 756fe3fa8..b2f8010d8 100644 --- a/packages/ai/src/ai/agents/analytics.ts +++ b/packages/ai/src/ai/agents/analytics.ts @@ -8,32 +8,9 @@ import { } from "../config/models"; import { TIER_CONFIG } from "../config/tiers"; import { buildAnalyticsInstructions } from "../prompts/analytics"; -import { createAnnotationTools } from "../tools/annotations"; -import { dashboardActionsTool } from "../tools/dashboard-actions"; -import { executeSqlQueryTool } from "../tools/execute-sql-query"; -import { createFlagTools } from "../tools/flags"; -import { createFunnelTools } from "../tools/funnels"; -import { getDataTool } from "../tools/get-data"; -import { createGoalTools } from "../tools/goals"; -import { createLinksTools } from "../tools/links"; -import { createMemoryTools } from "../tools/memory"; -import { createProfileTools } from "../tools/profiles"; -import { createScrapeTools } from "../tools/scrape-page"; +import { createToolkit } from "../tools/toolkit"; import type { AgentConfig, AgentContext, AgentThinking } from "./types"; -const analyticsTools = { - get_data: getDataTool, - execute_sql_query: executeSqlQueryTool, - dashboard_actions: dashboardActionsTool, - ...createMemoryTools(), - ...createProfileTools(), - ...createFlagTools(), - ...createFunnelTools(), - ...createGoalTools(), - ...createAnnotationTools(), - ...createLinksTools(), -}; - function thinkingProviderOptions( thinking: AgentThinking | undefined, modelKey: AgentModelKey @@ -80,12 +57,18 @@ export function createConfig( content: buildAnalyticsInstructions(appContext), providerOptions: tier.promptCaching ? ANTHROPIC_CACHE_1H : undefined, }, - tools: { - ...analyticsTools, - ...(context.websiteDomain - ? createScrapeTools(context.websiteDomain) - : {}), - }, + tools: createToolkit({ + capabilities: [ + "analytics", + "investigation", + "mutations", + "memory", + "dashboard", + ], + domain: context.websiteDomain, + organizationId: context.organizationId, + userId: context.userId, + }), stopWhen: stepCountIs(tier.maxSteps), temperature: tier.temperature, providerOptions: thinkingProviderOptions(context.thinking, modelKey), diff --git a/packages/ai/src/ai/agents/types.ts b/packages/ai/src/ai/agents/types.ts index 8d46ac526..93f6f930c 100644 --- a/packages/ai/src/ai/agents/types.ts +++ b/packages/ai/src/ai/agents/types.ts @@ -30,6 +30,7 @@ export const AGENT_TIERS: readonly AgentTier[] = [ export interface AgentContext { billingCustomerId?: string | null; chatId: string; + organizationId?: string; requestHeaders?: Headers; thinking?: AgentThinking; timezone: string; diff --git a/packages/ai/src/ai/insights/validate.ts b/packages/ai/src/ai/insights/validate.ts index 6d6dfff5c..4ddcf03d4 100644 --- a/packages/ai/src/ai/insights/validate.ts +++ b/packages/ai/src/ai/insights/validate.ts @@ -49,8 +49,22 @@ const BUSINESS_CLAIM_PATTERN = const TECHNICAL_TITLE_JARGON_PATTERN = /\b(INP|LCP|FCP|TTFB|CLS|p75)\b/i; const MAX_TITLE_CHARS = 80; -const MAX_DESCRIPTION_CHARS = 480; -const MAX_SUGGESTION_CHARS = 400; +const MAX_DESCRIPTION_CHARS = 300; +const MAX_SUGGESTION_CHARS = 300; + +function truncateAtSentence(text: string, maxLength: number): string { + if (text.length <= maxLength) { + return text; + } + const truncated = text.slice(0, maxLength); + const lastPeriod = truncated.lastIndexOf(". "); + const lastSemicolon = truncated.lastIndexOf("; "); + const cut = Math.max(lastPeriod, lastSemicolon); + if (cut > maxLength * 0.5) { + return text.slice(0, cut + 1).trim(); + } + return text.slice(0, maxLength - 1).trim(); +} function roundPercent(value: number): number { return Math.round(value * 10) / 10; @@ -79,16 +93,18 @@ function sentimentForPrimaryMetric( return improved ? "positive" : "negative"; } +const SENTIMENT_DIVERGENCE_TYPES = new Set([ + "conversion_leak", + "funnel_regression", + "channel_concentration", + "quality_shift", + "cross_property_dependency", + "referrer_change", + "engagement_change", +]); + function allowsSentimentDivergence(insight: ParsedInsight): boolean { - return [ - "conversion_leak", - "funnel_regression", - "channel_concentration", - "quality_shift", - "cross_property_dependency", - "referrer_change", - "engagement_change", - ].includes(insight.type); + return SENTIMENT_DIVERGENCE_TYPES.has(insight.type); } function typeForDirection( @@ -227,20 +243,37 @@ export function validateInsight(input: ParsedInsight): InsightValidationResult { }; } - if ( - insight.title.length > MAX_TITLE_CHARS || - insight.description.length > MAX_DESCRIPTION_CHARS || - insight.suggestion.length > MAX_SUGGESTION_CHARS - ) { + if (insight.title.length > MAX_TITLE_CHARS) { return { insight: null, warnings: [ ...warnings, - `${insight.title}: dropped because insight copy is too verbose`, + `${insight.title}: dropped because title exceeds ${MAX_TITLE_CHARS} chars`, ], }; } + if ( + insight.description.length > MAX_DESCRIPTION_CHARS || + insight.suggestion.length > MAX_SUGGESTION_CHARS + ) { + const trimmed = { + ...insight, + description: truncateAtSentence( + insight.description, + MAX_DESCRIPTION_CHARS + ), + suggestion: truncateAtSentence(insight.suggestion, MAX_SUGGESTION_CHARS), + }; + if ( + trimmed.description !== insight.description || + trimmed.suggestion !== insight.suggestion + ) { + warnings.push(`${insight.title}: truncated copy to fit limits`); + } + insight = trimmed; + } + if ( GENERIC_MONITORING_PATTERN.test(insight.suggestion) && !ACTION_VERB_PATTERN.test(insight.suggestion) diff --git a/packages/ai/src/ai/prompts/analytics.ts b/packages/ai/src/ai/prompts/analytics.ts index 6fbb3fdea..a44ae6779 100644 --- a/packages/ai/src/ai/prompts/analytics.ts +++ b/packages/ai/src/ai/prompts/analytics.ts @@ -31,6 +31,11 @@ const ANALYTICS_BODY = ` - Use now() - INTERVAL N DAY for date ranges, not custom parameters. Only {websiteId:String} is auto-injected. - Batch related questions into a single SQL query using CTEs (WITH clauses) instead of multiple sequential queries. +**Investigation tools (when available):** +9. scrape_page: Scrape a page on the website to see its content, CTAs, and structure. Use when investigating page-specific issues (bounce rate, errors, conversion drops) or to understand what the product does. +10. search_console: Query Google Search Console for keyword rankings, impressions, clicks, CTR. Use when investigating traffic changes to find which search queries drove them. +11. github_commits / github_commit_diff / github_search_code / github_read_file: Correlate code changes with metric anomalies. Use when a deploy or code change may have caused an issue. + **Analysis:** - Before answering analytics questions, classify each requested metric as directly supported by tool output, available only as a proxy, or missing/not answerable. - Every number in the final answer must come from tool output or simple arithmetic using tool-output numbers. Never fabricate numbers or unsupported breakdowns. diff --git a/packages/ai/src/ai/schemas/smart-insights-output.test.ts b/packages/ai/src/ai/schemas/smart-insights-output.test.ts index 12887f6a9..1066d7701 100644 --- a/packages/ai/src/ai/schemas/smart-insights-output.test.ts +++ b/packages/ai/src/ai/schemas/smart-insights-output.test.ts @@ -1,12 +1,12 @@ import { describe, expect, test } from "bun:test"; -import { insightSchema, insightsOutputSchema } from "./smart-insights-output"; +import { insightSchema } from "./smart-insights-output"; const baseInsight = { title: "Pricing page traffic up 28%", description: - "Pricing Page Visitors became a larger share of site activity while Bounce Rate improved. The audience that arrived this week was more qualified than a broad awareness spike. Worth confirming campaign attribution before drawing wider conclusions.", + "Pricing visitors grew while bounce rate improved and audience quality improved.", suggestion: - "Review the journey from Pricing Page Visitors into the next high-intent step and tighten the CTA path if Contact Page Visitors are lagging.", + "Review the journey from pricing into the next high-intent step.", metrics: [ { label: "Pricing Page Visitors", @@ -24,36 +24,36 @@ const baseInsight = { confidence: 0.82, }; -describe("insightSchema impactSummary length bound", () => { - test("accepts a 160-character impactSummary (legacy upper bound)", () => { - const summary = "x".repeat(160); - const result = insightSchema.safeParse({ ...baseInsight, impactSummary: summary }); - expect(result.success).toBe(true); - }); - - test("accepts a long impactSummary with no upper bound", () => { - const summary = "x".repeat(500); - const result = insightSchema.safeParse({ ...baseInsight, impactSummary: summary }); +describe("insightSchema", () => { + test("accepts a valid insight", () => { + const result = insightSchema.safeParse(baseInsight); expect(result.success).toBe(true); }); - test("impactSummary remains optional", () => { - const result = insightSchema.safeParse(baseInsight); + test("accepts impactSummary when provided", () => { + const result = insightSchema.safeParse({ + ...baseInsight, + impactSummary: "Revenue at risk if not addressed.", + }); expect(result.success).toBe(true); }); -}); -describe("insightsOutputSchema container", () => { - test("accepts configured deep runs with up to 10 insights", () => { - const result = insightsOutputSchema.safeParse({ - insights: Array.from({ length: 10 }, () => baseInsight), + test("requires at least one metric", () => { + const result = insightSchema.safeParse({ + ...baseInsight, + metrics: [], }); - expect(result.success).toBe(true); + expect(result.success).toBe(false); }); - test("rejects 11 insights", () => { - const result = insightsOutputSchema.safeParse({ - insights: Array.from({ length: 11 }, () => baseInsight), + test("rejects more than 5 metrics", () => { + const result = insightSchema.safeParse({ + ...baseInsight, + metrics: Array.from({ length: 6 }, (_, i) => ({ + label: `Metric ${i}`, + current: i * 10, + format: "number" as const, + })), }); expect(result.success).toBe(false); }); diff --git a/packages/ai/src/ai/schemas/smart-insights-output.ts b/packages/ai/src/ai/schemas/smart-insights-output.ts index 3c0f65817..a9a293720 100644 --- a/packages/ai/src/ai/schemas/smart-insights-output.ts +++ b/packages/ai/src/ai/schemas/smart-insights-output.ts @@ -22,12 +22,12 @@ export const insightSchema = z.object({ description: z .string() .describe( - "1-3 concise sentences in plain English explaining what changed and why it matters. Translate technical metrics into user/product outcomes; keep raw metric names in the metrics array. Do NOT restate numbers already in metrics. Keep under 480 characters." + "1-2 sentences: what changed and why it matters. Do NOT restate numbers from the title or metrics array. Add NEW context only. Under 300 characters." ), suggestion: z .string() .describe( - "One specific next action in plain English tied to this product's data. Name the surface to inspect (page, funnel step, referrer segment, error class, sessions, flag rollout). Do not give generic monitoring advice. Keep under 400 characters." + "One specific action. Name the exact page, button, query, or tool to use. Under 300 characters." ), metrics: z .array(insightMetricSchema) @@ -47,7 +47,7 @@ export const insightSchema = z.object({ .min(1) .max(10) .describe( - "1-10 from actionability × business impact, NOT raw % magnitude. User-facing errors, conversion/session drops, or reliability issues outrank vanity traffic spikes. A 5% drop in a meaningful engagement metric can score higher than a 70% visitor increase with no conversion context. Reserve 8-10 for issues that hurt users or revenue signals in the data." + "1-10 from actionability x business impact, NOT raw % magnitude. User-facing errors, conversion/session drops, or reliability issues outrank vanity traffic spikes. A 5% drop in a meaningful engagement metric can score higher than a 70% visitor increase with no conversion context. Reserve 8-10 for issues that hurt users or revenue signals in the data." ), type: z.enum([ "error_spike", @@ -111,7 +111,9 @@ export const insightSchema = z.object({ rootCause: z .string() .optional() - .describe("Root cause hypothesis with evidence citation."), + .describe( + "WHY it happened (the mechanism). Must add info beyond the description. Skip if unknown." + ), evidence: z .array( z.object({ @@ -121,21 +123,41 @@ export const insightSchema = z.object({ ) .max(5) .optional() - .describe("Supporting evidence for the root cause"), + .describe( + "Data points NOT already in description or rootCause. Each bullet must be a different fact." + ), investigationDepth: z .enum(["surface", "investigated", "deep"]) .optional() .describe("How deeply this signal was investigated"), -}); - -export const insightsOutputSchema = z.object({ - insights: z - .array(insightSchema) - .max(10) + actions: z + .array( + z.object({ + type: z.enum([ + "fix_goal", + "create_funnel", + "add_custom_event", + "create_annotation", + "update_config", + "add_tracking", + "investigate_further", + "code_fix", + ]), + label: z.string().describe("Button label (e.g. 'Fix goal target')"), + params: z + .record(z.string(), z.string()) + .describe( + "Action-specific parameters. code_fix: {prompt, file_hint, error_message} — generates a cursor/claude-code-ready prompt." + ), + }) + ) + .max(3) + .optional() .describe( - "Insight cards ranked by actionability x business impact. Default runs usually request 1-3 cards, but configured deep runs may request more. When the period is mostly positive, at least one insight MUST still call out a material risk or watch (e.g. session duration down, bounce up, single-channel dependency, volatile referrer, error count up in absolute terms) if those signals appear in the data. Skip repeating a narrative already listed under recently reported insights unless the change is materially new." + "Machine-readable actions the user can take. fix_goal: {goalName, from, to}. create_funnel: {name, steps}. add_custom_event: {eventName, element, page}. create_annotation: {text, date}. add_tracking: {page, element, snippet}. investigate_further: {prompt}." ), }); export type ParsedInsight = z.infer; export type InsightMetric = z.infer; +export type InsightAction = NonNullable[number]; diff --git a/packages/ai/src/ai/tools/github-tools.ts b/packages/ai/src/ai/tools/github-tools.ts index 099fce20e..398aba5d4 100644 --- a/packages/ai/src/ai/tools/github-tools.ts +++ b/packages/ai/src/ai/tools/github-tools.ts @@ -1,44 +1,10 @@ import { tool } from "ai"; import { z } from "zod"; -import { db, eq, and } from "@databuddy/db"; -import { account, member } from "@databuddy/db/schema"; +import { createCachedTokenFn } from "./utils/oauth-token"; const GITHUB_API = "https://api.github.com"; const MAX_RESULTS = 10; -async function getGitHubToken( - organizationId: string, - preferUserId?: string -): Promise { - if (preferUserId) { - const [ghAccount] = await db - .select({ accessToken: account.accessToken }) - .from(account) - .where( - and(eq(account.userId, preferUserId), eq(account.providerId, "github")) - ) - .limit(1); - - if (ghAccount?.accessToken) { - return ghAccount.accessToken; - } - } - - const [fallback] = await db - .select({ accessToken: account.accessToken }) - .from(account) - .innerJoin(member, eq(member.userId, account.userId)) - .where( - and( - eq(member.organizationId, organizationId), - eq(account.providerId, "github") - ) - ) - .limit(1); - - return fallback?.accessToken ?? null; -} - export async function githubFetch( path: string, token: string @@ -65,15 +31,11 @@ export interface GitHubToolsParams { } export function createGitHubTools(params: GitHubToolsParams) { - let cachedToken: string | null | undefined; - - async function getToken(): Promise { - if (cachedToken !== undefined) { - return cachedToken; - } - cachedToken = await getGitHubToken(params.organizationId, params.userId); - return cachedToken; - } + const getToken = createCachedTokenFn( + "github", + params.organizationId, + params.userId + ); const getRecentDeploysTool = tool({ description: diff --git a/packages/ai/src/ai/tools/insights-agent-tools.ts b/packages/ai/src/ai/tools/insights-agent-tools.ts index 76e3fcb64..d38ff77bb 100644 --- a/packages/ai/src/ai/tools/insights-agent-tools.ts +++ b/packages/ai/src/ai/tools/insights-agent-tools.ts @@ -79,7 +79,7 @@ export function createInsightsAgentTools( const periodSchema = z.enum(["current", "previous", "both"]); const webMetricsTool = tool({ - description: `Query analytics data. ${ALL_QUERY_TYPES.length} query types. Use period="both" to compare. Key types: summary_metrics, top_pages, entry_pages, exit_pages, recent_errors, errors_by_page, error_types, session_flow, sessions_by_device, sessions_by_browser, web_vitals_by_page, web_vitals_by_browser, revenue_overview, revenue_by_referrer, custom_events_discovery, custom_events_trends, country, region, city, utm_campaigns, device_types. Filter by: path, country, device_type, browser_name, os_name, referrer, utm_source, utm_medium, utm_campaign.`, + description: `Query analytics data. ${ALL_QUERY_TYPES.length} query types. Use period="both" to compare. Key types: summary_metrics, top_pages, entry_pages, exit_pages, recent_errors, errors_by_page, error_types, session_flow, session_pages, interesting_sessions, session_list, sessions_by_device, sessions_by_browser, web_vitals_by_page, web_vitals_by_browser, revenue_overview, revenue_by_referrer, custom_events_discovery, custom_events_trends, country, region, city, utm_campaigns, device_types. Filter by: path, country, device_type, browser_name, os_name, referrer, utm_source, utm_medium, utm_campaign.`, inputSchema: z.object({ period: periodSchema, queries: z.array(querySchema).min(1).max(MAX_QUERIES), diff --git a/packages/ai/src/ai/tools/investigation-tools.ts b/packages/ai/src/ai/tools/investigation-tools.ts new file mode 100644 index 000000000..dbe74b727 --- /dev/null +++ b/packages/ai/src/ai/tools/investigation-tools.ts @@ -0,0 +1,27 @@ +import type { ToolSet } from "ai"; +import { createGitHubTools } from "./github-tools"; +import { createScrapeTools } from "./scrape-page"; +import { createSearchConsoleTools } from "./search-console"; + +export interface InvestigationToolsParams { + domain: string; + organizationId: string; + userId?: string; +} + +export function createInvestigationTools( + params: InvestigationToolsParams +): ToolSet { + return { + ...createScrapeTools(params.domain), + ...createSearchConsoleTools({ + domain: params.domain, + organizationId: params.organizationId, + userId: params.userId, + }), + ...createGitHubTools({ + organizationId: params.organizationId, + userId: params.userId, + }), + }; +} diff --git a/packages/ai/src/ai/tools/scrape-page.ts b/packages/ai/src/ai/tools/scrape-page.ts index a27d6fe26..29f916241 100644 --- a/packages/ai/src/ai/tools/scrape-page.ts +++ b/packages/ai/src/ai/tools/scrape-page.ts @@ -13,30 +13,46 @@ function cacheKey(domain: string, path: string): string { return `scrape:${domain}:${path}`; } +let _redis: typeof import("@databuddy/redis").redis | null = null; +async function getRedis() { + if (!_redis) { + try { + _redis = (await import("@databuddy/redis")).redis; + } catch { + return null; + } + } + return _redis; +} + async function getCached(key: string): Promise { + const r = await getRedis(); + if (!r) { + return null; + } try { - const { redis } = await import("@databuddy/redis"); - return await redis.get(key); + return await r.get(key); } catch { return null; } } async function setCache(key: string, value: string): Promise { - try { - const { redis } = await import("@databuddy/redis"); - await redis.set(key, value, "EX", CACHE_TTL_SECONDS); - } catch {} + const r = await getRedis(); + if (!r) { + return; + } + r.set(key, value, "EX", CACHE_TTL_SECONDS).catch(() => {}); } interface ScrapeResult { - url: string; - title: string | null; - description: string | null; - statusCode: number | null; + cached?: boolean; content: string; + description: string | null; internalLinks: string[]; - cached?: boolean; + statusCode: number | null; + title: string | null; + url: string; } async function scrapePage( @@ -55,7 +71,9 @@ async function scrapePage( if (cached) { try { return { ...(JSON.parse(cached) as ScrapeResult), cached: true }; - } catch {} + } catch { + // Ignore corrupt cache entries and fetch a fresh copy. + } } const url = `https://${domain}${cleanPath}`; @@ -71,7 +89,7 @@ async function scrapePage( url, formats: ["markdown", "links"], onlyMainContent: true, - timeout: 15000, + timeout: 15_000, }), signal: AbortSignal.timeout(20_000), }); @@ -96,31 +114,41 @@ async function scrapePage( }; }; - if (!data.success || !data.data?.markdown) { + if (!(data.success && data.data?.markdown)) { return { error: "Page returned no content" }; } const markdown = data.data.markdown; const meta = data.data.metadata; const allLinks = data.data.links ?? []; - const internalLinks = allLinks - .filter((l) => { - try { - const u = new URL(l); - return u.hostname === domain || u.hostname === `www.${domain}`; - } catch { - return l.startsWith("/"); + const seen = new Set(); + const internalLinks: string[] = []; + for (const l of allLinks) { + let hostname: string; + let pathname: string; + try { + const u = new URL(l); + hostname = u.hostname; + pathname = u.pathname; + } catch { + if (!l.startsWith("/")) { + continue; } - }) - .map((l) => { - try { - return new URL(l).pathname; - } catch { - return l; - } - }) - .filter((p, i, arr) => arr.indexOf(p) === i) - .slice(0, 30); + hostname = domain; + pathname = l; + } + if (hostname !== domain && hostname !== `www.${domain}`) { + continue; + } + if (seen.has(pathname)) { + continue; + } + seen.add(pathname); + internalLinks.push(pathname); + if (internalLinks.length >= 30) { + break; + } + } const result: ScrapeResult = { url, @@ -144,6 +172,36 @@ async function scrapePage( } } +export async function getCachedSiteContext( + domain: string +): Promise { + const key = cacheKey(domain, "/"); + const cached = await getCached(key); + if (!cached) { + return null; + } + try { + const data = JSON.parse(cached) as ScrapeResult; + const parts = [`Site: ${domain}`]; + if (data.title) { + parts.push(`Title: ${data.title}`); + } + if (data.description) { + parts.push(`Description: ${data.description}`); + } + if (data.content) { + const truncated = + data.content.length > 2000 + ? `${data.content.slice(0, 2000)}...` + : data.content; + parts.push(`Content:\n${truncated}`); + } + return parts.join("\n"); + } catch { + return null; + } +} + export function createScrapeTools(domain: string) { const scrapeTool = tool({ description: `Scrape a page from ${domain} and return its content as markdown plus internal links. Use to understand the product: what the site does, key pages, pricing, CTAs. Also use when investigating page-level anomalies. Scrape "/" first for product context, then specific pages as needed. Results are cached for 24h.`, diff --git a/packages/ai/src/ai/tools/search-console.test.ts b/packages/ai/src/ai/tools/search-console.test.ts new file mode 100644 index 000000000..49b9d09af --- /dev/null +++ b/packages/ai/src/ai/tools/search-console.test.ts @@ -0,0 +1,206 @@ +import { afterEach, describe, expect, mock, test } from "bun:test"; +import { querySearchAnalytics, type SearchConsoleRow } from "./search-console"; + +const SITE_URL = "sc-domain:example.com"; + +function mockFetch( + body: unknown, + status = 200 +): typeof globalThis.fetch { + return mock(() => + Promise.resolve( + new Response(JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json" }, + }) + ) + ) as unknown as typeof globalThis.fetch; +} + +afterEach(() => { + globalThis.fetch = globalThis.fetch; +}); + +describe("querySearchAnalytics", () => { + test("maps rows with single dimension", async () => { + const original = globalThis.fetch; + globalThis.fetch = mockFetch({ + rows: [ + { keys: ["best analytics tool"], clicks: 42, impressions: 1200, ctr: 0.035, position: 3.7 }, + { keys: ["web analytics"], clicks: 18, impressions: 800, ctr: 0.0225, position: 5.2 }, + ], + }); + + const result = await querySearchAnalytics("token-123", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query"], + rowLimit: 25, + }); + globalThis.fetch = original; + + expect(result).not.toHaveProperty("error"); + const data = result as { rows: SearchConsoleRow[]; rowCount: number; siteUrl: string }; + expect(data.siteUrl).toBe(SITE_URL); + expect(data.rowCount).toBe(2); + + expect(data.rows[0].query).toBe("best analytics tool"); + expect(data.rows[0].clicks).toBe(42); + expect(data.rows[0].impressions).toBe(1200); + expect(data.rows[0].ctr).toBe(3.5); + expect(data.rows[0].position).toBe(3.7); + + expect(data.rows[1].query).toBe("web analytics"); + expect(data.rows[1].clicks).toBe(18); + }); + + test("maps rows with multiple dimensions", async () => { + const original = globalThis.fetch; + globalThis.fetch = mockFetch({ + rows: [ + { keys: ["analytics", "/pricing"], clicks: 10, impressions: 500, ctr: 0.02, position: 4.0 }, + ], + }); + + const result = await querySearchAnalytics("token-123", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query", "page"], + rowLimit: 25, + }); + globalThis.fetch = original; + + const data = result as { rows: SearchConsoleRow[] }; + expect(data.rows[0].query).toBe("analytics"); + expect(data.rows[0].page).toBe("/pricing"); + expect(data.rows[0].clicks).toBe(10); + }); + + test("returns empty rows when API returns no data", async () => { + const original = globalThis.fetch; + globalThis.fetch = mockFetch({}); + + const result = await querySearchAnalytics("token-123", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query"], + rowLimit: 25, + }); + globalThis.fetch = original; + + const data = result as { rows: SearchConsoleRow[]; rowCount: number }; + expect(data.rows).toEqual([]); + expect(data.rowCount).toBe(0); + }); + + test("returns error on non-ok response", async () => { + const original = globalThis.fetch; + globalThis.fetch = mock(() => + Promise.resolve(new Response("Forbidden", { status: 403 })) + ) as unknown as typeof globalThis.fetch; + + const result = await querySearchAnalytics("token-123", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query"], + rowLimit: 25, + }); + globalThis.fetch = original; + + expect(result).toHaveProperty("error"); + const err = result as { error: string }; + expect(err.error).toContain("403"); + }); + + test("rounds CTR to one decimal percentage", async () => { + const original = globalThis.fetch; + globalThis.fetch = mockFetch({ + rows: [{ keys: ["test"], clicks: 1, impressions: 3, ctr: 0.33333, position: 1.0 }], + }); + + const result = await querySearchAnalytics("token-123", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query"], + rowLimit: 25, + }); + globalThis.fetch = original; + + const data = result as { rows: SearchConsoleRow[] }; + expect(data.rows[0].ctr).toBe(33.3); + }); + + test("rounds position to one decimal", async () => { + const original = globalThis.fetch; + globalThis.fetch = mockFetch({ + rows: [{ keys: ["test"], clicks: 1, impressions: 10, ctr: 0.1, position: 7.456 }], + }); + + const result = await querySearchAnalytics("token-123", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query"], + rowLimit: 25, + }); + globalThis.fetch = original; + + const data = result as { rows: SearchConsoleRow[] }; + expect(data.rows[0].position).toBe(7.5); + }); + + test("sends correct request body to GSC API", async () => { + const original = globalThis.fetch; + let capturedBody: string | undefined; + let capturedUrl: string | undefined; + globalThis.fetch = mock((url: string | URL | Request, init?: RequestInit) => { + capturedUrl = typeof url === "string" ? url : url.toString(); + capturedBody = init?.body as string; + return Promise.resolve( + new Response(JSON.stringify({ rows: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }) + ); + }) as unknown as typeof globalThis.fetch; + + await querySearchAnalytics("my-token", "sc-domain:test.com", { + startDate: "2026-01-01", + endDate: "2026-01-31", + dimensions: ["page", "device"], + rowLimit: 10, + }); + globalThis.fetch = original; + + expect(capturedUrl).toContain("sc-domain%3Atest.com"); + expect(capturedUrl).toContain("searchAnalytics/query"); + + const body = JSON.parse(capturedBody!); + expect(body.startDate).toBe("2026-01-01"); + expect(body.endDate).toBe("2026-01-31"); + expect(body.dimensions).toEqual(["page", "device"]); + expect(body.rowLimit).toBe(10); + expect(body.dataState).toBe("final"); + }); + + test("sends authorization header", async () => { + const original = globalThis.fetch; + let capturedHeaders: HeadersInit | undefined; + globalThis.fetch = mock((_url: string | URL | Request, init?: RequestInit) => { + capturedHeaders = init?.headers; + return Promise.resolve( + new Response(JSON.stringify({ rows: [] }), { status: 200 }) + ); + }) as unknown as typeof globalThis.fetch; + + await querySearchAnalytics("secret-token", SITE_URL, { + startDate: "2026-05-01", + endDate: "2026-05-15", + dimensions: ["query"], + rowLimit: 25, + }); + globalThis.fetch = original; + + const headers = capturedHeaders as Record; + expect(headers.Authorization).toBe("Bearer secret-token"); + }); +}); diff --git a/packages/ai/src/ai/tools/search-console.ts b/packages/ai/src/ai/tools/search-console.ts new file mode 100644 index 000000000..9398e9c0e --- /dev/null +++ b/packages/ai/src/ai/tools/search-console.ts @@ -0,0 +1,122 @@ +import { tool } from "ai"; +import { z } from "zod"; +import { createCachedTokenFn } from "./utils/oauth-token"; + +const GSC_API = "https://www.googleapis.com/webmasters/v3"; +const MAX_ROWS = 25; + +const dimensionEnum = z.enum(["query", "page", "country", "device", "date"]); + +const searchAnalyticsInput = z.object({ + startDate: z.string().describe("Start date YYYY-MM-DD"), + endDate: z.string().describe("End date YYYY-MM-DD"), + dimensions: dimensionEnum + .array() + .min(1) + .max(3) + .describe( + "Dimensions to group by. 'query' for keywords, 'page' for URLs, 'date' for daily trends." + ), + rowLimit: z.number().min(1).max(MAX_ROWS).optional().default(MAX_ROWS), +}); + +export interface SearchConsoleRow { + clicks: number; + ctr: number; + impressions: number; + position: number; + [dimension: string]: string | number; +} + +export async function querySearchAnalytics( + token: string, + siteUrl: string, + input: z.infer +): Promise< + | { rows: SearchConsoleRow[]; siteUrl: string; rowCount: number } + | { error: string } +> { + const res = await fetch( + `${GSC_API}/sites/${encodeURIComponent(siteUrl)}/searchAnalytics/query`, + { + method: "POST", + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + startDate: input.startDate, + endDate: input.endDate, + dimensions: input.dimensions, + rowLimit: input.rowLimit, + dataState: "final", + }), + signal: AbortSignal.timeout(15_000), + } + ); + + if (!res.ok) { + const body = await res.text().catch(() => ""); + return { error: `Search Console API ${res.status}: ${body.slice(0, 200)}` }; + } + + const data = (await res.json()) as { + rows?: Array<{ + keys: string[]; + clicks: number; + impressions: number; + ctr: number; + position: number; + }>; + }; + + const rows: SearchConsoleRow[] = (data.rows ?? []).map((row) => { + const entry: Record = {}; + for (let i = 0; i < input.dimensions.length; i++) { + entry[input.dimensions[i]] = row.keys[i]; + } + entry.clicks = row.clicks; + entry.impressions = row.impressions; + entry.ctr = Math.round(row.ctr * 1000) / 10; + entry.position = Math.round(row.position * 10) / 10; + return entry as SearchConsoleRow; + }); + + return { siteUrl, rowCount: rows.length, rows }; +} + +export function createSearchConsoleTools(params: { + domain: string; + organizationId: string; + userId?: string; +}) { + const getToken = createCachedTokenFn( + "google", + params.organizationId, + params.userId + ); + const siteUrl = `sc-domain:${params.domain}`; + + return { + search_console: tool({ + description: `Query Google Search Console for ${params.domain}. Returns search queries, pages, countries, or devices with clicks, impressions, CTR, and average position. Use to find which keywords lost rankings, which pages dropped in impressions, or where traffic is coming from in Google search.`, + inputSchema: searchAnalyticsInput, + execute: async (input) => { + const token = await getToken(); + if (!token) { + return { + error: + "No Google account connected. Connect Google in Settings > Integrations with Search Console scope.", + }; + } + try { + return await querySearchAnalytics(token, siteUrl, input); + } catch (err) { + return { + error: `Search Console query failed: ${(err as Error).message?.slice(0, 200)}`, + }; + } + }, + }), + }; +} diff --git a/packages/ai/src/ai/tools/toolkit.ts b/packages/ai/src/ai/tools/toolkit.ts new file mode 100644 index 000000000..bf9854a42 --- /dev/null +++ b/packages/ai/src/ai/tools/toolkit.ts @@ -0,0 +1,82 @@ +import type { ToolSet } from "ai"; +import { createAnnotationTools } from "./annotations"; +import { executeSqlQueryTool } from "./execute-sql-query"; +import { createFlagTools } from "./flags"; +import { createFunnelTools } from "./funnels"; +import { getDataTool } from "./get-data"; +import { createGoalTools } from "./goals"; +import { createInvestigationTools } from "./investigation-tools"; +import { createLinksTools } from "./links"; +import { createMemoryTools } from "./memory"; +import { createProfileTools } from "./profiles"; +import { dashboardActionsTool } from "./dashboard-actions"; + +export type ToolCapability = + | "analytics" + | "investigation" + | "mutations" + | "memory" + | "dashboard"; + +export interface ToolkitParams { + capabilities: ToolCapability[]; + domain?: string; + organizationId?: string; + userId?: string; +} + +const ANALYTICS_TOOLS: ToolSet = { + get_data: getDataTool, + execute_sql_query: executeSqlQueryTool, +}; + +const MUTATION_TOOLS: ToolSet = { + ...createFunnelTools(), + ...createGoalTools(), + ...createAnnotationTools(), + ...createFlagTools(), + ...createLinksTools(), +}; + +const MEMORY_TOOLS: ToolSet = { + ...createMemoryTools(), + ...createProfileTools(), +}; + +const DASHBOARD_TOOLS: ToolSet = { + dashboard_actions: dashboardActionsTool, +}; + +export function createToolkit(params: ToolkitParams): ToolSet { + const tools: ToolSet = {}; + const caps = new Set(params.capabilities); + + if (caps.has("analytics")) { + Object.assign(tools, ANALYTICS_TOOLS); + } + + if (caps.has("investigation") && params.domain && params.organizationId) { + Object.assign( + tools, + createInvestigationTools({ + domain: params.domain, + organizationId: params.organizationId, + userId: params.userId, + }) + ); + } + + if (caps.has("mutations")) { + Object.assign(tools, MUTATION_TOOLS); + } + + if (caps.has("memory")) { + Object.assign(tools, MEMORY_TOOLS); + } + + if (caps.has("dashboard")) { + Object.assign(tools, DASHBOARD_TOOLS); + } + + return tools; +} diff --git a/packages/ai/src/ai/tools/utils/index.ts b/packages/ai/src/ai/tools/utils/index.ts index f050b6cb9..d278ca9d4 100644 --- a/packages/ai/src/ai/tools/utils/index.ts +++ b/packages/ai/src/ai/tools/utils/index.ts @@ -1,5 +1,6 @@ /** biome-ignore-all lint/performance/noBarrelFile: no barrel file */ export { getAppContext } from "./context"; export { createToolLogger } from "./logger"; +export { getOAuthToken, createCachedTokenFn } from "./oauth-token"; export { executeTimedQuery, type QueryResult } from "./query"; export { callRPCProcedure } from "./rpc"; diff --git a/packages/ai/src/ai/tools/utils/oauth-token.ts b/packages/ai/src/ai/tools/utils/oauth-token.ts new file mode 100644 index 000000000..4a8b0030c --- /dev/null +++ b/packages/ai/src/ai/tools/utils/oauth-token.ts @@ -0,0 +1,66 @@ +import { db, eq, and, sql } from "@databuddy/db"; +import { account, member } from "@databuddy/db/schema"; + +const ROLE_PRIORITY = sql`CASE ${member.role} WHEN 'owner' THEN 0 WHEN 'admin' THEN 1 ELSE 2 END`; +const TOKEN_TTL_MS = 45 * 60 * 1000; + +export async function getOAuthToken( + providerId: string, + organizationId: string, + preferUserId?: string +): Promise { + if (preferUserId) { + const [preferred] = await db + .select({ accessToken: account.accessToken }) + .from(account) + .innerJoin(member, eq(member.userId, account.userId)) + .where( + and( + eq(account.userId, preferUserId), + eq(account.providerId, providerId), + eq(member.organizationId, organizationId) + ) + ) + .limit(1); + + if (preferred?.accessToken) { + return preferred.accessToken; + } + } + + const [fallback] = await db + .select({ accessToken: account.accessToken }) + .from(account) + .innerJoin(member, eq(member.userId, account.userId)) + .where( + and( + eq(member.organizationId, organizationId), + eq(account.providerId, providerId) + ) + ) + .orderBy(ROLE_PRIORITY) + .limit(1); + + return fallback?.accessToken ?? null; +} + +export function createCachedTokenFn( + providerId: string, + organizationId: string, + preferUserId?: string +): () => Promise { + let cached: string | null | undefined; + let cachedAt = 0; + const NEGATIVE_TTL_MS = 5 * 60 * 1000; + return async () => { + const age = Date.now() - cachedAt; + const ttl = cached ? TOKEN_TTL_MS : NEGATIVE_TTL_MS; + if (cached !== undefined && age < ttl) { + return cached; + } + const token = await getOAuthToken(providerId, organizationId, preferUserId); + cached = token; + cachedAt = Date.now(); + return token; + }; +} diff --git a/packages/db/src/clickhouse/sql-validation.test.ts b/packages/db/src/clickhouse/sql-validation.test.ts index 564380315..e246903aa 100644 --- a/packages/db/src/clickhouse/sql-validation.test.ts +++ b/packages/db/src/clickhouse/sql-validation.test.ts @@ -190,19 +190,35 @@ describe("validateAgentSQL", () => { expect(result.reason).toContain("Multiple statements"); }); - it("rejects common analytics.events schema footguns", () => { - for (const [badColumn, replacement] of [ - ["website_id", "client_id"], - ["created_at", "time"], - ["page_path", "path"], - ["event_type", "event_name"], - ] as const) { - const result = validateAgentSQL( - `SELECT count() FROM analytics.events WHERE client_id = {websiteId:String} AND ${badColumn} != ''` - ); - expect(result.valid).toBe(false); - expect(result.reason).toContain(replacement); - } + it("rejects qualified columns that don't exist on the aliased table", () => { + const result = validateAgentSQL( + "SELECT es.browser_name FROM analytics.error_spans es WHERE es.client_id = {websiteId:String}" + ); + expect(result.valid).toBe(false); + expect(result.reason).toContain("browser_name"); + expect(result.reason).toContain("does not exist"); + }); + + it("allows valid qualified columns", () => { + const result = validateAgentSQL( + "SELECT es.message, es.path FROM analytics.error_spans es WHERE es.client_id = {websiteId:String}" + ); + expect(result).toEqual({ valid: true, reason: null }); + }); + + it("allows columns from the correct table in a JOIN", () => { + const result = validateAgentSQL( + "SELECT e.browser_name, es.message FROM analytics.events e JOIN analytics.error_spans es ON e.session_id = es.session_id WHERE e.client_id = {websiteId:String} AND es.client_id = {websiteId:String}" + ); + expect(result).toEqual({ valid: true, reason: null }); + }); + + it("rejects cross-table column misuse in a JOIN", () => { + const result = validateAgentSQL( + "SELECT es.browser_name FROM analytics.events e JOIN analytics.error_spans es ON e.session_id = es.session_id WHERE e.client_id = {websiteId:String} AND es.client_id = {websiteId:String}" + ); + expect(result.valid).toBe(false); + expect(result.reason).toContain("browser_name"); }); it("rejects the nonexistent pageview event name", () => { diff --git a/packages/db/src/clickhouse/sql-validation.ts b/packages/db/src/clickhouse/sql-validation.ts index 07eed195f..0f7abcb60 100644 --- a/packages/db/src/clickhouse/sql-validation.ts +++ b/packages/db/src/clickhouse/sql-validation.ts @@ -18,6 +18,100 @@ export const AGENT_TENANT_COLUMN_BY_TABLE: Readonly> = { "analytics.link_visits": "client_id", }; +export const AGENT_TABLE_COLUMNS: Readonly< + Record> +> = { + "analytics.events": new Set([ + "client_id", + "anonymous_id", + "session_id", + "time", + "path", + "referrer", + "browser_name", + "os_name", + "device_type", + "country", + "region", + "city", + "utm_source", + "utm_medium", + "utm_campaign", + "utm_term", + "utm_content", + "load_time", + "time_on_page", + "scroll_depth", + "properties", + "event_name", + ]), + "analytics.error_spans": new Set([ + "client_id", + "anonymous_id", + "session_id", + "timestamp", + "path", + "message", + "filename", + "lineno", + "colno", + "stack", + "error_type", + ]), + "analytics.web_vitals_spans": new Set([ + "client_id", + "anonymous_id", + "session_id", + "timestamp", + "path", + "metric_name", + "metric_value", + ]), + "analytics.outgoing_links": new Set([ + "client_id", + "anonymous_id", + "session_id", + "timestamp", + "path", + "href", + "text", + ]), + "analytics.custom_events": new Set([ + "owner_id", + "anonymous_id", + "session_id", + "timestamp", + "event_name", + "properties", + ]), + "analytics.revenue": new Set([ + "owner_id", + "transaction_id", + "amount", + "currency", + "provider", + "type", + "customer_id", + "created", + ]), + "analytics.blocked_traffic": new Set([ + "client_id", + "timestamp", + "block_reason", + "bot_name", + "path", + ]), + "analytics.link_visits": new Set([ + "client_id", + "timestamp", + "link_id", + "referrer", + "country", + "device_type", + "browser_name", + ]), +}; + /** * Builds the `additional_table_filters` ClickHouse session-setting value * scoped to `websiteId` for the supplied tables. The returned string is the @@ -60,12 +154,6 @@ const TOP_LEVEL_OR_PATTERN = /\bOR\b/i; const CLAUSE_TERMINATOR_PATTERN = /\b(?:GROUP\s+BY|ORDER\s+BY|HAVING|LIMIT|OFFSET|SETTINGS|WINDOW|JOIN)\b/i; const PAGEVIEW_EVENT_PATTERN = /\bevent_name\s*=\s*(['"])pageview\1/i; -const BAD_EVENTS_COLUMN_REPLACEMENTS: Record = { - created_at: "time", - event_type: "event_name", - page_path: "path", - website_id: "client_id", -}; function maskCommentsAndStrings(sql: string): string { let result = ""; @@ -320,17 +408,6 @@ export function validateAgentSQL(sql: string): { }; } - for (const [badColumn, replacement] of Object.entries( - BAD_EVENTS_COLUMN_REPLACEMENTS - )) { - if (new RegExp(`\\b${badColumn}\\b`, "i").test(sanitized)) { - return { - valid: false, - reason: `Invalid analytics.events column "${badColumn}". Use "${replacement}" instead.`, - }; - } - } - const cteNames = extractCteNames(sanitized); const refs = extractRelationReferences(sanitized); @@ -371,6 +448,33 @@ export function validateAgentSQL(sql: string): { } } + const aliasToTable = new Map(); + for (const ref of refs) { + if (!cteNames.has(ref.name) && ref.name in AGENT_TABLE_COLUMNS) { + aliasToTable.set(ref.alias, ref.name); + } + } + + const QUALIFIED_COLUMN = + /\b([a-zA-Z_][a-zA-Z0-9_]*)\.([a-zA-Z_][a-zA-Z0-9_]*)\b/g; + QUALIFIED_COLUMN.lastIndex = 0; + let qm = QUALIFIED_COLUMN.exec(sanitized); + while (qm) { + const alias = qm[1].toLowerCase(); + const col = qm[2].toLowerCase(); + const table = aliasToTable.get(alias); + if (table) { + const validCols = AGENT_TABLE_COLUMNS[table]; + if (validCols && !validCols.has(col)) { + return { + valid: false, + reason: `Column "${qm[2]}" does not exist on ${table}. Valid columns: ${[...validCols].join(", ")}.`, + }; + } + } + qm = QUALIFIED_COLUMN.exec(sanitized); + } + const selectCount = sanitized.match(SELECT_KEYWORD_PATTERN)?.length ?? 0; if (selectCount > 1 + cteNames.size) { return { diff --git a/packages/db/src/drizzle/schema/analytics.ts b/packages/db/src/drizzle/schema/analytics.ts index f05aa237d..271c364f1 100644 --- a/packages/db/src/drizzle/schema/analytics.ts +++ b/packages/db/src/drizzle/schema/analytics.ts @@ -71,6 +71,20 @@ export interface AnalyticsInsightEvidence { export type AnalyticsInsightSource = "web" | "product" | "ops" | "business"; +export interface AnalyticsInsightAction { + label: string; + params: Record; + type: + | "fix_goal" + | "create_funnel" + | "add_custom_event" + | "create_annotation" + | "update_config" + | "add_tracking" + | "investigate_further" + | "code_fix"; +} + export const funnelDefinitions = pgTable( "funnel_definitions", { @@ -222,6 +236,7 @@ export const analyticsInsights = pgTable( investigationDepth: text("investigation_depth").$type< "surface" | "investigated" | "deep" >(), + actions: jsonb().$type(), timezone: text().notNull().default("UTC"), currentPeriodFrom: text("current_period_from").notNull(), currentPeriodTo: text("current_period_to").notNull(), diff --git a/packages/evals/package.json b/packages/evals/package.json index 86fecc1b3..fe07dda7b 100644 --- a/packages/evals/package.json +++ b/packages/evals/package.json @@ -12,6 +12,6 @@ "dependencies": { "@databuddy/ai": "workspace:*", "@databuddy/env": "workspace:*", - "ai": "^6.0.78" + "ai": "^6.0.188" } } diff --git a/packages/evals/ui/index.html b/packages/evals/ui/index.html index 95c6cbc86..fb2e6efb6 100644 --- a/packages/evals/ui/index.html +++ b/packages/evals/ui/index.html @@ -794,16 +794,16 @@

Latest model board

const id = escapeHtml(c.id); const cost = (c.metrics?.costUsd || 0) + (c.metrics?.judgeCostUsd || 0); return ` -
${id}
- ${escapeHtml(c.category || "case")} - ${c.passed ? "Pass" : "Fail"} - ${c.scores?.tool_routing ?? "--"} - ${c.scores?.quality ?? "--"} - ${((c.metrics?.latencyMs || 0) / 1000).toFixed(1)}s - ${c.metrics?.steps ?? "--"} - ${money(cost)} - -
${detail(c)}
`; +
${id}
+ ${escapeHtml(c.category || "case")} + ${c.passed ? "Pass" : "Fail"} + ${c.scores?.tool_routing ?? "--"} + ${c.scores?.quality ?? "--"} + ${((c.metrics?.latencyMs || 0) / 1000).toFixed(1)}s + ${c.metrics?.steps ?? "--"} + ${money(cost)} + +
${detail(c)}
`; } function detail(c) { @@ -815,7 +815,7 @@

Latest model board

.map((t) => `${escapeHtml(t)}`) .join("") || 'No tools called'; return `

Response

${escapeHtml(c.response || "No response captured.")}
-

Failures

    ${failures}

Tools

${tools}
`; +

Failures

    ${failures}

Tools

${tools}
`; } function toggle(id) { diff --git a/packages/nuxt/tsconfig.json b/packages/nuxt/tsconfig.json index 077dd3d81..c46ed3b72 100644 --- a/packages/nuxt/tsconfig.json +++ b/packages/nuxt/tsconfig.json @@ -5,8 +5,8 @@ "moduleResolution": "bundler", "types": ["@nuxt/schema"], "paths": { - "#app": ["../../node_modules/nuxt/dist/app/index.d.ts"], - "#imports": ["../../node_modules/nuxt/dist/app/index.d.ts"] + "#app": ["./node_modules/nuxt/dist/app/index.d.ts"], + "#imports": ["./node_modules/nuxt/dist/app/index.d.ts"] } }, "include": ["src/**/*"], diff --git a/packages/rpc/src/routers/insights.ts b/packages/rpc/src/routers/insights.ts index a6f544bbe..eea20dc30 100644 --- a/packages/rpc/src/routers/insights.ts +++ b/packages/rpc/src/routers/insights.ts @@ -45,15 +45,25 @@ const insightMetricSchema = z.object({ previous: z.number().optional(), }); +const insightEvidenceSchema = z.object({ + description: z.string(), + type: z.string(), +}); + +const investigationDepthSchema = z.enum(["surface", "investigated", "deep"]); + const websiteInsightSchema = z.object({ changePercent: z.number().optional(), confidence: z.number(), description: z.string(), + evidence: z.array(insightEvidenceSchema).nullable().optional(), id: z.string(), impactSummary: z.string().optional(), + investigationDepth: investigationDepthSchema.nullable().optional(), link: z.string(), metrics: z.array(insightMetricSchema), priority: z.number(), + rootCause: z.string().nullable().optional(), sentiment: z.string(), severity: z.string(), sources: z.array(z.enum(["web", "product", "ops", "business"])), @@ -192,6 +202,10 @@ async function getInsightsFromDb(options: { sources: analyticsInsights.sources, confidence: analyticsInsights.confidence, impactSummary: analyticsInsights.impactSummary, + rootCause: analyticsInsights.rootCause, + evidence: analyticsInsights.evidence, + investigationDepth: analyticsInsights.investigationDepth, + actions: analyticsInsights.actions, metrics: analyticsInsights.metrics, createdAt: analyticsInsights.createdAt, }) @@ -216,6 +230,9 @@ async function getInsightsFromDb(options: { priority: row.priority, subjectKey: row.subjectKey, confidence: row.confidence, + rootCause: row.rootCause, + evidence: row.evidence ?? null, + investigationDepth: row.investigationDepth ?? null, ...parseInsightShape(row), })); } @@ -486,6 +503,9 @@ export const insightsRouter = { sources: analyticsInsights.sources, confidence: analyticsInsights.confidence, impactSummary: analyticsInsights.impactSummary, + rootCause: analyticsInsights.rootCause, + evidence: analyticsInsights.evidence, + investigationDepth: analyticsInsights.investigationDepth, metrics: analyticsInsights.metrics, createdAt: analyticsInsights.createdAt, currentPeriodFrom: analyticsInsights.currentPeriodFrom, @@ -514,6 +534,9 @@ export const insightsRouter = { priority: row.priority, subjectKey: row.subjectKey, confidence: row.confidence, + rootCause: row.rootCause, + evidence: row.evidence ?? null, + investigationDepth: row.investigationDepth ?? null, ...parseInsightShape(row), createdAt: row.createdAt.toISOString(), currentPeriodFrom: row.currentPeriodFrom, diff --git a/packages/rpc/src/routers/integrations.ts b/packages/rpc/src/routers/integrations.ts index 152c296f8..d001c39b3 100644 --- a/packages/rpc/src/routers/integrations.ts +++ b/packages/rpc/src/routers/integrations.ts @@ -9,6 +9,7 @@ import type { WebsiteIntegrations } from "@databuddy/db/schema"; import { invalidateSlackIntegrationCache } from "@databuddy/redis"; import { z } from "zod"; import { rpcError } from "../errors"; +import type { Context } from "../orpc"; import { protectedProcedure, sessionProcedure, @@ -16,6 +17,19 @@ import { } from "../orpc"; import { withWorkspace } from "../procedures/with-workspace"; +async function getUserProviderToken( + database: Context["db"], + userId: string, + providerId: string +): Promise { + const [row] = await database + .select({ accessToken: account.accessToken }) + .from(account) + .where(and(eq(account.userId, userId), eq(account.providerId, providerId))) + .limit(1); + return row?.accessToken ?? null; +} + const slackChannelBindingOutputSchema = z.object({ id: z.string(), slackChannelId: z.string(), @@ -273,8 +287,11 @@ export const integrationsRouter = { permissions: ["update"], }); - const integrations = { ...(website.integrations ?? {}) }; - delete integrations.github; + const integrations: WebsiteIntegrations = Object.fromEntries( + Object.entries(website.integrations ?? {}).filter( + ([key]) => key !== "github" + ) + ); await context.db .update(websites) @@ -284,6 +301,41 @@ export const integrationsRouter = { return { success: true }; }), + checkSearchConsoleAccess: sessionProcedure + .route({ + description: + "Checks whether the current user has Google Search Console access.", + method: "POST", + path: "/integrations/checkSearchConsoleAccess", + summary: "Check Search Console access", + tags: ["Integrations"], + }) + .input(z.object({})) + .output(z.object({ hasAccess: z.boolean() })) + .handler(async ({ context }) => { + const token = await getUserProviderToken( + context.db, + context.user.id, + "google" + ); + if (!token) { + return { hasAccess: false }; + } + + try { + const res = await fetch( + "https://www.googleapis.com/webmasters/v3/sites", + { + headers: { Authorization: `Bearer ${token}` }, + signal: AbortSignal.timeout(5000), + } + ); + return { hasAccess: res.ok }; + } catch { + return { hasAccess: false }; + } + }), + listGitHubRepos: sessionProcedure .route({ description: "Lists GitHub repos accessible to the current user.", @@ -305,18 +357,12 @@ export const integrationsRouter = { }) ) .handler(async ({ context }) => { - const [ghAccount] = await context.db - .select({ accessToken: account.accessToken }) - .from(account) - .where( - and( - eq(account.userId, context.user.id), - eq(account.providerId, "github") - ) - ) - .limit(1); - - if (!ghAccount?.accessToken) { + const token = await getUserProviderToken( + context.db, + context.user.id, + "github" + ); + if (!token) { return { repos: [] }; } @@ -324,7 +370,7 @@ export const integrationsRouter = { "https://api.github.com/user/repos?sort=pushed&direction=desc&per_page=50", { headers: { - Authorization: `Bearer ${ghAccount.accessToken}`, + Authorization: `Bearer ${token}`, Accept: "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", },