diff --git a/.agents/skills/databuddy-internal/SKILL.md b/.agents/skills/databuddy-internal/SKILL.md index 8663872d4..dff1030b0 100644 --- a/.agents/skills/databuddy-internal/SKILL.md +++ b/.agents/skills/databuddy-internal/SKILL.md @@ -21,6 +21,7 @@ Keep additions **minimal**: one bullet, a new `rg` hint, or a routing note—eno - Never use production/customer data as tests, fixtures, snapshots, examples, or copied output. Tests must use placeholders/mocks only (example.com, example IDs). If production ClickHouse is queried for investigation, summarize anonymized aggregates and do not paste customer domains, client IDs, emails, or other identifiers into code or responses. - `apps/dashboard`: Next.js app on port `3000` (per-website **agent** chat: `@ai-sdk/react` `useChat` via `contexts/chat-context.tsx` — not the separate `chat-sdk` package; overlapping sends while streaming are queued client-side to mirror a “queue latest” strategy.) - Dashboard Playwright webServer commands run under CI PATH from setup-bun; avoid `bash -lc` because login shells can drop Bun from PATH. Build dist-only workspace packages such as `@databuddy/sdk` and `@databuddy/devtools` before starting the API/dashboard. Client `NEXT_PUBLIC_*` flags must use direct env access so Next can inline them. `readBooleanEnv` only treats the literal string `"true"` as enabled, so CI E2E booleans must use `"true"`/`"false"`, not `"1"`/`"0"`. +- Dashboard Playwright public/demo analytics specs call API `/v1/query` anonymously from the browser; keep `DATABUDDY_E2E_MODE` query behavior isolated from production rate limits so CI retries do not exhaust `anon:unknown`. - `apps/api`: Elysia API on port `3001` - `apps/slack`: Slack agent adapter; Slack installs must resolve through org-scoped DB integration records, not a single env bot token/default website. Agent calls must use an encrypted per-integration Databuddy API key secret as a normal bearer token, never a global internal secret. 
- Slack OAuth lives in `apps/api`, but slash commands/events require `apps/slack` to be running too; local `bun run dev:dashboard` runs dashboard + API only, so use `bun run dev:slack` when working on Slack. The Slack package scripts read the root `.env`. @@ -30,10 +31,12 @@ Keep additions **minimal**: one bullet, a new `rg` hint, or a routing note—eno - Slack memory is separate from billing/auth: pass a Slack-scoped `memoryUserId` such as `slack-{team}-{user}` plus current-speaker context so one Slack user's saved name/preferences do not bleed into another user's replies. - Slack agent write tools need the integration automation API key to include the matching Databuddy API scopes (currently `read:data`, `read:links`, `write:links`, `manage:websites`, `manage:flags`); older installs may need reconnecting so a new key is minted. - Shared agent integrations should call `@databuddy/ai/agent` (`askDatabuddyAgent` / `streamDatabuddyAgent`) instead of importing internal MCP run/history helpers directly. +- Insights generation logic belongs in `apps/insights` and should reuse `@databuddy/ai`; `apps/api` should only read insight data or queue runs, not own prompts, model calls, tool loops, validation, or persistence orchestration. - Agent ClickHouse SQL must use the canonical analytics.events schema: `client_id`, `time`, `path`, `event_name`, and pageviews as `event_name = 'screen_view'`; never `website_id`, `created_at`, `page_path`, `event_type`, or `pageview`. - Slack agent evals live in `packages/evals`: use `bun run eval --surface slack` for the whole Slack surface. `--tag slack` is only a tiny smoke subset, and `cost_fallback` in agent telemetry is pricing-catalog fallback, not proof the model request fell back. - Slack agent expected stops such as exhausted Databunny credits should throw `DatabuddyAgentUserError` from `@databuddy/ai/agent/errors`; Slack surfaces those messages directly and reserves the generic reconnect copy for real infrastructure failures. 
- Slack Docker builds use `bun build --compile --bytecode`; keep `apps/slack/src/index.ts` bootstrapping inside an async `main()` instead of top-level `await`, which can fail during compile even when typecheck passes. +- Insights Docker builds also use `bun build --compile --bytecode`; keep `apps/insights/src/index.ts` startup work inside async functions instead of top-level `await`. - After Slack Docker changes, verify the full pruned image with `docker build --progress=plain -f slack.Dockerfile -t databuddy-slack:test .`; the inner Bun compile is not enough because prune can miss dependency build outputs and package exports. - Slack-reachable shared packages (`@databuddy/ai`, `@databuddy/rpc`) must not import `evlog/elysia`; use host-injected request logger providers from the API and plain evlog fallbacks elsewhere. - AI link tools must assign link folders by existing folder `id` or `slug` only; folder names are display text and must not be used for routing or dedupe. @@ -98,6 +101,8 @@ Read [codebase-map.md](./references/codebase-map.md) when you need deeper routin - Insights merged feed (`use-insights-feed`) collapses history + AI by `insightSignalDedupeKey` in `apps/dashboard/lib/insight-signal-key.ts` so the list is one row per signal (latest wins). - Insights page (`app/(main)/insights`) should stay focused on the brief + signal queue; do not add generic global analytics KPI cards or top pages/referrers/countries tables there. - Theme: `apps/dashboard/app/globals.css`. **`--border` is intentionally subtle**; do not crank it darker for “contrast” unless **iza** asks—prefer text tokens or layout for readability. +- Website analytics filters are two-way synced between Jotai and the `filters` URL param in `app/(main)/websites/[id]/layout.tsx`; guard URL-driven atom writes from echoing stale atom state back into `nuqs`, or adding a filter can lock the page during form submit. 
+- Do not centralize, relocate, or otherwise refactor dashboard E2E API route access gates during cleanup; keep test-only access checks local to each route unless iza explicitly asks for that change. - Integration catalog logos: use filled Simple Icons SVG path data (or equivalent filled brand SVG), store the path on each item as `iconPath`, render it through a shared logo tile with `bg-secondary/60`, `border-border/70`, `text-foreground`, and `fill="currentColor"`, then use brand color only as a small accent bar (`accent` or `accentClassName: "bg-foreground/70"` for black/near-black brands). Avoid raw brand-black icons or mixed line/filled icon sets that disappear in dark mode. - Organization integrations settings should stay list-first and operational: coming-soon integrations are static rows, Slack is the only expandable row for now, and connected integrations need obvious lifecycle controls such as uninstall/disconnect in the row details. - Dashboard UI must use `apps/dashboard/components/ds` primitives exactly; feature code must not use raw form/control elements (`button`, `input`, `select`, `textarea`, native dialogs), Base UI/Radix primitives, or ad hoc styled controls directly. If a variant is missing, add or extend the DS component first. For menu-style folder/status/filter/sort/action pickers, use `components/ds/dropdown-menu.tsx`; use `Select` only when the established pattern is explicitly a select/combobox. Read `apps/dashboard/components/ds/README.md` before creating new dashboard UI. diff --git a/.env.example b/.env.example index 1adf211d9..17286cc06 100644 --- a/.env.example +++ b/.env.example @@ -5,8 +5,18 @@ DATABASE_URL="postgres://databuddy:databuddy_dev_password@localhost:5432/databud DB_POOL_MAX="10" REDIS_URL="redis://localhost:6379" BULLMQ_REDIS_URL="redis://localhost:6379" +# Optional dedicated BullMQ Redis URL for the insights worker. Falls back to BULLMQ_REDIS_URL. 
+INSIGHTS_PORT="4002" +INSIGHTS_BULLMQ_REDIS_URL="" +INSIGHTS_DISPATCH_INTERVAL_MS="300000" +INSIGHTS_MAINTENANCE_INTERVAL_MS="300000" +INSIGHTS_STALE_ITEM_MS="900000" +INSIGHTS_WORKER_CONCURRENCY="5" +INSIGHTS_WORKER_ENABLED="true" +INSIGHTS_EVLOG_FS="" AI_GATEWAY_API_KEY="" +SUPERMEMORY_API_KEY="" BETTER_AUTH_URL="http://localhost:3000" BETTER_AUTH_SECRET="generate-a-random-32-byte-base64-secret" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a072053d3..cc33b19c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,6 +136,12 @@ jobs: env: NODE_ENV: test run: bun run test + - name: Insights integration + env: + NODE_ENV: test + INSIGHTS_INTEGRATION_TESTS: "true" + BULLMQ_REDIS_URL: redis://localhost:6379/4 + run: bun run --cwd apps/insights test:integration - name: Uptime router integration env: NODE_ENV: test diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b034215ed..c1f02fe15 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -49,7 +49,7 @@ jobs: EVENT_NAME: ${{ github.event_name }} BEFORE_SHA: ${{ github.event.before }} run: | - ALL='["api","basket","dashboard","links","uptime"]' + ALL='["api","basket","dashboard","insights","links","uptime"]' if [[ "$EVENT_NAME" != "push" ]]; then echo "services=$ALL" >> "$GITHUB_OUTPUT" exit 0 @@ -61,7 +61,7 @@ jobs: export TURBO_SCM_BASE="$BEFORE_SHA" export TURBO_SCM_HEAD="HEAD" affected=() - for svc in api basket dashboard links uptime; do + for svc in api basket dashboard insights links uptime; do count=$(bunx turbo ls --affected --filter="@databuddy/$svc" --output=json | jq -r '.packages.count') if [[ "$count" != "0" ]]; then affected+=("\"$svc\"") @@ -103,6 +103,8 @@ jobs: description: "Databuddy Basket service - event ingestion" - service: dashboard description: "Databuddy Dashboard service - web analytics UI" + - service: insights + description: "Databuddy Insights service - 
queued insight generation" - service: links description: "Databuddy Links service - URL shortening and tracking" - service: uptime diff --git a/.github/workflows/health-check.yml b/.github/workflows/health-check.yml index 158e19013..2db9f5537 100644 --- a/.github/workflows/health-check.yml +++ b/.github/workflows/health-check.yml @@ -8,6 +8,7 @@ on: - ".dockerignore" - "apps/api/**" - "apps/basket/**" + - "apps/insights/**" - "packages/**" - "bun.lock" - "package.json" @@ -20,6 +21,7 @@ on: - ".dockerignore" - "apps/api/**" - "apps/basket/**" + - "apps/insights/**" - "packages/**" - "bun.lock" - "package.json" @@ -267,3 +269,115 @@ jobs: fi echo "Basket health check passed!" + + insights-health-check: + name: Insights Health Check + runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 20 + + services: + redis: + image: redis:7-alpine + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + postgres: + image: postgres:17-alpine + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: databuddy_test + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U postgres -d databuddy_test" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Mount Docker build cache + uses: useblacksmith/stickydisk@41873b1513bb679f9c115504cbd13d3660432504 # v1 + with: + key: ${{ github.repository }}-docker-build-cache + path: /tmp/docker-build-cache + + - name: Set up Docker Builder + uses: useblacksmith/setup-docker-builder@ac083cc84672d01c60d5e8561d0a939b697de542 # v1 + + - name: Build Insights Docker image + uses: useblacksmith/build-push-action@cbd1f60d194a98cb3be5523b15134501eaf0fbf3 # v2 + with: + context: . 
+ file: ./insights.Dockerfile + push: false + load: true + tags: insights:test + + - name: Run Insights health check + run: | + set -euo pipefail + trap 'docker rm -f insights-health-check >/dev/null 2>&1 || true' EXIT + + docker run -d \ + --name insights-health-check \ + --network host \ + -e NODE_ENV=test \ + -e PORT=4002 \ + -e DATABASE_URL=postgresql://postgres:postgres@localhost:5432/databuddy_test \ + -e REDIS_URL=redis://localhost:6379 \ + -e BULLMQ_REDIS_URL=redis://localhost:6379/4 \ + -e INSIGHTS_BULLMQ_REDIS_URL= \ + -e INSIGHTS_DISPATCH_INTERVAL_MS=60000 \ + -e INSIGHTS_MAINTENANCE_INTERVAL_MS=60000 \ + -e INSIGHTS_STALE_ITEM_MS=300000 \ + -e INSIGHTS_WORKER_CONCURRENCY=1 \ + -e INSIGHTS_WORKER_ENABLED=true \ + -e BETTER_AUTH_SECRET=test-better-auth-secret-for-health-checks \ + -e AI_GATEWAY_API_KEY=test-ai-gateway-key \ + -e SUPERMEMORY_API_KEY= \ + insights:test + + echo "Waiting for Insights to start..." + for i in {1..30}; do + if curl -sf http://localhost:4002/health > /dev/null 2>&1; then + echo "Insights is responding!" + break + fi + if [ $i -eq 30 ]; then + echo "Insights failed to start within 30 seconds" + docker logs insights-health-check + exit 1 + fi + sleep 1 + done + + STATUS_BODY=$(curl -sS http://localhost:4002/health/status) + echo "Insights /health/status: $STATUS_BODY" + if echo "$STATUS_BODY" | jq -e '.status == "ok"' > /dev/null; then + echo "Insights dependency health is valid" + else + echo "Insights dependency health is not ok" + docker logs insights-health-check + exit 1 + fi + + RESPONSE=$(curl -sf http://localhost:4002/health || echo '{}') + echo "Insights /health: $RESPONSE" + + if echo "$RESPONSE" | jq -e '.workerEnabled == true' > /dev/null; then + echo "Insights health endpoint structure is valid" + else + echo "Insights health endpoint response missing expected workerEnabled=true" + docker logs insights-health-check + exit 1 + fi + + echo "Insights health check passed!" 
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 78a9026af..e50db47b4 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -21,7 +21,6 @@ import { import { openApiHandler } from "@/rpc/openapi"; import { agent } from "./routes/agent"; import { health } from "./routes/health"; -import { insights } from "./routes/insights"; import { integrations } from "./routes/integrations"; import { mcp } from "./routes/mcp"; import { publicApi } from "./routes/public"; @@ -90,7 +89,6 @@ const app = new Elysia({ precompile: true }) .use(query) .use(agent) .use(integrations) - .use(insights) .use(mcp) .all("/rpc/*", handleRpcEndpoint, { parse: "none" }) .all("/", handleOpenApiReference, { parse: "none" }) diff --git a/apps/api/src/lib/public-query-access.test.ts b/apps/api/src/lib/public-query-access.test.ts deleted file mode 100644 index cceb9f372..000000000 --- a/apps/api/src/lib/public-query-access.test.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { isPublicQueryAccess } from "./public-query-access"; - -describe("isPublicQueryAccess", () => { - it("allows only query types explicitly marked public-readable", () => { - expect( - isPublicQueryAccess([ - "summary_metrics", - "top_pages", - "custom_events_summary", - "recent_errors", - "vitals_overview", - ]) - ).toBe(true); - }); - - it("denies revenue, unknown, and empty public query requests", () => { - expect(isPublicQueryAccess(["revenue_overview"])).toBe(false); - expect(isPublicQueryAccess(["summary_metrics", "revenue_overview"])).toBe( - false - ); - expect(isPublicQueryAccess(["missing_query_type"])).toBe(false); - expect(isPublicQueryAccess([])).toBe(false); - }); -}); diff --git a/apps/api/src/lib/public-query-access.ts b/apps/api/src/lib/public-query-access.ts deleted file mode 100644 index d0a758604..000000000 --- a/apps/api/src/lib/public-query-access.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { QueryBuilders } from "@databuddy/ai/query/builders"; - 
-export function isPublicQueryAccess(queryTypes: string[]): boolean { - return ( - queryTypes.length > 0 && - queryTypes.every((type) => QueryBuilders[type]?.publicAccess === true) - ); -} diff --git a/apps/api/src/routes/insights.ts b/apps/api/src/routes/insights.ts deleted file mode 100644 index dc1fce257..000000000 --- a/apps/api/src/routes/insights.ts +++ /dev/null @@ -1,1421 +0,0 @@ -import { auth } from "@databuddy/auth"; -import { and, db, desc, eq, gte, inArray, isNull } from "@databuddy/db"; -import { - analyticsInsights, - annotations, - insightUserFeedback, - websites, -} from "@databuddy/db/schema"; -import { - cacheNamespaces, - cacheTags, - cacheable, - getRedisCache, - invalidateAgentContextSnapshotsForOwner, - invalidateAgentContextSnapshotsForWebsite, - invalidateInsightsCachesForOrganization, -} from "@databuddy/redis"; -import { getRateLimitHeaders, ratelimit } from "@databuddy/redis/rate-limit"; -import { generateText, Output, stepCountIs, ToolLoopAgent } from "ai"; -import dayjs from "dayjs"; -import { Elysia, t } from "elysia"; -import { useLogger } from "evlog/elysia"; -import type { AppContext } from "@databuddy/ai/config/context"; -import { ANTHROPIC_CACHE_1H, models } from "@databuddy/ai/config/models"; -import { createInsightsAgentTools } from "@databuddy/ai/tools/insights-agent-tools"; -import { - fetchInsightDedupeKeyToIdMap, - insightDedupeKey, -} from "@databuddy/ai/insights/dedupe"; -import { - fetchWebPeriodData, - getWeekOverWeekPeriod, - hasWebInsightData, -} from "@databuddy/ai/insights/fetch-context"; -import { formatLegacyWebDataForPrompt } from "@databuddy/ai/insights/normalize"; -import { validateInsights } from "@databuddy/ai/insights/validate"; -import type { - InsightMetricRow, - WeekOverWeekPeriod, -} from "@databuddy/ai/insights/types"; -import type { ParsedInsight } from "@databuddy/ai/schemas/smart-insights-output"; -import { insightsOutputSchema } from "@databuddy/ai/schemas/smart-insights-output"; -import { 
storeAnalyticsSummary } from "@databuddy/ai/lib/supermemory"; -import { getAILogger } from "../lib/ai-logger"; -import { captureError, mergeWideEvent } from "../lib/tracing"; - -const CACHE_TTL = 900; -const NEGATIVE_CACHE_TTL = Math.floor(CACHE_TTL / 3); -const CACHE_KEY_PREFIX = "ai-insights"; -const TIMEOUT_MS = 60_000; -const INSIGHTS_AGENT_MAX_STEPS = 24; -const INSIGHTS_AGENT_TIMEOUT_MS = 120_000; -const MAX_WEBSITES = 5; -const CONCURRENCY = 3; -const GENERATION_COOLDOWN_HOURS = 6; -const RECENT_INSIGHTS_LOOKBACK_DAYS = 14; -const RECENT_INSIGHTS_PROMPT_LIMIT = 12; -const TOP_INSIGHTS_LIMIT = 10; - -interface WebsiteInsight extends ParsedInsight { - id: string; - link: string; - websiteDomain: string; - websiteId: string; - websiteName: string | null; -} - -interface InsightsPayload { - insights: WebsiteInsight[]; - source: "ai" | "fallback"; -} - -interface OrgWebsiteRow { - domain: string; - id: string; - name: string | null; -} - -function dedupeKeyFor(insight: WebsiteInsight): string { - return insightDedupeKey({ - ...insight, - changePercent: insight.changePercent ?? 
null, - }); -} - -function buildInsightLink(websiteId: string, insight: ParsedInsight): string { - const base = `/websites/${websiteId}`; - if ( - [ - "error_spike", - "new_errors", - "persistent_error_hotspot", - "reliability_improved", - ].includes(insight.type) - ) { - return `${base}/errors`; - } - if ( - ["vitals_degraded", "performance", "performance_improved"].includes( - insight.type - ) - ) { - return `${base}/vitals`; - } - if (["conversion_leak", "funnel_regression"].includes(insight.type)) { - return `${base}/funnels`; - } - if ( - ["custom_event_spike", "engagement_change", "quality_shift"].includes( - insight.type - ) - ) { - return `${base}/events/stream`; - } - if (insight.type === "uptime_issue") { - return `${base}/anomalies`; - } - return base; -} - -interface RawInsightShape { - changePercent: number | null; - impactSummary: string | null; - metrics: unknown; - sentiment: string; - severity: string; - sources: unknown; - type: string; -} - -function parseInsightShape(r: RawInsightShape) { - return { - severity: r.severity as ParsedInsight["severity"], - sentiment: r.sentiment as ParsedInsight["sentiment"], - type: r.type as ParsedInsight["type"], - sources: - (r.sources as Array<"web" | "product" | "ops" | "business"> | null) ?? [], - metrics: (r.metrics as InsightMetricRow[] | null) ?? [], - changePercent: r.changePercent ?? undefined, - impactSummary: r.impactSummary ?? 
undefined, - }; -} - -async function userHasOrgAccess( - userId: string, - organizationId: string -): Promise { - const memberships = await db.query.member.findMany({ - where: { userId }, - columns: { organizationId: true }, - }); - return memberships.some((m) => m.organizationId === organizationId); -} - -async function userIsOrgAdmin( - userId: string, - organizationId: string -): Promise { - const membership = await db.query.member.findFirst({ - where: { userId, organizationId }, - columns: { role: true }, - }); - return membership?.role === "owner" || membership?.role === "admin"; -} - -function tryCacheSet( - redis: ReturnType, - key: string, - ttl: number, - payload: unknown -): void { - if (!redis) { - return; - } - redis.setex(key, ttl, JSON.stringify(payload)).catch((error: unknown) => { - useLogger().info("Insights cache write failed (best-effort)", { - insights: { key, error }, - }); - }); -} - -async function fetchRecentAnnotations(websiteId: string): Promise { - const since = dayjs().subtract(14, "day").toDate(); - - const rows = await db - .select({ - text: annotations.text, - xValue: annotations.xValue, - tags: annotations.tags, - }) - .from(annotations) - .where( - and( - eq(annotations.websiteId, websiteId), - gte(annotations.xValue, since), - isNull(annotations.deletedAt) - ) - ) - .orderBy(annotations.xValue) - .limit(20); - - if (rows.length === 0) { - return ""; - } - - const lines = rows.map((r) => { - const date = dayjs(r.xValue).format("YYYY-MM-DD"); - const tags = r.tags?.length ? 
` [${r.tags.join(", ")}]` : ""; - return `- ${date}: ${r.text}${tags}`; - }); - - return `\n\nUser annotations (known events that may explain changes):\n${lines.join("\n")}`; -} - -async function fetchRecentInsightsForPrompt( - organizationId: string, - websiteId: string -): Promise { - const since = dayjs().subtract(RECENT_INSIGHTS_LOOKBACK_DAYS, "day").toDate(); - - const rows = await db - .select({ - title: analyticsInsights.title, - type: analyticsInsights.type, - createdAt: analyticsInsights.createdAt, - }) - .from(analyticsInsights) - .where( - and( - eq(analyticsInsights.organizationId, organizationId), - eq(analyticsInsights.websiteId, websiteId), - gte(analyticsInsights.createdAt, since) - ) - ) - .orderBy(desc(analyticsInsights.createdAt)) - .limit(RECENT_INSIGHTS_PROMPT_LIMIT); - - if (rows.length === 0) { - return ""; - } - - const lines = rows.map( - (r) => - `- [${r.type}] ${r.title} (${dayjs(r.createdAt).format("YYYY-MM-DD")})` - ); - - return `\n\n## Recently reported insights for this website (avoid repeating the same narrative unless something materially changed)\n${lines.join("\n")}`; -} - -function formatOrgWebsitesContext( - orgSites: OrgWebsiteRow[], - currentWebsiteId: string -): string { - if (orgSites.length <= 1) { - return ""; - } - const sorted = [...orgSites].sort((a, b) => - a.domain.localeCompare(b.domain, "en") - ); - const lines = sorted.map((s) => { - const label = s.name?.trim() ? s.name.trim() : s.domain; - const marker = - s.id === currentWebsiteId - ? " — **metrics below are for this site only**" - : ""; - return `- ${label} (${s.domain})${marker}`; - }); - return `## Organization websites (same account, separate analytics) -Each row is a different tracked property (e.g. marketing site vs app vs docs). The week-over-week metrics in this message apply only to the site marked "metrics below". Do not blend numbers across rows. If referrers include another domain from this list, treat it as cross-property traffic (e.g. 
landing → product) and name both sides clearly. - -${lines.join("\n")} - -`; -} - -const INSIGHTS_SYSTEM_PROMPT = ` -You are an analytics insights engine. Return exactly 3 week-over-week insights when there are 3 distinct data-backed signals; otherwise return only the distinct signals that exist. Rank by actionability and user/business impact. - - - -- Write for a founder/operator, not an analytics engineer. Translate technical metrics into plain outcomes: "interactions got slower", "pages feel slower", "setup is leaking users", "one source now dominates traffic". -- Prefer reliability, conversion/product impact, engagement quality, broken instrumentation, and meaningful behavior changes over vanity traffic spikes. -- Score actionability × impact, not raw percentage magnitude. Reserve priority 8-10 for likely user, revenue, or operational impact. -- Prefer fewer, sharper insights over broad coverage. Return only signals a user can act on this week. -- Avoid repeating recently reported narratives unless the signal materially changed. - - - -- Use only provided data, tool results, annotations, and recent-insight context. -- Do not invent revenue, signups, retention, funnel conversion, causality, root causes, or business impact. -- If multiple org websites are listed, keep properties separate; cross-domain referrers are cross-property traffic, not generic referrals. -- Use cautious language for correlations unless segment-level evidence directly proves the cause. -- Do not punt, apologize, or say you cannot produce insights when any useful metrics exist. If one query is sparse, use stronger available evidence and lower confidence. - - - -- Prefer 3 concise insights: reliability/product risk first, then engagement/acquisition opportunity. Do not make near-duplicates. -- Each insight must be one clear signal with 1-5 metrics; primary metric first. -- Metrics array owns the numbers. Description/suggestion should reference metric labels, not restate values. 
-- Keep title under 80 chars, description under 320 chars, suggestion under 260 chars. -- Titles must be plain English and user-facing. Do not put raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75 in titles; put technical metric names only in the metrics array. -- Keep description 1-2 concise sentences: what changed, why it matters, and whether cause is evidence or hypothesis. -- Suggestion must be a specific next action with an operational verb such as inspect, review, compare, segment, drill into, fix, audit, trace, or verify. Never use generic monitoring advice. -- Suggestion must name the exact product surface to inspect next: funnel step, goal, referrer segment, page path, error class, session stream, web vital, flag rollout, or agent diagnostic prompt. -- subjectKey must be stable; sources must include only evidence domains used; confidence 0-1 should reflect evidence strength. -- impactSummary is optional, one sentence under 220 characters. - - - -Good: Error Rate rose while Sessions stayed stable -> reliability issue; suggest reviewing affected page/errors first. -Good: INP p75 rose -> title "Interactions got slower"; metrics can still include "INP p75". -Good: Onboarding step 2 drop-off is 80% -> title "Onboarding is leaking at step 2". -Bad: Pricing Visitors rose -> "revenue opportunity" without business data. -Bad: Twitter rose and Bounce Rate worsened -> "Twitter caused the drop" without segmented engagement data. -Bad: "INP p75 still rising" as a title; users should not need to know web-vitals acronyms. - - - -Before finalizing: exactly 3 if data supports it, data-backed only, metrics present, primary metric first, no duplicate narrative, concise copy, specific action, named product surface, no punt on partial data. 
-`; - -async function validateOrRepairInsights( - insights: ParsedInsight[], - context: { domain: string; mode: "agent" | "legacy"; websiteId: string } -): Promise { - const validated = validateInsights(insights); - if (validated.warnings.length > 0) { - useLogger().warn("Insights validation repaired or dropped output", { - insights: { - websiteId: context.websiteId, - mode: context.mode, - warnings: validated.warnings, - }, - }); - } - - const targetCount = Math.min(3, insights.length); - if (targetCount === 0 || validated.insights.length >= targetCount) { - return validated.insights; - } - - try { - const ai = getAILogger(); - const repair = await generateText({ - model: ai.wrap(models.balanced), - output: Output.object({ schema: insightsOutputSchema }), - messages: [ - { - role: "system", - content: `Repair Databuddy insight cards. Return exactly ${targetCount} concise, valid cards when the source contains ${targetCount} distinct data-backed signals. Use only the provided metrics and claims; do not invent numbers, causes, revenue impact, or new entities. Keep title <=80 chars, description <=320 chars, suggestion <=260 chars. Write for a founder/operator: titles must be plain English and avoid raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75. Technical metric names may remain in the metrics array. Suggestions need specific operational actions, not monitoring. 
Soften unsupported causality.`, - }, - { - role: "user", - content: JSON.stringify( - { - domain: context.domain, - validationWarnings: validated.warnings, - originalInsights: insights, - }, - null, - 2 - ), - }, - ], - temperature: 0, - maxOutputTokens: 4096, - abortSignal: AbortSignal.timeout(30_000), - experimental_telemetry: { - isEnabled: true, - functionId: "databuddy.insights.repair", - metadata: { - source: "insights", - feature: "smart_insights", - mode: context.mode, - websiteId: context.websiteId, - websiteDomain: context.domain, - }, - }, - }); - - const repairedOutput = repair.output?.insights ?? []; - const repaired = validateInsights(repairedOutput); - if (repaired.warnings.length > 0) { - useLogger().warn("Insights repair validation warnings", { - insights: { - websiteId: context.websiteId, - mode: context.mode, - warnings: repaired.warnings, - }, - }); - } - - if (repaired.insights.length >= validated.insights.length) { - return repaired.insights.slice(0, targetCount); - } - } catch (error) { - useLogger().warn("Insights repair failed", { - insights: { websiteId: context.websiteId, mode: context.mode, error }, - }); - } - - return validated.insights; -} - -async function analyzeWebsiteLegacy( - organizationId: string, - userId: string, - websiteId: string, - domain: string, - timezone: string, - period: WeekOverWeekPeriod, - orgSites: OrgWebsiteRow[], - annotationContext: string, - recentInsightsBlock: string -): Promise { - const currentRange = period.current; - const previousRange = period.previous; - - const [current, previous] = await Promise.all([ - fetchWebPeriodData( - websiteId, - domain, - currentRange.from, - currentRange.to, - timezone - ), - fetchWebPeriodData( - websiteId, - domain, - previousRange.from, - previousRange.to, - timezone - ), - ]); - - const hasData = current.summary.length > 0 || current.topPages.length > 0; - if (!hasData) { - return []; - } - - const dataSection = formatLegacyWebDataForPrompt( - current, - previous, - 
currentRange, - previousRange - ); - - const orgContext = formatOrgWebsitesContext(orgSites, websiteId); - const prompt = `Analyze this website's week-over-week data and return insights.\n\n${orgContext}${dataSection}${annotationContext}${recentInsightsBlock}`; - - try { - const ai = getAILogger(); - const result = await generateText({ - model: ai.wrap(models.balanced), - output: Output.object({ schema: insightsOutputSchema }), - messages: [ - { - role: "system", - content: INSIGHTS_SYSTEM_PROMPT, - providerOptions: ANTHROPIC_CACHE_1H, - }, - { role: "user", content: prompt }, - ], - temperature: 0.2, - maxOutputTokens: 8192, - abortSignal: AbortSignal.timeout(TIMEOUT_MS), - experimental_telemetry: { - isEnabled: true, - functionId: "databuddy.insights.analyze_website", - metadata: { - source: "insights", - feature: "smart_insights", - mode: "legacy_fallback", - organizationId, - userId, - websiteId, - websiteDomain: domain, - timezone, - }, - }, - }); - - if (!result.output) { - useLogger().warn("No structured output from insights model (legacy)", { - insights: { websiteId }, - }); - return []; - } - - return await validateOrRepairInsights(result.output.insights, { - domain, - mode: "legacy", - websiteId, - }); - } catch (error) { - useLogger().warn("Failed to generate insights (legacy)", { - insights: { websiteId, error }, - }); - return []; - } -} - -async function analyzeWebsite( - organizationId: string, - userId: string, - websiteId: string, - domain: string, - timezone: string, - period: WeekOverWeekPeriod, - orgSites: OrgWebsiteRow[], - requestHeaders: Headers -): Promise { - const currentRange = period.current; - const previousRange = period.previous; - - const hasData = await hasWebInsightData( - websiteId, - domain, - currentRange.from, - currentRange.to, - timezone - ); - if (!hasData) { - return []; - } - - const [annotationContext, recentInsightsBlock] = await Promise.all([ - fetchRecentAnnotations(websiteId), - 
fetchRecentInsightsForPrompt(organizationId, websiteId), - ]); - - const orgContext = formatOrgWebsitesContext(orgSites, websiteId); - const userPrompt = `Analyze this website's week-over-week data and produce insights. - -**Current period:** ${currentRange.from} to ${currentRange.to} -**Previous period:** ${previousRange.from} to ${previousRange.to} -**Timezone:** ${timezone} -**Domain:** ${domain} - -Use web_metrics to pull metrics for both current and previous periods before inferring trends. Start with summary_metrics for both periods, then add top_pages, error_summary, top_referrers, country, browser_name, vitals_overview, or custom_events queries only when they sharpen the narrative. Use product_metrics for goals, funnels, retention, and custom event behavior when a traffic change may have downstream product impact. Use ops_context for page-level errors, uptime, anomaly signals, and recent flag rollouts when reliability or product changes may explain the trend. Use business_context for revenue totals, attribution, and product mix when commercial impact matters. 
- -${orgContext}${annotationContext}${recentInsightsBlock}`; - - const { tools } = createInsightsAgentTools({ - websiteId, - domain, - timezone, - periodBounds: { current: currentRange, previous: previousRange }, - }); - - try { - const appContext: AppContext = { - userId, - organizationId, - websiteId, - websiteDomain: domain, - timezone, - currentDateTime: new Date().toISOString(), - chatId: `insights:${organizationId}:${websiteId}`, - requestHeaders, - }; - - const ai = getAILogger(); - const agent = new ToolLoopAgent({ - model: ai.wrap(models.balanced), - instructions: { - role: "system", - content: INSIGHTS_SYSTEM_PROMPT, - providerOptions: ANTHROPIC_CACHE_1H, - }, - output: Output.object({ schema: insightsOutputSchema }), - tools, - stopWhen: stepCountIs(INSIGHTS_AGENT_MAX_STEPS), - prepareStep: ({ stepNumber }) => { - if (stepNumber === 0) { - return { - activeTools: ["web_metrics"], - toolChoice: { type: "tool", toolName: "web_metrics" }, - }; - } - return {}; - }, - onStepFinish: ({ usage, finishReason, toolCalls }) => { - const toolNames = toolCalls.map((toolCall) => toolCall.toolName); - mergeWideEvent({ - insights_agent_step_tool_calls: toolCalls.length, - insights_agent_step_total_tokens: usage?.totalTokens ?? 
0, - insights_agent_step_used_tools: toolNames.length > 0, - }); - useLogger().info("Insights agent step finished", { - insights: { - websiteId, - finishReason, - toolCalls: toolNames, - totalTokens: usage?.totalTokens, - }, - }); - }, - temperature: 0.2, - experimental_context: appContext, - experimental_telemetry: { - isEnabled: true, - functionId: "databuddy.insights.analyze_website", - metadata: { - source: "insights", - feature: "smart_insights", - mode: "agent", - organizationId, - userId, - websiteId, - websiteDomain: domain, - timezone, - }, - }, - }); - - const result = await agent.generate({ - messages: [{ role: "user", content: userPrompt }], - abortSignal: AbortSignal.timeout(INSIGHTS_AGENT_TIMEOUT_MS), - }); - - if (result.output?.insights?.length) { - return await validateOrRepairInsights(result.output.insights, { - domain, - mode: "agent", - websiteId, - }); - } - - useLogger().warn("Insights agent finished without structured output", { - insights: { websiteId }, - }); - } catch (error) { - useLogger().warn("Insights agent failed, using legacy fallback", { - insights: { websiteId, error }, - }); - } - - return analyzeWebsiteLegacy( - organizationId, - userId, - websiteId, - domain, - timezone, - period, - orgSites, - annotationContext, - recentInsightsBlock - ); -} - -async function processInBatches( - items: T[], - action: (item: T) => Promise, - limit: number -): Promise { - const results: R[] = []; - let nextIndex = 0; - - async function worker() { - while (true) { - const index = nextIndex; - nextIndex += 1; - if (index >= items.length) { - break; - } - const item = items[index]; - if (item === undefined) { - break; - } - results.push(await action(item)); - } - } - - await Promise.all( - Array.from({ length: Math.min(limit, items.length) }, () => worker()) - ); - return results; -} - -async function getRecentInsightsFromDb( - organizationId: string -): Promise { - const cutoff = dayjs().subtract(GENERATION_COOLDOWN_HOURS, "hour").toDate(); - - 
const rows = await db - .select({ - id: analyticsInsights.id, - websiteId: analyticsInsights.websiteId, - websiteName: websites.name, - websiteDomain: websites.domain, - title: analyticsInsights.title, - description: analyticsInsights.description, - suggestion: analyticsInsights.suggestion, - severity: analyticsInsights.severity, - sentiment: analyticsInsights.sentiment, - type: analyticsInsights.type, - priority: analyticsInsights.priority, - changePercent: analyticsInsights.changePercent, - subjectKey: analyticsInsights.subjectKey, - sources: analyticsInsights.sources, - confidence: analyticsInsights.confidence, - impactSummary: analyticsInsights.impactSummary, - metrics: analyticsInsights.metrics, - createdAt: analyticsInsights.createdAt, - }) - .from(analyticsInsights) - .innerJoin(websites, eq(analyticsInsights.websiteId, websites.id)) - .where( - and( - eq(analyticsInsights.organizationId, organizationId), - gte(analyticsInsights.createdAt, cutoff), - isNull(websites.deletedAt) - ) - ) - .orderBy(desc(analyticsInsights.priority)) - .limit(10); - - if (rows.length === 0) { - return null; - } - - return rows.map( - (r): WebsiteInsight => ({ - id: r.id, - websiteId: r.websiteId, - websiteName: r.websiteName, - websiteDomain: r.websiteDomain, - link: `/websites/${r.websiteId}`, - title: r.title, - description: r.description, - suggestion: r.suggestion, - priority: r.priority, - subjectKey: r.subjectKey, - confidence: r.confidence, - ...parseInsightShape(r), - }) - ); -} - -function getRedis() { - try { - return getRedisCache(); - } catch { - return null; - } -} - -async function invalidateInsightsCacheForOrg( - organizationId: string -): Promise { - const redis = getRedis(); - if (!redis) { - return; - } - const pattern = `${CACHE_KEY_PREFIX}:${organizationId}:*`; - let cursor = "0"; - try { - do { - const [nextCursor, keys] = (await redis.scan( - cursor, - "MATCH", - pattern, - "COUNT", - 100 - )) as [string, string[]]; - cursor = nextCursor; - if (keys.length > 
0) { - await redis.del(...keys); - } - } while (cursor !== "0"); - - await invalidateInsightsCachesForOrganization(organizationId); - } catch (error) { - useLogger().info("Insights cache invalidation scan failed (best-effort)", { - insights: { organizationId, error }, - }); - } -} - -const NARRATIVE_RATE_LIMIT = 30; -const NARRATIVE_RATE_WINDOW_SECS = 3600; -const NARRATIVE_CACHE_TTL_SECS = 3600; -const NARRATIVE_INSIGHTS_LIMIT = 5; - -const RANGE_WORDS: Record = { - "7d": "week", - "30d": "month", - "90d": "quarter", -}; - -function rangeWord(range: "7d" | "30d" | "90d"): string { - return RANGE_WORDS[range] ?? "quarter"; -} - -function buildDeterministicNarrative( - range: "7d" | "30d" | "90d", - topInsights: { - title: string; - severity: string; - websiteName: string | null; - }[] -): string { - const word = rangeWord(range); - const headline = topInsights[0]; - if (!headline) { - return `All systems healthy this ${word}. No actionable signals detected.`; - } - const siteSuffix = headline.websiteName ? ` on ${headline.websiteName}` : ""; - if (topInsights.length === 1) { - return `This ${word}: ${headline.title}${siteSuffix}.`; - } - const extra = topInsights.length - 1; - return `This ${word}: ${headline.title}${siteSuffix}, plus ${extra} more signal${extra === 1 ? 
"" : "s"} worth reviewing.`; -} - -const RANGE_TO_DAYS = { "7d": 7, "30d": 30, "90d": 90 } as const; - -const generateNarrativeCached = cacheable( - async function generateNarrativeCached( - organizationId: string, - range: "7d" | "30d" | "90d" - ): Promise<{ narrative: string }> { - const cutoff = dayjs().subtract(RANGE_TO_DAYS[range], "day").toDate(); - - const topInsights = await db - .select({ - title: analyticsInsights.title, - description: analyticsInsights.description, - severity: analyticsInsights.severity, - changePercent: analyticsInsights.changePercent, - websiteName: websites.name, - }) - .from(analyticsInsights) - .innerJoin(websites, eq(analyticsInsights.websiteId, websites.id)) - .where( - and( - eq(analyticsInsights.organizationId, organizationId), - gte(analyticsInsights.createdAt, cutoff), - isNull(websites.deletedAt) - ) - ) - .orderBy(desc(analyticsInsights.priority)) - .limit(NARRATIVE_INSIGHTS_LIMIT); - - if (topInsights.length === 0) { - return { - narrative: `All systems healthy this ${rangeWord(range)}. No actionable signals detected.`, - }; - } - - const insightLines = topInsights.map((ins) => { - const site = ins.websiteName ? ` [${ins.websiteName}]` : ""; - const change = - ins.changePercent == null - ? "" - : ` (${ins.changePercent > 0 ? "+" : ""}${ins.changePercent.toFixed(0)}%)`; - return `- [${ins.severity}] ${ins.title}${change}${site}: ${ins.description ?? ""}`; - }); - - const prompt = `You are an analytics assistant summarizing an organization's state over the last ${range}. - -Write a crisp 2–3 sentence executive summary of the top insights below. 
- -Rules: -- Lead with the most important change -- Include concrete numbers when available -- Never exceed 60 words total -- State facts, do not editorialize -- If nothing meaningful is happening, say so plainly - -Top signals this ${range}: -${insightLines.join("\n")}`; - - let narrative = ""; - try { - const result = await generateText({ - model: getAILogger().wrap(models.balanced), - prompt, - temperature: 0.2, - maxOutputTokens: 200, - }); - narrative = result.text.trim(); - } catch (error) { - useLogger().warn("Narrative LLM call failed", { - insights: { organizationId, range, error }, - }); - } - - if (!narrative) { - narrative = buildDeterministicNarrative(range, topInsights); - mergeWideEvent({ insights_narrative_fallback: true }); - } - - return { narrative }; - }, - { - expireInSec: NARRATIVE_CACHE_TTL_SECS, - prefix: cacheNamespaces.insightsNarrative, - tags: (_result, organizationId) => [cacheTags.organization(organizationId)], - } -); - -export const insights = new Elysia({ prefix: "/v1/insights" }) - .derive(async ({ request }) => { - const session = await auth.api.getSession({ headers: request.headers }); - return { user: session?.user ?? null, requestHeaders: request.headers }; - }) - .onBeforeHandle(({ user, set }) => { - if (!user) { - mergeWideEvent({ insights_ai_auth: "unauthorized" }); - set.status = 401; - return { - success: false, - error: "Authentication required", - code: "AUTH_REQUIRED", - }; - } - }) - .get( - "/history", - async ({ query, user, set }) => { - const userId = user?.id; - if (!userId) { - return { success: false, error: "User ID required", insights: [] }; - } - - const { organizationId, websiteId: websiteIdFilter } = query; - const limitParsed = Number.parseInt(query.limit ?? "50", 10); - const limit = Number.isFinite(limitParsed) - ? Math.min(Math.max(limitParsed, 1), 100) - : 50; - const offsetParsed = Number.parseInt(query.offset ?? "0", 10); - const offset = Number.isFinite(offsetParsed) - ? 
Math.max(offsetParsed, 0) - : 0; - - mergeWideEvent({ insights_history_org_id: organizationId }); - - if (!(await userHasOrgAccess(userId, organizationId))) { - mergeWideEvent({ insights_history_access: "denied" }); - set.status = 403; - return { - success: false, - error: "Access denied to this organization", - insights: [], - }; - } - - const whereClause = websiteIdFilter - ? and( - eq(analyticsInsights.organizationId, organizationId), - eq(analyticsInsights.websiteId, websiteIdFilter), - isNull(websites.deletedAt) - ) - : and( - eq(analyticsInsights.organizationId, organizationId), - isNull(websites.deletedAt) - ); - - const rows = await db - .select({ - id: analyticsInsights.id, - runId: analyticsInsights.runId, - websiteId: analyticsInsights.websiteId, - websiteName: websites.name, - websiteDomain: websites.domain, - title: analyticsInsights.title, - description: analyticsInsights.description, - suggestion: analyticsInsights.suggestion, - severity: analyticsInsights.severity, - sentiment: analyticsInsights.sentiment, - type: analyticsInsights.type, - priority: analyticsInsights.priority, - changePercent: analyticsInsights.changePercent, - subjectKey: analyticsInsights.subjectKey, - sources: analyticsInsights.sources, - confidence: analyticsInsights.confidence, - impactSummary: analyticsInsights.impactSummary, - metrics: analyticsInsights.metrics, - createdAt: analyticsInsights.createdAt, - currentPeriodFrom: analyticsInsights.currentPeriodFrom, - currentPeriodTo: analyticsInsights.currentPeriodTo, - previousPeriodFrom: analyticsInsights.previousPeriodFrom, - previousPeriodTo: analyticsInsights.previousPeriodTo, - timezone: analyticsInsights.timezone, - }) - .from(analyticsInsights) - .innerJoin(websites, eq(analyticsInsights.websiteId, websites.id)) - .where(whereClause) - .orderBy(desc(analyticsInsights.createdAt)) - .limit(limit) - .offset(offset); - - const insights = rows.map((r) => ({ - id: r.id, - runId: r.runId, - websiteId: r.websiteId, - websiteName: 
r.websiteName, - websiteDomain: r.websiteDomain, - link: `/websites/${r.websiteId}`, - title: r.title, - description: r.description, - suggestion: r.suggestion, - priority: r.priority, - subjectKey: r.subjectKey, - confidence: r.confidence, - ...parseInsightShape(r), - createdAt: r.createdAt.toISOString(), - currentPeriodFrom: r.currentPeriodFrom, - currentPeriodTo: r.currentPeriodTo, - previousPeriodFrom: r.previousPeriodFrom, - previousPeriodTo: r.previousPeriodTo, - timezone: r.timezone, - })); - - return { - success: true, - insights, - hasMore: rows.length === limit, - }; - }, - { - query: t.Object({ - organizationId: t.String(), - limit: t.Optional(t.String()), - offset: t.Optional(t.String()), - websiteId: t.Optional(t.String()), - }), - } - ) - .get( - "/org-narrative", - async ({ query, user, set }) => { - const userId = user?.id; - if (!userId) { - return { success: false, error: "User ID required" }; - } - - const { organizationId, range } = query; - mergeWideEvent({ - insights_narrative_org_id: organizationId, - insights_narrative_range: range, - }); - - if (!(await userHasOrgAccess(userId, organizationId))) { - mergeWideEvent({ insights_narrative_access: "denied" }); - set.status = 403; - return { success: false, error: "Access denied to this organization" }; - } - - const rl = await ratelimit( - `insights:narrative:${organizationId}:${userId}`, - NARRATIVE_RATE_LIMIT, - NARRATIVE_RATE_WINDOW_SECS - ); - const rlHeaders = getRateLimitHeaders(rl); - for (const [key, value] of Object.entries(rlHeaders)) { - set.headers[key] = value; - } - if (!rl.success) { - set.status = 429; - return { - success: false, - error: "Rate limit exceeded. 
Try again later.", - }; - } - - try { - const { narrative } = await generateNarrativeCached( - organizationId, - range - ); - return { - success: true, - narrative, - generatedAt: new Date().toISOString(), - }; - } catch (error) { - captureError(error, { - insights_narrative_org_id: organizationId, - insights_narrative_range: range, - }); - useLogger().warn("Failed to generate org narrative", { - insights: { organizationId, range, error }, - }); - set.status = 500; - return { success: false, error: "Could not generate narrative" }; - } - }, - { - query: t.Object({ - organizationId: t.String(), - range: t.Union([t.Literal("7d"), t.Literal("30d"), t.Literal("90d")]), - }), - } - ) - .post( - "/clear", - async ({ body, user, set }) => { - const userId = user?.id; - if (!userId) { - return { success: false, error: "User ID required", deleted: 0 }; - } - - const { organizationId } = body; - mergeWideEvent({ insights_clear_org_id: organizationId }); - - if (!(await userIsOrgAdmin(userId, organizationId))) { - set.status = 403; - return { - success: false, - error: "Owner or admin role required to clear insights", - deleted: 0, - }; - } - - const idRows = await db - .select({ id: analyticsInsights.id }) - .from(analyticsInsights) - .where(eq(analyticsInsights.organizationId, organizationId)); - - const ids = idRows.map((r) => r.id); - - if (ids.length > 0) { - await db - .delete(insightUserFeedback) - .where( - and( - eq(insightUserFeedback.organizationId, organizationId), - inArray(insightUserFeedback.insightId, ids) - ) - ); - await db - .delete(analyticsInsights) - .where(eq(analyticsInsights.organizationId, organizationId)); - } - - await invalidateInsightsCacheForOrg(organizationId); - await invalidateAgentContextSnapshotsForOwner(organizationId); - mergeWideEvent({ insights_cleared: ids.length }); - - return { success: true, deleted: ids.length }; - }, - { - body: t.Object({ - organizationId: t.String(), - }), - } - ) - .post( - "/ai", - async ({ body, user, set, 
requestHeaders }) => { - const userId = user?.id; - if (!userId) { - mergeWideEvent({ insights_ai_error: "missing_user_id" }); - return { success: false, error: "User ID required", insights: [] }; - } - - const { organizationId, timezone = "UTC" } = body; - mergeWideEvent({ - insights_org_id: organizationId, - insights_timezone: timezone, - }); - - if (!(await userHasOrgAccess(userId, organizationId))) { - mergeWideEvent({ insights_access: "denied" }); - set.status = 403; - return { - success: false, - error: "Access denied to this organization", - insights: [], - }; - } - - const redis = getRedis(); - const cacheKey = `${CACHE_KEY_PREFIX}:${organizationId}:${timezone}`; - - if (redis) { - try { - const cached = await redis.get(cacheKey); - if (cached) { - mergeWideEvent({ insights_cache: "hit" }); - const payload = JSON.parse(cached) as InsightsPayload; - return { success: true, ...payload }; - } - } catch (error) { - useLogger().info( - "Insights cache read failed; continuing without cache", - { - insights: { error }, - } - ); - } - } - - mergeWideEvent({ insights_cache: "miss" }); - - const recentInsights = await getRecentInsightsFromDb(organizationId); - if (recentInsights) { - mergeWideEvent({ - insights_returned: recentInsights.length, - insights_source: "db_cooldown", - }); - const payload: InsightsPayload = { - insights: recentInsights, - source: "ai", - }; - tryCacheSet(redis, cacheKey, CACHE_TTL, payload); - return { success: true, ...payload }; - } - - const orgSites = await db.query.websites.findMany({ - where: { organizationId, deletedAt: { isNull: true } }, - columns: { id: true, name: true, domain: true }, - }); - - if (orgSites.length === 0) { - mergeWideEvent({ insights_websites: 0 }); - return { success: true, insights: [], source: "ai" }; - } - - try { - const period = getWeekOverWeekPeriod(); - const dedupeKeyToId = - await fetchInsightDedupeKeyToIdMap(organizationId); - const groups = await processInBatches( - orgSites.slice(0, MAX_WEBSITES), - 
async (site: { id: string; name: string | null; domain: string }) => { - const results = await analyzeWebsite( - organizationId, - userId, - site.id, - site.domain, - timezone, - period, - orgSites, - requestHeaders - ); - return results.map( - (insight): WebsiteInsight => ({ - ...insight, - id: crypto.randomUUID(), - websiteId: site.id, - websiteName: site.name, - websiteDomain: site.domain, - link: buildInsightLink(site.id, insight), - }) - ); - }, - CONCURRENCY - ); - - const merged = groups.flat().sort((a, b) => b.priority - a.priority); - const seenInBatch = new Set(); - const sorted: WebsiteInsight[] = []; - for (const insight of merged) { - const key = dedupeKeyFor(insight); - if (seenInBatch.has(key)) { - continue; - } - seenInBatch.add(key); - const existingId = dedupeKeyToId.get(key); - sorted.push(existingId ? { ...insight, id: existingId } : insight); - if (sorted.length >= TOP_INSIGHTS_LIMIT) { - break; - } - } - - const runId = crypto.randomUUID(); - let finalInsights: WebsiteInsight[] = sorted; - if (sorted.length > 0) { - const toInsert = sorted - .filter((insight) => { - const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); - return !(existingId && insight.id === existingId); - }) - .map((insight) => ({ - id: insight.id, - organizationId, - websiteId: insight.websiteId, - runId, - title: insight.title, - description: insight.description, - suggestion: insight.suggestion, - severity: insight.severity, - sentiment: insight.sentiment, - type: insight.type, - priority: insight.priority, - changePercent: insight.changePercent ?? null, - subjectKey: insight.subjectKey, - sources: insight.sources, - confidence: insight.confidence, - impactSummary: insight.impactSummary ?? null, - metrics: insight.metrics.length > 0 ? 
insight.metrics : null, - timezone, - currentPeriodFrom: period.current.from, - currentPeriodTo: period.current.to, - previousPeriodFrom: period.previous.from, - previousPeriodTo: period.previous.to, - })); - - const updatePayload = { - runId, - timezone, - currentPeriodFrom: period.current.from, - currentPeriodTo: period.current.to, - previousPeriodFrom: period.previous.from, - previousPeriodTo: period.previous.to, - createdAt: new Date(), - }; - - try { - if (toInsert.length > 0) { - await db.insert(analyticsInsights).values(toInsert); - } - const toRefresh = sorted.filter((insight) => { - const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); - return existingId !== undefined && insight.id === existingId; - }); - await Promise.all( - toRefresh.map((insight) => - db - .update(analyticsInsights) - .set({ - ...updatePayload, - title: insight.title, - description: insight.description, - suggestion: insight.suggestion, - severity: insight.severity, - sentiment: insight.sentiment, - type: insight.type, - priority: insight.priority, - changePercent: insight.changePercent ?? null, - subjectKey: insight.subjectKey, - sources: insight.sources, - confidence: insight.confidence, - impactSummary: insight.impactSummary ?? null, - metrics: - insight.metrics.length > 0 ? 
insight.metrics : null, - }) - .where(eq(analyticsInsights.id, insight.id)) - ) - ); - } catch (error) { - useLogger().warn("Failed to persist analytics insights", { - insights: { organizationId, error }, - }); - finalInsights = []; - mergeWideEvent({ insights_persist_failed: true }); - } - - if (finalInsights.length > 0) { - await invalidateInsightsCacheForOrg(organizationId); - } - - await Promise.all( - [...new Set(finalInsights.map((insight) => insight.websiteId))].map( - (websiteId) => - invalidateAgentContextSnapshotsForWebsite(websiteId) - ) - ); - } - - for (const site of orgSites.slice(0, MAX_WEBSITES)) { - const siteInsights = finalInsights.filter( - (s) => s.websiteId === site.id - ); - if (siteInsights.length > 0) { - const summary = siteInsights - .map( - (s) => - `[${s.severity}] ${s.title}: ${s.description} Suggestion: ${s.suggestion}` - ) - .join("\n"); - storeAnalyticsSummary( - `Weekly insights for ${site.domain} (${dayjs().format("YYYY-MM-DD")}):\n${summary}`, - site.id, - { period: "weekly" } - ).catch((error: unknown) => { - useLogger().warn("Failed to store analytics summary", { - insights: { websiteId: site.id, error }, - }); - }); - } - } - - const payload: InsightsPayload = { - insights: finalInsights, - source: "ai", - }; - - tryCacheSet( - redis, - cacheKey, - finalInsights.length > 0 ? CACHE_TTL : NEGATIVE_CACHE_TTL, - payload - ); - - mergeWideEvent({ - insights_returned: finalInsights.length, - insights_source: "ai", - }); - return { success: true, ...payload }; - } catch (error) { - mergeWideEvent({ insights_error: true }); - useLogger().error( - error instanceof Error ? 
error : new Error(String(error)), - { insights: { organizationId } } - ); - return { success: false, insights: [], source: "fallback" }; - } - }, - { - body: t.Object({ - organizationId: t.String(), - timezone: t.Optional(t.String()), - }), - idleTimeout: 240_000, - } - ); diff --git a/apps/api/src/routes/query.ts b/apps/api/src/routes/query.ts index 56b5d90c7..f572cf960 100644 --- a/apps/api/src/routes/query.ts +++ b/apps/api/src/routes/query.ts @@ -8,6 +8,7 @@ import { isApiKeyPresent, } from "@databuddy/api-keys/resolve"; import { db } from "@databuddy/db"; +import { readBooleanEnv } from "@databuddy/env/boolean"; import { ratelimit } from "@databuddy/redis/rate-limit"; import { getBillingOwner, @@ -20,7 +21,10 @@ import { } from "@databuddy/shared/types/features"; import type { CustomQueryRequest } from "@databuddy/ai/query/custom-query-types"; import { compileQuery, executeBatch } from "@databuddy/ai/query"; -import { QueryBuilders } from "@databuddy/ai/query/builders"; +import { + canReadQueryTypesPublicly, + QueryBuilders, +} from "@databuddy/ai/query/builders"; import { executeCustomQuery } from "@databuddy/ai/query/custom-query-builder"; import { isNormalizedQueryDate, @@ -30,7 +34,6 @@ import type { Filter, QueryRequest } from "@databuddy/ai/query/types"; import { Elysia, t } from "elysia"; import { getAccessibleWebsites } from "../lib/accessible-websites"; import { resolveDatePreset } from "../lib/date-presets"; -import { isPublicQueryAccess } from "../lib/public-query-access"; import { mergeWideEvent } from "../lib/tracing"; import { getCachedWebsiteDomain, getWebsiteDomain } from "../lib/website-utils"; import { @@ -41,6 +44,47 @@ import { type DynamicQueryRequestType, } from "../schemas/query-schemas"; +const parsedPerWebsiteQueryConcurrency = Number( + process.env.PER_WEBSITE_QUERY_CONCURRENCY ?? 8 +); +const PER_WEBSITE_QUERY_CONCURRENCY = Number.isFinite( + parsedPerWebsiteQueryConcurrency +) + ? 
Math.max(1, parsedPerWebsiteQueryConcurrency) + : 8; + +interface KeyedSemaphore { + active: number; + queue: Array<() => void>; +} + +const websiteSemaphores = new Map(); + +async function runPerWebsite(key: string, fn: () => Promise): Promise { + let sem = websiteSemaphores.get(key); + if (!sem) { + sem = { active: 0, queue: [] }; + websiteSemaphores.set(key, sem); + } + while (sem.active >= PER_WEBSITE_QUERY_CONCURRENCY) { + await new Promise((resolve) => { + (sem as KeyedSemaphore).queue.push(resolve); + }); + } + sem.active++; + try { + return await fn(); + } finally { + sem.active--; + const next = sem.queue.shift(); + if (next) { + next(); + } else if (sem.active === 0 && sem.queue.length === 0) { + websiteSemaphores.delete(key); + } + } +} + const DEFAULT_ALLOWED_FILTERS = [ "path", "query_string", @@ -328,6 +372,10 @@ async function enforceQueryRateLimit( requestId: string, request: Request ): Promise { + if (readBooleanEnv("DATABUDDY_E2E_MODE")) { + return null; + } + const principal = ctx.apiKey ? 
`apikey:${ctx.apiKey.id}` : ctx.user @@ -479,7 +527,7 @@ async function verifyWebsiteAccess( return false; } - if (website.isPublic && isPublicQueryAccess(queryTypes)) { + if (website.isPublic && canReadQueryTypesPublicly(queryTypes)) { mergeWideEvent({ access_result: "public_query" }); return true; } @@ -948,9 +996,11 @@ async function executeDynamicQuery( } if (validParameters.length > 0) { - const results = await executeBatch( - validParameters.map((v) => v.request), - { websiteDomain: domain, timezone } + const results = await runPerWebsite(projectId, () => + executeBatch( + validParameters.map((v) => v.request), + { websiteDomain: domain, timezone } + ) ); for (let i = 0; i < validParameters.length; i++) { diff --git a/apps/basket/src/lib/blocked-traffic-alerts.test.ts b/apps/basket/src/lib/blocked-traffic-alerts.test.ts index 8ccf7a2fb..302c3671b 100644 --- a/apps/basket/src/lib/blocked-traffic-alerts.test.ts +++ b/apps/basket/src/lib/blocked-traffic-alerts.test.ts @@ -2,6 +2,7 @@ import { describe, expect, test } from "vitest"; import { decideBlockedTrafficAlert, matchesTrackingAlertIgnoredOrigin, + shouldEvaluateBlockedTrafficAlert, shouldIgnoreBlockedTrafficAlertEvent, } from "./blocked-traffic-alerts"; @@ -138,4 +139,13 @@ describe("blocked traffic alert rules", () => { }) ).toEqual({ kind: "blocked_spike", severity: "warning" }); }); + + test("continues checking spikes after the first spike threshold", () => { + expect(shouldEvaluateBlockedTrafficAlert(3)).toBe(true); + expect(shouldEvaluateBlockedTrafficAlert(4)).toBe(false); + expect(shouldEvaluateBlockedTrafficAlert(24)).toBe(false); + expect(shouldEvaluateBlockedTrafficAlert(25)).toBe(true); + expect(shouldEvaluateBlockedTrafficAlert(50)).toBe(true); + expect(shouldEvaluateBlockedTrafficAlert(75)).toBe(true); + }); }); diff --git a/apps/basket/src/lib/blocked-traffic-alerts.ts b/apps/basket/src/lib/blocked-traffic-alerts.ts index 84fd135f5..90547495d 100644 --- 
a/apps/basket/src/lib/blocked-traffic-alerts.ts +++ b/apps/basket/src/lib/blocked-traffic-alerts.ts @@ -12,6 +12,7 @@ import { getTrackingBlockOriginHost, isActionableTrackingBlockReason, isIgnoredTrackingBlockOrigin, + matchesTrackingBlockIgnoredOrigin, } from "@databuddy/shared/tracking-blocks"; import { captureError } from "@lib/tracing"; @@ -22,7 +23,6 @@ const ZERO_TRACKING_BLOCK_THRESHOLD = 3; const BLOCKED_SPIKE_THRESHOLD = 25; const MIN_BASELINE_EVENTS = 5; const SPIKE_MULTIPLIER = 3; -const TRAILING_DOT_REGEX = /\.$/; export interface BlockedTrafficAlertContext { organizationId?: string | null; @@ -31,12 +31,12 @@ export interface BlockedTrafficAlertContext { websiteName?: string | null; } -interface TrackingHealthRow { +interface TrackingHealthCounts { baselineEvents: number; recentEvents: number; } -interface PreviousBlockedRow { +interface PreviousBlockedCountRow { previousBlocked: number; } @@ -45,53 +45,25 @@ export interface BlockedTrafficAlertDecision { severity: "critical" | "warning"; } -function alertOrigin(event: BlockedTraffic): string { +function getAlertOrigin(event: BlockedTraffic): string { return event.origin?.trim() || ""; } -function alertGroup(event: BlockedTraffic): string { - return encodeURIComponent(alertOrigin(event) || "missing-origin"); +function getAlertOriginKey(event: BlockedTraffic): string { + return encodeURIComponent(getAlertOrigin(event) || "missing-origin"); } -function eventSource( +function getBlockedTrafficSource( event: Pick ): string | null { return event.origin || event.referrer || null; } -function normalizedPatternHost(pattern: string): string | null { - const value = pattern.trim().toLowerCase(); - if (!value) { - return null; - } - if (value.startsWith("*.")) { - return value.slice(2); - } - return ( - getTrackingBlockOriginHost(value) ?? 
value.replace(TRAILING_DOT_REGEX, "") - ); -} - export function matchesTrackingAlertIgnoredOrigin( source: string | null, patterns: string[] ): boolean { - const host = - getTrackingBlockOriginHost(source) ?? source?.trim().toLowerCase(); - if (!host) { - return false; - } - - return patterns.some((pattern) => { - const normalized = normalizedPatternHost(pattern); - if (!normalized) { - return false; - } - if (pattern.trim().startsWith("*.")) { - return host.endsWith(`.${normalized}`); - } - return host === normalized; - }); + return matchesTrackingBlockIgnoredOrigin(source, patterns); } export function shouldIgnoreBlockedTrafficAlertEvent( @@ -106,10 +78,10 @@ export function shouldIgnoreBlockedTrafficAlertEvent( return true; } - return isIgnoredTrackingBlockOrigin(eventSource(event)); + return isIgnoredTrackingBlockOrigin(getBlockedTrafficSource(event)); } -function buildFix(event: BlockedTraffic): string { +function buildRecommendedFix(event: BlockedTraffic): string { const host = getTrackingBlockOriginHost(event.origin ?? null); if (event.block_reason === "origin_not_authorized") { return host @@ -124,13 +96,13 @@ function buildFix(event: BlockedTraffic): string { return "Update the website IP allowlist or remove the restriction if browser traffic should be accepted from dynamic client IPs."; } -function dashboardUrl(clientId: string, reason: string): string { +function buildDashboardUrl(clientId: string, reason: string): string { const section = reason === "origin_not_authorized" ? 
"general" : "security"; return `${config.urls.dashboard}/websites/${clientId}/settings/${section}`; } async function incrementWindowCounter(event: BlockedTraffic): Promise { - const key = `blocked-traffic-alert:count:${event.client_id}:${event.block_reason}:${alertGroup(event)}`; + const key = `blocked-traffic-alert:count:${event.client_id}:${event.block_reason}:${getAlertOriginKey(event)}`; const count = await redis.incr(key); if (count === 1) { await redis.expire(key, ALERT_WINDOW_MINUTES * 60); @@ -138,8 +110,10 @@ async function incrementWindowCounter(event: BlockedTraffic): Promise { return count; } -async function getTrackingHealth(clientId: string): Promise { - const rows = await chQuery( +async function getTrackingHealth( + clientId: string +): Promise { + const rows = await chQuery( `SELECT countIf(event_name = 'screen_view' AND time >= now() - INTERVAL ${RECENT_SUCCESS_MINUTES} MINUTE) AS recentEvents, countIf(event_name = 'screen_view' AND time >= now() - INTERVAL ${BASELINE_SUCCESS_HOURS} HOUR AND time < now() - INTERVAL ${RECENT_SUCCESS_MINUTES} MINUTE) AS baselineEvents @@ -151,7 +125,7 @@ async function getTrackingHealth(clientId: string): Promise { } async function getPreviousBlockedCount(event: BlockedTraffic): Promise { - const rows = await chQuery( + const rows = await chQuery( `SELECT count() AS previousBlocked FROM analytics.blocked_traffic PREWHERE timestamp >= now() - INTERVAL ${ALERT_WINDOW_MINUTES * 2} MINUTE @@ -161,7 +135,7 @@ async function getPreviousBlockedCount(event: BlockedTraffic): Promise { AND ifNull(origin, '') = {origin:String}`, { clientId: event.client_id, - origin: alertOrigin(event), + origin: getAlertOrigin(event), reason: event.block_reason, } ); @@ -193,11 +167,20 @@ export function decideBlockedTrafficAlert(input: { return null; } +export function shouldEvaluateBlockedTrafficAlert( + windowBlockedCount: number +): boolean { + return ( + windowBlockedCount === ZERO_TRACKING_BLOCK_THRESHOLD || + windowBlockedCount >= 
BLOCKED_SPIKE_THRESHOLD + ); +} + function cooldownKey( event: BlockedTraffic, kind: BlockedTrafficAlertDecision["kind"] ): string { - return `blocked-traffic-alert:sent:${event.client_id}:${event.block_reason}:${alertGroup(event)}:${kind}`; + return `blocked-traffic-alert:sent:${event.client_id}:${event.block_reason}:${getAlertOriginKey(event)}:${kind}`; } async function reserveCooldown( @@ -238,7 +221,7 @@ async function getOrganizationEmailSettings( return normalizeEmailNotificationSettings(row?.emailNotifications); } -function shouldSkipForSettings(input: { +function isAlertMutedBySettings(input: { decision: BlockedTrafficAlertDecision; event: BlockedTraffic; settings: EmailNotificationSettings; @@ -261,7 +244,7 @@ function shouldSkipForSettings(input: { return true; } return matchesTrackingAlertIgnoredOrigin( - eventSource(input.event), + getBlockedTrafficSource(input.event), tracking.ignoredOrigins ); } @@ -270,10 +253,10 @@ async function sendAlertEmail(input: { context: BlockedTrafficAlertContext; decision: BlockedTrafficAlertDecision; event: BlockedTraffic; - health: TrackingHealthRow; + trackingHealth: TrackingHealthCounts; ownerEmail: string; previousBlocked: number; - windowCount: number; + windowBlockedCount: number; }): Promise { const apiKey = process.env.RESEND_API_KEY; if (!apiKey) { @@ -292,18 +275,18 @@ async function sendAlertEmail(input: { const html = await render( BlockedTrafficAlertEmail({ - baselineEvents: input.health.baselineEvents, + baselineEvents: input.trackingHealth.baselineEvents, baselineHours: BASELINE_SUCCESS_HOURS, blockReason: input.event.block_reason, - blockedCount: input.windowCount, - dashboardUrl: dashboardUrl( + blockedCount: input.windowBlockedCount, + dashboardUrl: buildDashboardUrl( input.event.client_id || "", input.event.block_reason ), - fix: buildFix(input.event), + fix: buildRecommendedFix(input.event), origin: input.event.origin ?? 
null, previousBlockedCount: input.previousBlocked, - recentEvents: input.health.recentEvents, + recentEvents: input.trackingHealth.recentEvents, severity: input.decision.severity, siteLabel, windowMinutes: ALERT_WINDOW_MINUTES, @@ -343,32 +326,27 @@ async function maybeSendBlockedTrafficAlertAsync( return; } - const count = await incrementWindowCounter(event); - if ( - !( - count === ZERO_TRACKING_BLOCK_THRESHOLD || - count === BLOCKED_SPIKE_THRESHOLD - ) - ) { + const windowBlockedCount = await incrementWindowCounter(event); + if (!shouldEvaluateBlockedTrafficAlert(windowBlockedCount)) { return; } - const [health, previousBlocked] = await Promise.all([ + const [trackingHealth, previousBlocked] = await Promise.all([ getTrackingHealth(event.client_id || ""), getPreviousBlockedCount(event), ]); const decision = decideBlockedTrafficAlert({ - baselineEvents: health.baselineEvents, - count, + baselineEvents: trackingHealth.baselineEvents, + count: windowBlockedCount, previousBlocked, - recentEvents: health.recentEvents, + recentEvents: trackingHealth.recentEvents, }); if (!decision) { return; } const settings = await getOrganizationEmailSettings(context.organizationId); - if (shouldSkipForSettings({ decision, event, settings })) { + if (isAlertMutedBySettings({ decision, event, settings })) { return; } @@ -387,15 +365,13 @@ async function maybeSendBlockedTrafficAlertAsync( context, decision, event, - health, + trackingHealth, ownerEmail: owner.email, previousBlocked, - windowCount: count, + windowBlockedCount, }); } catch (error) { - await redis.del(reservedKey).catch(() => { - // Cooldown cleanup is best-effort; the alert evaluator must not throw twice. 
- }); + await redis.del(reservedKey).catch(() => undefined); throw error; } } diff --git a/apps/dashboard/app/(main)/home/_components/smart-insights-section.tsx b/apps/dashboard/app/(main)/home/_components/smart-insights-section.tsx index 424d9ef2a..c9c7d02c9 100644 --- a/apps/dashboard/app/(main)/home/_components/smart-insights-section.tsx +++ b/apps/dashboard/app/(main)/home/_components/smart-insights-section.tsx @@ -11,7 +11,7 @@ import { LightbulbIcon, WarningCircleIcon, } from "@databuddy/ui/icons"; -import { Card, Skeleton } from "@databuddy/ui"; +import { Button, Card, Skeleton } from "@databuddy/ui"; function InsightRowWrapper({ insight }: { insight: Insight }) { const [expanded, setExpanded] = useState(false); @@ -101,13 +101,14 @@ function InsightsErrorState({ onRetryAction }: { onRetryAction?: () => void }) {

{onRetryAction && ( - + )} ); @@ -193,17 +194,18 @@ export function SmartInsightsSection({ View all {onRefreshAction && ( - + )} diff --git a/apps/dashboard/app/(main)/insights/_components/cockpit-narrative.tsx b/apps/dashboard/app/(main)/insights/_components/cockpit-narrative.tsx index 9ff107ad4..91aec5e2e 100644 --- a/apps/dashboard/app/(main)/insights/_components/cockpit-narrative.tsx +++ b/apps/dashboard/app/(main)/insights/_components/cockpit-narrative.tsx @@ -1,6 +1,7 @@ "use client"; import { useAtomValue } from "jotai"; +import { Streamdown } from "streamdown"; import { cn } from "@/lib/utils"; import { useOrgNarrative } from "../hooks/use-org-narrative"; import { insightsRangeAtom } from "../lib/time-range"; @@ -67,9 +68,9 @@ export function CockpitNarrative() { )} {!(isLoading || isError) && data && data.success && ( -

+ {data.narrative} -

+ )} {!(isLoading || isError) && data && !data.success && ( diff --git a/apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx b/apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx new file mode 100644 index 000000000..0350e6ecc --- /dev/null +++ b/apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx @@ -0,0 +1,604 @@ +"use client"; + +import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; +import { useEffect, useMemo, useState } from "react"; +import { toast } from "sonner"; +import { insightQueries } from "@/lib/insight-api"; +import { orpc } from "@/lib/orpc"; +import { + CaretUpDownIcon, + FloppyDiskIcon, + GearIcon, + MediaPlayIcon, +} from "@databuddy/ui/icons"; +import { Button, Field, Input, Skeleton, guessTimezone } from "@databuddy/ui"; +import { + Accordion, + Popover, + SearchList, + Sheet, + Switch, +} from "@databuddy/ui/client"; + +type Depth = "light" | "standard" | "deep"; +type Frequency = "hourly" | "daily" | "weekly" | "custom"; +type ModelTier = "fast" | "balanced" | "deep"; +type ToolName = + | "web_metrics" + | "product_metrics" + | "ops_context" + | "business_context"; + +interface WebsiteOption { + domain: string; + id: string; + name: string | null; +} + +interface ConfigFormState { + allowedTools: ToolName[]; + cooldownHours: string; + cron: string; + depth: Depth; + enabled: boolean; + frequency: Frequency; + lookbackDays: string; + maxInsightsPerWebsite: string; + maxSteps: string; + maxToolCalls: string; + modelTier: ModelTier; + timezone: string; +} + +interface InsightGenerationSettingsProps { + organizationId?: string; + websites: WebsiteOption[]; +} + +const DEFAULT_FORM: ConfigFormState = { + allowedTools: ["web_metrics", "product_metrics", "ops_context"], + cooldownHours: "6", + cron: "", + depth: "standard", + enabled: true, + frequency: "weekly", + lookbackDays: "7", + maxInsightsPerWebsite: "3", + maxSteps: "24", + 
maxToolCalls: "16", + modelTier: "balanced", + timezone: "UTC", +}; + +const FREQUENCY_OPTIONS: { label: string; value: Frequency }[] = [ + { label: "Hourly", value: "hourly" }, + { label: "Daily", value: "daily" }, + { label: "Weekly", value: "weekly" }, + { label: "Custom", value: "custom" }, +]; + +const QUALITY_PRESETS: { depth: Depth; label: string; modelTier: ModelTier }[] = + [ + { depth: "light", label: "Fast", modelTier: "fast" }, + { depth: "standard", label: "Balanced", modelTier: "balanced" }, + { depth: "deep", label: "Thorough", modelTier: "deep" }, + ]; + +const TOOL_OPTIONS: { label: string; value: ToolName }[] = [ + { label: "Web metrics", value: "web_metrics" }, + { label: "Product metrics", value: "product_metrics" }, + { label: "Ops context", value: "ops_context" }, + { label: "Business context", value: "business_context" }, +]; + +export function InsightGenerationSettings({ + organizationId, + websites, +}: InsightGenerationSettingsProps) { + const queryClient = useQueryClient(); + const [open, setOpen] = useState(false); + const [form, setForm] = useState(DEFAULT_FORM); + + const configQuery = useQuery({ + ...orpc.insightGeneration.getConfig.queryOptions({ + input: { organizationId }, + }), + enabled: !!organizationId, + }); + + useEffect(() => { + const config = configQuery.data; + if (!config) { + return; + } + setForm({ + allowedTools: normalizeTools(config.allowedTools as ToolName[]), + cooldownHours: String(config.cooldownHours), + cron: config.cron ?? 
"", + depth: config.depth as Depth, + enabled: config.enabled, + frequency: normalizeFrequency(config.frequency), + lookbackDays: String(config.lookbackDays), + maxInsightsPerWebsite: String(config.maxInsightsPerWebsite), + maxSteps: String(config.maxSteps), + maxToolCalls: String(config.maxToolCalls), + modelTier: config.modelTier as ModelTier, + timezone: config.timezone || guessTimezone(), + }); + }, [configQuery.data]); + + const saveMutation = useMutation({ + ...orpc.insightGeneration.upsertConfig.mutationOptions(), + onSuccess: async () => { + toast.success("Settings saved"); + await invalidateInsightGenerationQueries(queryClient, organizationId); + setOpen(false); + }, + onError: (error) => { + toast.error( + error instanceof Error ? error.message : "Could not save settings" + ); + }, + }); + + const triggerMutation = useMutation({ + ...orpc.insightGeneration.triggerRun.mutationOptions(), + onSuccess: async (data) => { + if (data.status === "queued") { + toast.success( + `Queued ${data.queuedItems} insight job${data.queuedItems === 1 ? "" : "s"}` + ); + } else if (data.status === "disabled") { + toast.info("Insight generation is disabled"); + } else { + toast.success("No websites available to run"); + } + await invalidateInsightGenerationQueries(queryClient, organizationId); + setOpen(false); + }, + onError: (error) => { + toast.error( + error instanceof Error ? error.message : "Could not start run" + ); + }, + }); + + const isBusy = + configQuery.isLoading || + saveMutation.isPending || + triggerMutation.isPending; + + const activeQuality = useMemo( + () => + QUALITY_PRESETS.find( + (p) => p.depth === form.depth && p.modelTier === form.modelTier + ) ?? 
QUALITY_PRESETS[1], + [form.depth, form.modelTier] + ); + + const handleSave = () => { + saveMutation.mutate({ + ...formToPatch(form), + organizationId, + }); + }; + + const handleRun = () => { + triggerMutation.mutate({ + ...formToPatch(form), + force: true, + organizationId, + websiteIds: websites.map((w) => w.id), + }); + }; + + return ( + + + + + } + /> + + + Insight generation + + Configure how and when insights are generated. + + + + + {configQuery.isLoading ? ( +
+ + + +
+ ) : ( + <> +
+
+

Enabled

+

+ Automatically generate insights on schedule +

+
+ + setForm((c) => ({ ...c, enabled: Boolean(value) })) + } + /> +
+ +
+

Frequency

+
+ {FREQUENCY_OPTIONS.map((opt) => ( + + ))} +
+
+ +
+

Quality

+
+ {QUALITY_PRESETS.map((preset) => ( + + ))} +
+
+ + + + + Advanced + + +
+ + Timezone + + setForm((c) => ({ ...c, timezone: tz })) + } + value={form.timezone} + /> + + {form.frequency === "custom" ? ( + + Cron + + setForm((c) => ({ + ...c, + cron: e.target.value, + })) + } + value={form.cron} + /> + + ) : null} +
+ + Lookback (days) + + setForm((c) => ({ + ...c, + lookbackDays: e.target.value, + })) + } + type="number" + value={form.lookbackDays} + /> + + + Cooldown (hours) + + setForm((c) => ({ + ...c, + cooldownHours: e.target.value, + })) + } + type="number" + value={form.cooldownHours} + /> + + + Max insights/site + + setForm((c) => ({ + ...c, + maxInsightsPerWebsite: e.target.value, + })) + } + type="number" + value={form.maxInsightsPerWebsite} + /> + + + Max steps + + setForm((c) => ({ + ...c, + maxSteps: e.target.value, + })) + } + type="number" + value={form.maxSteps} + /> + + + Max tool calls + + setForm((c) => ({ + ...c, + maxToolCalls: e.target.value, + })) + } + type="number" + value={form.maxToolCalls} + /> + +
+
+

Signals

+
+ {TOOL_OPTIONS.map((tool) => { + const selected = form.allowedTools.includes( + tool.value + ); + return ( + + ); + })} +
+
+
+
+
+ + )} +
+ + + + + +
+
+ ); +} + +function normalizeTools(tools: ToolName[]): ToolName[] { + const unique = new Set(tools); + unique.add("web_metrics"); + return TOOL_OPTIONS.map((t) => t.value).filter((t) => unique.has(t)); +} + +function normalizeFrequency(frequency: string): Frequency { + return frequency === "hourly" || + frequency === "daily" || + frequency === "weekly" || + frequency === "custom" + ? frequency + : "weekly"; +} + +function toggleTool( + current: ToolName[], + tool: ToolName, + enabled: boolean +): ToolName[] { + if (tool === "web_metrics") { + return normalizeTools(current); + } + const next = new Set(current); + if (enabled) { + next.add(tool); + } else { + next.delete(tool); + } + next.add("web_metrics"); + return normalizeTools([...next]); +} + +function boundedInt( + value: string, + fallback: number, + min: number, + max: number +): number { + const parsed = Number.parseInt(value, 10); + if (!Number.isSafeInteger(parsed)) { + return fallback; + } + return Math.max(min, Math.min(max, parsed)); +} + +function formToPatch(form: ConfigFormState) { + return { + allowedTools: normalizeTools(form.allowedTools), + cooldownHours: boundedInt(form.cooldownHours, 6, 1, 168), + cron: form.frequency === "custom" ? form.cron.trim() || null : null, + depth: form.depth, + enabled: form.enabled, + frequency: form.frequency, + lookbackDays: boundedInt(form.lookbackDays, 7, 1, 90), + maxInsightsPerWebsite: boundedInt(form.maxInsightsPerWebsite, 3, 1, 10), + maxSteps: boundedInt(form.maxSteps, 24, 1, 64), + maxToolCalls: boundedInt(form.maxToolCalls, 16, 1, 64), + modelTier: form.modelTier, + timezone: form.timezone.trim() || guessTimezone(), + }; +} + +async function invalidateInsightGenerationQueries( + queryClient: ReturnType, + organizationId?: string +) { + await Promise.all([ + queryClient.invalidateQueries({ queryKey: orpc.insightGeneration.key() }), + queryClient.invalidateQueries({ queryKey: insightQueries.all() }), + organizationId + ? 
queryClient.invalidateQueries({ + queryKey: insightQueries.ai(organizationId).queryKey, + }) + : Promise.resolve(), + ]); +} + +const TIMEZONES: string[] = Intl.supportedValuesOf("timeZone"); + +function TimezonePicker({ + disabled, + onChange, + value, +}: { + disabled: boolean; + onChange: (tz: string) => void; + value: string; +}) { + const [open, setOpen] = useState(false); + + return ( + + + + {value || guessTimezone()} + + + + } + /> + + + + + No timezone found. + {TIMEZONES.map((tz) => ( + { + onChange(tz); + setOpen(false); + }} + value={tz} + > + {tz.replace(/_/g, " ")} + + ))} + + + + + ); +} diff --git a/apps/dashboard/app/(main)/insights/_components/insights-page-content.tsx b/apps/dashboard/app/(main)/insights/_components/insights-page-content.tsx index 67d8c1e71..e054939c0 100644 --- a/apps/dashboard/app/(main)/insights/_components/insights-page-content.tsx +++ b/apps/dashboard/app/(main)/insights/_components/insights-page-content.tsx @@ -17,6 +17,7 @@ import { orpc } from "@/lib/orpc"; import { cn } from "@/lib/utils"; import { CockpitNarrative } from "./cockpit-narrative"; import { CockpitSignals } from "./cockpit-signals"; +import { InsightGenerationSettings } from "./insight-generation-settings"; import { TimeRangeSelector } from "./time-range-selector"; import { ArrowClockwiseIcon, GlobeIcon, TrashIcon } from "@databuddy/ui/icons"; import { DeleteDialog } from "@databuddy/ui/client"; @@ -126,12 +127,16 @@ export function InsightsPageContent() { Clear all + {hasNoWebsites ? ( ) : ( -
+
diff --git a/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts b/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts index dde577d95..6d0c5639d 100644 --- a/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts +++ b/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts @@ -99,6 +99,7 @@ export function useInsightsFeed() { return { insights: mergedInsights, + generation: aiQuery.data?.generation, source: aiQuery.data?.source ?? null, isLoading: isInitialLoading, isRefreshing, diff --git a/apps/dashboard/app/(main)/websites/[id]/_components/filters/filters-section.tsx b/apps/dashboard/app/(main)/websites/[id]/_components/filters/filters-section.tsx index b5801f8f6..ad9a0fda8 100644 --- a/apps/dashboard/app/(main)/websites/[id]/_components/filters/filters-section.tsx +++ b/apps/dashboard/app/(main)/websites/[id]/_components/filters/filters-section.tsx @@ -136,28 +136,33 @@ export function FiltersSection() {
- {filters.map((filter, index) => ( -
- {getFieldLabel(filter.field)} - - {getOperatorLabel(filter.operator)} - - - {formatFilterValue(filter.value)} - - -
- ))} + {fieldLabel} + {operatorLabel} + + {valueLabel} + + + + ); + })}
diff --git a/apps/dashboard/app/(main)/websites/[id]/layout.tsx b/apps/dashboard/app/(main)/websites/[id]/layout.tsx index 9a4b541cc..27d7b7635 100644 --- a/apps/dashboard/app/(main)/websites/[id]/layout.tsx +++ b/apps/dashboard/app/(main)/websites/[id]/layout.tsx @@ -1,11 +1,11 @@ "use client"; -import { useQueryClient } from "@tanstack/react-query"; +import { useMutation, useQueryClient } from "@tanstack/react-query"; import { useAtom, useSetAtom } from "jotai"; import Link from "next/link"; import { useParams, usePathname } from "next/navigation"; import { parseAsBoolean, parseAsString, useQueryState } from "nuqs"; -import { useEffect, useMemo } from "react"; +import { useCallback, useEffect, useRef } from "react"; import { toast } from "sonner"; import { NoticeBanner } from "@/app/(main)/websites/_components/notice-banner"; import { LiveUserIndicator } from "@/components/analytics"; @@ -15,12 +15,13 @@ import { batchDynamicQueryKeys, dynamicQueryKeys, } from "@/hooks/use-dynamic-query"; -import { useWebsite } from "@/hooks/use-websites"; +import { updateWebsiteCache, useWebsite } from "@/hooks/use-websites"; import { DASHBOARD_FILTERS_QUERY_PARAM, parseDashboardFiltersParam, serializeDashboardFilters, } from "@/lib/dashboard-navigation-actions"; +import { orpc } from "@/lib/orpc"; import { cn } from "@/lib/utils"; import { addDynamicFilterAtom, @@ -35,8 +36,12 @@ import { FiltersSection } from "./_components/filters/filters-section"; import { SavedFiltersToolbar } from "./_components/filters/saved-filters-toolbar"; import { WebsiteTrackingSetupTab } from "./_components/tabs/tracking-setup-tab"; import { useTrackingSetup } from "./hooks/use-tracking-setup"; -import { ArrowClockwiseIcon, WarningCircleIcon } from "@databuddy/ui/icons"; -import { Button } from "@databuddy/ui"; +import { Button, usePersistentState } from "@databuddy/ui"; +import { + ArrowClockwiseIcon, + WarningCircleIcon, + XMarkIcon, +} from "@databuddy/ui/icons"; const 
ROUTES_WITHOUT_ANALYTICS_TOOLBAR = new Set([ "agent", @@ -47,6 +52,34 @@ const ROUTES_WITHOUT_ANALYTICS_TOOLBAR = new Set([ "settings", "users", ]); +const TRACKING_ISSUE_DISMISS_MS = 24 * 60 * 60 * 1000; +const TRACKING_ISSUE_ACTION_CLASS = + "h-7 rounded border border-border/70 bg-background px-2.5 font-medium text-foreground shadow-xs hover:bg-accent hover:text-foreground"; +const TRACKING_ISSUE_ICON_ACTION_CLASS = + "size-7 rounded text-muted-foreground hover:bg-accent hover:text-foreground"; + +function readStringSettingList( + settings: unknown, + key: "allowedOrigins" | "ignoredTrackingOrigins" +): string[] { + if (!settings || typeof settings !== "object" || Array.isArray(settings)) { + return []; + } + const value = (settings as Record)[key]; + return Array.isArray(value) + ? value.filter((item): item is string => typeof item === "string") + : []; +} + +function appendUniqueString(values: string[], value: string): string[] { + const normalized = value.trim().toLowerCase(); + if (!normalized) { + return values; + } + return values.some((item) => item.trim().toLowerCase() === normalized) + ? values + : [...values, normalized]; +} function shouldHideAnalyticsToolbar( pathname: string, @@ -82,13 +115,11 @@ export default function WebsiteLayout({ children }: WebsiteLayoutProps) { parseAsString ); const [, addFilter] = useAtom(addDynamicFilterAtom); - const serializedDynamicFilters = useMemo( - () => - dynamicFilters.length > 0 - ? serializeDashboardFilters(dynamicFilters) - : null, - [dynamicFilters] - ); + const skipNextFilterUrlSync = useRef(false); + const serializedDynamicFilters = + dynamicFilters.length > 0 + ? 
serializeDashboardFilters(dynamicFilters) + : null; useEffect(() => { setCurrentFilterWebsiteId(websiteId); @@ -98,23 +129,22 @@ export default function WebsiteLayout({ children }: WebsiteLayoutProps) { const parsedFilters = parseDashboardFiltersParam(filtersParam); if (parsedFilters === null) { if (filtersParam === null) { + skipNextFilterUrlSync.current = true; setDynamicFilters([]); } return; } - const serializedParsedFilters = - parsedFilters.length > 0 - ? serializeDashboardFilters(parsedFilters) - : null; - if (serializedParsedFilters === serializedDynamicFilters) { - return; - } - + skipNextFilterUrlSync.current = true; setDynamicFilters(parsedFilters); - }, [filtersParam, serializedDynamicFilters, setDynamicFilters]); + }, [filtersParam, setDynamicFilters]); useEffect(() => { + if (skipNextFilterUrlSync.current) { + skipNextFilterUrlSync.current = false; + return; + } + if (serializedDynamicFilters === filtersParam) { return; } @@ -133,6 +163,33 @@ export default function WebsiteLayout({ children }: WebsiteLayoutProps) { const { isTrackingSetup, isTrackingSetupLoading, trackingIssue } = useTrackingSetup(websiteId); + const [dismissedTrackingIssueKeys, setDismissedTrackingIssueKeys] = + usePersistentState>( + `tracking-issue-banner-dismissed-${websiteId}`, + {} + ); + const trackingIssueDismissalKey = trackingIssue + ? [ + websiteId, + trackingIssue.type, + trackingIssue.originHost ?? trackingIssue.origin ?? "missing-origin", + ].join(":") + : null; + const trackingIssueDismissedAt = trackingIssueDismissalKey + ? (dismissedTrackingIssueKeys[trackingIssueDismissalKey] ?? 
0) + : 0; + const isTrackingIssueDismissed = + Date.now() - trackingIssueDismissedAt < TRACKING_ISSUE_DISMISS_MS; + + const updateSettingsMutation = useMutation({ + ...orpc.websites.updateSettings.mutationOptions(), + onSuccess: (updatedWebsite) => { + updateWebsiteCache(queryClient, updatedWebsite); + queryClient.invalidateQueries({ + queryKey: ["websites", "isTrackingSetup", websiteId], + }); + }, + }); const isToolbarLoading = isWebsiteLoading || @@ -146,7 +203,73 @@ export default function WebsiteLayout({ children }: WebsiteLayoutProps) { websiteData && isTrackingSetup === false; const showTrackingIssue = - !(isDemoRoute || isTrackingSetupLoading) && trackingIssue; + !(isDemoRoute || isTrackingSetupLoading) && + trackingIssue && + !isTrackingIssueDismissed; + + const handleDismissTrackingIssue = useCallback(() => { + if (!trackingIssueDismissalKey) { + return; + } + setDismissedTrackingIssueKeys((prev) => ({ + ...prev, + [trackingIssueDismissalKey]: Date.now(), + })); + }, [setDismissedTrackingIssueKeys, trackingIssueDismissalKey]); + + const handleAllowTrackingOrigin = useCallback(() => { + if (!(trackingIssue?.originHost && websiteData)) { + return; + } + const allowedOrigins = appendUniqueString( + readStringSettingList(websiteData.settings, "allowedOrigins"), + trackingIssue.originHost + ); + + toast.promise( + updateSettingsMutation.mutateAsync({ + id: websiteId, + settings: { allowedOrigins }, + }), + { + loading: "Allowing tracking origin...", + success: `${trackingIssue.originHost} can now send analytics`, + error: "Failed to allow tracking origin", + } + ); + }, [ + trackingIssue?.originHost, + websiteData, + websiteId, + updateSettingsMutation, + ]); + + const handleIgnoreTrackingOrigin = useCallback(() => { + if (!(trackingIssue?.originHost && websiteData)) { + return; + } + const ignoredTrackingOrigins = appendUniqueString( + readStringSettingList(websiteData.settings, "ignoredTrackingOrigins"), + trackingIssue.originHost + ); + + toast.promise( + 
updateSettingsMutation.mutateAsync({ + id: websiteId, + settings: { ignoredTrackingOrigins }, + }), + { + loading: "Ignoring tracking origin...", + success: `${trackingIssue.originHost} warning hidden`, + error: "Failed to ignore tracking origin", + } + ); + }, [ + trackingIssue?.originHost, + websiteData, + websiteId, + updateSettingsMutation, + ]); const handleRefresh = async () => { setIsRefreshing(true); @@ -238,22 +361,66 @@ export default function WebsiteLayout({ children }: WebsiteLayoutProps) {
} + icon={} title="Tracking requests are being blocked" + tone="warning" >
- {trackingIssue.type === "origin_not_authorized" ? ( - + ) : null} + {trackingIssue.originHost ? ( + + ) : null} + {trackingIssue.type === "origin_not_authorized" && + !trackingIssue.originHost ? ( + ) : null} - +
diff --git a/apps/dashboard/app/(main)/websites/[id]/settings/security/page.tsx b/apps/dashboard/app/(main)/websites/[id]/settings/security/page.tsx index 60698d888..7c22fae64 100644 --- a/apps/dashboard/app/(main)/websites/[id]/settings/security/page.tsx +++ b/apps/dashboard/app/(main)/websites/[id]/settings/security/page.tsx @@ -12,6 +12,7 @@ import { } from "@/hooks/use-websites"; import { orpc } from "@/lib/orpc"; import { Button, Card, Input } from "@databuddy/ui"; +import { Switch } from "@databuddy/ui/client"; import { LockIcon, PlusIcon, XMarkIcon as XIcon } from "@databuddy/ui/icons"; import { areSecuritySettingsEqual, @@ -34,7 +35,14 @@ function validateOrigin(value: string): { success: boolean; error?: string } { return { success: true }; } if (trimmed.startsWith("*.")) { - if (domainRegex.test(trimmed.slice(2))) { + const domain = trimmed.slice(2); + if (domain.startsWith("www.")) { + return { + success: false, + error: "Use the apex domain instead of a www-prefixed domain", + }; + } + if (domainRegex.test(domain)) { return { success: true }; } return { @@ -42,6 +50,12 @@ function validateOrigin(value: string): { success: boolean; error?: string } { error: "Invalid wildcard domain format (e.g., *.cal.com)", }; } + if (trimmed.startsWith("www.")) { + return { + success: false, + error: "Use the apex domain instead of a www-prefixed domain", + }; + } if (domainRegex.test(trimmed)) { return { success: true }; } @@ -51,6 +65,35 @@ function validateOrigin(value: string): { success: boolean; error?: string } { }; } +function validateIgnoredTrackingOrigin(value: string): { + success: boolean; + error?: string; +} { + const trimmed = value.trim(); + if (trimmed === "*") { + return { + success: false, + error: "Use the warning toggle to hide every tracking warning", + }; + } + if (trimmed.startsWith("*.")) { + if (domainRegex.test(trimmed.slice(2))) { + return { success: true }; + } + return { + success: false, + error: "Invalid wildcard domain format (e.g., 
*.preview.example.com)", + }; + } + if (domainRegex.test(trimmed)) { + return { success: true }; + } + return { + success: false, + error: "Must be a valid domain (e.g., staging.example.com)", + }; +} + function validateIp(value: string): { success: boolean; error?: string } { const trimmed = value.trim(); if ( @@ -201,15 +244,32 @@ export default function SecurityPage() { const [allowedOrigins, setAllowedOrigins] = useState([]); const [allowedIps, setAllowedIps] = useState([]); + const [ignoredTrackingOrigins, setIgnoredTrackingOrigins] = useState< + string[] + >([]); + const [trackingIssueWarningsDisabled, setTrackingIssueWarningsDisabled] = + useState(false); + const [settingsHydrated, setSettingsHydrated] = useState(false); const savedSettings = useMemo( () => readSecuritySettings(websiteData?.settings), [websiteData?.settings] ); const draftSettings = useMemo( - () => ({ allowedIps, allowedOrigins }), - [allowedIps, allowedOrigins] + () => ({ + allowedIps, + allowedOrigins, + ignoredTrackingOrigins, + trackingIssueWarningsDisabled, + }), + [ + allowedIps, + allowedOrigins, + ignoredTrackingOrigins, + trackingIssueWarningsDisabled, + ] ); - const hasChanges = !areSecuritySettingsEqual(savedSettings, draftSettings); + const hasChanges = + settingsHydrated && !areSecuritySettingsEqual(savedSettings, draftSettings); const updateMutation = useMutation({ ...orpc.websites.updateSettings.mutationOptions(), @@ -221,6 +281,11 @@ export default function SecurityPage() { const initializeSettings = useCallback(() => { setAllowedOrigins(savedSettings.allowedOrigins); setAllowedIps(savedSettings.allowedIps); + setIgnoredTrackingOrigins(savedSettings.ignoredTrackingOrigins); + setTrackingIssueWarningsDisabled( + savedSettings.trackingIssueWarningsDisabled + ); + setSettingsHydrated(true); }, [savedSettings]); useEffect(() => { @@ -230,7 +295,7 @@ export default function SecurityPage() { }, [websiteData, initializeSettings]); const handleSave = useCallback(() => { - if 
(!websiteData) { + if (!(websiteData && settingsHydrated)) { return; } @@ -250,7 +315,14 @@ export default function SecurityPage() { error: "Failed to update security settings", } ); - }, [websiteData, websiteId, draftSettings, hasChanges, updateMutation]); + }, [ + websiteData, + settingsHydrated, + websiteId, + draftSettings, + hasChanges, + updateMutation, + ]); const handleOriginAdd = useCallback((value: string) => { setAllowedOrigins((prev) => [...prev, value]); @@ -268,6 +340,14 @@ export default function SecurityPage() { setAllowedIps((prev) => prev.filter((v) => v !== value)); }, []); + const handleIgnoredOriginAdd = useCallback((value: string) => { + setIgnoredTrackingOrigins((prev) => [...prev, value]); + }, []); + + const handleIgnoredOriginRemove = useCallback((value: string) => { + setIgnoredTrackingOrigins((prev) => prev.filter((v) => v !== value)); + }, []); + if (!websiteData) { return (
@@ -335,6 +415,36 @@ export default function SecurityPage() { + + +
+ Tracking Warnings + + Hide dashboard warnings for known noisy origins without + allowing those origins to send analytics. + +
+ + setTrackingIssueWarningsDisabled(!checked) + } + /> +
+ + + +
+ } diff --git a/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.test.ts b/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.test.ts index 09ecec762..85b35bfb9 100644 --- a/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.test.ts +++ b/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.test.ts @@ -9,8 +9,18 @@ import { describe("security settings helpers", () => { it("keeps empty arrays in the mutation payload so removals serialize", () => { expect( - createSecuritySettingsPayload({ allowedIps: [], allowedOrigins: [] }) - ).toEqual({ allowedIps: [], allowedOrigins: [] }); + createSecuritySettingsPayload({ + allowedIps: [], + allowedOrigins: [], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + }) + ).toEqual({ + allowedIps: [], + allowedOrigins: [], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + }); }); it("reads only string lists from stored website settings", () => { @@ -18,22 +28,49 @@ describe("security settings helpers", () => { readSecuritySettings({ allowedIps: ["10.0.0.1", 42], allowedOrigins: ["cal.com", null], + ignoredTrackingOrigins: ["staging.cal.com", false], + trackingIssueWarningsDisabled: true, }) - ).toEqual({ allowedIps: ["10.0.0.1"], allowedOrigins: ["cal.com"] }); + ).toEqual({ + allowedIps: ["10.0.0.1"], + allowedOrigins: ["cal.com"], + ignoredTrackingOrigins: ["staging.cal.com"], + trackingIssueWarningsDisabled: true, + }); }); it("detects exact draft changes", () => { expect( areSecuritySettingsEqual( - { allowedIps: [], allowedOrigins: ["cal.com"] }, - { allowedIps: [], allowedOrigins: ["cal.com"] } + { + allowedIps: [], + allowedOrigins: ["cal.com"], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + }, + { + allowedIps: [], + allowedOrigins: ["cal.com"], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + } ) ).toBe(true); expect( 
areSecuritySettingsEqual( - { allowedIps: [], allowedOrigins: ["cal.com"] }, - { allowedIps: [], allowedOrigins: [] } + { + allowedIps: [], + allowedOrigins: ["cal.com"], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + }, + { + allowedIps: [], + allowedOrigins: [], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + } ) ).toBe(false); }); diff --git a/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.ts b/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.ts index d50e8838f..28482694a 100644 --- a/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.ts +++ b/apps/dashboard/app/(main)/websites/[id]/settings/security/security-settings.ts @@ -1,6 +1,8 @@ export interface SecuritySettingsDraft { allowedIps: string[]; allowedOrigins: string[]; + ignoredTrackingOrigins: string[]; + trackingIssueWarningsDisabled: boolean; } function readStringList(value: unknown): string[] { @@ -15,13 +17,21 @@ function sameList(a: string[], b: string[]): boolean { export function readSecuritySettings(settings: unknown): SecuritySettingsDraft { if (!settings || typeof settings !== "object" || Array.isArray(settings)) { - return { allowedIps: [], allowedOrigins: [] }; + return { + allowedIps: [], + allowedOrigins: [], + ignoredTrackingOrigins: [], + trackingIssueWarningsDisabled: false, + }; } const record = settings as Record; return { allowedIps: readStringList(record.allowedIps), allowedOrigins: readStringList(record.allowedOrigins), + ignoredTrackingOrigins: readStringList(record.ignoredTrackingOrigins), + trackingIssueWarningsDisabled: + record.trackingIssueWarningsDisabled === true, }; } @@ -31,6 +41,8 @@ export function createSecuritySettingsPayload( return { allowedIps: [...settings.allowedIps], allowedOrigins: [...settings.allowedOrigins], + ignoredTrackingOrigins: [...settings.ignoredTrackingOrigins], + trackingIssueWarningsDisabled: 
settings.trackingIssueWarningsDisabled, }; } @@ -40,7 +52,9 @@ export function areSecuritySettingsEqual( ): boolean { return ( sameList(a.allowedOrigins, b.allowedOrigins) && - sameList(a.allowedIps, b.allowedIps) + sameList(a.allowedIps, b.allowedIps) && + sameList(a.ignoredTrackingOrigins, b.ignoredTrackingOrigins) && + a.trackingIssueWarningsDisabled === b.trackingIssueWarningsDisabled ); } diff --git a/apps/dashboard/app/(main)/websites/_components/notice-banner.tsx b/apps/dashboard/app/(main)/websites/_components/notice-banner.tsx index 5a0325e6e..1e87adb1f 100644 --- a/apps/dashboard/app/(main)/websites/_components/notice-banner.tsx +++ b/apps/dashboard/app/(main)/websites/_components/notice-banner.tsx @@ -8,6 +8,7 @@ export const NoticeBanner = ({ icon, className, description, + tone = "default", }: { title?: string; children?: React.ReactNode; @@ -16,10 +17,14 @@ export const NoticeBanner = ({ >; className?: string; description?: string; + tone?: "default" | "warning"; }) => (
@@ -30,7 +35,10 @@ export const NoticeBanner = ({ ? cloneElement(icon, { ...icon.props, className: cn( - "shrink-0 text-accent-foreground", + "shrink-0", + tone === "default" + ? "text-accent-foreground" + : "text-amber-500", icon.props.className ), "aria-hidden": true, @@ -40,12 +48,26 @@ export const NoticeBanner = ({ : null}
{title ? ( -

+

{title}

) : null} {description ? ( -

+

{description}

) : null} diff --git a/apps/dashboard/app/api/test/e2e/clickhouse/route.ts b/apps/dashboard/app/api/test/e2e/clickhouse/route.ts index 8573f3e8e..da884bc08 100644 --- a/apps/dashboard/app/api/test/e2e/clickhouse/route.ts +++ b/apps/dashboard/app/api/test/e2e/clickhouse/route.ts @@ -51,6 +51,30 @@ function pageTitle(path: string): string { return path === "/" ? "Home" : path.slice(1).replaceAll("-", " "); } +async function clearExistingSeedData(websiteId: string): Promise { + const params = { websiteId }; + const settings = { mutations_sync: "1", wait_end_of_query: 1 } as const; + + await Promise.all([ + clickHouse.command({ + clickhouse_settings: settings, + query: `ALTER TABLE ${TABLE_NAMES.events} DELETE WHERE client_id = {websiteId:String}`, + query_params: params, + }), + clickHouse.command({ + clickhouse_settings: settings, + query: `ALTER TABLE ${TABLE_NAMES.outgoing_links} DELETE WHERE client_id = {websiteId:String}`, + query_params: params, + }), + clickHouse.command({ + clickhouse_settings: settings, + query: + "ALTER TABLE analytics.daily_pageviews DELETE WHERE client_id = {websiteId:String}", + query_params: params, + }), + ]); +} + export async function POST(request: Request): Promise { const denied = assertE2EAccess(request); if (denied) { @@ -67,6 +91,8 @@ export async function POST(request: Request): Promise { } const eventCount = normalizeEventCount(body.eventCount); + await clearExistingSeedData(body.websiteId); + const now = Date.now(); const users = Array.from( { length: Math.max(3, Math.ceil(eventCount / 40)) }, diff --git a/apps/dashboard/lib/insight-api.ts b/apps/dashboard/lib/insight-api.ts index 60456927f..ea79df153 100644 --- a/apps/dashboard/lib/insight-api.ts +++ b/apps/dashboard/lib/insight-api.ts @@ -1,4 +1,3 @@ -import { publicConfig } from "@databuddy/env/public"; import { infiniteQueryOptions, keepPreviousData, @@ -6,8 +5,7 @@ import { } from "@tanstack/react-query"; import { guessTimezone } from "@databuddy/ui"; import type { 
HistoryInsightRow, Insight } from "@/lib/insight-types"; - -const API_URL = publicConfig.urls.api; +import { orpc } from "@/lib/orpc"; export const INSIGHT_CACHE = { staleTime: 15 * 60 * 1000, @@ -17,6 +15,27 @@ export const INSIGHT_CACHE = { const INSIGHTS_ROOT = ["insights"] as const; const HISTORY_PAGE_SIZE = 50; +const INSIGHTS_FAST_TIMEOUT_MS = 30_000; +const INSIGHTS_SLOW_TIMEOUT_MS = 90_000; + +function withTimeout( + label: string, + promise: Promise, + timeoutMs: number +): Promise { + let timeout: ReturnType | undefined; + const timeoutPromise = new Promise((_, reject) => { + timeout = setTimeout( + () => reject(new Error(`${label} timed out`)), + timeoutMs + ); + }); + return Promise.race([promise, timeoutPromise]).finally(() => { + if (timeout) { + clearTimeout(timeout); + } + }); +} export const insightQueries = { all: () => INSIGHTS_ROOT, @@ -71,6 +90,11 @@ export const insightQueries = { }; export interface InsightsAiResponse { + generation?: { + queuedItems?: number; + runId?: string; + status: "queued" | "skipped" | "disabled" | "unavailable"; + }; insights: Insight[]; source: "ai" | "fallback"; success: boolean; @@ -82,53 +106,33 @@ export interface InsightsHistoryPage { success: boolean; } -export async function fetchInsightsAi( +export function fetchInsightsAi( organizationId: string ): Promise { - const res = await fetch(`${API_URL}/v1/insights/ai`, { - method: "POST", - credentials: "include", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ organizationId, timezone: guessTimezone() }), - signal: AbortSignal.timeout(90_000), - }); - - if (!res.ok) { - throw new Error(`Insights request failed: ${res.status}`); - } - - const data = (await res.json()) as InsightsAiResponse; - - if (!data.success) { - throw new Error("Insights response unsuccessful"); - } - - return data; + return withTimeout( + "Insights feed request", + orpc.insights.feed.call({ + organizationId, + timezone: guessTimezone(), + }) as Promise, + 
INSIGHTS_SLOW_TIMEOUT_MS + ); } -export async function fetchInsightsHistoryPage( +export function fetchInsightsHistoryPage( organizationId: string, offset: number, limit = 50 ): Promise { - const params = new URLSearchParams({ - organizationId, - limit: String(limit), - offset: String(offset), - }); - const res = await fetch( - `${API_URL}/v1/insights/history?${params.toString()}`, - { - credentials: "include", - signal: AbortSignal.timeout(30_000), - } + return withTimeout( + "Insights history request", + orpc.insights.history.call({ + organizationId, + limit, + offset, + }) as Promise, + INSIGHTS_FAST_TIMEOUT_MS ); - - if (!res.ok) { - throw new Error(`Insights history failed: ${res.status}`); - } - - return (await res.json()) as InsightsHistoryPage; } export interface ClearInsightsResponse { @@ -137,24 +141,16 @@ export interface ClearInsightsResponse { success: boolean; } -export async function clearInsightsHistory( +export function clearInsightsHistory( organizationId: string ): Promise { - const res = await fetch(`${API_URL}/v1/insights/clear`, { - method: "POST", - credentials: "include", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ organizationId }), - signal: AbortSignal.timeout(30_000), - }); - - const data = (await res.json()) as ClearInsightsResponse; - - if (!res.ok) { - throw new Error(data.error ?? 
`Clear insights failed: ${res.status}`); - } - - return data; + return withTimeout( + "Clear insights history request", + orpc.insights.clearHistory.call({ + organizationId, + }) as Promise, + INSIGHTS_FAST_TIMEOUT_MS + ); } export type OrgNarrativeResponse = @@ -168,20 +164,16 @@ export type OrgNarrativeResponse = error: string; }; -export async function fetchInsightsOrgNarrative( +export function fetchInsightsOrgNarrative( organizationId: string, range: "7d" | "30d" | "90d" ): Promise { - const url = new URL(`${API_URL}/v1/insights/org-narrative`); - url.searchParams.set("organizationId", organizationId); - url.searchParams.set("range", range); - const res = await fetch(url.toString(), { - method: "GET", - credentials: "include", - signal: AbortSignal.timeout(30_000), - }); - if (!res.ok) { - return { success: false, error: `HTTP ${res.status}` }; - } - return (await res.json()) as OrgNarrativeResponse; + return withTimeout( + "Insights narrative request", + orpc.insights.orgNarrative.call({ + organizationId, + range, + }) as Promise, + INSIGHTS_FAST_TIMEOUT_MS + ); } diff --git a/apps/dashboard/lib/insight-signal-key.ts b/apps/dashboard/lib/insight-signal-key.ts index 0a8587520..6a3267a69 100644 --- a/apps/dashboard/lib/insight-signal-key.ts +++ b/apps/dashboard/lib/insight-signal-key.ts @@ -26,7 +26,7 @@ function directionFromParts( return "flat"; } -/** Matches server `insightDedupeKey` in apps/api/src/routes/insights.ts */ +/** Matches server `insightDedupeKey` in @databuddy/ai/insights/dedupe. 
*/ export function insightSignalDedupeKey(insight: { websiteId: string; type: InsightType; diff --git a/apps/dashboard/test/e2e/fixtures.ts b/apps/dashboard/test/e2e/fixtures.ts index 829c40cb9..960c6bb63 100644 --- a/apps/dashboard/test/e2e/fixtures.ts +++ b/apps/dashboard/test/e2e/fixtures.ts @@ -36,12 +36,14 @@ function e2eTestKey(): string { return key; } -function testScope(testTitle: string): string { - return testTitle +function testScope(testTitle: string, retry: number): string { + const retrySuffix = retry > 0 ? `-retry-${retry.toString()}` : ""; + const maxTitleLength = 48 - retrySuffix.length; + return `${testTitle .toLowerCase() .replaceAll(/[^a-z0-9]+/g, "-") .replaceAll(/^-+|-+$/g, "") - .slice(0, 48); + .slice(0, maxTitleLength)}${retrySuffix}`; } async function seedClickHouse( @@ -76,7 +78,7 @@ export const test = base.extend({ .request.post("/api/test/e2e/session", { data: { runScope: process.env.DATABUDDY_E2E_RUN_ID ?? "local", - testScope: testScope(testInfo.title), + testScope: testScope(testInfo.title, testInfo.retry), withWebsite: true, }, headers: { "x-e2e-test-key": e2eTestKey() }, diff --git a/apps/dashboard/test/e2e/specs/regressions/website-analytics.spec.ts b/apps/dashboard/test/e2e/specs/regressions/website-analytics.spec.ts index 66ac00dc4..6106b1225 100644 --- a/apps/dashboard/test/e2e/specs/regressions/website-analytics.spec.ts +++ b/apps/dashboard/test/e2e/specs/regressions/website-analytics.spec.ts @@ -32,6 +32,7 @@ test( "shows seeded analytics data and applies a topbar filter", { tag: ["@regression", "@core"] }, async ({ authenticatedPage, e2eSession }) => { + test.setTimeout(60_000); expect(e2eSession.websiteId).toBeTruthy(); await authenticatedPage.goto(`/demo/${e2eSession.websiteId}`); @@ -58,14 +59,22 @@ test( }); await topbar.getByRole("button", { name: "Filter" }).click(); - await authenticatedPage.getByPlaceholder("Search fields…").fill("Country"); - await authenticatedPage.getByText("Country", { exact: true }).click(); - 
await authenticatedPage.getByPlaceholder("Enter country…").fill("US"); - await authenticatedPage.getByRole("button", { name: "Add filter" }).click(); + const filterDialog = authenticatedPage.getByRole("dialog", { + name: "Add Filter", + }); + await expect(filterDialog).toBeVisible(); + await filterDialog.getByPlaceholder("Search fields…").fill("Country"); + await filterDialog.getByText("Country", { exact: true }).click(); + await filterDialog.getByPlaceholder("Enter country…").fill("US"); + await filterDialog + .getByRole("button", { exact: true, name: "Add filter" }) + .click(); + await expect(filterDialog).toBeHidden(); const main = authenticatedPage.getByRole("main"); - await expect(main.getByText("Country")).toBeVisible(); - await expect(main.getByText("US", { exact: true })).toBeVisible(); + await expect( + main.getByRole("group", { name: "Country = US filter" }) + ).toBeVisible(); await expect( main.getByText(formattedCount(seed.screenViewsByCountry.US ?? 0)).first() ).toBeVisible({ timeout: 20_000 }); diff --git a/apps/insights/package.json b/apps/insights/package.json new file mode 100644 index 000000000..a93ccb66a --- /dev/null +++ b/apps/insights/package.json @@ -0,0 +1,28 @@ +{ + "name": "@databuddy/insights", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "bun --watch run src/index.ts", + "test": "bun test src", + "test:integration": "INSIGHTS_INTEGRATION_TESTS=true bun test src/scheduler.integration.test.ts src/idempotency.integration.test.ts", + "check-types": "tsc --noEmit" + }, + "dependencies": { + "@databuddy/ai": "workspace:*", + "@databuddy/db": "workspace:*", + "@databuddy/env": "workspace:*", + "@databuddy/redis": "workspace:*", + "@databuddy/rpc": "workspace:*", + "ai": "^6.0.154", + "bullmq": "^5.66.5", + "dayjs": "^1.11.19", + "elysia": "catalog:", + "evlog": "catalog:" + }, + "devDependencies": { + "@databuddy/test": "workspace:*" + }, + "packageManager": "bun@1.3.14" +} diff --git 
a/apps/insights/src/generation.ts b/apps/insights/src/generation.ts new file mode 100644 index 000000000..2e1775a61 --- /dev/null +++ b/apps/insights/src/generation.ts @@ -0,0 +1,1140 @@ +import type { AppContext } from "@databuddy/ai/config/context"; +import { ANTHROPIC_CACHE_1H, models } from "@databuddy/ai/config/models"; +import { insightDedupeKey } from "@databuddy/ai/insights/dedupe"; +import { + fetchWebPeriodData, + hasWebInsightData, +} from "@databuddy/ai/insights/fetch-context"; +import { formatLegacyWebDataForPrompt } from "@databuddy/ai/insights/normalize"; +import type { + InsightMetricRow, + WeekOverWeekPeriod, +} from "@databuddy/ai/insights/types"; +import { validateInsights } from "@databuddy/ai/insights/validate"; +import { getAILogger } from "@databuddy/ai/lib/ai-logger"; +import { storeAnalyticsSummary } from "@databuddy/ai/lib/supermemory"; +import type { ParsedInsight } from "@databuddy/ai/schemas/smart-insights-output"; +import { insightsOutputSchema } from "@databuddy/ai/schemas/smart-insights-output"; +import { createInsightsAgentTools } from "@databuddy/ai/tools/insights-agent-tools"; +import { + and, + db, + desc, + eq, + gte, + inArray, + isNotNull, + isNull, + sql, +} from "@databuddy/db"; +import { + analyticsInsights, + annotations, + type InsightGenerationConfigSnapshot, + type InsightGenerationTool, + websites, +} from "@databuddy/db/schema"; +import { + invalidateAgentContextSnapshotsForWebsite, + invalidateInsightsCachesForOrganization, +} from "@databuddy/redis"; +import { generateText, Output, stepCountIs, ToolLoopAgent } from "ai"; +import { randomUUIDv7 } from "bun"; +import dayjs from "dayjs"; +import { + captureInsightsError, + emitInsightsEvent, + setInsightsLog, +} from "./lib/evlog-insights"; + +const LEGACY_TIMEOUT_MS = 60_000; +const AGENT_TIMEOUT_MS = 120_000; +const RECENT_INSIGHTS_PROMPT_LIMIT = 12; +const DEFAULT_MAX_INSIGHTS = 3; +const TOOL_NAMES = [ + "web_metrics", + "product_metrics", + "ops_context", + 
"business_context", +] as const satisfies readonly InsightGenerationTool[]; + +interface ExecutableTool { + execute?: (...args: never[]) => unknown; +} + +function withToolCallBudget( + tools: T, + onExecute: (toolName: string) => void +): T { + return Object.fromEntries( + Object.entries(tools).map(([name, tool]) => { + const executable = tool as ExecutableTool; + return [ + name, + { + ...(tool as object), + execute: (...args: never[]) => { + onExecute(name); + return executable.execute?.(...args); + }, + }, + ]; + }) + ) as T; +} + +interface OrgWebsiteRow { + domain: string; + id: string; + name: string | null; +} + +interface GeneratedWebsiteInsight extends ParsedInsight { + id: string; + websiteDomain: string; + websiteId: string; + websiteName: string | null; +} + +export interface GenerateWebsiteInsightsInput { + config: InsightGenerationConfigSnapshot; + organizationId: string; + reason: string; + requestedByUserId: string | null; + runId: string; + websiteId: string; +} + +export interface GenerateWebsiteInsightsResult { + insightIds: string[]; + message?: string; + resultCount: number; + status: "skipped" | "succeeded"; +} + +function maxInsights(config: InsightGenerationConfigSnapshot): number { + return Math.max( + 1, + Math.min(10, config.maxInsightsPerWebsite || DEFAULT_MAX_INSIGHTS) + ); +} + +function promptLookbackDays(config: InsightGenerationConfigSnapshot): number { + return Math.max(14, Math.min(180, config.lookbackDays * 2)); +} + +function getComparisonPeriod(lookbackDays: number): WeekOverWeekPeriod { + const days = Math.max(1, Math.min(90, lookbackDays)); + const now = dayjs(); + return { + current: { + from: now.subtract(days, "day").format("YYYY-MM-DD"), + to: now.format("YYYY-MM-DD"), + }, + previous: { + from: now.subtract(days * 2, "day").format("YYYY-MM-DD"), + to: now.subtract(days, "day").format("YYYY-MM-DD"), + }, + }; +} + +function modelForTier(tier: InsightGenerationConfigSnapshot["modelTier"]) { + if (tier === "fast") { + 
return models.quick; + } + if (tier === "deep") { + return models.deep; + } + return models.balanced; +} + +function normalizeAllowedTools( + tools: InsightGenerationConfigSnapshot["allowedTools"] +): InsightGenerationTool[] { + const allowed = new Set( + tools.filter((tool): tool is InsightGenerationTool => + (TOOL_NAMES as readonly string[]).includes(tool) + ) + ); + allowed.add("web_metrics"); + return TOOL_NAMES.filter((tool) => allowed.has(tool)); +} + +function dedupeKeyFor(insight: GeneratedWebsiteInsight): string { + return insightDedupeKey({ + ...insight, + changePercent: insight.changePercent ?? null, + }); +} + +async function fetchInsightDedupeKeyToIdMap( + organizationId: string, + cooldownHours: number +): Promise> { + const cutoff = dayjs().subtract(Math.max(1, cooldownHours), "hour").toDate(); + const rows = await db + .select({ + id: analyticsInsights.id, + websiteId: analyticsInsights.websiteId, + type: analyticsInsights.type, + sentiment: analyticsInsights.sentiment, + changePercent: analyticsInsights.changePercent, + dedupeKey: analyticsInsights.dedupeKey, + subjectKey: analyticsInsights.subjectKey, + title: analyticsInsights.title, + }) + .from(analyticsInsights) + .where( + and( + eq(analyticsInsights.organizationId, organizationId), + gte(analyticsInsights.createdAt, cutoff) + ) + ) + .orderBy(desc(analyticsInsights.createdAt)); + + const map = new Map(); + for (const row of rows) { + const key = + row.dedupeKey ?? 
+ insightDedupeKey({ + websiteId: row.websiteId, + type: row.type as ParsedInsight["type"], + sentiment: row.sentiment as ParsedInsight["sentiment"], + changePercent: row.changePercent, + subjectKey: row.subjectKey, + title: row.title, + }); + if (!map.has(key)) { + map.set(key, row.id); + } + } + return map; +} + +async function fetchRecentAnnotations( + websiteId: string, + config: InsightGenerationConfigSnapshot +): Promise { + const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); + const rows = await db + .select({ + text: annotations.text, + xValue: annotations.xValue, + tags: annotations.tags, + }) + .from(annotations) + .where( + and( + eq(annotations.websiteId, websiteId), + gte(annotations.xValue, since), + isNull(annotations.deletedAt) + ) + ) + .orderBy(annotations.xValue) + .limit(20); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map((row) => { + const date = dayjs(row.xValue).format("YYYY-MM-DD"); + const tags = row.tags?.length ? 
` [${row.tags.join(", ")}]` : ""; + return `- ${date}: ${row.text}${tags}`; + }); + + return `\n\nUser annotations (known events that may explain changes):\n${lines.join("\n")}`; +} + +async function fetchRecentInsightsForPrompt( + organizationId: string, + websiteId: string, + config: InsightGenerationConfigSnapshot +): Promise { + const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); + const rows = await db + .select({ + title: analyticsInsights.title, + type: analyticsInsights.type, + createdAt: analyticsInsights.createdAt, + }) + .from(analyticsInsights) + .where( + and( + eq(analyticsInsights.organizationId, organizationId), + eq(analyticsInsights.websiteId, websiteId), + gte(analyticsInsights.createdAt, since) + ) + ) + .orderBy(desc(analyticsInsights.createdAt)) + .limit(RECENT_INSIGHTS_PROMPT_LIMIT); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map( + (row) => + `- [${row.type}] ${row.title} (${dayjs(row.createdAt).format("YYYY-MM-DD")})` + ); + + return `\n\n## Recently reported insights for this website (avoid repeating the same narrative unless something materially changed)\n${lines.join("\n")}`; +} + +function formatOrgWebsitesContext( + orgSites: OrgWebsiteRow[], + currentWebsiteId: string +): string { + if (orgSites.length <= 1) { + return ""; + } + const sorted = [...orgSites].sort((a, b) => + a.domain.localeCompare(b.domain, "en") + ); + const lines = sorted.map((site) => { + const label = site.name?.trim() ? site.name.trim() : site.domain; + const marker = + site.id === currentWebsiteId + ? " - metrics below are for this site only" + : ""; + return `- ${label} (${site.domain})${marker}`; + }); + return `## Organization websites (same account, separate analytics) +Each row is a different tracked property (e.g. marketing site vs app vs docs). The period metrics in this message apply only to the site marked "metrics below". Do not blend numbers across rows. 
If referrers include another domain from this list, treat it as cross-property traffic and name both sides clearly. + +${lines.join("\n")} + +`; +} + +function buildSystemPrompt(config: InsightGenerationConfigSnapshot): string { + const targetCount = maxInsights(config); + const depthInstruction = + config.depth === "light" + ? "Use the smallest useful tool set. Prefer 1-2 high-confidence insights and skip speculative cross-domain analysis." + : config.depth === "deep" + ? "Actively cross-check web, product, ops, and business context when those tools are enabled. Prefer a fuller ranked set, but only when signals are distinct and data-backed." + : "Explore enough context to produce concise, distinct, high-confidence insights without over-querying."; + + return ` +You are Databuddy's analytics insights worker. Return up to ${targetCount} period-over-period insights when that many distinct data-backed signals exist. Rank by actionability and user/business impact. + + + +- Depth: ${config.depth}. ${depthInstruction} +- Max model/tool-loop steps: ${config.maxSteps} +- Max requested tool calls: ${config.maxToolCalls} +- Lookback period length: ${config.lookbackDays} day(s) +- Enabled tools: ${normalizeAllowedTools(config.allowedTools).join(", ")} + + + +- Write for a founder/operator, not an analytics engineer. Translate technical metrics into plain outcomes: "interactions got slower", "pages feel slower", "setup is leaking users", "one source now dominates traffic". +- Prefer reliability, conversion/product impact, engagement quality, broken instrumentation, and meaningful behavior changes over vanity traffic spikes. +- Score actionability times impact, not raw percentage magnitude. Reserve priority 8-10 for likely user, revenue, or operational impact. +- Prefer fewer, sharper insights over broad coverage. Return only signals a user can act on this period. +- Avoid repeating recently reported narratives unless the signal materially changed. 
+ + + +- Use only provided data, tool results, annotations, and recent-insight context. +- Do not invent revenue, signups, retention, funnel conversion, causality, root causes, or business impact. +- If multiple org websites are listed, keep properties separate; cross-domain referrers are cross-property traffic, not generic referrals. +- Use cautious language for correlations unless segment-level evidence directly proves the cause. +- Do not punt, apologize, or say you cannot produce insights when any useful metrics exist. If one query is sparse, use stronger available evidence and lower confidence. + + + +- Return no more than ${targetCount} concise insights: reliability/product risk first, then engagement/acquisition opportunity. Do not make near-duplicates. +- Each insight must be one clear signal with 1-5 metrics; primary metric first. +- Metrics array owns the numbers. Description/suggestion should reference metric labels, not restate values. +- Keep title under 80 chars, description under 320 chars, suggestion under 260 chars. +- Titles must be plain English and user-facing. Do not put raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75 in titles; put technical metric names only in the metrics array. +- Keep description 1-2 concise sentences: what changed, why it matters, and whether cause is evidence or hypothesis. +- Suggestion must be a specific next action with an operational verb such as inspect, review, compare, segment, drill into, fix, audit, trace, or verify. Never use generic monitoring advice. +- Suggestion must name the exact product surface to inspect next: funnel step, goal, referrer segment, page path, error class, session stream, web vital, flag rollout, or agent diagnostic prompt. +- subjectKey must be stable; sources must include only evidence domains used; confidence 0-1 should reflect evidence strength. +- impactSummary is optional, one sentence under 220 characters. 
+ + + +Good: Error Rate rose while Sessions stayed stable -> reliability issue; suggest reviewing affected page/errors first. +Good: INP p75 rose -> title "Interactions got slower"; metrics can still include "INP p75". +Good: Onboarding step 2 drop-off is 80% -> title "Onboarding is leaking at step 2". +Bad: Pricing Visitors rose -> "revenue opportunity" without business data. +Bad: Twitter rose and Bounce Rate worsened -> "Twitter caused the drop" without segmented engagement data. +Bad: "INP p75 still rising" as a title; users should not need to know web-vitals acronyms. +`; +} + +async function validateOrRepairInsights( + insights: ParsedInsight[], + context: { + config: InsightGenerationConfigSnapshot; + domain: string; + mode: "agent" | "legacy"; + organizationId: string; + websiteId: string; + } +): Promise { + const validated = validateInsights(insights); + if (validated.warnings.length > 0) { + emitInsightsEvent("warn", "generation.validation_warnings", { + organization_id: context.organizationId, + website_id: context.websiteId, + mode: context.mode, + input_count: insights.length, + output_count: validated.insights.length, + warning_count: validated.warnings.length, + warnings: validated.warnings, + }); + } + + const targetCount = Math.min(maxInsights(context.config), insights.length); + if (targetCount === 0 || validated.insights.length >= targetCount) { + return validated.insights.slice(0, targetCount); + } + + const repairStartedAt = performance.now(); + try { + const ai = getAILogger(); + const repair = await generateText({ + model: ai.wrap(modelForTier(context.config.modelTier)), + output: Output.object({ schema: insightsOutputSchema }), + messages: [ + { + role: "system", + content: `Repair Databuddy insight cards. Return up to ${targetCount} concise, valid cards when the source contains distinct data-backed signals. Use only the provided metrics and claims; do not invent numbers, causes, revenue impact, or new entities. 
Keep title <=80 chars, description <=320 chars, suggestion <=260 chars. Write for a founder/operator: titles must be plain English and avoid raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75. Technical metric names may remain in the metrics array. Suggestions need specific operational actions, not monitoring. Soften unsupported causality.`, + }, + { + role: "user", + content: JSON.stringify( + { + domain: context.domain, + validationWarnings: validated.warnings, + originalInsights: insights, + }, + null, + 2 + ), + }, + ], + temperature: 0, + maxOutputTokens: 4096, + abortSignal: AbortSignal.timeout(30_000), + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.repair", + metadata: { + source: "insights_worker", + feature: "smart_insights", + mode: context.mode, + organizationId: context.organizationId, + websiteId: context.websiteId, + websiteDomain: context.domain, + }, + }, + }); + + const repairedOutput = repair.output?.insights ?? []; + const repaired = validateInsights(repairedOutput); + if (repaired.warnings.length > 0) { + emitInsightsEvent("warn", "generation.repair.validation_warnings", { + organization_id: context.organizationId, + website_id: context.websiteId, + mode: context.mode, + input_count: repairedOutput.length, + output_count: repaired.insights.length, + warning_count: repaired.warnings.length, + warnings: repaired.warnings, + }); + } + + if (repaired.insights.length >= validated.insights.length) { + emitInsightsEvent("info", "generation.repair.completed", { + organization_id: context.organizationId, + website_id: context.websiteId, + mode: context.mode, + duration_ms: Math.round(performance.now() - repairStartedAt), + input_count: insights.length, + output_count: repaired.insights.length, + }); + return repaired.insights.slice(0, targetCount); + } + } catch (error) { + captureInsightsError(error, "generation.repair.failed", { + organization_id: context.organizationId, + website_id: context.websiteId, + 
mode: context.mode, + duration_ms: Math.round(performance.now() - repairStartedAt), + input_count: insights.length, + target_count: targetCount, + }); + } + + return validated.insights.slice(0, targetCount); +} + +async function analyzeWebsiteLegacy(params: { + config: InsightGenerationConfigSnapshot; + domain: string; + organizationId: string; + orgSites: OrgWebsiteRow[]; + period: WeekOverWeekPeriod; + recentInsightsBlock: string; + annotationContext: string; + userId: string; + websiteId: string; +}): Promise { + const startedAt = performance.now(); + const currentRange = params.period.current; + const previousRange = params.period.previous; + const [current, previous] = await Promise.all([ + fetchWebPeriodData( + params.websiteId, + params.domain, + currentRange.from, + currentRange.to, + params.config.timezone + ), + fetchWebPeriodData( + params.websiteId, + params.domain, + previousRange.from, + previousRange.to, + params.config.timezone + ), + ]); + + if (current.summary.length === 0 && current.topPages.length === 0) { + emitInsightsEvent("info", "generation.legacy.skipped_no_data", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: Math.round(performance.now() - startedAt), + }); + return []; + } + + const dataSection = formatLegacyWebDataForPrompt( + current, + previous, + currentRange, + previousRange + ); + const orgContext = formatOrgWebsitesContext( + params.orgSites, + params.websiteId + ); + const prompt = `Analyze this website's period-over-period data and return insights. 
+ +${orgContext}${dataSection}${params.annotationContext}${params.recentInsightsBlock}`; + + try { + const ai = getAILogger(); + const result = await generateText({ + model: ai.wrap(modelForTier(params.config.modelTier)), + output: Output.object({ schema: insightsOutputSchema }), + messages: [ + { + role: "system", + content: buildSystemPrompt(params.config), + providerOptions: ANTHROPIC_CACHE_1H, + }, + { role: "user", content: prompt }, + ], + temperature: 0.2, + maxOutputTokens: 8192, + abortSignal: AbortSignal.timeout(LEGACY_TIMEOUT_MS), + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.analyze_website", + metadata: { + source: "insights_worker", + feature: "smart_insights", + mode: "legacy_fallback", + organizationId: params.organizationId, + userId: params.userId, + websiteId: params.websiteId, + websiteDomain: params.domain, + timezone: params.config.timezone, + }, + }, + }); + + const validated = await validateOrRepairInsights( + result.output?.insights ?? [], + { + config: params.config, + domain: params.domain, + mode: "legacy", + organizationId: params.organizationId, + websiteId: params.websiteId, + } + ); + emitInsightsEvent("info", "generation.legacy.completed", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: Math.round(performance.now() - startedAt), + raw_output_count: result.output?.insights?.length ?? 
0, + output_count: validated.length, + }); + return validated; + } catch (error) { + captureInsightsError(error, "generation.legacy.failed", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: Math.round(performance.now() - startedAt), + }); + return []; + } +} + +async function analyzeWebsite(params: { + config: InsightGenerationConfigSnapshot; + domain: string; + organizationId: string; + orgSites: OrgWebsiteRow[]; + period: WeekOverWeekPeriod; + userId: string; + websiteId: string; +}): Promise { + const startedAt = performance.now(); + const currentRange = params.period.current; + const previousRange = params.period.previous; + const [hasCurrentData, hasPreviousData] = await Promise.all([ + hasWebInsightData( + params.websiteId, + params.domain, + currentRange.from, + currentRange.to, + params.config.timezone + ), + hasWebInsightData( + params.websiteId, + params.domain, + previousRange.from, + previousRange.to, + params.config.timezone + ), + ]); + if (!(hasCurrentData || hasPreviousData)) { + emitInsightsEvent("info", "generation.agent.skipped_no_data", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: Math.round(performance.now() - startedAt), + }); + return []; + } + + const [annotationContext, recentInsightsBlock] = await Promise.all([ + fetchRecentAnnotations(params.websiteId, params.config), + fetchRecentInsightsForPrompt( + params.organizationId, + params.websiteId, + params.config + ), + ]); + + const allowedTools = normalizeAllowedTools(params.config.allowedTools); + const orgContext = formatOrgWebsitesContext( + params.orgSites, + params.websiteId + ); + const userPrompt = `Analyze this website's period-over-period data and produce insights. 
+ +**Current period:** ${currentRange.from} to ${currentRange.to} +**Previous period:** ${previousRange.from} to ${previousRange.to} +**Timezone:** ${params.config.timezone} +**Domain:** ${params.domain} + +Use web_metrics to pull metrics for both current and previous periods before inferring trends. Start with summary_metrics for both periods, then add top_pages, error_summary, top_referrers, country, browser_name, vitals_overview, or custom_events queries only when they sharpen the narrative. Use product_metrics for goals, funnels, retention, and custom event behavior when a traffic change may have downstream product impact. Use ops_context for page-level errors, uptime, anomaly signals, and recent flag rollouts when reliability or product changes may explain the trend. Use business_context for revenue totals, attribution, and product mix when commercial impact matters. + +Only call these enabled tools: ${allowedTools.join(", ")}. + +${orgContext}${annotationContext}${recentInsightsBlock}`; + + const { tools: allTools } = createInsightsAgentTools({ + websiteId: params.websiteId, + domain: params.domain, + timezone: params.config.timezone, + periodBounds: { current: currentRange, previous: previousRange }, + }); + const availableTools = Object.fromEntries( + Object.entries(allTools).filter(([name]) => + allowedTools.includes(name as InsightGenerationTool) + ) + ) as Partial; + const activeToolNames = allowedTools.filter((name) => name in availableTools); + + try { + const appContext: AppContext = { + userId: params.userId, + organizationId: params.organizationId, + websiteId: params.websiteId, + websiteDomain: params.domain, + timezone: params.config.timezone, + currentDateTime: new Date().toISOString(), + chatId: `insights:${params.organizationId}:${params.websiteId}`, + }; + let toolCallCount = 0; + let executedToolCallCount = 0; + const maxToolCalls = Math.max(1, params.config.maxToolCalls); + const tools = withToolCallBudget(availableTools, (toolName) => { + 
if (executedToolCallCount >= maxToolCalls) { + throw new Error( + `Insight generation tool-call budget exceeded before ${toolName}` + ); + } + executedToolCallCount += 1; + }); + const ai = getAILogger(); + const agent = new ToolLoopAgent({ + model: ai.wrap(modelForTier(params.config.modelTier)), + instructions: { + role: "system", + content: buildSystemPrompt(params.config), + providerOptions: ANTHROPIC_CACHE_1H, + }, + output: Output.object({ schema: insightsOutputSchema }), + tools, + stopWhen: stepCountIs( + Math.max( + 1, + Math.min(params.config.maxSteps, params.config.maxToolCalls + 2) + ) + ), + prepareStep: ({ stepNumber }) => { + const remainingToolCalls = maxToolCalls - executedToolCallCount; + if (remainingToolCalls <= 0) { + return { activeTools: [] }; + } + const activeTools = activeToolNames.slice(0, remainingToolCalls); + if (stepNumber === 0 && activeTools.includes("web_metrics")) { + return { + activeTools: ["web_metrics"], + toolChoice: { type: "tool", toolName: "web_metrics" }, + }; + } + return { activeTools }; + }, + onStepFinish: ({ usage, finishReason, toolCalls }) => { + toolCallCount += toolCalls.length; + emitInsightsEvent("info", "generation.agent.step_finished", { + organization_id: params.organizationId, + website_id: params.websiteId, + finish_reason: finishReason, + tool_calls: toolCalls.flatMap((toolCall) => + toolCall ? 
[toolCall.toolName] : [] + ), + total_tokens: usage?.totalTokens, + tool_call_count: toolCallCount, + executed_tool_call_count: executedToolCallCount, + }); + }, + temperature: 0.2, + experimental_context: appContext, + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.analyze_website", + metadata: { + source: "insights_worker", + feature: "smart_insights", + mode: "agent", + organizationId: params.organizationId, + userId: params.userId, + websiteId: params.websiteId, + websiteDomain: params.domain, + timezone: params.config.timezone, + depth: params.config.depth, + modelTier: params.config.modelTier, + }, + }, + }); + + const result = await agent.generate({ + messages: [{ role: "user", content: userPrompt }], + abortSignal: AbortSignal.timeout(AGENT_TIMEOUT_MS), + }); + + if (result.output?.insights?.length) { + const validated = await validateOrRepairInsights(result.output.insights, { + config: params.config, + domain: params.domain, + mode: "agent", + organizationId: params.organizationId, + websiteId: params.websiteId, + }); + emitInsightsEvent("info", "generation.agent.completed", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: Math.round(performance.now() - startedAt), + raw_output_count: result.output.insights.length, + output_count: validated.length, + tool_call_count: toolCallCount, + }); + setInsightsLog({ + generation_mode: "agent", + tool_call_count: toolCallCount, + generated_candidate_count: validated.length, + }); + return validated; + } + + emitInsightsEvent("warn", "generation.agent.missing_output", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: Math.round(performance.now() - startedAt), + tool_call_count: toolCallCount, + }); + } catch (error) { + captureInsightsError(error, "generation.agent.failed_using_legacy", { + organization_id: params.organizationId, + website_id: params.websiteId, + duration_ms: 
Math.round(performance.now() - startedAt), + }); + } + + /* Reached when the agent produced no insights or its error was caught above: run the legacy generator with the context already fetched. */ return analyzeWebsiteLegacy({ + ...params, + annotationContext, + recentInsightsBlock, + }); +} + +/** Persists generated insights for one run. Candidates are deduplicated by dedupeKeyFor within the batch, take over the id of an existing row found in the dedupe-key map, and are capped at maxInsights(config). New rows are upserted, existing rows are updated, caches are invalidated, and the insights are returned with their persisted ids. Returns an empty list when no candidate survives. */ async function persistWebsiteInsights(params: { + config: InsightGenerationConfigSnapshot; + insights: GeneratedWebsiteInsight[]; + organizationId: string; + period: WeekOverWeekPeriod; + runId: string; +}): Promise { + const startedAt = performance.now(); + const dedupeKeyToId = await fetchInsightDedupeKeyToIdMap( + params.organizationId, + params.config.cooldownHours + ); + const seenInBatch = new Set(); + const finalInsights: GeneratedWebsiteInsight[] = []; + let duplicateCandidates = 0; + + /* Highest priority first, so the cap below keeps the most important candidates; sorting a copy leaves params.insights in its original order. */ for (const insight of [...params.insights].sort( + (a, b) => b.priority - a.priority + )) { + const key = dedupeKeyFor(insight); + if (seenInBatch.has(key)) { + duplicateCandidates += 1; + continue; + } + seenInBatch.add(key); + const existingId = dedupeKeyToId.get(key); + finalInsights.push(existingId ? { ...insight, id: existingId } : insight); + if (finalInsights.length >= maxInsights(params.config)) { + break; + } + } + + if (finalInsights.length === 0) { + emitInsightsEvent("info", "generation.persistence.skipped_empty", { + organization_id: params.organizationId, + run_id: params.runId, + candidate_count: params.insights.length, + duplicate_candidate_count: duplicateCandidates, + dedupe_window_count: dedupeKeyToId.size, + }); + return []; + } + + const updatePayload = { + runId: params.runId, + timezone: params.config.timezone, + currentPeriodFrom: params.period.current.from, + currentPeriodTo: params.period.current.to, + previousPeriodFrom: params.period.previous.from, + previousPeriodTo: params.period.previous.to, + createdAt: new Date(), + }; + + /* Insights whose id was not replaced by an existing row id are the rows to insert. */ const toInsert = finalInsights + .filter((insight) => { + const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); + return !(existingId && insight.id === existingId); + }) + .map((insight) => ({ + id: insight.id, + organizationId: params.organizationId, + websiteId: 
insight.websiteId, + runId: params.runId, + title: insight.title, + description: insight.description, + suggestion: insight.suggestion, + severity: insight.severity, + sentiment: insight.sentiment, + type: insight.type, + priority: insight.priority, + changePercent: insight.changePercent ?? null, + dedupeKey: dedupeKeyFor(insight), + subjectKey: insight.subjectKey, + sources: insight.sources, + confidence: insight.confidence, + impactSummary: insight.impactSummary ?? null, + metrics: + insight.metrics.length > 0 + ? (insight.metrics as InsightMetricRow[]) + : null, + timezone: params.config.timezone, + currentPeriodFrom: params.period.current.from, + currentPeriodTo: params.period.current.to, + previousPeriodFrom: params.period.previous.from, + previousPeriodTo: params.period.previous.to, + })); + + const toRefresh = finalInsights.filter((insight) => { + const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); + return existingId !== undefined && insight.id === existingId; + }); + + if (toInsert.length > 0) { + await db + .insert(analyticsInsights) + .values(toInsert) + .onConflictDoUpdate({ + target: [analyticsInsights.organizationId, analyticsInsights.dedupeKey], + targetWhere: isNotNull(analyticsInsights.dedupeKey), + set: { + runId: params.runId, + timezone: params.config.timezone, + currentPeriodFrom: params.period.current.from, + currentPeriodTo: params.period.current.to, + previousPeriodFrom: params.period.previous.from, + previousPeriodTo: params.period.previous.to, + createdAt: new Date(), + title: sql.raw("excluded.title"), + description: sql.raw("excluded.description"), + suggestion: sql.raw("excluded.suggestion"), + severity: sql.raw("excluded.severity"), + sentiment: sql.raw("excluded.sentiment"), + type: sql.raw("excluded.type"), + priority: sql.raw("excluded.priority"), + changePercent: sql.raw("excluded.change_percent"), + subjectKey: sql.raw("excluded.subject_key"), + sources: sql.raw("excluded.sources"), + confidence: 
sql.raw("excluded.confidence"), + impactSummary: sql.raw("excluded.impact_summary"), + metrics: sql.raw("excluded.metrics"), + }, + }); + } + await Promise.all( + toRefresh.map((insight) => + db + .update(analyticsInsights) + .set({ + ...updatePayload, + title: insight.title, + description: insight.description, + suggestion: insight.suggestion, + severity: insight.severity, + sentiment: insight.sentiment, + type: insight.type, + priority: insight.priority, + changePercent: insight.changePercent ?? null, + dedupeKey: dedupeKeyFor(insight), + subjectKey: insight.subjectKey, + sources: insight.sources, + confidence: insight.confidence, + impactSummary: insight.impactSummary ?? null, + metrics: + insight.metrics.length > 0 + ? (insight.metrics as InsightMetricRow[]) + : null, + }) + .where(eq(analyticsInsights.id, insight.id)) + ) + ); + + const persistedRows = await db + .select({ + dedupeKey: analyticsInsights.dedupeKey, + id: analyticsInsights.id, + }) + .from(analyticsInsights) + .where( + and( + eq(analyticsInsights.organizationId, params.organizationId), + inArray( + analyticsInsights.dedupeKey, + finalInsights.map((insight) => dedupeKeyFor(insight)) + ) + ) + ); + const persistedIdByDedupeKey = new Map( + persistedRows.flatMap((row) => + row.dedupeKey ? [[row.dedupeKey, row.id] as const] : [] + ) + ); + const persistedInsights = finalInsights.map((insight) => { + const persistedId = persistedIdByDedupeKey.get(dedupeKeyFor(insight)); + return persistedId ? 
{ ...insight, id: persistedId } : insight; + }); + + /* One snapshot invalidation per distinct website among the persisted insights. */ const websiteInvalidations = [ + ...new Set(persistedInsights.map((insight) => insight.websiteId)), + ].map((websiteId) => invalidateAgentContextSnapshotsForWebsite(websiteId)); + + await Promise.all([ + invalidateInsightsCachesForOrganization(params.organizationId), + ...websiteInvalidations, + ]); + + emitInsightsEvent("info", "generation.persistence.completed", { + organization_id: params.organizationId, + run_id: params.runId, + duration_ms: Math.round(performance.now() - startedAt), + result_count: persistedInsights.length, + insert_count: toInsert.length, + refresh_count: toRefresh.length, + invalidated_website_count: websiteInvalidations.length, + }); + + return persistedInsights; +} + +/** Fire-and-forget: formats each insight as one line and passes the text to storeAnalyticsSummary. Does nothing for an empty list. The promise is not awaited; success and failure are only logged. */ function storeWebsiteSummary( + site: OrgWebsiteRow, + insights: GeneratedWebsiteInsight[] +): void { + if (insights.length === 0) { + return; + } + const summary = insights + .map( + (insight) => + `[${insight.severity}] ${insight.title}: ${insight.description} Suggestion: ${insight.suggestion}` + ) + .join("\n"); + + storeAnalyticsSummary( + `Insights for ${site.domain} (${dayjs().format("YYYY-MM-DD")}):\n${summary}`, + site.id, + { period: "configured" } + ) + .then(() => { + emitInsightsEvent("info", "generation.summary_stored", { + website_id: site.id, + insight_count: insights.length, + }); + }) + .catch((error: unknown) => { + captureInsightsError(error, "generation.summary_store_failed", { + website_id: site.id, + }); + }); +} + +/** Entry point for one website in a run: loads the site scoped to the organization (soft-deleted rows excluded), analyzes it, persists the resulting insights and stores a summary. Returns a skipped result when the site is missing or when nothing was saved, otherwise a succeeded result with the saved insight ids. */ export async function generateWebsiteInsights( + input: GenerateWebsiteInsightsInput +): Promise { + const startedAt = performance.now(); + const [site] = await db + .select({ id: websites.id, name: websites.name, domain: websites.domain }) + .from(websites) + .where( + and( + eq(websites.id, input.websiteId), + eq(websites.organizationId, input.organizationId), + isNull(websites.deletedAt) + ) + ) + .limit(1); + + if (!site) { + emitInsightsEvent("warn", 
"generation.website.skipped_missing_site", { + organization_id: input.organizationId, + website_id: input.websiteId, + run_id: input.runId, + duration_ms: Math.round(performance.now() - startedAt), + }); + return { + status: "skipped", + resultCount: 0, + insightIds: [], + message: "Website not found or deleted", + }; + } + + const orgSites = await db + .select({ id: websites.id, name: websites.name, domain: websites.domain }) + .from(websites) + .where( + and( + eq(websites.organizationId, input.organizationId), + isNull(websites.deletedAt) + ) + ) + .orderBy(websites.domain) + .limit(100); + setInsightsLog({ + organization_site_count: orgSites.length, + }); + + const period = getComparisonPeriod(input.config.lookbackDays); + const userId = input.requestedByUserId ?? "insights-worker"; + const insights = await analyzeWebsite({ + config: input.config, + domain: site.domain, + organizationId: input.organizationId, + orgSites, + period, + userId, + websiteId: site.id, + }); + + const candidates = insights.map( + (insight): GeneratedWebsiteInsight => ({ + ...insight, + id: randomUUIDv7(), + websiteId: site.id, + websiteName: site.name, + websiteDomain: site.domain, + }) + ); + + const saved = await persistWebsiteInsights({ + config: input.config, + insights: candidates, + organizationId: input.organizationId, + period, + runId: input.runId, + }); + + storeWebsiteSummary(site, saved); + + emitInsightsEvent("info", "generation.website.completed", { + organization_id: input.organizationId, + website_id: input.websiteId, + run_id: input.runId, + duration_ms: Math.round(performance.now() - startedAt), + result_count: saved.length, + reason: input.reason, + depth: input.config.depth, + model_tier: input.config.modelTier, + allowed_tools: input.config.allowedTools, + }); + setInsightsLog({ + generation_result_count: saved.length, + generation_status: saved.length > 0 ? "succeeded" : "skipped", + }); + + return saved.length > 0 + ? 
{ + status: "succeeded", + resultCount: saved.length, + insightIds: saved.map((insight) => insight.id), + } + : { + status: "skipped", + resultCount: 0, + insightIds: [], + message: "No data-backed insights generated", + }; +} diff --git a/apps/insights/src/idempotency.integration.test.ts b/apps/insights/src/idempotency.integration.test.ts new file mode 100644 index 000000000..751dcce52 --- /dev/null +++ b/apps/insights/src/idempotency.integration.test.ts @@ -0,0 +1,125 @@ +import "@databuddy/test/env"; +import { afterAll, beforeEach, describe, expect, it } from "bun:test"; +import { isNotNull, shutdownPostgres, sql } from "@databuddy/db"; +import { analyticsInsights, insightRuns } from "@databuddy/db/schema"; +import { + closePostgres, + db, + hasTestDb, + insertOrganization, + insertWebsite, + truncatePostgres, +} from "@databuddy/test"; +import { eq } from "drizzle-orm"; +import { randomUUIDv7 } from "bun"; + +const runIntegration = + process.env.INSIGHTS_INTEGRATION_TESTS === "true" && hasTestDb; +const describeIntegration = runIntegration ? 
describe : describe.skip; + +/* The suite is skipped entirely when describeIntegration resolved to describe.skip. Each test starts from truncated tables. */ describeIntegration("insights idempotency integration", () => { + beforeEach(async () => { + await truncatePostgres(); + }); + + afterAll(async () => { + await truncatePostgres(); + await shutdownPostgres(); + await closePostgres(); + }); + + /* Inserts a row, then upserts a second row with the same organization and dedupe key, and expects a single row carrying the second run id and title. */ it("upserts generated insights by organization dedupe key", async () => { + const org = await insertOrganization(); + const website = await insertWebsite({ organizationId: org.id }); + const firstRunId = randomUUIDv7(); + const secondRunId = randomUUIDv7(); + const dedupeKey = `integration:${randomUUIDv7()}`; + + await db().insert(insightRuns).values([ + { id: firstRunId, organizationId: org.id, reason: "manual" }, + { id: secondRunId, organizationId: org.id, reason: "manual" }, + ]); + + await db().insert(analyticsInsights).values( + insightRow({ + id: randomUUIDv7(), + runId: firstRunId, + organizationId: org.id, + websiteId: website.id, + dedupeKey, + title: "Original checkout signal", + }) + ); + + await db() + .insert(analyticsInsights) + .values( + insightRow({ + id: randomUUIDv7(), + runId: secondRunId, + organizationId: org.id, + websiteId: website.id, + dedupeKey, + title: "Updated checkout signal", + }) + ) + .onConflictDoUpdate({ + target: [analyticsInsights.organizationId, analyticsInsights.dedupeKey], + targetWhere: isNotNull(analyticsInsights.dedupeKey), + set: { + runId: secondRunId, + title: sql`excluded.title`, + }, + }); + + const rows = await db() + .select({ + id: analyticsInsights.id, + runId: analyticsInsights.runId, + title: analyticsInsights.title, + }) + .from(analyticsInsights) + .where(eq(analyticsInsights.organizationId, org.id)); + + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + runId: secondRunId, + title: "Updated checkout signal", + }); + }); +}); + +/** Builds a complete analyticsInsights insert row from the identifying fields; every other column gets a fixed placeholder value. */ function insightRow(input: { + dedupeKey: string; + id: string; + organizationId: string; + runId: string; + title: string; + websiteId: string; +}): typeof analyticsInsights.$inferInsert { + return { 
+ id: input.id, + organizationId: input.organizationId, + websiteId: input.websiteId, + runId: input.runId, + dedupeKey: input.dedupeKey, + title: input.title, + description: "A test insight description.", + suggestion: "Inspect the affected flow.", + severity: "warning", + sentiment: "negative", + type: "conversion_leak", + priority: 8, + changePercent: -12, + subjectKey: "checkout", + sources: ["web"], + confidence: 0.82, + impactSummary: "Checkout needs review.", + metrics: [{ label: "Errors", current: 12, previous: 6, format: "number" }], + timezone: "UTC", + currentPeriodFrom: "2026-01-01", + currentPeriodTo: "2026-01-08", + previousPeriodFrom: "2025-12-25", + previousPeriodTo: "2026-01-01", + }; +} diff --git a/apps/insights/src/index.ts b/apps/insights/src/index.ts new file mode 100644 index 000000000..6d43663c6 --- /dev/null +++ b/apps/insights/src/index.ts @@ -0,0 +1,200 @@ +import { setAiRequestLoggerProvider } from "@databuddy/ai/lib/request-logger"; +import { db, shutdownPostgres, sql } from "@databuddy/db"; +import { closeInsightsQueue, getInsightsQueue } from "@databuddy/redis"; +import { Elysia } from "elysia"; +import { initLogger } from "evlog"; +import { + captureInsightsError, + emitInsightsEvent, + flushBatchedInsightsDrain, + getActiveInsightsLog, + insightsLoggerDrain, +} from "./lib/evlog-insights"; +import { + ensureInsightsDispatchSchedule, + ensureInsightsMaintenanceSchedule, +} from "./scheduler"; +import { startInsightsWorker } from "./worker"; + +const environment = + process.env.APP_ENV ?? + process.env.RAILWAY_ENVIRONMENT_NAME ?? + (process.env.NODE_ENV === "development" ? 
"development" : "production"); +const workerEnabled = process.env.INSIGHTS_WORKER_ENABLED !== "false"; +const DRAIN_TIMEOUT_MS = 10_000; + +initLogger({ + env: { + service: "insights", + environment, + region: process.env.RAILWAY_REPLICA_REGION, + commitHash: process.env.RAILWAY_GIT_COMMIT_SHA, + }, + drain: insightsLoggerDrain, + sampling: {}, +}); + +setAiRequestLoggerProvider(getActiveInsightsLog); + +/* Unhandled rejections and uncaught exceptions are logged, then the process drains and exits with code 1. */ process.on("unhandledRejection", (reason) => { + captureInsightsError(reason, "process.unhandled_rejection", { + process: "unhandledRejection", + }); + exitAfterDrain(1); +}); + +process.on("uncaughtException", (error) => { + captureInsightsError(error, "process.uncaught_exception", { + process: "uncaughtException", + error_source: "process", + }); + exitAfterDrain(1); +}); + +/* shuttingDown makes exitAfterDrain and shutdown run at most once between them. */ let shuttingDown = false; +let insightsWorker: ReturnType | null = null; + +/** Settles with the given promise, or rejects with a shutdown timeout error once timeoutMs elapses. The timer is cleared in every case. */ async function withTimeout( + promise: Promise, + timeoutMs: number +): Promise { + let timeout: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_, reject) => { + timeout = setTimeout( + () => reject(new Error("shutdown timeout")), + timeoutMs + ); + }), + ]); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } +} + +/** Closes the worker (when one was started), the queue, the batched log drain and Postgres concurrently, waiting at most DRAIN_TIMEOUT_MS. A rejection of that bounded wait is logged instead of rethrown. */ async function drainAll() { + await withTimeout( + Promise.allSettled([ + insightsWorker?.close() ?? 
Promise.resolve(), + closeInsightsQueue(), + flushBatchedInsightsDrain(), + shutdownPostgres(), + ]), + DRAIN_TIMEOUT_MS + ).catch((error) => { + captureInsightsError(error, "lifecycle.shutdown_failed", { + lifecycle: "shutdown", + }); + }); +} + +/** Drains and then exits with the given code. Calls made while a shutdown is already in progress return immediately. */ function exitAfterDrain(code: number) { + if (shuttingDown) { + return; + } + shuttingDown = true; + drainAll() + .catch((error) => { + captureInsightsError(error, "lifecycle.shutdown_failed", { + lifecycle: "shutdown", + }); + }) + .finally(() => process.exit(code)); +} + +/** Signal handler: logs the shutdown request, drains, then exits with code 0. Ignored while a shutdown is already in progress. */ async function shutdown(signal: string) { + if (shuttingDown) { + return; + } + shuttingDown = true; + emitInsightsEvent("info", "lifecycle.shutdown_requested", { + lifecycle: "shutdown", + signal, + }); + await drainAll(); + process.exit(0); +} + +/** When the worker is enabled, starts it and ensures the dispatch and maintenance schedules; otherwise only logs that the worker is disabled. */ async function startRuntime() { + emitInsightsEvent("info", "lifecycle.starting", { + worker_enabled: workerEnabled, + }); + if (workerEnabled) { + insightsWorker = startInsightsWorker(); + await Promise.all([ + ensureInsightsDispatchSchedule(), + ensureInsightsMaintenanceSchedule(), + ]); + emitInsightsEvent("info", "lifecycle.started", { + worker_enabled: true, + }); + } else { + emitInsightsEvent("info", "lifecycle.disabled", { + worker_enabled: false, + }); + } +} + +/* A startup failure is logged and the process exits with code 1 after draining. */ startRuntime().catch((error) => { + captureInsightsError(error, "lifecycle.start_failed", { + lifecycle: "startup", + }); + exitAfterDrain(1); +}); + +process.on("SIGTERM", () => shutdown("SIGTERM")); +process.on("SIGINT", () => shutdown("SIGINT")); + +type ProbeResult = + | { status: "ok"; latency_ms: number } + | { status: "error"; latency_ms: number; error: string }; + +/** Awaits fn and reports ok or error together with the elapsed time in rounded milliseconds. A failure of fn is returned as an error result, not thrown. */ async function probe(fn: () => Promise): Promise { + const start = performance.now(); + try { + await fn(); + return { status: "ok", latency_ms: Math.round(performance.now() - start) }; + } catch (error) { + return { + status: "error", + latency_ms: Math.round(performance.now() - start), + error: error instanceof Error ? 
error.message : "unknown", + }; + } +} + +/* HTTP surface: /health/status probes Postgres and the queue Redis client in parallel and answers 200 when both are ok, 503 otherwise; /health is a static reply that performs no checks. Route errors are logged through captureInsightsError. */ const app = new Elysia() + .onError(({ code, error }) => { + captureInsightsError(error, "http.error", { + elysia_code: String(code), + }); + }) + .get("/health/status", async () => { + const [postgres, bullmqRedis] = await Promise.all([ + probe(() => db.execute(sql`SELECT 1`).then(() => {})), + probe(async () => { + const client = await getInsightsQueue().client; + await client.ping(); + }), + ]); + + const services = { postgres, bullmqRedis }; + const status = Object.values(services).every((s) => s.status === "ok") + ? "ok" + : "degraded"; + + return Response.json( + { status, workerEnabled, services }, + { status: status === "ok" ? 200 : 503 } + ); + }) + .get("/health", () => ({ status: "ok", workerEnabled })); + +export default { + port: Number(process.env.PORT ?? 4002), + fetch: app.fetch, +}; diff --git a/apps/insights/src/jobs.ts b/apps/insights/src/jobs.ts new file mode 100644 index 000000000..45e02a058 --- /dev/null +++ b/apps/insights/src/jobs.ts @@ -0,0 +1,194 @@ +import { db, eq, sql } from "@databuddy/db"; +import { insightRunItems, insightRuns } from "@databuddy/db/schema"; +import { + INSIGHTS_DISPATCH_JOB_NAME, + INSIGHTS_GENERATE_WEBSITE_JOB_NAME, + INSIGHTS_MAINTENANCE_JOB_NAME, + INSIGHTS_QUEUE_NAME, + INSIGHTS_ROLLUP_JOB_NAME, + type InsightsGenerateWebsiteJobData, + type InsightsQueueJobData, + type InsightsRollupJobData, +} from "@databuddy/redis"; +import type { Job } from "bullmq"; +import { generateWebsiteInsights } from "./generation"; +import { + queueRollupIfSettled, + recoverStaleInsightRuns, + syncRunStatus, +} from "./recovery"; +import { + captureInsightsError, + createInsightsEventLog, + emitInsightsEvent, + setInsightsLog, + toError, + withInsightsLogContext, +} from "./lib/evlog-insights"; +import { processRollupJob } from "./rollup"; +import { dispatchDueInsightRuns } from "./scheduler"; + +/** Returns the message of an Error, or the stringified value for anything else. */ function errorMessage(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + +/** True when the attempt now running is the last one configured; attempts defaults to 1 when the job options do not set it. NOTE(review): presumably attemptsMade excludes the attempt in progress - confirm against BullMQ. */ function isFinalAttempt(job: Job): boolean { + return job.attemptsMade + 1 >= (job.opts.attempts ?? 1); +} + +/** Flattens job metadata and the optional identifiers found on job.data into log fields. */ function jobContext(job: Job) { + const data = job.data as Partial & + Partial & { reason?: string }; + return { + attempts_configured: job.opts.attempts, + attempts_made: job.attemptsMade, + job_id: job.id, + job_name: job.name, + organization_id: data.organizationId, + queue_name: INSIGHTS_QUEUE_NAME, + reason: data.reason, + run_id: data.runId, + website_id: data.websiteId, + }; +} + +/** Marks the run and its item as running, generates insights for the website, records the outcome on the item, syncs the run status and queues the rollup via queueRollupIfSettled. On failure the item becomes failed on the final attempt or queued again otherwise, and the original error is rethrown. */ async function processGenerateWebsiteJob( + data: InsightsGenerateWebsiteJobData, + job: Job +): Promise<{ resultCount: number; status: "skipped" | "succeeded" }> { + const now = new Date(); + /* coalesce keeps the first startedAt of the run when a later item or retry starts. */ await Promise.all([ + db + .update(insightRuns) + .set({ + status: "running", + startedAt: sql`coalesce(${insightRuns.startedAt}, ${now})`, + updatedAt: now, + }) + .where(eq(insightRuns.id, data.runId)), + db + .update(insightRunItems) + .set({ + attempts: job.attemptsMade + 1, + errorMessage: null, + finishedAt: null, + startedAt: now, + status: "running", + updatedAt: now, + }) + .where(eq(insightRunItems.id, data.itemId)), + ]); + + try { + const result = await generateWebsiteInsights({ + config: data.config, + organizationId: data.organizationId, + reason: data.reason, + requestedByUserId: data.requestedByUserId ?? null, + runId: data.runId, + websiteId: data.websiteId, + }); + + await db + .update(insightRunItems) + .set({ + errorMessage: result.message ?? 
null, + finishedAt: new Date(), + resultCount: result.resultCount, + status: result.status, + updatedAt: new Date(), + }) + .where(eq(insightRunItems.id, data.itemId)); + const summary = await syncRunStatus(data.runId); + setInsightsLog({ + run_status: summary.status, + run_completed_items: summary.completedItems, + run_failed_items: summary.failedItems, + run_skipped_items: summary.skippedItems, + run_total_items: summary.totalItems, + }); + await queueRollupIfSettled(summary); + return { resultCount: result.resultCount, status: result.status }; + } catch (error) { + const finalAttempt = isFinalAttempt(job); + const message = errorMessage(error); + await db + .update(insightRunItems) + .set({ + errorMessage: finalAttempt + ? message + : `Attempt ${job.attemptsMade + 1} failed, retrying: ${message}`, + finishedAt: finalAttempt ? new Date() : null, + status: finalAttempt ? "failed" : "queued", + updatedAt: new Date(), + }) + .where(eq(insightRunItems.id, data.itemId)); + const summary = await syncRunStatus(data.runId); + await queueRollupIfSettled(summary); + captureInsightsError(error, "job.generate_website.failed", { + ...jobContext(job), + item_id: data.itemId, + final_attempt: finalAttempt, + next_status: finalAttempt ? 
"failed" : "queued", + run_status: summary.status, + }); + throw error; + } +} + +/** Worker entry point: routes the job by name to its handler inside a per-job log context, emits started, completed and failed events with the duration, and rethrows any error. An unrecognized job name is itself an error. */ export async function processInsightsJob(job: Job) { + const startedAt = performance.now(); + const context = jobContext(job); + const logger = createInsightsEventLog({ + ...context, + insights_event: "job.process", + }); + + return await withInsightsLogContext(logger, async () => { + emitInsightsEvent("info", "job.started", context); + try { + let result: unknown; + if (job.name === INSIGHTS_DISPATCH_JOB_NAME) { + result = await dispatchDueInsightRuns(); + } else if (job.name === INSIGHTS_MAINTENANCE_JOB_NAME) { + result = await recoverStaleInsightRuns(); + } else if (job.name === INSIGHTS_GENERATE_WEBSITE_JOB_NAME) { + result = await processGenerateWebsiteJob( + job.data as InsightsGenerateWebsiteJobData, + job + ); + } else if (job.name === INSIGHTS_ROLLUP_JOB_NAME) { + result = await processRollupJob(job.data as InsightsRollupJobData); + } else { + throw new Error(`Unknown insights job: ${job.name}`); + } + + const durationMs = Math.round(performance.now() - startedAt); + setInsightsLog({ + duration_ms: durationMs, + job_status: "succeeded", + }); + emitInsightsEvent("info", "job.completed", { + ...context, + duration_ms: durationMs, + }); + logger.emit({ duration_ms: durationMs, job_status: "succeeded" }); + return result; + } catch (error) { + const durationMs = Math.round(performance.now() - startedAt); + const err = toError(error); + logger.error(err); + logger.emit({ + duration_ms: durationMs, + error_message: err.message, + job_status: "failed", + _forceKeep: true, + }); + captureInsightsError(error, "job.failed", { + ...context, + duration_ms: durationMs, + }); + throw error; + } + }); +} diff --git a/apps/insights/src/lib/evlog-insights.ts b/apps/insights/src/lib/evlog-insights.ts new file mode 100644 index 000000000..c38751ac7 --- /dev/null +++ b/apps/insights/src/lib/evlog-insights.ts @@ -0,0 +1,142 @@ +import { AsyncLocalStorage } from "node:async_hooks"; +import { 
dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { readBooleanEnv } from "@databuddy/env/boolean"; +import type { DrainContext, RequestLogger } from "evlog"; +import { createLogger, log } from "evlog"; +import { createAxiomDrain } from "evlog/axiom"; +import { createFsDrain } from "evlog/fs"; +import { createDrainPipeline } from "evlog/pipeline"; + +type PrimitiveLogValue = string | number | boolean; +type LogValue = PrimitiveLogValue | PrimitiveLogValue[]; +type LogFields = Record; +type LogLevel = "error" | "info" | "warn"; + +/* Holds the logger bound to the job currently running in this async context. */ const activeInsightsLog = new AsyncLocalStorage(); + +const pipeline = createDrainPipeline({ + batch: { size: 50, intervalMs: 5000 }, + maxBufferSize: 2000, +}); + +const batchedAxiomDrain = pipeline(createAxiomDrain()); + +/* The file drain exists only in development or when INSIGHTS_EVLOG_FS is enabled; it is null otherwise. */ const fsDrain = + process.env.NODE_ENV === "development" || readBooleanEnv("INSIGHTS_EVLOG_FS") + ? createFsDrain({ + dir: join( + dirname(fileURLToPath(import.meta.url)), + "..", + "..", + ".evlog", + "logs" + ), + pretty: false, + }) + : null; + +const deploymentMeta: Record = {}; +if (process.env.RAILWAY_REPLICA_ID) { + deploymentMeta.instance_id = process.env.RAILWAY_REPLICA_ID; +} +if (process.env.RAILWAY_DEPLOYMENT_ID) { + deploymentMeta.deployment_id = process.env.RAILWAY_DEPLOYMENT_ID; +} + +/** Mutates the event in place: a string error is moved to error_message, and the deployment metadata is merged in. */ function normalizeWideEventForAxiom(event: Record): void { + if (typeof event.error === "string") { + event.error_message = event.error; + event.error = undefined; + } + Object.assign(event, deploymentMeta); +} + +/** Drain passed to the logger: normalizes the event, writes it to the optional file drain, then to the batched Axiom drain. A failure of either drain is swallowed so logging can never fail the caller, and a file-drain failure no longer prevents the Axiom write. */ export async function insightsLoggerDrain(ctx: DrainContext): Promise { + normalizeWideEventForAxiom(ctx.event as Record); + + if (fsDrain) { + try { + await fsDrain(ctx); + } catch { + /* A file-drain failure must not break background workers or skip the Axiom drain. */ + } + } + try { + await batchedAxiomDrain(ctx); + } catch { + // Drain failures must not break background workers. 
+ } +} + +/** Flushes whatever batchedAxiomDrain currently has buffered. */ export async function flushBatchedInsightsDrain(): Promise { + await batchedAxiomDrain.flush(); +} + +/** Creates a logger seeded with the insights service name plus the given fields. */ export function createInsightsEventLog(fields: LogFields): RequestLogger { + return createLogger(cleanFields({ service: "insights", ...fields })); +} + +/** Returns the logger bound to the current async context; throws when none is bound. */ export function getActiveInsightsLog(): RequestLogger { + const logger = activeInsightsLog.getStore(); + if (!logger) { + throw new Error("No active insights evlog context"); + } + return logger; +} + +/** Runs fn with logger bound as the active logger for its async context and returns the result of fn. */ export async function withInsightsLogContext( + logger: RequestLogger, + fn: () => Promise +): Promise { + return await activeInsightsLog.run(logger, fn); +} + +/** Sets fields on the active logger; does nothing when no logger is bound. */ export function setInsightsLog(fields: LogFields): void { + activeInsightsLog.getStore()?.set(cleanFields(fields)); +} + +/** Emits a standalone log event at the given level, tagged with the service name and the event name under insights_event. Caller fields are spread last, so they can override both tags. */ export function emitInsightsEvent( + level: LogLevel, + event: string, + fields: LogFields = {} +): void { + const payload = cleanFields({ + service: "insights", + insights_event: event, + ...fields, + }); + + if (level === "error") { + log.error(payload); + return; + } + if (level === "warn") { + log.warn(payload); + return; + } + log.info(payload); +} + +/** Emits an error-level event that carries the message and stack of the normalized error in addition to the given fields. */ export function captureInsightsError( + error: unknown, + event: string, + fields: LogFields = {} +): void { + const err = toError(error); + emitInsightsEvent("error", event, { + ...fields, + error_message: err.message, + error_stack: err.stack, + }); +} + +/** Returns Error instances unchanged and wraps any other value in a new Error built from its string form. */ export function toError(error: unknown): Error { + return error instanceof Error ? 
error : new Error(String(error)); +} + +/** Returns a new object containing only the entries of fields whose value is neither undefined nor null. */ function cleanFields(fields: LogFields): Record { + const clean: Record = {}; + for (const [key, value] of Object.entries(fields)) { + if (value !== undefined && value !== null) { + clean[key] = value; + } + } + return clean; +} diff --git a/apps/insights/src/recovery.test.ts b/apps/insights/src/recovery.test.ts new file mode 100644 index 000000000..13e697733 --- /dev/null +++ b/apps/insights/src/recovery.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from "bun:test"; +import { INSIGHTS_JOB_TIMEOUT_MS } from "@databuddy/redis"; +import { + getInsightsMaintenanceIntervalMs, + getInsightsStaleItemMs, +} from "./recovery"; + +/* Covers the defaults and the lower bounds of the two duration getters exported by the recovery module. */ describe("insights recovery config", () => { + it("uses a five minute maintenance interval by default", () => { + expect(getInsightsMaintenanceIntervalMs(undefined)).toBe(300_000); + }); + + it("rejects maintenance intervals below one minute", () => { + expect(getInsightsMaintenanceIntervalMs("5000")).toBe(300_000); + }); + + it("uses a stale timeout above the BullMQ lock window by default", () => { + expect(getInsightsStaleItemMs(undefined)).toBeGreaterThan( + INSIGHTS_JOB_TIMEOUT_MS * 2 + ); + }); + + it("rejects stale item timeouts below the worker retry window", () => { + expect(getInsightsStaleItemMs(String(INSIGHTS_JOB_TIMEOUT_MS))).toBe( + getInsightsStaleItemMs(undefined) + ); + }); +}); diff --git a/apps/insights/src/recovery.ts b/apps/insights/src/recovery.ts new file mode 100644 index 000000000..9eb76ef34 --- /dev/null +++ b/apps/insights/src/recovery.ts @@ -0,0 +1,319 @@ +import { and, asc, db, eq, inArray, lt } from "@databuddy/db"; +import { + insightRunItems, + insightRuns, + type InsightRun, + type InsightRunItem, + type InsightRunStatus, +} from "@databuddy/db/schema"; +import { + getInsightsQueue, + INSIGHTS_JOB_TIMEOUT_MS, + INSIGHTS_ROLLUP_JOB_NAME, + insightsRollupJobId, +} 
from "./lib/evlog-insights"; + +const DEFAULT_MAINTENANCE_INTERVAL_MS = 5 * 60 * 1000; +const MIN_MAINTENANCE_INTERVAL_MS = 60 * 1000; +const DEFAULT_STALE_ITEM_MS = Math.max( + 15 * 60 * 1000, + INSIGHTS_JOB_TIMEOUT_MS * 4 +); +const MIN_STALE_ITEM_MS = INSIGHTS_JOB_TIMEOUT_MS * 2; +const MAX_STALE_ITEMS_PER_SWEEP = 100; +const MAX_STALE_RUNS_PER_SWEEP = 100; + +const ACTIVE_QUEUE_STATES = new Set([ + "active", + "delayed", + "prioritized", + "waiting", + "waiting-children", +]); + +type RecoverableItem = Pick< + InsightRunItem, + "id" | "queueJobId" | "runId" | "status" +>; + +interface RunStatusSummary { + completedItems: number; + failedItems: number; + queuedItems: number; + run: InsightRun | null; + runningItems: number; + settled: boolean; + skippedItems: number; + status: InsightRunStatus; + totalItems: number; +} + +export interface InsightRecoveryResult { + failedItems: number; + keptItems: number; + scannedItems: number; + scannedRuns: number; + syncedRuns: number; +} + +function parseDurationMs( + value: string | undefined, + fallback: number, + min: number +): number { + if (value === undefined || value.trim() === "") { + return fallback; + } + const parsed = Number.parseInt(value, 10); + if (!Number.isSafeInteger(parsed) || parsed < min) { + return fallback; + } + return parsed; +} + +export function getInsightsMaintenanceIntervalMs( + value = process.env.INSIGHTS_MAINTENANCE_INTERVAL_MS +): number { + return parseDurationMs( + value, + DEFAULT_MAINTENANCE_INTERVAL_MS, + MIN_MAINTENANCE_INTERVAL_MS + ); +} + +export function getInsightsStaleItemMs( + value = process.env.INSIGHTS_STALE_ITEM_MS +): number { + return parseDurationMs(value, DEFAULT_STALE_ITEM_MS, MIN_STALE_ITEM_MS); +} + +async function staleItemFailureReason( + item: RecoverableItem +): Promise { + if (!item.queueJobId) { + return "Insight queue job id is missing after stale timeout"; + } + + const job = await getInsightsQueue().getJob(item.queueJobId); + if (!job) { + return "Insight 
queue job is missing after stale timeout"; + } + + const state = await job.getState(); + if (ACTIVE_QUEUE_STATES.has(state)) { + return null; + } + return `Insight queue job is ${state} but the database item is still ${item.status}`; +} + +async function staleItems(cutoff: Date): Promise { + return await db + .select({ + id: insightRunItems.id, + queueJobId: insightRunItems.queueJobId, + runId: insightRunItems.runId, + status: insightRunItems.status, + }) + .from(insightRunItems) + .where( + and( + inArray(insightRunItems.status, ["queued", "running"]), + lt(insightRunItems.updatedAt, cutoff) + ) + ) + .orderBy(asc(insightRunItems.updatedAt)) + .limit(MAX_STALE_ITEMS_PER_SWEEP); +} + +async function staleRunIds(cutoff: Date): Promise { + const rows = await db + .select({ id: insightRuns.id }) + .from(insightRuns) + .where( + and( + inArray(insightRuns.status, ["queued", "running"]), + lt(insightRuns.updatedAt, cutoff) + ) + ) + .orderBy(asc(insightRuns.updatedAt)) + .limit(MAX_STALE_RUNS_PER_SWEEP); + + return rows.map((row) => row.id); +} + +export async function syncRunStatus(runId: string): Promise { + const [run, items] = await Promise.all([ + db.query.insightRuns.findFirst({ where: { id: runId } }), + db + .select({ status: insightRunItems.status }) + .from(insightRunItems) + .where(eq(insightRunItems.runId, runId)), + ]); + + const completedItems = items.filter( + (item) => item.status === "succeeded" + ).length; + const failedItems = items.filter((item) => item.status === "failed").length; + const queuedItems = items.filter((item) => item.status === "queued").length; + const runningItems = items.filter((item) => item.status === "running").length; + const skippedItems = items.filter((item) => item.status === "skipped").length; + const settledItems = completedItems + failedItems + skippedItems; + const totalItems = items.length; + const settled = settledItems === totalItems; + + let status: InsightRunStatus = + queuedItems === totalItems ? 
"queued" : "running"; + if (totalItems === 0) { + status = "skipped"; + } else if (settled) { + if (completedItems > 0 && failedItems === 0) { + status = "succeeded"; + } else if (completedItems > 0) { + status = "partially_succeeded"; + } else if (skippedItems === totalItems) { + status = "skipped"; + } else { + status = "failed"; + } + } + + const now = new Date(); + await db + .update(insightRuns) + .set({ + completedItems, + failedItems, + skippedItems, + status, + updatedAt: now, + ...(settled ? { finishedAt: now } : {}), + }) + .where(eq(insightRuns.id, runId)); + + setInsightsLog({ + run_status: status, + run_total_items: totalItems, + run_completed_items: completedItems, + run_failed_items: failedItems, + run_queued_items: queuedItems, + run_running_items: runningItems, + run_skipped_items: skippedItems, + run_settled: settled, + }); + + return { + completedItems, + failedItems, + queuedItems, + run: run ?? null, + runningItems, + settled, + skippedItems, + status, + totalItems, + }; +} + +export async function queueRollupIfSettled( + summary: RunStatusSummary +): Promise { + if (!(summary.run && summary.settled && summary.completedItems > 0)) { + return; + } + if ( + summary.status !== "succeeded" && + summary.status !== "partially_succeeded" + ) { + return; + } + + try { + await getInsightsQueue().add( + INSIGHTS_ROLLUP_JOB_NAME, + { + organizationId: summary.run.organizationId, + reason: summary.run.reason, + runId: summary.run.id, + timezone: summary.run.timezone, + }, + { jobId: insightsRollupJobId(summary.run.id) } + ); + emitInsightsEvent("info", "recovery.rollup_queued", { + run_id: summary.run.id, + organization_id: summary.run.organizationId, + run_status: summary.status, + completed_items: summary.completedItems, + }); + } catch (error) { + captureInsightsError(error, "recovery.rollup_queue_failed", { + run_id: summary.run.id, + organization_id: summary.run.organizationId, + }); + } +} + +export async function recoverStaleInsightRuns( + now = new 
Date() +): Promise { + const startedAt = performance.now(); + const cutoff = new Date(now.getTime() - getInsightsStaleItemMs()); + const items = await staleItems(cutoff); + const affectedRunIds = new Set(); + let failedItems = 0; + let keptItems = 0; + + for (const item of items) { + const reason = await staleItemFailureReason(item); + if (!reason) { + keptItems += 1; + continue; + } + + await db + .update(insightRunItems) + .set({ + errorMessage: reason, + finishedAt: now, + status: "failed", + updatedAt: now, + }) + .where(eq(insightRunItems.id, item.id)); + affectedRunIds.add(item.runId); + failedItems += 1; + emitInsightsEvent("warn", "recovery.stale_item_failed", { + item_id: item.id, + queue_job_id: item.queueJobId, + run_id: item.runId, + previous_status: item.status, + reason, + }); + } + + const runIds = new Set([...affectedRunIds, ...(await staleRunIds(cutoff))]); + + for (const runId of runIds) { + const summary = await syncRunStatus(runId); + await queueRollupIfSettled(summary); + } + + emitInsightsEvent("info", "recovery.sweep_completed", { + duration_ms: Math.round(performance.now() - startedAt), + failed_items: failedItems, + kept_items: keptItems, + scanned_items: items.length, + synced_runs: runIds.size, + }); + + return { + failedItems, + keptItems, + scannedItems: items.length, + scannedRuns: runIds.size, + syncedRuns: runIds.size, + }; +} diff --git a/apps/insights/src/rollup.test.ts b/apps/insights/src/rollup.test.ts new file mode 100644 index 000000000..1bfa5dd08 --- /dev/null +++ b/apps/insights/src/rollup.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from "bun:test"; +import { buildDeterministicRollupNarrative } from "./rollup"; + +describe("buildDeterministicRollupNarrative", () => { + it("returns a healthy fallback when no insights exist", () => { + expect(buildDeterministicRollupNarrative("7d", [])).toBe( + "All systems healthy this week. No actionable signals detected." 
+ ); + }); + + it("summarizes the top signal with site context", () => { + const narrative = buildDeterministicRollupNarrative("30d", [ + { + title: "Checkout errors increased", + description: "Errors rose on checkout.", + suggestion: "Review checkout errors.", + severity: "critical", + sentiment: "negative", + priority: 9, + changePercent: 42, + websiteName: "App", + websiteDomain: "app.example.com", + }, + ]); + + expect(narrative).toBe( + "This month: Checkout errors increased (+42%) on App." + ); + }); + + it("mentions an additional signal when multiple cards exist", () => { + const narrative = buildDeterministicRollupNarrative("90d", [ + { + title: "Interactions got slower", + description: "INP regressed.", + suggestion: "Audit slow pages.", + severity: "warning", + sentiment: "negative", + priority: 8, + changePercent: null, + websiteName: null, + websiteDomain: "www.example.com", + }, + { + title: "Docs traffic improved", + description: "Organic sessions rose.", + suggestion: "Compare landing pages.", + severity: "info", + sentiment: "positive", + priority: 6, + changePercent: 18, + websiteName: "Docs", + websiteDomain: "docs.example.com", + }, + ]); + + expect(narrative).toBe( + "This quarter: Interactions got slower on www.example.com. Also review Docs traffic improved on Docs." 
+ ); + }); +}); diff --git a/apps/insights/src/rollup.ts b/apps/insights/src/rollup.ts new file mode 100644 index 000000000..1fc5b6309 --- /dev/null +++ b/apps/insights/src/rollup.ts @@ -0,0 +1,279 @@ +import { ANTHROPIC_CACHE_1H, models } from "@databuddy/ai/config/models"; +import { getAILogger } from "@databuddy/ai/lib/ai-logger"; +import { and, db, desc, eq, gte, isNull, sql } from "@databuddy/db"; +import { + analyticsInsights, + insightRollups, + type InsightRollupRange, + websites, +} from "@databuddy/db/schema"; +import { + invalidateInsightsCachesForOrganization, + type InsightsRollupJobData, +} from "@databuddy/redis"; +import { generateText } from "ai"; +import { randomUUIDv7 } from "bun"; +import dayjs from "dayjs"; +import { captureInsightsError, emitInsightsEvent } from "./lib/evlog-insights"; + +const ROLLUP_RANGES = ["7d", "30d", "90d"] as const; +const RANGE_TO_DAYS: Record = { + "7d": 7, + "30d": 30, + "90d": 90, +}; +const RANGE_TO_LABEL: Record = { + "7d": "week", + "30d": "month", + "90d": "quarter", +}; +const ROLLUP_INSIGHT_LIMIT = 12; +const MAX_NARRATIVE_LENGTH = 700; + +export interface RollupInsightSummary { + changePercent: number | null; + description: string; + priority: number; + sentiment: string; + severity: string; + suggestion: string; + title: string; + websiteDomain: string; + websiteName: string | null; +} + +function sanitizeNarrative(value: string): string { + const text = value.replace(/\s+/g, " ").trim(); + if (text.length <= MAX_NARRATIVE_LENGTH) { + return text; + } + return `${text.slice(0, MAX_NARRATIVE_LENGTH - 3).trimEnd()}...`; +} + +export function buildDeterministicRollupNarrative( + range: InsightRollupRange, + insights: RollupInsightSummary[] +): string { + const label = RANGE_TO_LABEL[range]; + const headline = insights[0]; + if (!headline) { + return `All systems healthy this ${label}. No actionable signals detected.`; + } + + const siteName = headline.websiteName ?? 
headline.websiteDomain; + const change = + headline.changePercent == null + ? "" + : ` (${headline.changePercent > 0 ? "+" : ""}${headline.changePercent.toFixed(0)}%)`; + const opener = `This ${label}: ${headline.title}${change} on ${siteName}.`; + if (insights.length === 1) { + return opener; + } + + const extra = insights.length - 1; + const second = insights[1]; + const secondSite = second.websiteName ?? second.websiteDomain; + if (extra === 1) { + return `${opener} Also review ${second.title} on ${secondSite}.`; + } + const remaining = extra - 1; + return `${opener} Also review ${second.title} on ${secondSite}, plus ${remaining} more signal${remaining === 1 ? "" : "s"}.`; +} + +async function fetchRollupInsights( + organizationId: string, + range: InsightRollupRange +): Promise { + const cutoff = dayjs() + .subtract(RANGE_TO_DAYS[range], "day") + .format("YYYY-MM-DD"); + const rows = await db + .select({ + title: analyticsInsights.title, + description: analyticsInsights.description, + suggestion: analyticsInsights.suggestion, + severity: analyticsInsights.severity, + sentiment: analyticsInsights.sentiment, + priority: analyticsInsights.priority, + changePercent: analyticsInsights.changePercent, + createdAt: analyticsInsights.createdAt, + websiteName: websites.name, + websiteDomain: websites.domain, + }) + .from(analyticsInsights) + .innerJoin(websites, eq(analyticsInsights.websiteId, websites.id)) + .where( + and( + eq(analyticsInsights.organizationId, organizationId), + gte(analyticsInsights.currentPeriodTo, cutoff), + isNull(websites.deletedAt) + ) + ) + .orderBy( + desc(analyticsInsights.priority), + desc(analyticsInsights.createdAt) + ) + .limit(ROLLUP_INSIGHT_LIMIT); + + return rows.map((row) => ({ + title: row.title, + description: row.description, + suggestion: row.suggestion, + severity: row.severity, + sentiment: row.sentiment, + priority: row.priority, + changePercent: row.changePercent, + websiteName: row.websiteName, + websiteDomain: 
row.websiteDomain, + })); +} + +async function generateRollupNarrative( + range: InsightRollupRange, + organizationId: string, + insights: RollupInsightSummary[] +): Promise { + const fallback = buildDeterministicRollupNarrative(range, insights); + if (insights.length === 0) { + emitInsightsEvent("info", "rollup.narrative_deterministic_empty", { + organization_id: organizationId, + range, + }); + return fallback; + } + + try { + const startedAt = performance.now(); + const ai = getAILogger(); + const result = await generateText({ + model: ai.wrap(models.balanced), + messages: [ + { + role: "system", + content: + "Write one compact Databuddy executive analytics brief from stored insight cards. Use only the supplied cards. Be specific, operational, and plain English. Mention the most important website names. Do not invent causes, revenue, user counts, or metrics. Return one paragraph under 90 words.", + providerOptions: ANTHROPIC_CACHE_1H, + }, + { + role: "user", + content: JSON.stringify( + { + range, + insights, + }, + null, + 2 + ), + }, + ], + temperature: 0.2, + maxOutputTokens: 512, + abortSignal: AbortSignal.timeout(30_000), + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.rollup", + metadata: { + source: "insights_worker", + feature: "smart_insights", + organizationId, + range, + }, + }, + }); + + const text = sanitizeNarrative(result.text); + emitInsightsEvent("info", "rollup.narrative_generation_completed", { + organization_id: organizationId, + range, + duration_ms: Math.round(performance.now() - startedAt), + insight_count: insights.length, + used_fallback: text.length === 0, + }); + return text || fallback; + } catch (error) { + captureInsightsError(error, "rollup.narrative_generation_failed", { + organization_id: organizationId, + range, + }); + return fallback; + } +} + +async function persistRollup(input: { + generatedAt: Date; + narrative: string; + organizationId: string; + range: InsightRollupRange; + runId: 
string | null; +}): Promise { + await db + .insert(insightRollups) + .values({ + id: randomUUIDv7(), + organizationId: input.organizationId, + runId: input.runId, + range: input.range, + narrative: input.narrative, + generatedAt: input.generatedAt, + updatedAt: input.generatedAt, + }) + .onConflictDoUpdate({ + target: [insightRollups.organizationId, insightRollups.range], + set: { + runId: input.runId, + narrative: sql.raw("excluded.narrative"), + generatedAt: input.generatedAt, + updatedAt: input.generatedAt, + }, + }); +} + +async function generateRangeRollup( + data: InsightsRollupJobData, + range: InsightRollupRange, + generatedAt: Date +): Promise { + const startedAt = performance.now(); + const insights = await fetchRollupInsights(data.organizationId, range); + const narrative = await generateRollupNarrative( + range, + data.organizationId, + insights + ); + + await persistRollup({ + generatedAt, + narrative, + organizationId: data.organizationId, + range, + runId: data.runId, + }); + emitInsightsEvent("info", "rollup.range_completed", { + organization_id: data.organizationId, + run_id: data.runId, + range, + duration_ms: Math.round(performance.now() - startedAt), + insight_count: insights.length, + }); +} + +export async function processRollupJob( + data: InsightsRollupJobData +): Promise<{ ranges: number; status: "succeeded" }> { + const startedAt = performance.now(); + const generatedAt = new Date(); + await Promise.all( + ROLLUP_RANGES.map((range) => generateRangeRollup(data, range, generatedAt)) + ); + await invalidateInsightsCachesForOrganization(data.organizationId); + + emitInsightsEvent("info", "rollup.job_completed", { + organization_id: data.organizationId, + run_id: data.runId, + reason: data.reason, + duration_ms: Math.round(performance.now() - startedAt), + ranges: ROLLUP_RANGES.length, + }); + + return { status: "succeeded", ranges: ROLLUP_RANGES.length }; +} diff --git a/apps/insights/src/scheduler.integration.test.ts 
b/apps/insights/src/scheduler.integration.test.ts new file mode 100644 index 000000000..ef436175e --- /dev/null +++ b/apps/insights/src/scheduler.integration.test.ts @@ -0,0 +1,222 @@ +import "@databuddy/test/env"; +import { afterAll, afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { db as appDb, shutdownPostgres } from "@databuddy/db"; +import { + insightGenerationConfigs, + insightRunItems, + insightRuns, +} from "@databuddy/db/schema"; +import { + closeInsightsQueue, + getInsightsQueue, + type InsightsGenerateWebsiteJobData, +} from "@databuddy/redis"; +import { + closePostgres, + db, + hasTestDb, + insertOrganization, + insertWebsite, + truncatePostgres, +} from "@databuddy/test"; +import { and, asc, eq, isNull } from "drizzle-orm"; +import { randomUUIDv7 } from "bun"; +import { dispatchDueInsightRuns } from "./scheduler"; + +const runIntegration = + process.env.INSIGHTS_INTEGRATION_TESTS === "true" && hasTestDb; +const describeIntegration = runIntegration ? describe : describe.skip; + +describeIntegration("insights scheduler integration", () => { + const organizationIds = new Set(); + + beforeEach(async () => { + await truncatePostgres(); + }); + + afterEach(async () => { + await cleanupQueueJobs(); + await truncatePostgres(); + organizationIds.clear(); + }); + + afterAll(async () => { + await cleanupQueueJobs(); + await closeInsightsQueue(); + await shutdownPostgres(); + await closePostgres(); + }); + + it("dispatches an org config only to websites without website overrides", async () => { + const org = await insertOrganization(); + organizationIds.add(org.id); + const included = await insertWebsite({ + organizationId: org.id, + domain: "included.example.com", + }); + const overridden = await insertWebsite({ + organizationId: org.id, + domain: "overridden.example.com", + }); + const now = new Date(); + + await db().insert(insightGenerationConfigs).values([ + { + id: randomUUIDv7(), + organizationId: org.id, + websiteId: null, + enabled: 
true, + frequency: "daily", + nextRunAt: new Date(now.getTime() - 1000), + }, + { + id: randomUUIDv7(), + organizationId: org.id, + websiteId: overridden.id, + enabled: true, + frequency: "weekly", + nextRunAt: new Date(now.getTime() + 86_400_000), + }, + ]); + + const result = await dispatchDueInsightRuns(now); + + expect(result).toMatchObject({ + scannedConfigs: 1, + claimedConfigs: 1, + dispatchedRuns: 1, + queuedItems: 1, + skippedConfigs: 0, + }); + + const runs = await runsForOrg(org.id); + expect(runs).toHaveLength(1); + expect(runs[0]).toMatchObject({ + organizationId: org.id, + reason: "scheduled", + status: "queued", + totalItems: 1, + }); + + const items = await itemsForRun(runs[0].id); + expect(items.map((item) => item.websiteId)).toEqual([included.id]); + + const jobs = await queueJobsForOrg(org.id); + expect(jobs).toHaveLength(1); + expect(jobs[0]?.name).toBe("insights-generate-website"); + expect(jobs[0]?.data.websiteId).toBe(included.id); + expect(jobs[0]?.data.runId).toBe(runs[0].id); + + const [config] = await db() + .select({ + lastRunAt: insightGenerationConfigs.lastRunAt, + nextRunAt: insightGenerationConfigs.nextRunAt, + }) + .from(insightGenerationConfigs) + .where( + and( + eq(insightGenerationConfigs.organizationId, org.id), + isNull(insightGenerationConfigs.websiteId) + ) + ) + .limit(1); + + expect(config?.lastRunAt?.getTime()).toBe(now.getTime()); + expect(config?.nextRunAt && config.nextRunAt.getTime() > now.getTime()).toBe( + true + ); + }); + + it("dispatches due website configs independently", async () => { + const org = await insertOrganization(); + organizationIds.add(org.id); + const website = await insertWebsite({ + organizationId: org.id, + domain: "website-scope.example.com", + }); + const now = new Date(); + + await db().insert(insightGenerationConfigs).values({ + id: randomUUIDv7(), + organizationId: org.id, + websiteId: website.id, + enabled: true, + frequency: "hourly", + nextRunAt: new Date(now.getTime() - 1000), + }); + + 
const result = await dispatchDueInsightRuns(now); + + expect(result).toMatchObject({ + scannedConfigs: 1, + claimedConfigs: 1, + dispatchedRuns: 1, + queuedItems: 1, + skippedConfigs: 0, + }); + + const runs = await runsForOrg(org.id); + const items = await itemsForRun(runs[0].id); + const jobs = await queueJobsForOrg(org.id); + + expect(items.map((item) => item.websiteId)).toEqual([website.id]); + expect(jobs).toHaveLength(1); + expect(jobs[0]?.data.websiteId).toBe(website.id); + }); + + async function runsForOrg(organizationId: string) { + return await appDb + .select() + .from(insightRuns) + .where(eq(insightRuns.organizationId, organizationId)) + .orderBy(asc(insightRuns.createdAt)); + } + + async function itemsForRun(runId: string) { + return await appDb + .select() + .from(insightRunItems) + .where(eq(insightRunItems.runId, runId)) + .orderBy(asc(insightRunItems.websiteId)); + } + + async function queueJobsForOrg(organizationId: string) { + const jobs = await getInsightsQueue().getJobs( + ["waiting", "delayed", "prioritized", "paused", "completed", "failed"], + 0, + -1 + ); + return jobs + .filter((job) => { + const data = job.data as Partial; + return data.organizationId === organizationId; + }) + .sort((a, b) => + String(a.data.websiteId ?? "").localeCompare( + String(b.data.websiteId ?? 
"") + ) + ); + } + + async function cleanupQueueJobs(): Promise { + if (organizationIds.size === 0) { + return; + } + const jobs = await getInsightsQueue().getJobs( + ["waiting", "delayed", "prioritized", "paused", "completed", "failed"], + 0, + -1 + ); + await Promise.allSettled( + jobs + .filter((job) => { + const data = job.data as Partial; + return ( + typeof data.organizationId === "string" && + organizationIds.has(data.organizationId) + ); + }) + .map((job) => job.remove()) + ); + } +}); diff --git a/apps/insights/src/scheduler.ts b/apps/insights/src/scheduler.ts new file mode 100644 index 000000000..b3d19c799 --- /dev/null +++ b/apps/insights/src/scheduler.ts @@ -0,0 +1,293 @@ +import { and, asc, db, eq, isNotNull, isNull, lte } from "@databuddy/db"; +import { + insightGenerationConfigs, + websites, + type InsightGenerationFrequency, +} from "@databuddy/db/schema"; +import { queueInsightGenerationRun } from "@databuddy/rpc/insight-generation"; +import { getNextInsightRunAt } from "@databuddy/rpc/insight-schedule"; +import { + getInsightsQueue, + INSIGHTS_DISPATCH_JOB_NAME, + INSIGHTS_MAINTENANCE_JOB_NAME, + type InsightGenerationReason, +} from "@databuddy/redis"; +import { captureInsightsError, emitInsightsEvent } from "./lib/evlog-insights"; +import { getInsightsMaintenanceIntervalMs } from "./recovery"; + +const DEFAULT_DISPATCH_INTERVAL_MS = 5 * 60 * 1000; +const MIN_DISPATCH_INTERVAL_MS = 60 * 1000; +const MAX_DUE_CONFIGS_PER_TICK = 100; +const FAILED_DISPATCH_RETRY_MS = 60 * 1000; + +type DueConfig = typeof insightGenerationConfigs.$inferSelect; + +export interface DispatchDueInsightRunsResult { + claimedConfigs: number; + dispatchedRuns: number; + queuedItems: number; + scannedConfigs: number; + skippedConfigs: number; +} + +function dispatchIntervalMs(): number { + const raw = process.env.INSIGHTS_DISPATCH_INTERVAL_MS; + if (!raw) { + return DEFAULT_DISPATCH_INTERVAL_MS; + } + const parsed = Number.parseInt(raw, 10); + if 
(!Number.isSafeInteger(parsed) || parsed < MIN_DISPATCH_INTERVAL_MS) { + return DEFAULT_DISPATCH_INTERVAL_MS; + } + return parsed; +} + +function nextRunAtFor(config: DueConfig, from: Date): Date | null { + return getNextInsightRunAt( + { + cron: config.cron, + enabled: config.enabled, + frequency: config.frequency as InsightGenerationFrequency, + timezone: config.timezone, + }, + from + ); +} + +async function dueConfigs(now: Date): Promise { + return await db + .select() + .from(insightGenerationConfigs) + .where( + and( + eq(insightGenerationConfigs.enabled, true), + lte(insightGenerationConfigs.nextRunAt, now) + ) + ) + .orderBy(asc(insightGenerationConfigs.nextRunAt)) + .limit(MAX_DUE_CONFIGS_PER_TICK); +} + +async function claimConfig( + config: DueConfig, + now: Date +): Promise { + const [claimed] = await db + .update(insightGenerationConfigs) + .set({ + nextRunAt: nextRunAtFor(config, now), + updatedAt: now, + }) + .where( + and( + eq(insightGenerationConfigs.id, config.id), + eq(insightGenerationConfigs.enabled, true), + lte(insightGenerationConfigs.nextRunAt, now) + ) + ) + .returning(); + + return claimed ?? 
null; +} + +async function markConfigDispatched( + configId: string, + now: Date +): Promise { + await db + .update(insightGenerationConfigs) + .set({ lastRunAt: now, updatedAt: now }) + .where(eq(insightGenerationConfigs.id, configId)); +} + +async function retryConfigSoon(configId: string, now: Date): Promise { + await db + .update(insightGenerationConfigs) + .set({ + nextRunAt: new Date(now.getTime() + FAILED_DISPATCH_RETRY_MS), + updatedAt: now, + }) + .where(eq(insightGenerationConfigs.id, configId)); +} + +async function websiteIdsWithOverrides( + organizationId: string +): Promise> { + const rows = await db + .select({ websiteId: insightGenerationConfigs.websiteId }) + .from(insightGenerationConfigs) + .where( + and( + eq(insightGenerationConfigs.organizationId, organizationId), + isNotNull(insightGenerationConfigs.websiteId) + ) + ); + + const ids = new Set(); + for (const row of rows) { + if (row.websiteId) { + ids.add(row.websiteId); + } + } + return ids; +} + +async function orgScheduledWebsiteIds( + organizationId: string +): Promise { + const overrideIds = await websiteIdsWithOverrides(organizationId); + const rows = await db + .select({ id: websites.id }) + .from(websites) + .where( + and( + eq(websites.organizationId, organizationId), + isNull(websites.deletedAt) + ) + ) + .orderBy(asc(websites.createdAt)); + + return rows + .map((row) => row.id) + .filter((websiteId) => !overrideIds.has(websiteId)); +} + +async function targetWebsiteIds(config: DueConfig): Promise { + if (config.websiteId) { + const [website] = await db + .select({ id: websites.id }) + .from(websites) + .where( + and( + eq(websites.id, config.websiteId), + eq(websites.organizationId, config.organizationId), + isNull(websites.deletedAt) + ) + ) + .limit(1); + return website ? 
[website.id] : []; + } + return await orgScheduledWebsiteIds(config.organizationId); +} + +export async function ensureInsightsDispatchSchedule(): Promise { + const intervalMs = dispatchIntervalMs(); + await getInsightsQueue().upsertJobScheduler( + INSIGHTS_DISPATCH_JOB_NAME, + { every: intervalMs }, + { + name: INSIGHTS_DISPATCH_JOB_NAME, + data: { + reason: "scheduled", + triggeredAt: new Date().toISOString(), + }, + } + ); + + emitInsightsEvent("info", "scheduler.dispatch_ensured", { + interval_ms: intervalMs, + }); +} + +export async function ensureInsightsMaintenanceSchedule(): Promise { + const intervalMs = getInsightsMaintenanceIntervalMs(); + await getInsightsQueue().upsertJobScheduler( + INSIGHTS_MAINTENANCE_JOB_NAME, + { every: intervalMs }, + { + name: INSIGHTS_MAINTENANCE_JOB_NAME, + data: { + reason: "maintenance", + triggeredAt: new Date().toISOString(), + }, + } + ); + + emitInsightsEvent("info", "scheduler.maintenance_ensured", { + interval_ms: intervalMs, + }); +} + +export async function dispatchDueInsightRuns( + now = new Date() +): Promise { + const startedAt = performance.now(); + const configs = await dueConfigs(now); + const result: DispatchDueInsightRunsResult = { + scannedConfigs: configs.length, + claimedConfigs: 0, + dispatchedRuns: 0, + queuedItems: 0, + skippedConfigs: 0, + }; + + for (const config of configs) { + const claimed = await claimConfig(config, now); + if (!claimed) { + result.skippedConfigs += 1; + continue; + } + result.claimedConfigs += 1; + + try { + const websiteIds = await targetWebsiteIds(claimed); + if (websiteIds.length === 0) { + await markConfigDispatched(claimed.id, now); + result.skippedConfigs += 1; + emitInsightsEvent("warn", "scheduler.config_skipped_no_targets", { + config_id: claimed.id, + organization_id: claimed.organizationId, + website_id: claimed.websiteId, + }); + continue; + } + + const queued = await queueInsightGenerationRun({ + organizationId: claimed.organizationId, + reason: "scheduled" satisfies 
InsightGenerationReason, + websiteIds, + }); + if (queued.reusedRun) { + await retryConfigSoon(claimed.id, now); + result.skippedConfigs += 1; + emitInsightsEvent("warn", "scheduler.config_skipped_active_run", { + config_id: claimed.id, + organization_id: claimed.organizationId, + website_id: claimed.websiteId, + run_id: queued.runId, + }); + continue; + } + await markConfigDispatched(claimed.id, now); + result.dispatchedRuns += 1; + result.queuedItems += queued.queuedItems; + emitInsightsEvent("info", "scheduler.config_dispatched", { + config_id: claimed.id, + organization_id: claimed.organizationId, + website_id: claimed.websiteId, + target_website_count: websiteIds.length, + queued_items: queued.queuedItems, + run_id: queued.runId, + }); + } catch (error) { + await retryConfigSoon(claimed.id, now); + result.skippedConfigs += 1; + captureInsightsError(error, "scheduler.config_dispatch_failed", { + config_id: claimed.id, + organization_id: claimed.organizationId, + website_id: claimed.websiteId, + }); + } + } + + emitInsightsEvent("info", "scheduler.dispatch_tick.completed", { + duration_ms: Math.round(performance.now() - startedAt), + scanned_configs: result.scannedConfigs, + claimed_configs: result.claimedConfigs, + dispatched_runs: result.dispatchedRuns, + queued_items: result.queuedItems, + skipped_configs: result.skippedConfigs, + }); + + return result; +} diff --git a/apps/insights/src/worker.ts b/apps/insights/src/worker.ts new file mode 100644 index 000000000..86b74043d --- /dev/null +++ b/apps/insights/src/worker.ts @@ -0,0 +1,87 @@ +import { + getBullMQWorkerConnectionOptions, + INSIGHTS_JOB_TIMEOUT_MS, + INSIGHTS_QUEUE_ENV_PREFIX, + INSIGHTS_QUEUE_NAME, + type InsightsQueueJobData, +} from "@databuddy/redis"; +import { Worker } from "bullmq"; +import { processInsightsJob } from "./jobs"; +import { emitInsightsEvent } from "./lib/evlog-insights"; + +/** Concurrency returned by getInsightsWorkerConcurrency when the configured value is missing or rejected. */ const DEFAULT_INSIGHTS_WORKER_CONCURRENCY = 5; + +/** Resolves the worker concurrency from value (default: process.env.INSIGHTS_WORKER_CONCURRENCY). Returns DEFAULT_INSIGHTS_WORKER_CONCURRENCY when value is undefined, blank after trim, or does not parse to a positive safe integer. NOTE(review): Number.parseInt reads only the leading numeric prefix, so inputs such as 8x or 2.5 are accepted as 8 and 2 instead of falling back; confirm that leniency is intended. */ export function getInsightsWorkerConcurrency( + value = 
process.env.INSIGHTS_WORKER_CONCURRENCY +): number { + if (value === undefined || value.trim() === "") { + return DEFAULT_INSIGHTS_WORKER_CONCURRENCY; + } + + const parsed = Number.parseInt(value, 10); + /* NaN, zero, negative and non-safe-integer results all fall back to the default. */ if (!Number.isSafeInteger(parsed) || parsed <= 0) { + return DEFAULT_INSIGHTS_WORKER_CONCURRENCY; + } + + return parsed; +} + +/** Creates and returns the BullMQ Worker bound to INSIGHTS_QUEUE_NAME. Every job is handed to processInsightsJob, and the failed, completed, stalled and error worker events are forwarded to emitInsightsEvent. lockDuration and stalledInterval are set to two and three times INSIGHTS_JOB_TIMEOUT_MS; the same values are reported in the worker.starting event. */ export function startInsightsWorker() { + const concurrency = getInsightsWorkerConcurrency(); + emitInsightsEvent("info", "worker.starting", { + queue_name: INSIGHTS_QUEUE_NAME, + concurrency, + lock_duration_ms: INSIGHTS_JOB_TIMEOUT_MS * 2, + stalled_interval_ms: INSIGHTS_JOB_TIMEOUT_MS * 3, + }); + + const worker = new Worker( + INSIGHTS_QUEUE_NAME, + async (job) => await processInsightsJob(job), + { + connection: getBullMQWorkerConnectionOptions({ + envPrefix: INSIGHTS_QUEUE_ENV_PREFIX, + }), + concurrency, + lockDuration: INSIGHTS_JOB_TIMEOUT_MS * 2, + stalledInterval: INSIGHTS_JOB_TIMEOUT_MS * 3, + } + ); + + /* job is read through optional chaining here, so a missing job yields undefined job_id and job_name and attempts_made of 0. */ worker.on("failed", (job, error) => { + emitInsightsEvent("error", "worker.job_failed", { + error_message: error.message, + error_stack: error.stack, + job_id: job?.id, + job_name: job?.name, + attempts_made: job?.attemptsMade ?? 0, + }); + }); + + worker.on("completed", (job) => { + emitInsightsEvent("info", "worker.job_completed", { + job_id: job.id, + job_name: job.name, + attempts_made: job.attemptsMade, + /* Difference of the two job timestamps; reported as undefined unless both are truthy. */ duration_ms: + job.finishedOn && job.processedOn + ? 
job.finishedOn - job.processedOn + : undefined, + }); + }); + + /* The stalled event only supplies the job id, which is logged at error level. */ worker.on("stalled", (jobId) => { + emitInsightsEvent("error", "worker.job_stalled", { + job_id: jobId, + }); + }); + + worker.on("error", (error) => { + emitInsightsEvent("error", "worker.error", { + error_message: error.message, + error_stack: error.stack, + }); + }); + + return worker; +} diff --git a/apps/insights/tsconfig.json b/apps/insights/tsconfig.json new file mode 100644 index 000000000..5bdc61c25 --- /dev/null +++ b/apps/insights/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig/default.json", + "include": ["src/**/*"], + "exclude": [ + "node_modules", + "dist", + "**/*.test.ts", + "**/*.test.tsx", + "**/*.spec.ts", + "**/*.spec.tsx" + ] +} diff --git a/apps/status/app/[slug]/_components/monitor-card-interactive.tsx b/apps/status/app/[slug]/_components/monitor-card-interactive.tsx new file mode 100644 index 000000000..c5181b594 --- /dev/null +++ b/apps/status/app/[slug]/_components/monitor-card-interactive.tsx @@ -0,0 +1,119 @@ +"use client"; + +import { useId, useMemo, useState } from "react"; +import { cn } from "@databuddy/ui"; +import { CaretDownIcon } from "@databuddy/ui/icons"; +import { + type MonitorDailyData, + MonitorRowInteractive, +} from "./monitor-row-interactive"; + +/** Props accepted by MonitorCardInteractive; domain and uptimePercentage are optional. */ export interface MonitorCardInteractiveProps { + anchorId: string; + dailyData: MonitorDailyData; + days: number; + domain?: string; + id: string; + name: string; + uptimePercentage?: number; +} + +/** Maps an uptime percentage to text color classes: emerald at 99.9 and above, amber at 99 and above, red otherwise. */ function uptimeColor(pct: number): string { + if (pct >= 99.9) { + return "text-emerald-600 dark:text-emerald-400"; + } + if (pct >= 99) { + return "text-amber-600 dark:text-amber-400"; + } + return "text-red-600 dark:text-red-400"; +} + +/** Client component for a single monitor card. Holds an isOpen state that starts as true, generates panelId with useId, and derives hasLatencyData from dailyData. */ export function MonitorCardInteractive({ + anchorId, + dailyData, + days, + domain, + id, + name, + uptimePercentage, +}: MonitorCardInteractiveProps) { + const [isOpen, setIsOpen] = useState(true); + const panelId = useId(); + /* True when at least one daily entry has a non-null avg_response_time or p95_response_time; recomputed only when dailyData changes. */ const hasLatencyData = useMemo( + () => 
dailyData.some( + (d) => d.avg_response_time != null || d.p95_response_time != null + ), + [dailyData] + ); + + return ( +
+ + +
+
+
+ +
+
+
+
+ ); +} diff --git a/apps/status/app/[slug]/_components/monitor-row-interactive.tsx b/apps/status/app/[slug]/_components/monitor-row-interactive.tsx index a656be98e..2b99fd16a 100644 --- a/apps/status/app/[slug]/_components/monitor-row-interactive.tsx +++ b/apps/status/app/[slug]/_components/monitor-row-interactive.tsx @@ -1,13 +1,8 @@ "use client"; import dynamic from "next/dynamic"; -import { useMemo } from "react"; -import { formatDateOnly, localDayjs } from "@databuddy/ui"; -import { - buildUptimeHeatmapDays, - UptimeHeatmapStrip, - LatencyChartChunkPlaceholder, -} from "@databuddy/ui/uptime"; +import { LatencyChartChunkPlaceholder } from "@databuddy/ui/uptime"; +import { UptimeHistory } from "./uptime-history"; const LatencyChart = dynamic( () => @@ -20,87 +15,41 @@ const LatencyChart = dynamic( } ); +/** Per-day monitor entries passed as dailyData to MonitorRowInteractive and also imported as a type by monitor-card-interactive.tsx. Only date is required; every metric field is optional. NOTE(review): units of the response time and downtime fields are not visible here; confirm against the data source. */ export type MonitorDailyData = Array<{ + avg_response_time?: number; + date: string; + downtime_seconds?: number; + p95_response_time?: number; + successful_checks?: number; + total_checks?: number; + uptime_percentage?: number; +}>; + interface MonitorRowInteractiveProps { - dailyData: Array<{ - avg_response_time?: number; - date: string; - p95_response_time?: number; - uptime_percentage?: number; - }>; + dailyData: MonitorDailyData; days: number; hasLatencyData: boolean; hasUptimeData?: boolean; id: string; } -interface MonthMarker { - label: string; - offset: number; -} - -function buildMonthMarkers(days: number): MonthMarker[] { - const today = localDayjs().endOf("day"); - const markers: MonthMarker[] = []; - let prevMonth = -1; - - for (let i = 0; i < days; i++) { - const date = today.subtract(days - 1 - i, "day"); - const month = date.month(); - - if (month !== prevMonth && i > 0) { - markers.push({ - label: date.format("MMM"), - offset: (i / days) * 100, - }); - } - prevMonth = month; - } - - return markers; -} - export function MonitorRowInteractive({ - id, dailyData, days, hasLatencyData, hasUptimeData = true, + id, }: MonitorRowInteractiveProps) { - 
const heatmapData = useMemo( - () => buildUptimeHeatmapDays(dailyData, days), - [dailyData, days] - ); - - const monthMarkers = useMemo(() => buildMonthMarkers(days), [days]); - return ( <> {hasUptimeData ? ( -
- formatDateOnly(d)} - interactive - isActive - stripClassName="flex h-7 w-full gap-[1px] sm:gap-[2px]" - /> -
- {monthMarkers.map((marker) => ( - - {marker.label} - - ))} -
-
+ ) : null} {hasLatencyData ? ( - +
+ +
) : null} ); diff --git a/apps/status/app/[slug]/_components/status-navbar.tsx b/apps/status/app/[slug]/_components/status-navbar.tsx index 06922b683..f0ca35f6f 100644 --- a/apps/status/app/[slug]/_components/status-navbar.tsx +++ b/apps/status/app/[slug]/_components/status-navbar.tsx @@ -3,43 +3,49 @@ import { LifebuoyIcon } from "@databuddy/ui/icons"; interface StatusNavbarProps { logoUrl?: string | null; + name: string; supportUrl?: string | null; websiteUrl?: string | null; } export function StatusNavbar({ logoUrl, + name, websiteUrl, supportUrl, }: StatusNavbarProps) { const logo = logoUrl ? ( ) : null; + const brand = ( + + {logo} + + {name} + + + ); return ( -
-