diff --git a/.env.example b/.env.example index 6c249675..2a0b5b04 100644 --- a/.env.example +++ b/.env.example @@ -10,11 +10,13 @@ DIFFBOT_API_KEY= UPSTASH_REDIS_REST_URL= UPSTASH_REDIS_REST_TOKEN= -# Analytics (required) -CLICKHOUSE_URL=http://localhost:8123 -CLICKHOUSE_USER=default -CLICKHOUSE_PASSWORD=clickhouse -CLICKHOUSE_DATABASE=smry_analytics +# Analytics - PostHog (required) +POSTHOG_API_KEY=phc_xxx +POSTHOG_HOST=https://us.i.posthog.com +POSTHOG_PROJECT_ID=12345 +POSTHOG_PERSONAL_API_KEY=phx_xxx +NEXT_PUBLIC_POSTHOG_KEY=phc_xxx +NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com # Alerting ALERT_EMAIL= diff --git a/CLAUDE.md b/CLAUDE.md index deeadf41..1033c28b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,7 +9,8 @@ Article reader and summarizer with AI chat. - **Backend**: Elysia (Bun-native web framework) - **AI/LLM**: OpenRouter (Vercel AI SDK for streaming) - **Auth**: Clerk (billing + JWT) -- **Database**: ClickHouse (analytics), Upstash Redis (rate limiting, chat thread storage) +- **Analytics**: PostHog (product analytics, session recording, heatmaps, LLM analytics) +- **Database**: Upstash Redis (rate limiting, chat thread storage) - **Client Storage**: IndexedDB (offline-first chat threads), localStorage (article history, preferences) ## Project Structure @@ -27,7 +28,7 @@ types/ Zod schemas and shared types ## Key Commands ```bash -bun dev # Start dev server (Next.js + Elysia + ClickHouse) +bun dev # Start dev server (Next.js + Elysia) bun run build # Production build bun run lint # ESLint bun run typecheck # TypeScript check diff --git a/ONBOARDING.md b/ONBOARDING.md index d63e228c..7fd7cb8a 100644 --- a/ONBOARDING.md +++ b/ONBOARDING.md @@ -11,7 +11,7 @@ Welcome to SMRY (internally "13ft") - an AI-powered article summarizer that bypa bun install # Start development environment -bun run dev # Starts ClickHouse + Elysia server + Next.js +bun run dev # Starts Elysia server + Next.js # Or with full Docker stack bun run dev:docker @@ -58,7 +58,7 @@ 
SMRY fetches articles from behind paywalls using multiple extraction sources and │ │ │ ▼ ▼ ▼ ┌─────────┐ ┌──────────┐ ┌──────────┐ - │ Diffbot │ │OpenRouter│ │ClickHouse│ + │ Diffbot │ │OpenRouter│ │ PostHog │ │ (API) │ │ (AI) │ │(Analytics)│ └─────────┘ └──────────┘ └──────────┘ │ @@ -98,7 +98,7 @@ SMRY/ │ └── validation/ # Zod schemas ├── types/ # TypeScript type definitions ├── tests/ # Unit and integration tests -├── docker/ # Docker and ClickHouse setup +├── docker/ # Docker setup ├── messages/ # i18n translation files └── docs/ # Additional documentation ``` @@ -124,7 +124,7 @@ SMRY/ | Elysia | Lightweight TypeScript HTTP framework | | Clerk | Authentication and billing | | Upstash Redis | Serverless caching (with zlib compression) | -| ClickHouse | Analytics database | +| PostHog | Product analytics, session recording, heatmaps | | OpenRouter | AI model access (300+ models) | | Diffbot | AI-powered article extraction | @@ -185,11 +185,13 @@ NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY= CLERK_SECRET_KEY= CLERK_WEBHOOK_SECRET= -# Analytics -CLICKHOUSE_HOST= # ClickHouse for analytics -CLICKHOUSE_DATABASE= -CLICKHOUSE_USERNAME= -CLICKHOUSE_PASSWORD= +# Analytics (PostHog) +POSTHOG_API_KEY= # Server-side PostHog key +POSTHOG_HOST= # PostHog instance URL +POSTHOG_PROJECT_ID= # Project ID for HogQL queries +POSTHOG_PERSONAL_API_KEY= # Personal key for analytics dashboard +NEXT_PUBLIC_POSTHOG_KEY= # Client-side PostHog key +NEXT_PUBLIC_POSTHOG_HOST= # Client-side PostHog host # Optional NEXT_PUBLIC_API_URL= # API URL for client (default: /api) @@ -427,7 +429,6 @@ bun run pages:deploy # Deploy to Cloudflare Pages - [README.md](./README.md) - Project overview and motivation - [DESIGN_PHILOSOPHY.md](./DESIGN_PHILOSOPHY.md) - UI/UX guidelines - [MIGRATION_PLAN.md](./MIGRATION_PLAN.md) - Deployment guide -- [docs/clickhouse-schema.sql](./docs/clickhouse-schema.sql) - Analytics schema - [docs/MEMORY_LEAK_FIX.md](./docs/MEMORY_LEAK_FIX.md) - Performance optimizations --- diff --git 
a/README.md b/README.md index 6bf07fa0..1e90c5ae 100644 --- a/README.md +++ b/README.md @@ -225,11 +225,13 @@ DIFFBOT_API_KEY= # https://diffbot.com UPSTASH_REDIS_REST_URL= UPSTASH_REDIS_REST_TOKEN= -# Analytics - ClickHouse (use docker-compose for local dev) -CLICKHOUSE_URL=http://localhost:8123 -CLICKHOUSE_USER=default -CLICKHOUSE_PASSWORD= -CLICKHOUSE_DATABASE=smry_analytics +# Analytics - PostHog +POSTHOG_API_KEY= +POSTHOG_HOST= +POSTHOG_PROJECT_ID= +POSTHOG_PERSONAL_API_KEY= +NEXT_PUBLIC_POSTHOG_KEY= +NEXT_PUBLIC_POSTHOG_HOST= # Alerting - https://resend.com RESEND_API_KEY= @@ -250,7 +252,7 @@ pnpm install 2. **Set up environment variables**: - Copy `.env.example` to `.env.local` - Get API keys from: Clerk, OpenRouter, Diffbot, Upstash, Resend - - Run `docker-compose up -d clickhouse` for local analytics + - PostHog analytics is cloud-hosted (no local setup needed) 3. **Run development server**: ```bash diff --git a/app/layout.tsx b/app/layout.tsx index 90dfe876..3c41d96a 100644 --- a/app/layout.tsx +++ b/app/layout.tsx @@ -44,6 +44,7 @@ import { ClerkProvider } from "@clerk/nextjs"; import { Toaster } from "@/components/ui/sonner"; import { getLocale } from 'next-intl/server'; import { JsonLd, organizationSchema, websiteSchema } from "@/components/seo/json-ld"; +import { PostHogProvider } from "@/components/providers/posthog-provider"; // Root metadata - OG images are handled by file-based convention (opengraph-image.tsx) // in each route segment for proper caching and to avoid robots.txt blocking issues @@ -105,6 +106,7 @@ export default async function RootLayout({ + + diff --git a/bun.lock b/bun.lock index 508159ec..8ef169ff 100644 --- a/bun.lock +++ b/bun.lock @@ -9,7 +9,6 @@ "@base-ui/react": "^1.1.0", "@clerk/backend": "^2.29.0", "@clerk/nextjs": "^6.36.5", - "@clickhouse/client": "^1.15.0", "@databuddy/sdk": "^2.3.29", "@elysiajs/cors": "^1.4.1", "@elysiajs/cron": "^1.4.1", @@ -55,6 +54,8 @@ "next-themes": "^0.4.6", "nuqs": "^2.8.0", "pino": "^8.19.0", 
+ "posthog-js": "^1.341.1", + "posthog-node": "^5.24.10", "react": "19.2.1", "react-dom": "19.2.1", "react-markdown": "^10.1.0", @@ -176,10 +177,6 @@ "@clerk/types": ["@clerk/types@4.101.13", "", { "dependencies": { "@clerk/shared": "^3.43.2" } }, "sha512-PKv85uHjNXu8KO/Vc4m4e1GByItfuib/T3wNINDrq1k+QuzKwohC+n07ENlzOzr67tfbnfa6CQSgg2HUb4RohQ=="], - "@clickhouse/client": ["@clickhouse/client@1.16.0", "", { "dependencies": { "@clickhouse/client-common": "1.16.0" } }, "sha512-ThPhoRMsKsf/hmBEgWlUsGxFecsr3i+k3JI8JV0Od7UpH2BSmk9VKMGJoyPCrTL0vPUs5rJH+7o4iCqBF09Xvg=="], - - "@clickhouse/client-common": ["@clickhouse/client-common@1.16.0", "", {}, "sha512-qMzkI1NmV29ZjFkNpVSvGNfA0c7sCExlufAQMv+V+5xtNeYXnRfdqzmBLIQoq6Pf1ij0kw/wGLD3HQrl7pTFLA=="], - "@cloudflare/kv-asset-handler": ["@cloudflare/kv-asset-handler@0.4.2", "", {}, "sha512-SIOD2DxrRRwQ+jgzlXCqoEFiKOFqaPjhnNTGKXSRLvp1HiOvapLaFG2kEr9dYQTYe8rKrd9uvDUzmAITeNyaHQ=="], "@cloudflare/next-on-pages": ["@cloudflare/next-on-pages@1.13.16", "", { "dependencies": { "acorn": "^8.8.0", "ast-types": "^0.14.2", "chalk": "^5.2.0", "chokidar": "^3.5.3", "commander": "^11.1.0", "cookie": "^0.5.0", "esbuild": "^0.15.3", "js-yaml": "^4.1.0", "miniflare": "^3.20231218.1", "package-manager-manager": "^0.2.0", "pcre-to-regexp": "^1.1.0", "semver": "^7.5.2" }, "peerDependencies": { "@cloudflare/workers-types": "^4.20240208.0", "next": ">=14.3.0 && <=15.5.2", "vercel": ">=30.0.0 && <=47.0.4", "wrangler": "^3.28.2 || ^4.0.0" }, "optionalPeers": ["@cloudflare/workers-types"], "bin": { "next-on-pages": "bin/index.js" } }, "sha512-52h51WNcfmx3szTdTd+n/xgz4qNxFtjOGG0zwnUAhTg8cjPwSUYmZp0OPRNw2jYG9xHwRS2ttSPAS8tcGkQGsw=="], @@ -436,6 +433,26 @@ "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="], + "@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.208.0", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, 
"sha512-CjruKY9V6NMssL/T1kAFgzosF1v9o6oeN+aX5JB/C/xPNtmgIJqcXHG7fA82Ou1zCpWGl4lROQUKwUNE1pMCyg=="], + + "@opentelemetry/core": ["@opentelemetry/core@2.2.0", "", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-FuabnnUm8LflnieVxs6eP7Z383hgQU4W1e3KJS6aOG3RxWxcHyBxH8fDMHNgu/gFx/M2jvTOW/4/PHhLz6bjWw=="], + + "@opentelemetry/exporter-logs-otlp-http": ["@opentelemetry/exporter-logs-otlp-http@0.208.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.208.0", "@opentelemetry/core": "2.2.0", "@opentelemetry/otlp-exporter-base": "0.208.0", "@opentelemetry/otlp-transformer": "0.208.0", "@opentelemetry/sdk-logs": "0.208.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-jOv40Bs9jy9bZVLo/i8FwUiuCvbjWDI+ZW13wimJm4LjnlwJxGgB+N/VWOZUTpM+ah/awXeQqKdNlpLf2EjvYg=="], + + "@opentelemetry/otlp-exporter-base": ["@opentelemetry/otlp-exporter-base@0.208.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/otlp-transformer": "0.208.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-gMd39gIfVb2OgxldxUtOwGJYSH8P1kVFFlJLuut32L6KgUC4gl1dMhn+YC2mGn0bDOiQYSk/uHOdSjuKp58vvA=="], + + "@opentelemetry/otlp-transformer": ["@opentelemetry/otlp-transformer@0.208.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.208.0", "@opentelemetry/core": "2.2.0", "@opentelemetry/resources": "2.2.0", "@opentelemetry/sdk-logs": "0.208.0", "@opentelemetry/sdk-metrics": "2.2.0", "@opentelemetry/sdk-trace-base": "2.2.0", "protobufjs": "^7.3.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-DCFPY8C6lAQHUNkzcNT9R+qYExvsk6C5Bto2pbNxgicpcSWbe2WHShLxkOxIdNcBiYPdVHv/e7vH7K6TI+C+fQ=="], + + "@opentelemetry/resources": ["@opentelemetry/resources@2.5.1", "", { "dependencies": { "@opentelemetry/core": "2.5.1", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, 
"sha512-BViBCdE/GuXRlp9k7nS1w6wJvY5fnFX5XvuEtWsTAOQFIO89Eru7lGW3WbfbxtCuZ/GbrJfAziXG0w0dpxL7eQ=="], + + "@opentelemetry/sdk-logs": ["@opentelemetry/sdk-logs@0.208.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.208.0", "@opentelemetry/core": "2.2.0", "@opentelemetry/resources": "2.2.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.4.0 <1.10.0" } }, "sha512-QlAyL1jRpOeaqx7/leG1vJMp84g0xKP6gJmfELBpnI4O/9xPX+Hu5m1POk9Kl+veNkyth5t19hRlN6tNY1sjbA=="], + + "@opentelemetry/sdk-metrics": ["@opentelemetry/sdk-metrics@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/resources": "2.2.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.9.0 <1.10.0" } }, "sha512-G5KYP6+VJMZzpGipQw7Giif48h6SGQ2PFKEYCybeXJsOCB4fp8azqMAAzE5lnnHK3ZVwYQrgmFbsUJO/zOnwGw=="], + + "@opentelemetry/sdk-trace-base": ["@opentelemetry/sdk-trace-base@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/resources": "2.2.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-xWQgL0Bmctsalg6PaXExmzdedSp3gyKV8mQBwK/j9VGdCDu2fmXIb2gAehBKbkXCpJ4HPkgv3QfoJWRT4dHWbw=="], + + "@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.40.0", "", {}, "sha512-cifvXDhcqMwwTlTK04GBNeIe7yyo28Mfby85QXFe1Yk8nmi36Ab/5UQwptOx84SsoGNRg+EVSjwzfSZMy6pmlw=="], + "@oxc-resolver/binding-android-arm-eabi": ["@oxc-resolver/binding-android-arm-eabi@11.17.0", "", { "os": "android", "cpu": "arm" }, "sha512-kVnY21v0GyZ/+LG6EIO48wK3mE79BUuakHUYLIqobO/Qqq4mJsjuYXMSn3JtLcKZpN1HDVit4UHpGJHef1lrlw=="], "@oxc-resolver/binding-android-arm64": ["@oxc-resolver/binding-android-arm64@11.17.0", "", { "os": "android", "cpu": "arm64" }, "sha512-Pf8e3XcsK9a8RHInoAtEcrwf2vp7V9bSturyUUYxw9syW6E7cGi7z9+6ADXxm+8KAevVfLA7pfBg8NXTvz/HOw=="], @@ -514,10 +531,34 @@ "@poppinss/exception": ["@poppinss/exception@1.2.3", "", {}, 
"sha512-dCED+QRChTVatE9ibtoaxc+WkdzOSjYTKi/+uacHWIsfodVfpsueo3+DKpgU5Px8qXjgmXkSvhXvSCz3fnP9lw=="], + "@posthog/core": ["@posthog/core@1.23.1", "", { "dependencies": { "cross-spawn": "^7.0.6" } }, "sha512-GViD5mOv/mcbZcyzz3z9CS0R79JzxVaqEz4sP5Dsea178M/j3ZWe6gaHDZB9yuyGfcmIMQ/8K14yv+7QrK4sQQ=="], + + "@posthog/types": ["@posthog/types@1.356.1", "", {}, "sha512-miIUjs4LiBDMOxKkC87HEJLIih0pNGMAjxx+mW4X7jLpN41n0PLMW7swRE6uuxcMV0z3H6MllRSCYmsokkyfuQ=="], + "@preact/signals": ["@preact/signals@1.3.2", "", { "dependencies": { "@preact/signals-core": "^1.7.0" }, "peerDependencies": { "preact": "10.x" } }, "sha512-naxcJgUJ6BTOROJ7C3QML7KvwKwCXQJYTc5L/b0eEsdYgPB6SxwoQ1vDGcS0Q7GVjAenVq/tXrybVdFShHYZWg=="], "@preact/signals-core": ["@preact/signals-core@1.12.2", "", {}, "sha512-5Yf8h1Ke3SMHr15xl630KtwPTW4sYDFkkxS0vQ8UiQLWwZQnrF9IKaVG1mN5VcJz52EcWs2acsc/Npjha/7ysA=="], + "@protobufjs/aspromise": ["@protobufjs/aspromise@1.1.2", "", {}, "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="], + + "@protobufjs/base64": ["@protobufjs/base64@1.1.2", "", {}, "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="], + + "@protobufjs/codegen": ["@protobufjs/codegen@2.0.4", "", {}, "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="], + + "@protobufjs/eventemitter": ["@protobufjs/eventemitter@1.1.0", "", {}, "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="], + + "@protobufjs/fetch": ["@protobufjs/fetch@1.1.0", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.1", "@protobufjs/inquire": "^1.1.0" } }, "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ=="], + + "@protobufjs/float": ["@protobufjs/float@1.0.2", "", {}, "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="], + + "@protobufjs/inquire": 
["@protobufjs/inquire@1.1.0", "", {}, "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="], + + "@protobufjs/path": ["@protobufjs/path@1.1.2", "", {}, "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="], + + "@protobufjs/pool": ["@protobufjs/pool@1.1.0", "", {}, "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="], + + "@protobufjs/utf8": ["@protobufjs/utf8@1.1.0", "", {}, "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="], + "@radix-ui/number": ["@radix-ui/number@1.1.1", "", {}, "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g=="], "@radix-ui/primitive": ["@radix-ui/primitive@1.1.3", "", {}, "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg=="], @@ -1042,6 +1083,8 @@ "cookie-signature": ["cookie-signature@1.2.2", "", {}, "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg=="], + "core-js": ["core-js@3.48.0", "", {}, "sha512-zpEHTy1fjTMZCKLHUZoVeylt9XrzaIN2rbPXEt0k+q7JE5CkCZdo6bNq55bn24a69CH7ErAVLKijxJja4fw+UQ=="], + "cors": ["cors@2.8.6", "", { "dependencies": { "object-assign": "^4", "vary": "^1" } }, "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw=="], "create-require": ["create-require@1.1.1", "", {}, "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ=="], @@ -1326,6 +1369,8 @@ "fdir": ["fdir@6.5.0", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg=="], + "fflate": ["fflate@0.4.8", "", {}, "sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA=="], + "file-entry-cache": 
["file-entry-cache@8.0.0", "", { "dependencies": { "flat-cache": "^4.0.0" } }, "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ=="], "file-type": ["file-type@21.3.0", "", { "dependencies": { "@tokenizer/inflate": "^0.4.1", "strtok3": "^10.3.4", "token-types": "^6.1.1", "uint8array-extras": "^1.4.0" } }, "sha512-8kPJMIGz1Yt/aPEwOsrR97ZyZaD1Iqm8PClb1nYFclUCkBi0Ma5IsYNQzvSFS9ib51lWyIw5mIT9rWzI/xjpzA=="], @@ -1658,6 +1703,8 @@ "lodash.merge": ["lodash.merge@4.6.2", "", {}, "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ=="], + "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], + "longest-streak": ["longest-streak@3.1.0", "", {}, "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g=="], "loose-envify": ["loose-envify@1.4.0", "", { "dependencies": { "js-tokens": "^3.0.0 || ^4.0.0" }, "bin": { "loose-envify": "cli.js" } }, "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q=="], @@ -1946,6 +1993,10 @@ "postcss-selector-parser": ["postcss-selector-parser@6.0.10", "", { "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" } }, "sha512-IQ7TZdoaqbT+LCpShg46jnZVlhWD2w6iQYAcYXfHARZ7X1t/UGhhceQDs5X0cGqKvYlHNOuv7Oa1xmb0oQuA3w=="], + "posthog-js": ["posthog-js@1.356.1", "", { "dependencies": { "@opentelemetry/api": "^1.9.0", "@opentelemetry/api-logs": "^0.208.0", "@opentelemetry/exporter-logs-otlp-http": "^0.208.0", "@opentelemetry/resources": "^2.2.0", "@opentelemetry/sdk-logs": "^0.208.0", "@posthog/core": "1.23.1", "@posthog/types": "1.356.1", "core-js": "^3.38.1", "dompurify": "^3.3.1", "fflate": "^0.4.8", "preact": "^10.28.2", "query-selector-shadow-dom": "^1.0.1", "web-vitals": "^5.1.0" } }, "sha512-4EQliSyTp3j/xOaWpZmu7fk1b4S+J3qy4JOu5Xy3/MYFxv1SlAylgifRdCbXZxCQWb6PViaNvwRf4EmburgfWA=="], + + "posthog-node": 
["posthog-node@5.26.0", "", { "dependencies": { "@posthog/core": "1.23.1" } }, "sha512-DK1XF/RiunhvT57cFyPxW9OaliZzl5aREHFwY/AISL3MVOaDUb4wIccMn0G3ws3Ounen8iGH7xvzZQ0x2vEOEQ=="], + "preact": ["preact@10.28.2", "", {}, "sha512-lbteaWGzGHdlIuiJ0l2Jq454m6kcpI1zNje6d8MlGAFlYvP2GO4ibnat7P74Esfz4sPTdM6UxtTwh/d3pwM9JA=="], "prelude-ls": ["prelude-ls@1.2.1", "", {}, "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], @@ -1966,6 +2017,8 @@ "property-information": ["property-information@7.1.0", "", {}, "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ=="], + "protobufjs": ["protobufjs@7.5.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg=="], + "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], "pump": ["pump@3.0.3", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA=="], @@ -1974,6 +2027,8 @@ "qs": ["qs@6.14.1", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ=="], + "query-selector-shadow-dom": ["query-selector-shadow-dom@1.0.1", "", {}, "sha512-lT5yCqEBgfoMYpf3F2xQRK7zEr1rhIIZuceDK6+xRkJQ4NMbHTwXqk4NkwDwQMNqXgG9r9fyHnzwNVs6zV5KRw=="], + "queue-microtask": 
["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="], "quick-format-unescaped": ["quick-format-unescaped@4.0.4", "", {}, "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg=="], @@ -2362,7 +2417,7 @@ "web-namespaces": ["web-namespaces@2.0.1", "", {}, "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ=="], - "web-vitals": ["web-vitals@0.2.4", "", {}, "sha512-6BjspCO9VriYy12z356nL6JBS0GYeEcA457YyRzD+dD6XYCQ75NKhcOHUMHentOE7OcVCIXXDvOm0jKFfQG2Gg=="], + "web-vitals": ["web-vitals@5.1.0", "", {}, "sha512-ArI3kx5jI0atlTtmV0fWU3fjpLmq/nD3Zr1iFFlJLaqa5wLBkUSzINwBPySCX/8jRyjlmy1Volw1kz1g9XE4Jg=="], "webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], @@ -2462,6 +2517,16 @@ "@openrouter/sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + "@opentelemetry/otlp-transformer/@opentelemetry/resources": ["@opentelemetry/resources@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1pNQf/JazQTMA0BiO5NINUzH0cbLbbl7mntLa4aJNmCCXSj0q03T5ZXXL0zw4G55TjdL9Tz32cznGClf+8zr5A=="], + + "@opentelemetry/resources/@opentelemetry/core": ["@opentelemetry/core@2.5.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-Dwlc+3HAZqpgTYq0MUyZABjFkcrKTePwuiFVLjahGD8cx3enqihmpAmdgNFO1R4m/sIe5afjJrA25Prqy4NXlA=="], + + "@opentelemetry/sdk-logs/@opentelemetry/resources": ["@opentelemetry/resources@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, 
"peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1pNQf/JazQTMA0BiO5NINUzH0cbLbbl7mntLa4aJNmCCXSj0q03T5ZXXL0zw4G55TjdL9Tz32cznGClf+8zr5A=="], + + "@opentelemetry/sdk-metrics/@opentelemetry/resources": ["@opentelemetry/resources@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1pNQf/JazQTMA0BiO5NINUzH0cbLbbl7mntLa4aJNmCCXSj0q03T5ZXXL0zw4G55TjdL9Tz32cznGClf+8zr5A=="], + + "@opentelemetry/sdk-trace-base/@opentelemetry/resources": ["@opentelemetry/resources@2.2.0", "", { "dependencies": { "@opentelemetry/core": "2.2.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1pNQf/JazQTMA0BiO5NINUzH0cbLbbl7mntLa4aJNmCCXSj0q03T5ZXXL0zw4G55TjdL9Tz32cznGClf+8zr5A=="], + "@poppinss/dumper/supports-color": ["supports-color@10.2.2", "", {}, "sha512-SS+jx45GF1QjgEXQx4NJZV9ImqmO2NPz5FNsIHrsDjh2YsHnawpan7SNQ1o8NuhrbHZy9AZhIoCUiCeaW/C80g=="], "@radix-ui/react-dialog/@radix-ui/react-slot": ["@radix-ui/react-slot@1.2.3", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2" }, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A=="], @@ -2506,6 +2571,8 @@ "@vercel/fun/semver": ["semver@7.5.4", "", { "dependencies": { "lru-cache": "^6.0.0" }, "bin": { "semver": "bin/semver.js" } }, "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA=="], + "@vercel/gatsby-plugin-vercel-analytics/web-vitals": ["web-vitals@0.2.4", "", {}, "sha512-6BjspCO9VriYy12z356nL6JBS0GYeEcA457YyRzD+dD6XYCQ75NKhcOHUMHentOE7OcVCIXXDvOm0jKFfQG2Gg=="], + "@vercel/gatsby-plugin-vercel-builder/esbuild": ["esbuild@0.14.47", "", { "optionalDependencies": 
{ "esbuild-android-64": "0.14.47", "esbuild-android-arm64": "0.14.47", "esbuild-darwin-64": "0.14.47", "esbuild-darwin-arm64": "0.14.47", "esbuild-freebsd-64": "0.14.47", "esbuild-freebsd-arm64": "0.14.47", "esbuild-linux-32": "0.14.47", "esbuild-linux-64": "0.14.47", "esbuild-linux-arm": "0.14.47", "esbuild-linux-arm64": "0.14.47", "esbuild-linux-mips64le": "0.14.47", "esbuild-linux-ppc64le": "0.14.47", "esbuild-linux-riscv64": "0.14.47", "esbuild-linux-s390x": "0.14.47", "esbuild-netbsd-64": "0.14.47", "esbuild-openbsd-64": "0.14.47", "esbuild-sunos-64": "0.14.47", "esbuild-windows-32": "0.14.47", "esbuild-windows-64": "0.14.47", "esbuild-windows-arm64": "0.14.47" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-wI4ZiIfFxpkuxB8ju4MHrGwGLyp1+awEHAHVpx6w7a+1pmYIq8T9FGEVVwFo0iFierDoMj++Xq69GXWYn2EiwA=="], "@vercel/nft/estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="], diff --git a/components/features/article-chat.tsx b/components/features/article-chat.tsx index 66cc0ef8..a474f667 100644 --- a/components/features/article-chat.tsx +++ b/components/features/article-chat.tsx @@ -21,6 +21,7 @@ import { } from "@/components/ui/icons"; import { cn } from "@/lib/utils"; import { toast } from "sonner"; +import { useAnalytics } from "@/lib/hooks/use-analytics"; import { PromptInput, PromptInputTextarea, @@ -142,7 +143,6 @@ export const ArticleChat = memo(forwardRef( // Track whether user has manually scrolled away from the bottom const isUserScrolledUpRef = useRef(false); const [showScrollButton, setShowScrollButton] = useState(false); - const rafIdRef = useRef(0); // Track programmatic scrolling to avoid race conditions with scroll handler const isProgrammaticScrollRef = useRef(false); @@ -152,6 +152,7 @@ export const ArticleChat = memo(forwardRef( const isLimitReached = !isPremium && usageData?.remaining === 0; const { language: preferredLanguage } = useChatLanguage(); + const 
{ track, markFeatureUsed } = useAnalytics(); const { messages, @@ -182,10 +183,11 @@ export const ArticleChat = memo(forwardRef( await navigator.clipboard.writeText(text); setCopiedMessageId(messageId); setTimeout(() => setCopiedMessageId(null), 2000); + track("chat_message_copied", { message_length: text.length }); } catch (err) { console.error("Failed to copy:", err); } - }, []); + }, [track]); const handleReload = useCallback(() => { reload(); @@ -197,15 +199,20 @@ export const ArticleChat = memo(forwardRef( const messageWithQuote = `> ${quotedText.replace(/\n/g, "\n> ")}\n\n${input.trim()}`; setQuotedText(null); sendMessage(messageWithQuote); + track("chat_message_sent", { message_length: messageWithQuote.length, language: preferredLanguage }); + markFeatureUsed("chat"); setInput(""); } else { + track("chat_message_sent", { message_length: input.trim().length, language: preferredLanguage }); + markFeatureUsed("chat"); handleSubmit(); } - }, [quotedText, input, sendMessage, setInput, handleSubmit]); + }, [quotedText, input, sendMessage, setInput, handleSubmit, track, markFeatureUsed, preferredLanguage]); const handleSuggestionClick = useCallback((suggestion: string) => { sendMessage(suggestion); - }, [sendMessage]); + track("chat_suggestion_clicked", { suggestion_text: suggestion }); + }, [sendMessage, track]); // Slash commands hook const { @@ -408,7 +415,7 @@ export const ArticleChat = memo(forwardRef(
{messages.length > 0 && (
)} diff --git a/components/features/proxy-content.tsx b/components/features/proxy-content.tsx index dd6adba4..7a5a7be0 100644 --- a/components/features/proxy-content.tsx +++ b/components/features/proxy-content.tsx @@ -69,6 +69,7 @@ import { SidebarContent, SidebarProvider, } from "@/components/ui/sidebar"; +import { useAnalytics } from "@/lib/hooks/use-analytics"; // ─── Simple TTS Audio Player ─── @@ -464,6 +465,7 @@ function TTSControls({ onClose, voice, onVoiceChange, isPremium, usageCount = 0, startScrubbing, endScrubbing, } = useTTSPlayer(); + const { track } = useAnalytics(); const [rate, setRate] = React.useState(1); const [showSpeed, setShowSpeed] = React.useState(false); @@ -620,6 +622,7 @@ function TTSControls({ onClose, voice, onVoiceChange, isPremium, usageCount = 0, setShowVoice(false); return; } + track("tts_voice_changed", { from_voice: voice, to_voice: v.id }); onVoiceChange(v.id); setShowVoice(false); }} @@ -681,7 +684,13 @@ function TTSControls({ onClose, voice, onVoiceChange, isPremium, usageCount = 0, size="icon" aria-label={isPlaying ? "Pause audio" : "Play audio"} className="size-12 active:scale-95 transition-transform duration-100 cursor-pointer" - onClick={() => toggle()} + onClick={() => { + track(isPlaying ? "tts_paused" : "tts_played", { + playback_position: currentTime, + voice, + }); + toggle(); + }} > {isPlaying ? : } @@ -1147,6 +1156,7 @@ export function ProxyContent({ url }: ProxyContentProps) { const articleQuery = useArticleAuto(url); const { isPremium } = useIsPremium(); const isDesktop = useIsDesktop(); + const { track, trackArticle, markFeatureUsed } = useAnalytics(); const showDesktopPromo = isDesktop !== false; const showMobilePromo = isDesktop === false; @@ -1223,10 +1233,10 @@ export function ProxyContent({ url }: ProxyContentProps) { const mobileChatAd = chatAd ?? inlineAd ?? footerAd ?? 
null; // Stable ad callbacks for ArticleContent (prevents breaking its React.memo on sidebar toggle) - const onInlineAdVisible = useCallback(() => { if (inlineAd) fireImpression(inlineAd); }, [inlineAd, fireImpression]); - const onInlineAdClick = useCallback(() => { if (inlineAd) fireClick(inlineAd); }, [inlineAd, fireClick]); - const onFooterAdVisible = useCallback(() => { if (footerAd) fireImpression(footerAd); }, [footerAd, fireImpression]); - const onFooterAdClick = useCallback(() => { if (footerAd) fireClick(footerAd); }, [footerAd, fireClick]); + const onInlineAdVisible = useCallback(() => { if (inlineAd) { fireImpression(inlineAd, "inline", 1); track("ad_impression_client", { placement: "inline", ad_index: 1, brand_name: inlineAd.brandName, ad_provider: inlineAd.ad_provider }); } }, [inlineAd, fireImpression, track]); + const onInlineAdClick = useCallback(() => { if (inlineAd) { fireClick(inlineAd, "inline", 1); track("ad_click_client", { placement: "inline", ad_index: 1, brand_name: inlineAd.brandName, ad_provider: inlineAd.ad_provider }); } }, [inlineAd, fireClick, track]); + const onFooterAdVisible = useCallback(() => { if (footerAd) { fireImpression(footerAd, "footer", 2); track("ad_impression_client", { placement: "footer", ad_index: 2, brand_name: footerAd.brandName, ad_provider: footerAd.ad_provider }); } }, [footerAd, fireImpression, track]); + const onFooterAdClick = useCallback(() => { if (footerAd) { fireClick(footerAd, "footer", 2); track("ad_click_client", { placement: "footer", ad_index: 2, brand_name: footerAd.brandName, ad_provider: footerAd.ad_provider }); } }, [footerAd, fireClick, track]); // Debug: Log only when ads actually change const prevAdKeyRef = useRef(""); @@ -1237,10 +1247,15 @@ export function ProxyContent({ url }: ProxyContentProps) { const providers = [...new Set(gravityAds.map(a => a.ad_provider || 'gravity'))]; console.log(`[Ads] New rotation (${providers.join(' + ')}):`, gravityAds.map((a, i) => `[${i}] 
${a.brandName}`).join(', ')); + track("ad_loaded", { + ad_count: gravityAds.length, + brand_names: gravityAds.map(a => a.brandName), + providers, + }); } - }, [gravityAds]); + }, [gravityAds, track]); - // Handle article load: save to history + // Handle article load: save to history + track useEffect(() => { if (!firstSuccessfulArticle || initializedUrlRef.current === url) return; @@ -1248,7 +1263,22 @@ export function ProxyContent({ url }: ProxyContentProps) { // Save to history addArticleToHistory(url, firstSuccessfulArticle.title || "Untitled Article"); - }, [firstSuccessfulArticle, url]); + + // Track article load + trackArticle("article_loaded", url, { + source, + article_title: firstSuccessfulArticle.title, + }); + }, [firstSuccessfulArticle, url, source, trackArticle]); + + // Track article fetch errors + useEffect(() => { + if (articleQuery.error) { + trackArticle("article_error", url, { + error_message: articleQuery.error.message, + }); + } + }, [articleQuery.error, url, trackArticle]); // TTS (Text-to-Speech) — simple audio player with word-level highlighting const [ttsOpen, setTTSOpen] = useState(false); @@ -1264,8 +1294,10 @@ export function ProxyContent({ url }: ProxyContentProps) { } else { setTTSOpen(true); t.load(); + track("tts_played", { voice: t.voice, article_url: url }); + markFeatureUsed("tts"); } - }, []); + }, [track, markFeatureUsed, url]); const handleTTSClose = React.useCallback(() => { ttsRef.current.stop(); @@ -1286,7 +1318,14 @@ export function ProxyContent({ url }: ProxyContentProps) { const [desktopAdDismissed, setDesktopAdDismissed] = useState(false); const [isFullScreen, setIsFullScreen] = useState(false); const [shortcutsDialogOpen, setShortcutsDialogOpen] = useState(false); - const [settingsOpen, setSettingsOpen] = useState(false); + const [settingsOpen, setSettingsOpenRaw] = useState(false); + const setSettingsOpen = React.useCallback((val: boolean | ((prev: boolean) => boolean)) => { + setSettingsOpenRaw((prev) => { + const 
next = typeof val === "function" ? val(prev) : val; + if (next && !prev) track("settings_opened"); + return next; + }); + }, [track]); const [styleOptionsOpen, setStyleOptionsOpen] = useState(false); const [shareOpen, setShareOpen] = useState(false); const [sidebarActiveTab, setSidebarActiveTab] = useState<"chat" | "history">("chat"); @@ -1352,17 +1391,21 @@ export function ProxyContent({ url }: ProxyContentProps) { const handleViewModeChange = React.useCallback( (mode: (typeof viewModes)[number]) => { setQuery({ view: mode }); + track("setting_changed", { setting: "view_mode", value: mode }); }, - [setQuery] + [setQuery, track] ); const handleSidebarChange = React.useCallback( (next: boolean) => { setQuery({ sidebar: next ? true : null }); // Close annotations sidebar when opening chat sidebar - if (next) setAnnotationsSidebarOpen(false); + if (next) { + setAnnotationsSidebarOpen(false); + try { track("chat_opened", { hostname: new URL(url).hostname }); markFeatureUsed("chat"); } catch { /* ignore */ } + } }, - [setQuery, setAnnotationsSidebarOpen] + [setQuery, setAnnotationsSidebarOpen, track, markFeatureUsed, url] ); // Copy page as markdown (used by ⌘C keyboard shortcut) @@ -1842,7 +1885,7 @@ export function ProxyContent({ url }: ProxyContentProps) { window.addEventListener("keydown", handleKeyDown); return () => window.removeEventListener("keydown", handleKeyDown); - }, [sidebarOpen, sidebarActiveTab, handleSidebarChange, handleNewChat, viewMode, handleViewModeChange, url, handleCopyPage, handleOpenInAI, setAnnotationsSidebarOpen, handleTTSToggle, ttsOpen]); + }, [sidebarOpen, sidebarActiveTab, handleSidebarChange, handleNewChat, viewMode, handleViewModeChange, url, handleCopyPage, handleOpenInAI, setAnnotationsSidebarOpen, handleTTSToggle, ttsOpen, setSettingsOpen]); // Measure combined banner height so fixed sidebars can start below it const bannerRef = useRef(null); @@ -2007,10 +2050,11 @@ export function ProxyContent({ url }: ProxyContentProps) {
fireImpression(sidebarAd)} - onClick={() => fireClick(sidebarAd)} + onVisible={() => { fireImpression(sidebarAd, "sidebar", 0); track("ad_impression_client", { placement: "sidebar", ad_index: 0, brand_name: sidebarAd.brandName, ad_provider: sidebarAd.ad_provider }); }} + onClick={() => { fireClick(sidebarAd, "sidebar", 0); track("ad_click_client", { placement: "sidebar", ad_index: 0, brand_name: sidebarAd.brandName, ad_provider: sidebarAd.ad_provider }); }} onDismiss={() => { - fireDismiss(sidebarAd); + fireDismiss(sidebarAd, "sidebar", 0); + track("ad_dismiss_client", { placement: "sidebar", ad_index: 0, brand_name: sidebarAd.brandName, ad_provider: sidebarAd.ad_provider }); setDesktopAdDismissed(true); }} variant={sidebarOpen ? "compact" : "default"} @@ -2073,14 +2117,14 @@ export function ProxyContent({ url }: ProxyContentProps) { onMessagesChange={isPremium ? handleMessagesChange : undefined} activeThreadTitle={_activeThread?.title} headerAd={!isPremium ? chatAd : null} - onHeaderAdVisible={chatAd ? () => fireImpression(chatAd) : undefined} - onHeaderAdClick={chatAd ? () => fireClick(chatAd) : undefined} + onHeaderAdVisible={chatAd ? () => { fireImpression(chatAd, "chat_header", 3); track("ad_impression_client", { placement: "chat_header", ad_index: 3, brand_name: chatAd.brandName, ad_provider: chatAd.ad_provider }); } : undefined} + onHeaderAdClick={chatAd ? () => { fireClick(chatAd, "chat_header", 3); track("ad_click_client", { placement: "chat_header", ad_index: 3, brand_name: chatAd.brandName, ad_provider: chatAd.ad_provider }); } : undefined} ad={!isPremium ? (inlineAd ?? footerAd) : null} - onAdVisible={inlineAd ? () => fireImpression(inlineAd) : footerAd ? () => fireImpression(footerAd) : undefined} - onAdClick={inlineAd ? () => fireClick(inlineAd) : footerAd ? () => fireClick(footerAd) : undefined} + onAdVisible={inlineAd ? 
() => { fireImpression(inlineAd, "chat_inline", 1); track("ad_impression_client", { placement: "chat_inline", ad_index: 1, brand_name: inlineAd.brandName, ad_provider: inlineAd.ad_provider }); } : footerAd ? () => { fireImpression(footerAd, "chat_inline", 2); track("ad_impression_client", { placement: "chat_inline", ad_index: 2, brand_name: footerAd.brandName, ad_provider: footerAd.ad_provider }); } : undefined} + onAdClick={inlineAd ? () => { fireClick(inlineAd, "chat_inline", 1); track("ad_click_client", { placement: "chat_inline", ad_index: 1, brand_name: inlineAd.brandName, ad_provider: inlineAd.ad_provider }); } : footerAd ? () => { fireClick(footerAd, "chat_inline", 2); track("ad_click_client", { placement: "chat_inline", ad_index: 2, brand_name: footerAd.brandName, ad_provider: footerAd.ad_provider }); } : undefined} microAd={!isPremium ? microAd : null} - onMicroAdVisible={microAd ? () => fireImpression(microAd) : undefined} - onMicroAdClick={microAd ? () => fireClick(microAd) : undefined} + onMicroAdVisible={microAd ? () => { fireImpression(microAd, "micro", 4); track("ad_impression_client", { placement: "micro", ad_index: 4, brand_name: microAd.brandName, ad_provider: microAd.ad_provider }); } : undefined} + onMicroAdClick={microAd ? () => { fireClick(microAd, "micro", 4); track("ad_click_client", { placement: "micro", ad_index: 4, brand_name: microAd.brandName, ad_provider: microAd.ad_provider }); } : undefined} threads={threads} activeThreadId={currentThreadId} onNewChat={handleNewChat} @@ -2214,11 +2258,11 @@ export function ProxyContent({ url }: ProxyContentProps) { articleContent={articleTextContent || ""} articleTitle={articleTitle} chatAd={!isPremium ? mobileChatAd : null} - onChatAdVisible={mobileChatAd ? () => fireImpression(mobileChatAd) : undefined} - onChatAdClick={mobileChatAd ? () => fireClick(mobileChatAd) : undefined} + onChatAdVisible={mobileChatAd ? 
() => { fireImpression(mobileChatAd, "mobile_chat_header", gravityAds.indexOf(mobileChatAd)); track("ad_impression_client", { placement: "mobile_chat_header", brand_name: mobileChatAd.brandName, ad_provider: mobileChatAd.ad_provider }); } : undefined} + onChatAdClick={mobileChatAd ? () => { fireClick(mobileChatAd, "mobile_chat_header", gravityAds.indexOf(mobileChatAd)); track("ad_click_client", { placement: "mobile_chat_header", brand_name: mobileChatAd.brandName, ad_provider: mobileChatAd.ad_provider }); } : undefined} inlineChatAd={!isPremium ? (inlineAd ?? footerAd) : null} - onInlineChatAdVisible={inlineAd ? () => fireImpression(inlineAd) : footerAd ? () => fireImpression(footerAd) : undefined} - onInlineChatAdClick={inlineAd ? () => fireClick(inlineAd) : footerAd ? () => fireClick(footerAd) : undefined} + onInlineChatAdVisible={inlineAd ? () => { fireImpression(inlineAd, "mobile_chat_inline", 1); track("ad_impression_client", { placement: "mobile_chat_inline", ad_index: 1, brand_name: inlineAd.brandName, ad_provider: inlineAd.ad_provider }); } : footerAd ? () => { fireImpression(footerAd, "mobile_chat_inline", 2); track("ad_impression_client", { placement: "mobile_chat_inline", ad_index: 2, brand_name: footerAd.brandName, ad_provider: footerAd.ad_provider }); } : undefined} + onInlineChatAdClick={inlineAd ? () => { fireClick(inlineAd, "mobile_chat_inline", 1); track("ad_click_client", { placement: "mobile_chat_inline", ad_index: 1, brand_name: inlineAd.brandName, ad_provider: inlineAd.ad_provider }); } : footerAd ? () => { fireClick(footerAd, "mobile_chat_inline", 2); track("ad_click_client", { placement: "mobile_chat_inline", ad_index: 2, brand_name: footerAd.brandName, ad_provider: footerAd.ad_provider }); } : undefined} isPremium={isPremium} initialMessages={threadInitialMessages} onMessagesChange={isPremium ? 
handleMessagesChange : undefined} @@ -2253,10 +2297,11 @@ export function ProxyContent({ url }: ProxyContentProps) { fireImpression(sidebarAd)} - onClick={() => fireClick(sidebarAd)} + onVisible={() => { fireImpression(sidebarAd, "mobile_bottom", 0); track("ad_impression_client", { placement: "mobile_bottom", ad_index: 0, brand_name: sidebarAd.brandName, ad_provider: sidebarAd.ad_provider }); }} + onClick={() => { fireClick(sidebarAd, "mobile_bottom", 0); track("ad_click_client", { placement: "mobile_bottom", ad_index: 0, brand_name: sidebarAd.brandName, ad_provider: sidebarAd.ad_provider }); }} onDismiss={() => { - fireDismiss(sidebarAd); + fireDismiss(sidebarAd, "mobile_bottom", 0); + track("ad_dismiss_client", { placement: "mobile_bottom", ad_index: 0, brand_name: sidebarAd.brandName, ad_provider: sidebarAd.ad_provider }); setMobileAdDismissed(true); }} /> diff --git a/components/features/settings-drawer.tsx b/components/features/settings-drawer.tsx index 36a0146a..6b9c09ed 100644 --- a/components/features/settings-drawer.tsx +++ b/components/features/settings-drawer.tsx @@ -49,6 +49,7 @@ import { DrawerTitle, DrawerTrigger, } from "@/components/ui/drawer"; +import { useAnalytics } from "@/lib/hooks/use-analytics"; type ViewMode = "markdown" | "html" | "iframe"; @@ -217,8 +218,9 @@ const FONT_PREVIEW_STYLES: Record = { }; // Style Options Section - opens nested drawer with Theme + Style controls +// eslint-disable-next-line unused-imports/no-unused-vars function StyleOptionsSection() { - const { theme, resolvedTheme, setTheme } = useTheme(); + const { theme, setTheme } = useTheme(); const { preferences, hasLoaded, @@ -232,6 +234,7 @@ function StyleOptionsSection() { setFont, resetToDefaults, } = useReaderPreferences(); + const { track } = useAnalytics(); const [drawerOpen, setDrawerOpen] = React.useState(false); const [fontDrawerOpen, setFontDrawerOpen] = React.useState(false); @@ -266,12 +269,14 @@ function StyleOptionsSection() { const actualTheme = 
mapDropdownToTheme(newTheme); setTheme(actualTheme); setThemeChanged(actualTheme !== DEFAULT_THEME); + track("setting_changed", { setting: "theme", value: actualTheme }); }; // For palette buttons — theme value is already the actual theme name, no mapping needed const handlePaletteChange = (paletteTheme: string) => { setTheme(paletteTheme); setThemeChanged(paletteTheme !== DEFAULT_THEME); + track("setting_changed", { setting: "theme", value: paletteTheme }); }; // Reset ALL - theme + reader preferences @@ -613,6 +618,7 @@ function LanguageSectionInner() { const switchLocaleInPlace = useSwitchLocale(); const [languageDrawerOpen, setLanguageDrawerOpen] = React.useState(false); const [selectedLocale, setSelectedLocale] = React.useState(null); + const { track } = useAnalytics(); // Clear optimistic state once the locale context has caught up React.useEffect(() => { @@ -627,7 +633,8 @@ function LanguageSectionInner() { setSelectedLocale(newLocale); // Switch locale in context (no navigation — modal stays open) switchLocaleInPlace(newLocale); - }, [locale, switchLocaleInPlace]); + track("setting_changed", { setting: "language", value: newLocale }); + }, [locale, switchLocaleInPlace, track]); return ( <> diff --git a/components/features/share-button.tsx b/components/features/share-button.tsx index 11eb1011..25943407 100644 --- a/components/features/share-button.tsx +++ b/components/features/share-button.tsx @@ -19,6 +19,7 @@ import { ResponsiveDrawer } from "@/components/features/responsive-drawer"; import { ExportArticleContent, type ArticleExportData } from "@/components/features/export-article"; import { Source } from "@/types/api"; +import { useAnalytics } from "@/lib/hooks/use-analytics"; // Reddit SVG const RedditIcon = ({ className }: { className?: string }) => ( @@ -75,12 +76,15 @@ const ShareModalContent = React.memo(function ShareModalContent({ }: ShareButtonDataProps & { onClose: () => void }) { const [copied, setCopied] = useState(false); const [view, setView] 
= useState<"share" | "export">("share"); + const { track, markFeatureUsed } = useAnalytics(); const handleCopy = async () => { try { await navigator.clipboard.writeText(url); setCopied(true); setTimeout(() => setCopied(false), 2000); + track("article_shared", { method: "copy_link" }); + markFeatureUsed("share"); } catch (error) { console.error("Failed to copy link:", error); } @@ -90,6 +94,8 @@ const ShareModalContent = React.memo(function ShareModalContent({ if (navigator.share) { try { await navigator.share({ url }); + track("article_shared", { method: "native" }); + markFeatureUsed("share"); onClose(); } catch (error) { console.log("Share cancelled:", error); @@ -218,6 +224,7 @@ const ShareModalContent = React.memo(function ShareModalContent({ href={shareUrls.x} target="_blank" rel="noopener noreferrer" + onClick={() => { track("article_shared", { method: "x_twitter" }); markFeatureUsed("share"); }} className="flex h-6 shrink-0 items-center justify-center gap-1.5 rounded-[5px] border-[0.5px] border-border bg-surface-1 px-2.5 text-[12px] font-medium text-muted-foreground shadow-[0_4px_4px_-1px_rgba(0,0,0,0.06),0_1px_1px_0_rgba(0,0,0,0.12)] transition-colors hover:bg-accent hover:text-foreground" > @@ -228,6 +235,7 @@ const ShareModalContent = React.memo(function ShareModalContent({ href={shareUrls.linkedin} target="_blank" rel="noopener noreferrer" + onClick={() => { track("article_shared", { method: "linkedin" }); markFeatureUsed("share"); }} className="flex h-6 shrink-0 items-center justify-center gap-1.5 rounded-[5px] border-[0.5px] border-border bg-surface-1 px-2.5 text-[12px] font-medium text-muted-foreground shadow-[0_4px_4px_-1px_rgba(0,0,0,0.06),0_1px_1px_0_rgba(0,0,0,0.12)] transition-colors hover:bg-accent hover:text-foreground" > @@ -238,6 +246,7 @@ const ShareModalContent = React.memo(function ShareModalContent({ href={shareUrls.reddit} target="_blank" rel="noopener noreferrer" + onClick={() => { track("article_shared", { method: "reddit" }); 
markFeatureUsed("share"); }} className="flex h-6 shrink-0 items-center justify-center gap-1.5 rounded-[5px] border-[0.5px] border-border bg-surface-1 px-2.5 text-[12px] font-medium text-muted-foreground shadow-[0_4px_4px_-1px_rgba(0,0,0,0.06),0_1px_1px_0_rgba(0,0,0,0.12)] transition-colors hover:bg-accent hover:text-foreground" > diff --git a/components/providers/posthog-provider.tsx b/components/providers/posthog-provider.tsx new file mode 100644 index 00000000..0b795d0d --- /dev/null +++ b/components/providers/posthog-provider.tsx @@ -0,0 +1,118 @@ +"use client"; + +import posthog from "posthog-js"; +import { PostHogProvider as PHProvider, usePostHog } from "posthog-js/react"; +import { useEffect, useRef, Suspense } from "react"; +import { usePathname, useSearchParams } from "next/navigation"; +import { useUser, useAuth } from "@clerk/nextjs"; + +function PostHogPageView() { + const pathname = usePathname(); + const searchParams = useSearchParams(); + const ph = usePostHog(); + + useEffect(() => { + if (pathname && ph) { + let url = window.origin + pathname; + const search = searchParams?.toString(); + if (search) url += `?${search}`; + ph.capture("$pageview", { $current_url: url }); + } + }, [pathname, searchParams, ph]); + + return null; +} + +/** + * Identifies signed-in users and enriches with properties for: + * - Cohort analysis (power users, plan tier segmentation) + * - New vs returning user tracking ($set_once for first visit data) + * - DAU/MAU ratio (automatic via $pageview + identify) + * - Churn analysis (automatic via lifecycle insight) + */ +function PostHogIdentify() { + const ph = usePostHog(); + const { user, isLoaded: userLoaded } = useUser(); + const { isLoaded: authLoaded, has, isSignedIn } = useAuth(); + const prevUserId = useRef(null); + + useEffect(() => { + if (!ph || !userLoaded || !authLoaded) return; + + if (isSignedIn && user) { + // Don't re-identify if same user + if (prevUserId.current === user.id) return; + prevUserId.current = 
user.id; + + const isPremium = has?.({ plan: "premium" }) ?? false; + + // $set updates on every identify, $set_once only sets on first call + ph.identify(user.id, { + // $set properties — updated on every visit + email: user.primaryEmailAddress?.emailAddress, + name: user.fullName ?? user.firstName, + is_premium: isPremium, + plan: isPremium ? "premium" : "free", + last_seen: new Date().toISOString(), + }, { + // $set_once properties — only set on first ever identify + signup_date: user.createdAt?.toISOString(), + initial_referrer: document.referrer || "$direct", + initial_utm_source: new URLSearchParams(window.location.search).get("utm_source") ?? undefined, + initial_utm_medium: new URLSearchParams(window.location.search).get("utm_medium") ?? undefined, + initial_utm_campaign: new URLSearchParams(window.location.search).get("utm_campaign") ?? undefined, + }); + + // Group analytics for plan-level metrics + ph.group("plan_tier", isPremium ? "premium" : "free", { + plan: isPremium ? "premium" : "free", + }); + } else { + if (prevUserId.current !== null) { + prevUserId.current = null; + ph.reset(); + } + } + }, [ph, user, userLoaded, authLoaded, isSignedIn, has]); + + return null; +} + +export function PostHogProvider({ children }: { children: React.ReactNode }) { + useEffect(() => { + const key = process.env.NEXT_PUBLIC_POSTHOG_KEY; + const host = process.env.NEXT_PUBLIC_POSTHOG_HOST; + if (!key || !host) return; + + posthog.init(key, { + api_host: host, + // Manual pageview capture for SPA route changes + capture_pageview: false, + capture_pageleave: true, + // Autocapture: button clicks, form submits, link clicks + autocapture: true, + // Heatmaps: see where users click and scroll + enable_heatmaps: true, + // Session recording: replay user sessions with console logs + capture_dead_clicks: true, + enable_recording_console_log: true, + session_recording: { + recordCrossOriginIframes: true, + }, + // Persist across sessions for returning user tracking + 
persistence: "localStorage+cookie", + // Capture performance data for Web Vitals + capture_performance: true, + }); + }, []); + + return ( + + + + + + {children} + + ); +} diff --git a/docker-compose.yml b/docker-compose.yml index ac3d8d4e..f5849b7a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,33 +1,7 @@ -# Local dev services - ClickHouse starts automatically with: bun run dev +# Local dev services # For full Docker stack: bun run dev:docker services: - # Clickhouse analytics database (memory-optimized) - clickhouse: - build: - context: ./docker/clickhouse - dockerfile: Dockerfile - container_name: smry-clickhouse - ports: - - "8123:8123" - - "9000:9000" - volumes: - - clickhouse-data:/var/lib/clickhouse - - ./docs/clickhouse-schema.sql:/docker-entrypoint-initdb.d/init.sql:ro - environment: - CLICKHOUSE_DB: smry_analytics - CLICKHOUSE_USER: default - CLICKHOUSE_PASSWORD: clickhouse - healthcheck: - test: ["CMD", "clickhouse-client", "--query", "SELECT 1"] - interval: 10s - timeout: 5s - retries: 3 - ulimits: - nofile: - soft: 262144 - hard: 262144 - # Next.js app app: build: @@ -42,22 +16,16 @@ services: - /app/node_modules - /app/.next environment: - # Clickhouse (internal Docker network) - CLICKHOUSE_URL: http://clickhouse:8123 - CLICKHOUSE_USER: default - CLICKHOUSE_PASSWORD: clickhouse - CLICKHOUSE_DATABASE: smry_analytics - ANALYTICS_SECRET_KEY: dev_secret_key # Pass through from host .env + POSTHOG_API_KEY: ${POSTHOG_API_KEY} + POSTHOG_HOST: ${POSTHOG_HOST} + POSTHOG_PROJECT_ID: ${POSTHOG_PROJECT_ID} + POSTHOG_PERSONAL_API_KEY: ${POSTHOG_PERSONAL_API_KEY} + NEXT_PUBLIC_POSTHOG_KEY: ${NEXT_PUBLIC_POSTHOG_KEY} + NEXT_PUBLIC_POSTHOG_HOST: ${NEXT_PUBLIC_POSTHOG_HOST} UPSTASH_REDIS_REST_URL: ${UPSTASH_REDIS_REST_URL} UPSTASH_REDIS_REST_TOKEN: ${UPSTASH_REDIS_REST_TOKEN} DIFFBOT_API_KEY: ${DIFFBOT_API_KEY} OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} CLERK_SECRET_KEY: ${CLERK_SECRET_KEY} NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: 
${NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY} - depends_on: - clickhouse: - condition: service_healthy - -volumes: - clickhouse-data: diff --git a/docker/clickhouse/Dockerfile b/docker/clickhouse/Dockerfile deleted file mode 100644 index cfd45c59..00000000 --- a/docker/clickhouse/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM clickhouse/clickhouse-server:24.8 - -# Copy memory-optimized config -COPY memory.xml /etc/clickhouse-server/config.d/memory.xml diff --git a/docker/clickhouse/memory.xml b/docker/clickhouse/memory.xml deleted file mode 100644 index 68de5d6a..00000000 --- a/docker/clickhouse/memory.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - 157286400 - - 0.15 - - - 8388608 - - - 8388608 - - - 4194304 - - - - 0 - - - - 1 - 1 - 1 - - - - - - 67108864 - - 16777216 - 16777216 - - - diff --git a/docs/ANALYTICS.md b/docs/ANALYTICS.md new file mode 100644 index 00000000..7d2ff5df --- /dev/null +++ b/docs/ANALYTICS.md @@ -0,0 +1,466 @@ +# PostHog Analytics — SMRY + +Complete reference for all analytics events, setup, dashboards, and maintenance. 
+ +--- + +## Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Client (Browser) │ +│ │ +│ posthog-js SDK │ +│ ├── Autocapture (clicks, form submits, link clicks) │ +│ ├── Session Recording + Heatmaps │ +│ ├── $pageview / $pageleave (manual SPA tracking) │ +│ ├── User Identification (Clerk → PostHog identify) │ +│ ├── Custom Events (track() via useAnalytics hook) │ +│ └── Feature Adoption ($set_once via markFeatureUsed) │ +│ │ +├──────────────────────────────────────────────────────────────────┤ +│ Server (Elysia / Bun) │ +│ │ +│ posthog-node SDK │ +│ ├── request_event (every API request) │ +│ ├── ad_event (impression/click/dismiss with placement data) │ +│ ├── $ai_generation (LLM analytics for chat) │ +│ └── HogQL queries (admin dashboard) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Key Files + +| File | Purpose | +|------|---------| +| `lib/posthog.ts` | Server SDK client, `trackEvent`, `trackAdEvent`, `trackLLMGeneration`, `queryPostHog` | +| `lib/hooks/use-analytics.ts` | Client hook: `track()`, `trackArticle()`, `markFeatureUsed()` | +| `components/providers/posthog-provider.tsx` | SDK init, pageview tracking, user identification | +| `lib/hooks/use-gravity-ad.ts` | Ad impression/click/dismiss tracking with placement data | +| `server/routes/gravity.ts` | Server-side ad event logging (`/api/px` endpoint) | +| `server/routes/chat.ts` | LLM generation tracking (`$ai_generation` events) | + +--- + +## Environment Variables + +```bash +# Server-side SDK +POSTHOG_API_KEY=phc_... # Project API key +POSTHOG_HOST=https://us.i.posthog.com +POSTHOG_PROJECT_ID=12345 # For HogQL queries +POSTHOG_PERSONAL_API_KEY=phx_... # For HogQL query API + +# Client-side SDK +NEXT_PUBLIC_POSTHOG_KEY=phc_... 
# Same project, public key +NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com +``` + +--- + +## Client-Side SDK Configuration + +Initialized in `components/providers/posthog-provider.tsx`: + +| Feature | Setting | What it does | +|---------|---------|-------------| +| Autocapture | `autocapture: true` | Tracks every button click, form submit, link click | +| Heatmaps | `enable_heatmaps: true` | Visual click/scroll maps per page | +| Session Recording | `enable_recording_console_log: true` | Replay user sessions with console logs | +| Dead Clicks | `capture_dead_clicks: true` | Detects clicks on non-interactive elements | +| Cross-origin iframes | `session_recording.recordCrossOriginIframes: true` | Records embedded content | +| Web Vitals | `capture_performance: true` | LCP, FID, CLS, TTFB metrics | +| Persistence | `persistence: "localStorage+cookie"` | Tracks returning users across sessions | +| Pageview | `capture_pageview: false` | Manual SPA tracking (PostHogPageView component) | +| Page leave | `capture_pageleave: true` | Tracks when users leave pages | + +--- + +## User Identification + +The `PostHogIdentify` component (in the provider) links Clerk accounts to PostHog: + +### Properties set on every visit (`$set`) + +| Property | Source | Use | +|----------|--------|-----| +| `email` | Clerk | Contact/debugging | +| `name` | Clerk | Display | +| `is_premium` | Clerk plan check | Segment by plan | +| `plan` | `"premium"` or `"free"` | Plan-based cohorts | +| `last_seen` | Timestamp | Activity tracking | + +### Properties set once (`$set_once`) — first visit only + +| Property | Source | Use | +|----------|--------|-----| +| `signup_date` | Clerk `createdAt` | Cohort by signup date | +| `initial_referrer` | `document.referrer` | Acquisition channel | +| `initial_utm_source` | URL param | Campaign attribution | +| `initial_utm_medium` | URL param | Campaign attribution | +| `initial_utm_campaign` | URL param | Campaign attribution | + +### Group Analytics + 
+Users are grouped by `plan_tier` (`"premium"` or `"free"`), enabling plan-level metrics in PostHog. + +--- + +## Custom Events Reference + +### Home Page (`home-content.tsx`) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `article_submitted` | URL form submit | `hostname`, `article_url` | +| `url_validation_error` | Invalid URL entered | `error_message` | + +### Article Reader (`proxy-content.tsx`) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `article_loaded` | Article data received | `source`, `article_title`, `article_url`, `hostname` | +| `article_error` | Fetch failed | `error_message`, `article_url`, `hostname` | +| `chat_opened` | Chat panel opened | `hostname` | +| `settings_opened` | Settings drawer opened | — | +| `setting_changed` | View mode changed | `setting`, `value` | +| `ad_loaded` | New ad rotation received | `ad_count`, `brand_names`, `providers` | + +### Ad Tracking (client-side PostHog events) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `ad_impression_client` | Ad enters viewport (50%+) | `placement`, `ad_index`, `brand_name`, `ad_provider` | +| `ad_click_client` | Ad link clicked | `placement`, `ad_index`, `brand_name`, `ad_provider` | +| `ad_dismiss_client` | Ad dismissed | `placement`, `ad_index`, `brand_name`, `ad_provider` | + +**Ad Placements:** + +| Placement | Index | Location | +|-----------|-------|----------| +| `sidebar` | 0 | Fixed bottom-right (desktop) | +| `inline` | 1 | Mid-article | +| `footer` | 2 | End of article | +| `chat_header` | 3 | Top of chat panel | +| `micro` | 4 | Below chat input | +| `mobile_bottom` | 0 | Fixed above bottom bar (mobile) | +| `mobile_chat_header` | varies | Chat header (mobile) | +| `mobile_chat_inline` | varies | Inside mobile chat | +| `chat_inline` | varies | Chat panel (desktop, reuses inline/footer) | +| `home` | 0 | Home page | + +### Chat (`article-chat.tsx`) + +| Event | Trigger | Properties | 
+|-------|---------|------------| +| `chat_message_sent` | User sends message | `message_length`, `language` | +| `chat_suggestion_clicked` | Suggestion chip tapped | `suggestion_text` | +| `chat_message_copied` | Copy button on response | `message_length` | +| `chat_cleared` | Clear chat clicked | `message_count` | + +### Share (`share-button.tsx`) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `article_shared` | Any share action | `method`: `copy_link` / `native` / `x_twitter` / `linkedin` / `reddit` | + +### Highlights (`highlight-toolbar.tsx`, `export-highlights.tsx`) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `highlight_created` | Text highlighted | `text_length`, `color` | +| `highlights_exported` | Export copy/download | `format`, `method` (`copy`/`download`), `highlight_count` | + +### Settings (`settings-drawer.tsx`) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `setting_changed` | Theme/language/palette | `setting`, `value` | + +### TTS (`proxy-content.tsx` — TTSControls) + +| Event | Trigger | Properties | +|-------|---------|------------| +| `tts_played` | Play pressed | `voice`, `article_url` | +| `tts_paused` | Pause pressed | `article_url` | +| `tts_voice_changed` | Voice selector | `from_voice`, `to_voice` | + +### Feature Adoption (`feature_used`) + +Fires with `$set_once` (first usage date) and `$set` (last usage date) on the user's PostHog profile: + +| Feature | Trigger | Person Properties Set | +|---------|---------|----------------------| +| `tts` | First TTS play | `first_used_tts`, `last_used_tts` | +| `chat` | First chat message / open | `first_used_chat`, `last_used_chat` | +| `share` | First share action | `first_used_share`, `last_used_share` | +| `highlights` | First highlight | `first_used_highlights`, `last_used_highlights` | +| `export_highlights` | First export | `first_used_export_highlights`, `last_used_export_highlights` | + +--- + +## Server-Side 
Events + +### `request_event` — API Request Analytics + +Captured in `lib/posthog.ts` → `trackEvent()`. Fired for every API request via request context middleware. + +| Property | Type | Description | +|----------|------|-------------| +| `method` | string | HTTP method | +| `endpoint` | string | API endpoint | +| `status_code` | number | Response status | +| `duration_ms` | number | Total request time | +| `cache_hit` | boolean | Redis cache hit | +| `article_length` | number | Extracted content length | +| `input_tokens` | number | LLM prompt tokens | +| `output_tokens` | number | LLM completion tokens | +| `is_premium` | number | 1 = premium user | +| `error_type` | string | Error classification | +| `hostname` | string | Article domain | + +### `ad_event` — Ad Funnel Analytics + +Captured via `trackAdEvent()`. Fired from `/api/px` (client tracking) and `/api/context` (server ad requests). + +| Property | Type | Description | +|----------|------|-------------| +| `event_type` | `request` / `impression` / `click` / `dismiss` | Funnel stage | +| `status` | `filled` / `no_fill` / `premium_user` / `error` | Ad request result | +| `brand_name` | string | Advertiser brand | +| `ad_provider` | string | `zeroclick` or `gravity` | +| `placement` | string | UI slot (sidebar, inline, footer, etc.) | +| `ad_index` | number | Position in ad array (0-4) | +| `gravity_forwarded` | number | 1 = Gravity received impression (revenue) | +| `gravity_status_code` | number | Gravity's response status | +| `device_type` | string | desktop / mobile / tablet | +| `session_id` | string | Session identifier | + +### `$ai_generation` — LLM Analytics + +Captured via `trackLLMGeneration()`. Fires on every chat completion (free and premium). 
+ +| Property | PostHog Key | Description | +|----------|-------------|-------------| +| Trace ID | `$ai_trace_id` | Links related LLM calls | +| Model | `$ai_model` | e.g., `google/gemini-2.0-flash-001` | +| Provider | `$ai_provider` | `openrouter` | +| Input tokens | `$ai_input_tokens` | Prompt token count | +| Output tokens | `$ai_output_tokens` | Completion token count | +| Latency | `$ai_latency` | Seconds (not ms) | +| Is error | `$ai_is_error` | Boolean | +| Output | `$ai_output_choices` | `[{role: "assistant", content: "..."}]` | +| Premium | `is_premium` | Boolean | +| Language | `language` | Chat language | +| Messages | `message_count` | Conversation length | + +PostHog automatically builds an **LLM Analytics dashboard** from `$ai_generation` events showing cost, latency, token usage, and error rates. + +--- + +## Ad Tracking Data Flow + +``` +User sees ad (IntersectionObserver ≥ 50%) + │ + ├── Client: track("ad_impression_client", { placement, brand_name, ... }) + │ → PostHog (client-side, for funnels/attribution) + │ + └── Client: fireImpression(ad, "sidebar", 0) + │ + ├── sendBeacon → /api/px { type: "impression", placement, adIndex, ... } + │ │ + │ ├── If Gravity: forward to impUrl (server-side) + │ │ └── Log gravity_forwarded = 1/0 (revenue assurance) + │ │ + │ └── trackAdEvent → PostHog (server-side, complete data) + │ + └── If ZeroClick: fetch → zeroclick.dev/api/v2/impressions + (client-side only, per ZeroClick docs) + +User clicks ad + │ + ├── Client: track("ad_click_client", { placement, brand_name, ... }) + │ → PostHog (client-side) + │ + ├── Client: fireClick(ad, "sidebar", 0) + │ └── sendBeacon → /api/px { type: "click", placement, adIndex } + │ └── trackAdEvent → PostHog (server-side) + │ + └── Browser navigates to ad.clickUrl + ├── ZeroClick: zero.click/{id} → advertiser + └── Gravity: trygravity.ai/... → advertiser +``` + +--- + +## PostHog Dashboards to Create + +### 1. 
Product Health (DAU/MAU) + +**Type:** Trends insight with formula + +1. Create a Trends insight +2. Series A: `$pageview` — unique users — daily +3. Series B: `$pageview` — unique users — monthly +4. Formula: `A / B` (ratio) +5. A healthy DAU/MAU is 20-30%+ + +### 2. New vs Returning Users + +**Type:** Lifecycle insight + +1. Create a Lifecycle insight +2. Event: `$pageview` +3. PostHog auto-segments into: New, Returning, Resurrecting, Dormant +4. Track week-over-week to see growth + +### 3. Feature Adoption + +**Type:** Trends insight + Cohorts + +1. **Trends:** Event `feature_used`, break down by `feature` property +2. **Cohorts:** Create cohorts like "Users who have `first_used_tts` is set" +3. **Stickiness:** How many days/week do users use each feature? + +### 4. Churn Rate + +**Type:** Retention insight + +1. Create a Retention insight +2. Start event: `$pageview` (first visit) +3. Return event: `$pageview` (subsequent visit) +4. Period: Weekly +5. Churn = 100% - Retention at each period + +### 5. Ad Revenue Funnel + +**Type:** Funnel insight + +1. Step 1: `ad_event` where `event_type = impression` +2. Step 2: `ad_event` where `event_type = click` +3. Break down by `placement` to see which slots convert +4. Break down by `ad_provider` to compare ZeroClick vs Gravity + +### 6. Ad Placement Performance + +**Type:** Trends insight + +1. Event: `ad_click_client`, break down by `placement` +2. Compare: sidebar vs inline vs footer vs chat_header vs micro +3. Calculate CTR: clicks / impressions per placement + +### 7. LLM Cost & Performance + +**Type:** Built-in LLM Analytics dashboard + +PostHog auto-creates this from `$ai_generation` events: +- Cost per model +- Token usage trends +- Latency distribution +- Error rates +- Premium vs free usage + +### 8. Power Users + +**Type:** Cohort + Stickiness + +1. **Stickiness insight:** `article_loaded` — how many days/week +2. **Cohort:** Users with 5+ `article_loaded` events in last 7 days +3. 
Cross-reference with `is_premium` to find conversion opportunities + +--- + +## Adding a New Event + +### Client-side + +1. Add event name to `AnalyticsEvent` type in `lib/hooks/use-analytics.ts` +2. Call `track("event_name", { ...props })` in the component +3. If it's a feature users adopt, also call `markFeatureUsed("feature_name")` + +```tsx +import { useAnalytics } from "@/lib/hooks/use-analytics"; + +function MyComponent() { + const { track, markFeatureUsed } = useAnalytics(); + + const handleAction = () => { + track("my_event", { some_prop: "value" }); + markFeatureUsed("my_feature"); // optional: for adoption tracking + }; +} +``` + +### Server-side + +Use `trackEvent()` for request analytics or `trackAdEvent()` for ad events: + +```typescript +import { trackEvent } from "../../lib/posthog"; + +trackEvent({ + endpoint: "/api/my-endpoint", + status_code: 200, + duration_ms: 150, + // ... other properties +}); +``` + +--- + +## Maintenance Guide + +### Weekly checks +- [ ] Check **Live Events** in PostHog — are events flowing? +- [ ] Review **Session Recordings** for UX issues +- [ ] Check **LLM Analytics** for cost spikes or error rate increases + +### Monthly checks +- [ ] Review **DAU/MAU ratio** — is it trending up? +- [ ] Check **Retention** — is churn improving? +- [ ] Review **Ad funnel** — impression-to-click rates by placement +- [ ] Check **Feature adoption** — are new features being used? +- [ ] Review **Power user cohort** — identify conversion opportunities + +### When adding features +1. Add custom events for the key interactions +2. Add `markFeatureUsed()` call for adoption tracking +3. Update this doc with the new events +4. Create a PostHog insight/dashboard for the feature + +### When removing features +1. Remove the tracking code +2. Remove event from `AnalyticsEvent` type +3. Archive related PostHog insights (don't delete — historical data stays) +4. Update this doc + +--- + +## Debugging + +### Check if events are sending +1. 
Open browser DevTools → Network tab +2. Filter by `posthog` or `i.posthog.com` +3. You should see batch requests every few seconds + +### Check server-side events +```bash +# In PostHog: Activity → Live Events +# Filter by event name: request_event, ad_event, $ai_generation +``` + +### HogQL queries (admin) +```typescript +import { queryPostHog } from "@/lib/posthog"; + +const results = await queryPostHog<{ count: number }>( + "SELECT count() as count FROM events WHERE event = 'ad_event' AND timestamp > now() - interval 1 day" +); +``` + +### Common issues +- **No events in PostHog:** Check `POSTHOG_API_KEY` and `POSTHOG_HOST` env vars +- **No user identification:** Check Clerk is loaded before PostHog identify runs +- **Missing ad placements:** Check `placement` property in ad events — should not be "unknown" +- **LLM events missing:** Check `$ai_generation` events — verify `onFinish` callback in chat.ts diff --git a/docs/MEMORY_LEAK_FIX.md b/docs/MEMORY_LEAK_FIX.md index ec300f84..1d6cc957 100644 --- a/docs/MEMORY_LEAK_FIX.md +++ b/docs/MEMORY_LEAK_FIX.md @@ -57,7 +57,7 @@ See `docs/ARTICLE_RACE_OPTIMIZATION.md` for full architecture details. 
| Resource | Limit | Notes | |----------|-------|-------| | Auth cache | 1,000 entries | LRU | -| ClickHouse buffer | 500 events | Flushed periodically | +| PostHog buffer | SDK-managed | Flushed automatically | | Rate limiter | 10,000 IPs | Sliding window | | ZeroClick clients | 50 sessions | 2-min TTL | | Response body | 25MB | `lib/safe-fetch.ts` | diff --git a/docs/MEMORY_TRACKING.md b/docs/MEMORY_TRACKING.md index 0348e918..9e4fa7af 100644 --- a/docs/MEMORY_TRACKING.md +++ b/docs/MEMORY_TRACKING.md @@ -86,7 +86,7 @@ The system tracks these bounded caches: | ZeroClick MCP Clients | 50 | `lib/zeroclick.ts` | | Session Failures | 200 | `lib/zeroclick.ts` | | Auth/Billing Cache | 1000 | `server/middleware/auth.ts` | -| ClickHouse Buffer | 500 events | `lib/clickhouse.ts` | +| PostHog Buffer | SDK-managed | `lib/posthog.ts` | | Rate Limiter IPs | 10,000 | `lib/rate-limit-memory.ts` | ## Instrumented Operations @@ -168,8 +168,8 @@ Response now includes cache stats: "zeroclick_client_cache": 12, "zeroclick_session_failures": 5, "zeroclick_orphaned": 0, - "clickhouse_buffer": 45, - "clickhouse_active_queries": 2, + "posthog_buffer": 45, + "posthog_active_queries": 2, "rate_limiter_ips": 1234, "active_operations": 3 } @@ -196,7 +196,7 @@ grep "memory_operation" logs.json | jq 'select(.duration_ms > 5000)' grep "cache_stats_snapshot" logs.json | jq '{ time: .timestamp, zc_clients: .zeroclick_client_cache, - ch_buffer: .clickhouse_buffer, + ph_buffer: .posthog_buffer, rate_ips: .rate_limiter_ips }' ``` diff --git a/docs/clickhouse-schema.sql b/docs/clickhouse-schema.sql deleted file mode 100644 index 211ba0bb..00000000 --- a/docs/clickhouse-schema.sql +++ /dev/null @@ -1,237 +0,0 @@ --- SMRY.ai Clickhouse Analytics Schema --- Run this SQL in your Clickhouse instance to set up the analytics tables - --- Create database if not exists -CREATE DATABASE IF NOT EXISTS smry_analytics; - --- Switch to the database -USE smry_analytics; - --- Main events table with MergeTree 
engine optimized for time-series queries -CREATE TABLE IF NOT EXISTS request_events -( - -- Identifiers - request_id String, - timestamp DateTime64(3) DEFAULT now64(3), - - -- Request metadata - method LowCardinality(String), - endpoint LowCardinality(String), -- /api/article, /api/summary - path String, - - -- Article/content context - url String, - hostname LowCardinality(String), -- nytimes.com, wsj.com, etc. - source LowCardinality(String), -- smry-fast, smry-slow, wayback - - -- Outcome metrics - outcome LowCardinality(String), -- success, error - status_code UInt16, - error_type LowCardinality(String) DEFAULT '', - error_message String DEFAULT '', - - -- Performance metrics - duration_ms UInt32, - fetch_ms UInt32 DEFAULT 0, - cache_lookup_ms UInt32 DEFAULT 0, - cache_save_ms UInt32 DEFAULT 0, - - -- Cache behavior - cache_hit UInt8 DEFAULT 0, -- 0 = miss, 1 = hit - cache_status LowCardinality(String) DEFAULT '', -- hit, miss, invalid, error - - -- Content metrics - article_length UInt32 DEFAULT 0, - article_title String DEFAULT '', - - -- AI Summary specific (for /api/summary) - summary_length UInt32 DEFAULT 0, - input_tokens UInt32 DEFAULT 0, - output_tokens UInt32 DEFAULT 0, - - -- User context - is_premium UInt8 DEFAULT 0, - client_ip String DEFAULT '', - user_agent String DEFAULT '', - - -- System health - heap_used_mb UInt16 DEFAULT 0, - heap_total_mb UInt16 DEFAULT 0, - rss_mb UInt16 DEFAULT 0, - - -- Environment - env LowCardinality(String) DEFAULT 'production', - version String DEFAULT '' -) -ENGINE = MergeTree() -PARTITION BY toYYYYMM(timestamp) -ORDER BY (hostname, source, timestamp, request_id) -TTL toDateTime(timestamp) + INTERVAL 30 DAY -- Auto-delete data older than 30 days -SETTINGS index_granularity = 8192; - --- Index for faster hostname lookups -ALTER TABLE request_events ADD INDEX idx_hostname hostname TYPE bloom_filter GRANULARITY 1; - --- Index for error filtering -ALTER TABLE request_events ADD INDEX idx_outcome outcome TYPE set(2) 
GRANULARITY 1; - - --- Materialized view for hourly aggregates (pre-computed for dashboard performance) -CREATE MATERIALIZED VIEW IF NOT EXISTS hourly_stats -ENGINE = SummingMergeTree() -PARTITION BY toYYYYMM(hour) -ORDER BY (hostname, source, hour) -TTL hour + INTERVAL 30 DAY -- Match raw data TTL -AS SELECT - toStartOfHour(timestamp) AS hour, - hostname, - source, - count() AS request_count, - countIf(outcome = 'success') AS success_count, - countIf(outcome = 'error') AS error_count, - countIf(cache_hit = 1) AS cache_hits, - sum(duration_ms) AS total_duration_ms, - sum(article_length) AS total_article_length -FROM request_events -GROUP BY hour, hostname, source; - - --- Materialized view for error tracking by hostname -CREATE MATERIALIZED VIEW IF NOT EXISTS error_rates -ENGINE = SummingMergeTree() -PARTITION BY toYYYYMM(hour) -ORDER BY (hostname, source, error_type, hour) -TTL hour + INTERVAL 30 DAY -- Match raw data TTL -AS SELECT - toStartOfHour(timestamp) AS hour, - hostname, - source, - error_type, - count() AS error_count -FROM request_events -WHERE outcome = 'error' -GROUP BY hour, hostname, source, error_type; - - --- ============================================================================ --- AD EVENTS TABLE - Tracks ad requests, fill rates, and performance --- ============================================================================ - -CREATE TABLE IF NOT EXISTS ad_events -( - event_id String, - timestamp DateTime64(3) DEFAULT now64(3), - - -- Request context - url String, - hostname LowCardinality(String), - article_title String DEFAULT '', - article_content_length UInt32 DEFAULT 0, - session_id String, - - -- User context - user_id String DEFAULT '', - is_premium UInt8 DEFAULT 0, - - -- Device context - device_type LowCardinality(String) DEFAULT '', -- desktop, mobile, tablet - os LowCardinality(String) DEFAULT '', -- windows, macos, ios, android - browser LowCardinality(String) DEFAULT '', -- chrome, safari, firefox - - -- Response - status 
LowCardinality(String), -- filled, no_fill, premium_user, gravity_error, timeout, error - gravity_status_code UInt16 DEFAULT 0, - error_message String DEFAULT '', - - -- Ad data (when filled) - brand_name LowCardinality(String) DEFAULT '', - ad_title String DEFAULT '', - - -- Performance - duration_ms UInt32 DEFAULT 0, - - -- Environment - env LowCardinality(String) DEFAULT 'production' -) -ENGINE = MergeTree() -PARTITION BY toYYYYMM(timestamp) -ORDER BY (hostname, status, timestamp, event_id) -TTL toDateTime(timestamp) + INTERVAL 90 DAY -- Keep ad data longer for analysis -SETTINGS index_granularity = 8192; - --- Index for faster status filtering -ALTER TABLE ad_events ADD INDEX idx_status status TYPE set(10) GRANULARITY 1; - --- Index for brand lookups -ALTER TABLE ad_events ADD INDEX idx_brand brand_name TYPE bloom_filter GRANULARITY 1; - - --- ============================================================================ --- USEFUL QUERIES FOR DEBUGGING AND MONITORING --- ============================================================================ - --- Check data is flowing in --- SELECT count(), max(timestamp), min(timestamp) FROM request_events; - --- Top 10 sites by error count (last 24h) --- SELECT hostname, count() as errors --- FROM request_events --- WHERE timestamp > now() - INTERVAL 24 HOUR AND outcome = 'error' --- GROUP BY hostname --- ORDER BY errors DESC --- LIMIT 10; - --- Source success rates by hostname (last 24h) --- SELECT hostname, source, --- round(countIf(outcome = 'success') / count() * 100, 2) as success_rate, --- count() as total --- FROM request_events --- WHERE timestamp > now() - INTERVAL 24 HOUR --- GROUP BY hostname, source --- HAVING total >= 5 --- ORDER BY hostname, success_rate DESC; - --- Memory usage over time (for leak detection) --- SELECT toStartOfMinute(timestamp) as minute, --- avg(heap_used_mb) as avg_heap, --- max(heap_used_mb) as max_heap, --- avg(rss_mb) as avg_rss --- FROM request_events --- WHERE timestamp > now() - 
INTERVAL 1 HOUR --- GROUP BY minute --- ORDER BY minute; - --- Cache hit rate by endpoint --- SELECT endpoint, --- round(countIf(cache_hit = 1) / count() * 100, 2) as cache_hit_rate, --- count() as total --- FROM request_events --- WHERE timestamp > now() - INTERVAL 24 HOUR --- GROUP BY endpoint; - - --- ============================================================================ --- MEMORY MANAGEMENT --- ============================================================================ --- --- Built-in safeguards: --- 1. TTL (30 days) - auto-deletes old data via background merges --- 2. LowCardinality columns - reduces memory for repeated strings (hostname, source, etc) --- 3. Monthly partitioning - enables efficient partition drops --- 4. Compression enabled client-side --- --- Monitor disk usage: --- SELECT database, table, formatReadableSize(sum(bytes)) as size --- FROM system.parts --- WHERE active --- GROUP BY database, table; --- --- Manual partition cleanup (if needed): --- ALTER TABLE request_events DROP PARTITION '202501'; --- --- Check TTL progress: --- SELECT table, formatReadableSize(sum(bytes)) as size, --- min(min_date), max(max_date) --- FROM system.parts --- WHERE database = 'smry_analytics' AND active --- GROUP BY table; diff --git a/docs/mcp-client-memory-fix.md b/docs/mcp-client-memory-fix.md index dda75615..505ee663 100644 --- a/docs/mcp-client-memory-fix.md +++ b/docs/mcp-client-memory-fix.md @@ -14,7 +14,7 @@ Memory climbed from ~1GB to 4GB over 18 hours, then dropped to 378MB on rebuild/ |-----------|------------|----------| | `sessionFailures` Map | ~30 KB (200 entries × 150 bytes) | Too small for 4GB | | `clientCache` | ~75 MB (50 clients × 1.5 MB) | Bounded with eviction | -| ClickHouse buffers | ~1.5 MB | Bounded at 500 events | +| PostHog buffers | SDK-managed | PostHog SDK handles batching | | Rate limiter | ~1.5 MB | Bounded at 10,000 IPs | | Auth cache | ~100 KB | Bounded at 1,000 entries | diff --git a/docs/memory-management.md 
b/docs/memory-management.md index 813c7302..ce335b1d 100644 --- a/docs/memory-management.md +++ b/docs/memory-management.md @@ -22,23 +22,19 @@ Memory-intensive operations: - Article fetching and parsing (Diffbot) - Ad requests (Gravity + ZeroClick waterfall) - Chat streaming (LLM responses) -- Analytics buffering (ClickHouse) +- Analytics (PostHog SDK manages its own buffer) --- ## Memory-Safe Components -### 1. ClickHouse Event Buffer -**Location:** `lib/clickhouse.ts` +### 1. PostHog Event Buffer +**Location:** `lib/posthog.ts` -```typescript -const MAX_BUFFER_SIZE = 500; // Max events before forced flush -const FLUSH_INTERVAL_MS = 5000; // Auto-flush every 5 seconds -``` - -- Events buffered in memory, flushed to ClickHouse periodically -- Oldest events dropped if buffer overflows -- Timer uses `.unref()` to not block process exit +PostHog SDK handles batching internally: +- `flushAt: 50` events triggers a batch send +- `flushInterval: 5000ms` auto-flush every 5 seconds +- SDK manages retries and connection pooling ### 2. 
Auth Billing Cache **Location:** `server/middleware/auth.ts` @@ -345,7 +341,7 @@ The `/health` endpoint returns memory stats: | ZeroClick Clients | 100 | 5 min | 1 min | | Auth Cache | 1000 | 5 min | 1 min | | Rate Limiter | 10,000 | varies | 1 min | -| ClickHouse Buffer | 500 events | - | 5 sec flush | +| PostHog Buffer | SDK-managed | - | 5 sec flush | | Article htmlContent | 500KB | - | per-response truncation | --- @@ -354,6 +350,6 @@ The `/health` endpoint returns memory stats: - `lib/memory-monitor.ts` — Memory monitoring and alerts - `lib/zeroclick.ts` — ZeroClick client pool -- `lib/clickhouse.ts` — Event buffering +- `lib/posthog.ts` — PostHog analytics client - `lib/rate-limit-memory.ts` — Rate limiting - `server/middleware/auth.ts` — Billing cache diff --git a/exploration.md b/exploration.md index 0b38adc0..39e94806 100644 --- a/exploration.md +++ b/exploration.md @@ -33,7 +33,7 @@ Contacts support OR leaves ## Analytics Insights -From our Clickhouse analytics (49K requests over 7 days): +From our PostHog analytics (49K requests over 7 days): | Metric | Value | |--------|-------| @@ -76,7 +76,7 @@ Show sources trying one at a time like a progress indicator. Pre-compute which sources work for which domains. Skip sources that never work. **Rejected because:** Too complex. Requires: -- Clickhouse aggregation jobs +- PostHog aggregation jobs - Redis caching - Exploration/exploitation logic - Anomaly detection for stale data @@ -194,7 +194,7 @@ Update manually when patterns emerge in analytics. No automation needed. 
We explicitly decided NOT to implement: - ❌ Domain intelligence caching in Redis -- ❌ Clickhouse aggregation jobs for source success rates +- ❌ PostHog aggregation jobs for source success rates - ❌ Exploration/exploitation routing (10% explore, 90% exploit) - ❌ Anomaly detection for stale cached data - ❌ Decay functions for confidence over time diff --git a/lib/MEMORY_LEAK_INVESTIGATION.md b/lib/MEMORY_LEAK_INVESTIGATION.md index 05f98420..0df40dbe 100644 --- a/lib/MEMORY_LEAK_INVESTIGATION.md +++ b/lib/MEMORY_LEAK_INVESTIGATION.md @@ -84,12 +84,12 @@ Every GC run is logged: } ``` -Also tracked in ClickHouse with `GC_INEFFECTIVE` error type if it only frees <=10MB. +Also tracked in PostHog with `GC_INEFFECTIVE` error type if it only frees <=10MB. ### 3. Critical Threshold Protection If RSS exceeds 1.5GB: -1. Log to ClickHouse for post-mortem +1. Log to PostHog for post-mortem 2. Force process exit after 1 second 3. Let Railway restart the service cleanly @@ -108,7 +108,7 @@ railway logs --service smry-api --filter "critical_rss_spike" railway logs --service smry-api --filter "critical_memory_exceeded" ``` -### ClickHouse Queries +### PostHog Queries ```sql -- GC effectiveness over time @@ -167,7 +167,7 @@ https://github.com/oven-sh/bun/issues ## Files Involved - `lib/memory-monitor.ts` - Memory monitoring, GC forcing, threshold detection -- `lib/clickhouse.ts` - Analytics event tracking +- `lib/posthog.ts` - Analytics event tracking - `Dockerfile.api` - Bun runtime configuration - `lib/api/diffbot.ts` - Heavy fetch usage (Diffbot API) - `server/routes/article.ts` - Heavy fetch usage (direct HTML) diff --git a/lib/alerting.ts b/lib/alerting.ts index 7b9ede4b..a511b955 100644 --- a/lib/alerting.ts +++ b/lib/alerting.ts @@ -1,11 +1,11 @@ /** * Error Rate Alerting * - * Monitors ClickHouse for error rate spikes and sends alerts via inbound.new. + * Monitors PostHog for error rate spikes and sends alerts via inbound.new. * Runs on a cron schedule from the Elysia server. 
*/ -import { queryClickhouse } from "./clickhouse"; +import { queryPostHog } from "./posthog"; import { sendAlertEmail } from "./emails"; import { env } from "../server/env"; @@ -38,15 +38,16 @@ interface TopError { async function getErrorRateStats(): Promise { const query = ` SELECT - countIf(outcome = 'error' AND timestamp > now() - INTERVAL 5 MINUTE) as recent_errors, + countIf(properties.outcome = 'error' AND timestamp > now() - INTERVAL 5 MINUTE) as recent_errors, countIf(timestamp > now() - INTERVAL 5 MINUTE) as recent_total, - countIf(outcome = 'error' AND timestamp <= now() - INTERVAL 5 MINUTE AND timestamp > now() - INTERVAL 1 HOUR) as baseline_errors, + countIf(properties.outcome = 'error' AND timestamp <= now() - INTERVAL 5 MINUTE AND timestamp > now() - INTERVAL 1 HOUR) as baseline_errors, countIf(timestamp <= now() - INTERVAL 5 MINUTE AND timestamp > now() - INTERVAL 1 HOUR) as baseline_total - FROM request_events - WHERE timestamp > now() - INTERVAL 1 HOUR + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL 1 HOUR `; - const results = await queryClickhouse<{ + const results = await queryPostHog<{ recent_errors: number; recent_total: number; baseline_errors: number; @@ -72,18 +73,19 @@ async function getErrorRateStats(): Promise { async function getTopRecentErrors(): Promise { const query = ` SELECT - error_type, - error_message, + properties.error_type as error_type, + properties.error_message as error_message, count() as count - FROM request_events - WHERE timestamp > now() - INTERVAL 5 MINUTE - AND outcome = 'error' + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL 5 MINUTE + AND properties.outcome = 'error' GROUP BY error_type, error_message ORDER BY count DESC LIMIT 5 `; - return queryClickhouse(query); + return queryPostHog(query); } /** @@ -93,7 +95,7 @@ export async function checkErrorRateAndAlert(): Promise { try { const stats = await getErrorRateStats(); if (!stats) { - 
console.log("[alerting] No data from ClickHouse"); + console.log("[alerting] No data from PostHog"); return; } diff --git a/lib/article-concurrency.ts b/lib/article-concurrency.ts index 54de5689..d6afe97b 100644 --- a/lib/article-concurrency.ts +++ b/lib/article-concurrency.ts @@ -6,7 +6,7 @@ * At 100+ concurrent users with cache misses, the /api/article/auto endpoint * would otherwise spawn 300 simultaneous connections (3 per user). * - * Modeled on the ClickHouse acquireQuerySlot/releaseQuerySlot pattern. + * Modeled on the acquireQuerySlot/releaseQuerySlot pattern. */ let maxConcurrentFetches = 50; diff --git a/lib/clickhouse.ts b/lib/clickhouse.ts deleted file mode 100644 index debb1d6b..00000000 --- a/lib/clickhouse.ts +++ /dev/null @@ -1,793 +0,0 @@ -import { createClient, ClickHouseClient } from "@clickhouse/client"; -import { env } from "../server/env"; - -/** - * Clickhouse Analytics Client - * - * Memory-safe implementation following the same patterns as: - * - redis.ts (module-level singleton) - * - summary/route.ts rate limiters (singleton to prevent memory leaks) - * - * Key memory safeguards: - * 1. Module-level singleton client (not per-request) - * 2. Bounded event buffer (max 500 events) - * 3. Automatic flush every 5 seconds - * 4. Fire-and-forget writes (non-blocking) - * 5. 
Graceful degradation when Clickhouse not configured - */ - -// Module-level singleton - created once at module load -let client: ClickHouseClient | null = null; -// Track if ClickHouse is unavailable (connection failed) to prevent repeated attempts -let clickhouseDisabled = false; -let lastConnectionAttempt = 0; -const CONNECTION_RETRY_INTERVAL_MS = 60_000; // Retry connection check every 60 seconds - -function getClient(): ClickHouseClient | null { - // Skip if we've determined ClickHouse is unavailable - // Allow retry after CONNECTION_RETRY_INTERVAL_MS - if (clickhouseDisabled) { - const now = Date.now(); - if (now - lastConnectionAttempt < CONNECTION_RETRY_INTERVAL_MS) { - return null; - } - // Reset to allow retry - clickhouseDisabled = false; - } - - if (!client) { - client = createClient({ - url: env.CLICKHOUSE_URL, - username: env.CLICKHOUSE_USER, - password: env.CLICKHOUSE_PASSWORD, - database: env.CLICKHOUSE_DATABASE, - // Reduced from 30s — fail faster so queued queries don't cascade - request_timeout: 10_000, - compression: { - request: true, - response: true, - }, - // Keep-alive to reduce connection overhead - keep_alive: { - enabled: true, - }, - }); - } - return client; -} - -/** - * Mark ClickHouse as temporarily disabled due to connection failure. - * Drains the query queue so waiting queries fail fast instead of - * blocking for up to QUERY_SLOT_TIMEOUT_MS. - */ -function disableClickhouse(reason: string): void { - if (!clickhouseDisabled) { - console.warn(`[clickhouse] Disabled due to connection failure: ${reason}. 
Will retry in ${CONNECTION_RETRY_INTERVAL_MS / 1000}s`); - clickhouseDisabled = true; - lastConnectionAttempt = Date.now(); - - // Destroy the stale client so the next retry creates a fresh connection - if (client) { - client.close().catch(() => {}); - client = null; - } - // Reset schema flags so they re-run on reconnect - schemaMigrated = false; - adSchemaMigrated = false; - - // Drain the query queue — reject all waiting queries immediately - const queuedCount = queryQueue.length; - while (queryQueue.length > 0) { - const queued = queryQueue.shift()!; - queued.reject(new Error("ClickHouse disabled - connection failure")); - } - if (queuedCount > 0) { - console.warn(`[clickhouse] Drained ${queuedCount} queued queries`); - } - } -} - -// Analytics event type matching our Clickhouse schema -// Error severity levels for distinguishing expected vs unexpected errors -export type ErrorSeverity = "expected" | "degraded" | "unexpected" | ""; - -export interface AnalyticsEvent { - request_id: string; - timestamp: string; - method: string; - endpoint: string; - path: string; - url: string; - hostname: string; - source: string; - outcome: string; - status_code: number; - error_type: string; - error_message: string; - error_severity: ErrorSeverity; - // Upstream error info - which host/service actually caused the error - upstream_hostname: string; - upstream_status_code: number; - upstream_error_code: string; - upstream_message: string; - duration_ms: number; - fetch_ms: number; - cache_lookup_ms: number; - cache_save_ms: number; - cache_hit: number; - cache_status: string; - article_length: number; - article_title: string; - summary_length: number; - input_tokens: number; - output_tokens: number; - is_premium: number; - client_ip: string; - user_agent: string; - heap_used_mb: number; - heap_total_mb: number; - rss_mb: number; - env: string; - version: string; -} - -// ============================================================================= -// Ad Event Tracking -// 
============================================================================= - -// Ad event status - matches ContextResponseStatus in types/api.ts -export type AdEventStatus = "filled" | "no_fill" | "premium_user" | "gravity_error" | "timeout" | "error"; - -// Event type for tracking funnel: request -> impression -> click/dismiss -export type AdEventType = "request" | "impression" | "click" | "dismiss"; - -export interface AdEvent { - event_id: string; - timestamp: string; - // Event type (request, impression, click, dismiss) - event_type: AdEventType; - // Request context - url: string; - hostname: string; - article_title: string; - article_content_length: number; - session_id: string; - // User context - user_id: string; - is_premium: number; - // Device context - device_type: string; - os: string; - browser: string; - // Response - status: AdEventStatus; - gravity_status_code: number; - error_message: string; - // Gravity forwarding status (for impressions) - // 1 = successfully forwarded to Gravity, 0 = failed or not applicable - gravity_forwarded: number; - // Ad data (when filled) - brand_name: string; - ad_title: string; - ad_text: string; - click_url: string; - imp_url: string; - cta: string; - favicon: string; - ad_count: number; // Number of ads returned in this request - // Performance - duration_ms: number; - // Environment - env: string; -} - -// Separate buffer for ad events -const adEventBuffer: AdEvent[] = []; -let adFlushTimer: NodeJS.Timeout | null = null; -let adSchemaMigrated = false; - -/** - * Ensure ad_events table exists - */ -async function ensureAdSchema(): Promise { - if (adSchemaMigrated) return; - - const clickhouse = getClient(); - if (!clickhouse) return; - - try { - await clickhouse.command({ - query: ` - CREATE TABLE IF NOT EXISTS ad_events - ( - event_id String, - timestamp DateTime64(3) DEFAULT now64(3), - -- Event type (request, impression, click, dismiss) - event_type LowCardinality(String) DEFAULT 'request', - -- Request 
context - url String, - hostname LowCardinality(String), - article_title String DEFAULT '', - article_content_length UInt32 DEFAULT 0, - session_id String, - -- User context - user_id String DEFAULT '', - is_premium UInt8 DEFAULT 0, - -- Device context - device_type LowCardinality(String) DEFAULT '', - os LowCardinality(String) DEFAULT '', - browser LowCardinality(String) DEFAULT '', - -- Response - status LowCardinality(String), - gravity_status_code UInt16 DEFAULT 0, - error_message String DEFAULT '', - -- Gravity forwarding status (for impressions) - -- 1 = successfully forwarded to Gravity, 0 = failed or not applicable - gravity_forwarded UInt8 DEFAULT 0, - -- Ad data (when filled) - brand_name LowCardinality(String) DEFAULT '', - ad_title String DEFAULT '', - ad_text String DEFAULT '', - click_url String DEFAULT '', - imp_url String DEFAULT '', - cta LowCardinality(String) DEFAULT '', - favicon String DEFAULT '', - ad_count UInt8 DEFAULT 0, - -- Performance - duration_ms UInt32 DEFAULT 0, - -- Environment - env LowCardinality(String) DEFAULT 'production' - ) - ENGINE = MergeTree() - PARTITION BY toYYYYMM(timestamp) - ORDER BY (hostname, event_type, status, timestamp, event_id) - TTL toDateTime(timestamp) + INTERVAL 90 DAY - SETTINGS index_granularity = 8192 - `, - }); - - // Add new columns for existing tables (safe migration) - try { - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS event_type LowCardinality(String) DEFAULT 'request'`, - }); - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS ad_text String DEFAULT ''`, - }); - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS click_url String DEFAULT ''`, - }); - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS imp_url String DEFAULT ''`, - }); - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS cta LowCardinality(String) DEFAULT ''`, - 
}); - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS favicon String DEFAULT ''`, - }); - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS ad_count UInt8 DEFAULT 0`, - }); - // Track whether impression was successfully forwarded to Gravity (for billing) - await clickhouse.command({ - query: `ALTER TABLE ad_events ADD COLUMN IF NOT EXISTS gravity_forwarded UInt8 DEFAULT 0`, - }); - } catch { - // Ignore errors - columns may already exist - } - - // Create materialized views for ad analytics performance - try { - // Hourly ad metrics materialized view - await clickhouse.command({ - query: ` - CREATE MATERIALIZED VIEW IF NOT EXISTS ad_hourly_metrics_mv - ENGINE = SummingMergeTree() - PARTITION BY toYYYYMM(hour) - ORDER BY (hour, device_type, browser) - AS SELECT - toStartOfHour(timestamp) AS hour, - if(device_type = '', 'unknown', device_type) AS device_type, - if(browser = '', 'unknown', browser) AS browser, - countIf(event_type = 'request' AND status = 'filled') AS filled_count, - countIf(event_type = 'impression') AS impression_count, - countIf(event_type = 'click') AS click_count, - countIf(event_type = 'dismiss') AS dismiss_count, - uniqState(session_id) AS unique_sessions_state - FROM ad_events - GROUP BY hour, device_type, browser - `, - }); - - // CTR by hour of day materialized view - await clickhouse.command({ - query: ` - CREATE MATERIALIZED VIEW IF NOT EXISTS ad_ctr_by_hour_mv - ENGINE = SummingMergeTree() - PARTITION BY toYYYYMM(date) - ORDER BY (date, hour_of_day, device_type) - AS SELECT - toDate(timestamp) AS date, - toHour(timestamp) AS hour_of_day, - if(device_type = '', 'unknown', device_type) AS device_type, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - countIf(event_type = 'request' AND status = 'filled') AS filled, - countIf(event_type = 'request' AND status != 'premium_user') AS requests - FROM ad_events - GROUP BY date, 
hour_of_day, device_type - `, - }); - - console.log("[clickhouse] Ad materialized views created"); - } catch { - // Ignore errors - views may already exist - } - - adSchemaMigrated = true; - console.log("[clickhouse] Ad events schema migration complete"); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - if (message.includes("ECONNREFUSED") || message.includes("ENOTFOUND") || message.includes("ETIMEDOUT") || message.includes("Timeout") || message.includes("Authentication failed")) { - disableClickhouse(message); - } else { - console.error("[clickhouse] Ad schema migration failed:", message); - } - } -} - -/** - * Flush ad events to ClickHouse - */ -async function flushAdEvents(): Promise { - if (adEventBuffer.length === 0) return; - - const clickhouse = getClient(); - if (!clickhouse) return; - - await ensureAdSchema(); - - const events = adEventBuffer.splice(0, adEventBuffer.length); - - try { - await clickhouse.insert({ - table: "ad_events", - values: events, - format: "JSONEachRow", - }); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - if (message.includes("ECONNREFUSED") || message.includes("ENOTFOUND") || message.includes("ETIMEDOUT") || message.includes("Timeout") || message.includes("Authentication failed")) { - disableClickhouse(message); - } else { - console.error("[clickhouse] Ad events flush failed:", message); - } - } -} - -/** - * Schedule ad events flush - */ -function scheduleAdFlush(): void { - if (adFlushTimer) return; - adFlushTimer = setTimeout(async () => { - adFlushTimer = null; - await flushAdEvents(); - }, FLUSH_INTERVAL_MS); - adFlushTimer.unref(); -} - -/** - * Track an ad event - */ -export function trackAdEvent(event: Partial): void { - const rawTimestamp = event.timestamp || new Date().toISOString(); - const clickhouseTimestamp = rawTimestamp.replace("T", " ").replace("Z", ""); - - const fullEvent: AdEvent = { - event_id: event.event_id || crypto.randomUUID(), - timestamp: clickhouseTimestamp, - event_type: event.event_type || "request", - url: event.url || "", - hostname: event.hostname || "", - article_title: (event.article_title || "").slice(0, 500), - article_content_length: event.article_content_length || 0, - session_id: event.session_id || "", - user_id: event.user_id || "", - is_premium: event.is_premium || 0, - device_type: event.device_type || "", - os: event.os || "", - browser: event.browser || "", - status: event.status || "error", - gravity_status_code: event.gravity_status_code || 0, - error_message: (event.error_message || "").slice(0, 500), - gravity_forwarded: event.gravity_forwarded || 0, - brand_name: event.brand_name || "", - ad_title: (event.ad_title || "").slice(0, 500), - ad_text: (event.ad_text || "").slice(0, 1000), - click_url: (event.click_url || "").slice(0, 2000), - imp_url: (event.imp_url || "").slice(0, 2000), - cta: (event.cta || "").slice(0, 100), - favicon: (event.favicon || "").slice(0, 500), - ad_count: event.ad_count || 0, - duration_ms: event.duration_ms || 0, - env: event.env || env.NODE_ENV, - }; - 
- if (adEventBuffer.length >= MAX_BUFFER_SIZE) { - adEventBuffer.shift(); - } - - adEventBuffer.push(fullEvent); - - if (adEventBuffer.length >= BATCH_SIZE) { - flushAdEvents().catch(() => {}); - } else { - scheduleAdFlush(); - } -} - -// ============================================================================= -// Request Event Tracking -// ============================================================================= - -// MEMORY SAFETY: Bounded buffer with strict max size -const MAX_BUFFER_SIZE = 500; -const BATCH_SIZE = 50; -const FLUSH_INTERVAL_MS = 5000; - -// CONCURRENCY CONTROL: Limit concurrent queries to prevent thread exhaustion -// ClickHouse has limited threads (typically 28), so we limit concurrent queries -// Admin dashboard runs ~39 queries in parallel, so we need enough slots -const MAX_CONCURRENT_QUERIES = 15; -const QUERY_SLOT_TIMEOUT_MS = 30_000; // 30s timeout waiting for slot -let activeQueries = 0; -const queryQueue: Array<{ - resolve: () => void; - reject: (err: Error) => void; -}> = []; - -async function acquireQuerySlot(): Promise { - if (activeQueries < MAX_CONCURRENT_QUERIES) { - activeQueries++; - return; - } - // Wait for a slot to become available (with timeout) - return new Promise((resolve, reject) => { - const timeout = setTimeout(() => { - const idx = queryQueue.findIndex((q) => q.resolve === wrappedResolve); - if (idx !== -1) queryQueue.splice(idx, 1); - reject(new Error("Query slot timeout - too many concurrent queries")); - }, QUERY_SLOT_TIMEOUT_MS); - - const wrappedResolve = () => { - clearTimeout(timeout); - resolve(); - }; - - queryQueue.push({ resolve: wrappedResolve, reject }); - }); -} - -function releaseQuerySlot(): void { - activeQueries--; - const next = queryQueue.shift(); - if (next) { - activeQueries++; - next.resolve(); - } -} - -const eventBuffer: AnalyticsEvent[] = []; -let flushTimer: NodeJS.Timeout | null = null; -let isInitialized = false; -let schemaMigrated = false; - -/** - * Auto-migrate schema on 
first use - * Creates database and table if they don't exist - */ -async function ensureSchema(): Promise { - if (schemaMigrated) return; - - const clickhouse = getClient(); - if (!clickhouse) return; - - try { - // Create database if not exists - await clickhouse.command({ - query: `CREATE DATABASE IF NOT EXISTS ${env.CLICKHOUSE_DATABASE}`, - }); - - // Create main events table - await clickhouse.command({ - query: ` - CREATE TABLE IF NOT EXISTS request_events - ( - request_id String, - timestamp DateTime64(3) DEFAULT now64(3), - method LowCardinality(String), - endpoint LowCardinality(String), - path String, - url String, - hostname LowCardinality(String), - source LowCardinality(String), - outcome LowCardinality(String), - status_code UInt16, - error_type LowCardinality(String) DEFAULT '', - error_message String DEFAULT '', - error_severity LowCardinality(String) DEFAULT '', - upstream_hostname LowCardinality(String) DEFAULT '', - upstream_status_code UInt16 DEFAULT 0, - upstream_error_code LowCardinality(String) DEFAULT '', - upstream_message String DEFAULT '', - duration_ms UInt32, - fetch_ms UInt32 DEFAULT 0, - cache_lookup_ms UInt32 DEFAULT 0, - cache_save_ms UInt32 DEFAULT 0, - cache_hit UInt8 DEFAULT 0, - cache_status LowCardinality(String) DEFAULT '', - article_length UInt32 DEFAULT 0, - article_title String DEFAULT '', - summary_length UInt32 DEFAULT 0, - input_tokens UInt32 DEFAULT 0, - output_tokens UInt32 DEFAULT 0, - is_premium UInt8 DEFAULT 0, - client_ip String DEFAULT '', - user_agent String DEFAULT '', - heap_used_mb UInt16 DEFAULT 0, - heap_total_mb UInt16 DEFAULT 0, - rss_mb UInt16 DEFAULT 0, - env LowCardinality(String) DEFAULT 'production', - version String DEFAULT '' - ) - ENGINE = MergeTree() - PARTITION BY toYYYYMM(timestamp) - ORDER BY (hostname, source, timestamp, request_id) - TTL toDateTime(timestamp) + INTERVAL 30 DAY - SETTINGS index_granularity = 8192 - `, - }); - - // Add new upstream columns to existing tables (safe for 
already-existing tables) - try { - await clickhouse.command({ - query: `ALTER TABLE request_events ADD COLUMN IF NOT EXISTS upstream_hostname LowCardinality(String) DEFAULT ''`, - }); - await clickhouse.command({ - query: `ALTER TABLE request_events ADD COLUMN IF NOT EXISTS upstream_status_code UInt16 DEFAULT 0`, - }); - await clickhouse.command({ - query: `ALTER TABLE request_events ADD COLUMN IF NOT EXISTS upstream_error_code LowCardinality(String) DEFAULT ''`, - }); - await clickhouse.command({ - query: `ALTER TABLE request_events ADD COLUMN IF NOT EXISTS upstream_message String DEFAULT ''`, - }); - } catch { - // Ignore errors - columns may already exist - } - - schemaMigrated = true; - console.log("[clickhouse] Schema migration complete"); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - // Check for connection errors and disable to prevent spam - if (message.includes("ECONNREFUSED") || message.includes("ENOTFOUND") || message.includes("ETIMEDOUT") || message.includes("Timeout") || message.includes("Authentication failed")) { - disableClickhouse(message); - } else { - // Log other errors but don't disable - might be transient - console.error("[clickhouse] Schema migration failed:", message); - } - } -} - -/** - * Flush events to Clickhouse - * Non-blocking, errors are logged but never thrown - */ -async function flushEvents(): Promise { - if (eventBuffer.length === 0) return; - - const clickhouse = getClient(); - if (!clickhouse) return; - - // Ensure schema exists before first insert - await ensureSchema(); - - // Splice out events atomically to prevent duplicates - const events = eventBuffer.splice(0, eventBuffer.length); - - try { - await clickhouse.insert({ - table: "request_events", - values: events, - format: "JSONEachRow", - }); - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error); - // Check for connection errors and disable to prevent spam - if (message.includes("ECONNREFUSED") || message.includes("ENOTFOUND") || message.includes("ETIMEDOUT") || message.includes("Timeout") || message.includes("Authentication failed")) { - disableClickhouse(message); - } else { - // Log other errors but don't disable - might be transient - console.error("[clickhouse] Flush failed:", message); - } - // Don't push events back - prevents infinite memory growth on persistent errors - } -} - -/** - * Schedule a flush if not already scheduled - */ -function scheduleFlush(): void { - if (flushTimer) return; - flushTimer = setTimeout(async () => { - flushTimer = null; - await flushEvents(); - }, FLUSH_INTERVAL_MS); - // Unref the timer so it doesn't keep the process alive - flushTimer.unref(); -} - -/** - * Track an analytics event - * - * Memory-safe guarantees: - * - Non-blocking (fire-and-forget) - * - Bounded buffer (drops oldest events if full) - * - No promise rejection - */ -export function trackEvent(event: Partial): void { - // Build full event with defaults - // Convert ISO timestamp to Clickhouse-compatible format (remove 'T' and 'Z') - const rawTimestamp = event.timestamp || new Date().toISOString(); - const clickhouseTimestamp = rawTimestamp.replace("T", " ").replace("Z", ""); - - const fullEvent: AnalyticsEvent = { - request_id: event.request_id || "", - timestamp: clickhouseTimestamp, - method: event.method || "", - endpoint: event.endpoint || "", - path: event.path || "", - url: event.url || "", - hostname: event.hostname || "", - source: event.source || "", - outcome: event.outcome || "", - status_code: event.status_code || 0, - error_type: event.error_type || "", - error_message: event.error_message || "", - error_severity: event.error_severity || "", - upstream_hostname: event.upstream_hostname || "", - upstream_status_code: event.upstream_status_code || 0, - upstream_error_code: event.upstream_error_code || "", - 
upstream_message: (event.upstream_message || "").slice(0, 500), // Truncate - duration_ms: event.duration_ms || 0, - fetch_ms: event.fetch_ms || 0, - cache_lookup_ms: event.cache_lookup_ms || 0, - cache_save_ms: event.cache_save_ms || 0, - cache_hit: event.cache_hit || 0, - cache_status: event.cache_status || "", - article_length: event.article_length || 0, - article_title: (event.article_title || "").slice(0, 500), // Truncate to prevent large strings - summary_length: event.summary_length || 0, - input_tokens: event.input_tokens || 0, - output_tokens: event.output_tokens || 0, - is_premium: event.is_premium || 0, - client_ip: event.client_ip || "", - user_agent: (event.user_agent || "").slice(0, 500), // Truncate - heap_used_mb: event.heap_used_mb || 0, - heap_total_mb: event.heap_total_mb || 0, - rss_mb: event.rss_mb || 0, - env: event.env || env.NODE_ENV, - version: event.version || process.env.npm_package_version || "unknown", - }; - - // MEMORY SAFETY: Drop oldest events if buffer is at capacity - if (eventBuffer.length >= MAX_BUFFER_SIZE) { - eventBuffer.shift(); - } - - eventBuffer.push(fullEvent); - - // Flush immediately if buffer hits batch size - if (eventBuffer.length >= BATCH_SIZE) { - // Fire-and-forget flush - flushEvents().catch(() => {}); - } else { - scheduleFlush(); - } -} - -/** - * Query helper for dashboard - * Returns empty array on error (graceful degradation) - * Uses semaphore to limit concurrent queries and prevent thread exhaustion - */ -export async function queryClickhouse(query: string): Promise { - const clickhouse = getClient(); - if (!clickhouse) return []; - - let slotAcquired = false; - - try { - // Acquire a query slot (may wait if at capacity) - await acquireQuerySlot(); - slotAcquired = true; - - // Re-check: ClickHouse may have been disabled while we waited for a slot - if (clickhouseDisabled) { - return []; - } - - const result = await clickhouse.query({ - query, - format: "JSONEachRow", - }); - return result.json(); - } 
catch (error) { - const message = error instanceof Error ? error.message : String(error); - // Check for connection errors and disable to prevent spam - if (message.includes("ECONNREFUSED") || message.includes("ENOTFOUND") || message.includes("ETIMEDOUT") || message.includes("Timeout") || message.includes("Authentication failed")) { - disableClickhouse(message); - } else if (message.includes("Query slot timeout") || message.includes("ClickHouse disabled")) { - // Silently return empty — these are expected during outages - } else { - console.error("[clickhouse] Query failed:", message); - } - return []; - } finally { - // Only release the slot if we actually acquired one - if (slotAcquired) { - releaseQuerySlot(); - } - } -} - -/** - * Get buffer and query stats for monitoring - */ -export function getBufferStats(): { - size: number; - maxSize: number; - activeQueries: number; - queuedQueries: number; - maxConcurrentQueries: number; -} { - return { - size: eventBuffer.length, - maxSize: MAX_BUFFER_SIZE, - activeQueries, - queuedQueries: queryQueue.length, - maxConcurrentQueries: MAX_CONCURRENT_QUERIES, - }; -} - -/** - * Graceful shutdown - flush remaining events - * Called on process exit - */ -export async function closeClickhouse(): Promise { - if (flushTimer) { - clearTimeout(flushTimer); - flushTimer = null; - } - if (adFlushTimer) { - clearTimeout(adFlushTimer); - adFlushTimer = null; - } - await Promise.all([flushEvents(), flushAdEvents()]); - if (client) { - await client.close(); - client = null; - } -} - -// Register shutdown handler (only once) -if (!isInitialized && typeof process !== "undefined") { - isInitialized = true; - process.on("beforeExit", async () => { - await closeClickhouse(); - }); -} diff --git a/lib/env.ts b/lib/env.ts index 8e69caf5..6e3daf56 100644 --- a/lib/env.ts +++ b/lib/env.ts @@ -14,11 +14,11 @@ export const env = createEnv({ UPSTASH_REDIS_REST_URL: z.string().url(), UPSTASH_REDIS_REST_TOKEN: z.string().min(1), - // Analytics - 
CLICKHOUSE_URL: z.string().url(), - CLICKHOUSE_USER: z.string().min(1), - CLICKHOUSE_PASSWORD: z.string().min(1), - CLICKHOUSE_DATABASE: z.string().min(1), + // Analytics (PostHog) - optional, gracefully degrades when not set + POSTHOG_API_KEY: z.string().optional(), + POSTHOG_HOST: z.string().url().optional(), + POSTHOG_PROJECT_ID: z.string().optional(), + POSTHOG_PERSONAL_API_KEY: z.string().optional(), // Alerting RESEND_API_KEY: z.string().min(1), @@ -35,6 +35,8 @@ export const env = createEnv({ NEXT_PUBLIC_URL: z.string().url(), NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: z.string().min(1), NEXT_PUBLIC_CLERK_PATRON_PLAN_ID: z.string().min(1), + NEXT_PUBLIC_POSTHOG_KEY: z.string().optional(), + NEXT_PUBLIC_POSTHOG_HOST: z.string().url().optional(), }, runtimeEnv: { @@ -43,10 +45,10 @@ export const env = createEnv({ DIFFBOT_API_KEY: process.env.DIFFBOT_API_KEY, UPSTASH_REDIS_REST_URL: process.env.UPSTASH_REDIS_REST_URL, UPSTASH_REDIS_REST_TOKEN: process.env.UPSTASH_REDIS_REST_TOKEN, - CLICKHOUSE_URL: process.env.CLICKHOUSE_URL, - CLICKHOUSE_USER: process.env.CLICKHOUSE_USER, - CLICKHOUSE_PASSWORD: process.env.CLICKHOUSE_PASSWORD, - CLICKHOUSE_DATABASE: process.env.CLICKHOUSE_DATABASE, + POSTHOG_API_KEY: process.env.POSTHOG_API_KEY, + POSTHOG_HOST: process.env.POSTHOG_HOST, + POSTHOG_PROJECT_ID: process.env.POSTHOG_PROJECT_ID, + POSTHOG_PERSONAL_API_KEY: process.env.POSTHOG_PERSONAL_API_KEY, RESEND_API_KEY: process.env.RESEND_API_KEY, ALERT_EMAIL: process.env.ALERT_EMAIL, CORS_ORIGIN: process.env.CORS_ORIGIN, @@ -56,6 +58,8 @@ export const env = createEnv({ NEXT_PUBLIC_URL: process.env.NEXT_PUBLIC_URL, NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: process.env.NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY, NEXT_PUBLIC_CLERK_PATRON_PLAN_ID: process.env.NEXT_PUBLIC_CLERK_PATRON_PLAN_ID, + NEXT_PUBLIC_POSTHOG_KEY: process.env.NEXT_PUBLIC_POSTHOG_KEY, + NEXT_PUBLIC_POSTHOG_HOST: process.env.NEXT_PUBLIC_POSTHOG_HOST, }, emptyStringAsUndefined: true, diff --git a/lib/hard-paywalls.ts 
b/lib/hard-paywalls.ts index 3fc2de4c..433966e7 100644 --- a/lib/hard-paywalls.ts +++ b/lib/hard-paywalls.ts @@ -8,7 +8,7 @@ * This list is maintained based on analytics data showing 0% success rates * across all extraction sources (smry-fast, smry-slow, wayback). * - * To add a site: Verify it has <5% success rate across all sources in Clickhouse + * To add a site: Verify it has <5% success rate across all sources in PostHog analytics * To remove a site: Verify the site has changed their paywall policy * * Last updated: 2026-01-06 diff --git a/lib/hooks/use-analytics.ts b/lib/hooks/use-analytics.ts new file mode 100644 index 00000000..76114215 --- /dev/null +++ b/lib/hooks/use-analytics.ts @@ -0,0 +1,111 @@ +"use client"; + +import { useCallback } from "react"; +import { usePostHog } from "posthog-js/react"; +import { useIsPremium } from "./use-is-premium"; + +// All tracked event names — only add events that are actually used in code +export type AnalyticsEvent = + // Home + | "article_submitted" + | "url_validation_error" + // Article reader + | "article_loaded" + | "article_error" + | "chat_opened" + | "settings_opened" + | "ad_loaded" + | "ad_impression_client" + | "ad_click_client" + | "ad_dismiss_client" + // Chat + | "chat_message_sent" + | "chat_suggestion_clicked" + | "chat_message_copied" + | "chat_cleared" + // Share + | "article_shared" + // Highlights + | "highlight_created" + | "highlights_exported" + // Settings + | "setting_changed" + // TTS + | "tts_played" + | "tts_paused" + | "tts_voice_changed"; + +function getDeviceType(): "mobile" | "tablet" | "desktop" { + if (typeof window === "undefined") return "desktop"; + const w = window.innerWidth; + if (w < 768) return "mobile"; + if (w < 1024) return "tablet"; + return "desktop"; +} + +/** + * Shared analytics hook wrapping PostHog with auto-enrichment. 
+ * + * Usage: + * const { track, trackArticle } = useAnalytics(); + * track("article_shared", { method: "copy_link" }); + * trackArticle("article_loaded", articleUrl, { source: "smry-fast" }); + */ +export function useAnalytics() { + const posthog = usePostHog(); + const { isPremium } = useIsPremium(); + + const track = useCallback( + (event: AnalyticsEvent, props?: Record<string, unknown>) => { + if (!posthog) return; + try { + posthog.capture(event, { + is_premium: isPremium, + device_type: getDeviceType(), + locale: typeof navigator !== "undefined" ? navigator.language : undefined, + ...props, + }); + } catch { + // Analytics should never crash the app + } + }, + [posthog, isPremium], + ); + + const trackArticle = useCallback( + (event: AnalyticsEvent, articleUrl: string, props?: Record<string, unknown>) => { + try { + const hostname = new URL(articleUrl).hostname; + track(event, { article_url: articleUrl, hostname, ...props }); + } catch { + track(event, { article_url: articleUrl, ...props }); + } + }, + [track], + ); + + /** + * Mark a feature as "used" on the user's PostHog profile. + * Uses $set_once so only the first usage date is recorded. + * Build cohorts in PostHog: "users who used TTS", "users who shared", etc. 
+ */ + const markFeatureUsed = useCallback( + (feature: "tts" | "chat" | "share" | "highlights" | "export_highlights") => { + if (!posthog) return; + try { + posthog.capture("feature_used", { + feature, + is_premium: isPremium, + device_type: getDeviceType(), + $set_once: { [`first_used_${feature}`]: new Date().toISOString() }, + $set: { [`last_used_${feature}`]: new Date().toISOString() }, + }); + } catch { + // Analytics should never crash the app + } + }, + [posthog, isPremium], + ); + + return { track, trackArticle, markFeatureUsed }; +} diff --git a/lib/hooks/use-gravity-ad.ts b/lib/hooks/use-gravity-ad.ts index f7ee37fd..c7ccfa9b 100644 --- a/lib/hooks/use-gravity-ad.ts +++ b/lib/hooks/use-gravity-ad.ts @@ -138,9 +138,9 @@ export interface UseGravityAdResult { ad: ContextAd | null; ads: ContextAd[]; isLoading: boolean; - fireImpression: (ad?: ContextAd) => void; - fireClick: (ad?: ContextAd) => void; - fireDismiss: (ad?: ContextAd) => void; + fireImpression: (ad?: ContextAd, placement?: string, adIndex?: number) => void; + fireClick: (ad?: ContextAd, placement?: string, adIndex?: number) => void; + fireDismiss: (ad?: ContextAd, placement?: string, adIndex?: number) => void; } // Ad refresh interval — keep ads visible long enough for users to engage. @@ -314,7 +314,12 @@ export function useGravityAd({ // Helper to send tracking events via sendBeacon (non-blocking) // Uses /api/px endpoint (named to avoid ad blocker detection) - const sendTrackingEvent = useCallback((type: "impression" | "click" | "dismiss", ad: ContextAd | null) => { + const sendTrackingEvent = useCallback(( + type: "impression" | "click" | "dismiss", + ad: ContextAd | null, + placement?: string, + adIndex?: number, + ) => { if (!ad || !sessionId) return; // Extract hostname from current page URL @@ -340,9 +345,11 @@ export function useGravityAd({ os: deviceInfo?.os, browser: deviceInfo?.browser, adProvider: ad.ad_provider, + placement: placement || "unknown", + adIndex: adIndex ?? 
-1, }); - // /api/px handles Gravity forwarding (for impressions) and ClickHouse logging + // /api/px handles Gravity forwarding (for impressions) and PostHog logging const trackUrl = getApiUrl("/api/px"); // Use sendBeacon for reliable non-blocking tracking @@ -359,12 +366,12 @@ export function useGravityAd({ } }, [sessionId, url, deviceInfo]); - const fireImpression = useCallback((targetAd?: ContextAd) => { + const fireImpression = useCallback((targetAd?: ContextAd, placement?: string, adIndex?: number) => { const ad = targetAd ?? query.data?.[0]; if (!ad) return; - // Send to /api/px for Gravity forwarding + ClickHouse logging - sendTrackingEvent("impression", ad); + // Send to /api/px for Gravity forwarding + PostHog logging + sendTrackingEvent("impression", ad, placement, adIndex); // For ZeroClick ads, track impressions via ZeroClick v2 API (client-side only) // Docs: https://developer.zeroclick.ai/docs/api-reference/tracking/track-offer-impressions @@ -380,14 +387,14 @@ export function useGravityAd({ } }, [query.data, sendTrackingEvent]); - const fireClick = useCallback((targetAd?: ContextAd) => { + const fireClick = useCallback((targetAd?: ContextAd, placement?: string, adIndex?: number) => { const ad = targetAd ?? query.data?.[0]; - sendTrackingEvent("click", ad ?? null); + sendTrackingEvent("click", ad ?? null, placement, adIndex); }, [query.data, sendTrackingEvent]); - const fireDismiss = useCallback((targetAd?: ContextAd) => { + const fireDismiss = useCallback((targetAd?: ContextAd, placement?: string, adIndex?: number) => { const ad = targetAd ?? query.data?.[0]; - sendTrackingEvent("dismiss", ad ?? null); + sendTrackingEvent("dismiss", ad ?? null, placement, adIndex); }, [query.data, sendTrackingEvent]); const ads = query.data ?? 
[]; diff --git a/lib/memory-monitor.ts b/lib/memory-monitor.ts index 72d96647..3a6c4511 100644 --- a/lib/memory-monitor.ts +++ b/lib/memory-monitor.ts @@ -4,11 +4,11 @@ * Logs memory stats every 30 seconds to help identify memory leaks. * Triggers garbage collection (Node.js --expose-gc) when memory grows. * - * When RSS exceeds threshold, logs to ClickHouse for post-mortem analysis. + * When RSS exceeds threshold, logs to PostHog for post-mortem analysis. * Railway's healthcheck on /health will detect the unhealthy status and restart. */ -import { trackEvent } from "./clickhouse"; +import { trackEvent } from "./posthog"; import { getAllCacheStats } from "./memory-tracker"; const INTERVAL_MS = 30_000; // 30 seconds @@ -118,7 +118,7 @@ function logMemory(): void { }) ); - // Log to ClickHouse for analysis + // Log to PostHog for analysis trackEvent({ request_id: `gc_${Date.now()}`, endpoint: "/internal/gc", @@ -175,7 +175,7 @@ function logMemory(): void { }) ); - // Log to ClickHouse for post-mortem analysis + // Log to PostHog for post-mortem analysis trackEvent({ request_id: `memory_spike_${Date.now()}`, endpoint: "/internal/memory", @@ -208,7 +208,7 @@ function logMemory(): void { }) ); - // Log to ClickHouse for post-mortem analysis + // Log to PostHog for post-mortem analysis trackEvent({ request_id: `memory_critical_${Date.now()}`, endpoint: "/internal/memory", diff --git a/lib/memory-tracker.ts b/lib/memory-tracker.ts index 0058b2cd..e0484436 100644 --- a/lib/memory-tracker.ts +++ b/lib/memory-tracker.ts @@ -18,7 +18,7 @@ import { createLogger } from "./logger"; import { getZeroClickCacheStats } from "./zeroclick"; -import { getBufferStats } from "./clickhouse"; +import { getBufferStats } from "./posthog"; import { abuseRateLimiter } from "./rate-limit-memory"; import { getFetchSlotStats } from "./article-concurrency"; @@ -73,7 +73,7 @@ function getMemoryMb(): { heapUsed: number; rss: number; external: number; array export function getAllCacheStats(): Record 
{ try { const zeroClick = getZeroClickCacheStats(); - const clickhouse = getBufferStats(); + const posthogBuffer = getBufferStats(); const fetchSlots = getFetchSlotStats(); @@ -83,9 +83,9 @@ export function getAllCacheStats(): Record { zeroclick_orphaned: zeroClick.orphanedCount, zeroclick_total_created: zeroClick.totalCreated, zeroclick_total_closed: zeroClick.totalClosed, - clickhouse_buffer: clickhouse.size, - clickhouse_active_queries: clickhouse.activeQueries, - clickhouse_queued_queries: clickhouse.queuedQueries, + posthog_buffer: posthogBuffer.size, + posthog_active_queries: posthogBuffer.activeQueries, + posthog_queued_queries: posthogBuffer.queuedQueries, rate_limiter_ips: abuseRateLimiter.size, active_operations: activeOperations.size, article_active_fetches: fetchSlots.activeFetches, diff --git a/lib/posthog.ts b/lib/posthog.ts new file mode 100644 index 00000000..36c30c27 --- /dev/null +++ b/lib/posthog.ts @@ -0,0 +1,293 @@ +import { PostHog } from "posthog-node"; + +/** + * PostHog Analytics Client + * + * Server-side analytics client. PostHog handles batching, + * retries, and connection management internally via its SDK. 
+ * + * Env vars: + * POSTHOG_API_KEY – project API key (server-side) + * POSTHOG_HOST – PostHog instance URL + * POSTHOG_PROJECT_ID – numeric project ID (for HogQL queries) + * POSTHOG_PERSONAL_API_KEY – personal API key (for HogQL query API) + */ + +let client: PostHog | null = null; + +function getClient(): PostHog | null { + if (client) return client; + + const apiKey = process.env.POSTHOG_API_KEY; + const host = process.env.POSTHOG_HOST; + if (!apiKey || !host) return null; + + client = new PostHog(apiKey, { + host, + flushAt: 50, + flushInterval: 5000, + }); + return client; +} + +// --------------------------------------------------------------------------- +// Type exports +// --------------------------------------------------------------------------- + +export type ErrorSeverity = "expected" | "degraded" | "unexpected" | ""; + +export interface AnalyticsEvent { + request_id: string; + timestamp: string; + method: string; + endpoint: string; + path: string; + url: string; + hostname: string; + source: string; + outcome: string; + status_code: number; + error_type: string; + error_message: string; + error_severity: ErrorSeverity; + upstream_hostname: string; + upstream_status_code: number; + upstream_error_code: string; + upstream_message: string; + duration_ms: number; + fetch_ms: number; + cache_lookup_ms: number; + cache_save_ms: number; + cache_hit: number; + cache_status: string; + article_length: number; + article_title: string; + summary_length: number; + input_tokens: number; + output_tokens: number; + is_premium: number; + client_ip: string; + user_agent: string; + heap_used_mb: number; + heap_total_mb: number; + rss_mb: number; + env: string; + version: string; +} + +// --------------------------------------------------------------------------- +// Ad Event Types +// --------------------------------------------------------------------------- + +export type AdEventStatus = "filled" | "no_fill" | "premium_user" | "gravity_error" | "timeout" | 
"error"; +export type AdEventType = "request" | "impression" | "click" | "dismiss"; + +export interface AdEvent { + event_id: string; + timestamp: string; + event_type: AdEventType; + url: string; + hostname: string; + article_title: string; + article_content_length: number; + session_id: string; + user_id: string; + is_premium: number; + device_type: string; + os: string; + browser: string; + status: AdEventStatus; + gravity_status_code: number; + error_message: string; + gravity_forwarded: number; + brand_name: string; + ad_title: string; + ad_text: string; + click_url: string; + imp_url: string; + cta: string; + favicon: string; + ad_count: number; + duration_ms: number; + env: string; + placement: string; + ad_index: number; + ad_provider: string; +} + +// --------------------------------------------------------------------------- +// trackEvent – captures request analytics +// --------------------------------------------------------------------------- + +export function trackEvent(event: Partial<AnalyticsEvent>): void { + const posthog = getClient(); + if (!posthog) return; + + const distinctId = event.request_id || `req_${crypto.randomUUID().slice(0, 8)}`; + + posthog.capture({ + distinctId, + event: "request_event", + properties: { + ...event, + timestamp: event.timestamp || new Date().toISOString(), + }, + }); +} + +// --------------------------------------------------------------------------- +// trackAdEvent – captures ad funnel analytics +// --------------------------------------------------------------------------- + +export function trackAdEvent(event: Partial<AdEvent>): void { + const posthog = getClient(); + if (!posthog) return; + + const eventId = event.event_id || crypto.randomUUID(); + const distinctId = event.session_id || eventId; + + posthog.capture({ + distinctId, + event: "ad_event", + properties: { + ...event, + event_id: eventId, + timestamp: event.timestamp || new Date().toISOString(), + }, + }); +} + +// 
--------------------------------------------------------------------------- +// queryPostHog – HogQL query API +// --------------------------------------------------------------------------- + +export async function queryPostHog<T = Record<string, unknown>>(query: string): Promise<T[]> { + const host = process.env.POSTHOG_HOST; + const projectId = process.env.POSTHOG_PROJECT_ID; + const personalApiKey = process.env.POSTHOG_PERSONAL_API_KEY; + + if (!host || !projectId || !personalApiKey) return []; + + try { + const response = await fetch(`${host}/api/projects/${projectId}/query/`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${personalApiKey}`, + }, + body: JSON.stringify({ query: { kind: "HogQLQuery", query } }), + }); + + if (!response.ok) { + console.error(`[posthog] HogQL query failed (${response.status}):`, await response.text().catch(() => "")); + return []; + } + + const data = await response.json(); + // HogQL returns { columns: string[], results: any[][] } + const columns: string[] = data.columns ?? []; + const rows: unknown[][] = data.results ?? []; + + return rows.map((row) => { + const obj: Record<string, unknown> = {}; + columns.forEach((col, i) => { + obj[col] = row[i]; + }); + return obj as T; + }); + } catch (error) { + console.error("[posthog] HogQL query error:", error instanceof Error ? 
error.message : String(error)); + return []; + } +} + +// --------------------------------------------------------------------------- +// trackLLMGeneration – PostHog LLM analytics ($ai_generation events) +// See: https://posthog.com/docs/llm-analytics/start-here +// --------------------------------------------------------------------------- + +export interface LLMGenerationEvent { + distinctId: string; + traceId: string; + model: string; + provider: string; + inputTokens?: number; + outputTokens?: number; + latencyMs: number; + input?: Array<{ role: string; content: string }>; + outputContent?: string; + isError?: boolean; + errorMessage?: string; + httpStatus?: number; + isPremium?: boolean; + language?: string; + messageCount?: number; +} + +export function trackLLMGeneration(event: LLMGenerationEvent): void { + const posthog = getClient(); + if (!posthog) return; + + posthog.capture({ + distinctId: event.distinctId, + event: "$ai_generation", + properties: { + $ai_trace_id: event.traceId, + $ai_model: event.model, + $ai_provider: event.provider, + $ai_input_tokens: event.inputTokens, + $ai_output_tokens: event.outputTokens, + $ai_latency: event.latencyMs / 1000, // PostHog expects seconds + $ai_is_error: event.isError ?? false, + $ai_http_status: event.httpStatus ?? 
200, + ...(event.input && { $ai_input: event.input }), + ...(event.outputContent && { + $ai_output_choices: [{ role: "assistant", content: event.outputContent }], + }), + // Custom properties for SMRY-specific analysis + is_premium: event.isPremium, + language: event.language, + message_count: event.messageCount, + }, + }); +} + +// --------------------------------------------------------------------------- +// getBufferStats – simplified (PostHog SDK manages its own buffer) +// --------------------------------------------------------------------------- + +export function getBufferStats(): { + size: number; + maxSize: number; + activeQueries: number; + queuedQueries: number; + maxConcurrentQueries: number; +} { + return { + size: 0, + maxSize: 0, + activeQueries: 0, + queuedQueries: 0, + maxConcurrentQueries: 0, + }; +} + +// --------------------------------------------------------------------------- +// closePostHog – graceful shutdown +// --------------------------------------------------------------------------- + +export async function closePostHog(): Promise<void> { + if (client) { + await client.shutdown(); + client = null; + } +} + +// Register shutdown handler +let isInitialized = false; +if (!isInitialized && typeof process !== "undefined") { + isInitialized = true; + process.on("beforeExit", async () => { + await closePostHog(); + }); +} diff --git a/lib/request-context.ts b/lib/request-context.ts index ce1171e1..be6d3f1e 100644 --- a/lib/request-context.ts +++ b/lib/request-context.ts @@ -1,6 +1,6 @@ import { createLogger } from "./logger"; import { randomUUID } from "crypto"; -import { trackEvent, ErrorSeverity } from "./clickhouse"; +import { trackEvent, ErrorSeverity } from "./posthog"; import { env } from "../server/env"; /** @@ -146,8 +146,7 @@ export function createRequestContext(initial?: InitialContext): RequestContext { logger.error(event, "request completed"); } - // Send to Clickhouse analytics (fire-and-forget, non-blocking) - // trackEvent is
memory-safe: bounded buffer, auto-flush, no errors thrown + // Send to PostHog analytics (fire-and-forget, non-blocking) trackEvent({ request_id: event.request_id as string, timestamp: event.timestamp as string, diff --git a/package.json b/package.json index b7e2541e..7b10f751 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "node": ">=24" }, "scripts": { - "dev": "docker-compose up -d clickhouse && bun run --watch server/index.ts & next dev", + "dev": "bun run --watch server/index.ts & next dev", "dev:server": "bun run --watch server/index.ts", "dev:next": "next dev", "dev:app-only": "bun run --watch server/index.ts & next dev", @@ -34,7 +34,6 @@ "@base-ui/react": "^1.1.0", "@clerk/backend": "^2.29.0", "@clerk/nextjs": "^6.36.5", - "@clickhouse/client": "^1.15.0", "@databuddy/sdk": "^2.3.29", "@elysiajs/cors": "^1.4.1", "@elysiajs/cron": "^1.4.1", @@ -80,6 +79,8 @@ "next-themes": "^0.4.6", "nuqs": "^2.8.0", "pino": "^8.19.0", + "posthog-js": "^1.341.1", + "posthog-node": "^5.24.10", "react": "19.2.1", "react-dom": "19.2.1", "react-markdown": "^10.1.0", diff --git a/railway.template.json b/railway.template.json index e985afc0..2ee87a38 100644 --- a/railway.template.json +++ b/railway.template.json @@ -1,7 +1,7 @@ { "$schema": "https://railway.app/railway.schema.json", "name": "SMRY.ai", - "description": "Paywall bypass and article summarization with Clickhouse analytics", + "description": "Paywall bypass and article summarization with PostHog analytics", "buttons": [ { "name": "Deploy to Railway", @@ -30,22 +30,29 @@ "description": "Public URL of your deployment (e.g., https://smry.ai)", "required": true }, - "CLICKHOUSE_URL": { - "value": "http://${{clickhouse.RAILWAY_PRIVATE_DOMAIN}}:8123" + "POSTHOG_API_KEY": { + "description": "PostHog project API key (server-side)", + "required": true + }, + "POSTHOG_HOST": { + "description": "PostHog instance URL (e.g., https://us.i.posthog.com)", + "required": true }, - "CLICKHOUSE_USER": { - "value": "default" + 
"POSTHOG_PROJECT_ID": { + "description": "PostHog project ID (for HogQL queries)", + "required": true }, - "CLICKHOUSE_PASSWORD": { - "value": "${{clickhouse.CLICKHOUSE_PASSWORD}}" + "POSTHOG_PERSONAL_API_KEY": { + "description": "PostHog personal API key (for HogQL query API)", + "required": true }, - "CLICKHOUSE_DATABASE": { - "value": "smry_analytics" + "NEXT_PUBLIC_POSTHOG_KEY": { + "description": "PostHog project API key (client-side)", + "required": true }, - "ANALYTICS_SECRET_KEY": { - "description": "Secret key for accessing /admin/analytics", - "required": true, - "generate": true + "NEXT_PUBLIC_POSTHOG_HOST": { + "description": "PostHog instance URL (client-side)", + "required": true }, "UPSTASH_REDIS_REST_URL": { "description": "Upstash Redis REST URL", @@ -77,43 +84,6 @@ "enabled": true } } - }, - "clickhouse": { - "name": "clickhouse", - "description": "Clickhouse analytics database (memory-optimized)", - "source": { - "repo": "https://github.com/mrmps/SMRY" - }, - "build": { - "builder": "DOCKERFILE", - "dockerfilePath": "docker/clickhouse/Dockerfile", - "buildContext": "docker/clickhouse" - }, - "variables": { - "CLICKHOUSE_DB": { - "value": "smry_analytics" - }, - "CLICKHOUSE_USER": { - "value": "default" - }, - "CLICKHOUSE_PASSWORD": { - "generate": true - }, - "CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT": { - "value": "1" - } - }, - "volumes": [ - { - "mount": "/var/lib/clickhouse", - "name": "clickhouse-data" - } - ], - "networking": { - "public": { - "enabled": false - } - } } } } diff --git a/scripts/analyze-sources.ts b/scripts/analyze-sources.ts deleted file mode 100644 index 0bfff2bb..00000000 --- a/scripts/analyze-sources.ts +++ /dev/null @@ -1,380 +0,0 @@ -/** - * Analyze source effectiveness from ClickHouse logs - * Run with: bun run scripts/analyze-sources.ts - * Make sure .env.local is loaded or env vars are set - */ - -import { createClient } from "@clickhouse/client"; - -// Load .env.local manually for bun -const projectRoot = 
import.meta.dir.replace("/scripts", ""); -const envFile = Bun.file(`${projectRoot}/.env`); -const envContent = await envFile.text(); -for (const line of envContent.split("\n")) { - const trimmed = line.trim(); - if (trimmed && !trimmed.startsWith("#")) { - const [key, ...valueParts] = trimmed.split("="); - if (key && valueParts.length > 0) { - const value = valueParts.join("=").replace(/^["']|["']$/g, ""); - process.env[key] = value; - } - } -} - -const client = createClient({ - url: process.env.CLICKHOUSE_URL!, - username: process.env.CLICKHOUSE_USER!, - password: process.env.CLICKHOUSE_PASSWORD!, - database: process.env.CLICKHOUSE_DATABASE!, - request_timeout: 60_000, -}); - -async function query(sql: string): Promise { - const result = await client.query({ query: sql, format: "JSONEachRow" }); - return result.json(); -} - -async function main() { - console.log("=== Source Effectiveness Analysis ===\n"); - - // 1. Overall source success rates - console.log("1. OVERALL SOURCE SUCCESS RATES (last 7 days)"); - console.log("-".repeat(60)); - const sourceRates = await query<{ - source: string; - total: string; - successes: string; - success_rate: string; - }>(` - SELECT - source, - count() as total, - countIf(outcome = 'success') as successes, - round(countIf(outcome = 'success') / count() * 100, 2) as success_rate - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source != '' - AND endpoint = '/api/article' - GROUP BY source - ORDER BY total DESC - `); - console.table(sourceRates); - - // 2. For URLs where multiple sources were tried, how many had only one success? - console.log("\n2. 
URLs WHERE ONLY ONE SOURCE SUCCEEDED (last 7 days)"); - console.log("-".repeat(60)); - const onlyOneWorked = await query<{ - url: string; - sources_tried: string; - sources_succeeded: string; - successful_source: string; - }>(` - SELECT - url, - uniq(source) as sources_tried, - uniqIf(source, outcome = 'success') as sources_succeeded, - groupArrayIf(source, outcome = 'success')[1] as successful_source - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source != '' - AND endpoint = '/api/article' - GROUP BY url - HAVING sources_tried >= 2 AND sources_succeeded = 1 - ORDER BY sources_tried DESC - LIMIT 50 - `); - console.log(`Found ${onlyOneWorked.length} URLs where only 1 source worked`); - if (onlyOneWorked.length > 0) { - console.table(onlyOneWorked.slice(0, 20)); - } - - // 3. Which source is the "only one that works" most often? - console.log("\n3. WHEN ONLY ONE SOURCE WORKS, WHICH ONE? (last 7 days)"); - console.log("-".repeat(60)); - const singleSourceWinner = await query<{ - successful_source: string; - count: string; - percentage: string; - }>(` - WITH single_success_urls AS ( - SELECT - url, - groupArrayIf(source, outcome = 'success')[1] as successful_source - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source != '' - AND endpoint = '/api/article' - GROUP BY url - HAVING uniq(source) >= 2 AND uniqIf(source, outcome = 'success') = 1 - ) - SELECT - successful_source, - count() as count, - round(count() / (SELECT count() FROM single_success_urls) * 100, 2) as percentage - FROM single_success_urls - GROUP BY successful_source - ORDER BY count DESC - `); - console.table(singleSourceWinner); - - // 4. What about when ALL sources fail vs when at least one works? - console.log("\n4. 
URL OUTCOME DISTRIBUTION (last 7 days)"); - console.log("-".repeat(60)); - const urlOutcomes = await query<{ - outcome_type: string; - url_count: string; - percentage: string; - }>(` - WITH url_stats AS ( - SELECT - url, - uniq(source) as sources_tried, - uniqIf(source, outcome = 'success') as sources_succeeded - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source != '' - AND endpoint = '/api/article' - GROUP BY url - HAVING sources_tried >= 2 - ) - SELECT - CASE - WHEN sources_succeeded = 0 THEN 'all_failed' - WHEN sources_succeeded = 1 THEN 'only_one_worked' - WHEN sources_succeeded = 2 THEN 'two_worked' - WHEN sources_succeeded = 3 THEN 'three_worked' - ELSE 'all_worked' - END as outcome_type, - count() as url_count, - round(count() / (SELECT count() FROM url_stats) * 100, 2) as percentage - FROM url_stats - GROUP BY outcome_type - ORDER BY url_count DESC - `); - console.table(urlOutcomes); - - // 5. Correlation: when smry-fast fails, how often does smry-slow/wayback save the day? - console.log("\n5. FALLBACK EFFECTIVENESS: When smry-fast fails... 
(last 7 days)"); - console.log("-".repeat(60)); - const fallbackStats = await query<{ - scenario: string; - count: string; - percentage: string; - }>(` - WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success, - maxIf(1, source = 'smry-fast') as fast_tried, - maxIf(1, source = 'smry-slow') as slow_tried, - maxIf(1, source = 'wayback') as wayback_tried - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - HAVING fast_tried = 1 AND fast_success = 0 -- smry-fast was tried and failed - ) - SELECT - CASE - WHEN slow_success = 1 AND wayback_success = 1 THEN 'both smry-slow and wayback worked' - WHEN slow_success = 1 THEN 'only smry-slow worked' - WHEN wayback_success = 1 THEN 'only wayback worked' - ELSE 'nothing worked' - END as scenario, - count() as count, - round(count() / (SELECT count() FROM url_outcomes) * 100, 2) as percentage - FROM url_outcomes - GROUP BY scenario - ORDER BY count DESC - `); - console.table(fallbackStats); - - // 6. Average latency by source - console.log("\n6. LATENCY BY SOURCE (last 7 days)"); - console.log("-".repeat(60)); - const latencyStats = await query<{ - source: string; - avg_ms: string; - p50_ms: string; - p95_ms: string; - p99_ms: string; - }>(` - SELECT - source, - round(avg(fetch_ms)) as avg_ms, - round(quantile(0.5)(fetch_ms)) as p50_ms, - round(quantile(0.95)(fetch_ms)) as p95_ms, - round(quantile(0.99)(fetch_ms)) as p99_ms - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source != '' - AND endpoint = '/api/article' - AND outcome = 'success' - AND fetch_ms > 0 - GROUP BY source - ORDER BY avg_ms - `); - console.table(latencyStats); - - // 7. 
If we called sources SEQUENTIALLY (fast -> slow -> wayback), what would be the impact? - console.log("\n7. SEQUENTIAL STRATEGY SIMULATION (last 7 days)"); - console.log("-".repeat(60)); - const sequentialSim = await query<{ - strategy: string; - urls_resolved: string; - avg_api_calls_per_url: string; - total_api_calls: string; - }>(` - WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - ), - sequential_analysis AS ( - SELECT - url, - fast_success, - slow_success, - wayback_success, - CASE - WHEN fast_success = 1 THEN 1 -- just fast - WHEN slow_success = 1 THEN 2 -- fast failed, then slow - WHEN wayback_success = 1 THEN 3 -- fast+slow failed, then wayback - ELSE 3 -- tried all 3, none worked - END as calls_needed_sequential, - 3 as calls_parallel - FROM url_outcomes - ) - SELECT - 'Current (parallel)' as strategy, - toString(countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1)) as urls_resolved, - '3.00' as avg_api_calls_per_url, - toString(count() * 3) as total_api_calls - FROM sequential_analysis - UNION ALL - SELECT - 'Sequential (fast->slow->wayback)' as strategy, - toString(countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1)) as urls_resolved, - toString(round(avg(calls_needed_sequential), 2)) as avg_api_calls_per_url, - toString(sum(calls_needed_sequential)) as total_api_calls - FROM sequential_analysis - `); - console.table(sequentialSim); - - // 8. What if we removed a source entirely? - console.log("\n8. 
IMPACT OF REMOVING A SOURCE (last 7 days)"); - console.log("-".repeat(60)); - const removalImpact = await query<{ - scenario: string; - urls_resolved: string; - resolution_rate: string; - }>(` - WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - ) - SELECT - 'All 3 sources' as scenario, - toString(countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1)) as urls_resolved, - toString(round(countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) / count() * 100, 2)) as resolution_rate - FROM url_outcomes - UNION ALL - SELECT - 'Without smry-fast' as scenario, - toString(countIf(slow_success = 1 OR wayback_success = 1)) as urls_resolved, - toString(round(countIf(slow_success = 1 OR wayback_success = 1) / count() * 100, 2)) as resolution_rate - FROM url_outcomes - UNION ALL - SELECT - 'Without smry-slow' as scenario, - toString(countIf(fast_success = 1 OR wayback_success = 1)) as urls_resolved, - toString(round(countIf(fast_success = 1 OR wayback_success = 1) / count() * 100, 2)) as resolution_rate - FROM url_outcomes - UNION ALL - SELECT - 'Without wayback' as scenario, - toString(countIf(fast_success = 1 OR slow_success = 1)) as urls_resolved, - toString(round(countIf(fast_success = 1 OR slow_success = 1) / count() * 100, 2)) as resolution_rate - FROM url_outcomes - UNION ALL - SELECT - 'Only smry-fast' as scenario, - toString(countIf(fast_success = 1)) as urls_resolved, - toString(round(countIf(fast_success = 1) / count() * 100, 2)) as resolution_rate - FROM url_outcomes - UNION ALL - SELECT - 'Only smry-slow' as scenario, - toString(countIf(slow_success = 1)) as 
urls_resolved, - toString(round(countIf(slow_success = 1) / count() * 100, 2)) as resolution_rate - FROM url_outcomes - `); - console.table(removalImpact); - - // 9. Unique value: URLs where ONLY a specific source works - console.log("\n9. UNIQUE VALUE: URLs where ONLY this source works (last 7 days)"); - console.log("-".repeat(60)); - const uniqueValue = await query<{ - source: string; - exclusively_resolves: string; - percentage_of_resolutions: string; - }>(` - WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - ), - totals AS ( - SELECT countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) as total_resolved - FROM url_outcomes - ) - SELECT - 'smry-fast' as source, - toString(countIf(fast_success = 1 AND slow_success = 0 AND wayback_success = 0)) as exclusively_resolves, - toString(round(countIf(fast_success = 1 AND slow_success = 0 AND wayback_success = 0) / (SELECT total_resolved FROM totals) * 100, 2)) as percentage_of_resolutions - FROM url_outcomes - UNION ALL - SELECT - 'smry-slow' as source, - toString(countIf(fast_success = 0 AND slow_success = 1 AND wayback_success = 0)) as exclusively_resolves, - toString(round(countIf(fast_success = 0 AND slow_success = 1 AND wayback_success = 0) / (SELECT total_resolved FROM totals) * 100, 2)) as percentage_of_resolutions - FROM url_outcomes - UNION ALL - SELECT - 'wayback' as source, - toString(countIf(fast_success = 0 AND slow_success = 0 AND wayback_success = 1)) as exclusively_resolves, - toString(round(countIf(fast_success = 0 AND slow_success = 0 AND wayback_success = 1) / (SELECT total_resolved FROM totals) * 100, 2)) as 
percentage_of_resolutions - FROM url_outcomes - `); - console.table(uniqueValue); - - await client.close(); - console.log("\n=== Analysis Complete ==="); -} - -main().catch(console.error); diff --git a/scripts/chquery.sh b/scripts/chquery.sh deleted file mode 100755 index 1eb9d9b1..00000000 --- a/scripts/chquery.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash -# Query ClickHouse on Railway without leaving it permanently exposed. -# Usage: -# ./scripts/chquery.sh "SELECT count() FROM request_events" -# ./scripts/chquery.sh # opens interactive mode (reads from stdin) -# echo "SELECT 1" | ./scripts/chquery.sh - -set -euo pipefail - -RAILWAY_CONFIG="$HOME/.railway/config.json" -TOKEN=$(python3 -c "import json; print(json.load(open('$RAILWAY_CONFIG'))['user']['token'])") -GQL="https://backboard.railway.app/graphql/v2" -SERVICE_ID="018ada10-9a36-4cd8-a478-89cb1eee5e6f" -ENV_ID="3de92d8f-e295-490e-b228-ef4bd88306e7" - -gql() { - curl -sf -X POST "$GQL" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer $TOKEN" \ - --data-binary "$1" -} - -# 1. 
Create a temporary service domain -RESULT=$(gql "{\"query\":\"mutation { serviceDomainCreate(input: { serviceId: \\\"$SERVICE_ID\\\", environmentId: \\\"$ENV_ID\\\" }) { domain id } }\"}") -DOMAIN=$(echo "$RESULT" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['data']['serviceDomainCreate']['domain'])") -DOMAIN_ID=$(echo "$RESULT" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['data']['serviceDomainCreate']['id'])") - -if [ -z "$DOMAIN" ]; then - echo "Failed to create domain" >&2 - exit 1 -fi - -cleanup() { - # Delete domain - gql "{\"query\":\"mutation { serviceDomainDelete(id: \\\"$DOMAIN_ID\\\") }\"}" > /dev/null 2>&1 - # Delete PORT variable - gql "{\"query\":\"mutation { variableDelete(input: { projectId: \\\"3daa34e8-bdc3-4e74-ad0e-bf39091d4640\\\", serviceId: \\\"$SERVICE_ID\\\", environmentId: \\\"$ENV_ID\\\", name: \\\"PORT\\\" }) }\"}" > /dev/null 2>&1 -} -trap cleanup EXIT - -# 2. Set PORT=8123 so Railway routes to ClickHouse's HTTP interface -gql "{\"query\":\"mutation { variableUpsert(input: { projectId: \\\"3daa34e8-bdc3-4e74-ad0e-bf39091d4640\\\", serviceId: \\\"$SERVICE_ID\\\", environmentId: \\\"$ENV_ID\\\", name: \\\"PORT\\\", value: \\\"8123\\\" }) }\"}" > /dev/null - -CH_URL="https://$DOMAIN" -CH_USER="default" -CH_PASS=$(railway service clickhouse > /dev/null 2>&1 && railway variables list --kv 2>/dev/null | grep CLICKHOUSE_PASSWORD | cut -d= -f2) - -# 3. Wait for the domain to become reachable -echo "Waiting for ClickHouse to be reachable..." >&2 -for i in $(seq 1 30); do - if curl -sf --max-time 3 "$CH_URL/ping" > /dev/null 2>&1; then - break - fi - sleep 2 -done - -if ! curl -sf --max-time 3 "$CH_URL/ping" > /dev/null 2>&1; then - echo "Timed out waiting for ClickHouse" >&2 - exit 1 -fi - -# 4. 
Run the query -if [ $# -gt 0 ]; then - QUERY="$1" - FORMAT="${2:-PrettyCompact}" - curl -s "$CH_URL/?user=$CH_USER&password=$CH_PASS&database=smry_analytics" \ - --data-binary "$QUERY FORMAT $FORMAT" -else - # stdin mode - if [ -t 0 ]; then - echo "Enter query (Ctrl+D to send):" >&2 - fi - QUERY=$(cat) - curl -s "$CH_URL/?user=$CH_USER&password=$CH_PASS&database=smry_analytics" \ - --data-binary "$QUERY" -fi diff --git a/scripts/setup-railway.sh b/scripts/setup-railway.sh deleted file mode 100755 index c6a27b2c..00000000 --- a/scripts/setup-railway.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash -# One-time Railway setup for SMRY with Clickhouse -# After running this, future deploys are just: git push - -set -e - -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -echo -e "${GREEN}=== SMRY Railway Setup ===${NC}" -echo "" - -# Check if railway CLI is installed -if ! command -v railway &> /dev/null; then - echo "Installing Railway CLI..." - if command -v brew &> /dev/null; then - brew install railway - elif command -v npm &> /dev/null; then - npm install -g @railway/cli - else - echo "Please install Railway CLI: https://docs.railway.app/guides/cli" - exit 1 - fi -fi - -# Login if needed -if ! railway whoami &> /dev/null 2>&1; then - echo "Please login to Railway..." - railway login -fi - -# Link to project if not linked -echo "" -echo -e "${YELLOW}Linking to Railway project...${NC}" -if ! 
railway status &> /dev/null 2>&1; then - railway link -fi - -# Generate secrets -ANALYTICS_SECRET=$(openssl rand -hex 32) -CLICKHOUSE_PASSWORD=$(openssl rand -hex 16) - -echo "" -echo -e "${YELLOW}Adding Clickhouse service...${NC}" - -# Add Clickhouse service with Docker image -railway add \ - --service clickhouse \ - --image clickhouse/clickhouse-server:24.8 \ - --variables "CLICKHOUSE_DB=smry_analytics" \ - --variables "CLICKHOUSE_USER=default" \ - --variables "CLICKHOUSE_PASSWORD=$CLICKHOUSE_PASSWORD" \ - --variables "CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1" \ - 2>/dev/null || echo "Clickhouse service may already exist, continuing..." - -# Add persistent volume -echo "Adding persistent storage..." -railway service link clickhouse 2>/dev/null || true -railway volume add --mount-path /var/lib/clickhouse 2>/dev/null || echo "Volume may already exist" - -# Link back to main app service -echo "" -echo -e "${YELLOW}Configuring main app...${NC}" -railway service link SMRY 2>/dev/null || railway service link smry 2>/dev/null || true - -# Set app variables to connect to Clickhouse -railway variables \ - --set "CLICKHOUSE_URL=http://clickhouse.railway.internal:8123" \ - --set "CLICKHOUSE_USER=default" \ - --set "CLICKHOUSE_PASSWORD=$CLICKHOUSE_PASSWORD" \ - --set "CLICKHOUSE_DATABASE=smry_analytics" \ - --set "ANALYTICS_SECRET_KEY=$ANALYTICS_SECRET" - -echo "" -echo -e "${GREEN}=== Setup Complete! ===${NC}" -echo "" -echo "Clickhouse is configured and will auto-migrate on first request." 
-echo "" -echo -e "Your analytics dashboard secret key:" -echo -e " ${YELLOW}${ANALYTICS_SECRET}${NC}" -echo "" -echo "Access your dashboard at:" -echo " https://smry.ai/admin/analytics?key=${ANALYTICS_SECRET}" -echo "" -echo -e "${GREEN}From now on, just 'git push' to deploy!${NC}" diff --git a/scripts/source-analysis-queries.sql b/scripts/source-analysis-queries.sql deleted file mode 100644 index 69935022..00000000 --- a/scripts/source-analysis-queries.sql +++ /dev/null @@ -1,282 +0,0 @@ --- ============================================================================= --- SOURCE EFFECTIVENESS ANALYSIS QUERIES --- Run these in ClickHouse console to understand source behavior --- ============================================================================= - --- ----------------------------------------------------------------------------- --- 1. OVERALL SOURCE SUCCESS RATES (baseline) --- ----------------------------------------------------------------------------- -SELECT - source, - count() as total, - countIf(outcome = 'success') as successes, - round(countIf(outcome = 'success') / count() * 100, 2) as success_rate -FROM request_events -WHERE timestamp > now() - INTERVAL 7 DAY - AND source != '' - AND endpoint = '/api/article' -GROUP BY source -ORDER BY total DESC; - --- ----------------------------------------------------------------------------- --- 2. HOW OFTEN DOES ONLY ONE SOURCE WORK? (and which one?) --- This answers: "For articles where multiple sources were tried, --- how often was only one successful?" 
--- ----------------------------------------------------------------------------- -WITH url_outcomes AS ( - SELECT - url, - uniq(source) as sources_tried, - uniqIf(source, outcome = 'success') as sources_succeeded, - groupArrayIf(source, outcome = 'success') as successful_sources - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - HAVING sources_tried >= 2 -- At least 2 sources were tried -) -SELECT - CASE sources_succeeded - WHEN 0 THEN 'all_failed' - WHEN 1 THEN 'only_one_worked' - WHEN 2 THEN 'two_worked' - ELSE 'all_worked' - END as outcome_type, - count() as url_count, - round(count() / (SELECT count() FROM url_outcomes) * 100, 2) as percentage -FROM url_outcomes -GROUP BY outcome_type -ORDER BY url_count DESC; - --- ----------------------------------------------------------------------------- --- 3. WHEN ONLY ONE SOURCE WORKS, WHICH ONE IS IT? --- Shows which source is the "hero" when others fail --- ----------------------------------------------------------------------------- -WITH single_success_urls AS ( - SELECT - url, - groupArrayIf(source, outcome = 'success')[1] as successful_source - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - HAVING uniq(source) >= 2 AND uniqIf(source, outcome = 'success') = 1 -) -SELECT - successful_source, - count() as count, - round(count() / (SELECT count() FROM single_success_urls) * 100, 2) as percentage -FROM single_success_urls -GROUP BY successful_source -ORDER BY count DESC; - --- ----------------------------------------------------------------------------- --- 4. FALLBACK EFFECTIVENESS: When smry-fast fails, what saves the day? --- Answers: "If we called smry-fast first and it failed, how often would --- smry-slow or wayback have worked?" 
--- ----------------------------------------------------------------------------- -WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success, - maxIf(1, source = 'smry-fast') as fast_tried - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url - HAVING fast_tried = 1 AND fast_success = 0 -- smry-fast was tried and failed -) -SELECT - CASE - WHEN slow_success = 1 AND wayback_success = 1 THEN 'both smry-slow AND wayback worked' - WHEN slow_success = 1 THEN 'only smry-slow worked' - WHEN wayback_success = 1 THEN 'only wayback worked' - ELSE 'nothing worked (hard paywall or broken)' - END as scenario, - count() as count, - round(count() / (SELECT count() FROM url_outcomes) * 100, 2) as percentage -FROM url_outcomes -GROUP BY scenario -ORDER BY count DESC; - --- ----------------------------------------------------------------------------- --- 5. SEQUENTIAL vs PARALLEL: How many API calls would we save? 
--- Compares current parallel strategy (always 3 calls) vs sequential --- ----------------------------------------------------------------------------- -WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url -), -sequential_analysis AS ( - SELECT - url, - fast_success, - slow_success, - wayback_success, - -- Sequential: stop as soon as one works - CASE - WHEN fast_success = 1 THEN 1 -- just fast - WHEN slow_success = 1 THEN 2 -- fast failed, then slow - WHEN wayback_success = 1 THEN 3 -- fast+slow failed, then wayback - ELSE 3 -- tried all 3, none worked - END as calls_needed_sequential - FROM url_outcomes -) -SELECT - 'Current (parallel)' as strategy, - countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) as urls_resolved, - 3.00 as avg_api_calls_per_url, - count() * 3 as total_api_calls -FROM sequential_analysis -UNION ALL -SELECT - 'Sequential (fast→slow→wayback)' as strategy, - countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) as urls_resolved, - round(avg(calls_needed_sequential), 2) as avg_api_calls_per_url, - sum(calls_needed_sequential) as total_api_calls -FROM sequential_analysis; - --- ----------------------------------------------------------------------------- --- 6. IMPACT OF REMOVING A SOURCE --- What's the resolution rate if we removed each source? 
--- ----------------------------------------------------------------------------- -WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url -) -SELECT 'All 3 sources' as scenario, - countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) as urls_resolved, - count() as total_urls, - round(countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) / count() * 100, 2) as resolution_rate -FROM url_outcomes -UNION ALL -SELECT 'Without smry-fast' as scenario, - countIf(slow_success = 1 OR wayback_success = 1) as urls_resolved, - count() as total_urls, - round(countIf(slow_success = 1 OR wayback_success = 1) / count() * 100, 2) as resolution_rate -FROM url_outcomes -UNION ALL -SELECT 'Without smry-slow (Diffbot)' as scenario, - countIf(fast_success = 1 OR wayback_success = 1) as urls_resolved, - count() as total_urls, - round(countIf(fast_success = 1 OR wayback_success = 1) / count() * 100, 2) as resolution_rate -FROM url_outcomes -UNION ALL -SELECT 'Without wayback' as scenario, - countIf(fast_success = 1 OR slow_success = 1) as urls_resolved, - count() as total_urls, - round(countIf(fast_success = 1 OR slow_success = 1) / count() * 100, 2) as resolution_rate -FROM url_outcomes; - --- ----------------------------------------------------------------------------- --- 7. 
UNIQUE VALUE: URLs where ONLY this source works (exclusive value) --- These are the URLs you'd LOSE if you removed that source --- ----------------------------------------------------------------------------- -WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow' AND outcome = 'success') as slow_success, - maxIf(1, source = 'wayback' AND outcome = 'success') as wayback_success - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - GROUP BY url -) -SELECT 'smry-fast' as source, - countIf(fast_success = 1 AND slow_success = 0 AND wayback_success = 0) as exclusively_resolves, - round(countIf(fast_success = 1 AND slow_success = 0 AND wayback_success = 0) / - countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) * 100, 2) as pct_of_resolutions -FROM url_outcomes -UNION ALL -SELECT 'smry-slow (Diffbot)' as source, - countIf(fast_success = 0 AND slow_success = 1 AND wayback_success = 0) as exclusively_resolves, - round(countIf(fast_success = 0 AND slow_success = 1 AND wayback_success = 0) / - countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) * 100, 2) as pct_of_resolutions -FROM url_outcomes -UNION ALL -SELECT 'wayback' as source, - countIf(fast_success = 0 AND slow_success = 0 AND wayback_success = 1) as exclusively_resolves, - round(countIf(fast_success = 0 AND slow_success = 0 AND wayback_success = 1) / - countIf(fast_success = 1 OR slow_success = 1 OR wayback_success = 1) * 100, 2) as pct_of_resolutions -FROM url_outcomes; - --- ----------------------------------------------------------------------------- --- 8. 
LATENCY BY SOURCE (for sequential strategy timing estimation) --- ----------------------------------------------------------------------------- -SELECT - source, - round(avg(fetch_ms)) as avg_ms, - round(quantile(0.5)(fetch_ms)) as p50_ms, - round(quantile(0.95)(fetch_ms)) as p95_ms, - round(quantile(0.99)(fetch_ms)) as p99_ms -FROM request_events -WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - AND outcome = 'success' - AND fetch_ms > 0 -GROUP BY source -ORDER BY avg_ms; - --- ----------------------------------------------------------------------------- --- 9. HOSTNAME-SPECIFIC SOURCE EFFECTIVENESS --- Which sources work best for which sites? --- ----------------------------------------------------------------------------- -SELECT - hostname, - source, - count() as requests, - round(countIf(outcome = 'success') / count() * 100, 2) as success_rate -FROM request_events -WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow', 'wayback') - AND endpoint = '/api/article' - AND hostname != '' -GROUP BY hostname, source -HAVING requests >= 5 -- Only sites with enough data -ORDER BY hostname, success_rate DESC; - --- ----------------------------------------------------------------------------- --- 10. COST ANALYSIS: Diffbot API calls (smry-slow costs money) --- How many Diffbot calls could we save with sequential strategy? 
--- ----------------------------------------------------------------------------- -WITH url_outcomes AS ( - SELECT - url, - maxIf(1, source = 'smry-fast' AND outcome = 'success') as fast_success, - maxIf(1, source = 'smry-slow') as slow_tried - FROM request_events - WHERE timestamp > now() - INTERVAL 7 DAY - AND source IN ('smry-fast', 'smry-slow') - AND endpoint = '/api/article' - GROUP BY url -) -SELECT - countIf(slow_tried = 1) as current_diffbot_calls, - countIf(slow_tried = 1 AND fast_success = 0) as needed_diffbot_calls_sequential, - countIf(slow_tried = 1 AND fast_success = 1) as wasted_diffbot_calls, - round(countIf(slow_tried = 1 AND fast_success = 1) / countIf(slow_tried = 1) * 100, 2) as pct_wasted -FROM url_outcomes; diff --git a/server/env.ts b/server/env.ts index 6a3830b0..a5f0a8b4 100644 --- a/server/env.ts +++ b/server/env.ts @@ -23,11 +23,11 @@ export const env = createEnv({ UPSTASH_REDIS_REST_URL: z.string().url(), UPSTASH_REDIS_REST_TOKEN: z.string().min(1), - // Analytics - CLICKHOUSE_URL: z.string().url(), - CLICKHOUSE_USER: z.string().min(1), - CLICKHOUSE_PASSWORD: z.string().min(1), - CLICKHOUSE_DATABASE: z.string().min(1), + // Analytics (PostHog) - optional, gracefully degrades when not set + POSTHOG_API_KEY: z.string().optional(), + POSTHOG_HOST: z.string().url().optional(), + POSTHOG_PROJECT_ID: z.string().optional(), + POSTHOG_PERSONAL_API_KEY: z.string().optional(), // Alerting ALERT_EMAIL: z.string().email(), diff --git a/server/index.test.ts b/server/index.test.ts index f1e93382..5c3ec3db 100644 --- a/server/index.test.ts +++ b/server/index.test.ts @@ -66,7 +66,7 @@ describe("Elysia API Server", () => { new Request("http://localhost/api/article?url=https://httpbin.org/html&source=smry-fast") ); // May return 200 or 500 depending on external service - just verify route is hit - expect([200, 500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); }); it("should accept valid smry-slow source", async () => 
{ @@ -74,7 +74,7 @@ describe("Elysia API Server", () => { new Request("http://localhost/api/article?url=https://example.com&source=smry-slow") ); // Just verify route accepts the source - expect([200, 500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); }); it("should accept valid wayback source", async () => { @@ -83,7 +83,7 @@ describe("Elysia API Server", () => { new Request("http://localhost/api/article?url=https://example.com&source=wayback") ); // Just verify route accepts the source (may timeout with 500) - expect([200, 500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); }, { timeout: 15000 }); it("should block hard paywall sites", async () => { @@ -103,8 +103,8 @@ describe("Elysia API Server", () => { new Request("http://localhost/api/admin") ); - // May return 200 or 500 depending on ClickHouse availability - expect([200, 500]).toContain(response.status); + // May return 200, 401 (no token), or 500 depending on PostHog availability + expect([200, 401, 500]).toContain(response.status); if (response.status === 200) { const body = await response.json(); @@ -120,7 +120,7 @@ describe("Elysia API Server", () => { const response = await app.handle( new Request("http://localhost/api/admin?range=1h") ); - expect([200, 500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); if (response.status === 200) { const body = await response.json(); @@ -132,7 +132,7 @@ describe("Elysia API Server", () => { const response = await app.handle( new Request("http://localhost/api/admin?range=7d") ); - expect([200, 500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); if (response.status === 200) { const body = await response.json(); @@ -144,7 +144,7 @@ describe("Elysia API Server", () => { const response = await app.handle( new Request("http://localhost/api/admin?hostname=example.com&source=smry-fast&outcome=success") ); - expect([200, 
500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); if (response.status === 200) { const body = await response.json(); @@ -159,7 +159,7 @@ describe("Elysia API Server", () => { const response = await app.handle( new Request("http://localhost/api/admin?urlSearch=test") ); - expect([200, 500]).toContain(response.status); + expect([200, 401, 500]).toContain(response.status); if (response.status === 200) { const body = await response.json(); diff --git a/server/routes/admin.ts b/server/routes/admin.ts index 0a97961a..cfbd6ca7 100644 --- a/server/routes/admin.ts +++ b/server/routes/admin.ts @@ -6,7 +6,7 @@ import { Elysia, t } from "elysia"; import { timingSafeEqual } from "crypto"; -import { queryClickhouse, getBufferStats } from "../../lib/clickhouse"; +import { queryPostHog, getBufferStats } from "../../lib/posthog"; import { env } from "../env"; /** @@ -420,7 +420,7 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( const outcomeFilter = query.outcome || ""; const urlSearch = query.urlSearch || ""; - // Build WHERE clause for filtered queries + // Build WHERE clause for filtered queries (HogQL – properties.* prefix) const buildWhereClause = (options: { timeInterval?: string; includeFilters?: boolean; @@ -428,29 +428,30 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( const { timeInterval = `${hours} HOUR`, includeFilters = true } = options; const conditions: string[] = []; + // Event type filter for PostHog events table + conditions.push(`event = 'request_event'`); + // Always include time filter conditions.push(`timestamp > now() - INTERVAL ${timeInterval}`); // Always filter out empty hostnames - conditions.push(`hostname != ''`); + conditions.push(`properties.hostname != ''`); if (includeFilters) { - // Escape backslashes first, then single quotes (order matters for SQL injection prevention) - const escapeForClickhouse = (str: string) => str.replace(/\\/g, "\\\\").replace(/'/g, "''"); - // For 
LIKE patterns, also escape % and _ wildcards (after backslash escaping) - const escapeForClickhouseLike = (str: string) => - escapeForClickhouse(str).replace(/%/g, "\\%").replace(/_/g, "\\_"); + const escapeStr = (str: string) => str.replace(/\\/g, "\\\\").replace(/'/g, "''"); + const escapeForLike = (str: string) => + escapeStr(str).replace(/%/g, "\\%").replace(/_/g, "\\_"); if (hostnameFilter) { - conditions.push(`hostname = '${escapeForClickhouse(hostnameFilter)}'`); + conditions.push(`properties.hostname = '${escapeStr(hostnameFilter)}'`); } if (sourceFilter) { - conditions.push(`source = '${escapeForClickhouse(sourceFilter)}'`); + conditions.push(`properties.source = '${escapeStr(sourceFilter)}'`); } if (outcomeFilter) { - conditions.push(`outcome = '${escapeForClickhouse(outcomeFilter)}'`); + conditions.push(`properties.outcome = '${escapeStr(outcomeFilter)}'`); } if (urlSearch) { - conditions.push(`url LIKE '%${escapeForClickhouseLike(urlSearch)}%'`); + conditions.push(`properties.url LIKE '%${escapeForLike(urlSearch)}%'`); } } @@ -506,206 +507,216 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( adFunnelTimeSeries, ] = await Promise.all([ // 1. 
Which sites consistently error (top 200 by volume) - queryClickhouse(` + queryPostHog(` SELECT - hostname, + properties.hostname as hostname, count() AS total_requests, - round(countIf(outcome = 'success') / count() * 100, 2) AS success_rate, - countIf(outcome = 'error') AS error_count, - round(avg(duration_ms)) AS avg_duration_ms - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' + round(countIf(properties.outcome = 'success') / count() * 100, 2) AS success_rate, + countIf(properties.outcome = 'error') AS error_count, + round(avg(toFloat64(properties.duration_ms))) AS avg_duration_ms + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' GROUP BY hostname ORDER BY total_requests DESC LIMIT 200 `), // 2. Which sources work for which sites (show all with at least 1 request) - queryClickhouse(` + queryPostHog(` SELECT - hostname, - source, - round(countIf(outcome = 'success') / count() * 100, 2) AS success_rate, + properties.hostname as hostname, + properties.source as source, + round(countIf(properties.outcome = 'success') / count() * 100, 2) AS success_rate, count() AS request_count - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' - AND source != '' + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' + AND properties.source != '' GROUP BY hostname, source ORDER BY hostname, request_count DESC `), // 3. 
Hourly traffic pattern - queryClickhouse(` + queryPostHog(` SELECT formatDateTime(toStartOfHour(timestamp), '%Y-%m-%d %H:00') AS hour, count() AS request_count, - countIf(outcome = 'success') AS success_count, - countIf(outcome = 'error') AS error_count - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' + countIf(properties.outcome = 'success') AS success_count, + countIf(properties.outcome = 'error') AS error_count + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' GROUP BY hour ORDER BY hour `), // 4. Error breakdown by hostname and type with error messages and upstream context - queryClickhouse(` + queryPostHog(` SELECT - hostname, - error_type, - any(error_message) AS error_message, + properties.hostname as hostname, + properties.error_type as error_type, + any(properties.error_message) AS error_message, '' AS error_severity, count() AS error_count, formatDateTime(max(timestamp), '%Y-%m-%d %H:%i:%S') AS latest_timestamp, - any(upstream_hostname) AS upstream_hostname, - any(upstream_status_code) AS upstream_status_code - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND outcome = 'error' - AND error_type != '' + any(properties.upstream_hostname) AS upstream_hostname, + any(properties.upstream_status_code) AS upstream_status_code + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.outcome = 'error' + AND properties.error_type != '' GROUP BY hostname, error_type ORDER BY error_count DESC LIMIT 100 `), // 4b. 
Upstream service breakdown - which external services are causing errors - queryClickhouse(` + queryPostHog(` SELECT - upstream_hostname, - upstream_status_code, + properties.upstream_hostname as upstream_hostname, + properties.upstream_status_code as upstream_status_code, count() AS error_count, - uniq(hostname) AS affected_hostnames, - any(error_type) AS sample_error_type - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND outcome = 'error' - AND upstream_hostname != '' + uniq(properties.hostname) AS affected_hostnames, + any(properties.error_type) AS sample_error_type + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.outcome = 'error' + AND properties.upstream_hostname != '' GROUP BY upstream_hostname, upstream_status_code ORDER BY error_count DESC LIMIT 50 `), // 5. Overall health metrics - queryClickhouse(` + queryPostHog(` SELECT count() AS total_requests_24h, - round(countIf(outcome = 'success') / count() * 100, 2) AS success_rate_24h, - round(countIf(cache_hit = 1) / count() * 100, 2) AS cache_hit_rate_24h, - round(avg(duration_ms)) AS avg_duration_ms_24h, - round(quantile(0.95)(duration_ms)) AS p95_duration_ms_24h, - round(avg(heap_used_mb)) AS avg_heap_mb, - uniq(hostname) AS unique_hostnames_24h - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' + round(countIf(properties.outcome = 'success') / count() * 100, 2) AS success_rate_24h, + round(countIf(toFloat64(properties.cache_hit) = 1) / count() * 100, 2) AS cache_hit_rate_24h, + round(avg(toFloat64(properties.duration_ms))) AS avg_duration_ms_24h, + round(quantile(0.95)(toFloat64(properties.duration_ms))) AS p95_duration_ms_24h, + round(avg(toFloat64(properties.heap_used_mb))) AS avg_heap_mb, + uniq(properties.hostname) AS unique_hostnames_24h + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' `), // 
6. Real-time popular pages (last 5 minutes) - queryClickhouse(` + queryPostHog(` SELECT - url, - hostname, + properties.url as url, + properties.hostname as hostname, count() AS count - FROM request_events - WHERE timestamp > now() - INTERVAL 5 MINUTE - AND url != '' + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL 5 MINUTE + AND properties.url != '' GROUP BY url, hostname ORDER BY count DESC LIMIT 20 `), // 7. Request explorer - individual requests for debugging (applies filters) - queryClickhouse(` + queryPostHog(` SELECT - request_id, + properties.request_id as request_id, formatDateTime(timestamp, '%Y-%m-%d %H:%i:%S') AS event_time, - url, - hostname, - source, - outcome, - status_code, - error_type, - error_message, - duration_ms, - fetch_ms, - cache_lookup_ms, - cache_save_ms, - cache_hit, - cache_status, - article_length, - article_title - FROM request_events + properties.url as url, + properties.hostname as hostname, + properties.source as source, + properties.outcome as outcome, + properties.status_code as status_code, + properties.error_type as error_type, + properties.error_message as error_message, + properties.duration_ms as duration_ms, + properties.fetch_ms as fetch_ms, + properties.cache_lookup_ms as cache_lookup_ms, + properties.cache_save_ms as cache_save_ms, + properties.cache_hit as cache_hit, + properties.cache_status as cache_status, + properties.article_length as article_length, + properties.article_title as article_title + FROM events WHERE ${buildWhereClause()} ORDER BY timestamp DESC LIMIT 200 `), // 8. 
Live requests (last 60 seconds for live feed - also applies filters) - queryClickhouse(` + queryPostHog(` SELECT - request_id, + properties.request_id as request_id, formatDateTime(timestamp, '%H:%i:%S') AS event_time, - url, - hostname, - source, - outcome, - duration_ms, - error_type, - cache_hit - FROM request_events + properties.url as url, + properties.hostname as hostname, + properties.source as source, + properties.outcome as outcome, + properties.duration_ms as duration_ms, + properties.error_type as error_type, + properties.cache_hit as cache_hit + FROM events WHERE ${buildWhereClause({ timeInterval: "60 SECOND" })} ORDER BY timestamp DESC LIMIT 50 `), // 9. Endpoint statistics (article, summary) - queryClickhouse(` + queryPostHog(` SELECT - endpoint, + properties.endpoint as endpoint, count() AS total_requests, - countIf(outcome = 'success') AS success_count, - countIf(outcome = 'error') AS error_count, - round(countIf(outcome = 'success') / count() * 100, 2) AS success_rate, - round(avg(duration_ms)) AS avg_duration_ms, - sum(input_tokens) AS total_input_tokens, - sum(output_tokens) AS total_output_tokens - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND endpoint != '' + countIf(properties.outcome = 'success') AS success_count, + countIf(properties.outcome = 'error') AS error_count, + round(countIf(properties.outcome = 'success') / count() * 100, 2) AS success_rate, + round(avg(toFloat64(properties.duration_ms))) AS avg_duration_ms, + sum(toFloat64(properties.input_tokens)) AS total_input_tokens, + sum(toFloat64(properties.output_tokens)) AS total_output_tokens + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.endpoint != '' GROUP BY endpoint ORDER BY total_requests DESC `), // 10. 
Hourly traffic by endpoint (for trends) - queryClickhouse(` + queryPostHog(` SELECT formatDateTime(toStartOfHour(timestamp), '%Y-%m-%d %H:00') AS hour, - endpoint, + properties.endpoint as endpoint, count() AS request_count, - countIf(outcome = 'success') AS success_count, - countIf(outcome = 'error') AS error_count - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND endpoint != '' + countIf(properties.outcome = 'success') AS success_count, + countIf(properties.outcome = 'error') AS error_count + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.endpoint != '' GROUP BY hour, endpoint ORDER BY hour, endpoint `), // 11. Universally broken hostnames - sites where ALL sources fail - queryClickhouse(` + queryPostHog(` SELECT - hostname, + properties.hostname as hostname, count() AS total_requests, - uniq(source) AS sources_tried, - arrayStringConcat(groupArray(DISTINCT source), ', ') AS sources_list, - round(countIf(outcome = 'success') / count() * 100, 2) AS overall_success_rate, - any(url) AS sample_url - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' - AND source != '' + uniq(properties.source) AS sources_tried, + arrayStringConcat(groupArray(DISTINCT properties.source), ', ') AS sources_list, + round(countIf(properties.outcome = 'success') / count() * 100, 2) AS overall_success_rate, + any(properties.url) AS sample_url + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' + AND properties.source != '' GROUP BY hostname HAVING sources_tried >= 2 @@ -716,172 +727,182 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( `), // 12. 
Source error rates over time - for observability/regression detection - queryClickhouse(` + queryPostHog(` SELECT formatDateTime(toStartOfFifteenMinutes(timestamp), '%Y-%m-%d %H:%i') AS time_bucket, - source, + properties.source as source, count() AS total_requests, - countIf(outcome = 'error') AS error_count, - round(countIf(outcome = 'error') / count() * 100, 2) AS error_rate - FROM request_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' - AND source != '' + countIf(properties.outcome = 'error') AS error_count, + round(countIf(properties.outcome = 'error') / count() * 100, 2) AS error_rate + FROM events + WHERE event = 'request_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' + AND properties.source != '' GROUP BY time_bucket, source ORDER BY time_bucket, source `), // ============================================================================= - // Ad Analytics Queries (from ad_events table) + // Ad Analytics Queries (from events WHERE event = 'ad_event') // ============================================================================= // 13. Ad health metrics - overall fill rate and performance (minute-based + device filter) - queryClickhouse(` + queryPostHog(` SELECT count() AS total_requests, - countIf(status = 'filled') AS filled_count, - countIf(status = 'no_fill') AS no_fill_count, - countIf(status = 'premium_user') AS premium_count, - countIf(status = 'error' OR status = 'gravity_error') AS error_count, - countIf(status = 'timeout') AS timeout_count, - round(countIf(status = 'filled') / countIf(status != 'premium_user') * 100, 2) AS fill_rate, - round(avg(duration_ms)) AS avg_duration_ms, - uniq(session_id) AS unique_sessions, - uniqIf(brand_name, brand_name != '') AS unique_brands - FROM ad_events - WHERE timestamp > now() - INTERVAL ${minutes} MINUTE - AND event_type = 'request' - ${adDeviceFilter ? 
`AND device_type = '${adDeviceFilter}'` : ''} + countIf(properties.status = 'filled') AS filled_count, + countIf(properties.status = 'no_fill') AS no_fill_count, + countIf(properties.status = 'premium_user') AS premium_count, + countIf(properties.status = 'error' OR properties.status = 'gravity_error') AS error_count, + countIf(properties.status = 'timeout') AS timeout_count, + round(countIf(properties.status = 'filled') / countIf(properties.status != 'premium_user') * 100, 2) AS fill_rate, + round(avg(toFloat64(properties.duration_ms))) AS avg_duration_ms, + uniq(properties.session_id) AS unique_sessions, + uniqIf(properties.brand_name, properties.brand_name != '') AS unique_brands + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${minutes} MINUTE + AND properties.event_type = 'request' + ${adDeviceFilter ? `AND properties.device_type = '${adDeviceFilter}'` : ''} `).catch(() => [] as AdHealthMetrics[]), // 14. Ad status breakdown - only count request events - queryClickhouse(` + queryPostHog(` SELECT - status, + properties.status as status, count() AS count, - round(count() / (SELECT count() FROM ad_events WHERE timestamp > now() - INTERVAL ${hours} HOUR AND event_type = 'request') * 100, 2) AS percentage, - round(avg(duration_ms)) AS avg_duration_ms - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND event_type = 'request' + round(count() / (SELECT count() FROM events WHERE event = 'ad_event' AND timestamp > now() - INTERVAL ${hours} HOUR AND properties.event_type = 'request') * 100, 2) AS percentage, + round(avg(toFloat64(properties.duration_ms))) AS avg_duration_ms + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.event_type = 'request' GROUP BY status ORDER BY count DESC `).catch(() => [] as AdStatusBreakdown[]), // 15. 
Ad fill rate by hostname - only count request events - queryClickhouse(` + queryPostHog(` SELECT - hostname, + properties.hostname as hostname, count() AS total_requests, - countIf(status = 'filled') AS filled_count, - round(countIf(status = 'filled') / countIf(status != 'premium_user') * 100, 2) AS fill_rate, - anyIf(brand_name, brand_name != '') AS top_brand - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' - AND event_type = 'request' + countIf(properties.status = 'filled') AS filled_count, + round(countIf(properties.status = 'filled') / countIf(properties.status != 'premium_user') * 100, 2) AS fill_rate, + anyIf(properties.brand_name, properties.brand_name != '') AS top_brand + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' + AND properties.event_type = 'request' GROUP BY hostname - HAVING countIf(status != 'premium_user') > 0 + HAVING countIf(properties.status != 'premium_user') > 0 ORDER BY total_requests DESC LIMIT 100 `).catch(() => [] as AdHostnameStats[]), // 16. 
Ad fill rate by device/browser/OS - only count request events - queryClickhouse(` + queryPostHog(` SELECT - device_type, - os, - browser, + properties.device_type as device_type, + properties.os as os, + properties.browser as browser, count() AS total_requests, - countIf(status = 'filled') AS filled_count, - round(countIf(status = 'filled') / countIf(status != 'premium_user') * 100, 2) AS fill_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND device_type != '' - AND event_type = 'request' + countIf(properties.status = 'filled') AS filled_count, + round(countIf(properties.status = 'filled') / countIf(properties.status != 'premium_user') * 100, 2) AS fill_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.device_type != '' + AND properties.event_type = 'request' GROUP BY device_type, os, browser - HAVING countIf(status != 'premium_user') > 0 + HAVING countIf(properties.status != 'premium_user') > 0 ORDER BY total_requests DESC LIMIT 50 `).catch(() => [] as AdDeviceStats[]), // 17. Top brands by impressions - queryClickhouse(` + queryPostHog(` SELECT - brand_name, + properties.brand_name as brand_name, count() AS impressions, - uniq(hostname) AS unique_hostnames, - uniq(session_id) AS unique_sessions, - round(avg(article_content_length)) AS avg_article_length - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND status = 'filled' - AND brand_name != '' + uniq(properties.hostname) AS unique_hostnames, + uniq(properties.session_id) AS unique_sessions, + round(avg(toFloat64(properties.article_content_length))) AS avg_article_length + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.status = 'filled' + AND properties.brand_name != '' GROUP BY brand_name ORDER BY impressions DESC LIMIT 50 `).catch(() => [] as AdBrandStats[]), // 18. 
Hourly ad traffic - only count request events - queryClickhouse(` + queryPostHog(` SELECT formatDateTime(toStartOfHour(timestamp), '%Y-%m-%d %H:00') AS hour, count() AS total_requests, - countIf(status = 'filled') AS filled_count, - countIf(status = 'no_fill') AS no_fill_count, - round(countIf(status = 'filled') / countIf(status != 'premium_user') * 100, 2) AS fill_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND event_type = 'request' + countIf(properties.status = 'filled') AS filled_count, + countIf(properties.status = 'no_fill') AS no_fill_count, + round(countIf(properties.status = 'filled') / countIf(properties.status != 'premium_user') * 100, 2) AS fill_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.event_type = 'request' GROUP BY hour ORDER BY hour `).catch(() => [] as AdHourlyTraffic[]), // 19. Ad error breakdown - queryClickhouse(` + queryPostHog(` SELECT - status, - gravity_status_code, - any(error_message) AS error_message, + properties.status as status, + properties.gravity_status_code as gravity_status_code, + any(properties.error_message) AS error_message, count() AS count, formatDateTime(max(timestamp), '%Y-%m-%d %H:%i:%S') AS latest_timestamp - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND status IN ('error', 'gravity_error', 'timeout') + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.status IN ('error', 'gravity_error', 'timeout') GROUP BY status, gravity_status_code ORDER BY count DESC LIMIT 50 `).catch(() => [] as AdErrorBreakdown[]), // 20. 
Recent ad events (for live debugging) - queryClickhouse(` + queryPostHog(` SELECT - event_id, + properties.event_id as event_id, formatDateTime(timestamp, '%Y-%m-%d %H:%i:%S') AS event_time, - hostname, - article_title, - status, - brand_name, - duration_ms, - device_type - FROM ad_events - WHERE timestamp > now() - INTERVAL 1 HOUR + properties.hostname as hostname, + properties.article_title as article_title, + properties.status as status, + properties.brand_name as brand_name, + properties.duration_ms as duration_ms, + properties.device_type as device_type + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL 1 HOUR ORDER BY timestamp DESC LIMIT 100 `).catch(() => [] as AdRecentEvent[]), // 21. CTR by Brand - click-through rate for each advertiser (minute-based + device filter) - queryClickhouse(` + queryPostHog(` SELECT - brand_name, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr - FROM ad_events - WHERE timestamp > now() - INTERVAL ${minutes} MINUTE - AND event_type IN ('impression', 'click') - AND brand_name != '' - ${adDeviceFilter ? `AND device_type = '${adDeviceFilter}'` : ''} + properties.brand_name as brand_name, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${minutes} MINUTE + AND properties.event_type IN ('impression', 'click') + AND properties.brand_name != '' + ${adDeviceFilter ? `AND properties.device_type = '${adDeviceFilter}'` : ''} GROUP BY brand_name HAVING impressions > 0 ORDER BY impressions DESC @@ -889,8 +910,7 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( `).catch(() => [] as AdCTRByBrand[]), // 22. 
Funnel by time bucket - adapts granularity based on time range - // <1h: 5-minute buckets, <6h: 15-minute buckets, <24h: hourly, else: daily - queryClickhouse(` + queryPostHog(` SELECT ${minutes <= 60 ? `formatDateTime(toStartOfFiveMinutes(timestamp), '%Y-%m-%d %H:%i') AS hour` @@ -900,27 +920,29 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( ? `formatDateTime(toStartOfHour(timestamp), '%Y-%m-%d %H:00') AS hour` : `formatDateTime(toStartOfDay(timestamp), '%Y-%m-%d') AS hour` }, - countIf(event_type = 'request' AND status = 'filled') AS requests, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - countIf(event_type = 'dismiss') AS dismissals - FROM ad_events - WHERE timestamp > now() - INTERVAL ${minutes} MINUTE - ${adDeviceFilter ? `AND device_type = '${adDeviceFilter}'` : ''} + countIf(properties.event_type = 'request' AND properties.status = 'filled') AS requests, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + countIf(properties.event_type = 'dismiss') AS dismissals + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${minutes} MINUTE + ${adDeviceFilter ? `AND properties.device_type = '${adDeviceFilter}'` : ''} GROUP BY hour ORDER BY hour `).catch(() => [] as AdHourlyFunnel[]), - // 23. Dismiss Rate by Device - see which devices dismiss ads most - queryClickhouse(` + // 23. 
Dismiss Rate by Device + queryPostHog(` SELECT - device_type, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'dismiss') AS dismissals, - round(countIf(event_type = 'dismiss') / countIf(event_type = 'impression') * 100, 2) AS dismiss_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND device_type != '' + properties.device_type as device_type, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'dismiss') AS dismissals, + round(countIf(properties.event_type = 'dismiss') / countIf(properties.event_type = 'impression') * 100, 2) AS dismiss_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.device_type != '' GROUP BY device_type HAVING impressions > 0 ORDER BY impressions DESC @@ -930,24 +952,25 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( // Enhanced Granular Ad Analytics // ============================================================================= - // 24. Performance by Hour of Day - identify best performing hours - queryClickhouse(` + // 24. 
Performance by Hour of Day + queryPostHog(` SELECT toHour(timestamp) AS hour_of_day, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr, - round(countIf(event_type = 'request' AND status = 'filled') / - countIf(event_type = 'request' AND status != 'premium_user') * 100, 2) AS fill_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr, + round(countIf(properties.event_type = 'request' AND properties.status = 'filled') / + countIf(properties.event_type = 'request' AND properties.status != 'premium_user') * 100, 2) AS fill_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR GROUP BY hour_of_day - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY hour_of_day `).catch(() => [] as AdPerformanceByHour[]), // 25. 
Performance by Day of Week - queryClickhouse(` + queryPostHog(` SELECT toDayOfWeek(timestamp) AS day_of_week, CASE toDayOfWeek(timestamp) @@ -959,126 +982,133 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( WHEN 6 THEN 'Saturday' WHEN 7 THEN 'Sunday' END AS day_name, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR GROUP BY day_of_week, day_name - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY day_of_week `).catch(() => [] as AdPerformanceByDay[]), // 26. 
Enhanced Brand Performance with engagement metrics - queryClickhouse(` + queryPostHog(` SELECT - brand_name, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - countIf(event_type = 'dismiss') AS dismissals, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr, - round(countIf(event_type = 'dismiss') / countIf(event_type = 'impression') * 100, 2) AS dismiss_rate, - round(avgIf(duration_ms, event_type = 'click' AND duration_ms > 0)) AS avg_time_to_click_ms, - uniq(session_id) AS unique_sessions - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND brand_name != '' + properties.brand_name as brand_name, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + countIf(properties.event_type = 'dismiss') AS dismissals, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr, + round(countIf(properties.event_type = 'dismiss') / countIf(properties.event_type = 'impression') * 100, 2) AS dismiss_rate, + round(avgIf(toFloat64(properties.duration_ms), properties.event_type = 'click' AND toFloat64(properties.duration_ms) > 0)) AS avg_time_to_click_ms, + uniq(properties.session_id) AS unique_sessions + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.brand_name != '' GROUP BY brand_name - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY impressions DESC LIMIT 25 `).catch(() => [] as AdBrandPerformance[]), - // 27. Detailed Device Breakdown (uses minute-based time + device filter) - queryClickhouse(` + // 27. 
Detailed Device Breakdown + queryPostHog(` SELECT - device_type, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - countIf(event_type = 'dismiss') AS dismissals, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr, - round(countIf(event_type = 'dismiss') / countIf(event_type = 'impression') * 100, 2) AS dismiss_rate, - round(countIf(event_type = 'request' AND status = 'filled') / - countIf(event_type = 'request' AND status != 'premium_user') * 100, 2) AS fill_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${minutes} MINUTE - AND device_type != '' - ${adDeviceFilter ? `AND device_type = '${adDeviceFilter}'` : ''} + properties.device_type as device_type, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + countIf(properties.event_type = 'dismiss') AS dismissals, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr, + round(countIf(properties.event_type = 'dismiss') / countIf(properties.event_type = 'impression') * 100, 2) AS dismiss_rate, + round(countIf(properties.event_type = 'request' AND properties.status = 'filled') / + countIf(properties.event_type = 'request' AND properties.status != 'premium_user') * 100, 2) AS fill_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${minutes} MINUTE + AND properties.device_type != '' + ${adDeviceFilter ? `AND properties.device_type = '${adDeviceFilter}'` : ''} GROUP BY device_type - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY impressions DESC `).catch(() => [] as AdDeviceBreakdown[]), // 28. 
Browser Performance - queryClickhouse(` + queryPostHog(` SELECT - browser, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND browser != '' + properties.browser as browser, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.browser != '' GROUP BY browser - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY impressions DESC LIMIT 10 `).catch(() => [] as AdBrowserStats[]), // 29. OS Performance - queryClickhouse(` + queryPostHog(` SELECT - os, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND os != '' + properties.os as os, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.os != '' GROUP BY os - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY impressions DESC LIMIT 10 `).catch(() => [] as AdOSStats[]), // 30. 
Hostname Performance with full funnel - queryClickhouse(` + queryPostHog(` SELECT - hostname, - countIf(event_type = 'request') AS requests, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(if(countIf(event_type = 'impression') > 0, countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 0), 2) AS ctr, - round(if(countIf(event_type = 'request' AND status != 'premium_user') > 0, countIf(event_type = 'request' AND status = 'filled') / - countIf(event_type = 'request' AND status != 'premium_user') * 100, 0), 2) AS fill_rate, - anyIf(brand_name, brand_name != '' AND event_type = 'impression') AS top_brand - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND hostname != '' + properties.hostname as hostname, + countIf(properties.event_type = 'request') AS requests, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(if(countIf(properties.event_type = 'impression') > 0, countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 0), 2) AS ctr, + round(if(countIf(properties.event_type = 'request' AND properties.status != 'premium_user') > 0, countIf(properties.event_type = 'request' AND properties.status = 'filled') / + countIf(properties.event_type = 'request' AND properties.status != 'premium_user') * 100, 0), 2) AS fill_rate, + anyIf(properties.brand_name, properties.brand_name != '' AND properties.event_type = 'impression') AS top_brand + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.hostname != '' GROUP BY hostname - HAVING countIf(event_type = 'request') > 0 + HAVING countIf(properties.event_type = 'request') > 0 ORDER BY requests DESC LIMIT 50 `).catch(() => [] as AdHostnamePerformance[]), - // 31. Content Length Correlation - do longer articles perform better? - queryClickhouse(` + // 31. 
Content Length Correlation + queryPostHog(` SELECT CASE - WHEN article_content_length < 500 THEN '< 500 chars' - WHEN article_content_length < 1500 THEN '500-1.5k chars' - WHEN article_content_length < 3000 THEN '1.5k-3k chars' - WHEN article_content_length < 5000 THEN '3k-5k chars' + WHEN toFloat64(properties.article_content_length) < 500 THEN '< 500 chars' + WHEN toFloat64(properties.article_content_length) < 1500 THEN '500-1.5k chars' + WHEN toFloat64(properties.article_content_length) < 3000 THEN '1.5k-3k chars' + WHEN toFloat64(properties.article_content_length) < 5000 THEN '3k-5k chars' ELSE '5k+ chars' END AS article_length_bucket, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 2) AS ctr - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND article_content_length > 0 + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 2) AS ctr + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND toFloat64(properties.article_content_length) > 0 GROUP BY article_length_bucket - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY CASE article_length_bucket WHEN '< 500 chars' THEN 1 @@ -1089,8 +1119,8 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( END `).catch(() => [] as AdContentCorrelation[]), - // 32. Session Depth Analysis - do users who see more ads click more? - queryClickhouse(` + // 32. 
Session Depth Analysis + queryPostHog(` SELECT session_ad_count, count() AS session_count, @@ -1099,13 +1129,14 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( round(if(sum(impressions) > 0, sum(clicks) / sum(impressions) * 100, 0), 2) AS avg_ctr FROM ( SELECT - session_id, - countIf(event_type = 'impression') AS session_ad_count, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - AND session_id != '' + properties.session_id as session_id, + countIf(properties.event_type = 'impression') AS session_ad_count, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + AND properties.session_id != '' GROUP BY session_id HAVING session_ad_count > 0 ) @@ -1115,99 +1146,101 @@ export const adminRoutes = new Elysia({ prefix: "/api" }).get( `).catch(() => [] as AdSessionDepth[]), // 33. 
Conversion Funnel Summary - queryClickhouse(` + queryPostHog(` SELECT stage, count, round(if(first_value(count) OVER (ORDER BY stage_order) > 0, count / first_value(count) OVER (ORDER BY stage_order) * 100, 0), 2) AS rate_from_previous FROM ( - SELECT 'Requests' AS stage, 1 AS stage_order, countIf(event_type = 'request') AS count - FROM ad_events WHERE timestamp > now() - INTERVAL ${hours} HOUR + SELECT 'Requests' AS stage, 1 AS stage_order, countIf(properties.event_type = 'request') AS count + FROM events WHERE event = 'ad_event' AND timestamp > now() - INTERVAL ${hours} HOUR UNION ALL - SELECT 'Filled' AS stage, 2 AS stage_order, countIf(event_type = 'request' AND status = 'filled') AS count - FROM ad_events WHERE timestamp > now() - INTERVAL ${hours} HOUR + SELECT 'Filled' AS stage, 2 AS stage_order, countIf(properties.event_type = 'request' AND properties.status = 'filled') AS count + FROM events WHERE event = 'ad_event' AND timestamp > now() - INTERVAL ${hours} HOUR UNION ALL - SELECT 'Impressions' AS stage, 3 AS stage_order, countIf(event_type = 'impression') AS count - FROM ad_events WHERE timestamp > now() - INTERVAL ${hours} HOUR + SELECT 'Impressions' AS stage, 3 AS stage_order, countIf(properties.event_type = 'impression') AS count + FROM events WHERE event = 'ad_event' AND timestamp > now() - INTERVAL ${hours} HOUR UNION ALL - SELECT 'Clicks' AS stage, 4 AS stage_order, countIf(event_type = 'click') AS count - FROM ad_events WHERE timestamp > now() - INTERVAL ${hours} HOUR + SELECT 'Clicks' AS stage, 4 AS stage_order, countIf(properties.event_type = 'click') AS count + FROM events WHERE event = 'ad_event' AND timestamp > now() - INTERVAL ${hours} HOUR ) ORDER BY stage_order `).catch(() => [] as AdConversionFunnel[]), - // 34. Bot Detection - identify filled requests without device info (likely bots/curl) - queryClickhouse(` + // 34. 
Bot Detection + queryPostHog(` SELECT CASE - WHEN device_type = '' OR browser = '' THEN 'No Device Info (Likely Bot)' + WHEN properties.device_type = '' OR properties.browser = '' THEN 'No Device Info (Likely Bot)' ELSE 'Has Device Info (Real User)' END as category, - countIf(event_type = 'request' AND status = 'filled') AS filled_count, - countIf(event_type = 'impression') AS impression_count, - round(if(countIf(event_type = 'request' AND status = 'filled') > 0, - countIf(event_type = 'impression') / countIf(event_type = 'request' AND status = 'filled') * 100, 0), 1) AS impression_rate, - uniq(session_id) AS unique_sessions - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR + countIf(properties.event_type = 'request' AND properties.status = 'filled') AS filled_count, + countIf(properties.event_type = 'impression') AS impression_count, + round(if(countIf(properties.event_type = 'request' AND properties.status = 'filled') > 0, + countIf(properties.event_type = 'impression') / countIf(properties.event_type = 'request' AND properties.status = 'filled') * 100, 0), 1) AS impression_rate, + uniq(properties.session_id) AS unique_sessions + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR GROUP BY category ORDER BY filled_count DESC `).catch(() => [] as AdBotDetection[]), // 35. 
CTR by Hour of Day with Device Breakdown - queryClickhouse(` + queryPostHog(` SELECT toHour(timestamp) AS hour_of_day, - if(device_type = '', 'unknown', device_type) AS device_type, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - round(if(countIf(event_type = 'impression') > 0, - countIf(event_type = 'click') / countIf(event_type = 'impression') * 100, 0), 2) AS ctr, - round(if(countIf(event_type = 'request' AND status != 'premium_user') > 0, - countIf(event_type = 'request' AND status = 'filled') / - countIf(event_type = 'request' AND status != 'premium_user') * 100, 0), 2) AS fill_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR - ${adDeviceFilter ? `AND device_type = '${adDeviceFilter}'` : ''} + if(properties.device_type = '', 'unknown', properties.device_type) AS device_type, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + round(if(countIf(properties.event_type = 'impression') > 0, + countIf(properties.event_type = 'click') / countIf(properties.event_type = 'impression') * 100, 0), 2) AS ctr, + round(if(countIf(properties.event_type = 'request' AND properties.status != 'premium_user') > 0, + countIf(properties.event_type = 'request' AND properties.status = 'filled') / + countIf(properties.event_type = 'request' AND properties.status != 'premium_user') * 100, 0), 2) AS fill_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR + ${adDeviceFilter ? `AND properties.device_type = '${adDeviceFilter}'` : ''} GROUP BY hour_of_day, device_type - HAVING countIf(event_type = 'impression') > 0 + HAVING countIf(properties.event_type = 'impression') > 0 ORDER BY hour_of_day, device_type `).catch(() => [] as AdCTRByHourDevice[]), - // 36. Filled vs Impression Gap Analysis - identify where impressions are lost - queryClickhouse(` + // 36. 
Filled vs Impression Gap Analysis + queryPostHog(` SELECT - if(device_type = '', 'unknown', device_type) AS device_type, - if(browser = '', 'unknown', browser) AS browser, - countIf(event_type = 'request' AND status = 'filled') AS filled_count, - countIf(event_type = 'impression') AS impression_count, - countIf(event_type = 'request' AND status = 'filled') - countIf(event_type = 'impression') AS gap_count, - round(if(countIf(event_type = 'request' AND status = 'filled') > 0, - countIf(event_type = 'impression') / countIf(event_type = 'request' AND status = 'filled') * 100, 0), 1) AS impression_rate - FROM ad_events - WHERE timestamp > now() - INTERVAL ${hours} HOUR + if(properties.device_type = '', 'unknown', properties.device_type) AS device_type, + if(properties.browser = '', 'unknown', properties.browser) AS browser, + countIf(properties.event_type = 'request' AND properties.status = 'filled') AS filled_count, + countIf(properties.event_type = 'impression') AS impression_count, + countIf(properties.event_type = 'request' AND properties.status = 'filled') - countIf(properties.event_type = 'impression') AS gap_count, + round(if(countIf(properties.event_type = 'request' AND properties.status = 'filled') > 0, + countIf(properties.event_type = 'impression') / countIf(properties.event_type = 'request' AND properties.status = 'filled') * 100, 0), 1) AS impression_rate + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${hours} HOUR GROUP BY device_type, browser - HAVING countIf(event_type = 'request' AND status = 'filled') > 0 + HAVING countIf(properties.event_type = 'request' AND properties.status = 'filled') > 0 ORDER BY gap_count DESC LIMIT 20 `).catch(() => [] as AdFilledImpressionGap[]), - // 37. 
Ad Funnel Time Series - minute-level granularity for real-time monitoring - // Tracks the full funnel: requests -> filled -> impressions -> clicks/dismissals - // Also tracks Gravity forwarding success for revenue assurance - queryClickhouse(` + // 37. Ad Funnel Time Series + queryPostHog(` SELECT formatDateTime(toStartOfMinute(timestamp), '%Y-%m-%d %H:%i') AS time_bucket, - countIf(event_type = 'request') AS requests, - countIf(event_type = 'request' AND status = 'filled') AS filled, - countIf(event_type = 'impression') AS impressions, - countIf(event_type = 'click') AS clicks, - countIf(event_type = 'dismiss') AS dismissals, - countIf(event_type = 'impression' AND gravity_forwarded = 1) AS fwd_success, - countIf(event_type = 'impression' AND gravity_forwarded = 0) AS fwd_failed - FROM ad_events - WHERE timestamp > now() - INTERVAL ${minutes} MINUTE - ${adDeviceFilter ? `AND device_type = '${adDeviceFilter}'` : ''} + countIf(properties.event_type = 'request') AS requests, + countIf(properties.event_type = 'request' AND properties.status = 'filled') AS filled, + countIf(properties.event_type = 'impression') AS impressions, + countIf(properties.event_type = 'click') AS clicks, + countIf(properties.event_type = 'dismiss') AS dismissals, + countIf(properties.event_type = 'impression' AND toFloat64(properties.gravity_forwarded) = 1) AS gravity_forwarded, + countIf(properties.event_type = 'impression' AND toFloat64(properties.gravity_forwarded) = 0) AS gravity_failed + FROM events + WHERE event = 'ad_event' + AND timestamp > now() - INTERVAL ${minutes} MINUTE + ${adDeviceFilter ? 
`AND properties.device_type = '${adDeviceFilter}'` : ''} GROUP BY time_bucket ORDER BY time_bucket `).catch(() => [] as AdFunnelTimeSeries[]), diff --git a/server/routes/chat.ts b/server/routes/chat.ts index a6be1b71..c8781e2d 100644 --- a/server/routes/chat.ts +++ b/server/routes/chat.ts @@ -20,6 +20,7 @@ import { } from "../../lib/errors/summary"; import { getLanguagePrompt } from "../../types/api"; import { env } from "../env"; +import { trackLLMGeneration } from "../../lib/posthog"; // Rate limits - same as summary route const DAILY_LIMIT = env.NODE_ENV === "development" ? 100 : 20; @@ -163,10 +164,27 @@ Rules: } // Use AI SDK streamText for streaming response + const traceId = crypto.randomUUID(); + const startTime = Date.now(); const result = streamText({ model: openrouter(model), system: systemPrompt, messages: modelMessages, + onFinish: ({ text, usage }) => { + trackLLMGeneration({ + distinctId: rateLimitKey, + traceId, + model, + provider: "openrouter", + inputTokens: usage?.inputTokens, + outputTokens: usage?.outputTokens, + latencyMs: Date.now() - startTime, + outputContent: text, + isPremium: false, + language, + messageCount: messages.length, + }); + }, }); ctx.merge({ message_count: messages.length, status_code: 200 }); @@ -185,10 +203,27 @@ Rules: const modelMessages = await convertToModelMessages(messages as UIMessage[]); // Use AI SDK streamText for streaming response + const traceId = crypto.randomUUID(); + const startTime = Date.now(); const result = streamText({ model: openrouter(model), system: systemPrompt, messages: modelMessages, + onFinish: ({ text, usage }) => { + trackLLMGeneration({ + distinctId: userId || clientIp, + traceId, + model, + provider: "openrouter", + inputTokens: usage?.inputTokens, + outputTokens: usage?.outputTokens, + latencyMs: Date.now() - startTime, + outputContent: text, + isPremium: true, + language, + messageCount: messages.length, + }); + }, }); ctx.merge({ diff --git a/server/routes/gravity.ts 
b/server/routes/gravity.ts index 27f9379c..95ddf78f 100644 --- a/server/routes/gravity.ts +++ b/server/routes/gravity.ts @@ -3,7 +3,7 @@ * * /api/context - Fetches contextual ads. ZeroClick is primary, Gravity is fallback. * /api/px - Unified tracking for impressions, clicks, dismissals. - * For impressions, wraps Gravity forwarding + ClickHouse logging atomically. + * For impressions, wraps Gravity forwarding + PostHog logging atomically. * * Endpoint names are neutral to avoid content blockers (no "ad" or "track" in names). */ @@ -13,7 +13,7 @@ import { getAuthInfo } from "../middleware/auth"; import { env } from "../env"; import { extractClientIp } from "../../lib/request-context"; import { createLogger } from "../../lib/logger"; -import { trackAdEvent, type AdEventStatus } from "../../lib/clickhouse"; +import { trackAdEvent, type AdEventStatus } from "../../lib/posthog"; import { fetchZeroClickOffers, mapZeroClickOfferToAd, @@ -140,7 +140,7 @@ export const gravityRoutes = new Elysia({ prefix: "/api" }) * Unified tracking endpoint for impressions, clicks, and dismissals. * * CRITICAL: For impressions, this endpoint WRAPS the Gravity impression pixel call. - * This ensures ClickHouse accurately reflects whether Gravity received the impression. + * This ensures PostHog accurately reflects whether Gravity received the impression. * Without this, we'd log impressions locally without knowing if we got paid. * * Named "/px" to avoid ad blocker detection (no "ad" or "track" in the name). 
@@ -148,12 +148,12 @@ export const gravityRoutes = new Elysia({ prefix: "/api" }) .post( "/px", async ({ body, set }) => { - const { type, sessionId, hostname, brandName, adTitle, adText, clickUrl, impUrl, cta, favicon, deviceType, os, browser, adProvider } = body; + const { type, sessionId, hostname, brandName, adTitle, adText, clickUrl, impUrl, cta, favicon, deviceType, os, browser, adProvider: _adProvider, placement, adIndex } = body; // Derive provider from impUrl prefix only — never trust client-sent adProvider // for forwarding decisions (prevents spoofing to skip Gravity billing) const isZeroClick = impUrl?.startsWith("zeroclick://") ?? false; - // adProvider from client is used only for ClickHouse logging, not forwarding logic + // adProvider from client is used only for PostHog logging, not forwarding logic const provider = isZeroClick ? "zeroclick" : "gravity"; // For impressions with impUrl, forward to the appropriate provider @@ -165,7 +165,7 @@ export const gravityRoutes = new Elysia({ prefix: "/api" }) gravityResult = await forwardImpressionToGravity(impUrl); } - // Now track to ClickHouse WITH the Gravity result + // Now track to PostHog WITH the Gravity result try { trackAdEvent({ event_type: type, @@ -187,6 +187,9 @@ export const gravityRoutes = new Elysia({ prefix: "/api" }) gravity_forwarded: gravityResult?.forwarded ? 1 : 0, gravity_status_code: gravityResult?.statusCode ?? 0, error_message: gravityResult?.error ?? "", + // Placement attribution — which slot + position was interacted with + placement: placement || "unknown", + ad_index: adIndex ?? -1, }); logger.debug({ @@ -225,6 +228,8 @@ export const gravityRoutes = new Elysia({ prefix: "/api" }) os: t.Optional(t.String()), browser: t.Optional(t.String()), adProvider: t.Optional(t.String()), + placement: t.Optional(t.String()), + adIndex: t.Optional(t.Number()), }), } )