From 8e57150e4536a21a7b39057be79090139a67c97d Mon Sep 17 00:00:00 2001 From: iza <59828082+izadoesdev@users.noreply.github.com> Date: Fri, 15 May 2026 21:57:14 +0300 Subject: [PATCH 01/44] fix(query): optimize analytics builders --- .../ai/src/query/builders/custom-events.ts | 36 +-- packages/ai/src/query/builders/devices.ts | 60 ++--- packages/ai/src/query/builders/engagement.ts | 20 +- packages/ai/src/query/builders/geo.ts | 20 +- packages/ai/src/query/builders/links.ts | 8 +- packages/ai/src/query/builders/pages.ts | 244 +++++++++--------- packages/ai/src/query/builders/performance.ts | 76 +++--- packages/ai/src/query/builders/profiles.ts | 12 +- packages/ai/src/query/builders/revenue.ts | 9 +- packages/ai/src/query/builders/sessions.ts | 137 ++++++---- packages/ai/src/query/builders/traffic.ts | 28 +- packages/ai/src/query/builders/vitals.ts | 62 +++-- packages/ai/src/query/expressions.ts | 2 +- packages/ai/src/query/simple-builder.ts | 79 ++++-- packages/ai/src/query/types.ts | 6 + packages/db/src/clickhouse/client.ts | 3 +- 16 files changed, 445 insertions(+), 357 deletions(-) diff --git a/packages/ai/src/query/builders/custom-events.ts b/packages/ai/src/query/builders/custom-events.ts index f9be40aff..27be1c141 100644 --- a/packages/ai/src/query/builders/custom-events.ts +++ b/packages/ai/src/query/builders/custom-events.ts @@ -55,12 +55,12 @@ export const CustomEventsBuilders: Record = { SELECT event_name as name, COUNT(*) as total_events, - COUNT(DISTINCT anonymous_id) as unique_users, - COUNT(DISTINCT session_id) as unique_sessions, + uniq(anonymous_id) as unique_users, + uniq(session_id) as unique_sessions, MAX(timestamp) as last_occurrence, MIN(timestamp) as first_occurrence, countIf(properties != '{}' AND isValidJSON(properties)) as events_with_properties, - ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage + ROUND((uniq(anonymous_id) / SUM(uniq(anonymous_id)) OVER()) * 100, 2) as percentage FROM 
${Analytics.custom_events} WHERE ${projectWhereClause(filterParams)} @@ -176,8 +176,8 @@ export const CustomEventsBuilders: Record = { SELECT path as name, COUNT(*) as total_events, - COUNT(DISTINCT event_name) as unique_event_types, - COUNT(DISTINCT anonymous_id) as unique_users + uniq(event_name) as unique_event_types, + uniq(anonymous_id) as unique_users FROM ${Analytics.custom_events} WHERE ${projectWhereClause(filterParams)} @@ -221,10 +221,10 @@ export const CustomEventsBuilders: Record = { SELECT toDate(timestamp) as date, COUNT(*) as total_events, - COUNT(DISTINCT event_name) as unique_event_types, - COUNT(DISTINCT anonymous_id) as unique_users, - COUNT(DISTINCT session_id) as unique_sessions, - COUNT(DISTINCT path) as unique_pages + uniq(event_name) as unique_event_types, + uniq(anonymous_id) as unique_users, + uniq(session_id) as unique_sessions, + uniq(path) as unique_pages FROM ${Analytics.custom_events} WHERE ${projectWhereClause(filterParams)} @@ -306,10 +306,10 @@ export const CustomEventsBuilders: Record = { sql: ` SELECT COUNT(*) as total_events, - COUNT(DISTINCT event_name) as unique_event_types, - COUNT(DISTINCT anonymous_id) as unique_users, - COUNT(DISTINCT session_id) as unique_sessions, - COUNT(DISTINCT path) as unique_pages + uniq(event_name) as unique_event_types, + uniq(anonymous_id) as unique_users, + uniq(session_id) as unique_sessions, + uniq(path) as unique_pages FROM ${Analytics.custom_events} WHERE ${projectWhereClause(filterParams)} @@ -516,7 +516,7 @@ export const CustomEventsBuilders: Record = { SELECT event_name, property_key, - COUNT(DISTINCT clean_value) as cardinality, + uniq(clean_value) as cardinality, COUNT(*) as total_count, AVG(length(clean_value)) as avg_length, MAX(length(clean_value)) as max_length, @@ -725,7 +725,7 @@ export const CustomEventsBuilders: Record = { event_name, property_key, SUM(count) as total, - COUNT(DISTINCT property_value) as cardinality + uniq(property_value) as cardinality FROM value_counts GROUP 
BY event_name, property_key HAVING cardinality <= 20 @@ -778,8 +778,8 @@ export const CustomEventsBuilders: Record = { SELECT event_name, COUNT(*) as total_events, - COUNT(DISTINCT anonymous_id) as unique_users, - COUNT(DISTINCT session_id) as unique_sessions + uniq(anonymous_id) as unique_users, + uniq(session_id) as unique_sessions FROM ${Analytics.custom_events} WHERE ${projectWhereClause(filterParams)} @@ -832,7 +832,7 @@ export const CustomEventsBuilders: Record = { SELECT event_name, property_key, - COUNT(DISTINCT property_value) as unique_values, + uniq(property_value) as unique_values, groupArray(tuple(property_value, count)) as top_values FROM ranked WHERE rn <= 5 diff --git a/packages/ai/src/query/builders/devices.ts b/packages/ai/src/query/builders/devices.ts index d2d96ab2b..ba1cd8101 100644 --- a/packages/ai/src/query/builders/devices.ts +++ b/packages/ai/src/query/builders/devices.ts @@ -44,9 +44,9 @@ export const DevicesBuilders: Record = { fields: [ "browser_name as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + ], + percentageOf: { of: "visitors" }, where: ["browser_name != ''", "event_name = 'screen_view'"], groupBy: ["browser_name"], orderBy: "visitors DESC", @@ -97,9 +97,9 @@ export const DevicesBuilders: Record = { fields: [ "os_name as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + ], + percentageOf: { of: "visitors" }, where: ["os_name != ''", "event_name = 'screen_view'"], groupBy: ["os_name"], orderBy: "visitors DESC", @@ -156,10 +156,10 @@ export const DevicesBuilders: Record = { fields: [ "viewport_size as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as 
visitors", + "uniq(anonymous_id) as visitors", "device_type", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + ], + percentageOf: { of: "visitors" }, where: ["viewport_size != ''", "event_name = 'screen_view'"], groupBy: ["viewport_size", "device_type"], orderBy: "visitors DESC", @@ -223,8 +223,8 @@ export const DevicesBuilders: Record = { "browser_name", "browser_version", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "COUNT(DISTINCT session_id) as sessions", + "uniq(anonymous_id) as visitors", + "uniq(session_id) as sessions", ], where: [ "browser_name != ''", @@ -281,9 +281,9 @@ export const DevicesBuilders: Record = { fields: [ "if(ifNull(device_type, '') = '', 'Desktop', initCap(device_type)) as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + ], + percentageOf: { of: "visitors" }, where: ["event_name = 'screen_view'"], groupBy: ["name"], orderBy: "visitors DESC", @@ -297,9 +297,9 @@ export const DevicesBuilders: Record = { fields: [ "browser_name as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + ], + percentageOf: { of: "visitors" }, where: ["browser_name != ''", "event_name = 'screen_view'"], groupBy: ["browser_name"], orderBy: "visitors DESC", @@ -314,9 +314,9 @@ export const DevicesBuilders: Record = { "browser_name", "browser_version", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + ], + percentageOf: { of: "visitors" }, where: [ 
"browser_name != ''", "browser_version != ''", @@ -334,9 +334,9 @@ export const DevicesBuilders: Record = { fields: [ "os_name as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + ], + percentageOf: { of: "visitors" }, where: ["os_name != ''", "event_name = 'screen_view'"], groupBy: ["os_name"], orderBy: "visitors DESC", @@ -350,7 +350,7 @@ export const DevicesBuilders: Record = { fields: [ "CONCAT(os_name, ' ', os_version) as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(anonymous_id) as visitors", ], where: ["os_name != ''", "os_version != ''", "event_name = 'screen_view'"], groupBy: ["os_name", "os_version"], @@ -365,7 +365,7 @@ export const DevicesBuilders: Record = { fields: [ "viewport_size as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(anonymous_id) as visitors", ], where: ["viewport_size != ''", "event_name = 'screen_view'"], groupBy: ["viewport_size"], @@ -415,7 +415,7 @@ export const DevicesBuilders: Record = { table: Analytics.events, fields: [ "viewport_size", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(anonymous_id) as visitors", "COUNT(*) as pageviews", "device_type", ], @@ -473,10 +473,10 @@ export const DevicesBuilders: Record = { table: Analytics.events, fields: [ "viewport_size", - "COUNT(DISTINCT anonymous_id) as visitors", - "COUNT(DISTINCT session_id) as sessions", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", - ], + "uniq(anonymous_id) as visitors", + "uniq(session_id) as sessions", + ], + percentageOf: { of: "visitors" }, where: [ "event_name = 'screen_view'", "viewport_size != ''", diff --git a/packages/ai/src/query/builders/engagement.ts b/packages/ai/src/query/builders/engagement.ts index 1ad4fda21..d84864a0b 
100644 --- a/packages/ai/src/query/builders/engagement.ts +++ b/packages/ai/src/query/builders/engagement.ts @@ -37,8 +37,8 @@ export const EngagementBuilders: Record = { table: Analytics.events, fields: [ "ROUND(AVG(CASE WHEN scroll_depth > 0 THEN scroll_depth ELSE NULL END), 1) as avg_scroll_depth", - "COUNT(DISTINCT session_id) as total_sessions", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(session_id) as total_sessions", + "uniq(anonymous_id) as visitors", ], where: ["event_name = 'page_exit'", "scroll_depth > 0"], timeField: "time", @@ -92,10 +92,10 @@ export const EngagementBuilders: Record = { "WHEN scroll_depth < 100 THEN '75-100%' " + "ELSE '100%' " + "END as depth_range", - "COUNT(DISTINCT anonymous_id) as visitors", - "COUNT(DISTINCT session_id) as sessions", - "ROUND((COUNT(DISTINCT session_id) / SUM(COUNT(DISTINCT session_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", + "uniq(session_id) as sessions", ], + percentageOf: { of: "sessions" }, where: ["event_name = 'page_exit'", "scroll_depth > 0"], groupBy: ["depth_range"], orderBy: @@ -152,8 +152,8 @@ export const EngagementBuilders: Record = { fields: [ "trimRight(path(path), '/') as name", "ROUND(AVG(CASE WHEN scroll_depth > 0 THEN scroll_depth ELSE NULL END), 1) as avg_scroll_depth", - "COUNT(DISTINCT anonymous_id) as visitors", - "COUNT(DISTINCT session_id) as sessions", + "uniq(anonymous_id) as visitors", + "uniq(session_id) as sessions", "COUNT(*) as pageviews", ], where: ["event_name = 'page_exit'", "path != ''", "scroll_depth > 0"], @@ -213,9 +213,9 @@ export const EngagementBuilders: Record = { table: Analytics.events, fields: [ "ROUND(AVG(CASE WHEN interaction_count >= 0 THEN interaction_count ELSE NULL END), 1) as avg_interactions", - "COUNT(DISTINCT CASE WHEN interaction_count > 0 THEN session_id ELSE NULL END) as interactive_sessions", - "ROUND((COUNT(DISTINCT CASE WHEN interaction_count > 0 THEN session_id ELSE NULL END) / COUNT(DISTINCT session_id)) * 
100, 1) as interaction_rate", - "COUNT(DISTINCT session_id) as total_sessions", + "uniqIf(session_id, interaction_count > 0) as interactive_sessions", + "ROUND((uniqIf(session_id, interaction_count > 0) / uniq(session_id)) * 100, 1) as interaction_rate", + "uniq(session_id) as total_sessions", ], where: ["event_name = 'screen_view'", "interaction_count >= 0"], timeField: "time", diff --git a/packages/ai/src/query/builders/geo.ts b/packages/ai/src/query/builders/geo.ts index 8065750fd..6c4512636 100644 --- a/packages/ai/src/query/builders/geo.ts +++ b/packages/ai/src/query/builders/geo.ts @@ -44,9 +44,9 @@ export const GeoBuilders: Record = { fields: [ "country as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["country != ''", "event_name = 'screen_view'"], groupBy: ["country"], orderBy: "visitors DESC", @@ -105,9 +105,9 @@ export const GeoBuilders: Record = { "region as name", "country", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["region != ''", "event_name = 'screen_view'"], groupBy: ["region", "country"], orderBy: "visitors DESC", @@ -122,9 +122,9 @@ export const GeoBuilders: Record = { fields: [ "timezone as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["timezone != ''", "event_name = 'screen_view'"], groupBy: ["timezone"], orderBy: "visitors DESC", @@ -138,9 +138,9 @@ export const GeoBuilders: Record = { 
fields: [ "language as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["language != ''", "event_name = 'screen_view'"], groupBy: ["language"], orderBy: "visitors DESC", @@ -198,9 +198,9 @@ export const GeoBuilders: Record = { "city as name", "country", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["city != ''", "event_name = 'screen_view'"], groupBy: ["city", "country"], orderBy: "visitors DESC", diff --git a/packages/ai/src/query/builders/links.ts b/packages/ai/src/query/builders/links.ts index 2c53c915a..f12623aab 100644 --- a/packages/ai/src/query/builders/links.ts +++ b/packages/ai/src/query/builders/links.ts @@ -513,8 +513,8 @@ export const LinksBuilders: Record = { href, text, COUNT(*) as total_clicks, - COUNT(DISTINCT anonymous_id) as unique_users, - COUNT(DISTINCT session_id) as unique_sessions, + uniq(anonymous_id) as unique_users, + uniq(session_id) as unique_sessions, ROUND((COUNT(*) / SUM(COUNT(*)) OVER()) * 100, 2) as percentage, MAX(timestamp) as last_clicked FROM enriched_links @@ -647,8 +647,8 @@ export const LinksBuilders: Record = { SELECT domain(href) as domain, COUNT(*) as total_clicks, - COUNT(DISTINCT anonymous_id) as unique_users, - COUNT(DISTINCT href) as unique_links, + uniq(anonymous_id) as unique_users, + uniq(href) as unique_links, ROUND((COUNT(*) / SUM(COUNT(*)) OVER()) * 100, 2) as percentage FROM enriched_links GROUP BY domain(href) diff --git a/packages/ai/src/query/builders/pages.ts b/packages/ai/src/query/builders/pages.ts index aaddb2c63..fff3ea52d 100644 --- a/packages/ai/src/query/builders/pages.ts 
+++ b/packages/ai/src/query/builders/pages.ts @@ -8,9 +8,9 @@ export const PagesBuilders: Record = { fields: [ "decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END) as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["event_name = 'screen_view'"], groupBy: [ "decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END)", @@ -120,18 +120,16 @@ export const PagesBuilders: Record = { session_entry AS ( SELECT e.session_id, - e.anonymous_id, - CASE WHEN trimRight(path(e.path), '/') = '' THEN '/' ELSE trimRight(path(e.path), '/') END as entry_page, - e.time as entry_time, - ROW_NUMBER() OVER (PARTITION BY e.session_id ORDER BY e.time) as page_rank, - sa.session_referrer as referrer, - sa.session_utm_source as utm_source, - sa.session_utm_medium as utm_medium, - sa.session_utm_campaign as utm_campaign, - sa.session_country as country, - sa.session_device_type as device_type, - sa.session_browser_name as browser_name, - sa.session_os_name as os_name + argMin(CASE WHEN trimRight(path(e.path), '/') = '' THEN '/' ELSE trimRight(path(e.path), '/') END, e.time) as entry_page, + argMin(e.anonymous_id, e.time) as anonymous_id, + any(sa.session_referrer) as referrer, + any(sa.session_utm_source) as utm_source, + any(sa.session_utm_medium) as utm_medium, + any(sa.session_utm_campaign) as utm_campaign, + any(sa.session_country) as country, + any(sa.session_device_type) as device_type, + any(sa.session_browser_name) as browser_name, + any(sa.session_os_name) as os_name FROM analytics.events e ${helpers.sessionAttributionJoin("e")} WHERE e.client_id = {websiteId:String} @@ -139,51 +137,45 @@ export const PagesBuilders: Record = { AND e.time <= 
toDateTime(concat({endDate:String}, ' 23:59:59')) AND e.event_name = 'screen_view' ${filterClause} + GROUP BY e.session_id )` : ` session_entry AS ( SELECT session_id, - anonymous_id, - CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END as entry_page, - time as entry_time, - ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY time) as page_rank + argMin(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END, time) as entry_page, + argMin(anonymous_id, time) as anonymous_id FROM analytics.events WHERE client_id = {websiteId:String} AND time >= toDateTime({startDate:String}) AND time <= toDateTime(concat({endDate:String}, ' 23:59:59')) AND event_name = 'screen_view' ${filterClause} + GROUP BY session_id )`; + const ctes = sessionAttributionCTE + ? `${sessionAttributionCTE}\n${sessionEntryQuery}` + : sessionEntryQuery; + return { - sql: sessionAttributionCTE - ? ` - WITH ${sessionAttributionCTE} - ${sessionEntryQuery} - SELECT - entry_page as name, - COUNT(*) as pageviews, - COUNT(DISTINCT anonymous_id) as visitors, - ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage - FROM session_entry - WHERE page_rank = 1 - GROUP BY entry_page - ORDER BY visitors DESC - LIMIT {limit:Int32} OFFSET {offset:Int32}` - : ` - WITH ${sessionEntryQuery} - SELECT - entry_page as name, - COUNT(*) as pageviews, - COUNT(DISTINCT anonymous_id) as visitors, - ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage - FROM session_entry - WHERE page_rank = 1 - GROUP BY entry_page + sql: ` + WITH ${ctes} + SELECT + name, + pageviews, + visitors, + ROUND(visitors / sum(visitors) OVER () * 100, 2) AS percentage + FROM ( + SELECT + entry_page as name, + COUNT(*) as pageviews, + uniq(anonymous_id) as visitors + FROM session_entry + GROUP BY entry_page + ) ORDER BY visitors DESC - LIMIT {limit:Int32} OFFSET {offset:Int32} - `, + LIMIT 
{limit:Int32} OFFSET {offset:Int32}`, params: { websiteId, startDate, @@ -230,20 +222,21 @@ export const PagesBuilders: Record = { ? `${helpers.sessionAttributionCTE("time")},` : ""; - const sessionsQuery = helpers?.sessionAttributionCTE + const sessionExitsQuery = helpers?.sessionAttributionCTE ? ` - sessions AS ( + session_exit AS ( SELECT e.session_id, - MAX(e.time) as session_end_time, - sa.session_referrer as referrer, - sa.session_utm_source as utm_source, - sa.session_utm_medium as utm_medium, - sa.session_utm_campaign as utm_campaign, - sa.session_country as country, - sa.session_device_type as device_type, - sa.session_browser_name as browser_name, - sa.session_os_name as os_name + argMax(CASE WHEN trimRight(path(e.path), '/') = '' THEN '/' ELSE trimRight(path(e.path), '/') END, e.time) as exit_page, + argMax(e.anonymous_id, e.time) as anonymous_id, + any(sa.session_referrer) as referrer, + any(sa.session_utm_source) as utm_source, + any(sa.session_utm_medium) as utm_medium, + any(sa.session_utm_campaign) as utm_campaign, + any(sa.session_country) as country, + any(sa.session_device_type) as device_type, + any(sa.session_browser_name) as browser_name, + any(sa.session_os_name) as os_name FROM analytics.events e ${helpers.sessionAttributionJoin("e")} WHERE e.client_id = {websiteId:String} @@ -251,13 +244,14 @@ export const PagesBuilders: Record = { AND e.time <= toDateTime(concat({endDate:String}, ' 23:59:59')) AND e.event_name = 'screen_view' ${filterClause} - GROUP BY e.session_id, sa.session_referrer, sa.session_utm_source, sa.session_utm_medium, sa.session_utm_campaign, sa.session_country, sa.session_device_type, sa.session_browser_name, sa.session_os_name - ),` + GROUP BY e.session_id + )` : ` - sessions AS ( + session_exit AS ( SELECT session_id, - MAX(time) as session_end_time + argMax(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END, time) as exit_page, + argMax(anonymous_id, time) as anonymous_id FROM 
analytics.events WHERE client_id = {websiteId:String} AND time >= toDateTime({startDate:String}) @@ -265,35 +259,27 @@ export const PagesBuilders: Record = { AND event_name = 'screen_view' ${filterClause} GROUP BY session_id - ),`; + )`; return { sql: ` WITH ${sessionAttributionCTE} - ${sessionsQuery} - exit_pages AS ( + ${sessionExitsQuery} + SELECT + name, + pageviews, + visitors, + ROUND(visitors / sum(visitors) OVER () * 100, 2) AS percentage + FROM ( SELECT - CASE WHEN trimRight(path(e.path), '/') = '' THEN '/' ELSE trimRight(path(e.path), '/') END as path, - e.session_id, - e.anonymous_id - FROM analytics.events e - INNER JOIN sessions s ON e.session_id = s.session_id AND e.time = s.session_end_time - WHERE e.client_id = {websiteId:String} - AND e.time >= toDateTime({startDate:String}) - AND e.time <= toDateTime(concat({endDate:String}, ' 23:59:59')) - AND e.event_name = 'screen_view' - ${filterClause} + exit_page as name, + uniq(session_id) as pageviews, + uniq(anonymous_id) as visitors + FROM session_exit + GROUP BY exit_page ) - SELECT - path as name, - COUNT(DISTINCT session_id) as pageviews, - COUNT(DISTINCT anonymous_id) as visitors, - ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage - FROM exit_pages - GROUP BY path ORDER BY visitors DESC - LIMIT {limit:Int32} OFFSET {offset:Int32} - `, + LIMIT {limit:Int32} OFFSET {offset:Int32}`, params: { websiteId, startDate, @@ -312,7 +298,7 @@ export const PagesBuilders: Record = { "decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END) as name", "COUNT(*) as pageviews", "ROUND(AVG(CASE WHEN time_on_page > 0 THEN time_on_page / 1000 ELSE NULL END), 2) as avg_time_on_page", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(anonymous_id) as visitors", ], where: ["event_name = 'screen_view'"], groupBy: [ @@ -373,55 +359,59 @@ export const PagesBuilders: Record = { ? 
`${helpers.sessionAttributionCTE("time")}` : ""; - const baseQuery = helpers?.sessionAttributionCTE + const perPageCTE = helpers?.sessionAttributionCTE ? ` - SELECT - decodeURLComponent(CASE WHEN trimRight(path(e.path), '/') = '' THEN '/' ELSE trimRight(path(e.path), '/') END) as name, - COUNT(*) as sessions_with_time, - COUNT(DISTINCT e.anonymous_id) as visitors, - ROUND(quantileTDigest(0.5)(e.time_on_page), 2) as median_time_on_page, - ROUND((COUNT(DISTINCT e.anonymous_id) / SUM(COUNT(DISTINCT e.anonymous_id)) OVER()) * 100, 2) as percentage - FROM analytics.events e - ${helpers.sessionAttributionJoin("e")} - WHERE e.client_id = {websiteId:String} - AND e.time >= toDateTime({startDate:String}) - AND e.time <= toDateTime(concat({endDate:String}, ' 23:59:59')) - AND e.event_name = 'page_exit' - AND e.time_on_page IS NOT NULL - AND e.time_on_page > 1 - AND e.time_on_page < 3600 - ${filterClause} - GROUP BY name - HAVING COUNT(*) >= 1 - ORDER BY visitors DESC - LIMIT {limit:Int32} OFFSET {offset:Int32}` + per_page AS ( + SELECT + decodeURLComponent(CASE WHEN trimRight(path(e.path), '/') = '' THEN '/' ELSE trimRight(path(e.path), '/') END) as name, + COUNT(*) as sessions_with_time, + uniq(e.anonymous_id) as visitors, + quantileTDigest(0.5)(e.time_on_page) as median_raw + FROM analytics.events e + ${helpers.sessionAttributionJoin("e")} + WHERE e.client_id = {websiteId:String} + AND e.time >= toDateTime({startDate:String}) + AND e.time <= toDateTime(concat({endDate:String}, ' 23:59:59')) + AND e.event_name = 'page_exit' + AND e.time_on_page > 1 + AND e.time_on_page < 3600 + ${filterClause} + GROUP BY name + )` : ` - SELECT - decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END) as name, - COUNT(*) as sessions_with_time, - COUNT(DISTINCT anonymous_id) as visitors, - ROUND(quantileTDigest(0.5)(time_on_page), 2) as median_time_on_page, - ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 
100, 2) as percentage - FROM analytics.events - WHERE client_id = {websiteId:String} - AND time >= toDateTime({startDate:String}) - AND time <= toDateTime(concat({endDate:String}, ' 23:59:59')) - AND event_name = 'page_exit' - AND time_on_page IS NOT NULL - AND time_on_page > 1 - AND time_on_page < 3600 - ${filterClause} - GROUP BY name - HAVING COUNT(*) >= 1 - ORDER BY visitors DESC - LIMIT {limit:Int32} OFFSET {offset:Int32}`; + per_page AS ( + SELECT + decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END) as name, + COUNT(*) as sessions_with_time, + uniq(anonymous_id) as visitors, + quantileTDigest(0.5)(time_on_page) as median_raw + FROM analytics.events + WHERE client_id = {websiteId:String} + AND time >= toDateTime({startDate:String}) + AND time <= toDateTime(concat({endDate:String}, ' 23:59:59')) + AND event_name = 'page_exit' + AND time_on_page > 1 + AND time_on_page < 3600 + ${filterClause} + GROUP BY name + )`; + + const ctePrefix = sessionAttributionCTE + ? `${sessionAttributionCTE},\n${perPageCTE}` + : perPageCTE; return { - sql: sessionAttributionCTE - ? 
` - WITH ${sessionAttributionCTE} - ${baseQuery}` - : baseQuery, + sql: ` + WITH ${ctePrefix} + SELECT + name, + sessions_with_time, + visitors, + ROUND(median_raw, 2) as median_time_on_page, + ROUND(visitors / sum(visitors) OVER () * 100, 2) as percentage + FROM per_page + ORDER BY visitors DESC + LIMIT {limit:Int32} OFFSET {offset:Int32}`, params: { websiteId, startDate, diff --git a/packages/ai/src/query/builders/performance.ts b/packages/ai/src/query/builders/performance.ts index 4980b53a9..da4dd194a 100644 --- a/packages/ai/src/query/builders/performance.ts +++ b/packages/ai/src/query/builders/performance.ts @@ -22,13 +22,13 @@ const WEB_VITALS_SESSION_DIMENSIONS_CTE = ` `; const WEB_VITALS_METRICS = ` - COUNT(DISTINCT wv.anonymous_id) as visitors, + uniq(wv.anonymous_id) as visitors, avgIf(wv.metric_value, wv.metric_name = 'FCP' AND wv.metric_value > 0) as avg_fcp, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'FCP' AND wv.metric_value > 0) as p50_fcp, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'FCP' AND wv.metric_value > 0) as p50_fcp, avgIf(wv.metric_value, wv.metric_name = 'LCP' AND wv.metric_value > 0) as avg_lcp, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'LCP' AND wv.metric_value > 0) as p50_lcp, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'LCP' AND wv.metric_value > 0) as p50_lcp, avgIf(wv.metric_value, wv.metric_name = 'CLS') as avg_cls, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'CLS') as p50_cls, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'CLS') as p50_cls, avgIf(wv.metric_value, wv.metric_name = 'INP' AND wv.metric_value > 0) as avg_inp, avgIf(wv.metric_value, wv.metric_name = 'TTFB' AND wv.metric_value > 0) as avg_ttfb, COUNT(*) as measurements @@ -79,15 +79,15 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END) as name", - 
"COUNT(DISTINCT anonymous_id) as visitors", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "uniq(anonymous_id) as visitors", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "AVG(CASE WHEN ttfb > 0 THEN ttfb ELSE NULL END) as avg_ttfb", "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_dom_ready_time", "AVG(CASE WHEN render_time > 0 THEN render_time ELSE NULL END) as avg_render_time", "COUNT(*) as pageviews", ], - where: ["event_name = 'screen_view'", "path != ''", "load_time > 0"], + where: ["event_name = 'screen_view'", "path != ''", "dom_ready_time > 0"], groupBy: [ "decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END)", ], @@ -109,9 +109,9 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "browser_name as name", - "COUNT(DISTINCT anonymous_id) as visitors", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "uniq(anonymous_id) as visitors", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "AVG(CASE WHEN ttfb > 0 THEN ttfb ELSE NULL END) as avg_ttfb", "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_dom_ready_time", "AVG(CASE WHEN render_time > 0 THEN render_time ELSE NULL END) as avg_render_time", @@ -120,7 +120,7 @@ export const PerformanceBuilders: Record = { where: [ "event_name = 'screen_view'", "browser_name != ''", - "load_time > 0", + "dom_ready_time > 0", ], groupBy: 
["browser_name"], orderBy: "p50_load_time DESC", @@ -142,15 +142,15 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "country as name", - "COUNT(DISTINCT anonymous_id) as visitors", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "uniq(anonymous_id) as visitors", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "AVG(CASE WHEN ttfb > 0 THEN ttfb ELSE NULL END) as avg_ttfb", "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_dom_ready_time", "AVG(CASE WHEN render_time > 0 THEN render_time ELSE NULL END) as avg_render_time", "COUNT(*) as pageviews", ], - where: ["event_name = 'screen_view'", "country != ''", "load_time > 0"], + where: ["event_name = 'screen_view'", "country != ''", "dom_ready_time > 0"], groupBy: ["country"], orderBy: "p50_load_time DESC", limit: 100, @@ -172,15 +172,15 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "os_name as name", - "COUNT(DISTINCT anonymous_id) as visitors", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "uniq(anonymous_id) as visitors", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "AVG(CASE WHEN ttfb > 0 THEN ttfb ELSE NULL END) as avg_ttfb", "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_dom_ready_time", "AVG(CASE WHEN render_time > 0 THEN render_time ELSE NULL END) as avg_render_time", "COUNT(*) as pageviews", ], - where: ["event_name = 
'screen_view'", "os_name != ''", "load_time > 0"], + where: ["event_name = 'screen_view'", "os_name != ''", "dom_ready_time > 0"], groupBy: ["os_name"], orderBy: "p50_load_time DESC", limit: 100, @@ -201,15 +201,15 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "CONCAT(region, ', ', country) as name", - "COUNT(DISTINCT anonymous_id) as visitors", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "uniq(anonymous_id) as visitors", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "AVG(CASE WHEN ttfb > 0 THEN ttfb ELSE NULL END) as avg_ttfb", "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_dom_ready_time", "AVG(CASE WHEN render_time > 0 THEN render_time ELSE NULL END) as avg_render_time", "COUNT(*) as pageviews", ], - where: ["event_name = 'screen_view'", "region != ''", "load_time > 0"], + where: ["event_name = 'screen_view'", "region != ''", "dom_ready_time > 0"], groupBy: ["region", "country"], orderBy: "p50_load_time DESC", limit: 100, @@ -239,8 +239,8 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "toDate(time) as date", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "AVG(CASE WHEN ttfb > 0 THEN ttfb ELSE NULL END) as avg_ttfb", "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_dom_ready_time", "AVG(CASE WHEN render_time > 0 THEN render_time 
ELSE NULL END) as avg_render_time", @@ -272,11 +272,11 @@ export const PerformanceBuilders: Record = { table: Analytics.events, fields: [ "toDate(time) as date", - "AVG(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as avg_load_time", - "quantileTDigest(0.50)(CASE WHEN load_time > 0 THEN load_time ELSE NULL END) as p50_load_time", + "AVG(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as avg_load_time", + "quantileTDigest(0.50)(CASE WHEN dom_ready_time > 0 THEN dom_ready_time ELSE NULL END) as p50_load_time", "COUNT(*) as pageviews", ], - where: ["event_name = 'screen_view'", "load_time > 0"], + where: ["event_name = 'screen_view'", "dom_ready_time > 0"], groupBy: ["toDate(time)"], orderBy: "date ASC", timeField: "time", @@ -300,13 +300,13 @@ export const PerformanceBuilders: Record = { sql: ` SELECT decodeURLComponent(CASE WHEN trimRight(path(path), '/') = '' THEN '/' ELSE trimRight(path(path), '/') END) as name, - COUNT(DISTINCT anonymous_id) as visitors, + uniq(anonymous_id) as visitors, avgIf(metric_value, metric_name = 'FCP' AND metric_value > 0) as avg_fcp, - quantileIf(0.50)(metric_value, metric_name = 'FCP' AND metric_value > 0) as p50_fcp, + quantileTDigestIf(0.50)(metric_value, metric_name = 'FCP' AND metric_value > 0) as p50_fcp, avgIf(metric_value, metric_name = 'LCP' AND metric_value > 0) as avg_lcp, - quantileIf(0.50)(metric_value, metric_name = 'LCP' AND metric_value > 0) as p50_lcp, + quantileTDigestIf(0.50)(metric_value, metric_name = 'LCP' AND metric_value > 0) as p50_lcp, avgIf(metric_value, metric_name = 'CLS') as avg_cls, - quantileIf(0.50)(metric_value, metric_name = 'CLS') as p50_cls, + quantileTDigestIf(0.50)(metric_value, metric_name = 'CLS') as p50_cls, avgIf(metric_value, metric_name = 'INP' AND metric_value > 0) as avg_inp, avgIf(metric_value, metric_name = 'TTFB' AND metric_value > 0) as avg_ttfb, COUNT(*) as measurements @@ -508,15 +508,15 @@ export const PerformanceBuilders: Record = { SELECT toDate(timestamp) 
as date, avgIf(metric_value, metric_name = 'FCP' AND metric_value > 0) as avg_fcp, - quantileIf(0.50)(metric_value, metric_name = 'FCP' AND metric_value > 0) as p50_fcp, + quantileTDigestIf(0.50)(metric_value, metric_name = 'FCP' AND metric_value > 0) as p50_fcp, avgIf(metric_value, metric_name = 'LCP' AND metric_value > 0) as avg_lcp, - quantileIf(0.50)(metric_value, metric_name = 'LCP' AND metric_value > 0) as p50_lcp, + quantileTDigestIf(0.50)(metric_value, metric_name = 'LCP' AND metric_value > 0) as p50_lcp, avgIf(metric_value, metric_name = 'CLS') as avg_cls, - quantileIf(0.50)(metric_value, metric_name = 'CLS') as p50_cls, + quantileTDigestIf(0.50)(metric_value, metric_name = 'CLS') as p50_cls, avgIf(metric_value, metric_name = 'INP' AND metric_value > 0) as avg_inp, - quantileIf(0.50)(metric_value, metric_name = 'INP' AND metric_value > 0) as p50_inp, + quantileTDigestIf(0.50)(metric_value, metric_name = 'INP' AND metric_value > 0) as p50_inp, avgIf(metric_value, metric_name = 'TTFB' AND metric_value > 0) as avg_ttfb, - quantileIf(0.50)(metric_value, metric_name = 'TTFB' AND metric_value > 0) as p50_ttfb, + quantileTDigestIf(0.50)(metric_value, metric_name = 'TTFB' AND metric_value > 0) as p50_ttfb, COUNT(*) as measurements FROM ${Analytics.web_vitals_spans} WHERE diff --git a/packages/ai/src/query/builders/profiles.ts b/packages/ai/src/query/builders/profiles.ts index 55da2f51e..63c7f79c8 100644 --- a/packages/ai/src/query/builders/profiles.ts +++ b/packages/ai/src/query/builders/profiles.ts @@ -183,9 +183,9 @@ export const ProfilesBuilders: Record = { anonymous_id as visitor_id, MIN(time) as first_visit, MAX(time) as last_visit, - COUNT(DISTINCT session_id) as session_count, + uniq(session_id) as session_count, COUNT(*) as total_events, - COUNT(DISTINCT CASE WHEN event_name = 'screen_view' THEN path ELSE NULL END) as unique_pages, + uniqIf(path, event_name = 'screen_view') as unique_pages, any(user_agent) as user_agent, any(country) as country, 
any(region) as region, @@ -209,7 +209,7 @@ export const ProfilesBuilders: Record = { SELECT ce.anonymous_id as visitor_id, COUNT(*) as custom_event_count, - COUNT(DISTINCT ce.event_name) as unique_event_names + uniq(ce.event_name) as unique_event_names FROM ${Analytics.custom_events} ce INNER JOIN visitor_profiles vp ON ce.anonymous_id = vp.visitor_id WHERE (ce.owner_id = {websiteId:String} OR ce.website_id = {websiteId:String}) @@ -225,7 +225,7 @@ export const ProfilesBuilders: Record = { MAX(e.time) as session_end, LEAST(dateDiff('second', MIN(e.time), MAX(e.time)), 28800) as duration, COUNT(*) as page_views, - COUNT(DISTINCT CASE WHEN e.event_name = 'screen_view' THEN e.path ELSE NULL END) as unique_pages, + uniqIf(e.path, e.event_name = 'screen_view') as unique_pages, any(e.user_agent) as user_agent, any(e.country) as country, any(e.region) as region, @@ -326,7 +326,7 @@ export const ProfilesBuilders: Record = { {visitorId:String} as visitor_id, MIN(time) as first_visit, MAX(time) as last_visit, - COUNT(DISTINCT session_id) as total_sessions + uniq(session_id) as total_sessions FROM profile_activity WHERE session_id != '' ), @@ -420,7 +420,7 @@ export const ProfilesBuilders: Record = { SELECT e.session_id, countIf(event_name = 'screen_view') as page_views, - COUNT(DISTINCT CASE WHEN event_name = 'screen_view' THEN path ELSE NULL END) as unique_pages, + uniqIf(path, event_name = 'screen_view') as unique_pages, any(device_type) as device, any(browser_name) as browser, any(os_name) as os, diff --git a/packages/ai/src/query/builders/revenue.ts b/packages/ai/src/query/builders/revenue.ts index e4b1fa07b..cd335d11a 100644 --- a/packages/ai/src/query/builders/revenue.ts +++ b/packages/ai/src/query/builders/revenue.ts @@ -144,6 +144,13 @@ function buildAttributionCte( AND r.session_id IS NOT NULL AND r.session_id != '' GROUP BY r.customer_id ), + attributed_sessions AS ( + SELECT r_session_id AS session_id FROM revenue_base + WHERE r_session_id IS NOT NULL AND 
r_session_id != '' + UNION DISTINCT + SELECT mapped_session_id AS session_id FROM customer_session_map + WHERE mapped_session_id IS NOT NULL AND mapped_session_id != '' + ), first_touch_by_session AS ( SELECT session_id, @@ -160,7 +167,7 @@ function buildAttributionCte( argMin(path, time) as first_path FROM ${Analytics.events} WHERE client_id = {websiteId:String} - AND session_id != '' + AND session_id IN (SELECT session_id FROM attributed_sessions) AND time >= toDateTime({startDate:String}) - INTERVAL 90 DAY AND time <= toDateTime(concat({endDate:String}, ' 23:59:59')) GROUP BY session_id diff --git a/packages/ai/src/query/builders/sessions.ts b/packages/ai/src/query/builders/sessions.ts index b99cc6482..f831229e9 100644 --- a/packages/ai/src/query/builders/sessions.ts +++ b/packages/ai/src/query/builders/sessions.ts @@ -54,8 +54,8 @@ export const SessionsBuilders: Record = { "WHEN time_on_page < 3600 THEN '15m-1h' " + "ELSE '1h+' " + "END as duration_range", - "COUNT(DISTINCT session_id) as sessions", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(session_id) as sessions", + "uniq(anonymous_id) as visitors", ], where: ["event_name = 'screen_view'", "time_on_page > 0"], groupBy: ["duration_range"], @@ -68,8 +68,8 @@ export const SessionsBuilders: Record = { table: Analytics.events, fields: [ "device_type as name", - "COUNT(DISTINCT session_id) as sessions", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(session_id) as sessions", + "uniq(anonymous_id) as visitors", "ROUND(AVG(CASE WHEN time_on_page > 0 THEN time_on_page / 1000 ELSE NULL END), 2) as avg_session_duration", ], where: ["event_name = 'screen_view'", "device_type != ''"], @@ -83,8 +83,8 @@ export const SessionsBuilders: Record = { table: Analytics.events, fields: [ "browser_name as name", - "COUNT(DISTINCT session_id) as sessions", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(session_id) as sessions", + "uniq(anonymous_id) as visitors", "ROUND(AVG(CASE WHEN time_on_page > 0 THEN 
time_on_page / 1000 ELSE NULL END), 2) as avg_session_duration", ], where: ["event_name = 'screen_view'", "browser_name != ''"], @@ -99,8 +99,8 @@ export const SessionsBuilders: Record = { table: Analytics.events, fields: [ "toDate(time) as date", - "COUNT(DISTINCT session_id) as sessions", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(session_id) as sessions", + "uniq(anonymous_id) as visitors", "ROUND(AVG(CASE WHEN time_on_page > 0 THEN time_on_page / 1000 ELSE NULL END), 2) as avg_session_duration", ], where: ["event_name = 'screen_view'"], @@ -156,8 +156,8 @@ export const SessionsBuilders: Record = { table: Analytics.events, fields: [ "path as name", - "COUNT(DISTINCT session_id) as sessions", - "COUNT(DISTINCT anonymous_id) as visitors", + "uniq(session_id) as sessions", + "uniq(anonymous_id) as visitors", ], where: ["event_name = 'screen_view'", "path != ''"], groupBy: ["path"], @@ -188,8 +188,7 @@ export const SessionsBuilders: Record = { any(os_name) as os_name, countIf(event_name = 'screen_view') as page_views, uniqIf(path, event_name = 'screen_view' AND path != '') as unique_pages, - countIf(event_name NOT IN ('screen_view', 'page_exit', 'web_vitals', 'link_out')) as analytics_engagement_events, - groupUniqArrayIf(12)(path, event_name = 'screen_view' AND path != '') as paths + countIf(event_name NOT IN ('screen_view', 'page_exit', 'web_vitals', 'link_out')) as analytics_engagement_events FROM ${Analytics.events} WHERE client_id = {websiteId:String} @@ -198,11 +197,8 @@ export const SessionsBuilders: Record = { AND session_id != '' GROUP BY session_id ), - custom_by_session AS ( - SELECT - session_id, - count() as custom_events, - groupUniqArray(8)(event_name) as custom_event_names + custom_counts AS ( + SELECT session_id, count() as custom_events FROM ${Analytics.custom_events} WHERE website_id = {websiteId:String} @@ -220,38 +216,85 @@ export const SessionsBuilders: Record = { AND timestamp <= toDateTime({endDate:String}) AND session_id != '' 
GROUP BY session_id + ), + top_sessions AS ( + SELECT + bs.session_id, + bs.visitor_id, + bs.first_visit, + bs.last_visit, + bs.duration_seconds, + bs.page_views, + bs.unique_pages, + bs.analytics_engagement_events, + ifNull(cc.custom_events, 0) as custom_events, + ifNull(es.errors, 0) as errors, + bs.country, + bs.referrer, + bs.device_type, + bs.browser_name, + bs.os_name, + ( + least(bs.page_views, 10) * 2 + + least(bs.unique_pages, 8) * 3 + + least(bs.analytics_engagement_events + ifNull(cc.custom_events, 0), 20) + + least(ifNull(es.errors, 0), 10) * 2 + + if(bs.duration_seconds >= 120, 5, 0) + ) as interesting_score + FROM base_sessions bs + LEFT JOIN custom_counts cc ON bs.session_id = cc.session_id + LEFT JOIN errors_by_session es ON bs.session_id = es.session_id + WHERE bs.page_views > 0 + ORDER BY interesting_score DESC, bs.last_visit DESC + LIMIT {limit:Int32} OFFSET {offset:Int32} + ), + paths_for_top AS ( + SELECT + session_id, + groupUniqArrayIf(12)(path, event_name = 'screen_view' AND path != '') as paths + FROM ${Analytics.events} + WHERE + client_id = {websiteId:String} + AND time >= toDateTime({startDate:String}) + AND time <= toDateTime({endDate:String}) + AND session_id IN (SELECT session_id FROM top_sessions) + GROUP BY session_id + ), + names_for_top AS ( + SELECT + session_id, + groupUniqArray(8)(event_name) as custom_event_names + FROM ${Analytics.custom_events} + WHERE + website_id = {websiteId:String} + AND timestamp >= toDateTime({startDate:String}) + AND timestamp <= toDateTime({endDate:String}) + AND session_id IN (SELECT session_id FROM top_sessions) + GROUP BY session_id ) SELECT - bs.session_id, - bs.visitor_id, - bs.first_visit, - bs.last_visit, - bs.duration_seconds, - bs.page_views, - bs.unique_pages, - bs.analytics_engagement_events, - ifNull(cs.custom_events, 0) as custom_events, - ifNull(es.errors, 0) as errors, - bs.paths, - ifNull(cs.custom_event_names, []) as custom_event_names, - bs.country, - bs.referrer, - bs.device_type, 
- bs.browser_name, - bs.os_name, - ( - least(bs.page_views, 10) * 2 - + least(bs.unique_pages, 8) * 3 - + least(bs.analytics_engagement_events + ifNull(cs.custom_events, 0), 20) - + least(ifNull(es.errors, 0), 10) * 2 - + if(bs.duration_seconds >= 120, 5, 0) - ) as interesting_score - FROM base_sessions bs - LEFT JOIN custom_by_session cs ON bs.session_id = cs.session_id - LEFT JOIN errors_by_session es ON bs.session_id = es.session_id - WHERE bs.page_views > 0 - ORDER BY interesting_score DESC, bs.last_visit DESC - LIMIT {limit:Int32} OFFSET {offset:Int32} + ts.session_id, + ts.visitor_id, + ts.first_visit, + ts.last_visit, + ts.duration_seconds, + ts.page_views, + ts.unique_pages, + ts.analytics_engagement_events, + ts.custom_events, + ts.errors, + ifNull(pt.paths, []) as paths, + ifNull(nt.custom_event_names, []) as custom_event_names, + ts.country, + ts.referrer, + ts.device_type, + ts.browser_name, + ts.os_name, + ts.interesting_score + FROM top_sessions ts + LEFT JOIN paths_for_top pt ON ts.session_id = pt.session_id + LEFT JOIN names_for_top nt ON ts.session_id = nt.session_id + ORDER BY ts.interesting_score DESC, ts.last_visit DESC `, params: { websiteId, diff --git a/packages/ai/src/query/builders/traffic.ts b/packages/ai/src/query/builders/traffic.ts index 2564e447d..9b4fec8e8 100644 --- a/packages/ai/src/query/builders/traffic.ts +++ b/packages/ai/src/query/builders/traffic.ts @@ -46,9 +46,9 @@ export const TrafficBuilders: Record = { fields: [ `${Expressions.referrer.normalized} as name`, "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: [ "referrer != ''", "referrer IS NOT NULL", @@ -123,9 +123,9 @@ export const TrafficBuilders: Record = { fields: [ "utm_source as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - 
"ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["utm_source != ''", "event_name = 'screen_view'"], groupBy: ["utm_source"], orderBy: "visitors DESC", @@ -154,9 +154,9 @@ export const TrafficBuilders: Record = { fields: [ "utm_medium as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["utm_medium != ''", "event_name = 'screen_view'"], groupBy: ["utm_medium"], orderBy: "visitors DESC", @@ -222,9 +222,9 @@ export const TrafficBuilders: Record = { fields: [ "utm_campaign as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["utm_campaign != ''", "event_name = 'screen_view'"], groupBy: ["utm_campaign"], orderBy: "visitors DESC", @@ -290,9 +290,9 @@ export const TrafficBuilders: Record = { fields: [ "utm_term as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: [ "utm_term != ''", "utm_term IS NOT NULL", @@ -360,9 +360,9 @@ export const TrafficBuilders: Record = { fields: [ "utm_content as name", "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: [ "utm_content != ''", 
"utm_content IS NOT NULL", @@ -431,9 +431,9 @@ export const TrafficBuilders: Record = { fields: [ `${Expressions.referrer.sourceWithDirect()} as name`, "COUNT(*) as pageviews", - "COUNT(DISTINCT anonymous_id) as visitors", - "ROUND((COUNT(DISTINCT anonymous_id) / SUM(COUNT(DISTINCT anonymous_id)) OVER()) * 100, 2) as percentage", + "uniq(anonymous_id) as visitors", ], + percentageOf: { of: "visitors" }, where: ["event_name = 'screen_view'"], groupBy: ["name"], orderBy: "visitors DESC", diff --git a/packages/ai/src/query/builders/vitals.ts b/packages/ai/src/query/builders/vitals.ts index a917e9bc3..458b59358 100644 --- a/packages/ai/src/query/builders/vitals.ts +++ b/packages/ai/src/query/builders/vitals.ts @@ -23,12 +23,12 @@ const VITALS_SESSION_DIMENSIONS_CTE = ` `; const VITALS_P50_METRICS = ` - COUNT(DISTINCT wv.anonymous_id) as visitors, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'LCP' AND wv.metric_value > 0) as p50_lcp, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'FCP' AND wv.metric_value > 0) as p50_fcp, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'CLS') as p50_cls, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'INP' AND wv.metric_value > 0) as p50_inp, - quantileIf(0.50)(wv.metric_value, wv.metric_name = 'TTFB' AND wv.metric_value > 0) as p50_ttfb, + uniq(wv.anonymous_id) as visitors, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'LCP' AND wv.metric_value > 0) as p50_lcp, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'FCP' AND wv.metric_value > 0) as p50_fcp, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'CLS') as p50_cls, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'INP' AND wv.metric_value > 0) as p50_inp, + quantileTDigestIf(0.50)(wv.metric_value, wv.metric_name = 'TTFB' AND wv.metric_value > 0) as p50_ttfb, COUNT(*) as samples `; @@ -38,10 +38,12 @@ interface VitalsByDimensionConfig { groupBy: string; metrics?: string; selectName: string; + 
needsSessionDimensions?: boolean; } function vitalsByDimension(config: VitalsByDimensionConfig): CustomSqlFn { const metrics = config.metrics ?? VITALS_P50_METRICS; + const needsSd = config.needsSessionDimensions ?? true; return ({ websiteId, startDate, @@ -52,14 +54,18 @@ function vitalsByDimension(config: VitalsByDimensionConfig): CustomSqlFn { }) => { const effectiveLimit = limit ?? config.defaultLimit; const filterClause = appendFilterClause(filterConditions); + const withCte = needsSd ? `WITH ${VITALS_SESSION_DIMENSIONS_CTE}` : ""; + const joinSd = needsSd + ? "INNER JOIN session_dimensions sd ON wv.session_id = sd.session_id AND wv.client_id = sd.client_id" + : ""; return { sql: ` - WITH ${VITALS_SESSION_DIMENSIONS_CTE} + ${withCte} SELECT ${config.selectName}, ${metrics} FROM ${Analytics.web_vitals_spans} wv - INNER JOIN session_dimensions sd ON wv.session_id = sd.session_id AND wv.client_id = sd.client_id + ${joinSd} WHERE wv.client_id = {websiteId:String} AND wv.timestamp >= toDateTime({startDate:String}) @@ -83,11 +89,12 @@ function vitalsByDimension(config: VitalsByDimensionConfig): CustomSqlFn { const VITALS_PAGE_METRICS = ` wv.metric_name as metric_name, - quantileTDigest(0.50)(wv.metric_value) as p50, - quantileTDigest(0.75)(wv.metric_value) as p75, - quantileTDigest(0.90)(wv.metric_value) as p90, - quantileTDigest(0.95)(wv.metric_value) as p95, - quantileTDigest(0.99)(wv.metric_value) as p99, + quantilesTDigest(0.50, 0.75, 0.90, 0.95, 0.99)(wv.metric_value) as _q, + _q[1] as p50, + _q[2] as p75, + _q[3] as p90, + _q[4] as p95, + _q[5] as p99, count() as samples `; @@ -126,17 +133,18 @@ export const VitalsBuilders: Record = { const { websiteId, startDate, endDate } = ctx; return { sql: ` - SELECT + SELECT metric_name, - quantileTDigest(0.50)(metric_value) as p50, - quantileTDigest(0.75)(metric_value) as p75, - quantileTDigest(0.90)(metric_value) as p90, - quantileTDigest(0.95)(metric_value) as p95, - quantileTDigest(0.99)(metric_value) as p99, + 
quantilesTDigest(0.50, 0.75, 0.90, 0.95, 0.99)(metric_value) as _q, + _q[1] as p50, + _q[2] as p75, + _q[3] as p90, + _q[4] as p95, + _q[5] as p99, avg(metric_value) as avg_value, count() as samples FROM ${Analytics.web_vitals_spans} - WHERE + WHERE client_id = {websiteId:String} AND timestamp >= toDateTime({startDate:String}) AND timestamp <= toDateTime(concat({endDate:String}, ' 23:59:59')) @@ -174,14 +182,15 @@ export const VitalsBuilders: Record = { const { websiteId, startDate, endDate } = ctx; return { sql: ` - SELECT + SELECT toDate(timestamp) as date, metric_name, - quantileTDigest(0.50)(metric_value) as p50, - quantileTDigest(0.75)(metric_value) as p75, - quantileTDigest(0.90)(metric_value) as p90, - quantileTDigest(0.95)(metric_value) as p95, - quantileTDigest(0.99)(metric_value) as p99, + quantilesTDigest(0.50, 0.75, 0.90, 0.95, 0.99)(metric_value) as _q, + _q[1] as p50, + _q[2] as p75, + _q[3] as p90, + _q[4] as p95, + _q[5] as p99, count() as samples FROM ${Analytics.web_vitals_spans} WHERE @@ -229,6 +238,7 @@ export const VitalsBuilders: Record = { groupBy: "page, metric_name", extraWhere: "wv.path != ''", defaultLimit: 50, + needsSessionDimensions: false, }), timeField: "timestamp", customizable: true, diff --git a/packages/ai/src/query/expressions.ts b/packages/ai/src/query/expressions.ts index e49f13ce2..0105f88cf 100644 --- a/packages/ai/src/query/expressions.ts +++ b/packages/ai/src/query/expressions.ts @@ -103,7 +103,7 @@ export const Expressions = { }, } as const; -const SESSION_ATTRIBUTION_FIELDS = [ +export const SESSION_ATTRIBUTION_FIELDS = [ "referrer", "utm_source", "utm_medium", diff --git a/packages/ai/src/query/simple-builder.ts b/packages/ai/src/query/simple-builder.ts index f032911f1..502afdb08 100644 --- a/packages/ai/src/query/simple-builder.ts +++ b/packages/ai/src/query/simple-builder.ts @@ -8,6 +8,7 @@ import { compileConfigField, Expressions, normalizeGranularity, + SESSION_ATTRIBUTION_FIELDS, sessionAttribution, time, } from 
"./expressions"; @@ -519,7 +520,14 @@ export class SimpleQueryBuilder { whereClauseParams.__orgLevel = "true"; } - const helpers = this.config.plugins?.sessionAttribution + const needsAttribution = + !!this.config.plugins?.sessionAttribution && + !!this.request.filters?.some((f) => + SESSION_ATTRIBUTION_FIELDS.includes( + f.field as (typeof SESSION_ATTRIBUTION_FIELDS)[number] + ) + ); + const helpers = needsAttribution ? { sessionAttributionCTE: (timeField = "time") => this.generateSessionAttributionCTE( @@ -571,10 +579,16 @@ export class SimpleQueryBuilder { params.timezone = this.request.timezone as string; } - const hasCTEs = - this.config.with?.length || this.config.plugins?.sessionAttribution; + const needsAttribution = + !!this.config.plugins?.sessionAttribution && + !!this.request.filters?.some((f) => + SESSION_ATTRIBUTION_FIELDS.includes( + f.field as (typeof SESSION_ATTRIBUTION_FIELDS)[number] + ) + ); + const hasCTEs = this.config.with?.length || needsAttribution; - if (this.config.plugins?.sessionAttribution && !this.config.with?.length) { + if (needsAttribution && !this.config.with?.length) { return this.buildSessionAttributionQuery(params); } @@ -588,19 +602,21 @@ export class SimpleQueryBuilder { const ctesStr = hasCTEs ? this.compileCTEs(params) : ""; const fromSource = this.config.from || this.config.table; - let sql = ctesStr ? 
`${ctesStr}\n` : ""; - sql += `SELECT ${fieldsStr} FROM ${fromSource}`; + let body = `SELECT ${fieldsStr} FROM ${fromSource}`; if (!this.config.from) { const whereClause = this.buildWhereClause(params); - sql += ` WHERE ${whereClause.join(" AND ")}`; + body += ` WHERE ${whereClause.join(" AND ")}`; } else if (this.config.where?.length) { - sql += ` WHERE ${this.config.where.join(" AND ")}`; + body += ` WHERE ${this.config.where.join(" AND ")}`; } - sql = this.replaceDomainPlaceholders(sql); - sql += this.buildGroupByClause(); - sql += this.buildHavingClause(params); + body = this.replaceDomainPlaceholders(body); + body += this.buildGroupByClause(); + body += this.buildHavingClause(params); + + const ctePrefix = ctesStr ? `${ctesStr}\n` : ""; + let sql = ctePrefix + this.wrapPercentage(body); sql += this.buildOrderByClause(); sql += this.buildLimitClause(); sql += this.buildOffsetClause(); @@ -608,6 +624,15 @@ export class SimpleQueryBuilder { return this.finalizeCompiledQuery(sql, params); } + private wrapPercentage(innerSql: string): string { + const pct = this.config.percentageOf; + if (!pct) { + return innerSql; + } + const alias = pct.as ?? "percentage"; + return `SELECT *, ROUND(${pct.of} / sum(${pct.of}) OVER () * 100, 2) AS ${alias} FROM (${innerSql})`; + } + private compileFields(fields?: ConfigField[]): string { if (!fields?.length) { return "*"; @@ -685,7 +710,14 @@ export class SimpleQueryBuilder { private compileCTEs(params: Record): string { const ctes: string[] = []; - if (this.config.plugins?.sessionAttribution) { + const needsAttribution = + !!this.config.plugins?.sessionAttribution && + !!this.request.filters?.some((f) => + SESSION_ATTRIBUTION_FIELDS.includes( + f.field as (typeof SESSION_ATTRIBUTION_FIELDS)[number] + ) + ); + if (needsAttribution) { const timeField = this.config.timeField || "time"; const table = this.config.table || "analytics.events"; ctes.push( @@ -782,10 +814,10 @@ export class SimpleQueryBuilder { filterClauses.length > 0 ? 
filterClauses.join(" AND ") : "1=1"; const idField = this.getIdField(); - let sql = ` + let body = ` WITH ${this.generateSessionAttributionCTE(timeField, table, "from", "to")}, attributed_events AS ( - SELECT + SELECT e.* REPLACE( ${sessionAttribution.joinSelectFields("sa").join(",\n\t\t\t\t\t")} ) @@ -800,8 +832,10 @@ export class SimpleQueryBuilder { SELECT ${mainFields} FROM attributed_events`; - sql = this.replaceDomainPlaceholders(sql); - sql += this.buildGroupByClause(); + body = this.replaceDomainPlaceholders(body); + body += this.buildGroupByClause(); + + let sql = this.wrapPercentage(body); sql += this.buildOrderByClause(); sql += this.buildLimitClause(); sql += this.buildOffsetClause(); @@ -904,13 +938,12 @@ export class SimpleQueryBuilder { async execute(): Promise[]> { const { sql, params } = this.compile(); - const rawData = await chQuery>( - sql, - params, - this.config.noCache - ? { clickhouse_settings: { use_query_cache: 0 } } - : undefined - ); + const rawData = await chQuery>(sql, params, { + clickhouse_settings: { + use_query_cache: this.config.noCache ? 
0 : 1, + allow_experimental_analyzer: 1, + }, + }); return applyPlugins(rawData, this.config, this.websiteDomain); } } diff --git a/packages/ai/src/query/types.ts b/packages/ai/src/query/types.ts index a5b9e436b..fb70b3db9 100644 --- a/packages/ai/src/query/types.ts +++ b/packages/ai/src/query/types.ts @@ -131,6 +131,11 @@ export type CustomSqlFn = ( ctx: CustomSqlContext ) => string | { sql: string; params: Record }; +export interface PercentageOf { + of: string; + as?: string; +} + export interface SimpleQueryConfig { allowedFilters?: string[]; appendEndOfDayToTo?: boolean; @@ -145,6 +150,7 @@ export interface SimpleQueryConfig { meta?: QueryBuilderMeta; noCache?: boolean; orderBy?: string; + percentageOf?: PercentageOf; plugins?: QueryPlugins; publicAccess?: boolean; requiredFilters?: string[]; diff --git a/packages/db/src/clickhouse/client.ts b/packages/db/src/clickhouse/client.ts index dc8beed16..ca877e3a4 100644 --- a/packages/db/src/clickhouse/client.ts +++ b/packages/db/src/clickhouse/client.ts @@ -98,7 +98,6 @@ export const CLICKHOUSE_OPTIONS: NodeClickHouseClientConfigOptions = { }; const READ_DEFAULT_SETTINGS: Record = { - max_threads: 4, max_memory_usage: 4_000_000_000, max_execution_time: 15, max_result_rows: 100_000, @@ -216,7 +215,7 @@ async function chQueryWithMeta( const json = await traced("ch.query", async () => { const settings: Record = { ...READ_DEFAULT_SETTINGS, - ...(options?.readonly && { readonly: "1" }), + ...(options?.readonly && { readonly: "2" }), ...options?.clickhouse_settings, }; assertCacheCompatibleSettings(settings); From 8c099de20a9a7ae2413365b8f26c94b3c73ba20b Mon Sep 17 00:00:00 2001 From: iza <59828082+izadoesdev@users.noreply.github.com> Date: Fri, 15 May 2026 21:57:42 +0300 Subject: [PATCH 02/44] fix(api): centralize public query access --- apps/api/src/lib/public-query-access.test.ts | 25 ------------------- apps/api/src/lib/public-query-access.ts | 8 ------ apps/api/src/routes/query.ts | 8 +++--- 
packages/ai/src/query/builders/index.ts | 9 +++++++ .../src/query/builders/public-access.test.ts | 25 ++++++++++++++++++- 5 files changed, 38 insertions(+), 37 deletions(-) delete mode 100644 apps/api/src/lib/public-query-access.test.ts delete mode 100644 apps/api/src/lib/public-query-access.ts diff --git a/apps/api/src/lib/public-query-access.test.ts b/apps/api/src/lib/public-query-access.test.ts deleted file mode 100644 index cceb9f372..000000000 --- a/apps/api/src/lib/public-query-access.test.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { isPublicQueryAccess } from "./public-query-access"; - -describe("isPublicQueryAccess", () => { - it("allows only query types explicitly marked public-readable", () => { - expect( - isPublicQueryAccess([ - "summary_metrics", - "top_pages", - "custom_events_summary", - "recent_errors", - "vitals_overview", - ]) - ).toBe(true); - }); - - it("denies revenue, unknown, and empty public query requests", () => { - expect(isPublicQueryAccess(["revenue_overview"])).toBe(false); - expect(isPublicQueryAccess(["summary_metrics", "revenue_overview"])).toBe( - false - ); - expect(isPublicQueryAccess(["missing_query_type"])).toBe(false); - expect(isPublicQueryAccess([])).toBe(false); - }); -}); diff --git a/apps/api/src/lib/public-query-access.ts b/apps/api/src/lib/public-query-access.ts deleted file mode 100644 index d0a758604..000000000 --- a/apps/api/src/lib/public-query-access.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { QueryBuilders } from "@databuddy/ai/query/builders"; - -export function isPublicQueryAccess(queryTypes: string[]): boolean { - return ( - queryTypes.length > 0 && - queryTypes.every((type) => QueryBuilders[type]?.publicAccess === true) - ); -} diff --git a/apps/api/src/routes/query.ts b/apps/api/src/routes/query.ts index 56b5d90c7..b76fcc382 100644 --- a/apps/api/src/routes/query.ts +++ b/apps/api/src/routes/query.ts @@ -20,7 +20,10 @@ import { } from 
"@databuddy/shared/types/features"; import type { CustomQueryRequest } from "@databuddy/ai/query/custom-query-types"; import { compileQuery, executeBatch } from "@databuddy/ai/query"; -import { QueryBuilders } from "@databuddy/ai/query/builders"; +import { + canReadQueryTypesPublicly, + QueryBuilders, +} from "@databuddy/ai/query/builders"; import { executeCustomQuery } from "@databuddy/ai/query/custom-query-builder"; import { isNormalizedQueryDate, @@ -30,7 +33,6 @@ import type { Filter, QueryRequest } from "@databuddy/ai/query/types"; import { Elysia, t } from "elysia"; import { getAccessibleWebsites } from "../lib/accessible-websites"; import { resolveDatePreset } from "../lib/date-presets"; -import { isPublicQueryAccess } from "../lib/public-query-access"; import { mergeWideEvent } from "../lib/tracing"; import { getCachedWebsiteDomain, getWebsiteDomain } from "../lib/website-utils"; import { @@ -479,7 +481,7 @@ async function verifyWebsiteAccess( return false; } - if (website.isPublic && isPublicQueryAccess(queryTypes)) { + if (website.isPublic && canReadQueryTypesPublicly(queryTypes)) { mergeWideEvent({ access_result: "public_query" }); return true; } diff --git a/packages/ai/src/query/builders/index.ts b/packages/ai/src/query/builders/index.ts index 7d6aaeb3d..5d3b502a7 100644 --- a/packages/ai/src/query/builders/index.ts +++ b/packages/ai/src/query/builders/index.ts @@ -104,4 +104,13 @@ export const QueryBuilders = Object.fromEntries( ]) ) as typeof BASE_QUERY_BUILDERS; +export function canReadQueryTypesPublicly( + queryTypes: readonly string[] +): boolean { + return ( + queryTypes.length > 0 && + queryTypes.every((type) => QueryBuilders[type]?.publicAccess === true) + ); +} + export type QueryType = keyof typeof QueryBuilders; diff --git a/packages/ai/src/query/builders/public-access.test.ts b/packages/ai/src/query/builders/public-access.test.ts index 2dfdd39b7..6e2e590b1 100644 --- a/packages/ai/src/query/builders/public-access.test.ts +++ 
b/packages/ai/src/query/builders/public-access.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from "vitest"; -import { PUBLIC_QUERY_TYPES, QueryBuilders } from "./index"; +import { + canReadQueryTypesPublicly, + PUBLIC_QUERY_TYPES, + QueryBuilders, +} from "./index"; const PUBLIC_OVERVIEW_QUERY_TYPES = [ "summary_metrics", @@ -81,4 +85,23 @@ describe("query builder publicAccess", () => { expect(QueryBuilders[type]?.publicAccess, type).not.toBe(true); } }); + + it("allows public reads only when every requested builder opts in", () => { + expect( + canReadQueryTypesPublicly([ + "summary_metrics", + "top_pages", + "custom_events_summary", + "recent_errors", + "vitals_overview", + ]) + ).toBe(true); + + expect(canReadQueryTypesPublicly(["revenue_overview"])).toBe(false); + expect( + canReadQueryTypesPublicly(["summary_metrics", "revenue_overview"]) + ).toBe(false); + expect(canReadQueryTypesPublicly(["missing_query_type"])).toBe(false); + expect(canReadQueryTypesPublicly([])).toBe(false); + }); }); From 35a5b46ef2b1a393cef63b38a7d0b7297cd70d86 Mon Sep 17 00:00:00 2001 From: iza <59828082+izadoesdev@users.noreply.github.com> Date: Fri, 15 May 2026 21:58:06 +0300 Subject: [PATCH 03/44] feat(insights): add queued generation service --- .agents/skills/databuddy-internal/SKILL.md | 1 + .env.example | 7 + apps/api/src/index.ts | 2 - apps/api/src/routes/insights.ts | 1421 ----------------- .../insight-generation-settings.tsx | 546 +++++++ .../_components/insight-generation-status.tsx | 259 +++ .../_components/insights-page-content.tsx | 15 +- .../insights/hooks/use-insights-feed.ts | 1 + apps/dashboard/lib/insight-api.ts | 95 +- apps/dashboard/lib/insight-signal-key.ts | 2 +- apps/insights/package.json | 27 + apps/insights/src/generation.ts | 975 +++++++++++ .../src/idempotency.integration.test.ts | 125 ++ apps/insights/src/index.ts | 162 ++ apps/insights/src/jobs.ts | 207 +++ apps/insights/src/rollup.test.ts | 61 + apps/insights/src/rollup.ts | 264 +++ 
.../src/scheduler.integration.test.ts | 222 +++ apps/insights/src/scheduler.ts | 235 +++ apps/insights/src/worker.ts | 68 + apps/insights/tsconfig.json | 12 + bun.lock | 20 + package.json | 1 + packages/ai/package.json | 1 + .../src/ai/schemas/smart-insights-output.ts | 4 +- packages/db/drizzle.config.ts | 1 + packages/db/src/drizzle/schema/analytics.ts | 5 + packages/db/src/drizzle/schema/index.ts | 1 + packages/db/src/drizzle/schema/insights.ts | 296 ++++ packages/db/src/drizzle/schema/relations.ts | 71 + packages/redis/bullmq.test.ts | 33 + packages/redis/bullmq.ts | 36 +- packages/redis/cache-invalidation.ts | 4 + packages/redis/index.ts | 1 + packages/redis/insights-queue.ts | 114 ++ packages/redis/package.json | 1 + packages/rpc/package.json | 4 +- packages/rpc/src/index.ts | 6 + packages/rpc/src/root.ts | 2 + .../rpc/src/routers/insight-generation.ts | 635 ++++++++ packages/rpc/src/routers/insights.ts | 619 ++++++- .../rpc/src/services/insight-schedule.test.ts | 58 + packages/rpc/src/services/insight-schedule.ts | 115 ++ packages/test/src/db.ts | 5 + turbo.json | 4 + 45 files changed, 5237 insertions(+), 1507 deletions(-) delete mode 100644 apps/api/src/routes/insights.ts create mode 100644 apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx create mode 100644 apps/dashboard/app/(main)/insights/_components/insight-generation-status.tsx create mode 100644 apps/insights/package.json create mode 100644 apps/insights/src/generation.ts create mode 100644 apps/insights/src/idempotency.integration.test.ts create mode 100644 apps/insights/src/index.ts create mode 100644 apps/insights/src/jobs.ts create mode 100644 apps/insights/src/rollup.test.ts create mode 100644 apps/insights/src/rollup.ts create mode 100644 apps/insights/src/scheduler.integration.test.ts create mode 100644 apps/insights/src/scheduler.ts create mode 100644 apps/insights/src/worker.ts create mode 100644 apps/insights/tsconfig.json create mode 100644 
packages/db/src/drizzle/schema/insights.ts create mode 100644 packages/redis/insights-queue.ts create mode 100644 packages/rpc/src/routers/insight-generation.ts create mode 100644 packages/rpc/src/services/insight-schedule.test.ts create mode 100644 packages/rpc/src/services/insight-schedule.ts diff --git a/.agents/skills/databuddy-internal/SKILL.md b/.agents/skills/databuddy-internal/SKILL.md index 8663872d4..3e5b82499 100644 --- a/.agents/skills/databuddy-internal/SKILL.md +++ b/.agents/skills/databuddy-internal/SKILL.md @@ -30,6 +30,7 @@ Keep additions **minimal**: one bullet, a new `rg` hint, or a routing note—eno - Slack memory is separate from billing/auth: pass a Slack-scoped `memoryUserId` such as `slack-{team}-{user}` plus current-speaker context so one Slack user's saved name/preferences do not bleed into another user's replies. - Slack agent write tools need the integration automation API key to include the matching Databuddy API scopes (currently `read:data`, `read:links`, `write:links`, `manage:websites`, `manage:flags`); older installs may need reconnecting so a new key is minted. - Shared agent integrations should call `@databuddy/ai/agent` (`askDatabuddyAgent` / `streamDatabuddyAgent`) instead of importing internal MCP run/history helpers directly. +- Insights generation logic belongs in `apps/insights` and should reuse `@databuddy/ai`; `apps/api` should only read insight data or queue runs, not own prompts, model calls, tool loops, validation, or persistence orchestration. - Agent ClickHouse SQL must use the canonical analytics.events schema: `client_id`, `time`, `path`, `event_name`, and pageviews as `event_name = 'screen_view'`; never `website_id`, `created_at`, `page_path`, `event_type`, or `pageview`. - Slack agent evals live in `packages/evals`: use `bun run eval --surface slack` for the whole Slack surface. 
`--tag slack` is only a tiny smoke subset, and `cost_fallback` in agent telemetry is pricing-catalog fallback, not proof the model request fell back. - Slack agent expected stops such as exhausted Databunny credits should throw `DatabuddyAgentUserError` from `@databuddy/ai/agent/errors`; Slack surfaces those messages directly and reserves the generic reconnect copy for real infrastructure failures. diff --git a/.env.example b/.env.example index 1adf211d9..8391d07ce 100644 --- a/.env.example +++ b/.env.example @@ -5,8 +5,15 @@ DATABASE_URL="postgres://databuddy:databuddy_dev_password@localhost:5432/databud DB_POOL_MAX="10" REDIS_URL="redis://localhost:6379" BULLMQ_REDIS_URL="redis://localhost:6379" +# Optional dedicated BullMQ Redis URL for the insights worker. Falls back to BULLMQ_REDIS_URL. +INSIGHTS_PORT="4002" +INSIGHTS_BULLMQ_REDIS_URL="" +INSIGHTS_DISPATCH_INTERVAL_MS="300000" +INSIGHTS_WORKER_CONCURRENCY="5" +INSIGHTS_WORKER_ENABLED="true" AI_GATEWAY_API_KEY="" +SUPERMEMORY_API_KEY="" BETTER_AUTH_URL="http://localhost:3000" BETTER_AUTH_SECRET="generate-a-random-32-byte-base64-secret" diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 78a9026af..e50db47b4 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -21,7 +21,6 @@ import { import { openApiHandler } from "@/rpc/openapi"; import { agent } from "./routes/agent"; import { health } from "./routes/health"; -import { insights } from "./routes/insights"; import { integrations } from "./routes/integrations"; import { mcp } from "./routes/mcp"; import { publicApi } from "./routes/public"; @@ -90,7 +89,6 @@ const app = new Elysia({ precompile: true }) .use(query) .use(agent) .use(integrations) - .use(insights) .use(mcp) .all("/rpc/*", handleRpcEndpoint, { parse: "none" }) .all("/", handleOpenApiReference, { parse: "none" }) diff --git a/apps/api/src/routes/insights.ts b/apps/api/src/routes/insights.ts deleted file mode 100644 index dc1fce257..000000000 --- 
a/apps/api/src/routes/insights.ts +++ /dev/null @@ -1,1421 +0,0 @@ -import { auth } from "@databuddy/auth"; -import { and, db, desc, eq, gte, inArray, isNull } from "@databuddy/db"; -import { - analyticsInsights, - annotations, - insightUserFeedback, - websites, -} from "@databuddy/db/schema"; -import { - cacheNamespaces, - cacheTags, - cacheable, - getRedisCache, - invalidateAgentContextSnapshotsForOwner, - invalidateAgentContextSnapshotsForWebsite, - invalidateInsightsCachesForOrganization, -} from "@databuddy/redis"; -import { getRateLimitHeaders, ratelimit } from "@databuddy/redis/rate-limit"; -import { generateText, Output, stepCountIs, ToolLoopAgent } from "ai"; -import dayjs from "dayjs"; -import { Elysia, t } from "elysia"; -import { useLogger } from "evlog/elysia"; -import type { AppContext } from "@databuddy/ai/config/context"; -import { ANTHROPIC_CACHE_1H, models } from "@databuddy/ai/config/models"; -import { createInsightsAgentTools } from "@databuddy/ai/tools/insights-agent-tools"; -import { - fetchInsightDedupeKeyToIdMap, - insightDedupeKey, -} from "@databuddy/ai/insights/dedupe"; -import { - fetchWebPeriodData, - getWeekOverWeekPeriod, - hasWebInsightData, -} from "@databuddy/ai/insights/fetch-context"; -import { formatLegacyWebDataForPrompt } from "@databuddy/ai/insights/normalize"; -import { validateInsights } from "@databuddy/ai/insights/validate"; -import type { - InsightMetricRow, - WeekOverWeekPeriod, -} from "@databuddy/ai/insights/types"; -import type { ParsedInsight } from "@databuddy/ai/schemas/smart-insights-output"; -import { insightsOutputSchema } from "@databuddy/ai/schemas/smart-insights-output"; -import { storeAnalyticsSummary } from "@databuddy/ai/lib/supermemory"; -import { getAILogger } from "../lib/ai-logger"; -import { captureError, mergeWideEvent } from "../lib/tracing"; - -const CACHE_TTL = 900; -const NEGATIVE_CACHE_TTL = Math.floor(CACHE_TTL / 3); -const CACHE_KEY_PREFIX = "ai-insights"; -const TIMEOUT_MS = 60_000; -const 
INSIGHTS_AGENT_MAX_STEPS = 24; -const INSIGHTS_AGENT_TIMEOUT_MS = 120_000; -const MAX_WEBSITES = 5; -const CONCURRENCY = 3; -const GENERATION_COOLDOWN_HOURS = 6; -const RECENT_INSIGHTS_LOOKBACK_DAYS = 14; -const RECENT_INSIGHTS_PROMPT_LIMIT = 12; -const TOP_INSIGHTS_LIMIT = 10; - -interface WebsiteInsight extends ParsedInsight { - id: string; - link: string; - websiteDomain: string; - websiteId: string; - websiteName: string | null; -} - -interface InsightsPayload { - insights: WebsiteInsight[]; - source: "ai" | "fallback"; -} - -interface OrgWebsiteRow { - domain: string; - id: string; - name: string | null; -} - -function dedupeKeyFor(insight: WebsiteInsight): string { - return insightDedupeKey({ - ...insight, - changePercent: insight.changePercent ?? null, - }); -} - -function buildInsightLink(websiteId: string, insight: ParsedInsight): string { - const base = `/websites/${websiteId}`; - if ( - [ - "error_spike", - "new_errors", - "persistent_error_hotspot", - "reliability_improved", - ].includes(insight.type) - ) { - return `${base}/errors`; - } - if ( - ["vitals_degraded", "performance", "performance_improved"].includes( - insight.type - ) - ) { - return `${base}/vitals`; - } - if (["conversion_leak", "funnel_regression"].includes(insight.type)) { - return `${base}/funnels`; - } - if ( - ["custom_event_spike", "engagement_change", "quality_shift"].includes( - insight.type - ) - ) { - return `${base}/events/stream`; - } - if (insight.type === "uptime_issue") { - return `${base}/anomalies`; - } - return base; -} - -interface RawInsightShape { - changePercent: number | null; - impactSummary: string | null; - metrics: unknown; - sentiment: string; - severity: string; - sources: unknown; - type: string; -} - -function parseInsightShape(r: RawInsightShape) { - return { - severity: r.severity as ParsedInsight["severity"], - sentiment: r.sentiment as ParsedInsight["sentiment"], - type: r.type as ParsedInsight["type"], - sources: - (r.sources as Array<"web" | "product" 
| "ops" | "business"> | null) ?? [], - metrics: (r.metrics as InsightMetricRow[] | null) ?? [], - changePercent: r.changePercent ?? undefined, - impactSummary: r.impactSummary ?? undefined, - }; -} - -async function userHasOrgAccess( - userId: string, - organizationId: string -): Promise { - const memberships = await db.query.member.findMany({ - where: { userId }, - columns: { organizationId: true }, - }); - return memberships.some((m) => m.organizationId === organizationId); -} - -async function userIsOrgAdmin( - userId: string, - organizationId: string -): Promise { - const membership = await db.query.member.findFirst({ - where: { userId, organizationId }, - columns: { role: true }, - }); - return membership?.role === "owner" || membership?.role === "admin"; -} - -function tryCacheSet( - redis: ReturnType, - key: string, - ttl: number, - payload: unknown -): void { - if (!redis) { - return; - } - redis.setex(key, ttl, JSON.stringify(payload)).catch((error: unknown) => { - useLogger().info("Insights cache write failed (best-effort)", { - insights: { key, error }, - }); - }); -} - -async function fetchRecentAnnotations(websiteId: string): Promise { - const since = dayjs().subtract(14, "day").toDate(); - - const rows = await db - .select({ - text: annotations.text, - xValue: annotations.xValue, - tags: annotations.tags, - }) - .from(annotations) - .where( - and( - eq(annotations.websiteId, websiteId), - gte(annotations.xValue, since), - isNull(annotations.deletedAt) - ) - ) - .orderBy(annotations.xValue) - .limit(20); - - if (rows.length === 0) { - return ""; - } - - const lines = rows.map((r) => { - const date = dayjs(r.xValue).format("YYYY-MM-DD"); - const tags = r.tags?.length ? 
` [${r.tags.join(", ")}]` : ""; - return `- ${date}: ${r.text}${tags}`; - }); - - return `\n\nUser annotations (known events that may explain changes):\n${lines.join("\n")}`; -} - -async function fetchRecentInsightsForPrompt( - organizationId: string, - websiteId: string -): Promise { - const since = dayjs().subtract(RECENT_INSIGHTS_LOOKBACK_DAYS, "day").toDate(); - - const rows = await db - .select({ - title: analyticsInsights.title, - type: analyticsInsights.type, - createdAt: analyticsInsights.createdAt, - }) - .from(analyticsInsights) - .where( - and( - eq(analyticsInsights.organizationId, organizationId), - eq(analyticsInsights.websiteId, websiteId), - gte(analyticsInsights.createdAt, since) - ) - ) - .orderBy(desc(analyticsInsights.createdAt)) - .limit(RECENT_INSIGHTS_PROMPT_LIMIT); - - if (rows.length === 0) { - return ""; - } - - const lines = rows.map( - (r) => - `- [${r.type}] ${r.title} (${dayjs(r.createdAt).format("YYYY-MM-DD")})` - ); - - return `\n\n## Recently reported insights for this website (avoid repeating the same narrative unless something materially changed)\n${lines.join("\n")}`; -} - -function formatOrgWebsitesContext( - orgSites: OrgWebsiteRow[], - currentWebsiteId: string -): string { - if (orgSites.length <= 1) { - return ""; - } - const sorted = [...orgSites].sort((a, b) => - a.domain.localeCompare(b.domain, "en") - ); - const lines = sorted.map((s) => { - const label = s.name?.trim() ? s.name.trim() : s.domain; - const marker = - s.id === currentWebsiteId - ? " — **metrics below are for this site only**" - : ""; - return `- ${label} (${s.domain})${marker}`; - }); - return `## Organization websites (same account, separate analytics) -Each row is a different tracked property (e.g. marketing site vs app vs docs). The week-over-week metrics in this message apply only to the site marked "metrics below". Do not blend numbers across rows. If referrers include another domain from this list, treat it as cross-property traffic (e.g. 
landing → product) and name both sides clearly. - -${lines.join("\n")} - -`; -} - -const INSIGHTS_SYSTEM_PROMPT = ` -You are an analytics insights engine. Return exactly 3 week-over-week insights when there are 3 distinct data-backed signals; otherwise return only the distinct signals that exist. Rank by actionability and user/business impact. - - - -- Write for a founder/operator, not an analytics engineer. Translate technical metrics into plain outcomes: "interactions got slower", "pages feel slower", "setup is leaking users", "one source now dominates traffic". -- Prefer reliability, conversion/product impact, engagement quality, broken instrumentation, and meaningful behavior changes over vanity traffic spikes. -- Score actionability × impact, not raw percentage magnitude. Reserve priority 8-10 for likely user, revenue, or operational impact. -- Prefer fewer, sharper insights over broad coverage. Return only signals a user can act on this week. -- Avoid repeating recently reported narratives unless the signal materially changed. - - - -- Use only provided data, tool results, annotations, and recent-insight context. -- Do not invent revenue, signups, retention, funnel conversion, causality, root causes, or business impact. -- If multiple org websites are listed, keep properties separate; cross-domain referrers are cross-property traffic, not generic referrals. -- Use cautious language for correlations unless segment-level evidence directly proves the cause. -- Do not punt, apologize, or say you cannot produce insights when any useful metrics exist. If one query is sparse, use stronger available evidence and lower confidence. - - - -- Prefer 3 concise insights: reliability/product risk first, then engagement/acquisition opportunity. Do not make near-duplicates. -- Each insight must be one clear signal with 1-5 metrics; primary metric first. -- Metrics array owns the numbers. Description/suggestion should reference metric labels, not restate values. 
-- Keep title under 80 chars, description under 320 chars, suggestion under 260 chars. -- Titles must be plain English and user-facing. Do not put raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75 in titles; put technical metric names only in the metrics array. -- Keep description 1-2 concise sentences: what changed, why it matters, and whether cause is evidence or hypothesis. -- Suggestion must be a specific next action with an operational verb such as inspect, review, compare, segment, drill into, fix, audit, trace, or verify. Never use generic monitoring advice. -- Suggestion must name the exact product surface to inspect next: funnel step, goal, referrer segment, page path, error class, session stream, web vital, flag rollout, or agent diagnostic prompt. -- subjectKey must be stable; sources must include only evidence domains used; confidence 0-1 should reflect evidence strength. -- impactSummary is optional, one sentence under 220 characters. - - - -Good: Error Rate rose while Sessions stayed stable -> reliability issue; suggest reviewing affected page/errors first. -Good: INP p75 rose -> title "Interactions got slower"; metrics can still include "INP p75". -Good: Onboarding step 2 drop-off is 80% -> title "Onboarding is leaking at step 2". -Bad: Pricing Visitors rose -> "revenue opportunity" without business data. -Bad: Twitter rose and Bounce Rate worsened -> "Twitter caused the drop" without segmented engagement data. -Bad: "INP p75 still rising" as a title; users should not need to know web-vitals acronyms. - - - -Before finalizing: exactly 3 if data supports it, data-backed only, metrics present, primary metric first, no duplicate narrative, concise copy, specific action, named product surface, no punt on partial data. 
-`; - -async function validateOrRepairInsights( - insights: ParsedInsight[], - context: { domain: string; mode: "agent" | "legacy"; websiteId: string } -): Promise { - const validated = validateInsights(insights); - if (validated.warnings.length > 0) { - useLogger().warn("Insights validation repaired or dropped output", { - insights: { - websiteId: context.websiteId, - mode: context.mode, - warnings: validated.warnings, - }, - }); - } - - const targetCount = Math.min(3, insights.length); - if (targetCount === 0 || validated.insights.length >= targetCount) { - return validated.insights; - } - - try { - const ai = getAILogger(); - const repair = await generateText({ - model: ai.wrap(models.balanced), - output: Output.object({ schema: insightsOutputSchema }), - messages: [ - { - role: "system", - content: `Repair Databuddy insight cards. Return exactly ${targetCount} concise, valid cards when the source contains ${targetCount} distinct data-backed signals. Use only the provided metrics and claims; do not invent numbers, causes, revenue impact, or new entities. Keep title <=80 chars, description <=320 chars, suggestion <=260 chars. Write for a founder/operator: titles must be plain English and avoid raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75. Technical metric names may remain in the metrics array. Suggestions need specific operational actions, not monitoring. 
Soften unsupported causality.`, - }, - { - role: "user", - content: JSON.stringify( - { - domain: context.domain, - validationWarnings: validated.warnings, - originalInsights: insights, - }, - null, - 2 - ), - }, - ], - temperature: 0, - maxOutputTokens: 4096, - abortSignal: AbortSignal.timeout(30_000), - experimental_telemetry: { - isEnabled: true, - functionId: "databuddy.insights.repair", - metadata: { - source: "insights", - feature: "smart_insights", - mode: context.mode, - websiteId: context.websiteId, - websiteDomain: context.domain, - }, - }, - }); - - const repairedOutput = repair.output?.insights ?? []; - const repaired = validateInsights(repairedOutput); - if (repaired.warnings.length > 0) { - useLogger().warn("Insights repair validation warnings", { - insights: { - websiteId: context.websiteId, - mode: context.mode, - warnings: repaired.warnings, - }, - }); - } - - if (repaired.insights.length >= validated.insights.length) { - return repaired.insights.slice(0, targetCount); - } - } catch (error) { - useLogger().warn("Insights repair failed", { - insights: { websiteId: context.websiteId, mode: context.mode, error }, - }); - } - - return validated.insights; -} - -async function analyzeWebsiteLegacy( - organizationId: string, - userId: string, - websiteId: string, - domain: string, - timezone: string, - period: WeekOverWeekPeriod, - orgSites: OrgWebsiteRow[], - annotationContext: string, - recentInsightsBlock: string -): Promise { - const currentRange = period.current; - const previousRange = period.previous; - - const [current, previous] = await Promise.all([ - fetchWebPeriodData( - websiteId, - domain, - currentRange.from, - currentRange.to, - timezone - ), - fetchWebPeriodData( - websiteId, - domain, - previousRange.from, - previousRange.to, - timezone - ), - ]); - - const hasData = current.summary.length > 0 || current.topPages.length > 0; - if (!hasData) { - return []; - } - - const dataSection = formatLegacyWebDataForPrompt( - current, - previous, - 
currentRange, - previousRange - ); - - const orgContext = formatOrgWebsitesContext(orgSites, websiteId); - const prompt = `Analyze this website's week-over-week data and return insights.\n\n${orgContext}${dataSection}${annotationContext}${recentInsightsBlock}`; - - try { - const ai = getAILogger(); - const result = await generateText({ - model: ai.wrap(models.balanced), - output: Output.object({ schema: insightsOutputSchema }), - messages: [ - { - role: "system", - content: INSIGHTS_SYSTEM_PROMPT, - providerOptions: ANTHROPIC_CACHE_1H, - }, - { role: "user", content: prompt }, - ], - temperature: 0.2, - maxOutputTokens: 8192, - abortSignal: AbortSignal.timeout(TIMEOUT_MS), - experimental_telemetry: { - isEnabled: true, - functionId: "databuddy.insights.analyze_website", - metadata: { - source: "insights", - feature: "smart_insights", - mode: "legacy_fallback", - organizationId, - userId, - websiteId, - websiteDomain: domain, - timezone, - }, - }, - }); - - if (!result.output) { - useLogger().warn("No structured output from insights model (legacy)", { - insights: { websiteId }, - }); - return []; - } - - return await validateOrRepairInsights(result.output.insights, { - domain, - mode: "legacy", - websiteId, - }); - } catch (error) { - useLogger().warn("Failed to generate insights (legacy)", { - insights: { websiteId, error }, - }); - return []; - } -} - -async function analyzeWebsite( - organizationId: string, - userId: string, - websiteId: string, - domain: string, - timezone: string, - period: WeekOverWeekPeriod, - orgSites: OrgWebsiteRow[], - requestHeaders: Headers -): Promise { - const currentRange = period.current; - const previousRange = period.previous; - - const hasData = await hasWebInsightData( - websiteId, - domain, - currentRange.from, - currentRange.to, - timezone - ); - if (!hasData) { - return []; - } - - const [annotationContext, recentInsightsBlock] = await Promise.all([ - fetchRecentAnnotations(websiteId), - 
fetchRecentInsightsForPrompt(organizationId, websiteId), - ]); - - const orgContext = formatOrgWebsitesContext(orgSites, websiteId); - const userPrompt = `Analyze this website's week-over-week data and produce insights. - -**Current period:** ${currentRange.from} to ${currentRange.to} -**Previous period:** ${previousRange.from} to ${previousRange.to} -**Timezone:** ${timezone} -**Domain:** ${domain} - -Use web_metrics to pull metrics for both current and previous periods before inferring trends. Start with summary_metrics for both periods, then add top_pages, error_summary, top_referrers, country, browser_name, vitals_overview, or custom_events queries only when they sharpen the narrative. Use product_metrics for goals, funnels, retention, and custom event behavior when a traffic change may have downstream product impact. Use ops_context for page-level errors, uptime, anomaly signals, and recent flag rollouts when reliability or product changes may explain the trend. Use business_context for revenue totals, attribution, and product mix when commercial impact matters. 
- -${orgContext}${annotationContext}${recentInsightsBlock}`; - - const { tools } = createInsightsAgentTools({ - websiteId, - domain, - timezone, - periodBounds: { current: currentRange, previous: previousRange }, - }); - - try { - const appContext: AppContext = { - userId, - organizationId, - websiteId, - websiteDomain: domain, - timezone, - currentDateTime: new Date().toISOString(), - chatId: `insights:${organizationId}:${websiteId}`, - requestHeaders, - }; - - const ai = getAILogger(); - const agent = new ToolLoopAgent({ - model: ai.wrap(models.balanced), - instructions: { - role: "system", - content: INSIGHTS_SYSTEM_PROMPT, - providerOptions: ANTHROPIC_CACHE_1H, - }, - output: Output.object({ schema: insightsOutputSchema }), - tools, - stopWhen: stepCountIs(INSIGHTS_AGENT_MAX_STEPS), - prepareStep: ({ stepNumber }) => { - if (stepNumber === 0) { - return { - activeTools: ["web_metrics"], - toolChoice: { type: "tool", toolName: "web_metrics" }, - }; - } - return {}; - }, - onStepFinish: ({ usage, finishReason, toolCalls }) => { - const toolNames = toolCalls.map((toolCall) => toolCall.toolName); - mergeWideEvent({ - insights_agent_step_tool_calls: toolCalls.length, - insights_agent_step_total_tokens: usage?.totalTokens ?? 
0, - insights_agent_step_used_tools: toolNames.length > 0, - }); - useLogger().info("Insights agent step finished", { - insights: { - websiteId, - finishReason, - toolCalls: toolNames, - totalTokens: usage?.totalTokens, - }, - }); - }, - temperature: 0.2, - experimental_context: appContext, - experimental_telemetry: { - isEnabled: true, - functionId: "databuddy.insights.analyze_website", - metadata: { - source: "insights", - feature: "smart_insights", - mode: "agent", - organizationId, - userId, - websiteId, - websiteDomain: domain, - timezone, - }, - }, - }); - - const result = await agent.generate({ - messages: [{ role: "user", content: userPrompt }], - abortSignal: AbortSignal.timeout(INSIGHTS_AGENT_TIMEOUT_MS), - }); - - if (result.output?.insights?.length) { - return await validateOrRepairInsights(result.output.insights, { - domain, - mode: "agent", - websiteId, - }); - } - - useLogger().warn("Insights agent finished without structured output", { - insights: { websiteId }, - }); - } catch (error) { - useLogger().warn("Insights agent failed, using legacy fallback", { - insights: { websiteId, error }, - }); - } - - return analyzeWebsiteLegacy( - organizationId, - userId, - websiteId, - domain, - timezone, - period, - orgSites, - annotationContext, - recentInsightsBlock - ); -} - -async function processInBatches( - items: T[], - action: (item: T) => Promise, - limit: number -): Promise { - const results: R[] = []; - let nextIndex = 0; - - async function worker() { - while (true) { - const index = nextIndex; - nextIndex += 1; - if (index >= items.length) { - break; - } - const item = items[index]; - if (item === undefined) { - break; - } - results.push(await action(item)); - } - } - - await Promise.all( - Array.from({ length: Math.min(limit, items.length) }, () => worker()) - ); - return results; -} - -async function getRecentInsightsFromDb( - organizationId: string -): Promise { - const cutoff = dayjs().subtract(GENERATION_COOLDOWN_HOURS, "hour").toDate(); - - 
/**
 * Best-effort accessor for the Redis cache client.
 *
 * @returns Whatever `getRedisCache()` returns, or `null` when that call
 *   throws, so callers can check for `null` and carry on without a cache.
 */
function getRedis() {
  let client: ReturnType<typeof getRedisCache> | null;
  try {
    client = getRedisCache();
  } catch {
    client = null;
  }
  return client;
}
// Noun used in narrative copy for each supported range key.
const RANGE_WORDS: Record<string, string> = {
  "7d": "week",
  "30d": "month",
  "90d": "quarter",
};

/** Returns the noun for `range`, using "quarter" when the key has no entry. */
function rangeWord(range: "7d" | "30d" | "90d"): string {
  const noun = RANGE_WORDS[range];
  return noun ?? "quarter";
}

/**
 * Builds a one-sentence summary from already-ranked insights without calling
 * a model.
 *
 * @param range - Range key that selects the period noun in the sentence.
 * @param topInsights - Insights in display order; the first one is the headline.
 * @returns An "all healthy" sentence for an empty list, the headline alone for
 *   a single insight, or the headline plus a count of the remaining insights.
 */
function buildDeterministicNarrative(
  range: "7d" | "30d" | "90d",
  topInsights: {
    title: string;
    severity: string;
    websiteName: string | null;
  }[]
): string {
  const period = rangeWord(range);
  const [lead] = topInsights;
  if (!lead) {
    return `All systems healthy this ${period}. No actionable signals detected.`;
  }

  let where = "";
  if (lead.websiteName) {
    where = ` on ${lead.websiteName}`;
  }

  const remaining = topInsights.length - 1;
  if (remaining === 0) {
    return `This ${period}: ${lead.title}${where}.`;
  }
  return `This ${period}: ${lead.title}${where}, plus ${remaining} more signal${remaining === 1 ? "" : "s"} worth reviewing.`;
}
- -Rules: -- Lead with the most important change -- Include concrete numbers when available -- Never exceed 60 words total -- State facts, do not editorialize -- If nothing meaningful is happening, say so plainly - -Top signals this ${range}: -${insightLines.join("\n")}`; - - let narrative = ""; - try { - const result = await generateText({ - model: getAILogger().wrap(models.balanced), - prompt, - temperature: 0.2, - maxOutputTokens: 200, - }); - narrative = result.text.trim(); - } catch (error) { - useLogger().warn("Narrative LLM call failed", { - insights: { organizationId, range, error }, - }); - } - - if (!narrative) { - narrative = buildDeterministicNarrative(range, topInsights); - mergeWideEvent({ insights_narrative_fallback: true }); - } - - return { narrative }; - }, - { - expireInSec: NARRATIVE_CACHE_TTL_SECS, - prefix: cacheNamespaces.insightsNarrative, - tags: (_result, organizationId) => [cacheTags.organization(organizationId)], - } -); - -export const insights = new Elysia({ prefix: "/v1/insights" }) - .derive(async ({ request }) => { - const session = await auth.api.getSession({ headers: request.headers }); - return { user: session?.user ?? null, requestHeaders: request.headers }; - }) - .onBeforeHandle(({ user, set }) => { - if (!user) { - mergeWideEvent({ insights_ai_auth: "unauthorized" }); - set.status = 401; - return { - success: false, - error: "Authentication required", - code: "AUTH_REQUIRED", - }; - } - }) - .get( - "/history", - async ({ query, user, set }) => { - const userId = user?.id; - if (!userId) { - return { success: false, error: "User ID required", insights: [] }; - } - - const { organizationId, websiteId: websiteIdFilter } = query; - const limitParsed = Number.parseInt(query.limit ?? "50", 10); - const limit = Number.isFinite(limitParsed) - ? Math.min(Math.max(limitParsed, 1), 100) - : 50; - const offsetParsed = Number.parseInt(query.offset ?? "0", 10); - const offset = Number.isFinite(offsetParsed) - ? 
Math.max(offsetParsed, 0) - : 0; - - mergeWideEvent({ insights_history_org_id: organizationId }); - - if (!(await userHasOrgAccess(userId, organizationId))) { - mergeWideEvent({ insights_history_access: "denied" }); - set.status = 403; - return { - success: false, - error: "Access denied to this organization", - insights: [], - }; - } - - const whereClause = websiteIdFilter - ? and( - eq(analyticsInsights.organizationId, organizationId), - eq(analyticsInsights.websiteId, websiteIdFilter), - isNull(websites.deletedAt) - ) - : and( - eq(analyticsInsights.organizationId, organizationId), - isNull(websites.deletedAt) - ); - - const rows = await db - .select({ - id: analyticsInsights.id, - runId: analyticsInsights.runId, - websiteId: analyticsInsights.websiteId, - websiteName: websites.name, - websiteDomain: websites.domain, - title: analyticsInsights.title, - description: analyticsInsights.description, - suggestion: analyticsInsights.suggestion, - severity: analyticsInsights.severity, - sentiment: analyticsInsights.sentiment, - type: analyticsInsights.type, - priority: analyticsInsights.priority, - changePercent: analyticsInsights.changePercent, - subjectKey: analyticsInsights.subjectKey, - sources: analyticsInsights.sources, - confidence: analyticsInsights.confidence, - impactSummary: analyticsInsights.impactSummary, - metrics: analyticsInsights.metrics, - createdAt: analyticsInsights.createdAt, - currentPeriodFrom: analyticsInsights.currentPeriodFrom, - currentPeriodTo: analyticsInsights.currentPeriodTo, - previousPeriodFrom: analyticsInsights.previousPeriodFrom, - previousPeriodTo: analyticsInsights.previousPeriodTo, - timezone: analyticsInsights.timezone, - }) - .from(analyticsInsights) - .innerJoin(websites, eq(analyticsInsights.websiteId, websites.id)) - .where(whereClause) - .orderBy(desc(analyticsInsights.createdAt)) - .limit(limit) - .offset(offset); - - const insights = rows.map((r) => ({ - id: r.id, - runId: r.runId, - websiteId: r.websiteId, - websiteName: 
r.websiteName, - websiteDomain: r.websiteDomain, - link: `/websites/${r.websiteId}`, - title: r.title, - description: r.description, - suggestion: r.suggestion, - priority: r.priority, - subjectKey: r.subjectKey, - confidence: r.confidence, - ...parseInsightShape(r), - createdAt: r.createdAt.toISOString(), - currentPeriodFrom: r.currentPeriodFrom, - currentPeriodTo: r.currentPeriodTo, - previousPeriodFrom: r.previousPeriodFrom, - previousPeriodTo: r.previousPeriodTo, - timezone: r.timezone, - })); - - return { - success: true, - insights, - hasMore: rows.length === limit, - }; - }, - { - query: t.Object({ - organizationId: t.String(), - limit: t.Optional(t.String()), - offset: t.Optional(t.String()), - websiteId: t.Optional(t.String()), - }), - } - ) - .get( - "/org-narrative", - async ({ query, user, set }) => { - const userId = user?.id; - if (!userId) { - return { success: false, error: "User ID required" }; - } - - const { organizationId, range } = query; - mergeWideEvent({ - insights_narrative_org_id: organizationId, - insights_narrative_range: range, - }); - - if (!(await userHasOrgAccess(userId, organizationId))) { - mergeWideEvent({ insights_narrative_access: "denied" }); - set.status = 403; - return { success: false, error: "Access denied to this organization" }; - } - - const rl = await ratelimit( - `insights:narrative:${organizationId}:${userId}`, - NARRATIVE_RATE_LIMIT, - NARRATIVE_RATE_WINDOW_SECS - ); - const rlHeaders = getRateLimitHeaders(rl); - for (const [key, value] of Object.entries(rlHeaders)) { - set.headers[key] = value; - } - if (!rl.success) { - set.status = 429; - return { - success: false, - error: "Rate limit exceeded. 
Try again later.", - }; - } - - try { - const { narrative } = await generateNarrativeCached( - organizationId, - range - ); - return { - success: true, - narrative, - generatedAt: new Date().toISOString(), - }; - } catch (error) { - captureError(error, { - insights_narrative_org_id: organizationId, - insights_narrative_range: range, - }); - useLogger().warn("Failed to generate org narrative", { - insights: { organizationId, range, error }, - }); - set.status = 500; - return { success: false, error: "Could not generate narrative" }; - } - }, - { - query: t.Object({ - organizationId: t.String(), - range: t.Union([t.Literal("7d"), t.Literal("30d"), t.Literal("90d")]), - }), - } - ) - .post( - "/clear", - async ({ body, user, set }) => { - const userId = user?.id; - if (!userId) { - return { success: false, error: "User ID required", deleted: 0 }; - } - - const { organizationId } = body; - mergeWideEvent({ insights_clear_org_id: organizationId }); - - if (!(await userIsOrgAdmin(userId, organizationId))) { - set.status = 403; - return { - success: false, - error: "Owner or admin role required to clear insights", - deleted: 0, - }; - } - - const idRows = await db - .select({ id: analyticsInsights.id }) - .from(analyticsInsights) - .where(eq(analyticsInsights.organizationId, organizationId)); - - const ids = idRows.map((r) => r.id); - - if (ids.length > 0) { - await db - .delete(insightUserFeedback) - .where( - and( - eq(insightUserFeedback.organizationId, organizationId), - inArray(insightUserFeedback.insightId, ids) - ) - ); - await db - .delete(analyticsInsights) - .where(eq(analyticsInsights.organizationId, organizationId)); - } - - await invalidateInsightsCacheForOrg(organizationId); - await invalidateAgentContextSnapshotsForOwner(organizationId); - mergeWideEvent({ insights_cleared: ids.length }); - - return { success: true, deleted: ids.length }; - }, - { - body: t.Object({ - organizationId: t.String(), - }), - } - ) - .post( - "/ai", - async ({ body, user, set, 
requestHeaders }) => { - const userId = user?.id; - if (!userId) { - mergeWideEvent({ insights_ai_error: "missing_user_id" }); - return { success: false, error: "User ID required", insights: [] }; - } - - const { organizationId, timezone = "UTC" } = body; - mergeWideEvent({ - insights_org_id: organizationId, - insights_timezone: timezone, - }); - - if (!(await userHasOrgAccess(userId, organizationId))) { - mergeWideEvent({ insights_access: "denied" }); - set.status = 403; - return { - success: false, - error: "Access denied to this organization", - insights: [], - }; - } - - const redis = getRedis(); - const cacheKey = `${CACHE_KEY_PREFIX}:${organizationId}:${timezone}`; - - if (redis) { - try { - const cached = await redis.get(cacheKey); - if (cached) { - mergeWideEvent({ insights_cache: "hit" }); - const payload = JSON.parse(cached) as InsightsPayload; - return { success: true, ...payload }; - } - } catch (error) { - useLogger().info( - "Insights cache read failed; continuing without cache", - { - insights: { error }, - } - ); - } - } - - mergeWideEvent({ insights_cache: "miss" }); - - const recentInsights = await getRecentInsightsFromDb(organizationId); - if (recentInsights) { - mergeWideEvent({ - insights_returned: recentInsights.length, - insights_source: "db_cooldown", - }); - const payload: InsightsPayload = { - insights: recentInsights, - source: "ai", - }; - tryCacheSet(redis, cacheKey, CACHE_TTL, payload); - return { success: true, ...payload }; - } - - const orgSites = await db.query.websites.findMany({ - where: { organizationId, deletedAt: { isNull: true } }, - columns: { id: true, name: true, domain: true }, - }); - - if (orgSites.length === 0) { - mergeWideEvent({ insights_websites: 0 }); - return { success: true, insights: [], source: "ai" }; - } - - try { - const period = getWeekOverWeekPeriod(); - const dedupeKeyToId = - await fetchInsightDedupeKeyToIdMap(organizationId); - const groups = await processInBatches( - orgSites.slice(0, MAX_WEBSITES), - 
async (site: { id: string; name: string | null; domain: string }) => { - const results = await analyzeWebsite( - organizationId, - userId, - site.id, - site.domain, - timezone, - period, - orgSites, - requestHeaders - ); - return results.map( - (insight): WebsiteInsight => ({ - ...insight, - id: crypto.randomUUID(), - websiteId: site.id, - websiteName: site.name, - websiteDomain: site.domain, - link: buildInsightLink(site.id, insight), - }) - ); - }, - CONCURRENCY - ); - - const merged = groups.flat().sort((a, b) => b.priority - a.priority); - const seenInBatch = new Set(); - const sorted: WebsiteInsight[] = []; - for (const insight of merged) { - const key = dedupeKeyFor(insight); - if (seenInBatch.has(key)) { - continue; - } - seenInBatch.add(key); - const existingId = dedupeKeyToId.get(key); - sorted.push(existingId ? { ...insight, id: existingId } : insight); - if (sorted.length >= TOP_INSIGHTS_LIMIT) { - break; - } - } - - const runId = crypto.randomUUID(); - let finalInsights: WebsiteInsight[] = sorted; - if (sorted.length > 0) { - const toInsert = sorted - .filter((insight) => { - const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); - return !(existingId && insight.id === existingId); - }) - .map((insight) => ({ - id: insight.id, - organizationId, - websiteId: insight.websiteId, - runId, - title: insight.title, - description: insight.description, - suggestion: insight.suggestion, - severity: insight.severity, - sentiment: insight.sentiment, - type: insight.type, - priority: insight.priority, - changePercent: insight.changePercent ?? null, - subjectKey: insight.subjectKey, - sources: insight.sources, - confidence: insight.confidence, - impactSummary: insight.impactSummary ?? null, - metrics: insight.metrics.length > 0 ? 
insight.metrics : null, - timezone, - currentPeriodFrom: period.current.from, - currentPeriodTo: period.current.to, - previousPeriodFrom: period.previous.from, - previousPeriodTo: period.previous.to, - })); - - const updatePayload = { - runId, - timezone, - currentPeriodFrom: period.current.from, - currentPeriodTo: period.current.to, - previousPeriodFrom: period.previous.from, - previousPeriodTo: period.previous.to, - createdAt: new Date(), - }; - - try { - if (toInsert.length > 0) { - await db.insert(analyticsInsights).values(toInsert); - } - const toRefresh = sorted.filter((insight) => { - const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); - return existingId !== undefined && insight.id === existingId; - }); - await Promise.all( - toRefresh.map((insight) => - db - .update(analyticsInsights) - .set({ - ...updatePayload, - title: insight.title, - description: insight.description, - suggestion: insight.suggestion, - severity: insight.severity, - sentiment: insight.sentiment, - type: insight.type, - priority: insight.priority, - changePercent: insight.changePercent ?? null, - subjectKey: insight.subjectKey, - sources: insight.sources, - confidence: insight.confidence, - impactSummary: insight.impactSummary ?? null, - metrics: - insight.metrics.length > 0 ? 
insight.metrics : null, - }) - .where(eq(analyticsInsights.id, insight.id)) - ) - ); - } catch (error) { - useLogger().warn("Failed to persist analytics insights", { - insights: { organizationId, error }, - }); - finalInsights = []; - mergeWideEvent({ insights_persist_failed: true }); - } - - if (finalInsights.length > 0) { - await invalidateInsightsCacheForOrg(organizationId); - } - - await Promise.all( - [...new Set(finalInsights.map((insight) => insight.websiteId))].map( - (websiteId) => - invalidateAgentContextSnapshotsForWebsite(websiteId) - ) - ); - } - - for (const site of orgSites.slice(0, MAX_WEBSITES)) { - const siteInsights = finalInsights.filter( - (s) => s.websiteId === site.id - ); - if (siteInsights.length > 0) { - const summary = siteInsights - .map( - (s) => - `[${s.severity}] ${s.title}: ${s.description} Suggestion: ${s.suggestion}` - ) - .join("\n"); - storeAnalyticsSummary( - `Weekly insights for ${site.domain} (${dayjs().format("YYYY-MM-DD")}):\n${summary}`, - site.id, - { period: "weekly" } - ).catch((error: unknown) => { - useLogger().warn("Failed to store analytics summary", { - insights: { websiteId: site.id, error }, - }); - }); - } - } - - const payload: InsightsPayload = { - insights: finalInsights, - source: "ai", - }; - - tryCacheSet( - redis, - cacheKey, - finalInsights.length > 0 ? CACHE_TTL : NEGATIVE_CACHE_TTL, - payload - ); - - mergeWideEvent({ - insights_returned: finalInsights.length, - insights_source: "ai", - }); - return { success: true, ...payload }; - } catch (error) { - mergeWideEvent({ insights_error: true }); - useLogger().error( - error instanceof Error ? 
error : new Error(String(error)), - { insights: { organizationId } } - ); - return { success: false, insights: [], source: "fallback" }; - } - }, - { - body: t.Object({ - organizationId: t.String(), - timezone: t.Optional(t.String()), - }), - idleTimeout: 240_000, - } - ); diff --git a/apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx b/apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx new file mode 100644 index 000000000..2a6a6579c --- /dev/null +++ b/apps/dashboard/app/(main)/insights/_components/insight-generation-settings.tsx @@ -0,0 +1,546 @@ +"use client"; + +import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; +import { useEffect, useMemo, useState } from "react"; +import { toast } from "sonner"; +import { insightQueries } from "@/lib/insight-api"; +import { orpc } from "@/lib/orpc"; +import { FloppyDiskIcon, GearIcon, MediaPlayIcon } from "@databuddy/ui/icons"; +import { + Badge, + Button, + Card, + Field, + Input, + Skeleton, + guessTimezone, +} from "@databuddy/ui"; +import { Checkbox, Select, Switch } from "@databuddy/ui/client"; + +type Depth = "light" | "standard" | "deep"; +type Frequency = "hourly" | "daily" | "weekly" | "custom"; +type ModelTier = "fast" | "balanced" | "deep"; +type ToolName = + | "web_metrics" + | "product_metrics" + | "ops_context" + | "business_context"; + +interface WebsiteOption { + domain: string; + id: string; + name: string | null; +} + +interface ConfigFormState { + allowedTools: ToolName[]; + cooldownHours: string; + cron: string; + depth: Depth; + enabled: boolean; + frequency: Frequency; + lookbackDays: string; + maxInsightsPerWebsite: string; + maxSteps: string; + maxToolCalls: string; + modelTier: ModelTier; + timezone: string; +} + +interface InsightGenerationSettingsProps { + organizationId?: string; + websites: WebsiteOption[]; +} + +const DEFAULT_FORM: ConfigFormState = { + allowedTools: ["web_metrics", "product_metrics", 
"ops_context"], + cooldownHours: "6", + cron: "", + depth: "standard", + enabled: true, + frequency: "weekly", + lookbackDays: "7", + maxInsightsPerWebsite: "3", + maxSteps: "24", + maxToolCalls: "16", + modelTier: "balanced", + timezone: "UTC", +}; + +const TOOL_OPTIONS: { label: string; value: ToolName }[] = [ + { label: "Web", value: "web_metrics" }, + { label: "Product", value: "product_metrics" }, + { label: "Ops", value: "ops_context" }, + { label: "Business", value: "business_context" }, +]; + +export function InsightGenerationSettings({ + organizationId, + websites, +}: InsightGenerationSettingsProps) { + const queryClient = useQueryClient(); + const [scope, setScope] = useState("organization"); + const websiteId = scope === "organization" ? null : scope; + const [form, setForm] = useState(DEFAULT_FORM); + + const configQuery = useQuery({ + ...orpc.insightGeneration.getConfig.queryOptions({ + input: { + organizationId, + websiteId: websiteId ?? undefined, + }, + }), + enabled: !!organizationId, + }); + + useEffect(() => { + const config = configQuery.data; + if (!config) { + return; + } + setForm({ + allowedTools: normalizeTools(config.allowedTools as ToolName[]), + cooldownHours: String(config.cooldownHours), + cron: config.cron ?? 
"", + depth: config.depth as Depth, + enabled: config.enabled, + frequency: config.frequency as Frequency, + lookbackDays: String(config.lookbackDays), + maxInsightsPerWebsite: String(config.maxInsightsPerWebsite), + maxSteps: String(config.maxSteps), + maxToolCalls: String(config.maxToolCalls), + modelTier: config.modelTier as ModelTier, + timezone: config.timezone || guessTimezone(), + }); + }, [configQuery.data]); + + const selectedScopeLabel = useMemo(() => { + if (scope === "organization") { + return "Organization"; + } + const website = websites.find((item) => item.id === scope); + return website?.name || website?.domain || "Website"; + }, [scope, websites]); + + const saveMutation = useMutation({ + ...orpc.insightGeneration.upsertConfig.mutationOptions(), + onSuccess: async () => { + toast.success("Insights settings saved"); + await invalidateInsightGenerationQueries(queryClient, organizationId); + }, + onError: (error) => { + toast.error( + error instanceof Error ? error.message : "Could not save settings" + ); + }, + }); + + const triggerMutation = useMutation({ + ...orpc.insightGeneration.triggerRun.mutationOptions(), + onSuccess: async (data) => { + toast.success( + data.status === "queued" + ? `Queued ${data.queuedItems} insight job${data.queuedItems === 1 ? "" : "s"}` + : "No websites available to run" + ); + await invalidateInsightGenerationQueries(queryClient, organizationId); + }, + onError: (error) => { + toast.error( + error instanceof Error ? error.message : "Could not start run" + ); + }, + }); + + const isBusy = + configQuery.isLoading || + saveMutation.isPending || + triggerMutation.isPending; + + const patch = () => ({ + ...formToPatch(form), + organizationId, + websiteId: websiteId ?? undefined, + }); + + return ( + + +
+
+ + Controls + {configQuery.data?.source && ( + + {configQuery.data.source} + + )} +
+ {selectedScopeLabel} +
+
+ +
+
+ + + {configQuery.isLoading ? ( +
+ {Array.from({ length: 8 }).map((_, index) => ( + + ))} +
+ ) : ( + <> +
+ + setForm((current) => ({ + ...current, + enabled: Boolean(value), + })) + } + /> + + + Frequency + + + + + Depth + + + + + Model + + +
+ +
+ + setForm((current) => ({ ...current, lookbackDays: value })) + } + suffix="days" + value={form.lookbackDays} + /> + + setForm((current) => ({ + ...current, + maxInsightsPerWebsite: value, + })) + } + suffix="per site" + value={form.maxInsightsPerWebsite} + /> + + setForm((current) => ({ ...current, maxSteps: value })) + } + value={form.maxSteps} + /> + + setForm((current) => ({ ...current, maxToolCalls: value })) + } + value={form.maxToolCalls} + /> +
+ +
+ + setForm((current) => ({ ...current, cooldownHours: value })) + } + suffix="hours" + value={form.cooldownHours} + /> + + Cron + + setForm((current) => ({ + ...current, + cron: event.target.value, + })) + } + placeholder="0 9 * * 1" + value={form.cron} + /> + + + Timezone + + setForm((current) => ({ + ...current, + timezone: event.target.value, + })) + } + value={form.timezone} + /> + +
+ +
+ {TOOL_OPTIONS.map((tool) => ( + + setForm((current) => ({ + ...current, + allowedTools: toggleTool( + current.allowedTools, + tool.value, + Boolean(checked) + ), + })) + } + /> + ))} +
+ + )} +
+ + + + + +
/**
 * Returns the entries of `tools` that appear in `TOOL_OPTIONS`, in
 * `TOOL_OPTIONS` order and without duplicates. "web_metrics" is always
 * included.
 */
function normalizeTools(tools: ToolName[]): ToolName[] {
  const selected = new Set<ToolName>(tools);
  selected.add("web_metrics");
  return TOOL_OPTIONS.map((option) => option.value).filter((value) =>
    selected.has(value)
  );
}

/**
 * Adds `tool` to, or removes it from, the current selection and returns the
 * normalized result.
 *
 * @param current - Tools selected before the toggle.
 * @param tool - Tool being switched.
 * @param enabled - `true` to select `tool`, `false` to deselect it.
 * @returns The normalized selection. Toggling "web_metrics" leaves it
 *   selected, because `normalizeTools` always includes it.
 */
function toggleTool(
  current: ToolName[],
  tool: ToolName,
  enabled: boolean
): ToolName[] {
  if (tool === "web_metrics") {
    return normalizeTools(current);
  }
  const next = enabled
    ? [...current, tool]
    : current.filter((entry) => entry !== tool);
  // normalizeTools de-duplicates and re-adds "web_metrics" itself.
  return normalizeTools(next);
}

// Decimal number syntax: optional sign, digits with optional fraction, optional exponent.
const DECIMAL_NUMBER_PATTERN = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i;

/**
 * Parses a form-field string into an integer clamped to `[min, max]`.
 *
 * The whole string must be a decimal number. Fractions are truncated toward
 * zero and exponent notation is honoured, so "1e2" parses as 100 rather than
 * the 1 that `parseInt` would read. Strings with trailing junk such as "12abc"
 * are rejected instead of being partially parsed.
 *
 * @param value - Raw text from the input.
 * @param fallback - Returned as-is when `value` is empty, malformed, or
 *   outside the safe-integer range.
 * @param min - Lower bound applied to a successfully parsed value.
 * @param max - Upper bound applied to a successfully parsed value.
 * @returns The clamped integer, or `fallback`.
 */
function boundedInt(
  value: string,
  fallback: number,
  min: number,
  max: number
): number {
  const text = value.trim();
  if (!DECIMAL_NUMBER_PATTERN.test(text)) {
    return fallback;
  }
  const parsed = Math.trunc(Number(text));
  if (!Number.isSafeInteger(parsed)) {
    return fallback;
  }
  return Math.max(min, Math.min(max, parsed));
}
form.cron.trim() || null : null, + depth: form.depth, + enabled: form.enabled, + frequency: form.frequency, + lookbackDays: boundedInt(form.lookbackDays, 7, 1, 90), + maxInsightsPerWebsite: boundedInt(form.maxInsightsPerWebsite, 3, 1, 10), + maxSteps: boundedInt(form.maxSteps, 24, 1, 64), + maxToolCalls: boundedInt(form.maxToolCalls, 16, 1, 64), + modelTier: form.modelTier, + timezone: form.timezone.trim() || guessTimezone(), + }; +} + +async function invalidateInsightGenerationQueries( + queryClient: ReturnType, + organizationId?: string +) { + await Promise.all([ + queryClient.invalidateQueries({ queryKey: orpc.insightGeneration.key() }), + queryClient.invalidateQueries({ queryKey: insightQueries.all() }), + organizationId + ? queryClient.invalidateQueries({ + queryKey: insightQueries.ai(organizationId).queryKey, + }) + : Promise.resolve(), + ]); +} diff --git a/apps/dashboard/app/(main)/insights/_components/insight-generation-status.tsx b/apps/dashboard/app/(main)/insights/_components/insight-generation-status.tsx new file mode 100644 index 000000000..295cda3b9 --- /dev/null +++ b/apps/dashboard/app/(main)/insights/_components/insight-generation-status.tsx @@ -0,0 +1,259 @@ +"use client"; + +import { useQuery } from "@tanstack/react-query"; +import { useMemo } from "react"; +import type { InsightsAiResponse } from "@/lib/insight-api"; +import { orpc } from "@/lib/orpc"; +import { + ArrowClockwiseIcon, + CheckCircleIcon, + ClockIcon, + WarningCircleIcon, +} from "@databuddy/ui/icons"; +import { + Badge, + Card, + Progress, + Skeleton, + StatusDot, + dayjs, +} from "@databuddy/ui"; + +type RunStatus = + | "queued" + | "running" + | "succeeded" + | "partially_succeeded" + | "failed" + | "skipped"; + +interface InsightRunSummary { + completedItems: number; + createdAt: string | Date; + failedItems: number; + finishedAt: string | Date | null; + id: string; + reason: "manual" | "scheduled" | "cooldown_refresh"; + skippedItems: number; + startedAt: string | Date | 
null; + status: RunStatus; + totalItems: number; + updatedAt: string | Date; +} + +interface InsightGenerationStatusProps { + generation?: InsightsAiResponse["generation"]; + organizationId?: string; +} + +const ACTIVE_STATUSES = new Set(["queued", "running"]); + +export function InsightGenerationStatus({ + generation, + organizationId, +}: InsightGenerationStatusProps) { + const runsQuery = useQuery({ + ...orpc.insightGeneration.listRuns.queryOptions({ + input: { organizationId, limit: 5 }, + }), + enabled: !!organizationId, + refetchInterval: (query) => { + const latest = ( + query.state.data as { runs?: InsightRunSummary[] } | undefined + )?.runs?.[0]; + return latest && ACTIVE_STATUSES.has(latest.status) ? 2500 : false; + }, + refetchOnWindowFocus: false, + }); + + const latestRun = useMemo( + () => (runsQuery.data?.runs?.[0] as InsightRunSummary | undefined) ?? null, + [runsQuery.data?.runs] + ); + const activeQueuedRun: + | { queuedItems?: number; runId: string; status: "queued" } + | undefined = + generation?.status === "queued" && generation.runId + ? { + queuedItems: generation.queuedItems, + runId: generation.runId, + status: "queued", + } + : undefined; + + const status = latestRun?.status ?? activeQueuedRun?.status ?? "skipped"; + const progress = + latestRun && latestRun.totalItems > 0 + ? ((latestRun.completedItems + + latestRun.failedItems + + latestRun.skippedItems) / + latestRun.totalItems) * + 100 + : activeQueuedRun?.queuedItems + ? 2 + : 0; + + return ( + + +
+ +
+ Generation + {statusLabel(status)} +
+
+ +
+ + + {runsQuery.isLoading ? ( +
+ + +
+ ) : latestRun ? ( + <> + +
+ + + + +
+ + ) : activeQueuedRun ? ( + <> + +

+ Run {shortId(activeQueuedRun.runId)} queued with{" "} + {activeQueuedRun.queuedItems ?? 0} item + {activeQueuedRun.queuedItems === 1 ? "" : "s"}. +

+ + ) : ( +

No runs yet.

+ )} +
+
+ ); +} + +function StatusBadge({ status }: { status: RunStatus | "skipped" }) { + const variant = + status === "failed" + ? "destructive" + : status === "partially_succeeded" + ? "warning" + : status === "succeeded" + ? "success" + : ACTIVE_STATUSES.has(status as RunStatus) + ? "primary" + : "muted"; + const Icon = + status === "failed" + ? WarningCircleIcon + : status === "succeeded" + ? CheckCircleIcon + : ACTIVE_STATUSES.has(status as RunStatus) + ? ArrowClockwiseIcon + : undefined; + + return ( + + {Icon ? ( + + ) : ( + + )} + {status.replace("_", " ")} + + ); +} + +function Stat({ label, value }: { label: string; value: string }) { + return ( +
+

{label}

+

+ {value} +

+
/** Number of items in `run` that have finished: completed, failed, or skipped. */
function settledItems(run: InsightRunSummary): number {
  const { completedItems, failedItems, skippedItems } = run;
  return completedItems + failedItems + skippedItems;
}

/** Picks the progress-bar tone for a run status; anything not listed is "primary". */
function progressTone(
  status: RunStatus
): "primary" | "warning" | "destructive" | "success" {
  switch (status) {
    case "failed":
      return "destructive";
    case "partially_succeeded":
      return "warning";
    case "succeeded":
      return "success";
    default:
      return "primary";
  }
}

/** Human-readable heading for a run status; anything not listed reads "Idle". */
function statusLabel(status: RunStatus | "skipped"): string {
  switch (status) {
    case "queued":
      return "Queued";
    case "running":
      return "Running";
    case "succeeded":
      return "Complete";
    case "partially_succeeded":
      return "Completed with failures";
    case "failed":
      return "Failed";
    default:
      return "Idle";
  }
}

/** Formats a timestamp as "<duration> ago", or "Never" when there is none. */
function formatRelative(value: string | Date | null): string {
  return value ? `${dayjs(value).fromNow(true)} ago` : "Never";
}

/** First eight characters of an identifier, for compact display. */
function shortId(id: string): string {
  return id.substring(0, 8);
}
useOrganizationsContext(); const orgId = activeOrganization?.id ?? activeOrganizationId ?? undefined; - const { insights, isLoading, isRefreshing, refetch } = useInsightsFeed(); + const { insights, generation, isLoading, isRefreshing, refetch } = + useInsightsFeed(); const { websites, isLoading: websitesLoading } = useWebsitesLight(); @@ -132,6 +135,16 @@ export function InsightsPageContent() { ) : (
+
+ + +
diff --git a/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts b/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts index dde577d95..6d0c5639d 100644 --- a/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts +++ b/apps/dashboard/app/(main)/insights/hooks/use-insights-feed.ts @@ -99,6 +99,7 @@ export function useInsightsFeed() { return { insights: mergedInsights, + generation: aiQuery.data?.generation, source: aiQuery.data?.source ?? null, isLoading: isInitialLoading, isRefreshing, diff --git a/apps/dashboard/lib/insight-api.ts b/apps/dashboard/lib/insight-api.ts index 60456927f..a39ba0697 100644 --- a/apps/dashboard/lib/insight-api.ts +++ b/apps/dashboard/lib/insight-api.ts @@ -1,4 +1,3 @@ -import { publicConfig } from "@databuddy/env/public"; import { infiniteQueryOptions, keepPreviousData, @@ -6,8 +5,7 @@ import { } from "@tanstack/react-query"; import { guessTimezone } from "@databuddy/ui"; import type { HistoryInsightRow, Insight } from "@/lib/insight-types"; - -const API_URL = publicConfig.urls.api; +import { orpc } from "@/lib/orpc"; export const INSIGHT_CACHE = { staleTime: 15 * 60 * 1000, @@ -71,6 +69,11 @@ export const insightQueries = { }; export interface InsightsAiResponse { + generation?: { + queuedItems?: number; + runId?: string; + status: "queued" | "skipped" | "unavailable"; + }; insights: Insight[]; source: "ai" | "fallback"; success: boolean; @@ -82,53 +85,25 @@ export interface InsightsHistoryPage { success: boolean; } -export async function fetchInsightsAi( +export function fetchInsightsAi( organizationId: string ): Promise { - const res = await fetch(`${API_URL}/v1/insights/ai`, { - method: "POST", - credentials: "include", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ organizationId, timezone: guessTimezone() }), - signal: AbortSignal.timeout(90_000), - }); - - if (!res.ok) { - throw new Error(`Insights request failed: ${res.status}`); - } - - const data = (await res.json()) as 
InsightsAiResponse; - - if (!data.success) { - throw new Error("Insights response unsuccessful"); - } - - return data; + return orpc.insights.feed.call({ + organizationId, + timezone: guessTimezone(), + }) as Promise; } -export async function fetchInsightsHistoryPage( +export function fetchInsightsHistoryPage( organizationId: string, offset: number, limit = 50 ): Promise { - const params = new URLSearchParams({ + return orpc.insights.history.call({ organizationId, - limit: String(limit), - offset: String(offset), - }); - const res = await fetch( - `${API_URL}/v1/insights/history?${params.toString()}`, - { - credentials: "include", - signal: AbortSignal.timeout(30_000), - } - ); - - if (!res.ok) { - throw new Error(`Insights history failed: ${res.status}`); - } - - return (await res.json()) as InsightsHistoryPage; + limit, + offset, + }) as Promise; } export interface ClearInsightsResponse { @@ -137,24 +112,12 @@ export interface ClearInsightsResponse { success: boolean; } -export async function clearInsightsHistory( +export function clearInsightsHistory( organizationId: string ): Promise { - const res = await fetch(`${API_URL}/v1/insights/clear`, { - method: "POST", - credentials: "include", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ organizationId }), - signal: AbortSignal.timeout(30_000), - }); - - const data = (await res.json()) as ClearInsightsResponse; - - if (!res.ok) { - throw new Error(data.error ?? 
`Clear insights failed: ${res.status}`); - } - - return data; + return orpc.insights.clearHistory.call({ + organizationId, + }) as Promise; } export type OrgNarrativeResponse = @@ -168,20 +131,12 @@ export type OrgNarrativeResponse = error: string; }; -export async function fetchInsightsOrgNarrative( +export function fetchInsightsOrgNarrative( organizationId: string, range: "7d" | "30d" | "90d" ): Promise { - const url = new URL(`${API_URL}/v1/insights/org-narrative`); - url.searchParams.set("organizationId", organizationId); - url.searchParams.set("range", range); - const res = await fetch(url.toString(), { - method: "GET", - credentials: "include", - signal: AbortSignal.timeout(30_000), - }); - if (!res.ok) { - return { success: false, error: `HTTP ${res.status}` }; - } - return (await res.json()) as OrgNarrativeResponse; + return orpc.insights.orgNarrative.call({ + organizationId, + range, + }) as Promise; } diff --git a/apps/dashboard/lib/insight-signal-key.ts b/apps/dashboard/lib/insight-signal-key.ts index 0a8587520..6a3267a69 100644 --- a/apps/dashboard/lib/insight-signal-key.ts +++ b/apps/dashboard/lib/insight-signal-key.ts @@ -26,7 +26,7 @@ function directionFromParts( return "flat"; } -/** Matches server `insightDedupeKey` in apps/api/src/routes/insights.ts */ +/** Matches server `insightDedupeKey` in @databuddy/ai/insights/dedupe. 
*/ export function insightSignalDedupeKey(insight: { websiteId: string; type: InsightType; diff --git a/apps/insights/package.json b/apps/insights/package.json new file mode 100644 index 000000000..0c1903102 --- /dev/null +++ b/apps/insights/package.json @@ -0,0 +1,27 @@ +{ + "name": "@databuddy/insights", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "bun --watch run src/index.ts", + "test": "bun test src", + "test:integration": "INSIGHTS_INTEGRATION_TESTS=true bun test src/scheduler.integration.test.ts src/idempotency.integration.test.ts", + "check-types": "tsc --noEmit" + }, + "dependencies": { + "@databuddy/ai": "workspace:*", + "@databuddy/db": "workspace:*", + "@databuddy/redis": "workspace:*", + "@databuddy/rpc": "workspace:*", + "ai": "^6.0.154", + "bullmq": "^5.66.5", + "dayjs": "^1.11.19", + "elysia": "catalog:", + "evlog": "catalog:" + }, + "devDependencies": { + "@databuddy/test": "workspace:*" + }, + "packageManager": "bun@1.3.14" +} diff --git a/apps/insights/src/generation.ts b/apps/insights/src/generation.ts new file mode 100644 index 000000000..5e4e73a10 --- /dev/null +++ b/apps/insights/src/generation.ts @@ -0,0 +1,975 @@ +import type { AppContext } from "@databuddy/ai/config/context"; +import { ANTHROPIC_CACHE_1H, models } from "@databuddy/ai/config/models"; +import { insightDedupeKey } from "@databuddy/ai/insights/dedupe"; +import { + fetchWebPeriodData, + hasWebInsightData, +} from "@databuddy/ai/insights/fetch-context"; +import { formatLegacyWebDataForPrompt } from "@databuddy/ai/insights/normalize"; +import type { + InsightMetricRow, + WeekOverWeekPeriod, +} from "@databuddy/ai/insights/types"; +import { validateInsights } from "@databuddy/ai/insights/validate"; +import { getAILogger } from "@databuddy/ai/lib/ai-logger"; +import { storeAnalyticsSummary } from "@databuddy/ai/lib/supermemory"; +import type { ParsedInsight } from "@databuddy/ai/schemas/smart-insights-output"; +import { insightsOutputSchema 
} from "@databuddy/ai/schemas/smart-insights-output"; +import { createInsightsAgentTools } from "@databuddy/ai/tools/insights-agent-tools"; +import { and, db, desc, eq, gte, isNotNull, isNull, sql } from "@databuddy/db"; +import { + analyticsInsights, + annotations, + type InsightGenerationConfigSnapshot, + type InsightGenerationTool, + websites, +} from "@databuddy/db/schema"; +import { + invalidateAgentContextSnapshotsForWebsite, + invalidateInsightsCachesForOrganization, +} from "@databuddy/redis"; +import { generateText, Output, stepCountIs, ToolLoopAgent } from "ai"; +import { randomUUIDv7 } from "bun"; +import dayjs from "dayjs"; +import { log } from "evlog"; + +const LEGACY_TIMEOUT_MS = 60_000; +const AGENT_TIMEOUT_MS = 120_000; +const RECENT_INSIGHTS_PROMPT_LIMIT = 12; +const DEFAULT_MAX_INSIGHTS = 3; +const TOOL_NAMES = [ + "web_metrics", + "product_metrics", + "ops_context", + "business_context", +] as const satisfies readonly InsightGenerationTool[]; + +interface OrgWebsiteRow { + domain: string; + id: string; + name: string | null; +} + +interface GeneratedWebsiteInsight extends ParsedInsight { + id: string; + websiteDomain: string; + websiteId: string; + websiteName: string | null; +} + +export interface GenerateWebsiteInsightsInput { + config: InsightGenerationConfigSnapshot; + organizationId: string; + reason: string; + requestedByUserId: string | null; + runId: string; + websiteId: string; +} + +export interface GenerateWebsiteInsightsResult { + insightIds: string[]; + message?: string; + resultCount: number; + status: "skipped" | "succeeded"; +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + +function maxInsights(config: InsightGenerationConfigSnapshot): number { + return Math.max( + 1, + Math.min(10, config.maxInsightsPerWebsite || DEFAULT_MAX_INSIGHTS) + ); +} + +function promptLookbackDays(config: InsightGenerationConfigSnapshot): number { + return Math.max(14, Math.min(180, config.lookbackDays * 2)); +} + +function getComparisonPeriod(lookbackDays: number): WeekOverWeekPeriod { + const days = Math.max(1, Math.min(90, lookbackDays)); + const now = dayjs(); + return { + current: { + from: now.subtract(days, "day").format("YYYY-MM-DD"), + to: now.format("YYYY-MM-DD"), + }, + previous: { + from: now.subtract(days * 2, "day").format("YYYY-MM-DD"), + to: now.subtract(days, "day").format("YYYY-MM-DD"), + }, + }; +} + +function modelForTier(tier: InsightGenerationConfigSnapshot["modelTier"]) { + if (tier === "fast") { + return models.quick; + } + if (tier === "deep") { + return models.deep; + } + return models.balanced; +} + +function normalizeAllowedTools( + tools: InsightGenerationConfigSnapshot["allowedTools"] +): InsightGenerationTool[] { + const allowed = new Set( + tools.filter((tool): tool is InsightGenerationTool => + (TOOL_NAMES as readonly string[]).includes(tool) + ) + ); + allowed.add("web_metrics"); + return TOOL_NAMES.filter((tool) => allowed.has(tool)); +} + +function dedupeKeyFor(insight: GeneratedWebsiteInsight): string { + return insightDedupeKey({ + ...insight, + changePercent: insight.changePercent ?? 
null, + }); +} + +async function fetchInsightDedupeKeyToIdMap( + organizationId: string, + cooldownHours: number +): Promise> { + const cutoff = dayjs().subtract(Math.max(1, cooldownHours), "hour").toDate(); + const rows = await db + .select({ + id: analyticsInsights.id, + websiteId: analyticsInsights.websiteId, + type: analyticsInsights.type, + sentiment: analyticsInsights.sentiment, + changePercent: analyticsInsights.changePercent, + dedupeKey: analyticsInsights.dedupeKey, + subjectKey: analyticsInsights.subjectKey, + title: analyticsInsights.title, + }) + .from(analyticsInsights) + .where( + and( + eq(analyticsInsights.organizationId, organizationId), + gte(analyticsInsights.createdAt, cutoff) + ) + ) + .orderBy(desc(analyticsInsights.createdAt)); + + const map = new Map(); + for (const row of rows) { + const key = + row.dedupeKey ?? + insightDedupeKey({ + websiteId: row.websiteId, + type: row.type as ParsedInsight["type"], + sentiment: row.sentiment as ParsedInsight["sentiment"], + changePercent: row.changePercent, + subjectKey: row.subjectKey, + title: row.title, + }); + if (!map.has(key)) { + map.set(key, row.id); + } + } + return map; +} + +async function fetchRecentAnnotations( + websiteId: string, + config: InsightGenerationConfigSnapshot +): Promise { + const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); + const rows = await db + .select({ + text: annotations.text, + xValue: annotations.xValue, + tags: annotations.tags, + }) + .from(annotations) + .where( + and( + eq(annotations.websiteId, websiteId), + gte(annotations.xValue, since), + isNull(annotations.deletedAt) + ) + ) + .orderBy(annotations.xValue) + .limit(20); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map((row) => { + const date = dayjs(row.xValue).format("YYYY-MM-DD"); + const tags = row.tags?.length ? 
` [${row.tags.join(", ")}]` : ""; + return `- ${date}: ${row.text}${tags}`; + }); + + return `\n\nUser annotations (known events that may explain changes):\n${lines.join("\n")}`; +} + +async function fetchRecentInsightsForPrompt( + organizationId: string, + websiteId: string, + config: InsightGenerationConfigSnapshot +): Promise { + const since = dayjs().subtract(promptLookbackDays(config), "day").toDate(); + const rows = await db + .select({ + title: analyticsInsights.title, + type: analyticsInsights.type, + createdAt: analyticsInsights.createdAt, + }) + .from(analyticsInsights) + .where( + and( + eq(analyticsInsights.organizationId, organizationId), + eq(analyticsInsights.websiteId, websiteId), + gte(analyticsInsights.createdAt, since) + ) + ) + .orderBy(desc(analyticsInsights.createdAt)) + .limit(RECENT_INSIGHTS_PROMPT_LIMIT); + + if (rows.length === 0) { + return ""; + } + + const lines = rows.map( + (row) => + `- [${row.type}] ${row.title} (${dayjs(row.createdAt).format("YYYY-MM-DD")})` + ); + + return `\n\n## Recently reported insights for this website (avoid repeating the same narrative unless something materially changed)\n${lines.join("\n")}`; +} + +function formatOrgWebsitesContext( + orgSites: OrgWebsiteRow[], + currentWebsiteId: string +): string { + if (orgSites.length <= 1) { + return ""; + } + const sorted = [...orgSites].sort((a, b) => + a.domain.localeCompare(b.domain, "en") + ); + const lines = sorted.map((site) => { + const label = site.name?.trim() ? site.name.trim() : site.domain; + const marker = + site.id === currentWebsiteId + ? " - metrics below are for this site only" + : ""; + return `- ${label} (${site.domain})${marker}`; + }); + return `## Organization websites (same account, separate analytics) +Each row is a different tracked property (e.g. marketing site vs app vs docs). The period metrics in this message apply only to the site marked "metrics below". Do not blend numbers across rows. 
If referrers include another domain from this list, treat it as cross-property traffic and name both sides clearly. + +${lines.join("\n")} + +`; +} + +function buildSystemPrompt(config: InsightGenerationConfigSnapshot): string { + const targetCount = maxInsights(config); + const depthInstruction = + config.depth === "light" + ? "Use the smallest useful tool set. Prefer 1-2 high-confidence insights and skip speculative cross-domain analysis." + : config.depth === "deep" + ? "Actively cross-check web, product, ops, and business context when those tools are enabled. Prefer a fuller ranked set, but only when signals are distinct and data-backed." + : "Explore enough context to produce concise, distinct, high-confidence insights without over-querying."; + + return ` +You are Databuddy's analytics insights worker. Return up to ${targetCount} period-over-period insights when that many distinct data-backed signals exist. Rank by actionability and user/business impact. + + + +- Depth: ${config.depth}. ${depthInstruction} +- Max model/tool-loop steps: ${config.maxSteps} +- Max requested tool calls: ${config.maxToolCalls} +- Lookback period length: ${config.lookbackDays} day(s) +- Enabled tools: ${normalizeAllowedTools(config.allowedTools).join(", ")} + + + +- Write for a founder/operator, not an analytics engineer. Translate technical metrics into plain outcomes: "interactions got slower", "pages feel slower", "setup is leaking users", "one source now dominates traffic". +- Prefer reliability, conversion/product impact, engagement quality, broken instrumentation, and meaningful behavior changes over vanity traffic spikes. +- Score actionability times impact, not raw percentage magnitude. Reserve priority 8-10 for likely user, revenue, or operational impact. +- Prefer fewer, sharper insights over broad coverage. Return only signals a user can act on this period. +- Avoid repeating recently reported narratives unless the signal materially changed. 
+ + + +- Use only provided data, tool results, annotations, and recent-insight context. +- Do not invent revenue, signups, retention, funnel conversion, causality, root causes, or business impact. +- If multiple org websites are listed, keep properties separate; cross-domain referrers are cross-property traffic, not generic referrals. +- Use cautious language for correlations unless segment-level evidence directly proves the cause. +- Do not punt, apologize, or say you cannot produce insights when any useful metrics exist. If one query is sparse, use stronger available evidence and lower confidence. + + + +- Return no more than ${targetCount} concise insights: reliability/product risk first, then engagement/acquisition opportunity. Do not make near-duplicates. +- Each insight must be one clear signal with 1-5 metrics; primary metric first. +- Metrics array owns the numbers. Description/suggestion should reference metric labels, not restate values. +- Keep title under 80 chars, description under 320 chars, suggestion under 260 chars. +- Titles must be plain English and user-facing. Do not put raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75 in titles; put technical metric names only in the metrics array. +- Keep description 1-2 concise sentences: what changed, why it matters, and whether cause is evidence or hypothesis. +- Suggestion must be a specific next action with an operational verb such as inspect, review, compare, segment, drill into, fix, audit, trace, or verify. Never use generic monitoring advice. +- Suggestion must name the exact product surface to inspect next: funnel step, goal, referrer segment, page path, error class, session stream, web vital, flag rollout, or agent diagnostic prompt. +- subjectKey must be stable; sources must include only evidence domains used; confidence 0-1 should reflect evidence strength. +- impactSummary is optional, one sentence under 220 characters. 
+ + + +Good: Error Rate rose while Sessions stayed stable -> reliability issue; suggest reviewing affected page/errors first. +Good: INP p75 rose -> title "Interactions got slower"; metrics can still include "INP p75". +Good: Onboarding step 2 drop-off is 80% -> title "Onboarding is leaking at step 2". +Bad: Pricing Visitors rose -> "revenue opportunity" without business data. +Bad: Twitter rose and Bounce Rate worsened -> "Twitter caused the drop" without segmented engagement data. +Bad: "INP p75 still rising" as a title; users should not need to know web-vitals acronyms. +`; +} + +async function validateOrRepairInsights( + insights: ParsedInsight[], + context: { + config: InsightGenerationConfigSnapshot; + domain: string; + mode: "agent" | "legacy"; + organizationId: string; + websiteId: string; + } +): Promise { + const validated = validateInsights(insights); + if (validated.warnings.length > 0) { + log.warn({ + service: "insights", + message: "Insights validation repaired or dropped output", + organization_id: context.organizationId, + website_id: context.websiteId, + mode: context.mode, + warnings: validated.warnings, + }); + } + + const targetCount = Math.min(maxInsights(context.config), insights.length); + if (targetCount === 0 || validated.insights.length >= targetCount) { + return validated.insights.slice(0, targetCount); + } + + try { + const ai = getAILogger(); + const repair = await generateText({ + model: ai.wrap(modelForTier(context.config.modelTier)), + output: Output.object({ schema: insightsOutputSchema }), + messages: [ + { + role: "system", + content: `Repair Databuddy insight cards. Return up to ${targetCount} concise, valid cards when the source contains distinct data-backed signals. Use only the provided metrics and claims; do not invent numbers, causes, revenue impact, or new entities. Keep title <=80 chars, description <=320 chars, suggestion <=260 chars. 
Write for a founder/operator: titles must be plain English and avoid raw metric jargon like INP, LCP, FCP, TTFB, CLS, or p75. Technical metric names may remain in the metrics array. Suggestions need specific operational actions, not monitoring. Soften unsupported causality.`, + }, + { + role: "user", + content: JSON.stringify( + { + domain: context.domain, + validationWarnings: validated.warnings, + originalInsights: insights, + }, + null, + 2 + ), + }, + ], + temperature: 0, + maxOutputTokens: 4096, + abortSignal: AbortSignal.timeout(30_000), + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.repair", + metadata: { + source: "insights_worker", + feature: "smart_insights", + mode: context.mode, + organizationId: context.organizationId, + websiteId: context.websiteId, + websiteDomain: context.domain, + }, + }, + }); + + const repairedOutput = repair.output?.insights ?? []; + const repaired = validateInsights(repairedOutput); + if (repaired.warnings.length > 0) { + log.warn({ + service: "insights", + message: "Insights repair validation warnings", + organization_id: context.organizationId, + website_id: context.websiteId, + mode: context.mode, + warnings: repaired.warnings, + }); + } + + if (repaired.insights.length >= validated.insights.length) { + return repaired.insights.slice(0, targetCount); + } + } catch (error) { + log.warn({ + service: "insights", + message: "Insights repair failed", + error_message: errorMessage(error), + organization_id: context.organizationId, + website_id: context.websiteId, + mode: context.mode, + }); + } + + return validated.insights.slice(0, targetCount); +} + +async function analyzeWebsiteLegacy(params: { + config: InsightGenerationConfigSnapshot; + domain: string; + organizationId: string; + orgSites: OrgWebsiteRow[]; + period: WeekOverWeekPeriod; + recentInsightsBlock: string; + annotationContext: string; + userId: string; + websiteId: string; +}): Promise { + const currentRange = 
params.period.current; + const previousRange = params.period.previous; + const [current, previous] = await Promise.all([ + fetchWebPeriodData( + params.websiteId, + params.domain, + currentRange.from, + currentRange.to, + params.config.timezone + ), + fetchWebPeriodData( + params.websiteId, + params.domain, + previousRange.from, + previousRange.to, + params.config.timezone + ), + ]); + + if (current.summary.length === 0 && current.topPages.length === 0) { + return []; + } + + const dataSection = formatLegacyWebDataForPrompt( + current, + previous, + currentRange, + previousRange + ); + const orgContext = formatOrgWebsitesContext( + params.orgSites, + params.websiteId + ); + const prompt = `Analyze this website's period-over-period data and return insights. + +${orgContext}${dataSection}${params.annotationContext}${params.recentInsightsBlock}`; + + try { + const ai = getAILogger(); + const result = await generateText({ + model: ai.wrap(modelForTier(params.config.modelTier)), + output: Output.object({ schema: insightsOutputSchema }), + messages: [ + { + role: "system", + content: buildSystemPrompt(params.config), + providerOptions: ANTHROPIC_CACHE_1H, + }, + { role: "user", content: prompt }, + ], + temperature: 0.2, + maxOutputTokens: 8192, + abortSignal: AbortSignal.timeout(LEGACY_TIMEOUT_MS), + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.analyze_website", + metadata: { + source: "insights_worker", + feature: "smart_insights", + mode: "legacy_fallback", + organizationId: params.organizationId, + userId: params.userId, + websiteId: params.websiteId, + websiteDomain: params.domain, + timezone: params.config.timezone, + }, + }, + }); + + return await validateOrRepairInsights(result.output?.insights ?? 
[], { + config: params.config, + domain: params.domain, + mode: "legacy", + organizationId: params.organizationId, + websiteId: params.websiteId, + }); + } catch (error) { + log.warn({ + service: "insights", + message: "Failed to generate insights with legacy fallback", + error_message: errorMessage(error), + organization_id: params.organizationId, + website_id: params.websiteId, + }); + return []; + } +} + +async function analyzeWebsite(params: { + config: InsightGenerationConfigSnapshot; + domain: string; + organizationId: string; + orgSites: OrgWebsiteRow[]; + period: WeekOverWeekPeriod; + userId: string; + websiteId: string; +}): Promise { + const currentRange = params.period.current; + const previousRange = params.period.previous; + const hasData = await hasWebInsightData( + params.websiteId, + params.domain, + currentRange.from, + currentRange.to, + params.config.timezone + ); + if (!hasData) { + return []; + } + + const [annotationContext, recentInsightsBlock] = await Promise.all([ + fetchRecentAnnotations(params.websiteId, params.config), + fetchRecentInsightsForPrompt( + params.organizationId, + params.websiteId, + params.config + ), + ]); + + const allowedTools = normalizeAllowedTools(params.config.allowedTools); + const orgContext = formatOrgWebsitesContext( + params.orgSites, + params.websiteId + ); + const userPrompt = `Analyze this website's period-over-period data and produce insights. + +**Current period:** ${currentRange.from} to ${currentRange.to} +**Previous period:** ${previousRange.from} to ${previousRange.to} +**Timezone:** ${params.config.timezone} +**Domain:** ${params.domain} + +Use web_metrics to pull metrics for both current and previous periods before inferring trends. Start with summary_metrics for both periods, then add top_pages, error_summary, top_referrers, country, browser_name, vitals_overview, or custom_events queries only when they sharpen the narrative. 
Use product_metrics for goals, funnels, retention, and custom event behavior when a traffic change may have downstream product impact. Use ops_context for page-level errors, uptime, anomaly signals, and recent flag rollouts when reliability or product changes may explain the trend. Use business_context for revenue totals, attribution, and product mix when commercial impact matters. + +Only call these enabled tools: ${allowedTools.join(", ")}. + +${orgContext}${annotationContext}${recentInsightsBlock}`; + + const { tools: allTools } = createInsightsAgentTools({ + websiteId: params.websiteId, + domain: params.domain, + timezone: params.config.timezone, + periodBounds: { current: currentRange, previous: previousRange }, + }); + const tools = Object.fromEntries( + Object.entries(allTools).filter(([name]) => + allowedTools.includes(name as InsightGenerationTool) + ) + ) as Partial; + + try { + const appContext: AppContext = { + userId: params.userId, + organizationId: params.organizationId, + websiteId: params.websiteId, + websiteDomain: params.domain, + timezone: params.config.timezone, + currentDateTime: new Date().toISOString(), + chatId: `insights:${params.organizationId}:${params.websiteId}`, + }; + let toolCallCount = 0; + const ai = getAILogger(); + const agent = new ToolLoopAgent({ + model: ai.wrap(modelForTier(params.config.modelTier)), + instructions: { + role: "system", + content: buildSystemPrompt(params.config), + providerOptions: ANTHROPIC_CACHE_1H, + }, + output: Output.object({ schema: insightsOutputSchema }), + tools, + stopWhen: stepCountIs( + Math.max( + 1, + Math.min(params.config.maxSteps, params.config.maxToolCalls + 2) + ) + ), + prepareStep: ({ stepNumber }) => { + if (stepNumber === 0 && "web_metrics" in tools) { + return { + activeTools: ["web_metrics"], + toolChoice: { type: "tool", toolName: "web_metrics" }, + }; + } + return { activeTools: allowedTools }; + }, + onStepFinish: ({ usage, finishReason, toolCalls }) => { + toolCallCount += 
toolCalls.length; + log.info({ + service: "insights", + message: "Insights worker agent step finished", + organization_id: params.organizationId, + website_id: params.websiteId, + finish_reason: finishReason, + tool_calls: toolCalls.flatMap((toolCall) => + toolCall ? [toolCall.toolName] : [] + ), + total_tokens: usage?.totalTokens, + tool_call_count: toolCallCount, + }); + }, + temperature: 0.2, + experimental_context: appContext, + experimental_telemetry: { + isEnabled: true, + functionId: "databuddy.insights.worker.analyze_website", + metadata: { + source: "insights_worker", + feature: "smart_insights", + mode: "agent", + organizationId: params.organizationId, + userId: params.userId, + websiteId: params.websiteId, + websiteDomain: params.domain, + timezone: params.config.timezone, + depth: params.config.depth, + modelTier: params.config.modelTier, + }, + }, + }); + + const result = await agent.generate({ + messages: [{ role: "user", content: userPrompt }], + abortSignal: AbortSignal.timeout(AGENT_TIMEOUT_MS), + }); + + if (result.output?.insights?.length) { + return await validateOrRepairInsights(result.output.insights, { + config: params.config, + domain: params.domain, + mode: "agent", + organizationId: params.organizationId, + websiteId: params.websiteId, + }); + } + + log.warn({ + service: "insights", + message: "Insights worker agent finished without structured output", + organization_id: params.organizationId, + website_id: params.websiteId, + }); + } catch (error) { + log.warn({ + service: "insights", + message: "Insights worker agent failed, using legacy fallback", + error_message: errorMessage(error), + organization_id: params.organizationId, + website_id: params.websiteId, + }); + } + + return analyzeWebsiteLegacy({ + ...params, + annotationContext, + recentInsightsBlock, + }); +} + +async function persistWebsiteInsights(params: { + config: InsightGenerationConfigSnapshot; + insights: GeneratedWebsiteInsight[]; + organizationId: string; + period: 
WeekOverWeekPeriod; + runId: string; +}): Promise { + const dedupeKeyToId = await fetchInsightDedupeKeyToIdMap( + params.organizationId, + params.config.cooldownHours + ); + const seenInBatch = new Set(); + const finalInsights: GeneratedWebsiteInsight[] = []; + + for (const insight of [...params.insights].sort( + (a, b) => b.priority - a.priority + )) { + const key = dedupeKeyFor(insight); + if (seenInBatch.has(key)) { + continue; + } + seenInBatch.add(key); + const existingId = dedupeKeyToId.get(key); + finalInsights.push(existingId ? { ...insight, id: existingId } : insight); + if (finalInsights.length >= maxInsights(params.config)) { + break; + } + } + + if (finalInsights.length === 0) { + return []; + } + + const updatePayload = { + runId: params.runId, + timezone: params.config.timezone, + currentPeriodFrom: params.period.current.from, + currentPeriodTo: params.period.current.to, + previousPeriodFrom: params.period.previous.from, + previousPeriodTo: params.period.previous.to, + createdAt: new Date(), + }; + + const toInsert = finalInsights + .filter((insight) => { + const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); + return !(existingId && insight.id === existingId); + }) + .map((insight) => ({ + id: insight.id, + organizationId: params.organizationId, + websiteId: insight.websiteId, + runId: params.runId, + title: insight.title, + description: insight.description, + suggestion: insight.suggestion, + severity: insight.severity, + sentiment: insight.sentiment, + type: insight.type, + priority: insight.priority, + changePercent: insight.changePercent ?? null, + dedupeKey: dedupeKeyFor(insight), + subjectKey: insight.subjectKey, + sources: insight.sources, + confidence: insight.confidence, + impactSummary: insight.impactSummary ?? null, + metrics: + insight.metrics.length > 0 + ? 
(insight.metrics as InsightMetricRow[]) + : null, + timezone: params.config.timezone, + currentPeriodFrom: params.period.current.from, + currentPeriodTo: params.period.current.to, + previousPeriodFrom: params.period.previous.from, + previousPeriodTo: params.period.previous.to, + })); + + const toRefresh = finalInsights.filter((insight) => { + const existingId = dedupeKeyToId.get(dedupeKeyFor(insight)); + return existingId !== undefined && insight.id === existingId; + }); + + if (toInsert.length > 0) { + await db + .insert(analyticsInsights) + .values(toInsert) + .onConflictDoUpdate({ + target: [analyticsInsights.organizationId, analyticsInsights.dedupeKey], + targetWhere: isNotNull(analyticsInsights.dedupeKey), + set: { + runId: params.runId, + timezone: params.config.timezone, + currentPeriodFrom: params.period.current.from, + currentPeriodTo: params.period.current.to, + previousPeriodFrom: params.period.previous.from, + previousPeriodTo: params.period.previous.to, + createdAt: new Date(), + title: sql.raw("excluded.title"), + description: sql.raw("excluded.description"), + suggestion: sql.raw("excluded.suggestion"), + severity: sql.raw("excluded.severity"), + sentiment: sql.raw("excluded.sentiment"), + type: sql.raw("excluded.type"), + priority: sql.raw("excluded.priority"), + changePercent: sql.raw("excluded.change_percent"), + subjectKey: sql.raw("excluded.subject_key"), + sources: sql.raw("excluded.sources"), + confidence: sql.raw("excluded.confidence"), + impactSummary: sql.raw("excluded.impact_summary"), + metrics: sql.raw("excluded.metrics"), + }, + }); + } + await Promise.all( + toRefresh.map((insight) => + db + .update(analyticsInsights) + .set({ + ...updatePayload, + title: insight.title, + description: insight.description, + suggestion: insight.suggestion, + severity: insight.severity, + sentiment: insight.sentiment, + type: insight.type, + priority: insight.priority, + changePercent: insight.changePercent ?? 
/**
 * Stores a plain-text digest of the saved insights for one website.
 *
 * Fire-and-forget: the call is not awaited. A rejected store promise is
 * logged as a warning and never propagates to the caller. Nothing is stored
 * when there are no insights.
 *
 * @param site - Website whose `domain` and `id` label the stored summary.
 * @param insights - Insights to render, one line per insight.
 */
function storeWebsiteSummary(
  site: OrgWebsiteRow,
  insights: GeneratedWebsiteInsight[]
): void {
  if (insights.length === 0) {
    return;
  }

  // One "[severity] title: description Suggestion: suggestion" line per insight.
  const lines: string[] = [];
  for (const { severity, title, description, suggestion } of insights) {
    lines.push(
      `[${severity}] ${title}: ${description} Suggestion: ${suggestion}`
    );
  }
  const summary = lines.join("\n");
  const heading = `Insights for ${site.domain} (${dayjs().format("YYYY-MM-DD")}):`;

  const warnOnFailure = (error: unknown): void => {
    log.warn({
      service: "insights",
      message: "Failed to store analytics summary",
      error_message: errorMessage(error),
      website_id: site.id,
    });
  };

  storeAnalyticsSummary(`${heading}\n${summary}`, site.id, {
    period: "configured",
  }).catch(warnOnFailure);
}
eq(websites.organizationId, input.organizationId), + isNull(websites.deletedAt) + ) + ) + .orderBy(websites.domain) + .limit(100); + + const period = getComparisonPeriod(input.config.lookbackDays); + const userId = input.requestedByUserId ?? "insights-worker"; + const insights = await analyzeWebsite({ + config: input.config, + domain: site.domain, + organizationId: input.organizationId, + orgSites, + period, + userId, + websiteId: site.id, + }); + + const candidates = insights.map( + (insight): GeneratedWebsiteInsight => ({ + ...insight, + id: randomUUIDv7(), + websiteId: site.id, + websiteName: site.name, + websiteDomain: site.domain, + }) + ); + + const saved = await persistWebsiteInsights({ + config: input.config, + insights: candidates, + organizationId: input.organizationId, + period, + runId: input.runId, + }); + + storeWebsiteSummary(site, saved); + + log.info({ + service: "insights", + message: "Generated website insights", + organization_id: input.organizationId, + website_id: input.websiteId, + run_id: input.runId, + result_count: saved.length, + reason: input.reason, + depth: input.config.depth, + model_tier: input.config.modelTier, + allowed_tools: input.config.allowedTools, + }); + + return saved.length > 0 + ? 
{ + status: "succeeded", + resultCount: saved.length, + insightIds: saved.map((insight) => insight.id), + } + : { + status: "skipped", + resultCount: 0, + insightIds: [], + message: "No data-backed insights generated", + }; +} diff --git a/apps/insights/src/idempotency.integration.test.ts b/apps/insights/src/idempotency.integration.test.ts new file mode 100644 index 000000000..751dcce52 --- /dev/null +++ b/apps/insights/src/idempotency.integration.test.ts @@ -0,0 +1,125 @@ +import "@databuddy/test/env"; +import { afterAll, beforeEach, describe, expect, it } from "bun:test"; +import { isNotNull, shutdownPostgres, sql } from "@databuddy/db"; +import { analyticsInsights, insightRuns } from "@databuddy/db/schema"; +import { + closePostgres, + db, + hasTestDb, + insertOrganization, + insertWebsite, + truncatePostgres, +} from "@databuddy/test"; +import { eq } from "drizzle-orm"; +import { randomUUIDv7 } from "bun"; + +const runIntegration = + process.env.INSIGHTS_INTEGRATION_TESTS === "true" && hasTestDb; +const describeIntegration = runIntegration ? 
describe : describe.skip; + +describeIntegration("insights idempotency integration", () => { + beforeEach(async () => { + await truncatePostgres(); + }); + + afterAll(async () => { + await truncatePostgres(); + await shutdownPostgres(); + await closePostgres(); + }); + + it("upserts generated insights by organization dedupe key", async () => { + const org = await insertOrganization(); + const website = await insertWebsite({ organizationId: org.id }); + const firstRunId = randomUUIDv7(); + const secondRunId = randomUUIDv7(); + const dedupeKey = `integration:${randomUUIDv7()}`; + + await db().insert(insightRuns).values([ + { id: firstRunId, organizationId: org.id, reason: "manual" }, + { id: secondRunId, organizationId: org.id, reason: "manual" }, + ]); + + await db().insert(analyticsInsights).values( + insightRow({ + id: randomUUIDv7(), + runId: firstRunId, + organizationId: org.id, + websiteId: website.id, + dedupeKey, + title: "Original checkout signal", + }) + ); + + await db() + .insert(analyticsInsights) + .values( + insightRow({ + id: randomUUIDv7(), + runId: secondRunId, + organizationId: org.id, + websiteId: website.id, + dedupeKey, + title: "Updated checkout signal", + }) + ) + .onConflictDoUpdate({ + target: [analyticsInsights.organizationId, analyticsInsights.dedupeKey], + targetWhere: isNotNull(analyticsInsights.dedupeKey), + set: { + runId: secondRunId, + title: sql`excluded.title`, + }, + }); + + const rows = await db() + .select({ + id: analyticsInsights.id, + runId: analyticsInsights.runId, + title: analyticsInsights.title, + }) + .from(analyticsInsights) + .where(eq(analyticsInsights.organizationId, org.id)); + + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + runId: secondRunId, + title: "Updated checkout signal", + }); + }); +}); + +function insightRow(input: { + dedupeKey: string; + id: string; + organizationId: string; + runId: string; + title: string; + websiteId: string; +}): typeof analyticsInsights.$inferInsert { + return { 
+ id: input.id, + organizationId: input.organizationId, + websiteId: input.websiteId, + runId: input.runId, + dedupeKey: input.dedupeKey, + title: input.title, + description: "A test insight description.", + suggestion: "Inspect the affected flow.", + severity: "warning", + sentiment: "negative", + type: "conversion_leak", + priority: 8, + changePercent: -12, + subjectKey: "checkout", + sources: ["web"], + confidence: 0.82, + impactSummary: "Checkout needs review.", + metrics: [{ label: "Errors", current: 12, previous: 6, format: "number" }], + timezone: "UTC", + currentPeriodFrom: "2026-01-01", + currentPeriodTo: "2026-01-08", + previousPeriodFrom: "2025-12-25", + previousPeriodTo: "2026-01-01", + }; +} diff --git a/apps/insights/src/index.ts b/apps/insights/src/index.ts new file mode 100644 index 000000000..1fa2a3125 --- /dev/null +++ b/apps/insights/src/index.ts @@ -0,0 +1,162 @@ +import { db, shutdownPostgres, sql } from "@databuddy/db"; +import { closeInsightsQueue, getInsightsQueue } from "@databuddy/redis"; +import { Elysia } from "elysia"; +import { initLogger, log } from "evlog"; +import { ensureInsightsDispatchSchedule } from "./scheduler"; +import { startInsightsWorker } from "./worker"; + +const environment = + process.env.UNKEY_ENVIRONMENT_SLUG ?? + (process.env.NODE_ENV === "development" ? "development" : "production"); +const workerEnabled = process.env.INSIGHTS_WORKER_ENABLED !== "false"; +const DRAIN_TIMEOUT_MS = 10_000; + +initLogger({ + env: { + service: "insights", + environment, + region: process.env.UNKEY_REGION, + commitHash: process.env.UNKEY_GIT_COMMIT_SHA, + }, + sampling: {}, +}); + +process.on("unhandledRejection", (reason) => { + log.error({ + process: "unhandledRejection", + reason: reason instanceof Error ? 
/**
 * Races `promise` against a timer.
 *
 * @param promise - Work to wait for.
 * @param timeoutMs - Milliseconds to wait before giving up.
 * @returns The value `promise` resolves with.
 * @throws Error with message "shutdown timeout" when the timer fires first;
 *   otherwise whatever `promise` rejects with.
 *
 * The timer is always cleared once the race settles, so a fast `promise`
 * does not leave a pending timeout behind.
 */
async function withTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number
): Promise<T> {
  let timeout: ReturnType<typeof setTimeout> | undefined;
  try {
    return await Promise.race([
      promise,
      // Never resolves; it can only reject when the deadline passes.
      new Promise<never>((_, reject) => {
        timeout = setTimeout(
          () => reject(new Error("shutdown timeout")),
          timeoutMs
        );
      }),
    ]);
  } finally {
    if (timeout) {
      clearTimeout(timeout);
    }
  }
}
/**
 * Runs one health check and reports its outcome together with its latency.
 *
 * Never rejects: a throwing or rejecting `fn` is turned into an `"error"`
 * result so the health endpoint can report every dependency.
 *
 * @param fn - The check to run; its resolved value is ignored.
 * @returns `{ status: "ok" }` on success, or `{ status: "error", error }` where
 *   `error` is the thrown `Error`'s message, or `"unknown"` for non-Error
 *   throwables. `latency_ms` is the elapsed time rounded to whole milliseconds.
 */
async function probe(fn: () => Promise<unknown>): Promise<ProbeResult> {
  const start = performance.now();
  const elapsedMs = (): number => Math.round(performance.now() - start);
  try {
    await fn();
    return { status: "ok", latency_ms: elapsedMs() };
  } catch (error: unknown) {
    return {
      status: "error",
      latency_ms: elapsedMs(),
      error: error instanceof Error ? error.message : "unknown",
    };
  }
}
/**
 * Extracts a loggable message from a caught value: the `message` of an
 * `Error`, or the string form of anything else.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Generates insights for one website as part of an insight run.
 *
 * Marks the run and the run item as running, generates the insights, records
 * the outcome on the item, re-derives the run status and queues the rollup
 * once the run has settled.
 *
 * @param data - Job payload identifying the run, item, website and config.
 * @param job - Queue job; `attemptsMade` is recorded on the item.
 * @returns The number of stored insights and the item status.
 * @throws Rethrows the generation error after marking the item as failed.
 */
async function processGenerateWebsiteJob(
  data: InsightsGenerateWebsiteJobData,
  job: Job
): Promise<{ resultCount: number; status: "skipped" | "succeeded" }> {
  const startedAt = new Date();
  // NOTE(review): every item job rewrites the run's `startedAt`, so the run
  // ends up with the start time of its most recently started item. Confirm
  // whether the first start time should be kept instead.
  await Promise.all([
    db
      .update(insightRuns)
      .set({
        status: "running",
        startedAt,
        updatedAt: startedAt,
      })
      .where(eq(insightRuns.id, data.runId)),
    db
      .update(insightRunItems)
      .set({
        attempts: job.attemptsMade + 1,
        startedAt,
        status: "running",
        updatedAt: startedAt,
      })
      .where(eq(insightRunItems.id, data.itemId)),
  ]);

  try {
    const result = await generateWebsiteInsights({
      config: data.config,
      organizationId: data.organizationId,
      reason: data.reason,
      requestedByUserId: data.requestedByUserId ?? null,
      runId: data.runId,
      websiteId: data.websiteId,
    });

    const finishedAt = new Date();
    await db
      .update(insightRunItems)
      .set({
        errorMessage: result.message ?? null,
        finishedAt,
        resultCount: result.resultCount,
        status: result.status,
        // Keep `updatedAt` in step with the row's final state.
        updatedAt: finishedAt,
      })
      .where(eq(insightRunItems.id, data.itemId));
    await queueRollupIfSettled(await syncRunStatus(data.runId));
    return { resultCount: result.resultCount, status: result.status };
  } catch (error) {
    // Failure bookkeeping is best-effort: if it throws as well, log it and
    // still surface the original error to the queue.
    try {
      const finishedAt = new Date();
      await db
        .update(insightRunItems)
        .set({
          errorMessage: errorMessage(error),
          finishedAt,
          status: "failed",
          updatedAt: finishedAt,
        })
        .where(eq(insightRunItems.id, data.itemId));
      await queueRollupIfSettled(await syncRunStatus(data.runId));
    } catch (bookkeepingError) {
      log.error({
        service: "insights",
        message: "Failed to record insight generation failure",
        run_id: data.runId,
        item_id: data.itemId,
        error_message: errorMessage(bookkeepingError),
      });
    }
    throw error;
  }
}
+ ); + }); + + it("summarizes the top signal with site context", () => { + const narrative = buildDeterministicRollupNarrative("30d", [ + { + title: "Checkout errors increased", + description: "Errors rose on checkout.", + suggestion: "Review checkout errors.", + severity: "critical", + sentiment: "negative", + priority: 9, + changePercent: 42, + websiteName: "App", + websiteDomain: "app.example.com", + }, + ]); + + expect(narrative).toBe( + "This month: Checkout errors increased (+42%) on App." + ); + }); + + it("mentions an additional signal when multiple cards exist", () => { + const narrative = buildDeterministicRollupNarrative("90d", [ + { + title: "Interactions got slower", + description: "INP regressed.", + suggestion: "Audit slow pages.", + severity: "warning", + sentiment: "negative", + priority: 8, + changePercent: null, + websiteName: null, + websiteDomain: "www.example.com", + }, + { + title: "Docs traffic improved", + description: "Organic sessions rose.", + suggestion: "Compare landing pages.", + severity: "info", + sentiment: "positive", + priority: 6, + changePercent: 18, + websiteName: "Docs", + websiteDomain: "docs.example.com", + }, + ]); + + expect(narrative).toBe( + "This quarter: Interactions got slower on www.example.com. Also review Docs traffic improved on Docs." 
/**
 * Builds the rollup narrative without calling a model.
 *
 * @param range - Rollup window; selects the "week" / "month" / "quarter" label.
 * @param insights - Insight cards, most important first.
 * @returns
 *   - no cards: an "All systems healthy" sentence;
 *   - one card: "This <label>: <title>[ (<change>%)] on <site>.";
 *   - more cards: the same opener plus "Also review <second title> on <site>"
 *     and, past two cards, a count of the remaining signals.
 *
 * The site is the website name, or its domain when the name is null. The
 * change is rounded to a whole percent and the "+" sign is chosen after
 * rounding, so values that round to zero render as "0%".
 */
export function buildDeterministicRollupNarrative(
  range: InsightRollupRange,
  insights: RollupInsightSummary[]
): string {
  const label = RANGE_TO_LABEL[range];
  const [headline, second, ...rest] = insights;
  if (!headline) {
    return `All systems healthy this ${label}. No actionable signals detected.`;
  }

  let change = "";
  if (headline.changePercent != null) {
    const digits = headline.changePercent.toFixed(0);
    const rounded = Number(digits);
    // `rounded === 0` also matches -0, which toFixed renders as "-0".
    change =
      rounded === 0 ? " (0%)" : ` (${rounded > 0 ? "+" : ""}${digits}%)`;
  }

  const siteName = headline.websiteName ?? headline.websiteDomain;
  const opener = `This ${label}: ${headline.title}${change} on ${siteName}.`;
  if (!second) {
    return opener;
  }

  const secondSite = second.websiteName ?? second.websiteDomain;
  const alsoReview = `${opener} Also review ${second.title} on ${secondSite}`;
  const remaining = rest.length;
  if (remaining === 0) {
    return `${alsoReview}.`;
  }
  return `${alsoReview}, plus ${remaining} more signal${remaining === 1 ? "" : "s"}.`;
}
/**
 * Produces the narrative text for one rollup range.
 *
 * With no insights the deterministic narrative is returned and no model call
 * is made. Otherwise the model is asked for a short brief built from the
 * supplied cards only. If the call fails (a warning is logged) or returns
 * nothing after sanitizing, the deterministic narrative is used instead.
 *
 * @param range - Rollup window the narrative describes.
 * @param organizationId - Used for telemetry metadata and log context.
 * @param insights - Insight cards sent to the model as JSON.
 */
async function generateRollupNarrative(
  range: InsightRollupRange,
  organizationId: string,
  insights: RollupInsightSummary[]
): Promise<string> {
  const deterministic = buildDeterministicRollupNarrative(range, insights);
  if (insights.length === 0) {
    return deterministic;
  }

  try {
    const aiLogger = getAILogger();
    const { text } = await generateText({
      model: aiLogger.wrap(models.balanced),
      messages: [
        {
          role: "system",
          content:
            "Write one compact Databuddy executive analytics brief from stored insight cards. Use only the supplied cards. Be specific, operational, and plain English. Mention the most important website names. Do not invent causes, revenue, user counts, or metrics. Return one paragraph under 90 words.",
          providerOptions: ANTHROPIC_CACHE_1H,
        },
        {
          role: "user",
          content: JSON.stringify({ range, insights }, null, 2),
        },
      ],
      temperature: 0.2,
      maxOutputTokens: 512,
      abortSignal: AbortSignal.timeout(30_000),
      experimental_telemetry: {
        isEnabled: true,
        functionId: "databuddy.insights.worker.rollup",
        metadata: {
          source: "insights_worker",
          feature: "smart_insights",
          organizationId,
          range,
        },
      },
    });

    const cleaned = sanitizeNarrative(text);
    return cleaned === "" ? deterministic : cleaned;
  } catch (error) {
    log.warn({
      service: "insights",
      message: "Failed to generate insight rollup narrative",
      organization_id: organizationId,
      range,
      error_message: errorMessage(error),
    });
    return deterministic;
  }
}
/**
 * Regenerates every rollup range for an organization.
 *
 * All ranges are generated concurrently and share one `generatedAt`
 * timestamp. Once they have all been stored, the organization's insight
 * caches are invalidated and a summary log line is written.
 *
 * @param data - Rollup job payload (organization, run and reason).
 * @returns A success marker with the number of ranges processed.
 */
export async function processRollupJob(
  data: InsightsRollupJobData
): Promise<{ ranges: number; status: "succeeded" }> {
  const generatedAt = new Date();
  const pending = ROLLUP_RANGES.map((range) =>
    generateRangeRollup(data, range, generatedAt)
  );
  await Promise.all(pending);
  await invalidateInsightsCachesForOrganization(data.organizationId);

  const ranges = ROLLUP_RANGES.length;
  log.info({
    service: "insights",
    message: "Generated insight rollups",
    organization_id: data.organizationId,
    run_id: data.runId,
    reason: data.reason,
    ranges,
  });

  return { status: "succeeded", ranges };
}
+ insertWebsite, + truncatePostgres, +} from "@databuddy/test"; +import { and, asc, eq, isNull } from "drizzle-orm"; +import { randomUUIDv7 } from "bun"; +import { dispatchDueInsightRuns } from "./scheduler"; + +const runIntegration = + process.env.INSIGHTS_INTEGRATION_TESTS === "true" && hasTestDb; +const describeIntegration = runIntegration ? describe : describe.skip; + +describeIntegration("insights scheduler integration", () => { + const organizationIds = new Set(); + + beforeEach(async () => { + await truncatePostgres(); + }); + + afterEach(async () => { + await cleanupQueueJobs(); + await truncatePostgres(); + organizationIds.clear(); + }); + + afterAll(async () => { + await cleanupQueueJobs(); + await closeInsightsQueue(); + await shutdownPostgres(); + await closePostgres(); + }); + + it("dispatches an org config only to websites without website overrides", async () => { + const org = await insertOrganization(); + organizationIds.add(org.id); + const included = await insertWebsite({ + organizationId: org.id, + domain: "included.example.com", + }); + const overridden = await insertWebsite({ + organizationId: org.id, + domain: "overridden.example.com", + }); + const now = new Date(); + + await db().insert(insightGenerationConfigs).values([ + { + id: randomUUIDv7(), + organizationId: org.id, + websiteId: null, + enabled: true, + frequency: "daily", + nextRunAt: new Date(now.getTime() - 1000), + }, + { + id: randomUUIDv7(), + organizationId: org.id, + websiteId: overridden.id, + enabled: true, + frequency: "weekly", + nextRunAt: new Date(now.getTime() + 86_400_000), + }, + ]); + + const result = await dispatchDueInsightRuns(now); + + expect(result).toMatchObject({ + scannedConfigs: 1, + claimedConfigs: 1, + dispatchedRuns: 1, + queuedItems: 1, + skippedConfigs: 0, + }); + + const runs = await runsForOrg(org.id); + expect(runs).toHaveLength(1); + expect(runs[0]).toMatchObject({ + organizationId: org.id, + reason: "scheduled", + status: "queued", + totalItems: 1, 
+ }); + + const items = await itemsForRun(runs[0].id); + expect(items.map((item) => item.websiteId)).toEqual([included.id]); + + const jobs = await queueJobsForOrg(org.id); + expect(jobs).toHaveLength(1); + expect(jobs[0]?.name).toBe("insights-generate-website"); + expect(jobs[0]?.data.websiteId).toBe(included.id); + expect(jobs[0]?.data.runId).toBe(runs[0].id); + + const [config] = await db() + .select({ + lastRunAt: insightGenerationConfigs.lastRunAt, + nextRunAt: insightGenerationConfigs.nextRunAt, + }) + .from(insightGenerationConfigs) + .where( + and( + eq(insightGenerationConfigs.organizationId, org.id), + isNull(insightGenerationConfigs.websiteId) + ) + ) + .limit(1); + + expect(config?.lastRunAt?.getTime()).toBe(now.getTime()); + expect(config?.nextRunAt && config.nextRunAt.getTime() > now.getTime()).toBe( + true + ); + }); + + it("dispatches due website configs independently", async () => { + const org = await insertOrganization(); + organizationIds.add(org.id); + const website = await insertWebsite({ + organizationId: org.id, + domain: "website-scope.example.com", + }); + const now = new Date(); + + await db().insert(insightGenerationConfigs).values({ + id: randomUUIDv7(), + organizationId: org.id, + websiteId: website.id, + enabled: true, + frequency: "hourly", + nextRunAt: new Date(now.getTime() - 1000), + }); + + const result = await dispatchDueInsightRuns(now); + + expect(result).toMatchObject({ + scannedConfigs: 1, + claimedConfigs: 1, + dispatchedRuns: 1, + queuedItems: 1, + skippedConfigs: 0, + }); + + const runs = await runsForOrg(org.id); + const items = await itemsForRun(runs[0].id); + const jobs = await queueJobsForOrg(org.id); + + expect(items.map((item) => item.websiteId)).toEqual([website.id]); + expect(jobs).toHaveLength(1); + expect(jobs[0]?.data.websiteId).toBe(website.id); + }); + + async function runsForOrg(organizationId: string) { + return await appDb + .select() + .from(insightRuns) + .where(eq(insightRuns.organizationId, 
organizationId)) + .orderBy(asc(insightRuns.createdAt)); + } + + async function itemsForRun(runId: string) { + return await appDb + .select() + .from(insightRunItems) + .where(eq(insightRunItems.runId, runId)) + .orderBy(asc(insightRunItems.websiteId)); + } + + async function queueJobsForOrg(organizationId: string) { + const jobs = await getInsightsQueue().getJobs( + ["waiting", "delayed", "prioritized", "paused", "completed", "failed"], + 0, + -1 + ); + return jobs + .filter((job) => { + const data = job.data as Partial; + return data.organizationId === organizationId; + }) + .sort((a, b) => + String(a.data.websiteId ?? "").localeCompare( + String(b.data.websiteId ?? "") + ) + ); + } + + async function cleanupQueueJobs(): Promise { + if (organizationIds.size === 0) { + return; + } + const jobs = await getInsightsQueue().getJobs( + ["waiting", "delayed", "prioritized", "paused", "completed", "failed"], + 0, + -1 + ); + await Promise.allSettled( + jobs + .filter((job) => { + const data = job.data as Partial; + return ( + typeof data.organizationId === "string" && + organizationIds.has(data.organizationId) + ); + }) + .map((job) => job.remove()) + ); + } +}); diff --git a/apps/insights/src/scheduler.ts b/apps/insights/src/scheduler.ts new file mode 100644 index 000000000..1b894b431 --- /dev/null +++ b/apps/insights/src/scheduler.ts @@ -0,0 +1,235 @@ +import { and, asc, db, eq, isNotNull, isNull, lte } from "@databuddy/db"; +import { + insightGenerationConfigs, + websites, + type InsightGenerationFrequency, +} from "@databuddy/db/schema"; +import { queueInsightGenerationRun } from "@databuddy/rpc/insight-generation"; +import { getNextInsightRunAt } from "@databuddy/rpc/insight-schedule"; +import { + getInsightsQueue, + INSIGHTS_DISPATCH_JOB_NAME, + type InsightGenerationReason, +} from "@databuddy/redis"; +import { log } from "evlog"; + +const DEFAULT_DISPATCH_INTERVAL_MS = 5 * 60 * 1000; +const MIN_DISPATCH_INTERVAL_MS = 60 * 1000; +const MAX_DUE_CONFIGS_PER_TICK = 
100; +const FAILED_DISPATCH_RETRY_MS = 60 * 1000; + +type DueConfig = typeof insightGenerationConfigs.$inferSelect; + +export interface DispatchDueInsightRunsResult { + claimedConfigs: number; + dispatchedRuns: number; + queuedItems: number; + scannedConfigs: number; + skippedConfigs: number; +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +function dispatchIntervalMs(): number { + const raw = process.env.INSIGHTS_DISPATCH_INTERVAL_MS; + if (!raw) { + return DEFAULT_DISPATCH_INTERVAL_MS; + } + const parsed = Number.parseInt(raw, 10); + if (!Number.isSafeInteger(parsed) || parsed < MIN_DISPATCH_INTERVAL_MS) { + return DEFAULT_DISPATCH_INTERVAL_MS; + } + return parsed; +} + +function nextRunAtFor(config: DueConfig, from: Date): Date | null { + return getNextInsightRunAt( + { + cron: config.cron, + enabled: config.enabled, + frequency: config.frequency as InsightGenerationFrequency, + }, + from + ); +} + +async function dueConfigs(now: Date): Promise { + return await db + .select() + .from(insightGenerationConfigs) + .where( + and( + eq(insightGenerationConfigs.enabled, true), + lte(insightGenerationConfigs.nextRunAt, now) + ) + ) + .orderBy(asc(insightGenerationConfigs.nextRunAt)) + .limit(MAX_DUE_CONFIGS_PER_TICK); +} + +async function claimConfig( + config: DueConfig, + now: Date +): Promise { + const [claimed] = await db + .update(insightGenerationConfigs) + .set({ + nextRunAt: nextRunAtFor(config, now), + updatedAt: now, + }) + .where( + and( + eq(insightGenerationConfigs.id, config.id), + eq(insightGenerationConfigs.enabled, true), + lte(insightGenerationConfigs.nextRunAt, now) + ) + ) + .returning(); + + return claimed ?? 
null; +} + +async function markConfigDispatched( + configId: string, + now: Date +): Promise { + await db + .update(insightGenerationConfigs) + .set({ lastRunAt: now, updatedAt: now }) + .where(eq(insightGenerationConfigs.id, configId)); +} + +async function retryConfigSoon(configId: string, now: Date): Promise { + await db + .update(insightGenerationConfigs) + .set({ + nextRunAt: new Date(now.getTime() + FAILED_DISPATCH_RETRY_MS), + updatedAt: now, + }) + .where(eq(insightGenerationConfigs.id, configId)); +} + +async function websiteIdsWithOverrides( + organizationId: string +): Promise> { + const rows = await db + .select({ websiteId: insightGenerationConfigs.websiteId }) + .from(insightGenerationConfigs) + .where( + and( + eq(insightGenerationConfigs.organizationId, organizationId), + isNotNull(insightGenerationConfigs.websiteId) + ) + ); + + const ids = new Set(); + for (const row of rows) { + if (row.websiteId) { + ids.add(row.websiteId); + } + } + return ids; +} + +async function orgScheduledWebsiteIds( + organizationId: string +): Promise { + const overrideIds = await websiteIdsWithOverrides(organizationId); + const rows = await db + .select({ id: websites.id }) + .from(websites) + .where( + and( + eq(websites.organizationId, organizationId), + isNull(websites.deletedAt) + ) + ) + .orderBy(asc(websites.createdAt)); + + return rows + .map((row) => row.id) + .filter((websiteId) => !overrideIds.has(websiteId)); +} + +async function targetWebsiteIds(config: DueConfig): Promise { + if (config.websiteId) { + return [config.websiteId]; + } + return await orgScheduledWebsiteIds(config.organizationId); +} + +export async function ensureInsightsDispatchSchedule(): Promise { + const intervalMs = dispatchIntervalMs(); + await getInsightsQueue().upsertJobScheduler( + INSIGHTS_DISPATCH_JOB_NAME, + { every: intervalMs }, + { + name: INSIGHTS_DISPATCH_JOB_NAME, + data: { + reason: "scheduled", + triggeredAt: new Date().toISOString(), + }, + } + ); + + log.info({ + service: 
"insights", + message: "Insights dispatch scheduler ensured", + interval_ms: intervalMs, + }); +} + +export async function dispatchDueInsightRuns( + now = new Date() +): Promise { + const configs = await dueConfigs(now); + const result: DispatchDueInsightRunsResult = { + scannedConfigs: configs.length, + claimedConfigs: 0, + dispatchedRuns: 0, + queuedItems: 0, + skippedConfigs: 0, + }; + + for (const config of configs) { + const claimed = await claimConfig(config, now); + if (!claimed) { + result.skippedConfigs += 1; + continue; + } + result.claimedConfigs += 1; + + try { + const websiteIds = await targetWebsiteIds(claimed); + if (websiteIds.length === 0) { + await markConfigDispatched(claimed.id, now); + result.skippedConfigs += 1; + continue; + } + + const queued = await queueInsightGenerationRun({ + organizationId: claimed.organizationId, + reason: "scheduled" satisfies InsightGenerationReason, + websiteIds, + }); + await markConfigDispatched(claimed.id, now); + result.dispatchedRuns += 1; + result.queuedItems += queued.queuedItems; + } catch (error) { + await retryConfigSoon(claimed.id, now); + result.skippedConfigs += 1; + log.error({ + service: "insights", + message: "Failed to dispatch scheduled insight run", + config_id: claimed.id, + organization_id: claimed.organizationId, + website_id: claimed.websiteId, + error_message: errorMessage(error), + }); + } + } + + return result; +} diff --git a/apps/insights/src/worker.ts b/apps/insights/src/worker.ts new file mode 100644 index 000000000..01d025b2e --- /dev/null +++ b/apps/insights/src/worker.ts @@ -0,0 +1,68 @@ +import { + getBullMQWorkerConnectionOptions, + INSIGHTS_JOB_TIMEOUT_MS, + INSIGHTS_QUEUE_ENV_PREFIX, + INSIGHTS_QUEUE_NAME, + type InsightsQueueJobData, +} from "@databuddy/redis"; +import { Worker } from "bullmq"; +import { log } from "evlog"; +import { processInsightsJob } from "./jobs"; + +const DEFAULT_INSIGHTS_WORKER_CONCURRENCY = 5; + +export function getInsightsWorkerConcurrency( + value = 
process.env.INSIGHTS_WORKER_CONCURRENCY +): number { + if (value === undefined || value.trim() === "") { + return DEFAULT_INSIGHTS_WORKER_CONCURRENCY; + } + + const parsed = Number.parseInt(value, 10); + if (!Number.isSafeInteger(parsed) || parsed <= 0) { + return DEFAULT_INSIGHTS_WORKER_CONCURRENCY; + } + + return parsed; +} + +export function startInsightsWorker() { + const worker = new Worker( + INSIGHTS_QUEUE_NAME, + async (job) => await processInsightsJob(job), + { + connection: getBullMQWorkerConnectionOptions({ + envPrefix: INSIGHTS_QUEUE_ENV_PREFIX, + }), + concurrency: getInsightsWorkerConcurrency(), + lockDuration: INSIGHTS_JOB_TIMEOUT_MS * 2, + stalledInterval: INSIGHTS_JOB_TIMEOUT_MS * 3, + } + ); + + worker.on("failed", (job, error) => { + log.error({ + insights_worker: "job_failed", + error_message: error.message, + job_id: job?.id, + job_name: job?.name, + attempts_made: job?.attemptsMade ?? 0, + }); + }); + + worker.on("stalled", (jobId) => { + log.error({ + insights_worker: "job_stalled", + job_id: jobId, + }); + }); + + worker.on("error", (error) => { + log.error({ + insights_worker: "worker_error", + error_message: error.message, + }); + }); + + return worker; +} diff --git a/apps/insights/tsconfig.json b/apps/insights/tsconfig.json new file mode 100644 index 000000000..5bdc61c25 --- /dev/null +++ b/apps/insights/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig/default.json", + "include": ["src/**/*"], + "exclude": [ + "node_modules", + "dist", + "**/*.test.ts", + "**/*.test.tsx", + "**/*.spec.ts", + "**/*.spec.tsx" + ] +} diff --git a/bun.lock b/bun.lock index ed0290a31..8077b6123 100644 --- a/bun.lock +++ b/bun.lock @@ -326,6 +326,24 @@ "typescript": "^5.9.3", }, }, + "apps/insights": { + "name": "@databuddy/insights", + "version": "1.0.0", + "dependencies": { + "@databuddy/ai": "workspace:*", + "@databuddy/db": "workspace:*", + "@databuddy/redis": "workspace:*", + "@databuddy/rpc": "workspace:*", + "ai": "^6.0.154", + "bullmq": 
"^5.66.5", + "dayjs": "^1.11.19", + "elysia": "catalog:", + "evlog": "catalog:", + }, + "devDependencies": { + "@databuddy/test": "workspace:*", + }, + }, "apps/links": { "name": "@databuddy/links", "version": "1.0.0", @@ -1074,6 +1092,8 @@ "@databuddy/evals": ["@databuddy/evals@workspace:packages/evals"], + "@databuddy/insights": ["@databuddy/insights@workspace:apps/insights"], + "@databuddy/links": ["@databuddy/links@workspace:apps/links"], "@databuddy/mapper": ["@databuddy/mapper@workspace:packages/mapper"], diff --git a/package.json b/package.json index 19dc00831..381ccef33 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ "email:dev": "dotenv -- sh -c 'cd packages/email && bun run dev'", "sdk:build": "turbo run build --filter @databuddy/sdk --filter @databuddy/cache", "dev:dashboard": "dotenv -- turbo run dev --filter @databuddy/dashboard --filter @databuddy/api", + "dev:insights": "dotenv -- turbo run dev --filter @databuddy/insights", "dev:slack": "dotenv -- turbo dev --filter @databuddy/slack", "slack:manifest:validate": "bun run --cwd apps/slack manifest:validate", "slack:manifest:update": "bun run --cwd apps/slack manifest:update", diff --git a/packages/ai/package.json b/packages/ai/package.json index d668873b9..0f16dd1d0 100644 --- a/packages/ai/package.json +++ b/packages/ai/package.json @@ -23,6 +23,7 @@ "./insights/normalize": "./src/ai/insights/normalize.ts", "./insights/types": "./src/ai/insights/types.ts", "./insights/validate": "./src/ai/insights/validate.ts", + "./lib/ai-logger": "./src/lib/ai-logger.ts", "./lib/supermemory": "./src/lib/supermemory.ts", "./lib/request-logger": "./src/lib/request-logger.ts", "./mcp/http": "./src/mcp/http.ts", diff --git a/packages/ai/src/ai/schemas/smart-insights-output.ts b/packages/ai/src/ai/schemas/smart-insights-output.ts index bca965f70..484853d0b 100644 --- a/packages/ai/src/ai/schemas/smart-insights-output.ts +++ b/packages/ai/src/ai/schemas/smart-insights-output.ts @@ -127,9 +127,9 @@ export 
const insightSchema = z.object({ export const insightsOutputSchema = z.object({ insights: z .array(insightSchema) - .max(3) + .max(10) .describe( - "1-3 insights ranked by actionability × business impact. When the week is mostly positive, at least one insight MUST still call out a material risk or watch (e.g. session duration down, bounce up, single-channel dependency, volatile referrer, error count up in absolute terms) if those signals appear in the data—do not only celebrate wins. Skip repeating a narrative already listed under recently reported insights unless the change is materially new." + "Insight cards ranked by actionability × business impact. Default runs usually request 1-3 cards, but configured deep runs may request more. When the period is mostly positive, at least one insight MUST still call out a material risk or watch (e.g. session duration down, bounce up, single-channel dependency, volatile referrer, error count up in absolute terms) if those signals appear in the data—do not only celebrate wins. Skip repeating a narrative already listed under recently reported insights unless the change is materially new." 
), }); diff --git a/packages/db/drizzle.config.ts b/packages/db/drizzle.config.ts index 2bbaf4880..2cc07763d 100644 --- a/packages/db/drizzle.config.ts +++ b/packages/db/drizzle.config.ts @@ -11,6 +11,7 @@ export default defineConfig({ "./src/drizzle/schema/billing.ts", "./src/drizzle/schema/feedback.ts", "./src/drizzle/schema/flags.ts", + "./src/drizzle/schema/insights.ts", "./src/drizzle/schema/integrations.ts", "./src/drizzle/schema/links.ts", "./src/drizzle/schema/tracker.ts", diff --git a/packages/db/src/drizzle/schema/analytics.ts b/packages/db/src/drizzle/schema/analytics.ts index a8efcf02c..7e90d9afe 100644 --- a/packages/db/src/drizzle/schema/analytics.ts +++ b/packages/db/src/drizzle/schema/analytics.ts @@ -1,3 +1,4 @@ +import { isNotNull } from "drizzle-orm"; import { boolean, doublePrecision, @@ -205,6 +206,7 @@ export const analyticsInsights = pgTable( type: text().notNull(), priority: integer().notNull(), changePercent: doublePrecision("change_percent"), + dedupeKey: text("dedupe_key"), subjectKey: text("subject_key").notNull().default(""), sources: jsonb().$type().notNull().default([]), confidence: doublePrecision().notNull().default(0), @@ -234,6 +236,9 @@ export const analyticsInsights = pgTable( table.subjectKey, table.createdAt.desc() ), + uniqueIndex("analytics_insights_org_dedupe_key_uidx") + .on(table.organizationId, table.dedupeKey) + .where(isNotNull(table.dedupeKey)), foreignKey({ columns: [table.organizationId], foreignColumns: [organization.id], diff --git a/packages/db/src/drizzle/schema/index.ts b/packages/db/src/drizzle/schema/index.ts index 7b6c3142b..7aca5a194 100644 --- a/packages/db/src/drizzle/schema/index.ts +++ b/packages/db/src/drizzle/schema/index.ts @@ -6,6 +6,7 @@ export * from "./billing"; export * from "./feedback"; export * from "./flags"; export * from "./integrations"; +export * from "./insights"; export * from "./links"; export * from "./uptime"; export * from "./tracker"; diff --git 
a/packages/db/src/drizzle/schema/insights.ts b/packages/db/src/drizzle/schema/insights.ts new file mode 100644 index 000000000..864233b0a --- /dev/null +++ b/packages/db/src/drizzle/schema/insights.ts @@ -0,0 +1,296 @@ +import { isNotNull, isNull } from "drizzle-orm"; +import { + boolean, + foreignKey, + index, + integer, + jsonb, + pgTable, + text, + timestamp, + uniqueIndex, +} from "drizzle-orm/pg-core"; +import { organization, user } from "./auth"; +import { websites } from "./websites"; + +export const INSIGHT_GENERATION_DEFAULT_TOOLS = [ + "web_metrics", + "product_metrics", + "ops_context", +] as const; + +export type InsightGenerationTool = + | "web_metrics" + | "product_metrics" + | "ops_context" + | "business_context"; +export type InsightGenerationDepth = "light" | "standard" | "deep"; +export type InsightGenerationFrequency = + | "hourly" + | "daily" + | "weekly" + | "custom"; +export type InsightGenerationModelTier = "fast" | "balanced" | "deep"; +export type InsightGenerationReason = + | "manual" + | "scheduled" + | "cooldown_refresh"; +export type InsightRollupRange = "7d" | "30d" | "90d"; +export type InsightRunStatus = + | "queued" + | "running" + | "succeeded" + | "partially_succeeded" + | "failed" + | "skipped"; +export type InsightRunItemStatus = + | "queued" + | "running" + | "succeeded" + | "failed" + | "skipped"; + +export interface InsightGenerationConfigSnapshot { + allowedTools: InsightGenerationTool[]; + cooldownHours: number; + depth: InsightGenerationDepth; + lookbackDays: number; + maxInsightsPerWebsite: number; + maxSteps: number; + maxToolCalls: number; + modelTier: InsightGenerationModelTier; + timezone: string; +} + +export const insightGenerationConfigs = pgTable( + "insight_generation_configs", + { + id: text().primaryKey(), + organizationId: text("organization_id").notNull(), + websiteId: text("website_id"), + enabled: boolean().default(true).notNull(), + frequency: text() + .$type() + .default("weekly") + .notNull(), + cron: 
text(), + depth: text().$type().default("standard").notNull(), + maxSteps: integer("max_steps").default(24).notNull(), + maxToolCalls: integer("max_tool_calls").default(16).notNull(), + maxInsightsPerWebsite: integer("max_insights_per_website") + .default(3) + .notNull(), + cooldownHours: integer("cooldown_hours").default(6).notNull(), + lookbackDays: integer("lookback_days").default(7).notNull(), + timezone: text().default("UTC").notNull(), + modelTier: text("model_tier") + .$type() + .default("balanced") + .notNull(), + allowedTools: jsonb("allowed_tools") + .$type() + .default([...INSIGHT_GENERATION_DEFAULT_TOOLS]) + .notNull(), + nextRunAt: timestamp("next_run_at", { + precision: 3, + withTimezone: true, + }), + lastRunAt: timestamp("last_run_at", { + precision: 3, + withTimezone: true, + }), + createdAt: timestamp("created_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull(), + updatedAt: timestamp("updated_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull() + .$onUpdate(() => new Date()), + }, + (table) => [ + uniqueIndex("insight_generation_configs_org_default_uidx") + .on(table.organizationId) + .where(isNull(table.websiteId)), + uniqueIndex("insight_generation_configs_org_website_uidx") + .on(table.organizationId, table.websiteId) + .where(isNotNull(table.websiteId)), + index("insight_generation_configs_org_next_run_idx").on( + table.organizationId, + table.nextRunAt + ), + index("insight_generation_configs_website_idx").on(table.websiteId), + foreignKey({ + columns: [table.organizationId], + foreignColumns: [organization.id], + name: "insight_generation_configs_organization_id_fkey", + }).onDelete("cascade"), + foreignKey({ + columns: [table.websiteId], + foreignColumns: [websites.id], + name: "insight_generation_configs_website_id_fkey", + }).onDelete("cascade"), + ] +); + +export const insightRuns = pgTable( + "insight_runs", + { + id: text().primaryKey(), + organizationId: text("organization_id").notNull(), + 
requestedByUserId: text("requested_by_user_id"), + reason: text().$type().default("manual").notNull(), + status: text().$type().default("queued").notNull(), + timezone: text().default("UTC").notNull(), + totalItems: integer("total_items").default(0).notNull(), + completedItems: integer("completed_items").default(0).notNull(), + failedItems: integer("failed_items").default(0).notNull(), + skippedItems: integer("skipped_items").default(0).notNull(), + errorMessage: text("error_message"), + startedAt: timestamp("started_at", { + precision: 3, + withTimezone: true, + }), + finishedAt: timestamp("finished_at", { + precision: 3, + withTimezone: true, + }), + createdAt: timestamp("created_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull(), + updatedAt: timestamp("updated_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull() + .$onUpdate(() => new Date()), + }, + (table) => [ + index("insight_runs_org_created_idx").on( + table.organizationId, + table.createdAt.desc() + ), + index("insight_runs_status_idx").on(table.status), + foreignKey({ + columns: [table.organizationId], + foreignColumns: [organization.id], + name: "insight_runs_organization_id_fkey", + }).onDelete("cascade"), + foreignKey({ + columns: [table.requestedByUserId], + foreignColumns: [user.id], + name: "insight_runs_requested_by_user_id_fkey", + }).onDelete("set null"), + ] +); + +export const insightRunItems = pgTable( + "insight_run_items", + { + id: text().primaryKey(), + runId: text("run_id").notNull(), + organizationId: text("organization_id").notNull(), + websiteId: text("website_id").notNull(), + queueJobId: text("queue_job_id"), + status: text().$type().default("queued").notNull(), + attempts: integer().default(0).notNull(), + configSnapshot: jsonb("config_snapshot") + .$type() + .notNull(), + resultCount: integer("result_count").default(0).notNull(), + errorMessage: text("error_message"), + startedAt: timestamp("started_at", { + precision: 3, + withTimezone: 
true, + }), + finishedAt: timestamp("finished_at", { + precision: 3, + withTimezone: true, + }), + createdAt: timestamp("created_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull(), + updatedAt: timestamp("updated_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull() + .$onUpdate(() => new Date()), + }, + (table) => [ + uniqueIndex("insight_run_items_run_website_uidx").on( + table.runId, + table.websiteId + ), + index("insight_run_items_run_status_idx").on(table.runId, table.status), + index("insight_run_items_org_website_idx").on( + table.organizationId, + table.websiteId + ), + foreignKey({ + columns: [table.runId], + foreignColumns: [insightRuns.id], + name: "insight_run_items_run_id_fkey", + }).onDelete("cascade"), + foreignKey({ + columns: [table.organizationId], + foreignColumns: [organization.id], + name: "insight_run_items_organization_id_fkey", + }).onDelete("cascade"), + foreignKey({ + columns: [table.websiteId], + foreignColumns: [websites.id], + name: "insight_run_items_website_id_fkey", + }).onDelete("cascade"), + ] +); + +export const insightRollups = pgTable( + "insight_rollups", + { + id: text().primaryKey(), + organizationId: text("organization_id").notNull(), + runId: text("run_id"), + range: text().$type().notNull(), + narrative: text().notNull(), + generatedAt: timestamp("generated_at", { + precision: 3, + withTimezone: true, + }) + .defaultNow() + .notNull(), + createdAt: timestamp("created_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull(), + updatedAt: timestamp("updated_at", { precision: 3, withTimezone: true }) + .defaultNow() + .notNull() + .$onUpdate(() => new Date()), + }, + (table) => [ + uniqueIndex("insight_rollups_org_range_uidx").on( + table.organizationId, + table.range + ), + index("insight_rollups_org_generated_idx").on( + table.organizationId, + table.generatedAt.desc() + ), + foreignKey({ + columns: [table.organizationId], + foreignColumns: [organization.id], + name: 
"insight_rollups_organization_id_fkey", + }).onDelete("cascade"), + foreignKey({ + columns: [table.runId], + foreignColumns: [insightRuns.id], + name: "insight_rollups_run_id_fkey", + }).onDelete("set null"), + ] +); + +export type InsightGenerationConfig = + typeof insightGenerationConfigs.$inferSelect; +export type InsightGenerationConfigInsert = + typeof insightGenerationConfigs.$inferInsert; +export type InsightRun = typeof insightRuns.$inferSelect; +export type InsightRunInsert = typeof insightRuns.$inferInsert; +export type InsightRunItem = typeof insightRunItems.$inferSelect; +export type InsightRunItemInsert = typeof insightRunItems.$inferInsert; +export type InsightRollup = typeof insightRollups.$inferSelect; +export type InsightRollupInsert = typeof insightRollups.$inferInsert; diff --git a/packages/db/src/drizzle/schema/relations.ts b/packages/db/src/drizzle/schema/relations.ts index e6904d84e..fa431f23d 100644 --- a/packages/db/src/drizzle/schema/relations.ts +++ b/packages/db/src/drizzle/schema/relations.ts @@ -24,6 +24,12 @@ import { alarmDestinations, alarms, usageAlertLog } from "./billing"; import { feedback, feedbackRedemptions, insightUserFeedback } from "./feedback"; import { flags, flagsToTargetGroups, targetGroups } from "./flags"; import { slackChannelBindings, slackIntegrations } from "./integrations"; +import { + insightGenerationConfigs, + insightRunItems, + insightRollups, + insightRuns, +} from "./insights"; import { linkFolders, links } from "./links"; import { incidentAffectedMonitors, @@ -69,6 +75,10 @@ const schema = { feedback, feedbackRedemptions, insightUserFeedback, + insightGenerationConfigs, + insightRuns, + insightRunItems, + insightRollups, ssoProvider, agentChats, slackIntegrations, @@ -111,6 +121,9 @@ export const relations = defineRelations(schema, (r) => ({ linkFolders: r.many.linkFolders(), links: r.many.links(), slackIntegrations: r.many.slackIntegrations(), + insightGenerationConfigs: r.many.insightGenerationConfigs(), 
+ insightRuns: r.many.insightRuns(), + insightRollups: r.many.insightRollups(), }, account: { @@ -180,6 +193,8 @@ export const relations = defineRelations(schema, (r) => ({ funnelDefinitions: r.many.funnelDefinitions(), alarms: r.many.alarms(), analyticsInsights: r.many.analyticsInsights(), + insightGenerationConfigs: r.many.insightGenerationConfigs(), + insightRunItems: r.many.insightRunItems(), }, analyticsInsights: { @@ -195,6 +210,62 @@ export const relations = defineRelations(schema, (r) => ({ }), }, + insightGenerationConfigs: { + organization: r.one.organization({ + from: r.insightGenerationConfigs.organizationId, + to: r.organization.id, + optional: false, + }), + website: r.one.websites({ + from: r.insightGenerationConfigs.websiteId, + to: r.websites.id, + }), + }, + + insightRuns: { + organization: r.one.organization({ + from: r.insightRuns.organizationId, + to: r.organization.id, + optional: false, + }), + requestedByUser: r.one.user({ + from: r.insightRuns.requestedByUserId, + to: r.user.id, + }), + items: r.many.insightRunItems(), + rollups: r.many.insightRollups(), + }, + + insightRunItems: { + run: r.one.insightRuns({ + from: r.insightRunItems.runId, + to: r.insightRuns.id, + optional: false, + }), + organization: r.one.organization({ + from: r.insightRunItems.organizationId, + to: r.organization.id, + optional: false, + }), + website: r.one.websites({ + from: r.insightRunItems.websiteId, + to: r.websites.id, + optional: false, + }), + }, + + insightRollups: { + organization: r.one.organization({ + from: r.insightRollups.organizationId, + to: r.organization.id, + optional: false, + }), + run: r.one.insightRuns({ + from: r.insightRollups.runId, + to: r.insightRuns.id, + }), + }, + funnelDefinitions: { website: r.one.websites({ from: r.funnelDefinitions.websiteId, diff --git a/packages/redis/bullmq.test.ts b/packages/redis/bullmq.test.ts index 147f1f0f6..ef46173ea 100644 --- a/packages/redis/bullmq.test.ts +++ b/packages/redis/bullmq.test.ts @@ -5,9 
+5,11 @@ import { } from "./bullmq"; const ORIGINAL_URL = process.env.BULLMQ_REDIS_URL; +const ORIGINAL_INSIGHTS_URL = process.env.INSIGHTS_BULLMQ_REDIS_URL; afterEach(() => { process.env.BULLMQ_REDIS_URL = ORIGINAL_URL; + process.env.INSIGHTS_BULLMQ_REDIS_URL = ORIGINAL_INSIGHTS_URL; }); describe("BullMQ connection options", () => { @@ -74,4 +76,35 @@ describe("BullMQ connection options", () => { maxRetriesPerRequest: null, }); }); + + it("prefers a queue-specific Redis URL when an env prefix is provided", () => { + process.env.BULLMQ_REDIS_URL = "redis://default.test:6379/0"; + process.env.INSIGHTS_BULLMQ_REDIS_URL = + "redis://insights:secret@insights.test:6380/5"; + + expect( + getBullMQConnectionOptions({ envPrefix: "INSIGHTS" }) + ).toEqual({ + host: "insights.test", + port: 6380, + username: "insights", + password: "secret", + db: 5, + maxRetriesPerRequest: 1, + }); + }); + + it("falls back to the default Redis URL when a prefixed URL is blank", () => { + process.env.BULLMQ_REDIS_URL = "redis://default.test:6379/4"; + process.env.INSIGHTS_BULLMQ_REDIS_URL = ""; + + expect(getBullMQConnectionOptions({ envPrefix: "INSIGHTS" })).toEqual({ + host: "default.test", + port: 6379, + username: undefined, + password: undefined, + db: 4, + maxRetriesPerRequest: 1, + }); + }); }); diff --git a/packages/redis/bullmq.ts b/packages/redis/bullmq.ts index c7ac095bf..93ff86079 100644 --- a/packages/redis/bullmq.ts +++ b/packages/redis/bullmq.ts @@ -1,10 +1,28 @@ import type { RedisOptions } from "ioredis"; -function parseBullMQConnectionUrl(): RedisOptions { - const redisUrl = process.env.BULLMQ_REDIS_URL; +export interface BullMQConnectionConfig { + envPrefix?: string; +} + +function resolveBullMQRedisUrl(config: BullMQConnectionConfig = {}): string { + const prefixedName = config.envPrefix + ? `${config.envPrefix}_BULLMQ_REDIS_URL` + : null; + const prefixedUrl = prefixedName ? 
process.env[prefixedName]?.trim() : ""; + const fallbackUrl = process.env.BULLMQ_REDIS_URL?.trim(); + const redisUrl = prefixedUrl || fallbackUrl; if (!redisUrl) { - throw new Error("BULLMQ_REDIS_URL environment variable is required"); + throw new Error( + `${prefixedName ? `${prefixedName} or ` : ""}BULLMQ_REDIS_URL environment variable is required` + ); } + return redisUrl; +} + +function parseBullMQConnectionUrl( + config: BullMQConnectionConfig = {} +): RedisOptions { + const redisUrl = resolveBullMQRedisUrl(config); const url = new URL(redisUrl); @@ -18,16 +36,20 @@ function parseBullMQConnectionUrl(): RedisOptions { }; } -export function getBullMQConnectionOptions(): RedisOptions { +export function getBullMQConnectionOptions( + config: BullMQConnectionConfig = {} +): RedisOptions { return { - ...parseBullMQConnectionUrl(), + ...parseBullMQConnectionUrl(config), maxRetriesPerRequest: 1, }; } -export function getBullMQWorkerConnectionOptions(): RedisOptions { +export function getBullMQWorkerConnectionOptions( + config: BullMQConnectionConfig = {} +): RedisOptions { return { - ...parseBullMQConnectionUrl(), + ...parseBullMQConnectionUrl(config), maxRetriesPerRequest: null, }; } diff --git a/packages/redis/cache-invalidation.ts b/packages/redis/cache-invalidation.ts index 72ced7d39..067697b9c 100644 --- a/packages/redis/cache-invalidation.ts +++ b/packages/redis/cache-invalidation.ts @@ -104,6 +104,7 @@ const USER_PREFERENCES_CACHE_PREFIX = cacheNamespaces.userPreferences; const STATUS_PAGE_CACHE_PREFIX = cacheNamespaces.statusPage; const SLACK_INTEGRATION_CACHE_PREFIX = cacheNamespaces.slackIntegrationByTeam; const SLACK_CHANNEL_BINDING_CACHE_PREFIX = cacheNamespaces.slackChannelBinding; +const LEGACY_INSIGHTS_API_CACHE_PREFIX = "ai-insights"; export interface CacheInvalidationResult { attempted: number; @@ -537,6 +538,9 @@ export function invalidateInsightsCachesForOrganization( ): Promise { const organizationTag = cacheTags.organization(organizationId); return 
settleInvalidations([ + invalidateCacheablePattern( + `${LEGACY_INSIGHTS_API_CACHE_PREFIX}:${organizationId}:*` + ), invalidateCacheableTag(cacheNamespaces.insightsNarrative, organizationTag, { fallbackPattern: `cacheable:${cacheNamespaces.insightsNarrative}:*${organizationId}*`, }), diff --git a/packages/redis/index.ts b/packages/redis/index.ts index 4de889686..29d5d31c6 100644 --- a/packages/redis/index.ts +++ b/packages/redis/index.ts @@ -4,6 +4,7 @@ export * from "./cacheable"; export * from "./bullmq"; export * from "./click-dedup"; export * from "./drizzle-cache"; +export * from "./insights-queue"; export * from "./links-cache"; export * from "./rate-limit"; export * from "./redis"; diff --git a/packages/redis/insights-queue.ts b/packages/redis/insights-queue.ts new file mode 100644 index 000000000..194c587c5 --- /dev/null +++ b/packages/redis/insights-queue.ts @@ -0,0 +1,114 @@ +import { Queue } from "bullmq"; +import { getBullMQConnectionOptions } from "./bullmq"; + +export const INSIGHTS_QUEUE_ENV_PREFIX = "INSIGHTS"; +export const INSIGHTS_QUEUE_NAME = "insights-generation"; +export const INSIGHTS_DISPATCH_JOB_NAME = "insights-dispatch"; +export const INSIGHTS_GENERATE_WEBSITE_JOB_NAME = "insights-generate-website"; +export const INSIGHTS_ROLLUP_JOB_NAME = "insights-rollup"; + +export const INSIGHTS_JOB_TIMEOUT_MS = 120_000; + +export const INSIGHTS_JOB_OPTIONS = { + attempts: 2, + backoff: { + type: "exponential", + delay: 5000, + }, + removeOnComplete: { + age: 24 * 3600, + count: 1000, + }, + removeOnFail: { + age: 7 * 24 * 3600, + count: 5000, + }, +}; + +export const INSIGHT_GENERATION_TOOLS = [ + "web_metrics", + "product_metrics", + "ops_context", + "business_context", +] as const; + +export type InsightGenerationTool = (typeof INSIGHT_GENERATION_TOOLS)[number]; +export type InsightGenerationDepth = "light" | "standard" | "deep"; +export type InsightGenerationModelTier = "fast" | "balanced" | "deep"; +export type InsightGenerationReason = + | 
"manual" + | "scheduled" + | "cooldown_refresh"; + +export interface InsightGenerationConfigSnapshot { + allowedTools: InsightGenerationTool[]; + cooldownHours: number; + depth: InsightGenerationDepth; + lookbackDays: number; + maxInsightsPerWebsite: number; + maxSteps: number; + maxToolCalls: number; + modelTier: InsightGenerationModelTier; + timezone: string; +} + +export interface InsightsDispatchJobData { + reason: "scheduled"; + triggeredAt: string; +} + +export interface InsightsGenerateWebsiteJobData { + config: InsightGenerationConfigSnapshot; + itemId: string; + organizationId: string; + reason: InsightGenerationReason; + requestedByUserId?: string | null; + runId: string; + websiteId: string; +} + +export interface InsightsRollupJobData { + organizationId: string; + reason: InsightGenerationReason; + runId: string; + timezone: string; +} + +export type InsightsQueueJobData = + | InsightsDispatchJobData + | InsightsGenerateWebsiteJobData + | InsightsRollupJobData; + +let insightsQueue: Queue | null = null; + +export function getInsightsQueue(): Queue { + insightsQueue ??= new Queue(INSIGHTS_QUEUE_NAME, { + connection: getBullMQConnectionOptions({ + envPrefix: INSIGHTS_QUEUE_ENV_PREFIX, + }), + defaultJobOptions: INSIGHTS_JOB_OPTIONS, + }); + + return insightsQueue; +} + +export async function closeInsightsQueue(): Promise { + if (!insightsQueue) { + return; + } + const queue = insightsQueue; + insightsQueue = null; + await queue.close(); +} + +export function insightsWebsiteJobId(runId: string, websiteId: string): string { + return `insights-website-${runId}-${websiteId}`; +} + +export function insightsRollupJobId(runId: string): string { + return `insights-rollup-${runId}`; +} + +export function insightsDispatchJobId(triggeredAt: string): string { + return `insights-dispatch-${triggeredAt.slice(0, 16)}`; +} diff --git a/packages/redis/package.json b/packages/redis/package.json index 9df2d53bf..4d120f2b2 100644 --- a/packages/redis/package.json +++ 
b/packages/redis/package.json @@ -13,6 +13,7 @@ "./redis": "./redis.ts", "./stream-buffer": "./stream-buffer.ts", "./drizzle-cache": "./drizzle-cache.ts", + "./insights-queue": "./insights-queue.ts", "./cache-invalidation": "./cache-invalidation.ts", "./links-cache": "./links-cache.ts", "./click-dedup": "./click-dedup.ts", diff --git a/packages/rpc/package.json b/packages/rpc/package.json index 864c08bb8..53de54656 100644 --- a/packages/rpc/package.json +++ b/packages/rpc/package.json @@ -7,7 +7,7 @@ "types": "./src/index.ts", "scripts": { "check-types": "tsc --noEmit", - "test": "bun test src/routers src/services/uptime-lifecycle.test.ts src/services/uptime-scheduler.test.ts src/utils/*.test.ts", + "test": "bun test src/routers src/services/insight-schedule.test.ts src/services/uptime-lifecycle.test.ts src/services/uptime-scheduler.test.ts src/utils/*.test.ts", "test:integration": "bun test src/services/uptime-scheduler.integration.test.ts" }, "exports": { @@ -15,6 +15,8 @@ "./autumn": "./src/lib/autumn-client.ts", "./billing": "./src/utils/billing.ts", "./flags": "./src/utils/flags.ts", + "./insight-generation": "./src/routers/insight-generation.ts", + "./insight-schedule": "./src/services/insight-schedule.ts", "./log-context": "./src/lib/rpc-log-context.ts" }, "dependencies": { diff --git a/packages/rpc/src/index.ts b/packages/rpc/src/index.ts index ee29d813b..d91617f43 100644 --- a/packages/rpc/src/index.ts +++ b/packages/rpc/src/index.ts @@ -35,6 +35,12 @@ export { workspaceInputSchema, } from "./procedures/with-workspace"; export { type AppRouter, appRouter } from "./root"; +export { + queueInsightGenerationRun, + type QueueInsightGenerationRunInput, + type QueueInsightGenerationRunResult, +} from "./routers/insight-generation"; +export { getNextInsightRunAt } from "./services/insight-schedule"; export type { SlackIntegrationOutput } from "./routers/integrations"; export type { WebsiteOutput } from "./routers/websites"; export { diff --git 
a/packages/rpc/src/root.ts b/packages/rpc/src/root.ts index 34bff4255..cc2c47f89 100644 --- a/packages/rpc/src/root.ts +++ b/packages/rpc/src/root.ts @@ -9,6 +9,7 @@ import { feedbackRouter } from "./routers/feedback"; import { flagsRouter } from "./routers/flags"; import { funnelsRouter } from "./routers/funnels"; import { goalsRouter } from "./routers/goals"; +import { insightGenerationRouter } from "./routers/insight-generation"; import { insightsRouter } from "./routers/insights"; import { integrationsRouter } from "./routers/integrations"; import { linkFoldersRouter } from "./routers/link-folders"; @@ -36,6 +37,7 @@ export const appRouter = { integrations: integrationsRouter, feedback: feedbackRouter, flags: flagsRouter, + insightGeneration: insightGenerationRouter, insights: insightsRouter, targetGroups: targetGroupsRouter, organizations: organizationsRouter, diff --git a/packages/rpc/src/routers/insight-generation.ts b/packages/rpc/src/routers/insight-generation.ts new file mode 100644 index 000000000..5f2bf9b89 --- /dev/null +++ b/packages/rpc/src/routers/insight-generation.ts @@ -0,0 +1,635 @@ +import { and, db, desc, eq, inArray, isNull } from "@databuddy/db"; +import { + insightGenerationConfigs, + insightRunItems, + insightRuns, + type InsightGenerationConfig, + type InsightGenerationConfigSnapshot, + websites, +} from "@databuddy/db/schema"; +import { + getInsightsQueue, + INSIGHTS_GENERATE_WEBSITE_JOB_NAME, + insightsWebsiteJobId, + type InsightGenerationReason, +} from "@databuddy/redis"; +import { randomUUIDv7 } from "bun"; +import { z } from "zod"; +import { rpcError } from "../errors"; +import { type Context, protectedProcedure } from "../orpc"; +import { withWorkspace } from "../procedures/with-workspace"; +import { getNextInsightRunAt } from "../services/insight-schedule"; + +const generationToolSchema = z.enum([ + "web_metrics", + "product_metrics", + "ops_context", + "business_context", +]); +const depthSchema = z.enum(["light", "standard", 
"deep"]); +const frequencySchema = z.enum(["hourly", "daily", "weekly", "custom"]); +const modelTierSchema = z.enum(["fast", "balanced", "deep"]); +const reasonSchema = z.enum(["manual", "scheduled", "cooldown_refresh"]); + +const configPatchSchema = z.object({ + allowedTools: z.array(generationToolSchema).min(1).max(4).optional(), + cooldownHours: z.number().int().min(1).max(168).optional(), + cron: z.string().min(1).max(120).nullable().optional(), + depth: depthSchema.optional(), + enabled: z.boolean().optional(), + frequency: frequencySchema.optional(), + lookbackDays: z.number().int().min(1).max(90).optional(), + maxInsightsPerWebsite: z.number().int().min(1).max(10).optional(), + maxSteps: z.number().int().min(1).max(64).optional(), + maxToolCalls: z.number().int().min(1).max(64).optional(), + modelTier: modelTierSchema.optional(), + timezone: z.string().min(1).max(80).optional(), +}); + +const configOutputSchema = z.object({ + allowedTools: z.array(generationToolSchema), + cooldownHours: z.number(), + createdAt: z.union([z.date(), z.string()]).nullable(), + cron: z.string().nullable(), + depth: depthSchema, + enabled: z.boolean(), + frequency: frequencySchema, + id: z.string().nullable(), + lastRunAt: z.union([z.date(), z.string()]).nullable(), + lookbackDays: z.number(), + maxInsightsPerWebsite: z.number(), + maxSteps: z.number(), + maxToolCalls: z.number(), + modelTier: modelTierSchema, + nextRunAt: z.union([z.date(), z.string()]).nullable(), + organizationId: z.string(), + source: z.enum(["default", "organization", "website"]), + timezone: z.string(), + updatedAt: z.union([z.date(), z.string()]).nullable(), + websiteId: z.string().nullable(), +}); + +const runOutputSchema = z.object({ + completedItems: z.number(), + createdAt: z.union([z.date(), z.string()]), + errorMessage: z.string().nullable(), + failedItems: z.number(), + finishedAt: z.union([z.date(), z.string()]).nullable(), + id: z.string(), + organizationId: z.string(), + reason: reasonSchema, + 
requestedByUserId: z.string().nullable(), + skippedItems: z.number(), + startedAt: z.union([z.date(), z.string()]).nullable(), + status: z.enum([ + "queued", + "running", + "succeeded", + "partially_succeeded", + "failed", + "skipped", + ]), + timezone: z.string(), + totalItems: z.number(), + updatedAt: z.union([z.date(), z.string()]), +}); + +const runItemOutputSchema = z.object({ + attempts: z.number(), + configSnapshot: z.unknown(), + createdAt: z.union([z.date(), z.string()]), + errorMessage: z.string().nullable(), + finishedAt: z.union([z.date(), z.string()]).nullable(), + id: z.string(), + queueJobId: z.string().nullable(), + resultCount: z.number(), + runId: z.string(), + startedAt: z.union([z.date(), z.string()]).nullable(), + status: z.enum(["queued", "running", "succeeded", "failed", "skipped"]), + updatedAt: z.union([z.date(), z.string()]), + websiteId: z.string(), +}); + +const DEFAULT_CONFIG: Omit< + z.infer, + | "createdAt" + | "id" + | "lastRunAt" + | "nextRunAt" + | "organizationId" + | "source" + | "updatedAt" + | "websiteId" +> = { + allowedTools: ["web_metrics", "product_metrics", "ops_context"], + cooldownHours: 6, + cron: null, + depth: "standard", + enabled: true, + frequency: "weekly", + lookbackDays: 7, + maxInsightsPerWebsite: 3, + maxSteps: 24, + maxToolCalls: 16, + modelTier: "balanced", + timezone: "UTC", +}; + +type InsightGenerationConfigPatch = z.infer; +export interface QueueInsightGenerationRunInput + extends InsightGenerationConfigPatch { + force?: boolean; + organizationId: string; + reason?: z.infer; + requestedByUserId?: string | null; + websiteIds?: string[]; +} + +export interface QueueInsightGenerationRunResult { + queuedItems: number; + runId: string; + status: "queued" | "skipped"; +} + +function rowToConfig( + row: InsightGenerationConfig | null, + fallback: z.infer, + source: "default" | "organization" | "website" +): z.infer { + if (!row) { + return { ...fallback, source }; + } + + return { + allowedTools: 
/**
 * Returns a copy of `config` with the fields of `patch` applied on top.
 *
 * Keys that are present in `patch` but set to `undefined` are ignored, so an
 * optional field forwarded as `undefined` keeps the configured value instead
 * of erasing it. An explicit `null` (e.g. `cron: null`) is still applied.
 *
 * Cron handling after the merge:
 * - `frequency === "custom"` requires a non-empty `cron`;
 * - any other frequency clears `cron` to `null`.
 *
 * @param config - Effective configuration to start from (not mutated).
 * @param patch - Partial overrides.
 * @returns The merged configuration.
 * @throws `rpcError.badRequest` when the merged frequency is "custom" and no
 *   cron expression is available.
 */
function applyPatch(
  config: z.infer<typeof configOutputSchema>,
  patch: z.infer<typeof configPatchSchema>
): z.infer<typeof configOutputSchema> {
  // Object spread copies `undefined` values too, which would overwrite real
  // config values; strip them before merging.
  const definedPatch = Object.fromEntries(
    Object.entries(patch).filter(([, value]) => value !== undefined)
  ) as z.infer<typeof configPatchSchema>;

  const next = { ...config, ...definedPatch };

  if (next.frequency === "custom" && !next.cron) {
    throw rpcError.badRequest("Custom frequency requires a cron expression");
  }
  if (next.frequency !== "custom") {
    // A cron expression is only meaningful for the custom frequency.
    next.cron = null;
  }
  return next;
}
null; +} + +async function getEffectiveConfig( + organizationId: string, + websiteId: string | null +): Promise> { + const fallback = defaultConfig(organizationId, websiteId); + const orgConfig = await findConfig(organizationId, null); + const orgEffective = rowToConfig( + orgConfig, + fallback, + orgConfig ? "organization" : "default" + ); + if (!websiteId) { + return orgEffective; + } + + const websiteConfig = await findConfig(organizationId, websiteId); + return rowToConfig( + websiteConfig, + orgEffective, + websiteConfig ? "website" : orgEffective.source + ); +} + +async function listTargetWebsites( + organizationId: string, + websiteIds: string[] | undefined +): Promise> { + const conditions = [ + eq(websites.organizationId, organizationId), + isNull(websites.deletedAt), + ]; + if (websiteIds?.length) { + conditions.push(inArray(websites.id, websiteIds)); + } + + const rows = await db + .select({ id: websites.id }) + .from(websites) + .where(and(...conditions)); + + if (websiteIds?.length && rows.length !== new Set(websiteIds).size) { + throw rpcError.badRequest( + "One or more websites are not in this organization" + ); + } + + return rows; +} + +export async function queueInsightGenerationRun( + input: QueueInsightGenerationRunInput +): Promise { + const targetWebsites = await listTargetWebsites( + input.organizationId, + input.websiteIds + ); + const runId = randomUUIDv7(); + const requestedByUserId = input.requestedByUserId ?? null; + const baseConfig = await getEffectiveConfig(input.organizationId, null); + const runConfig = applyPatch(baseConfig, input); + const now = new Date(); + const reason = input.reason ?? "manual"; + + await db.insert(insightRuns).values({ + id: runId, + organizationId: input.organizationId, + requestedByUserId, + reason, + status: targetWebsites.length === 0 ? "skipped" : "queued", + timezone: runConfig.timezone, + totalItems: targetWebsites.length, + ...(targetWebsites.length === 0 ? 
{ finishedAt: now } : {}), + }); + + if (targetWebsites.length === 0) { + return { queuedItems: 0, runId, status: "skipped" }; + } + + const items = await Promise.all( + targetWebsites.map(async (website) => { + const websiteConfig = await getEffectiveConfig( + input.organizationId, + website.id + ); + const config = applyPatch(websiteConfig, input); + const itemId = randomUUIDv7(); + return { + config: toSnapshot(config), + itemId, + jobId: insightsWebsiteJobId(runId, website.id), + websiteId: website.id, + }; + }) + ); + + await db.insert(insightRunItems).values( + items.map((item) => ({ + id: item.itemId, + runId, + organizationId: input.organizationId, + websiteId: item.websiteId, + queueJobId: item.jobId, + configSnapshot: item.config, + })) + ); + + try { + const queue = getInsightsQueue(); + await Promise.all( + items.map((item) => + queue.add( + INSIGHTS_GENERATE_WEBSITE_JOB_NAME, + { + config: item.config, + itemId: item.itemId, + organizationId: input.organizationId, + reason: reason as InsightGenerationReason, + requestedByUserId, + runId, + websiteId: item.websiteId, + }, + { jobId: item.jobId } + ) + ) + ); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + await Promise.all([ + db + .update(insightRuns) + .set({ + errorMessage: message, + failedItems: items.length, + finishedAt: new Date(), + status: "failed", + }) + .where(eq(insightRuns.id, runId)), + db + .update(insightRunItems) + .set({ + errorMessage: message, + finishedAt: new Date(), + status: "failed", + }) + .where(eq(insightRunItems.runId, runId)), + ]); + throw rpcError.internal("Failed to queue insight generation"); + } + + return { + queuedItems: items.length, + runId, + status: "queued", + }; +} + +export const insightGenerationRouter = { + getConfig: protectedProcedure + .route({ + method: "POST", + path: "/insights/generation/getConfig", + summary: "Get insight generation config", + tags: ["Insights"], + }) + .input( + z.object({ + organizationId: z.string().nullish(), + websiteId: z.string().nullish(), + }) + ) + .output(configOutputSchema) + .handler(async ({ context, input }) => { + const scope = await resolveScope(context, input, "read"); + return getEffectiveConfig(scope.organizationId, scope.websiteId); + }), + + upsertConfig: protectedProcedure + .route({ + method: "POST", + path: "/insights/generation/upsertConfig", + summary: "Create or update insight generation config", + tags: ["Insights"], + }) + .input( + z + .object({ + organizationId: z.string().nullish(), + websiteId: z.string().nullish(), + }) + .extend(configPatchSchema.shape) + ) + .output(configOutputSchema) + .handler(async ({ context, input }) => { + const scope = await resolveScope(context, input, "update"); + const current = await getEffectiveConfig( + scope.organizationId, + scope.websiteId + ); + const next = applyPatch(current, input); + const existing = await findConfig(scope.organizationId, scope.websiteId); + const now = new Date(); + const nextRunAt = getNextInsightRunAt(next, now); + + if (existing) { + await db + .update(insightGenerationConfigs) + .set({ + allowedTools: next.allowedTools, + cooldownHours: next.cooldownHours, + cron: 
next.cron, + depth: next.depth, + enabled: next.enabled, + frequency: next.frequency, + lookbackDays: next.lookbackDays, + maxInsightsPerWebsite: next.maxInsightsPerWebsite, + maxSteps: next.maxSteps, + maxToolCalls: next.maxToolCalls, + modelTier: next.modelTier, + nextRunAt, + timezone: next.timezone, + updatedAt: now, + }) + .where(eq(insightGenerationConfigs.id, existing.id)); + } else { + await db.insert(insightGenerationConfigs).values({ + id: randomUUIDv7(), + organizationId: scope.organizationId, + websiteId: scope.websiteId, + allowedTools: next.allowedTools, + cooldownHours: next.cooldownHours, + cron: next.cron, + depth: next.depth, + enabled: next.enabled, + frequency: next.frequency, + lookbackDays: next.lookbackDays, + maxInsightsPerWebsite: next.maxInsightsPerWebsite, + maxSteps: next.maxSteps, + maxToolCalls: next.maxToolCalls, + modelTier: next.modelTier, + nextRunAt, + timezone: next.timezone, + }); + } + + return getEffectiveConfig(scope.organizationId, scope.websiteId); + }), + + triggerRun: protectedProcedure + .route({ + method: "POST", + path: "/insights/generation/triggerRun", + summary: "Queue an insight generation run", + tags: ["Insights"], + }) + .input( + z + .object({ + force: z.boolean().default(false), + organizationId: z.string().nullish(), + reason: reasonSchema.default("manual"), + websiteIds: z.array(z.string().min(1)).max(100).optional(), + }) + .extend(configPatchSchema.shape) + ) + .output( + z.object({ + queuedItems: z.number(), + runId: z.string(), + status: z.enum(["queued", "skipped"]), + }) + ) + .handler(async ({ context, input }) => { + const scope = await resolveScope( + context, + { organizationId: input.organizationId }, + "update" + ); + return queueInsightGenerationRun({ + ...input, + organizationId: scope.organizationId, + requestedByUserId: context.user?.id ?? 
null, + }); + }), + + getRun: protectedProcedure + .route({ + method: "POST", + path: "/insights/generation/getRun", + summary: "Get insight generation run", + tags: ["Insights"], + }) + .input(z.object({ runId: z.string() })) + .output( + z.object({ items: z.array(runItemOutputSchema), run: runOutputSchema }) + ) + .handler(async ({ context, input }) => { + const run = await db.query.insightRuns.findFirst({ + where: { id: input.runId }, + }); + if (!run) { + throw rpcError.notFound("InsightRun", input.runId); + } + + await withWorkspace(context, { + organizationId: run.organizationId, + resource: "organization", + permissions: ["read"], + }); + + const items = await db.query.insightRunItems.findMany({ + where: { runId: input.runId }, + }); + + return { items, run }; + }), + + listRuns: protectedProcedure + .route({ + method: "POST", + path: "/insights/generation/listRuns", + summary: "List insight generation runs", + tags: ["Insights"], + }) + .input( + z.object({ + limit: z.number().int().min(1).max(100).default(20), + organizationId: z.string().nullish(), + }) + ) + .output(z.object({ runs: z.array(runOutputSchema) })) + .handler(async ({ context, input }) => { + const scope = await resolveScope(context, input, "read"); + const runs = await db + .select() + .from(insightRuns) + .where(eq(insightRuns.organizationId, scope.organizationId)) + .orderBy(desc(insightRuns.createdAt)) + .limit(input.limit); + return { runs }; + }), +}; diff --git a/packages/rpc/src/routers/insights.ts b/packages/rpc/src/routers/insights.ts index acad9f15e..8e4eeee8e 100644 --- a/packages/rpc/src/routers/insights.ts +++ b/packages/rpc/src/routers/insights.ts @@ -1,13 +1,628 @@ -import { and, eq, inArray } from "@databuddy/db"; -import { insightUserFeedback } from "@databuddy/db/schema"; +import { and, db, desc, eq, gte, inArray, isNull } from "@databuddy/db"; +import { + analyticsInsights, + type AnalyticsInsightMetric, + type AnalyticsInsightSource, + insightRollups, + 
insightUserFeedback, + websites, +} from "@databuddy/db/schema"; +import { + cacheNamespaces, + cacheTags, + cacheable, + getRedisCache, + invalidateAgentContextSnapshotsForOwner, + invalidateInsightsCachesForOrganization, +} from "@databuddy/redis"; +import { ratelimit } from "@databuddy/redis/rate-limit"; import { randomUUIDv7 } from "bun"; +import dayjs from "dayjs"; import { z } from "zod"; import { rpcError } from "../errors"; import { sessionProcedure } from "../orpc"; +import { withWorkspace } from "../procedures/with-workspace"; +import { queueInsightGenerationRun } from "./insight-generation"; const voteSchema = z.enum(["up", "down"]); +const rangeSchema = z.enum(["7d", "30d", "90d"]); + +const CACHE_TTL = 900; +const NEGATIVE_CACHE_TTL = Math.floor(CACHE_TTL / 3); +const CACHE_KEY_PREFIX = "ai-insights"; +const GENERATION_COOLDOWN_HOURS = 6; +const STALE_INSIGHTS_LOOKBACK_DAYS = 14; +const TOP_INSIGHTS_LIMIT = 10; +const NARRATIVE_RATE_LIMIT = 30; +const NARRATIVE_RATE_WINDOW_SECS = 3600; +const NARRATIVE_CACHE_TTL_SECS = 3600; +const NARRATIVE_INSIGHTS_LIMIT = 5; + +const insightMetricSchema = z.object({ + current: z.number(), + format: z.enum(["number", "percent", "duration_ms", "duration_s"]), + label: z.string(), + previous: z.number().optional(), +}); + +const websiteInsightSchema = z.object({ + changePercent: z.number().optional(), + confidence: z.number(), + description: z.string(), + id: z.string(), + impactSummary: z.string().optional(), + link: z.string(), + metrics: z.array(insightMetricSchema), + priority: z.number(), + sentiment: z.string(), + severity: z.string(), + sources: z.array(z.enum(["web", "product", "ops", "business"])), + subjectKey: z.string(), + suggestion: z.string(), + title: z.string(), + type: z.string(), + websiteDomain: z.string(), + websiteId: z.string(), + websiteName: z.string().nullable(), +}); + +const historyInsightSchema = websiteInsightSchema.extend({ + createdAt: z.string(), + currentPeriodFrom: 
/**
 * Maps an insight type to the dashboard page where it can be investigated.
 *
 * Every link is rooted at `/websites/{websiteId}`; known insight types append
 * a section suffix, and any other type resolves to the website root.
 *
 * @param websiteId - Id of the website the insight belongs to.
 * @param type - Insight type identifier.
 * @returns Relative dashboard path for the insight.
 */
function buildInsightLink(websiteId: string, type: string): string {
  const root = `/websites/${websiteId}`;
  switch (type) {
    case "error_spike":
    case "new_errors":
    case "persistent_error_hotspot":
    case "reliability_improved":
      return `${root}/errors`;
    case "vitals_degraded":
    case "performance":
    case "performance_improved":
      return `${root}/vitals`;
    case "conversion_leak":
    case "funnel_regression":
      return `${root}/funnels`;
    case "custom_event_spike":
    case "engagement_change":
    case "quality_shift":
      return `${root}/events/stream`;
    case "uptime_issue":
      return `${root}/anomalies`;
    default:
      return root;
  }
}
/**
 * Best-effort write of `payload` to the cache under `key`.
 *
 * The cache is advisory: this never throws and never blocks the caller.
 * It is a no-op when `redis` is null, and a failed write (serialization
 * error, synchronous client error, or rejected `setex`) is ignored.
 *
 * @param redis - Client returned by `getRedis()`, or null when unavailable.
 * @param key - Cache key to write.
 * @param ttl - Expiry passed to `setex`, in seconds.
 * @param payload - Value to store; serialized with `JSON.stringify`.
 */
function tryCacheSet(
  redis: ReturnType<typeof getRedis>,
  key: string,
  ttl: number,
  payload: unknown
): void {
  if (!redis) {
    return;
  }
  try {
    // Fire-and-forget: a rejected write must not surface to the caller.
    redis.setex(key, ttl, JSON.stringify(payload)).catch(() => {});
  } catch {
    // JSON.stringify (circular reference, BigInt) or the client call itself
    // threw synchronously; skip caching rather than fail the request.
  }
}
/**
 * Builds a one-sentence narrative from the top insights without calling a
 * model.
 *
 * - No insights: an "all healthy" sentence for the range.
 * - One insight: its title, optional change and optional website name.
 * - Several insights: the first one as headline plus a count of the rest.
 *
 * The change is shown as a whole percentage. The "+" sign is taken from the
 * rounded value, so a change that rounds to zero renders as "(0%)" rather
 * than "(+0%)" or "(-0%)". A null or non-finite change is omitted.
 *
 * @param range - Reporting range; converted to a word via `rangeWord`.
 * @param topInsights - Insights in display order; the first is the headline.
 * @returns The narrative sentence.
 */
function buildDeterministicNarrative(
  range: z.infer<typeof rangeSchema>,
  topInsights: {
    changePercent: number | null;
    severity: string;
    title: string;
    websiteName: string | null;
  }[]
): string {
  const word = rangeWord(range);
  const headline = topInsights[0];
  if (!headline) {
    return `All systems healthy this ${word}. No actionable signals detected.`;
  }

  const siteSuffix = headline.websiteName ? ` on ${headline.websiteName}` : "";

  let change = "";
  if (
    headline.changePercent != null &&
    Number.isFinite(headline.changePercent)
  ) {
    // Round with toFixed (same rounding as before), then decide sign and
    // digits from the rounded number so "-0" and "+0" cannot appear.
    const rounded = Number(headline.changePercent.toFixed(0));
    const digits = rounded === 0 ? "0" : String(rounded);
    change = ` (${rounded > 0 ? "+" : ""}${digits}%)`;
  }

  if (topInsights.length === 1) {
    return `This ${word}: ${headline.title}${change}${siteSuffix}.`;
  }
  const extra = topInsights.length - 1;
  return `This ${word}: ${headline.title}${change}${siteSuffix}, plus ${extra} more signal${extra === 1 ? "" : "s"} worth reviewing.`;
}
z.number().optional(), + runId: z.string().optional(), + status: z.enum(["queued", "skipped", "unavailable"]), + }) + .optional(), + insights: z.array(websiteInsightSchema), + source: z.enum(["ai", "fallback"]), + success: z.literal(true), + }) + ) + .handler(async ({ context, input }) => { + await withWorkspace(context, { + organizationId: input.organizationId, + resource: "organization", + permissions: ["read"], + }); + + const redis = getRedis(); + const cacheKey = `${CACHE_KEY_PREFIX}:${input.organizationId}:${input.timezone}`; + + if (redis) { + try { + const cached = await redis.get(cacheKey); + if (cached) { + return JSON.parse(cached) as { + generation?: { + queuedItems?: number; + runId?: string; + status: "queued" | "skipped" | "unavailable"; + }; + insights: z.infer[]; + source: "ai" | "fallback"; + success: true; + }; + } + } catch { + // Insights cache is advisory; continue to DB/queue. + } + } + + const recentInsights = await getInsightsFromDb({ + organizationId: input.organizationId, + since: dayjs().subtract(GENERATION_COOLDOWN_HOURS, "hour").toDate(), + }); + + if (recentInsights.length > 0) { + const payload = { + insights: recentInsights, + source: "ai" as const, + success: true as const, + }; + tryCacheSet(redis, cacheKey, CACHE_TTL, payload); + return payload; + } + + const staleInsights = await getInsightsFromDb({ + organizationId: input.organizationId, + since: dayjs().subtract(STALE_INSIGHTS_LOOKBACK_DAYS, "day").toDate(), + }); + + let generation: { + queuedItems?: number; + runId?: string; + status: "queued" | "skipped" | "unavailable"; + } = { status: "unavailable" }; + + try { + const queued = await queueInsightGenerationRun({ + organizationId: input.organizationId, + requestedByUserId: context.user.id, + reason: "manual", + timezone: input.timezone, + }); + generation = { + status: queued.status, + runId: queued.runId, + queuedItems: queued.queuedItems, + }; + } catch { + generation = { status: "unavailable" }; + } + + const payload = { 
+ generation, + insights: staleInsights, + source: + staleInsights.length > 0 ? ("ai" as const) : ("fallback" as const), + success: true as const, + }; + tryCacheSet(redis, cacheKey, NEGATIVE_CACHE_TTL, payload); + return payload; + }), + + history: sessionProcedure + .route({ + method: "POST", + path: "/insights/history", + tags: ["Insights"], + summary: "List persisted insight history", + }) + .input( + z.object({ + limit: z.number().int().min(1).max(100).default(50), + offset: z.number().int().min(0).default(0), + organizationId: z.string().min(1), + websiteId: z.string().min(1).optional(), + }) + ) + .output( + z.object({ + hasMore: z.boolean(), + insights: z.array(historyInsightSchema), + success: z.literal(true), + }) + ) + .handler(async ({ context, input }) => { + await withWorkspace(context, { + organizationId: input.organizationId, + resource: "organization", + permissions: ["read"], + }); + + const whereClause = input.websiteId + ? and( + eq(analyticsInsights.organizationId, input.organizationId), + eq(analyticsInsights.websiteId, input.websiteId), + isNull(websites.deletedAt) + ) + : and( + eq(analyticsInsights.organizationId, input.organizationId), + isNull(websites.deletedAt) + ); + + const rows = await db + .select({ + id: analyticsInsights.id, + runId: analyticsInsights.runId, + websiteId: analyticsInsights.websiteId, + websiteName: websites.name, + websiteDomain: websites.domain, + title: analyticsInsights.title, + description: analyticsInsights.description, + suggestion: analyticsInsights.suggestion, + severity: analyticsInsights.severity, + sentiment: analyticsInsights.sentiment, + type: analyticsInsights.type, + priority: analyticsInsights.priority, + changePercent: analyticsInsights.changePercent, + subjectKey: analyticsInsights.subjectKey, + sources: analyticsInsights.sources, + confidence: analyticsInsights.confidence, + impactSummary: analyticsInsights.impactSummary, + metrics: analyticsInsights.metrics, + createdAt: 
analyticsInsights.createdAt, + currentPeriodFrom: analyticsInsights.currentPeriodFrom, + currentPeriodTo: analyticsInsights.currentPeriodTo, + previousPeriodFrom: analyticsInsights.previousPeriodFrom, + previousPeriodTo: analyticsInsights.previousPeriodTo, + timezone: analyticsInsights.timezone, + }) + .from(analyticsInsights) + .innerJoin(websites, eq(analyticsInsights.websiteId, websites.id)) + .where(whereClause) + .orderBy(desc(analyticsInsights.createdAt)) + .limit(input.limit) + .offset(input.offset); + + const insights = rows.map((row) => ({ + id: row.id, + runId: row.runId, + websiteId: row.websiteId, + websiteName: row.websiteName, + websiteDomain: row.websiteDomain, + link: buildInsightLink(row.websiteId, row.type), + title: row.title, + description: row.description, + suggestion: row.suggestion, + priority: row.priority, + subjectKey: row.subjectKey, + confidence: row.confidence, + ...parseInsightShape(row), + createdAt: row.createdAt.toISOString(), + currentPeriodFrom: row.currentPeriodFrom, + currentPeriodTo: row.currentPeriodTo, + previousPeriodFrom: row.previousPeriodFrom, + previousPeriodTo: row.previousPeriodTo, + timezone: row.timezone, + })); + + return { + success: true as const, + insights, + hasMore: rows.length === input.limit, + }; + }), + + orgNarrative: sessionProcedure + .route({ + method: "POST", + path: "/insights/orgNarrative", + tags: ["Insights"], + summary: "Get organization insights narrative", + }) + .input( + z.object({ + organizationId: z.string().min(1), + range: rangeSchema, + }) + ) + .output( + z.object({ + generatedAt: z.string(), + narrative: z.string(), + success: z.literal(true), + }) + ) + .handler(async ({ context, input }) => { + await withWorkspace(context, { + organizationId: input.organizationId, + resource: "organization", + permissions: ["read"], + }); + + const rl = await ratelimit( + `insights:narrative:${input.organizationId}:${context.user.id}`, + NARRATIVE_RATE_LIMIT, + NARRATIVE_RATE_WINDOW_SECS + ); + if 
(!rl.success) { + throw rpcError.rateLimited( + Math.max(1, Math.ceil((rl.reset - Date.now()) / 1000)) + ); + } + + const { generatedAt, narrative } = await loadNarrativeCached( + input.organizationId, + input.range + ); + return { + success: true as const, + narrative, + generatedAt, + }; + }), + + clearHistory: sessionProcedure + .route({ + method: "POST", + path: "/insights/clearHistory", + tags: ["Insights"], + summary: "Clear persisted insights for an organization", + }) + .input(z.object({ organizationId: z.string().min(1) })) + .output(z.object({ deleted: z.number(), success: z.literal(true) })) + .handler(async ({ context, input }) => { + await withWorkspace(context, { + organizationId: input.organizationId, + resource: "organization", + permissions: ["update"], + }); + + const idRows = await db + .select({ id: analyticsInsights.id }) + .from(analyticsInsights) + .where(eq(analyticsInsights.organizationId, input.organizationId)); + const ids = idRows.map((row) => row.id); + + await db + .delete(insightRollups) + .where(eq(insightRollups.organizationId, input.organizationId)); + + if (ids.length > 0) { + await db + .delete(insightUserFeedback) + .where( + and( + eq(insightUserFeedback.organizationId, input.organizationId), + inArray(insightUserFeedback.insightId, ids) + ) + ); + await db + .delete(analyticsInsights) + .where(eq(analyticsInsights.organizationId, input.organizationId)); + } + + await invalidateInsightsCacheForOrg(input.organizationId); + return { success: true as const, deleted: ids.length }; + }), + getVotes: sessionProcedure .route({ method: "POST", diff --git a/packages/rpc/src/services/insight-schedule.test.ts b/packages/rpc/src/services/insight-schedule.test.ts new file mode 100644 index 000000000..888c39710 --- /dev/null +++ b/packages/rpc/src/services/insight-schedule.test.ts @@ -0,0 +1,58 @@ +import { describe, expect, it } from "bun:test"; +import { getNextInsightRunAt } from "./insight-schedule"; + +describe("getNextInsightRunAt", () 
=> { + it("returns null when scheduling is disabled", () => { + const next = getNextInsightRunAt( + { cron: null, enabled: false, frequency: "daily" }, + new Date(2026, 0, 15, 10, 30) + ); + + expect(next).toBeNull(); + }); + + it("schedules hourly runs at the next top of hour", () => { + const next = getNextInsightRunAt( + { cron: null, enabled: true, frequency: "hourly" }, + new Date(2026, 0, 15, 10, 30, 22) + ); + + expect(next).toEqual(new Date(2026, 0, 15, 11, 0, 0, 0)); + }); + + it("schedules daily runs for 9am the next day", () => { + const next = getNextInsightRunAt( + { cron: null, enabled: true, frequency: "daily" }, + new Date(2026, 0, 15, 10, 30) + ); + + expect(next).toEqual(new Date(2026, 0, 16, 9, 0, 0, 0)); + }); + + it("schedules weekly runs seven days out at 9am", () => { + const next = getNextInsightRunAt( + { cron: null, enabled: true, frequency: "weekly" }, + new Date(2026, 0, 15, 10, 30) + ); + + expect(next).toEqual(new Date(2026, 0, 22, 9, 0, 0, 0)); + }); + + it("supports simple five-field cron expressions", () => { + const next = getNextInsightRunAt( + { cron: "*/15 * * * *", enabled: true, frequency: "custom" }, + new Date(2026, 0, 15, 10, 1, 45) + ); + + expect(next).toEqual(new Date(2026, 0, 15, 10, 15, 0, 0)); + }); + + it("returns null for invalid custom cron", () => { + const next = getNextInsightRunAt( + { cron: "not cron", enabled: true, frequency: "custom" }, + new Date(2026, 0, 15, 10, 1, 45) + ); + + expect(next).toBeNull(); + }); +}); diff --git a/packages/rpc/src/services/insight-schedule.ts b/packages/rpc/src/services/insight-schedule.ts new file mode 100644 index 000000000..19067567f --- /dev/null +++ b/packages/rpc/src/services/insight-schedule.ts @@ -0,0 +1,115 @@ +export type InsightScheduleFrequency = "hourly" | "daily" | "weekly" | "custom"; + +export interface InsightScheduleConfig { + cron: string | null; + enabled: boolean; + frequency: InsightScheduleFrequency; +} + +const CRON_FIELD_SEPARATOR = /\s+/; + +function 
parseCronField( + value: string, + min: number, + max: number +): number[] | null { + if (value === "*") { + return Array.from({ length: max - min + 1 }, (_, index) => min + index); + } + + if (value.startsWith("*/")) { + const step = Number.parseInt(value.slice(2), 10); + if (!Number.isSafeInteger(step) || step <= 0) { + return null; + } + return Array.from( + { length: max - min + 1 }, + (_, index) => min + index + ).filter((item) => (item - min) % step === 0); + } + + const values = value.split(",").map((part) => Number.parseInt(part, 10)); + if ( + values.some( + (item) => !Number.isSafeInteger(item) || item < min || item > max + ) + ) { + return null; + } + return [...new Set(values)].sort((a, b) => a - b); +} + +function nextRunFromCron(cron: string | null, from: Date): Date | null { + if (!cron) { + return null; + } + + const parts = cron.trim().split(CRON_FIELD_SEPARATOR); + if (parts.length !== 5) { + return null; + } + + const [minutePart, hourPart, dayPart, monthPart, weekdayPart] = parts; + const minutes = parseCronField(minutePart ?? "", 0, 59); + const hours = parseCronField(hourPart ?? "", 0, 23); + const days = parseCronField(dayPart ?? "", 1, 31); + const months = parseCronField(monthPart ?? "", 1, 12); + const weekdays = parseCronField(weekdayPart ?? "", 0, 7); + if (!(minutes && hours && days && months && weekdays)) { + return null; + } + + const minuteSet = new Set(minutes); + const hourSet = new Set(hours); + const daySet = new Set(days); + const monthSet = new Set(months); + const weekdaySet = new Set(weekdays.map((day) => (day === 7 ? 
0 : day))); + const candidate = new Date(from); + candidate.setSeconds(0, 0); + candidate.setMinutes(candidate.getMinutes() + 1); + + const maxMinutes = 366 * 24 * 60; + for (let i = 0; i < maxMinutes; i += 1) { + if ( + minuteSet.has(candidate.getMinutes()) && + hourSet.has(candidate.getHours()) && + daySet.has(candidate.getDate()) && + monthSet.has(candidate.getMonth() + 1) && + weekdaySet.has(candidate.getDay()) + ) { + return candidate; + } + candidate.setMinutes(candidate.getMinutes() + 1); + } + + return null; +} + +export function getNextInsightRunAt( + config: InsightScheduleConfig, + from = new Date() +): Date | null { + if (!config.enabled) { + return null; + } + + const next = new Date(from); + if (config.frequency === "hourly") { + next.setHours(next.getHours() + 1, 0, 0, 0); + return next; + } + + if (config.frequency === "daily") { + next.setDate(next.getDate() + 1); + next.setHours(9, 0, 0, 0); + return next; + } + + if (config.frequency === "weekly") { + next.setDate(next.getDate() + 7); + next.setHours(9, 0, 0, 0); + return next; + } + + return nextRunFromCron(config.cron, from); +} diff --git a/packages/test/src/db.ts b/packages/test/src/db.ts index b77721ac6..d1bcb163b 100644 --- a/packages/test/src/db.ts +++ b/packages/test/src/db.ts @@ -42,6 +42,11 @@ export function db(): DB { } const TABLES = [ + "insight_rollups", + "analytics_insights", + "insight_run_items", + "insight_runs", + "insight_generation_configs", "apikey", "websites", "member", diff --git a/turbo.json b/turbo.json index 692815d05..2c9792e62 100644 --- a/turbo.json +++ b/turbo.json @@ -23,6 +23,10 @@ "GITHUB_CLIENT_SECRET", "GOOGLE_CLIENT_ID", "GOOGLE_CLIENT_SECRET", + "INSIGHTS_BULLMQ_REDIS_URL", + "INSIGHTS_DISPATCH_INTERVAL_MS", + "INSIGHTS_WORKER_CONCURRENCY", + "INSIGHTS_WORKER_ENABLED", "MARBLE_API_URL", "MARBLE_WORKSPACE_KEY", "NEXT_PUBLIC_API_URL", From f3aefe9b7e20fd516ef64b65184463581b066ca0 Mon Sep 17 00:00:00 2001 From: iza 
<59828082+izadoesdev@users.noreply.github.com> Date: Fri, 15 May 2026 21:58:19 +0300 Subject: [PATCH 04/44] ci(insights): add worker deploy checks --- .github/workflows/ci.yml | 6 ++ .github/workflows/docker-publish.yml | 6 +- .github/workflows/health-check.yml | 111 +++++++++++++++++++++++++++ docker-compose.selfhost.yml | 39 ++++++++++ insights.Dockerfile | 45 +++++++++++ 5 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 insights.Dockerfile diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a072053d3..cc33b19c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,6 +136,12 @@ jobs: env: NODE_ENV: test run: bun run test + - name: Insights integration + env: + NODE_ENV: test + INSIGHTS_INTEGRATION_TESTS: "true" + BULLMQ_REDIS_URL: redis://localhost:6379/4 + run: bun run --cwd apps/insights test:integration - name: Uptime router integration env: NODE_ENV: test diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b034215ed..c1f02fe15 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -49,7 +49,7 @@ jobs: EVENT_NAME: ${{ github.event_name }} BEFORE_SHA: ${{ github.event.before }} run: | - ALL='["api","basket","dashboard","links","uptime"]' + ALL='["api","basket","dashboard","insights","links","uptime"]' if [[ "$EVENT_NAME" != "push" ]]; then echo "services=$ALL" >> "$GITHUB_OUTPUT" exit 0 @@ -61,7 +61,7 @@ jobs: export TURBO_SCM_BASE="$BEFORE_SHA" export TURBO_SCM_HEAD="HEAD" affected=() - for svc in api basket dashboard links uptime; do + for svc in api basket dashboard insights links uptime; do count=$(bunx turbo ls --affected --filter="@databuddy/$svc" --output=json | jq -r '.packages.count') if [[ "$count" != "0" ]]; then affected+=("\"$svc\"") @@ -103,6 +103,8 @@ jobs: description: "Databuddy Basket service - event ingestion" - service: dashboard description: "Databuddy Dashboard service - web analytics UI" + - 
service: insights + description: "Databuddy Insights service - queued insight generation" - service: links description: "Databuddy Links service - URL shortening and tracking" - service: uptime diff --git a/.github/workflows/health-check.yml b/.github/workflows/health-check.yml index 158e19013..137707ad9 100644 --- a/.github/workflows/health-check.yml +++ b/.github/workflows/health-check.yml @@ -8,6 +8,7 @@ on: - ".dockerignore" - "apps/api/**" - "apps/basket/**" + - "apps/insights/**" - "packages/**" - "bun.lock" - "package.json" @@ -20,6 +21,7 @@ on: - ".dockerignore" - "apps/api/**" - "apps/basket/**" + - "apps/insights/**" - "packages/**" - "bun.lock" - "package.json" @@ -267,3 +269,112 @@ jobs: fi echo "Basket health check passed!" + + insights-health-check: + name: Insights Health Check + runs-on: blacksmith-4vcpu-ubuntu-2404 + timeout-minutes: 20 + + services: + redis: + image: redis:7-alpine + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + postgres: + image: postgres:17-alpine + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: databuddy_test + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U postgres -d databuddy_test" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Mount Docker build cache + uses: useblacksmith/stickydisk@41873b1513bb679f9c115504cbd13d3660432504 # v1 + with: + key: ${{ github.repository }}-docker-build-cache + path: /tmp/docker-build-cache + + - name: Set up Docker Builder + uses: useblacksmith/setup-docker-builder@ac083cc84672d01c60d5e8561d0a939b697de542 # v1 + + - name: Build Insights Docker image + uses: useblacksmith/build-push-action@cbd1f60d194a98cb3be5523b15134501eaf0fbf3 # v2 + with: + context: . 
+ file: ./insights.Dockerfile + push: false + load: true + tags: insights:test + + - name: Run Insights health check + run: | + set -euo pipefail + trap 'docker rm -f insights-health-check >/dev/null 2>&1 || true' EXIT + + docker run -d \ + --name insights-health-check \ + --network host \ + -e NODE_ENV=test \ + -e PORT=4002 \ + -e DATABASE_URL=postgresql://postgres:postgres@localhost:5432/databuddy_test \ + -e REDIS_URL=redis://localhost:6379 \ + -e BULLMQ_REDIS_URL=redis://localhost:6379/4 \ + -e INSIGHTS_BULLMQ_REDIS_URL= \ + -e INSIGHTS_DISPATCH_INTERVAL_MS=60000 \ + -e INSIGHTS_WORKER_CONCURRENCY=1 \ + -e INSIGHTS_WORKER_ENABLED=true \ + -e AI_GATEWAY_API_KEY=test-ai-gateway-key \ + -e SUPERMEMORY_API_KEY= \ + insights:test + + echo "Waiting for Insights to start..." + for i in {1..30}; do + if curl -sf http://localhost:4002/health > /dev/null 2>&1; then + echo "Insights is responding!" + break + fi + if [ $i -eq 30 ]; then + echo "Insights failed to start within 30 seconds" + docker logs insights-health-check + exit 1 + fi + sleep 1 + done + + STATUS_BODY=$(curl -sS http://localhost:4002/health/status) + echo "Insights /health/status: $STATUS_BODY" + if echo "$STATUS_BODY" | grep -q '"status":"ok"'; then + echo "Insights dependency health is valid" + else + echo "Insights dependency health is not ok" + docker logs insights-health-check + exit 1 + fi + + RESPONSE=$(curl -sf http://localhost:4002/health || echo '{}') + echo "Insights /health: $RESPONSE" + + if echo "$RESPONSE" | grep -q '"workerEnabled"'; then + echo "Insights health endpoint structure is valid" + else + echo "Insights health endpoint response missing expected fields" + docker logs insights-health-check + exit 1 + fi + + echo "Insights health check passed!" 
diff --git a/docker-compose.selfhost.yml b/docker-compose.selfhost.yml index dffb17790..07ade8f8a 100644 --- a/docker-compose.selfhost.yml +++ b/docker-compose.selfhost.yml @@ -146,6 +146,45 @@ services: - databuddy <<: *logging + insights: + image: ghcr.io/databuddy-analytics/databuddy-insights:${IMAGE_TAG:?Set IMAGE_TAG to a release tag, for example v1.0.0} + container_name: databuddy-insights + ports: + - "${INSIGHTS_PORT:-4002}:4002" + environment: + NODE_ENV: production + PORT: "4002" + DATABASE_URL: ${DATABASE_URL:?Set DATABASE_URL in your environment} + DB_POOL_MAX: ${DB_POOL_MAX:-10} + SERVICE_NAME: databuddy-insights + REDIS_URL: ${REDIS_URL:?Set REDIS_URL in your environment} + BULLMQ_REDIS_URL: ${BULLMQ_REDIS_URL:?Set BULLMQ_REDIS_URL in your environment} + INSIGHTS_BULLMQ_REDIS_URL: ${INSIGHTS_BULLMQ_REDIS_URL:-} + INSIGHTS_DISPATCH_INTERVAL_MS: ${INSIGHTS_DISPATCH_INTERVAL_MS:-300000} + INSIGHTS_WORKER_CONCURRENCY: ${INSIGHTS_WORKER_CONCURRENCY:-5} + INSIGHTS_WORKER_ENABLED: ${INSIGHTS_WORKER_ENABLED:-true} + CLICKHOUSE_URL: ${CLICKHOUSE_URL:?Set CLICKHOUSE_URL in your environment} + AI_GATEWAY_API_KEY: ${AI_GATEWAY_API_KEY:-} + OPR_API_KEY: ${OPR_API_KEY:-} + SUPERMEMORY_API_KEY: ${SUPERMEMORY_API_KEY:-} + healthcheck: + test: [ "CMD", "bun", "-e", "fetch('http://localhost:4002/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))" ] + interval: 30s + timeout: 3s + start_period: 10s + retries: 3 + depends_on: + postgres: + condition: service_healthy + clickhouse: + condition: service_healthy + redis: + condition: service_healthy + restart: unless-stopped + networks: + - databuddy + <<: *logging + links: image: ghcr.io/databuddy-analytics/databuddy-links:${IMAGE_TAG:?Set IMAGE_TAG to a release tag, for example v1.0.0} container_name: databuddy-links diff --git a/insights.Dockerfile b/insights.Dockerfile new file mode 100644 index 000000000..80df0397e --- /dev/null +++ b/insights.Dockerfile @@ -0,0 +1,45 @@ +FROM oven/bun:1.3.14-slim AS 
pruner + +WORKDIR /app + +COPY . . + +RUN bunx turbo prune @databuddy/insights --docker + +FROM oven/bun:1.3.14-slim AS builder + +WORKDIR /app + +COPY --from=pruner /app/out/json/ . +RUN bun install --ignore-scripts + +COPY --from=pruner /app/out/full/ . +COPY turbo.json turbo.json + +ENV NODE_ENV=production + +WORKDIR /app/apps/insights + +RUN bun build \ + --compile \ + --production \ + --minify \ + --sourcemap \ + --bytecode \ + --define 'process.env.NODE_ENV="production"' \ + --outfile /app/server \ + ./src/index.ts + +FROM oven/bun:1.3.14-distroless + +WORKDIR /app + +COPY --from=builder /app/server server + +ENV NODE_ENV=production +ENV BUN_CONFIG_MAX_HTTP_REQUESTS=16384 + +EXPOSE 4002 + +ENTRYPOINT [] +CMD ["./server"] From eb399012e80cadfe184bbf68c01a2300c4737c62 Mon Sep 17 00:00:00 2001 From: iza <59828082+izadoesdev@users.noreply.github.com> Date: Fri, 15 May 2026 21:58:29 +0300 Subject: [PATCH 05/44] feat(status): refresh public status page --- .../_components/monitor-card-interactive.tsx | 115 ++++++ .../_components/monitor-row-interactive.tsx | 87 +--- .../app/[slug]/_components/status-navbar.tsx | 43 +- .../app/[slug]/_components/status-page.tsx | 201 +++++----- .../app/[slug]/_components/uptime-history.tsx | 373 ++++++++++++++++++ apps/status/app/[slug]/loading.tsx | 45 ++- apps/status/app/[slug]/page.tsx | 13 +- apps/status/app/globals.css | 8 + packages/ui/src/lib/uptime/latency-chart.tsx | 121 ++++-- 9 files changed, 753 insertions(+), 253 deletions(-) create mode 100644 apps/status/app/[slug]/_components/monitor-card-interactive.tsx create mode 100644 apps/status/app/[slug]/_components/uptime-history.tsx diff --git a/apps/status/app/[slug]/_components/monitor-card-interactive.tsx b/apps/status/app/[slug]/_components/monitor-card-interactive.tsx new file mode 100644 index 000000000..375ea9e44 --- /dev/null +++ b/apps/status/app/[slug]/_components/monitor-card-interactive.tsx @@ -0,0 +1,115 @@ +"use client"; + +import { useId, useState } from 
"react"; +import { cn } from "@databuddy/ui"; +import { CaretDownIcon } from "@databuddy/ui/icons"; +import { + type MonitorDailyData, + MonitorRowInteractive, +} from "./monitor-row-interactive"; + +interface MonitorCardInteractiveProps { + anchorId: string; + dailyData: MonitorDailyData; + days: number; + domain?: string; + id: string; + name: string; + uptimePercentage?: number; +} + +function uptimeColor(pct: number): string { + if (pct >= 99.9) { + return "text-emerald-600 dark:text-emerald-400"; + } + if (pct >= 99) { + return "text-amber-600 dark:text-amber-400"; + } + return "text-red-600 dark:text-red-400"; +} + +export function MonitorCardInteractive({ + anchorId, + dailyData, + days, + domain, + id, + name, + uptimePercentage, +}: MonitorCardInteractiveProps) { + const [isOpen, setIsOpen] = useState(true); + const panelId = useId(); + const hasLatencyData = dailyData.some( + (d) => d.avg_response_time != null || d.p95_response_time != null + ); + + return ( +
+ + +
+
+
+ +
+
+
+
+ ); +} diff --git a/apps/status/app/[slug]/_components/monitor-row-interactive.tsx b/apps/status/app/[slug]/_components/monitor-row-interactive.tsx index a656be98e..2b99fd16a 100644 --- a/apps/status/app/[slug]/_components/monitor-row-interactive.tsx +++ b/apps/status/app/[slug]/_components/monitor-row-interactive.tsx @@ -1,13 +1,8 @@ "use client"; import dynamic from "next/dynamic"; -import { useMemo } from "react"; -import { formatDateOnly, localDayjs } from "@databuddy/ui"; -import { - buildUptimeHeatmapDays, - UptimeHeatmapStrip, - LatencyChartChunkPlaceholder, -} from "@databuddy/ui/uptime"; +import { LatencyChartChunkPlaceholder } from "@databuddy/ui/uptime"; +import { UptimeHistory } from "./uptime-history"; const LatencyChart = dynamic( () => @@ -20,87 +15,41 @@ const LatencyChart = dynamic( } ); +export type MonitorDailyData = Array<{ + avg_response_time?: number; + date: string; + downtime_seconds?: number; + p95_response_time?: number; + successful_checks?: number; + total_checks?: number; + uptime_percentage?: number; +}>; + interface MonitorRowInteractiveProps { - dailyData: Array<{ - avg_response_time?: number; - date: string; - p95_response_time?: number; - uptime_percentage?: number; - }>; + dailyData: MonitorDailyData; days: number; hasLatencyData: boolean; hasUptimeData?: boolean; id: string; } -interface MonthMarker { - label: string; - offset: number; -} - -function buildMonthMarkers(days: number): MonthMarker[] { - const today = localDayjs().endOf("day"); - const markers: MonthMarker[] = []; - let prevMonth = -1; - - for (let i = 0; i < days; i++) { - const date = today.subtract(days - 1 - i, "day"); - const month = date.month(); - - if (month !== prevMonth && i > 0) { - markers.push({ - label: date.format("MMM"), - offset: (i / days) * 100, - }); - } - prevMonth = month; - } - - return markers; -} - export function MonitorRowInteractive({ - id, dailyData, days, hasLatencyData, hasUptimeData = true, + id, }: MonitorRowInteractiveProps) { - 
const heatmapData = useMemo( - () => buildUptimeHeatmapDays(dailyData, days), - [dailyData, days] - ); - - const monthMarkers = useMemo(() => buildMonthMarkers(days), [days]); - return ( <> {hasUptimeData ? ( -
- formatDateOnly(d)} - interactive - isActive - stripClassName="flex h-7 w-full gap-[1px] sm:gap-[2px]" - /> -
- {monthMarkers.map((marker) => ( - - {marker.label} - - ))} -
-
+ ) : null} {hasLatencyData ? ( - +
+ +
) : null} ); diff --git a/apps/status/app/[slug]/_components/status-navbar.tsx b/apps/status/app/[slug]/_components/status-navbar.tsx index 06922b683..f0ca35f6f 100644 --- a/apps/status/app/[slug]/_components/status-navbar.tsx +++ b/apps/status/app/[slug]/_components/status-navbar.tsx @@ -3,43 +3,49 @@ import { LifebuoyIcon } from "@databuddy/ui/icons"; interface StatusNavbarProps { logoUrl?: string | null; + name: string; supportUrl?: string | null; websiteUrl?: string | null; } export function StatusNavbar({ logoUrl, + name, websiteUrl, supportUrl, }: StatusNavbarProps) { const logo = logoUrl ? ( ) : null; + const brand = ( + + {logo} + + {name} + + + ); return ( -
-