diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx
index 7ba1766db390..0bb970db07a6 100644
--- a/frontend/src/lib/constants.tsx
+++ b/frontend/src/lib/constants.tsx
@@ -324,6 +324,7 @@ export const FEATURE_FLAGS = {
LLM_ANALYTICS_TRACE_NAVIGATION: 'llm-analytics-trace-navigation', // owner: #team-llm-analytics
LLM_ANALYTICS_EVALUATIONS_CUSTOM_MODELS: 'llm-analytics-evaluations-custom-models', // owner: #team-llm-analytics
LLM_ANALYTICS_EVALUATIONS_HOG_CODE: 'llm-analytics-evaluations-hog-code', // owner: #team-llm-analytics
+ LLM_ANALYTICS_EVALUATIONS_REPORTS: 'llm-analytics-evaluations-reports', // owner: #team-llm-analytics
LLM_ANALYTICS_EVALUATIONS_SUMMARY: 'llm-analytics-evaluations-summary', // owner: #team-llm-analytics
LLM_ANALYTICS_SESSION_SUMMARIZATION: 'llm-analytics-session-summarization', // owner: #team-llm-analytics
LLM_ANALYTICS_CLUSTERS_TAB: 'llm-analytics-clusters-tab', // owner: #team-llm-analytics
@@ -392,7 +393,6 @@ export const FEATURE_FLAGS = {
ONBOARDING_WIZARD_PROMINENCE: 'onboarding-wizard-prominence', // owner: #team-growth multivariate=control,wizard-hero,wizard-tab,wizard-only
ONBOARDING_WIZARD_INSTALLATION_IMPROVED_COPY: 'onboarding-wizard-installation-improved-copy', // owner: @fercgomes #team-growth multivariate=control,test
ONBOARDING_MOBILE_INSTALL_HELPER: 'onboarding-mobile-install-helper', // owner: @fercgomes #team-growth multivariate=control,test — target $device_type=Mobile at the flag level
- ONBOARDING_DATA_WAREHOUSE_VALUE_PROP: 'onboarding-data-warehouse-value-prop', // owner: @fercgomes #team-growth multivariate=control,table,query
OWNER_ONLY_BILLING: 'owner-only-billing', // owner: @pawelcebula #team-billing
POST_ONBOARDING_MODAL_EXPERIMENT: 'post-onboarding-modal-experiment', // owner: @fercgomes #team-growth multivariate=control,test
PASSKEY_SIGNUP_ENABLED: 'passkey-signup-enabled', // owner: @reecejones #team-platform-features
@@ -445,7 +445,6 @@ export const FEATURE_FLAGS = {
SURVEYS_FORM_BUILDER: 'surveys-form-builder', // owner: @adboio #team-surveys
SURVEY_HEADLINE_SUMMARY: 'survey-headline-summary', // owner: @adboio #team-surveys
SURVEYS_INSIGHT_BUTTON_EXPERIMENT: 'ask-users-why-ai-vs-quickcreate', // owner: @adboio #team-surveys multivariate=true
- SURVEYS_TOOLBAR: 'surveys-toolbar', // owner: @fcgomes
SURVEYS_WEB_ANALYTICS_CROSS_SELL: 'surveys-in-web-analytics', // owner: @adboio #team-surveys
TASK_SUMMARIES: 'task-summaries', // owner: #team-llm-analytics
TASK_TOOL: 'phai-task-tool', // owner: @kappa90 #team-posthog-ai
diff --git a/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx b/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx
index 06d6a895b706..12492391d81f 100644
--- a/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx
+++ b/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx
@@ -35,6 +35,8 @@ import { modelPickerLogic } from '../modelPickerLogic'
import { providerKeyStateIssueDescription, providerLabel } from '../settings/providerKeyStateUtils'
import { EvaluationCodeEditor } from './components/EvaluationCodeEditor'
import { EvaluationPromptEditor } from './components/EvaluationPromptEditor'
+import { EvaluationReportConfig } from './components/EvaluationReportConfig'
+import { EvaluationReportsTab } from './components/EvaluationReportsTab'
import { EvaluationRunsTable } from './components/EvaluationRunsTable'
import { EvaluationTriggers } from './components/EvaluationTriggers'
import { LLMEvaluationLogicProps, llmEvaluationLogic } from './llmEvaluationLogic'
@@ -297,6 +299,18 @@ export function LLMAnalyticsEvaluation(): JSX.Element {
),
},
+ !isNewEvaluation &&
+ !!featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_EVALUATIONS_REPORTS] && {
+ key: 'reports',
+ label: 'Reports',
+ 'data-attr': 'llma-evaluation-reports-tab',
+ content: (
+ setActiveTab('configuration')}
+ />
+ ),
+ },
{
key: 'configuration',
label: 'Configuration',
@@ -453,7 +467,20 @@ export function LLMAnalyticsEvaluation(): JSX.Element {
+
+ {/* Scheduled Reports (inline config for new evaluations) */}
+ {isNewEvaluation &&
+ featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_EVALUATIONS_REPORTS] && (
+
+ )}
+
+ {/* Scheduled Reports (for existing evaluations, outside the form) */}
+ {!isNewEvaluation && featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_EVALUATIONS_REPORTS] && (
+
+
+
+ )}
),
},
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
new file mode 100644
index 000000000000..49eac5149bfa
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
@@ -0,0 +1,459 @@
+import { useActions, useValues } from 'kea'
+import { useEffect, useState } from 'react'
+
+import { LemonButton, LemonDialog, LemonInput, LemonSelect, LemonSwitch, LemonTextArea } from '@posthog/lemon-ui'
+
+import { IntegrationChoice } from 'lib/components/CyclotronJob/integrations/IntegrationChoice'
+import { integrationsLogic } from 'lib/integrations/integrationsLogic'
+import { SlackChannelPicker, SlackNotConfiguredBanner } from 'lib/integrations/SlackIntegrationHelpers'
+
+import { evaluationReportLogic } from '../evaluationReportLogic'
+import type { EvaluationReportDeliveryTarget, EvaluationReportFrequency } from '../types'
+
+const GUIDANCE_PLACEHOLDER =
+ "Optional guidance for the report agent. e.g. 'Focus on cost regressions across models', 'Compare latency between gpt-4o-mini and claude-sonnet', 'Keep it to 2 sections max'"
+
+const FREQUENCY_OPTIONS = [
+ { value: 'hourly' as const, label: 'Hourly' },
+ { value: 'daily' as const, label: 'Daily' },
+ { value: 'weekly' as const, label: 'Weekly' },
+ { value: 'every_n' as const, label: 'Every N evaluations' },
+]
+
+const TRIGGER_THRESHOLD_MIN = 10
+const TRIGGER_THRESHOLD_MAX = 10_000
+const TRIGGER_THRESHOLD_DEFAULT = 100
+
+/** Threshold config shown when frequency is 'every_n' */
+function ThresholdConfig({ value, onChange }: { value: number; onChange: (value: number) => void }): JSX.Element {
+ return (
+
+
Evaluation count threshold
+
onChange(Number(val))}
+ fullWidth
+ />
+
+ A report will be generated after this many new evaluation results arrive. Checked every 5 minutes. Min{' '}
+ {TRIGGER_THRESHOLD_MIN}, max {TRIGGER_THRESHOLD_MAX.toLocaleString()}. Cooldown: at most one report per
+ hour, up to 10 per day.
+
+
+ )
+}
+
+/** Shared delivery targets configuration */
+function DeliveryTargetsConfig({
+ emailValue,
+ onEmailChange,
+ slackIntegrationId,
+ onSlackIntegrationChange,
+ slackChannelValue,
+ onSlackChannelChange,
+}: {
+ emailValue: string
+ onEmailChange: (value: string) => void
+ slackIntegrationId: number | null
+ onSlackIntegrationChange: (value: number | null) => void
+ slackChannelValue: string
+ onSlackChannelChange: (value: string) => void
+}): JSX.Element {
+ const { slackIntegrations, integrations } = useValues(integrationsLogic)
+
+ return (
+ <>
+
+
Email recipients
+
+
Comma-separated email addresses
+
+
+
Slack channel
+ {!slackIntegrations?.length ? (
+
+ ) : (
+
+ {
+ if (newValue !== slackIntegrationId) {
+ onSlackChannelChange('')
+ }
+ onSlackIntegrationChange(newValue)
+ }}
+ />
+ {slackIntegrationId &&
+ (() => {
+ const selectedIntegration = integrations?.find((i) => i.id === slackIntegrationId)
+ return selectedIntegration ? (
+ onSlackChannelChange(val || '')}
+ integration={selectedIntegration}
+ />
+ ) : null
+ })()}
+
+ )}
+
+ >
+ )
+}
+
+/** Inline config shown during new evaluation creation */
+function PendingReportConfig({ evaluationId }: { evaluationId: string }): JSX.Element {
+ const { pendingConfig } = useValues(evaluationReportLogic({ evaluationId }))
+ const {
+ setPendingEnabled,
+ setPendingFrequency,
+ setPendingEmailValue,
+ setPendingSlackIntegrationId,
+ setPendingSlackChannelValue,
+ setPendingReportPromptGuidance,
+ setPendingTriggerThreshold,
+ } = useActions(evaluationReportLogic({ evaluationId }))
+
+ return (
+
+
+
+
Scheduled reports
+
+ AI-generated analysis of evaluation results. Reports are always available in the Reports tab.
+ Optionally add email or Slack to get notified.
+
+
+
+
+
+ {pendingConfig.enabled && (
+
+
+ Frequency
+ val && setPendingFrequency(val)}
+ options={FREQUENCY_OPTIONS}
+ fullWidth
+ />
+
+ {pendingConfig.frequency === 'every_n' && (
+
+ )}
+
+
+
Report agent guidance (optional)
+
+
+ Steers the agent's focus, section choices, or scope. Appended to the base prompt.
+
+
+
+ )}
+
+ )
+}
+
+/** Toggle-based report management for existing evaluations */
+function ExistingReportConfig({ evaluationId }: { evaluationId: string }): JSX.Element {
+ const logic = evaluationReportLogic({ evaluationId })
+ const { activeReport, reportsLoading } = useValues(logic)
+ const { updateReport, deleteReport, createReport } = useActions(logic)
+
+ // Local state: toggle controls form visibility, Save button creates the report.
+ // All fields are local-first so the user can change multiple things before saving.
+ const [formEnabled, setFormEnabled] = useState(false)
+ const [frequency, setFrequency] = useState('daily')
+ const [emailValue, setEmailValue] = useState('')
+ const [slackIntegrationId, setSlackIntegrationId] = useState(null)
+ const [slackChannelValue, setSlackChannelValue] = useState('')
+ const [guidance, setGuidance] = useState('')
+ const [triggerThreshold, setTriggerThreshold] = useState(TRIGGER_THRESHOLD_DEFAULT)
+
+ // Seed local form state from the active report so the user can edit
+ // any field without having to disable + recreate the schedule.
+ useEffect(() => {
+ if (!activeReport) {
+ return
+ }
+ const emailTarget = activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'email'
+ )
+ const slackTarget = activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'slack'
+ )
+ setFrequency(activeReport.frequency)
+ setEmailValue(emailTarget?.value ?? '')
+ setSlackIntegrationId(slackTarget?.integration_id ?? null)
+ setSlackChannelValue(slackTarget?.channel ?? '')
+ setGuidance(activeReport.report_prompt_guidance ?? '')
+ setTriggerThreshold(activeReport.trigger_threshold ?? TRIGGER_THRESHOLD_DEFAULT)
+ }, [activeReport])
+
+ const isEnabled = !!activeReport || formEnabled
+
+ const handleToggle = (checked: boolean): void => {
+ if (checked) {
+ setFormEnabled(true)
+ } else if (activeReport) {
+ LemonDialog.open({
+ title: 'Disable scheduled reports?',
+ description: 'This will stop all future report deliveries. Past reports will be preserved.',
+ primaryButton: {
+ children: 'Disable',
+ status: 'danger',
+ onClick: () => deleteReport(activeReport.id),
+ },
+ secondaryButton: { children: 'Cancel' },
+ })
+ } else {
+ setFormEnabled(false)
+ }
+ }
+
+ const hasEmail = emailValue.trim().length > 0
+ const hasSlack = slackIntegrationId !== null && slackChannelValue.length > 0
+
+ const handleSave = (): void => {
+ const targets: EvaluationReportDeliveryTarget[] = []
+ if (hasEmail) {
+ targets.push({ type: 'email', value: emailValue.trim() })
+ }
+ if (hasSlack) {
+ targets.push({ type: 'slack', integration_id: slackIntegrationId!, channel: slackChannelValue })
+ }
+ createReport({
+ evaluationId,
+ frequency,
+ delivery_targets: targets,
+ report_prompt_guidance: guidance,
+ trigger_threshold: frequency === 'every_n' ? triggerThreshold : null,
+ })
+ setFormEnabled(false)
+ }
+
+ return (
+
+
+
+
Scheduled reports
+
+ AI-generated analysis of evaluation results. Reports are always available in the Reports tab.
+ Optionally add email or Slack to get notified.
+
+
+
+
+
+ {activeReport ? (
+
+
+ Frequency
+ val && setFrequency(val)}
+ options={FREQUENCY_OPTIONS}
+ fullWidth
+ />
+
+
+ {frequency === 'every_n' && (
+
+ )}
+
+
+
+
+
Report agent guidance (optional)
+
+
+ Steers the agent's focus, section choices, or scope. Appended to the base prompt.
+
+
+
+ {(() => {
+ const currentEmail =
+ activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'email'
+ )?.value ?? ''
+ const currentSlack = activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'slack'
+ )
+ const currentSlackIntegrationId: number | null = currentSlack?.integration_id ?? null
+ const currentSlackChannel = currentSlack?.channel ?? ''
+ const currentGuidance = activeReport.report_prompt_guidance ?? ''
+ const currentThreshold = activeReport.trigger_threshold ?? TRIGGER_THRESHOLD_DEFAULT
+ const frequencyDirty = frequency !== activeReport.frequency
+ const targetsDirty =
+ emailValue.trim() !== currentEmail ||
+ slackIntegrationId !== currentSlackIntegrationId ||
+ slackChannelValue !== currentSlackChannel
+ const guidanceDirty = guidance !== currentGuidance
+ const thresholdDirty = frequency === 'every_n' && triggerThreshold !== currentThreshold
+ const isDirty = frequencyDirty || targetsDirty || guidanceDirty || thresholdDirty
+ const hasAnyTarget = hasEmail || hasSlack
+ return (
+
+ {
+ const targets: EvaluationReportDeliveryTarget[] = []
+ if (hasEmail) {
+ targets.push({ type: 'email', value: emailValue.trim() })
+ }
+ if (hasSlack) {
+ targets.push({
+ type: 'slack',
+ integration_id: slackIntegrationId!,
+ channel: slackChannelValue,
+ })
+ }
+ const data: Record = {
+ frequency,
+ delivery_targets: targets,
+ report_prompt_guidance: guidance,
+ }
+ if (frequency === 'every_n') {
+ data.trigger_threshold = triggerThreshold
+ }
+ updateReport({
+ reportId: activeReport.id,
+ data,
+ })
+ }}
+ >
+ Save changes
+
+
+ )
+ })()}
+
+ {frequency === 'every_n' ? (
+
+ A report will be generated when{' '}
+ {activeReport.trigger_threshold ?? TRIGGER_THRESHOLD_DEFAULT} new evaluation results arrive.
+ Checked every 5 minutes.
+
+ ) : (
+ activeReport.next_delivery_date && (
+
+ Next delivery: {new Date(activeReport.next_delivery_date).toLocaleString()}
+
+ )
+ )}
+
+
Generated reports appear in the Reports tab.
+
+ ) : (
+ formEnabled && (
+
+
+ Frequency
+ val && setFrequency(val)}
+ options={FREQUENCY_OPTIONS}
+ fullWidth
+ />
+
+ {frequency === 'every_n' && (
+
+ )}
+
+
+
Report agent guidance (optional)
+
+
+ Steers the agent's focus, section choices, or scope. Appended to the base prompt.
+
+
+
+
+ Save report schedule
+
+
+
+ )
+ )}
+
+ )
+}
+
+export function EvaluationReportConfig({ evaluationId }: { evaluationId: string }): JSX.Element {
+ if (evaluationId === 'new') {
+ return
+ }
+ return
+}
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
new file mode 100644
index 000000000000..9b8d225ea399
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
@@ -0,0 +1,265 @@
+import { useMemo, useState } from 'react'
+
+import { LemonBadge, LemonButton, LemonCollapse, LemonDivider } from '@posthog/lemon-ui'
+
+import { LemonMarkdown } from 'lib/lemon-ui/LemonMarkdown'
+import { urls } from 'scenes/urls'
+
+import type {
+ EvaluationReportMetrics,
+ EvaluationReportRun,
+ EvaluationReportRunContent,
+ EvaluationReportSection,
+} from '../types'
+
+// Match any UUID in the content — surrounding punctuation (backticks, angle brackets, etc.)
+// is stripped so we don't depend on how the LLM formats references.
+const UUID_REGEX = /[`<]*([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})[`>]*/g
+
+// Rewrite `` backtick tokens into markdown links pointing to the correct
+// trace URL. Uses the citations list to map generation_id → trace_id so the link
+// opens the right trace with the generation highlighted.
+function linkifyUuids(content: string, citationMap: Record): string {
+ return content.replace(UUID_REGEX, (_match, generationId: string) => {
+ const traceId = citationMap[generationId]
+ const url = traceId
+ ? urls.llmAnalyticsTrace(traceId, { event: generationId })
+ : urls.llmAnalyticsTrace(generationId)
+ return `[\`${generationId.slice(0, 8)}...\`](${url})`
+ })
+}
+
+// Strip a leading markdown heading line if it matches the section title.
+// The agent sometimes prefixes each section's content with its own heading,
+// which duplicates the heading the renderer emits separately.
+function stripRedundantLeadingHeading(content: string, sectionTitle: string): string {
+ const match = content.match(/^\s*(#{1,6})\s+(.+?)\s*(?:\r?\n|$)/)
+ if (!match) {
+ return content
+ }
+ const headingText = match[2].trim().toLowerCase()
+ if (headingText.startsWith(sectionTitle.toLowerCase())) {
+ return content.slice(match[0].length).replace(/^\s+/, '')
+ }
+ return content
+}
+
+function ReportSectionContent({
+ section,
+ citationMap,
+}: {
+ section: EvaluationReportSection
+ citationMap: Record
+}): JSX.Element {
+ const markdown = linkifyUuids(stripRedundantLeadingHeading(section.content, section.title), citationMap)
+ return (
+
+ {markdown}
+
+ )
+}
+
+function formatPassRate(rate: number | null | undefined): string {
+ if (rate == null) {
+ return '—'
+ }
+ return `${rate.toFixed(2)}%`
+}
+
+function MetricsCard({ metrics }: { metrics: EvaluationReportMetrics }): JSX.Element {
+ // Period-over-period delta (if we have a previous pass rate to compare)
+ let deltaEl: JSX.Element | null = null
+ if (metrics.previous_pass_rate != null) {
+ const diff = metrics.pass_rate - metrics.previous_pass_rate
+ const arrow = diff > 0 ? '▲' : diff < 0 ? '▼' : '—'
+ const color = diff > 0 ? 'text-success' : diff < 0 ? 'text-danger' : 'text-muted'
+ deltaEl = (
+
+ {arrow} {Math.abs(diff).toFixed(2)}pp vs previous
+
+ )
+ }
+
+ return (
+
+
+
+
Pass rate
+
+ {formatPassRate(metrics.pass_rate)}
+ {deltaEl}
+
+
+
+
Total runs
+
{metrics.total_runs}
+
+
+
Pass
+
{metrics.pass_count}
+
+
+
Fail
+
{metrics.fail_count}
+
+
+
N/A
+
{metrics.na_count}
+
+ {metrics.previous_total_runs != null && (
+
+
Previous runs
+
{metrics.previous_total_runs}
+
+ )}
+
+
+ )
+}
+
+function DeliveryStatusBadge({ status }: { status: string }): JSX.Element {
+ const statusMap: Record = {
+ delivered: { label: 'Delivered', status: 'success' },
+ pending: { label: 'Pending', status: 'muted' },
+ partial_failure: { label: 'Partial failure', status: 'warning' },
+ failed: { label: 'Failed', status: 'danger' },
+ }
+ const info = statusMap[status] || { label: status, status: 'muted' as const }
+ return
+}
+
+export function EvaluationReportViewer({
+ reportRun,
+ onClose,
+ compact = false,
+}: {
+ reportRun: EvaluationReportRun
+ onClose?: () => void
+ /** When true, hides the header/close row — useful when the parent already provides framing (e.g. an expanded table row). */
+ compact?: boolean
+}): JSX.Element {
+ const content = reportRun.content as EvaluationReportRunContent
+ const sections = content.sections ?? []
+ const metrics = content.metrics
+
+ // Build generation_id → trace_id lookup from citations for correct trace URLs
+ const citationMap = useMemo(() => {
+ const map: Record = {}
+ for (const c of content.citations ?? []) {
+ if (c.generation_id && c.trace_id) {
+ map[c.generation_id] = c.trace_id
+ }
+ }
+ return map
+ }, [content.citations])
+
+ // Default to executive summary (first section) expanded. Memoized so Expand/Collapse all
+ // buttons can set the list deterministically.
+ const sectionKeys = useMemo(() => sections.map((_, i) => i.toString()), [sections])
+ const [expandedKeys, setExpandedKeys] = useState(sections.length > 0 ? ['0'] : [])
+
+ const allExpanded = expandedKeys.length === sectionKeys.length && sectionKeys.length > 0
+ const allCollapsed = expandedKeys.length === 0
+
+ return (
+
+ {!compact && (
+ <>
+
+
+
{content.title || 'Report'}
+
+
+ {onClose && (
+
+ Close
+
+ )}
+
+
+ Period: {new Date(reportRun.period_start).toLocaleString()} –{' '}
+ {new Date(reportRun.period_end).toLocaleString()}
+
+
+
+ >
+ )}
+
+ {compact && (
+
+ {content.title ?
{content.title} :
}
+ {sections.length > 0 && (
+
+ setExpandedKeys(sectionKeys)}
+ disabledReason={allExpanded ? 'All sections already expanded' : undefined}
+ >
+ Expand all
+
+ setExpandedKeys([])}
+ disabledReason={allCollapsed ? 'All sections already collapsed' : undefined}
+ >
+ Collapse all
+
+
+ )}
+
+ )}
+
+ {metrics &&
}
+
+ {sections.length > 0 && (
+ <>
+ {!compact && (
+
+ setExpandedKeys(sectionKeys)}
+ disabledReason={allExpanded ? 'All sections already expanded' : undefined}
+ >
+ Expand all
+
+ setExpandedKeys([])}
+ disabledReason={allCollapsed ? 'All sections already collapsed' : undefined}
+ >
+ Collapse all
+
+
+ )}
+
+
setExpandedKeys(keys as string[])}
+ panels={sections.map((section, idx) => ({
+ key: idx.toString(),
+ header: section.title,
+ content: ,
+ }))}
+ />
+ >
+ )}
+
+ {reportRun.delivery_errors.length > 0 && (
+
+
Delivery errors
+
+ {reportRun.delivery_errors.map((err, i) => (
+ {err}
+ ))}
+
+
+ )}
+
+ )
+}
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx
new file mode 100644
index 000000000000..4fec882bb347
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx
@@ -0,0 +1,156 @@
+import { useActions, useValues } from 'kea'
+
+import { IconInfo } from '@posthog/icons'
+import { LemonButton, LemonTable, LemonTag, Tooltip } from '@posthog/lemon-ui'
+
+import { TZLabel } from 'lib/components/TZLabel'
+
+import { evaluationReportLogic } from '../evaluationReportLogic'
+import type { EvaluationReportRun } from '../types'
+import { EvaluationReportViewer } from './EvaluationReportViewer'
+
+interface EvaluationReportsTabProps {
+ evaluationId: string
+ /** Called when the user clicks the "Set up scheduled reports" CTA in the empty state. */
+ onConfigureClick?: () => void
+}
+
+const STATUS_STYLES: Record<
+ EvaluationReportRun['delivery_status'],
+ { label: string; type: 'success' | 'warning' | 'danger' | 'muted' }
+> = {
+ delivered: { label: 'Delivered', type: 'success' },
+ pending: { label: 'Pending', type: 'muted' },
+ partial_failure: { label: 'Partial failure', type: 'warning' },
+ failed: { label: 'Failed', type: 'danger' },
+}
+
+export function EvaluationReportsTab({ evaluationId, onConfigureClick }: EvaluationReportsTabProps): JSX.Element {
+ const logic = evaluationReportLogic({ evaluationId })
+ const { reportRuns, reportRunsLoading, reportsLoading, activeReport, generateResultLoading } = useValues(logic)
+ const { generateReport, loadReportRuns } = useActions(logic)
+
+ // No schedule configured at all → CTA pointing to the Configuration tab.
+ // Avoids hiding the Reports tab entirely so it stays discoverable.
+ if (!reportsLoading && !activeReport) {
+ return (
+
+
+
No scheduled reports yet
+
+ Scheduled reports deliver AI-generated analysis of this evaluation's results to email or Slack
+ on a recurring basis.
+
+ {onConfigureClick && (
+
+ Set up scheduled reports
+
+ )}
+
+
+ )
+ }
+
+ return (
+
+
+
+ History of AI-generated reports for this evaluation. Click a row to expand the full report. Schedule
+ and delivery targets are configured in the Configuration tab.
+
+ {activeReport && (
+
+ loadReportRuns(activeReport.id)}
+ loading={reportRunsLoading}
+ >
+ Refresh
+
+ generateReport(activeReport.id)}
+ loading={generateResultLoading}
+ >
+ Generate now
+
+
+ )}
+
+
+
,
+ },
+ {
+ title: 'Title',
+ key: 'title',
+ render: (_, run: EvaluationReportRun) => (
+
+ {run.content?.title || '–'}
+
+ ),
+ },
+ {
+ title: 'Pass rate',
+ key: 'pass_rate',
+ render: (_, run: EvaluationReportRun) => {
+ const pct = run.content?.metrics?.pass_rate ?? run.metadata?.pass_rate
+ return typeof pct === 'number' ? `${pct.toFixed(1)}%` : '–'
+ },
+ },
+ {
+ title: 'Runs',
+ key: 'total_runs',
+ render: (_, run: EvaluationReportRun) =>
+ run.content?.metrics?.total_runs ?? run.metadata?.total_runs ?? '–',
+ },
+ {
+ title: 'Status',
+ key: 'delivery_status',
+ render: (_, run: EvaluationReportRun) => {
+ const info = STATUS_STYLES[run.delivery_status] || {
+ label: run.delivery_status,
+ type: 'default' as const,
+ }
+ return (
+
+ {info.label}
+
+ )
+ },
+ },
+ {
+ key: 'info',
+ width: 0,
+ render: (_, run: EvaluationReportRun) => (
+
+
+
+ ),
+ },
+ ]}
+ expandable={{
+ noIndent: true,
+ expandedRowRender: (run: EvaluationReportRun) => (
+
+
+
+ ),
+ }}
+ emptyState="No reports generated yet"
+ size="small"
+ />
+
+ )
+}
diff --git a/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts b/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
new file mode 100644
index 000000000000..0e7bb9647fcc
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
@@ -0,0 +1,241 @@
+import { actions, afterMount, connect, kea, key, listeners, path, props, reducers, selectors } from 'kea'
+import { loaders } from 'kea-loaders'
+
+import api from 'lib/api'
+import { lemonToast } from 'lib/lemon-ui/LemonToast'
+import { teamLogic } from 'scenes/teamLogic'
+
+import type { evaluationReportLogicType } from './evaluationReportLogicType'
+import type {
+ EvaluationReport,
+ EvaluationReportDeliveryTarget,
+ EvaluationReportFrequency,
+ EvaluationReportRun,
+} from './types'
+
+export interface EvaluationReportLogicProps {
+ evaluationId: string
+}
+
+export interface PendingReportConfig {
+ enabled: boolean
+ frequency: EvaluationReportFrequency
+ emailValue: string
+ slackIntegrationId: number | null
+ slackChannelValue: string
+ reportPromptGuidance: string
+ triggerThreshold: number
+}
+
+const DEFAULT_PENDING_CONFIG: PendingReportConfig = {
+ enabled: true,
+ frequency: 'every_n',
+ emailValue: '',
+ slackIntegrationId: null,
+ slackChannelValue: '',
+ reportPromptGuidance: '',
+ triggerThreshold: 100,
+}
+
+export const evaluationReportLogic = kea([
+ path(['products', 'llm_analytics', 'frontend', 'evaluations', 'evaluationReportLogic']),
+ props({} as EvaluationReportLogicProps),
+ key((props) => props.evaluationId),
+ connect({
+ values: [teamLogic, ['currentTeamId']],
+ }),
+
+ actions({
+ // Pending config for new evaluations
+ setPendingEnabled: (enabled: boolean) => ({ enabled }),
+ setPendingFrequency: (frequency: EvaluationReportFrequency) => ({ frequency }),
+ setPendingEmailValue: (emailValue: string) => ({ emailValue }),
+ setPendingSlackIntegrationId: (integrationId: number | null) => ({ integrationId }),
+ setPendingSlackChannelValue: (channelValue: string) => ({ channelValue }),
+ setPendingReportPromptGuidance: (reportPromptGuidance: string) => ({ reportPromptGuidance }),
+ setPendingTriggerThreshold: (triggerThreshold: number) => ({ triggerThreshold }),
+ createPendingReport: (evaluationId: string) => ({ evaluationId }),
+
+ // Existing report actions
+ selectReportRun: (reportRun: EvaluationReportRun | null) => ({ reportRun }),
+ }),
+
+ reducers({
+ pendingConfig: [
+ DEFAULT_PENDING_CONFIG as PendingReportConfig,
+ {
+ setPendingEnabled: (state, { enabled }) => ({ ...state, enabled }),
+ setPendingFrequency: (state, { frequency }) => ({ ...state, frequency }),
+ setPendingEmailValue: (state, { emailValue }) => ({ ...state, emailValue }),
+ setPendingSlackIntegrationId: (state, { integrationId }) => ({
+ ...state,
+ slackIntegrationId: integrationId,
+ slackChannelValue: integrationId !== state.slackIntegrationId ? '' : state.slackChannelValue,
+ }),
+ setPendingSlackChannelValue: (state, { channelValue }) => ({
+ ...state,
+ slackChannelValue: channelValue,
+ }),
+ setPendingReportPromptGuidance: (state, { reportPromptGuidance }) => ({
+ ...state,
+ reportPromptGuidance,
+ }),
+ setPendingTriggerThreshold: (state, { triggerThreshold }) => ({
+ ...state,
+ triggerThreshold,
+ }),
+ },
+ ],
+ selectedReportRun: [
+ null as EvaluationReportRun | null,
+ {
+ selectReportRun: (_, { reportRun }) => reportRun,
+ },
+ ],
+ }),
+
+ loaders(({ props, values }) => ({
+ reports: [
+ [] as EvaluationReport[],
+ {
+ loadReports: async () => {
+ if (props.evaluationId === 'new') {
+ return []
+ }
+ const response = await api.get(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/?evaluation=${props.evaluationId}`
+ )
+ return response.results || []
+ },
+ createReport: async (params: {
+ evaluationId: string
+ frequency: EvaluationReportFrequency
+ delivery_targets: EvaluationReportDeliveryTarget[]
+ report_prompt_guidance?: string
+ trigger_threshold?: number | null
+ }) => {
+ const body: Record = {
+ evaluation: params.evaluationId,
+ frequency: params.frequency,
+ start_date: new Date().toISOString(),
+ delivery_targets: params.delivery_targets,
+ report_prompt_guidance: params.report_prompt_guidance ?? '',
+ enabled: true,
+ }
+ if (params.frequency === 'every_n' && params.trigger_threshold != null) {
+ body.trigger_threshold = params.trigger_threshold
+ }
+ const report = await api.create(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/`,
+ body
+ )
+ return [...values.reports, report]
+ },
+ updateReport: async ({ reportId, data }: { reportId: string; data: Partial }) => {
+ const updated = await api.update(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/`,
+ data
+ )
+ return values.reports.map((r) => (r.id === reportId ? updated : r))
+ },
+ deleteReport: async (reportId: string) => {
+ await api.update(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/`,
+ { deleted: true }
+ )
+ return values.reports.filter((r) => r.id !== reportId)
+ },
+ },
+ ],
+ reportRuns: [
+ [] as EvaluationReportRun[],
+ {
+ loadReportRuns: async (reportId: string) => {
+ const response = await api.get(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/runs/`
+ )
+ return response || []
+ },
+ },
+ ],
+ generateResult: [
+ null as null,
+ {
+ generateReport: async (reportId: string) => {
+ await api.create(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/generate/`
+ )
+ return null
+ },
+ },
+ ],
+ })),
+
+ selectors({
+ isNewEvaluation: [(_, p) => [p.evaluationId], (evaluationId: string) => evaluationId === 'new'],
+ activeReport: [
+ (s) => [s.reports],
+ (reports): EvaluationReport | null => {
+ return reports.find((r: EvaluationReport) => r.enabled && !r.deleted) || null
+ },
+ ],
+ }),
+
+ listeners(({ actions, values }) => ({
+ loadReportsSuccess: ({ reports }: { reports: EvaluationReport[] }) => {
+ // Auto-load the run history for the active report so the Reports tab knows
+ // whether to render itself and can show data immediately.
+ const active = reports.find((r: EvaluationReport) => r.enabled && !r.deleted)
+ if (active) {
+ actions.loadReportRuns(active.id)
+ }
+ },
+ generateReportSuccess: () => {
+ lemonToast.success('Report is being generated and will be delivered to your configured targets shortly.')
+ },
+ generateReportFailure: () => {
+ lemonToast.error('Failed to trigger report generation. Please try again.')
+ },
+ createReportSuccess: () => {
+ actions.loadReports()
+ },
+ updateReportSuccess: () => {
+ actions.loadReports()
+ },
+ createPendingReport: ({ evaluationId }) => {
+ const { pendingConfig } = values
+ if (!pendingConfig.enabled) {
+ return
+ }
+ const targets: EvaluationReportDeliveryTarget[] = []
+ if (pendingConfig.emailValue.trim()) {
+ targets.push({ type: 'email', value: pendingConfig.emailValue.trim() })
+ }
+ if (pendingConfig.slackIntegrationId && pendingConfig.slackChannelValue) {
+ targets.push({
+ type: 'slack',
+ integration_id: pendingConfig.slackIntegrationId,
+ channel: pendingConfig.slackChannelValue,
+ })
+ }
+            // Only create a report schedule when the user actually configured
+            // something (delivery targets or custom guidance) during evaluation
+            // creation — otherwise skip; presumably the backend supplies defaults.
+ if (targets.length > 0 || pendingConfig.reportPromptGuidance.trim()) {
+ actions.createReport({
+ evaluationId,
+ frequency: pendingConfig.frequency,
+ delivery_targets: targets,
+ report_prompt_guidance: pendingConfig.reportPromptGuidance,
+ trigger_threshold: pendingConfig.frequency === 'every_n' ? pendingConfig.triggerThreshold : null,
+ })
+ }
+ },
+ })),
+
+ afterMount(({ actions, props }) => {
+ if (props.evaluationId !== 'new') {
+ actions.loadReports()
+ }
+ }),
+])
diff --git a/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts b/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts
index a8c47c3cbcea..a694ea4e3293 100644
--- a/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts
+++ b/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts
@@ -18,6 +18,7 @@ import { LLMProviderKey, llmProviderKeysLogic } from '../settings/llmProviderKey
import { isUnhealthyProviderKeyState } from '../settings/providerKeyStateUtils'
import { queryEvaluationRuns } from '../utils'
import { EVALUATION_SUMMARY_MAX_RUNS } from './constants'
+import { evaluationReportLogic } from './evaluationReportLogic'
import type { llmEvaluationLogicType } from './llmEvaluationLogicType'
import { EvaluationTemplateKey, defaultEvaluationTemplates } from './templates'
import {
@@ -499,6 +500,13 @@ export const llmEvaluationLogic = kea([
})
},
+ saveEvaluationSuccess: ({ evaluation }) => {
+ if (props.evaluationId === 'new' && evaluation?.id) {
+ // Create the pending report if the user configured one during evaluation creation. NOTE(review): createPendingReport's listener destructures { evaluationId }, so the action's payload creator must wrap this positional string — confirm; also confirm the 'new'-keyed logic is still mounted here.
+ evaluationReportLogic({ evaluationId: 'new' }).actions.createPendingReport(evaluation.id)
+ }
+ },
+
saveEvaluation: async () => {
try {
const teamId = teamLogic.values.currentTeamId
diff --git a/products/llm_analytics/frontend/evaluations/types.ts b/products/llm_analytics/frontend/evaluations/types.ts
index 44b1e3c1f551..01a0c29f0a24 100644
--- a/products/llm_analytics/frontend/evaluations/types.ts
+++ b/products/llm_analytics/frontend/evaluations/types.ts
@@ -82,6 +82,86 @@ export interface HogTestResult {
error: string | null
}
+export type EvaluationReportFrequency = 'hourly' | 'daily' | 'weekly' | 'every_n'
+
+export interface EvaluationReportDeliveryTarget {
+ type: 'email' | 'slack'
+ value?: string
+ integration_id?: number
+ channel?: string
+}
+
+export interface EvaluationReport {
+ id: string
+ evaluation: string
+ frequency: EvaluationReportFrequency
+ byweekday: string[] | null
+ start_date: string
+ next_delivery_date: string | null
+ delivery_targets: EvaluationReportDeliveryTarget[]
+ max_sample_size: number
+ enabled: boolean
+ deleted: boolean
+ last_delivered_at: string | null
+ /** Optional per-report custom guidance appended to the agent's system prompt. */
+ report_prompt_guidance: string
+ /** Number of new eval results that triggers a report (only for every_n frequency). */
+ trigger_threshold: number | null
+ /** Minimum minutes between count-triggered reports. */
+ cooldown_minutes: number
+ /** Maximum count-triggered report runs per calendar day (UTC). */
+ daily_run_cap: number
+ created_by: number | null
+ created_at: string
+}
+
+/** A titled markdown section of the report (v2: agent-chosen title). */
+export interface EvaluationReportSection {
+ title: string
+ content: string
+}
+
+/** A trace reference cited by the agent to ground a specific finding. */
+export interface EvaluationReportCitation {
+ generation_id: string
+ trace_id: string
+ reason: string
+}
+
+/** Structured metrics computed mechanically from ClickHouse (agent cannot fabricate). */
+export interface EvaluationReportMetrics {
+ total_runs: number
+ pass_count: number
+ fail_count: number
+ na_count: number
+ pass_rate: number
+ period_start: string
+ period_end: string
+ previous_total_runs: number | null
+ previous_pass_rate: number | null
+}
+
+/** Top-level report content stored in EvaluationReportRun.content. */
+export interface EvaluationReportRunContent {
+ title: string
+ sections: EvaluationReportSection[]
+ citations: EvaluationReportCitation[]
+ metrics: EvaluationReportMetrics
+}
+
+export interface EvaluationReportRun {
+ id: string
+ report: string
+ content: EvaluationReportRunContent
+ /** Legacy mirror of content.metrics — populated by the store activity for backwards compat. */
+ metadata: EvaluationReportMetrics
+ period_start: string
+ period_end: string
+ delivery_status: 'pending' | 'delivered' | 'partial_failure' | 'failed'
+ delivery_errors: string[]
+ created_at: string
+}
+
export type EvaluationSummaryFilter = 'all' | 'pass' | 'fail' | 'na'
export interface EvaluationPattern {