From c4afa06c3f4d823217927d144d03ea87b983fa5a Mon Sep 17 00:00:00 2001
From: Andrew Maguire
Date: Mon, 13 Apr 2026 22:59:35 +0100
Subject: [PATCH 1/2] feat(llma): add evaluation reports frontend
Add React components for evaluation report configuration, history
table, and report viewer. Includes kea logic for API interactions
and state management. All UI is gated behind the
LLM_ANALYTICS_EVALUATIONS_REPORTS feature flag.
---
frontend/src/lib/constants.tsx | 3 +-
.../evaluations/LLMAnalyticsEvaluation.tsx | 27 ++
.../components/EvaluationReportConfig.tsx | 455 ++++++++++++++++++
.../components/EvaluationReportViewer.tsx | 267 ++++++++++
.../components/EvaluationReportsTab.tsx | 156 ++++++
.../evaluations/evaluationReportLogic.ts | 241 ++++++++++
.../evaluations/llmEvaluationLogic.ts | 8 +
.../frontend/evaluations/types.ts | 80 +++
8 files changed, 1235 insertions(+), 2 deletions(-)
create mode 100644 products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
create mode 100644 products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
create mode 100644 products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx
create mode 100644 products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx
index 7ba1766db390..0bb970db07a6 100644
--- a/frontend/src/lib/constants.tsx
+++ b/frontend/src/lib/constants.tsx
@@ -324,6 +324,7 @@ export const FEATURE_FLAGS = {
LLM_ANALYTICS_TRACE_NAVIGATION: 'llm-analytics-trace-navigation', // owner: #team-llm-analytics
LLM_ANALYTICS_EVALUATIONS_CUSTOM_MODELS: 'llm-analytics-evaluations-custom-models', // owner: #team-llm-analytics
LLM_ANALYTICS_EVALUATIONS_HOG_CODE: 'llm-analytics-evaluations-hog-code', // owner: #team-llm-analytics
+ LLM_ANALYTICS_EVALUATIONS_REPORTS: 'llm-analytics-evaluations-reports', // owner: #team-llm-analytics
LLM_ANALYTICS_EVALUATIONS_SUMMARY: 'llm-analytics-evaluations-summary', // owner: #team-llm-analytics
LLM_ANALYTICS_SESSION_SUMMARIZATION: 'llm-analytics-session-summarization', // owner: #team-llm-analytics
LLM_ANALYTICS_CLUSTERS_TAB: 'llm-analytics-clusters-tab', // owner: #team-llm-analytics
@@ -392,7 +393,6 @@ export const FEATURE_FLAGS = {
ONBOARDING_WIZARD_PROMINENCE: 'onboarding-wizard-prominence', // owner: #team-growth multivariate=control,wizard-hero,wizard-tab,wizard-only
ONBOARDING_WIZARD_INSTALLATION_IMPROVED_COPY: 'onboarding-wizard-installation-improved-copy', // owner: @fercgomes #team-growth multivariate=control,test
ONBOARDING_MOBILE_INSTALL_HELPER: 'onboarding-mobile-install-helper', // owner: @fercgomes #team-growth multivariate=control,test — target $device_type=Mobile at the flag level
- ONBOARDING_DATA_WAREHOUSE_VALUE_PROP: 'onboarding-data-warehouse-value-prop', // owner: @fercgomes #team-growth multivariate=control,table,query
OWNER_ONLY_BILLING: 'owner-only-billing', // owner: @pawelcebula #team-billing
POST_ONBOARDING_MODAL_EXPERIMENT: 'post-onboarding-modal-experiment', // owner: @fercgomes #team-growth multivariate=control,test
PASSKEY_SIGNUP_ENABLED: 'passkey-signup-enabled', // owner: @reecejones #team-platform-features
@@ -445,7 +445,6 @@ export const FEATURE_FLAGS = {
SURVEYS_FORM_BUILDER: 'surveys-form-builder', // owner: @adboio #team-surveys
SURVEY_HEADLINE_SUMMARY: 'survey-headline-summary', // owner: @adboio #team-surveys
SURVEYS_INSIGHT_BUTTON_EXPERIMENT: 'ask-users-why-ai-vs-quickcreate', // owner: @adboio #team-surveys multivariate=true
- SURVEYS_TOOLBAR: 'surveys-toolbar', // owner: @fcgomes
SURVEYS_WEB_ANALYTICS_CROSS_SELL: 'surveys-in-web-analytics', // owner: @adboio #team-surveys
TASK_SUMMARIES: 'task-summaries', // owner: #team-llm-analytics
TASK_TOOL: 'phai-task-tool', // owner: @kappa90 #team-posthog-ai
diff --git a/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx b/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx
index 06d6a895b706..12492391d81f 100644
--- a/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx
+++ b/products/llm_analytics/frontend/evaluations/LLMAnalyticsEvaluation.tsx
@@ -35,6 +35,8 @@ import { modelPickerLogic } from '../modelPickerLogic'
import { providerKeyStateIssueDescription, providerLabel } from '../settings/providerKeyStateUtils'
import { EvaluationCodeEditor } from './components/EvaluationCodeEditor'
import { EvaluationPromptEditor } from './components/EvaluationPromptEditor'
+import { EvaluationReportConfig } from './components/EvaluationReportConfig'
+import { EvaluationReportsTab } from './components/EvaluationReportsTab'
import { EvaluationRunsTable } from './components/EvaluationRunsTable'
import { EvaluationTriggers } from './components/EvaluationTriggers'
import { LLMEvaluationLogicProps, llmEvaluationLogic } from './llmEvaluationLogic'
@@ -297,6 +299,18 @@ export function LLMAnalyticsEvaluation(): JSX.Element {
),
},
+ !isNewEvaluation &&
+ !!featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_EVALUATIONS_REPORTS] && {
+ key: 'reports',
+ label: 'Reports',
+ 'data-attr': 'llma-evaluation-reports-tab',
+ content: (
+ setActiveTab('configuration')}
+ />
+ ),
+ },
{
key: 'configuration',
label: 'Configuration',
@@ -453,7 +467,20 @@ export function LLMAnalyticsEvaluation(): JSX.Element {
+
+ {/* Scheduled Reports (inline config for new evaluations) */}
+ {isNewEvaluation &&
+ featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_EVALUATIONS_REPORTS] && (
+
+ )}
+
+ {/* Scheduled Reports (for existing evaluations, outside the form) */}
+ {!isNewEvaluation && featureFlags[FEATURE_FLAGS.LLM_ANALYTICS_EVALUATIONS_REPORTS] && (
+
+
+
+ )}
),
},
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
new file mode 100644
index 000000000000..d5752d28c0bc
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
@@ -0,0 +1,455 @@
+import { useActions, useValues } from 'kea'
+import { useEffect, useState } from 'react'
+
+import { LemonButton, LemonDialog, LemonInput, LemonSelect, LemonSwitch, LemonTextArea } from '@posthog/lemon-ui'
+
+import { IntegrationChoice } from 'lib/components/CyclotronJob/integrations/IntegrationChoice'
+import { integrationsLogic } from 'lib/integrations/integrationsLogic'
+import { SlackChannelPicker, SlackNotConfiguredBanner } from 'lib/integrations/SlackIntegrationHelpers'
+
+import { evaluationReportLogic } from '../evaluationReportLogic'
+import type { EvaluationReportDeliveryTarget, EvaluationReportFrequency } from '../types'
+
+const GUIDANCE_PLACEHOLDER =
+ "Optional guidance for the report agent. e.g. 'Focus on cost regressions across models', 'Compare latency between gpt-4o-mini and claude-sonnet', 'Keep it to 2 sections max'"
+
+const FREQUENCY_OPTIONS = [
+ { value: 'hourly' as const, label: 'Hourly' },
+ { value: 'daily' as const, label: 'Daily' },
+ { value: 'weekly' as const, label: 'Weekly' },
+ { value: 'every_n' as const, label: 'Every N evaluations' },
+]
+
+const TRIGGER_THRESHOLD_MIN = 10
+const TRIGGER_THRESHOLD_MAX = 10_000
+const TRIGGER_THRESHOLD_DEFAULT = 100
+
+/** Threshold config shown when frequency is 'every_n' */
+function ThresholdConfig({ value, onChange }: { value: number; onChange: (value: number) => void }): JSX.Element {
+ return (
+
+
Evaluation count threshold
+
onChange(Number(val))}
+ fullWidth
+ />
+
+ A report will be generated after this many new evaluation results arrive. Checked every 5 minutes. Min{' '}
+ {TRIGGER_THRESHOLD_MIN}, max {TRIGGER_THRESHOLD_MAX.toLocaleString()}. Cooldown: at most one report per
+ hour, up to 10 per day.
+
+
+ )
+}
+
+/** Shared delivery targets configuration */
+function DeliveryTargetsConfig({
+ emailValue,
+ onEmailChange,
+ slackIntegrationId,
+ onSlackIntegrationChange,
+ slackChannelValue,
+ onSlackChannelChange,
+}: {
+ emailValue: string
+ onEmailChange: (value: string) => void
+ slackIntegrationId: number | null
+ onSlackIntegrationChange: (value: number | null) => void
+ slackChannelValue: string
+ onSlackChannelChange: (value: string) => void
+}): JSX.Element {
+ const { slackIntegrations, integrations } = useValues(integrationsLogic)
+
+ return (
+ <>
+
+
Email recipients
+
+
Comma-separated email addresses
+
+
+
Slack channel
+ {!slackIntegrations?.length ? (
+
+ ) : (
+
+ {
+ if (newValue !== slackIntegrationId) {
+ onSlackChannelChange('')
+ }
+ onSlackIntegrationChange(newValue)
+ }}
+ />
+ {slackIntegrationId && (
+ onSlackChannelChange(val || '')}
+ integration={integrations!.find((i) => i.id === slackIntegrationId)!}
+ />
+ )}
+
+ )}
+
+ >
+ )
+}
+
+/** Inline config shown during new evaluation creation */
+function PendingReportConfig({ evaluationId }: { evaluationId: string }): JSX.Element {
+ const { pendingConfig } = useValues(evaluationReportLogic({ evaluationId }))
+ const {
+ setPendingEnabled,
+ setPendingFrequency,
+ setPendingEmailValue,
+ setPendingSlackIntegrationId,
+ setPendingSlackChannelValue,
+ setPendingReportPromptGuidance,
+ setPendingTriggerThreshold,
+ } = useActions(evaluationReportLogic({ evaluationId }))
+
+ return (
+
+
+
+
Scheduled reports
+
+ AI-generated analysis of evaluation results. Reports are always available in the Reports tab.
+ Optionally add email or Slack to get notified.
+
+
+
+
+
+ {pendingConfig.enabled && (
+
+
+ Frequency
+ val && setPendingFrequency(val)}
+ options={FREQUENCY_OPTIONS}
+ fullWidth
+ />
+
+ {pendingConfig.frequency === 'every_n' && (
+
+ )}
+
+
+
Report agent guidance (optional)
+
+
+ Steers the agent's focus, section choices, or scope. Appended to the base prompt.
+
+
+
+ )}
+
+ )
+}
+
+/** Toggle-based report management for existing evaluations */
+function ExistingReportConfig({ evaluationId }: { evaluationId: string }): JSX.Element {
+ const logic = evaluationReportLogic({ evaluationId })
+ const { activeReport, reportsLoading } = useValues(logic)
+ const { updateReport, deleteReport, createReport } = useActions(logic)
+
+ // Local state: toggle controls form visibility, Save button creates the report.
+ // All fields are local-first so the user can change multiple things before saving.
+ const [formEnabled, setFormEnabled] = useState(false)
+ const [frequency, setFrequency] = useState('daily')
+ const [emailValue, setEmailValue] = useState('')
+ const [slackIntegrationId, setSlackIntegrationId] = useState(null)
+ const [slackChannelValue, setSlackChannelValue] = useState('')
+ const [guidance, setGuidance] = useState('')
+ const [triggerThreshold, setTriggerThreshold] = useState(TRIGGER_THRESHOLD_DEFAULT)
+
+ // Seed local form state from the active report so the user can edit
+ // any field without having to disable + recreate the schedule.
+ useEffect(() => {
+ if (!activeReport) {
+ return
+ }
+ const emailTarget = activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'email'
+ )
+ const slackTarget = activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'slack'
+ )
+ setFrequency(activeReport.frequency)
+ setEmailValue(emailTarget?.value ?? '')
+ setSlackIntegrationId(slackTarget?.integration_id ?? null)
+ setSlackChannelValue(slackTarget?.channel ?? '')
+ setGuidance(activeReport.report_prompt_guidance ?? '')
+ setTriggerThreshold(activeReport.trigger_threshold ?? TRIGGER_THRESHOLD_DEFAULT)
+ }, [activeReport])
+
+ const isEnabled = !!activeReport || formEnabled
+
+ const handleToggle = (checked: boolean): void => {
+ if (checked) {
+ setFormEnabled(true)
+ } else if (activeReport) {
+ LemonDialog.open({
+ title: 'Disable scheduled reports?',
+ description: 'This will stop all future report deliveries. Past reports will be preserved.',
+ primaryButton: {
+ children: 'Disable',
+ status: 'danger',
+ onClick: () => deleteReport(activeReport.id),
+ },
+ secondaryButton: { children: 'Cancel' },
+ })
+ } else {
+ setFormEnabled(false)
+ }
+ }
+
+ const hasEmail = emailValue.trim().length > 0
+ const hasSlack = slackIntegrationId !== null && slackChannelValue.length > 0
+
+ const handleSave = (): void => {
+ const targets: EvaluationReportDeliveryTarget[] = []
+ if (hasEmail) {
+ targets.push({ type: 'email', value: emailValue.trim() })
+ }
+ if (hasSlack) {
+ targets.push({ type: 'slack', integration_id: slackIntegrationId!, channel: slackChannelValue })
+ }
+ createReport({
+ evaluationId,
+ frequency,
+ delivery_targets: targets,
+ report_prompt_guidance: guidance,
+ trigger_threshold: frequency === 'every_n' ? triggerThreshold : null,
+ })
+ setFormEnabled(false)
+ }
+
+ return (
+
+
+
+
Scheduled reports
+
+ AI-generated analysis of evaluation results. Reports are always available in the Reports tab.
+ Optionally add email or Slack to get notified.
+
+
+
+
+
+ {activeReport ? (
+
+
+ Frequency
+ val && setFrequency(val)}
+ options={FREQUENCY_OPTIONS}
+ fullWidth
+ />
+
+
+ {frequency === 'every_n' && (
+
+ )}
+
+
+
+
+
Report agent guidance (optional)
+
+
+ Steers the agent's focus, section choices, or scope. Appended to the base prompt.
+
+
+
+ {(() => {
+ const currentEmail =
+ activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'email'
+ )?.value ?? ''
+ const currentSlack = activeReport.delivery_targets.find(
+ (t: EvaluationReportDeliveryTarget) => t.type === 'slack'
+ )
+ const currentSlackIntegrationId: number | null = currentSlack?.integration_id ?? null
+ const currentSlackChannel = currentSlack?.channel ?? ''
+ const currentGuidance = activeReport.report_prompt_guidance ?? ''
+ const currentThreshold = activeReport.trigger_threshold ?? TRIGGER_THRESHOLD_DEFAULT
+ const frequencyDirty = frequency !== activeReport.frequency
+ const targetsDirty =
+ emailValue.trim() !== currentEmail ||
+ slackIntegrationId !== currentSlackIntegrationId ||
+ slackChannelValue !== currentSlackChannel
+ const guidanceDirty = guidance !== currentGuidance
+ const thresholdDirty = frequency === 'every_n' && triggerThreshold !== currentThreshold
+ const isDirty = frequencyDirty || targetsDirty || guidanceDirty || thresholdDirty
+ const hasAnyTarget = hasEmail || hasSlack
+ return (
+
+ {
+ const targets: EvaluationReportDeliveryTarget[] = []
+ if (hasEmail) {
+ targets.push({ type: 'email', value: emailValue.trim() })
+ }
+ if (hasSlack) {
+ targets.push({
+ type: 'slack',
+ integration_id: slackIntegrationId!,
+ channel: slackChannelValue,
+ })
+ }
+ const data: Record = {
+ frequency,
+ delivery_targets: targets,
+ report_prompt_guidance: guidance,
+ }
+ if (frequency === 'every_n') {
+ data.trigger_threshold = triggerThreshold
+ }
+ updateReport({
+ reportId: activeReport.id,
+ data,
+ })
+ }}
+ >
+ Save changes
+
+
+ )
+ })()}
+
+ {frequency === 'every_n' ? (
+
+ A report will be generated when{' '}
+ {activeReport.trigger_threshold ?? TRIGGER_THRESHOLD_DEFAULT} new evaluation results arrive.
+ Checked every 5 minutes.
+
+ ) : (
+ activeReport.next_delivery_date && (
+
+ Next delivery: {new Date(activeReport.next_delivery_date).toLocaleString()}
+
+ )
+ )}
+
+
Generated reports appear in the Reports tab.
+
+ ) : (
+ formEnabled && (
+
+
+ Frequency
+ val && setFrequency(val)}
+ options={FREQUENCY_OPTIONS}
+ fullWidth
+ />
+
+ {frequency === 'every_n' && (
+
+ )}
+
+
+
Report agent guidance (optional)
+
+
+ Steers the agent's focus, section choices, or scope. Appended to the base prompt.
+
+
+
+
+ Save report schedule
+
+
+
+ )
+ )}
+
+ )
+}
+
+export function EvaluationReportConfig({ evaluationId }: { evaluationId: string }): JSX.Element {
+ if (evaluationId === 'new') {
+ return
+ }
+ return
+}
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
new file mode 100644
index 000000000000..09a3b9bbef8e
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
@@ -0,0 +1,267 @@
+import { useMemo, useState } from 'react'
+
+import { LemonBadge, LemonButton, LemonCollapse, LemonDivider } from '@posthog/lemon-ui'
+
+import { LemonMarkdown } from 'lib/lemon-ui/LemonMarkdown'
+import { urls } from 'scenes/urls'
+
+import type {
+ EvaluationReportMetrics,
+ EvaluationReportRun,
+ EvaluationReportRunContent,
+ EvaluationReportSection,
+} from '../types'
+
+// Match any UUID in the content — surrounding punctuation (backticks, angle brackets, etc.)
+// is stripped so we don't depend on how the LLM formats references.
+const UUID_REGEX = /[`<]*([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})[`>]*/g
+
+// Rewrite `` backtick tokens into markdown links pointing to the correct
+// trace URL. Uses the citations list to map generation_id → trace_id so the link
+// opens the right trace with the generation highlighted.
+function linkifyUuids(content: string, citationMap: Record): string {
+ return content.replace(UUID_REGEX, (_match, generationId: string) => {
+ const traceId = citationMap[generationId]
+ const url = traceId
+ ? urls.llmAnalyticsTrace(traceId, { event: generationId })
+ : urls.llmAnalyticsTrace(generationId)
+ return `[\`${generationId.slice(0, 8)}...\`](${url})`
+ })
+}
+
+// Strip a leading markdown heading line if it matches the section title.
+// The agent sometimes prefixes each section's content with its own heading,
+// which duplicates the heading the renderer emits separately.
+function stripRedundantLeadingHeading(content: string, sectionTitle: string): string {
+ const match = content.match(/^\s*(#{1,6})\s+(.+?)\s*(?:\r?\n|$)/)
+ if (!match) {
+ return content
+ }
+ const headingText = match[2].trim().toLowerCase()
+ if (headingText.startsWith(sectionTitle.toLowerCase())) {
+ return content.slice(match[0].length).replace(/^\s+/, '')
+ }
+ return content
+}
+
+function ReportSectionContent({
+ section,
+ citationMap,
+}: {
+ section: EvaluationReportSection
+ citationMap: Record
+}): JSX.Element {
+ const markdown = linkifyUuids(stripRedundantLeadingHeading(section.content, section.title), citationMap)
+ return (
+
+ {markdown}
+
+ )
+}
+
+function formatPassRate(rate: number | null | undefined): string {
+ if (rate == null) {
+ return '—'
+ }
+ return `${rate.toFixed(2)}%`
+}
+
+function MetricsCard({ metrics }: { metrics: EvaluationReportMetrics }): JSX.Element {
+ // Period-over-period delta (if we have a previous pass rate to compare)
+ let deltaEl: JSX.Element | null = null
+ if (metrics.previous_pass_rate != null) {
+ const diff = metrics.pass_rate - metrics.previous_pass_rate
+ const arrow = diff > 0 ? '▲' : diff < 0 ? '▼' : '—'
+ const color = diff > 0 ? 'text-success' : diff < 0 ? 'text-danger' : 'text-muted'
+ deltaEl = (
+
+ {arrow} {Math.abs(diff).toFixed(2)}pp vs previous
+
+ )
+ }
+
+ return (
+
+
+
+
Pass rate
+
+ {formatPassRate(metrics.pass_rate)}
+ {deltaEl}
+
+
+
+
Total runs
+
{metrics.total_runs}
+
+
+
Pass
+
{metrics.pass_count}
+
+
+
Fail
+
{metrics.fail_count}
+
+
+
N/A
+
{metrics.na_count}
+
+ {metrics.previous_total_runs != null && (
+
+
Previous runs
+
{metrics.previous_total_runs}
+
+ )}
+
+
+ )
+}
+
+function DeliveryStatusBadge({ status }: { status: string }): JSX.Element {
+ const statusMap: Record = {
+ delivered: { label: 'Delivered', status: 'success' },
+ pending: { label: 'Pending', status: 'muted' },
+ partial_failure: { label: 'Partial failure', status: 'warning' },
+ failed: { label: 'Failed', status: 'danger' },
+ }
+ const info = statusMap[status] || { label: status, status: 'muted' as const }
+ return
+}
+
+export function EvaluationReportViewer({
+ reportRun,
+ onClose,
+ compact = false,
+}: {
+ reportRun: EvaluationReportRun
+ onClose?: () => void
+ /** When true, hides the header/close row — useful when the parent already provides framing (e.g. an expanded table row). */
+ compact?: boolean
+}): JSX.Element {
+ const content = reportRun.content as EvaluationReportRunContent
+ const sections = content.sections ?? []
+ const metrics = content.metrics
+
+ // Build generation_id → trace_id lookup from citations for correct trace URLs
+ const citationMap = useMemo(() => {
+ const map: Record = {}
+ for (const c of content.citations ?? []) {
+ if (c.generation_id && c.trace_id) {
+ map[c.generation_id] = c.trace_id
+ }
+ }
+ return map
+ }, [content.citations])
+
+ // Default to executive summary (first section) expanded. Memoized so Expand/Collapse all
+ // buttons can set the list deterministically.
+ const sectionKeys = useMemo(() => sections.map((_, i) => i.toString()), [sections])
+ const [expandedKeys, setExpandedKeys] = useState(sections.length > 0 ? ['0'] : [])
+
+ const allExpanded = expandedKeys.length === sectionKeys.length && sectionKeys.length > 0
+ const allCollapsed = expandedKeys.length === 0
+
+ return (
+
+ {!compact && (
+ <>
+
+
+
{content.title || 'Report'}
+
+
+ {onClose && (
+
+ Close
+
+ )}
+
+
+ Period: {new Date(reportRun.period_start).toLocaleString()} –{' '}
+ {new Date(reportRun.period_end).toLocaleString()}
+
+
+
+ >
+ )}
+
+ {compact && (
+
+ {content.title ?
{content.title} :
}
+ {sections.length > 0 && (
+
+ setExpandedKeys(sectionKeys)}
+ disabledReason={allExpanded ? 'All sections already expanded' : undefined}
+ >
+ Expand all
+
+ setExpandedKeys([])}
+ disabledReason={allCollapsed ? 'All sections already collapsed' : undefined}
+ >
+ Collapse all
+
+
+ )}
+
+ )}
+
+ {!compact && content.title &&
{content.title} }
+
+ {metrics &&
}
+
+ {sections.length > 0 && (
+ <>
+ {!compact && (
+
+ setExpandedKeys(sectionKeys)}
+ disabledReason={allExpanded ? 'All sections already expanded' : undefined}
+ >
+ Expand all
+
+ setExpandedKeys([])}
+ disabledReason={allCollapsed ? 'All sections already collapsed' : undefined}
+ >
+ Collapse all
+
+
+ )}
+
+
setExpandedKeys(keys as string[])}
+ panels={sections.map((section, idx) => ({
+ key: idx.toString(),
+ header: section.title,
+ content: ,
+ }))}
+ />
+ >
+ )}
+
+ {reportRun.delivery_errors.length > 0 && (
+
+
Delivery errors
+
+ {reportRun.delivery_errors.map((err, i) => (
+ {err}
+ ))}
+
+
+ )}
+
+ )
+}
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx
new file mode 100644
index 000000000000..4fec882bb347
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportsTab.tsx
@@ -0,0 +1,156 @@
+import { useActions, useValues } from 'kea'
+
+import { IconInfo } from '@posthog/icons'
+import { LemonButton, LemonTable, LemonTag, Tooltip } from '@posthog/lemon-ui'
+
+import { TZLabel } from 'lib/components/TZLabel'
+
+import { evaluationReportLogic } from '../evaluationReportLogic'
+import type { EvaluationReportRun } from '../types'
+import { EvaluationReportViewer } from './EvaluationReportViewer'
+
+interface EvaluationReportsTabProps {
+ evaluationId: string
+ /** Called when the user clicks the "Set up scheduled reports" CTA in the empty state. */
+ onConfigureClick?: () => void
+}
+
+const STATUS_STYLES: Record<
+ EvaluationReportRun['delivery_status'],
+ { label: string; type: 'success' | 'warning' | 'danger' | 'muted' }
+> = {
+ delivered: { label: 'Delivered', type: 'success' },
+ pending: { label: 'Pending', type: 'muted' },
+ partial_failure: { label: 'Partial failure', type: 'warning' },
+ failed: { label: 'Failed', type: 'danger' },
+}
+
+export function EvaluationReportsTab({ evaluationId, onConfigureClick }: EvaluationReportsTabProps): JSX.Element {
+ const logic = evaluationReportLogic({ evaluationId })
+ const { reportRuns, reportRunsLoading, reportsLoading, activeReport, generateResultLoading } = useValues(logic)
+ const { generateReport, loadReportRuns } = useActions(logic)
+
+ // No schedule configured at all → CTA pointing to the Configuration tab.
+ // Avoids hiding the Reports tab entirely so it stays discoverable.
+ if (!reportsLoading && !activeReport) {
+ return (
+
+
+
No scheduled reports yet
+
+ Scheduled reports deliver AI-generated analysis of this evaluation's results to email or Slack
+ on a recurring basis.
+
+ {onConfigureClick && (
+
+ Set up scheduled reports
+
+ )}
+
+
+ )
+ }
+
+ return (
+
+
+
+ History of AI-generated reports for this evaluation. Click a row to expand the full report. Schedule
+ and delivery targets are configured in the Configuration tab.
+
+ {activeReport && (
+
+ loadReportRuns(activeReport.id)}
+ loading={reportRunsLoading}
+ >
+ Refresh
+
+ generateReport(activeReport.id)}
+ loading={generateResultLoading}
+ >
+ Generate now
+
+
+ )}
+
+
+
,
+ },
+ {
+ title: 'Title',
+ key: 'title',
+ render: (_, run: EvaluationReportRun) => (
+
+ {run.content?.title || '–'}
+
+ ),
+ },
+ {
+ title: 'Pass rate',
+ key: 'pass_rate',
+ render: (_, run: EvaluationReportRun) => {
+ const pct = run.content?.metrics?.pass_rate ?? run.metadata?.pass_rate
+ return typeof pct === 'number' ? `${pct.toFixed(1)}%` : '–'
+ },
+ },
+ {
+ title: 'Runs',
+ key: 'total_runs',
+ render: (_, run: EvaluationReportRun) =>
+ run.content?.metrics?.total_runs ?? run.metadata?.total_runs ?? '–',
+ },
+ {
+ title: 'Status',
+ key: 'delivery_status',
+ render: (_, run: EvaluationReportRun) => {
+ const info = STATUS_STYLES[run.delivery_status] || {
+ label: run.delivery_status,
+ type: 'default' as const,
+ }
+ return (
+
+ {info.label}
+
+ )
+ },
+ },
+ {
+ key: 'info',
+ width: 0,
+ render: (_, run: EvaluationReportRun) => (
+
+
+
+ ),
+ },
+ ]}
+ expandable={{
+ noIndent: true,
+ expandedRowRender: (run: EvaluationReportRun) => (
+
+
+
+ ),
+ }}
+ emptyState="No reports generated yet"
+ size="small"
+ />
+
+ )
+}
diff --git a/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts b/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
new file mode 100644
index 000000000000..99ebf4422e48
--- /dev/null
+++ b/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
@@ -0,0 +1,241 @@
+import { actions, afterMount, connect, kea, key, listeners, path, props, reducers, selectors } from 'kea'
+import { loaders } from 'kea-loaders'
+
+import api from 'lib/api'
+import { lemonToast } from 'lib/lemon-ui/LemonToast'
+import { teamLogic } from 'scenes/teamLogic'
+
+import type { evaluationReportLogicType } from './evaluationReportLogicType'
+import type {
+ EvaluationReport,
+ EvaluationReportDeliveryTarget,
+ EvaluationReportFrequency,
+ EvaluationReportRun,
+} from './types'
+
+export interface EvaluationReportLogicProps {
+ evaluationId: string
+}
+
+export interface PendingReportConfig {
+ enabled: boolean
+ frequency: EvaluationReportFrequency
+ emailValue: string
+ slackIntegrationId: number | null
+ slackChannelValue: string
+ reportPromptGuidance: string
+ triggerThreshold: number
+}
+
+const DEFAULT_PENDING_CONFIG: PendingReportConfig = {
+ enabled: true,
+ frequency: 'every_n',
+ emailValue: '',
+ slackIntegrationId: null,
+ slackChannelValue: '',
+ reportPromptGuidance: '',
+ triggerThreshold: 100,
+}
+
+export const evaluationReportLogic = kea([
+ path(['products', 'llm_analytics', 'frontend', 'evaluations', 'evaluationReportLogic']),
+ props({} as EvaluationReportLogicProps),
+ key((props) => props.evaluationId),
+ connect({
+ values: [teamLogic, ['currentTeamId']],
+ }),
+
+ actions({
+ // Pending config for new evaluations
+ setPendingEnabled: (enabled: boolean) => ({ enabled }),
+ setPendingFrequency: (frequency: EvaluationReportFrequency) => ({ frequency }),
+ setPendingEmailValue: (emailValue: string) => ({ emailValue }),
+ setPendingSlackIntegrationId: (integrationId: number | null) => ({ integrationId }),
+ setPendingSlackChannelValue: (channelValue: string) => ({ channelValue }),
+ setPendingReportPromptGuidance: (reportPromptGuidance: string) => ({ reportPromptGuidance }),
+ setPendingTriggerThreshold: (triggerThreshold: number) => ({ triggerThreshold }),
+ createPendingReport: (evaluationId: string) => ({ evaluationId }),
+
+ // Existing report actions
+ selectReportRun: (reportRun: EvaluationReportRun | null) => ({ reportRun }),
+ }),
+
+ reducers({
+ pendingConfig: [
+ DEFAULT_PENDING_CONFIG as PendingReportConfig,
+ {
+ setPendingEnabled: (state, { enabled }) => ({ ...state, enabled }),
+ setPendingFrequency: (state, { frequency }) => ({ ...state, frequency }),
+ setPendingEmailValue: (state, { emailValue }) => ({ ...state, emailValue }),
+ setPendingSlackIntegrationId: (state, { integrationId }) => ({
+ ...state,
+ slackIntegrationId: integrationId,
+ slackChannelValue: integrationId !== state.slackIntegrationId ? '' : state.slackChannelValue,
+ }),
+ setPendingSlackChannelValue: (state, { channelValue }) => ({
+ ...state,
+ slackChannelValue: channelValue,
+ }),
+ setPendingReportPromptGuidance: (state, { reportPromptGuidance }) => ({
+ ...state,
+ reportPromptGuidance,
+ }),
+ setPendingTriggerThreshold: (state, { triggerThreshold }) => ({
+ ...state,
+ triggerThreshold,
+ }),
+ },
+ ],
+ selectedReportRun: [
+ null as EvaluationReportRun | null,
+ {
+ selectReportRun: (_, { reportRun }) => reportRun,
+ },
+ ],
+ }),
+
+ loaders(({ props, values }) => ({
+ reports: [
+ [] as EvaluationReport[],
+ {
+ loadReports: async () => {
+ if (props.evaluationId === 'new') {
+ return []
+ }
+ const response = await api.get(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/`
+ )
+ return (response.results || []).filter((r: EvaluationReport) => r.evaluation === props.evaluationId)
+ },
+ createReport: async (params: {
+ evaluationId: string
+ frequency: EvaluationReportFrequency
+ delivery_targets: EvaluationReportDeliveryTarget[]
+ report_prompt_guidance?: string
+ trigger_threshold?: number | null
+ }) => {
+ const body: Record = {
+ evaluation: params.evaluationId,
+ frequency: params.frequency,
+ start_date: new Date().toISOString(),
+ delivery_targets: params.delivery_targets,
+ report_prompt_guidance: params.report_prompt_guidance ?? '',
+ enabled: true,
+ }
+ if (params.frequency === 'every_n' && params.trigger_threshold != null) {
+ body.trigger_threshold = params.trigger_threshold
+ }
+ const report = await api.create(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/`,
+ body
+ )
+ return [...values.reports, report]
+ },
+ updateReport: async ({ reportId, data }: { reportId: string; data: Partial }) => {
+ const updated = await api.update(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/`,
+ data
+ )
+ return values.reports.map((r) => (r.id === reportId ? updated : r))
+ },
+ deleteReport: async (reportId: string) => {
+ await api.update(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/`,
+ { deleted: true }
+ )
+ return values.reports.filter((r) => r.id !== reportId)
+ },
+ },
+ ],
+ reportRuns: [
+ [] as EvaluationReportRun[],
+ {
+ loadReportRuns: async (reportId: string) => {
+ const response = await api.get(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/runs/`
+ )
+ return response || []
+ },
+ },
+ ],
+ generateResult: [
+ null as null,
+ {
+ generateReport: async (reportId: string) => {
+ await api.create(
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/${reportId}/generate/`
+ )
+ return null
+ },
+ },
+ ],
+ })),
+
+ selectors({
+ isNewEvaluation: [(_, p) => [p.evaluationId], (evaluationId: string) => evaluationId === 'new'],
+ activeReport: [
+ (s) => [s.reports],
+ (reports): EvaluationReport | null => {
+ return reports.find((r: EvaluationReport) => r.enabled && !r.deleted) || null
+ },
+ ],
+ }),
+
+ listeners(({ actions, values }) => ({
+ loadReportsSuccess: ({ reports }: { reports: EvaluationReport[] }) => {
+ // Auto-load the run history for the active report so the Reports tab knows
+ // whether to render itself and can show data immediately.
+ const active = reports.find((r: EvaluationReport) => r.enabled && !r.deleted)
+ if (active) {
+ actions.loadReportRuns(active.id)
+ }
+ },
+ generateReportSuccess: () => {
+ lemonToast.success('Report is being generated and will be delivered to your configured targets shortly.')
+ },
+ generateReportFailure: () => {
+ lemonToast.error('Failed to trigger report generation. Please try again.')
+ },
+ createReportSuccess: () => {
+ actions.loadReports()
+ },
+ updateReportSuccess: () => {
+ actions.loadReports()
+ },
+ createPendingReport: ({ evaluationId }) => {
+ const { pendingConfig } = values
+ if (!pendingConfig.enabled) {
+ return
+ }
+ const targets: EvaluationReportDeliveryTarget[] = []
+ if (pendingConfig.emailValue.trim()) {
+ targets.push({ type: 'email', value: pendingConfig.emailValue.trim() })
+ }
+ if (pendingConfig.slackIntegrationId && pendingConfig.slackChannelValue) {
+ targets.push({
+ type: 'slack',
+ integration_id: pendingConfig.slackIntegrationId,
+ channel: pendingConfig.slackChannelValue,
+ })
+ }
+ // The backend auto-creates a default report config on eval creation.
+ // If the user configured delivery targets or custom settings, update
+ // the auto-created report after creation via the existing reports list.
+ if (targets.length > 0 || pendingConfig.reportPromptGuidance.trim()) {
+ actions.createReport({
+ evaluationId,
+ frequency: pendingConfig.frequency,
+ delivery_targets: targets,
+ report_prompt_guidance: pendingConfig.reportPromptGuidance,
+ trigger_threshold: pendingConfig.frequency === 'every_n' ? pendingConfig.triggerThreshold : null,
+ })
+ }
+ },
+ })),
+
+ afterMount(({ actions, props }) => {
+ if (props.evaluationId !== 'new') {
+ actions.loadReports()
+ }
+ }),
+])
diff --git a/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts b/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts
index a8c47c3cbcea..a694ea4e3293 100644
--- a/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts
+++ b/products/llm_analytics/frontend/evaluations/llmEvaluationLogic.ts
@@ -18,6 +18,7 @@ import { LLMProviderKey, llmProviderKeysLogic } from '../settings/llmProviderKey
import { isUnhealthyProviderKeyState } from '../settings/providerKeyStateUtils'
import { queryEvaluationRuns } from '../utils'
import { EVALUATION_SUMMARY_MAX_RUNS } from './constants'
+import { evaluationReportLogic } from './evaluationReportLogic'
import type { llmEvaluationLogicType } from './llmEvaluationLogicType'
import { EvaluationTemplateKey, defaultEvaluationTemplates } from './templates'
import {
@@ -499,6 +500,13 @@ export const llmEvaluationLogic = kea<llmEvaluationLogicType>([
})
},
+ saveEvaluationSuccess: ({ evaluation }) => {
+ if (props.evaluationId === 'new' && evaluation?.id) {
+ // Create the pending report if the user configured one during evaluation creation
+ evaluationReportLogic({ evaluationId: 'new' }).actions.createPendingReport(evaluation.id)
+ }
+ },
+
saveEvaluation: async () => {
try {
const teamId = teamLogic.values.currentTeamId
diff --git a/products/llm_analytics/frontend/evaluations/types.ts b/products/llm_analytics/frontend/evaluations/types.ts
index 44b1e3c1f551..01a0c29f0a24 100644
--- a/products/llm_analytics/frontend/evaluations/types.ts
+++ b/products/llm_analytics/frontend/evaluations/types.ts
@@ -82,6 +82,86 @@ export interface HogTestResult {
error: string | null
}
+export type EvaluationReportFrequency = 'hourly' | 'daily' | 'weekly' | 'every_n'
+
+export interface EvaluationReportDeliveryTarget {
+ type: 'email' | 'slack'
+ value?: string
+ integration_id?: number
+ channel?: string
+}
+
+export interface EvaluationReport {
+ id: string
+ evaluation: string
+ frequency: EvaluationReportFrequency
+ byweekday: string[] | null
+ start_date: string
+ next_delivery_date: string | null
+ delivery_targets: EvaluationReportDeliveryTarget[]
+ max_sample_size: number
+ enabled: boolean
+ deleted: boolean
+ last_delivered_at: string | null
+ /** Optional per-report custom guidance appended to the agent's system prompt. */
+ report_prompt_guidance: string
+ /** Number of new eval results that triggers a report (only for every_n frequency). */
+ trigger_threshold: number | null
+ /** Minimum minutes between count-triggered reports. */
+ cooldown_minutes: number
+ /** Maximum count-triggered report runs per calendar day (UTC). */
+ daily_run_cap: number
+ created_by: number | null
+ created_at: string
+}
+
+/** A titled markdown section of the report (v2: agent-chosen title). */
+export interface EvaluationReportSection {
+ title: string
+ content: string
+}
+
+/** A trace reference cited by the agent to ground a specific finding. */
+export interface EvaluationReportCitation {
+ generation_id: string
+ trace_id: string
+ reason: string
+}
+
+/** Structured metrics computed mechanically from ClickHouse (agent cannot fabricate). */
+export interface EvaluationReportMetrics {
+ total_runs: number
+ pass_count: number
+ fail_count: number
+ na_count: number
+ pass_rate: number
+ period_start: string
+ period_end: string
+ previous_total_runs: number | null
+ previous_pass_rate: number | null
+}
+
+/** Top-level report content stored in EvaluationReportRun.content. */
+export interface EvaluationReportRunContent {
+ title: string
+ sections: EvaluationReportSection[]
+ citations: EvaluationReportCitation[]
+ metrics: EvaluationReportMetrics
+}
+
+export interface EvaluationReportRun {
+ id: string
+ report: string
+ content: EvaluationReportRunContent
+ /** Legacy mirror of content.metrics — populated by the store activity for backwards compat. */
+ metadata: EvaluationReportMetrics
+ period_start: string
+ period_end: string
+ delivery_status: 'pending' | 'delivered' | 'partial_failure' | 'failed'
+ delivery_errors: string[]
+ created_at: string
+}
+
export type EvaluationSummaryFilter = 'all' | 'pass' | 'fail' | 'na'
export interface EvaluationPattern {
From 6544064ddf2f112aa4e078df837d9405012431b8 Mon Sep 17 00:00:00 2001
From: Andrew Maguire
Date: Tue, 14 Apr 2026 13:41:21 +0100
Subject: [PATCH 2/2] fix(llma): address review feedback on evaluation reports
frontend
- Use server-side ?evaluation= filter instead of client-side filtering
on paginated endpoint (prevents silent result drops)
- Replace double non-null assertion with safe find + guard for Slack
integration lookup
- Remove duplicate title rendering in non-compact report viewer mode
---
.../components/EvaluationReportConfig.tsx | 18 +++++++++++-------
.../components/EvaluationReportViewer.tsx | 2 --
.../evaluations/evaluationReportLogic.ts | 4 ++--
3 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
index d5752d28c0bc..49eac5149bfa 100644
--- a/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportConfig.tsx
@@ -92,13 +92,17 @@ function DeliveryTargetsConfig({
onSlackIntegrationChange(newValue)
}}
/>
-                    {slackIntegrationId && (
-                        <SlackChannelPicker
-                            value={slackChannelValue}
-                            onChange={(val) => onSlackChannelChange(val || '')}
-                            integration={integrations!.find((i) => i.id === slackIntegrationId)!}
-                        />
-                    )}
+                    {slackIntegrationId &&
+                        (() => {
+                            const selectedIntegration = integrations?.find((i) => i.id === slackIntegrationId)
+                            return selectedIntegration ? (
+                                <SlackChannelPicker
+                                    value={slackChannelValue}
+                                    onChange={(val) => onSlackChannelChange(val || '')}
+                                    integration={selectedIntegration}
+                                />
+                            ) : null
+                        })()}
)}
diff --git a/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx b/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
index 09a3b9bbef8e..9b8d225ea399 100644
--- a/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
+++ b/products/llm_analytics/frontend/evaluations/components/EvaluationReportViewer.tsx
@@ -211,8 +211,6 @@ export function EvaluationReportViewer({
)}
- {!compact && content.title && <h2>{content.title}</h2>}
-
{metrics && }
{sections.length > 0 && (
diff --git a/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts b/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
index 99ebf4422e48..0e7bb9647fcc 100644
--- a/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
+++ b/products/llm_analytics/frontend/evaluations/evaluationReportLogic.ts
@@ -103,9 +103,9 @@ export const evaluationReportLogic = kea<evaluationReportLogicType>([
return []
}
const response = await api.get(
- `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/`
+ `api/environments/${values.currentTeamId}/llm_analytics/evaluation_reports/?evaluation=${props.evaluationId}`
)
- return (response.results || []).filter((r: EvaluationReport) => r.evaluation === props.evaluationId)
+ return response.results || []
},
createReport: async (params: {
evaluationId: string