From f326fb8d970cdae9838d4d069223a2870627d88b Mon Sep 17 00:00:00 2001
From: Andrew Maguire <andrewm4894@gmail.com>
Date: Mon, 13 Apr 2026 23:00:23 +0100
Subject: [PATCH 1/3] feat(llma): wire up auto-create hook, generated types,
 and MCP tools

Auto-create an EvaluationReport when an evaluation is created, via
the perform_create hook. Add product-scoped generated API types and
MCP tool definitions for evaluation reports.

Note: global generated files (schema.json, schema.py, snapshots,
MCP codegen) should be regenerated via `hogli build:openapi` after
merge rather than cherry-picked.
---
 products/llm_analytics/backend/api/evaluations.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/products/llm_analytics/backend/api/evaluations.py b/products/llm_analytics/backend/api/evaluations.py
index 1a4574055298..e044f27b05e6 100644
--- a/products/llm_analytics/backend/api/evaluations.py
+++ b/products/llm_analytics/backend/api/evaluations.py
@@ -235,6 +235,16 @@ def _get_config_length(instance) -> int:
     def perform_create(self, serializer):
         instance = serializer.save()
 
+        # Auto-create a default report config so reports are generated from the start.
+        # Users can later add email/Slack delivery targets if they want notifications.
+        from products.llm_analytics.backend.models.evaluation_reports import EvaluationReport
+
+        EvaluationReport.objects.create(
+            team=self.team,
+            evaluation=instance,
+            start_date=instance.created_at,
+        )
+
         # Calculate properties for tracking
         conditions = instance.conditions or []
         condition_count = len(conditions)

From 811aae2e97825e0327a56679cfbba4d58571948c Mon Sep 17 00:00:00 2001
From: Andrew Maguire <andrewm4894@gmail.com>
Date: Tue, 14 Apr 2026 13:42:57 +0100
Subject: [PATCH 2/3] fix(llma): wrap eval+report auto-create in transaction
 and hoist import

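- Wrap the evaluation save and report auto-create inside perform_create
  in transaction.atomic() so a failed report creation rolls back the
  evaluation too (the tracking code stays outside the atomic block)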
- Move EvaluationReport import to module level (no circular risk)
---
 .../llm_analytics/backend/api/evaluations.py  | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/products/llm_analytics/backend/api/evaluations.py b/products/llm_analytics/backend/api/evaluations.py
index e044f27b05e6..66fdec0c88fe 100644
--- a/products/llm_analytics/backend/api/evaluations.py
+++ b/products/llm_analytics/backend/api/evaluations.py
@@ -1,6 +1,7 @@
 import json
 from typing import Any
 
+from django.db import transaction
 from django.db.models import Q, QuerySet
 
 import structlog
@@ -24,6 +25,7 @@
 
 from ..models.evaluation_config import EvaluationConfig
 from ..models.evaluation_configs import validate_evaluation_configs
+from ..models.evaluation_reports import EvaluationReport
 from ..models.evaluations import Evaluation
 from ..models.model_configuration import LLMModelConfiguration
 from ..models.provider_keys import LLMProvider, LLMProviderKey
@@ -233,17 +235,16 @@ def _get_config_length(instance) -> int:
         return 0
 
     def perform_create(self, serializer):
-        instance = serializer.save()
-
-        # Auto-create a default report config so reports are generated from the start.
-        # Users can later add email/Slack delivery targets if they want notifications.
-        from products.llm_analytics.backend.models.evaluation_reports import EvaluationReport
+        with transaction.atomic():
+            instance = serializer.save()
 
-        EvaluationReport.objects.create(
-            team=self.team,
-            evaluation=instance,
-            start_date=instance.created_at,
-        )
+            # Auto-create a default report config so reports are generated from the start.
+            # Users can later add email/Slack delivery targets if they want notifications.
+            EvaluationReport.objects.create(
+                team=self.team,
+                evaluation=instance,
+                start_date=instance.created_at,
+            )
 
         # Calculate properties for tracking
         conditions = instance.conditions or []

From a29adada46c17189287b847aa926cf0278edfad8 Mon Sep 17 00:00:00 2001
From: "tests-posthog[bot]"
 <250237707+tests-posthog[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 15:30:03 +0000
Subject: [PATCH 3/3] chore: update OpenAPI generated types

---
 .../frontend/generated/api.schemas.ts         | 47 ++++++++++++++++++
 .../llm_analytics/frontend/generated/api.ts   | 40 ++++++++++++----
 products/llm_analytics/mcp/tools.yaml         |  4 +-
 services/mcp/src/api/generated.ts             | 48 +++++++++++++++++++
 4 files changed, 128 insertions(+), 11 deletions(-)

diff --git a/products/llm_analytics/frontend/generated/api.schemas.ts b/products/llm_analytics/frontend/generated/api.schemas.ts
index bee6e11bae36..32147967ff16 100644
--- a/products/llm_analytics/frontend/generated/api.schemas.ts
+++ b/products/llm_analytics/frontend/generated/api.schemas.ts
@@ -453,6 +453,42 @@ export interface PatchedEvaluationReportApi {
     readonly created_at?: string
 }
 
+/**
+ * * `pending` - Pending
+ * `delivered` - Delivered
+ * `partial_failure` - Partial Failure
+ * `failed` - Failed
+ */
+export type DeliveryStatusEnumApi = (typeof DeliveryStatusEnumApi)[keyof typeof DeliveryStatusEnumApi]
+
+export const DeliveryStatusEnumApi = {
+    Pending: 'pending',
+    Delivered: 'delivered',
+    PartialFailure: 'partial_failure',
+    Failed: 'failed',
+} as const
+
+export interface EvaluationReportRunApi {
+    readonly id: string
+    readonly report: string
+    readonly content: unknown
+    readonly metadata: unknown
+    readonly period_start: string
+    readonly period_end: string
+    readonly delivery_status: DeliveryStatusEnumApi
+    readonly delivery_errors: unknown
+    readonly created_at: string
+}
+
+export interface PaginatedEvaluationReportRunListApi {
+    count: number
+    /** @nullable */
+    next?: string | null
+    /** @nullable */
+    previous?: string | null
+    results: EvaluationReportRunApi[]
+}
+
 /**
  * * `all` - all
  * `pass` - pass
@@ -1426,6 +1462,17 @@ export type LlmAnalyticsEvaluationReportsListParams = {
     offset?: number
 }
 
+export type LlmAnalyticsEvaluationReportsRunsListParams = {
+    /**
+     * Number of results to return per page.
+     */
+    limit?: number
+    /**
+     * The initial index from which to return the results.
+     */
+    offset?: number
+}
+
 export type LlmAnalyticsEvaluationSummaryCreate400 = { [key: string]: unknown }
 
 export type LlmAnalyticsEvaluationSummaryCreate403 = { [key: string]: unknown }
diff --git a/products/llm_analytics/frontend/generated/api.ts b/products/llm_analytics/frontend/generated/api.ts
index 25424ef6433c..d8e2de65688b 100644
--- a/products/llm_analytics/frontend/generated/api.ts
+++ b/products/llm_analytics/frontend/generated/api.ts
@@ -29,6 +29,7 @@ import type {
     LLMProviderKeyApi,
     LlmAnalyticsClusteringJobsListParams,
     LlmAnalyticsEvaluationReportsListParams,
+    LlmAnalyticsEvaluationReportsRunsListParams,
     LlmAnalyticsProviderKeysListParams,
     LlmAnalyticsReviewQueueItemsListParams,
     LlmAnalyticsReviewQueuesListParams,
@@ -42,6 +43,7 @@ import type {
     PaginatedDatasetListApi,
     PaginatedEvaluationListApi,
     PaginatedEvaluationReportListApi,
+    PaginatedEvaluationReportRunListApi,
     PaginatedLLMPromptListListApi,
     PaginatedLLMProviderKeyListApi,
     PaginatedReviewQueueItemListApi,
@@ -541,19 +543,39 @@ export const llmAnalyticsEvaluationReportsGenerateCreate = async (
 /**
  * List report runs (history) for this report.
  */
-export const getLlmAnalyticsEvaluationReportsRunsRetrieveUrl = (projectId: string, id: string) => {
-    return `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/`
+export const getLlmAnalyticsEvaluationReportsRunsListUrl = (
+    projectId: string,
+    id: string,
+    params?: LlmAnalyticsEvaluationReportsRunsListParams
+) => {
+    const normalizedParams = new URLSearchParams()
+
+    Object.entries(params || {}).forEach(([key, value]) => {
+        if (value !== undefined) {
+            normalizedParams.append(key, value === null ? 'null' : value.toString())
+        }
+    })
+
+    const stringifiedParams = normalizedParams.toString()
+
+    return stringifiedParams.length > 0
+        ? `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/?${stringifiedParams}`
+        : `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/`
 }
 
-export const llmAnalyticsEvaluationReportsRunsRetrieve = async (
+export const llmAnalyticsEvaluationReportsRunsList = async (
     projectId: string,
     id: string,
-    options?: RequestInit
-): Promise<EvaluationReportApi> => {
-    return apiMutator<EvaluationReportApi>(getLlmAnalyticsEvaluationReportsRunsRetrieveUrl(projectId, id), {
-        ...options,
-        method: 'GET',
-    })
+    params?: LlmAnalyticsEvaluationReportsRunsListParams,
+    options?: RequestInit
+): Promise<PaginatedEvaluationReportRunListApi> => {
+    return apiMutator<PaginatedEvaluationReportRunListApi>(
+        getLlmAnalyticsEvaluationReportsRunsListUrl(projectId, id, params),
+        {
+            ...options,
+            method: 'GET',
+        }
+    )
 }
 
 /**
diff --git a/products/llm_analytics/mcp/tools.yaml b/products/llm_analytics/mcp/tools.yaml
index fda008946a05..b6e174b1f890 100644
--- a/products/llm_analytics/mcp/tools.yaml
+++ b/products/llm_analytics/mcp/tools.yaml
@@ -312,6 +312,6 @@ tools:
     llm-analytics-evaluation-reports-generate-create:
         operation: llm_analytics_evaluation_reports_generate_create
         enabled: false
-    llm-analytics-evaluation-reports-runs-retrieve:
-        operation: llm_analytics_evaluation_reports_runs_retrieve
+    llm-analytics-evaluation-reports-runs-list:
+        operation: llm_analytics_evaluation_reports_runs_list
         enabled: false
diff --git a/services/mcp/src/api/generated.ts b/services/mcp/src/api/generated.ts
index aedcbfb5bd93..0545f1b08471 100644
--- a/services/mcp/src/api/generated.ts
+++ b/services/mcp/src/api/generated.ts
@@ -12996,6 +12996,22 @@ export namespace Schemas {
       Frequentist: 'frequentist',
     } as const;
 
+    /**
+     * * `pending` - Pending
+    * `delivered` - Delivered
+    * `partial_failure` - Partial Failure
+    * `failed` - Failed
+     */
+    export type DeliveryStatusEnum = typeof DeliveryStatusEnum[keyof typeof DeliveryStatusEnum];
+
+
+    export const DeliveryStatusEnum = {
+      Pending: 'pending',
+      Delivered: 'delivered',
+      PartialFailure: 'partial_failure',
+      Failed: 'failed',
+    } as const;
+
     export interface DependentFlag {
       /** Feature flag ID */
       id: number;
@@ -14527,6 +14543,18 @@ export namespace Schemas {
       readonly created_at: string;
     }
 
+    export interface EvaluationReportRun {
+      readonly id: string;
+      readonly report: string;
+      readonly content: unknown;
+      readonly metadata: unknown;
+      readonly period_start: string;
+      readonly period_end: string;
+      readonly delivery_status: DeliveryStatusEnum;
+      readonly delivery_errors: unknown;
+      readonly created_at: string;
+    }
+
     /**
      * * `all` - all
     * `pass` - pass
@@ -20547,6 +20575,15 @@ export namespace Schemas {
       results: EvaluationReport[];
     }
 
+    export interface PaginatedEvaluationReportRunList {
+      count: number;
+      /** @nullable */
+      next?: string | null;
+      /** @nullable */
+      previous?: string | null;
+      results: EvaluationReportRun[];
+    }
+
     export interface PaginatedEventSchemaList {
       count: number;
       /** @nullable */
@@ -33709,6 +33746,17 @@ export namespace Schemas {
     offset?: number;
     };
 
+    export type LlmAnalyticsEvaluationReportsRunsListParams = {
+    /**
+     * Number of results to return per page.
+     */
+    limit?: number;
+    /**
+     * The initial index from which to return the results.
+     */
+    offset?: number;
+    };
+
     export type LlmAnalyticsEvaluationSummaryCreate400 = {[key: string]: unknown};
 
     export type LlmAnalyticsEvaluationSummaryCreate403 = {[key: string]: unknown};