From f326fb8d970cdae9838d4d069223a2870627d88b Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Mon, 13 Apr 2026 23:00:23 +0100 Subject: [PATCH 1/3] feat(llma): wire up auto-create hook, generated types, and MCP tools Auto-create an EvaluationReport when an evaluation is created via perform_create hook. Add product-scoped generated API types and MCP tool definitions for evaluation reports. Note: global generated files (schema.json, schema.py, snapshots, MCP codegen) should be regenerated via `hogli build:openapi` after merge rather than cherry-picked. --- products/llm_analytics/backend/api/evaluations.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/products/llm_analytics/backend/api/evaluations.py b/products/llm_analytics/backend/api/evaluations.py index 1a4574055298..e044f27b05e6 100644 --- a/products/llm_analytics/backend/api/evaluations.py +++ b/products/llm_analytics/backend/api/evaluations.py @@ -235,6 +235,16 @@ def _get_config_length(instance) -> int: def perform_create(self, serializer): instance = serializer.save() + # Auto-create a default report config so reports are generated from the start. + # Users can later add email/Slack delivery targets if they want notifications. 
+ from products.llm_analytics.backend.models.evaluation_reports import EvaluationReport + + EvaluationReport.objects.create( + team=self.team, + evaluation=instance, + start_date=instance.created_at, + ) + # Calculate properties for tracking conditions = instance.conditions or [] condition_count = len(conditions) From 811aae2e97825e0327a56679cfbba4d58571948c Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Tue, 14 Apr 2026 13:42:57 +0100 Subject: [PATCH 2/3] fix(llma): wrap eval+report auto-create in transaction and hoist import - Wrap perform_create in transaction.atomic() so a failed report creation rolls back the evaluation too - Move EvaluationReport import to module level (no circular risk) --- .../llm_analytics/backend/api/evaluations.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/products/llm_analytics/backend/api/evaluations.py b/products/llm_analytics/backend/api/evaluations.py index e044f27b05e6..66fdec0c88fe 100644 --- a/products/llm_analytics/backend/api/evaluations.py +++ b/products/llm_analytics/backend/api/evaluations.py @@ -1,6 +1,7 @@ import json from typing import Any +from django.db import transaction from django.db.models import Q, QuerySet import structlog @@ -24,6 +25,7 @@ from ..models.evaluation_config import EvaluationConfig from ..models.evaluation_configs import validate_evaluation_configs +from ..models.evaluation_reports import EvaluationReport from ..models.evaluations import Evaluation from ..models.model_configuration import LLMModelConfiguration from ..models.provider_keys import LLMProvider, LLMProviderKey @@ -233,17 +235,16 @@ def _get_config_length(instance) -> int: return 0 def perform_create(self, serializer): - instance = serializer.save() - - # Auto-create a default report config so reports are generated from the start. - # Users can later add email/Slack delivery targets if they want notifications. 
- from products.llm_analytics.backend.models.evaluation_reports import EvaluationReport + with transaction.atomic(): + instance = serializer.save() - EvaluationReport.objects.create( - team=self.team, - evaluation=instance, - start_date=instance.created_at, - ) + # Auto-create a default report config so reports are generated from the start. + # Users can later add email/Slack delivery targets if they want notifications. + EvaluationReport.objects.create( + team=self.team, + evaluation=instance, + start_date=instance.created_at, + ) # Calculate properties for tracking conditions = instance.conditions or [] From a29adada46c17189287b847aa926cf0278edfad8 Mon Sep 17 00:00:00 2001 From: "tests-posthog[bot]" <250237707+tests-posthog[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 15:30:03 +0000 Subject: [PATCH 3/3] chore: update OpenAPI generated types --- .../frontend/generated/api.schemas.ts | 47 ++++++++++++++++++ .../llm_analytics/frontend/generated/api.ts | 40 ++++++++++++---- products/llm_analytics/mcp/tools.yaml | 4 +- services/mcp/src/api/generated.ts | 48 +++++++++++++++++++ 4 files changed, 128 insertions(+), 11 deletions(-) diff --git a/products/llm_analytics/frontend/generated/api.schemas.ts b/products/llm_analytics/frontend/generated/api.schemas.ts index bee6e11bae36..32147967ff16 100644 --- a/products/llm_analytics/frontend/generated/api.schemas.ts +++ b/products/llm_analytics/frontend/generated/api.schemas.ts @@ -453,6 +453,42 @@ export interface PatchedEvaluationReportApi { readonly created_at?: string } +/** + * * `pending` - Pending + * `delivered` - Delivered + * `partial_failure` - Partial Failure + * `failed` - Failed + */ +export type DeliveryStatusEnumApi = (typeof DeliveryStatusEnumApi)[keyof typeof DeliveryStatusEnumApi] + +export const DeliveryStatusEnumApi = { + Pending: 'pending', + Delivered: 'delivered', + PartialFailure: 'partial_failure', + Failed: 'failed', +} as const + +export interface EvaluationReportRunApi { + readonly id: 
string + readonly report: string + readonly content: unknown + readonly metadata: unknown + readonly period_start: string + readonly period_end: string + readonly delivery_status: DeliveryStatusEnumApi + readonly delivery_errors: unknown + readonly created_at: string +} + +export interface PaginatedEvaluationReportRunListApi { + count: number + /** @nullable */ + next?: string | null + /** @nullable */ + previous?: string | null + results: EvaluationReportRunApi[] +} + /** * * `all` - all * `pass` - pass @@ -1426,6 +1462,17 @@ export type LlmAnalyticsEvaluationReportsListParams = { offset?: number } +export type LlmAnalyticsEvaluationReportsRunsListParams = { + /** + * Number of results to return per page. + */ + limit?: number + /** + * The initial index from which to return the results. + */ + offset?: number +} + export type LlmAnalyticsEvaluationSummaryCreate400 = { [key: string]: unknown } export type LlmAnalyticsEvaluationSummaryCreate403 = { [key: string]: unknown } diff --git a/products/llm_analytics/frontend/generated/api.ts b/products/llm_analytics/frontend/generated/api.ts index 25424ef6433c..d8e2de65688b 100644 --- a/products/llm_analytics/frontend/generated/api.ts +++ b/products/llm_analytics/frontend/generated/api.ts @@ -29,6 +29,7 @@ import type { LLMProviderKeyApi, LlmAnalyticsClusteringJobsListParams, LlmAnalyticsEvaluationReportsListParams, + LlmAnalyticsEvaluationReportsRunsListParams, LlmAnalyticsProviderKeysListParams, LlmAnalyticsReviewQueueItemsListParams, LlmAnalyticsReviewQueuesListParams, @@ -42,6 +43,7 @@ import type { PaginatedDatasetListApi, PaginatedEvaluationListApi, PaginatedEvaluationReportListApi, + PaginatedEvaluationReportRunListApi, PaginatedLLMPromptListListApi, PaginatedLLMProviderKeyListApi, PaginatedReviewQueueItemListApi, @@ -541,19 +543,39 @@ export const llmAnalyticsEvaluationReportsGenerateCreate = async ( /** * List report runs (history) for this report. 
*/ -export const getLlmAnalyticsEvaluationReportsRunsRetrieveUrl = (projectId: string, id: string) => { - return `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/` +export const getLlmAnalyticsEvaluationReportsRunsListUrl = ( + projectId: string, + id: string, + params?: LlmAnalyticsEvaluationReportsRunsListParams +) => { + const normalizedParams = new URLSearchParams() + + Object.entries(params || {}).forEach(([key, value]) => { + if (value !== undefined) { + normalizedParams.append(key, value === null ? 'null' : value.toString()) + } + }) + + const stringifiedParams = normalizedParams.toString() + + return stringifiedParams.length > 0 + ? `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/?${stringifiedParams}` + : `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/` } -export const llmAnalyticsEvaluationReportsRunsRetrieve = async ( +export const llmAnalyticsEvaluationReportsRunsList = async ( projectId: string, id: string, - options?: RequestInit -): Promise => { - return apiMutator(getLlmAnalyticsEvaluationReportsRunsRetrieveUrl(projectId, id), { - ...options, - method: 'GET', - }) + params?: LlmAnalyticsEvaluationReportsRunsListParams, + options?: RequestInit +): Promise<PaginatedEvaluationReportRunListApi> => { + return apiMutator<PaginatedEvaluationReportRunListApi>( + getLlmAnalyticsEvaluationReportsRunsListUrl(projectId, id, params), + { + ...options, + method: 'GET', + } + ) } /** diff --git a/products/llm_analytics/mcp/tools.yaml b/products/llm_analytics/mcp/tools.yaml index fda008946a05..b6e174b1f890 100644 --- a/products/llm_analytics/mcp/tools.yaml +++ b/products/llm_analytics/mcp/tools.yaml @@ -312,6 +312,6 @@ tools: llm-analytics-evaluation-reports-generate-create: operation: llm_analytics_evaluation_reports_generate_create enabled: false - llm-analytics-evaluation-reports-runs-retrieve: - operation: llm_analytics_evaluation_reports_runs_retrieve + llm-analytics-evaluation-reports-runs-list: + operation: 
llm_analytics_evaluation_reports_runs_list enabled: false diff --git a/services/mcp/src/api/generated.ts b/services/mcp/src/api/generated.ts index aedcbfb5bd93..0545f1b08471 100644 --- a/services/mcp/src/api/generated.ts +++ b/services/mcp/src/api/generated.ts @@ -12996,6 +12996,22 @@ export namespace Schemas { Frequentist: 'frequentist', } as const; + /** + * * `pending` - Pending + * `delivered` - Delivered + * `partial_failure` - Partial Failure + * `failed` - Failed + */ + export type DeliveryStatusEnum = typeof DeliveryStatusEnum[keyof typeof DeliveryStatusEnum]; + + + export const DeliveryStatusEnum = { + Pending: 'pending', + Delivered: 'delivered', + PartialFailure: 'partial_failure', + Failed: 'failed', + } as const; + export interface DependentFlag { /** Feature flag ID */ id: number; @@ -14527,6 +14543,18 @@ export namespace Schemas { readonly created_at: string; } + export interface EvaluationReportRun { + readonly id: string; + readonly report: string; + readonly content: unknown; + readonly metadata: unknown; + readonly period_start: string; + readonly period_end: string; + readonly delivery_status: DeliveryStatusEnum; + readonly delivery_errors: unknown; + readonly created_at: string; + } + /** * * `all` - all * `pass` - pass @@ -20547,6 +20575,15 @@ export namespace Schemas { results: EvaluationReport[]; } + export interface PaginatedEvaluationReportRunList { + count: number; + /** @nullable */ + next?: string | null; + /** @nullable */ + previous?: string | null; + results: EvaluationReportRun[]; + } + export interface PaginatedEventSchemaList { count: number; /** @nullable */ @@ -33709,6 +33746,17 @@ export namespace Schemas { offset?: number; }; + export type LlmAnalyticsEvaluationReportsRunsListParams = { + /** + * Number of results to return per page. + */ + limit?: number; + /** + * The initial index from which to return the results. 
+ */ + offset?: number; + }; + export type LlmAnalyticsEvaluationSummaryCreate400 = {[key: string]: unknown}; export type LlmAnalyticsEvaluationSummaryCreate403 = {[key: string]: unknown};