diff --git a/products/llm_analytics/backend/api/evaluations.py b/products/llm_analytics/backend/api/evaluations.py index 1a4574055298..66fdec0c88fe 100644 --- a/products/llm_analytics/backend/api/evaluations.py +++ b/products/llm_analytics/backend/api/evaluations.py @@ -1,6 +1,7 @@ import json from typing import Any +from django.db import transaction from django.db.models import Q, QuerySet import structlog @@ -24,6 +25,7 @@ from ..models.evaluation_config import EvaluationConfig from ..models.evaluation_configs import validate_evaluation_configs +from ..models.evaluation_reports import EvaluationReport from ..models.evaluations import Evaluation from ..models.model_configuration import LLMModelConfiguration from ..models.provider_keys import LLMProvider, LLMProviderKey @@ -233,7 +235,16 @@ def _get_config_length(instance) -> int: return 0 def perform_create(self, serializer): - instance = serializer.save() + with transaction.atomic(): + instance = serializer.save() + + # Auto-create a default report config so reports are generated from the start. + # Users can later add email/Slack delivery targets if they want notifications. 
+ EvaluationReport.objects.create( + team=self.team, + evaluation=instance, + start_date=instance.created_at, + ) # Calculate properties for tracking conditions = instance.conditions or [] diff --git a/products/llm_analytics/frontend/generated/api.schemas.ts b/products/llm_analytics/frontend/generated/api.schemas.ts index bee6e11bae36..32147967ff16 100644 --- a/products/llm_analytics/frontend/generated/api.schemas.ts +++ b/products/llm_analytics/frontend/generated/api.schemas.ts @@ -453,6 +453,42 @@ export interface PatchedEvaluationReportApi { readonly created_at?: string } +/** + * * `pending` - Pending + * `delivered` - Delivered + * `partial_failure` - Partial Failure + * `failed` - Failed + */ +export type DeliveryStatusEnumApi = (typeof DeliveryStatusEnumApi)[keyof typeof DeliveryStatusEnumApi] + +export const DeliveryStatusEnumApi = { + Pending: 'pending', + Delivered: 'delivered', + PartialFailure: 'partial_failure', + Failed: 'failed', +} as const + +export interface EvaluationReportRunApi { + readonly id: string + readonly report: string + readonly content: unknown + readonly metadata: unknown + readonly period_start: string + readonly period_end: string + readonly delivery_status: DeliveryStatusEnumApi + readonly delivery_errors: unknown + readonly created_at: string +} + +export interface PaginatedEvaluationReportRunListApi { + count: number + /** @nullable */ + next?: string | null + /** @nullable */ + previous?: string | null + results: EvaluationReportRunApi[] +} + /** * * `all` - all * `pass` - pass @@ -1426,6 +1462,17 @@ export type LlmAnalyticsEvaluationReportsListParams = { offset?: number } +export type LlmAnalyticsEvaluationReportsRunsListParams = { + /** + * Number of results to return per page. + */ + limit?: number + /** + * The initial index from which to return the results. 
+ */ + offset?: number +} + export type LlmAnalyticsEvaluationSummaryCreate400 = { [key: string]: unknown } export type LlmAnalyticsEvaluationSummaryCreate403 = { [key: string]: unknown } diff --git a/products/llm_analytics/frontend/generated/api.ts b/products/llm_analytics/frontend/generated/api.ts index 25424ef6433c..d8e2de65688b 100644 --- a/products/llm_analytics/frontend/generated/api.ts +++ b/products/llm_analytics/frontend/generated/api.ts @@ -29,6 +29,7 @@ import type { LLMProviderKeyApi, LlmAnalyticsClusteringJobsListParams, LlmAnalyticsEvaluationReportsListParams, + LlmAnalyticsEvaluationReportsRunsListParams, LlmAnalyticsProviderKeysListParams, LlmAnalyticsReviewQueueItemsListParams, LlmAnalyticsReviewQueuesListParams, @@ -42,6 +43,7 @@ import type { PaginatedDatasetListApi, PaginatedEvaluationListApi, PaginatedEvaluationReportListApi, + PaginatedEvaluationReportRunListApi, PaginatedLLMPromptListListApi, PaginatedLLMProviderKeyListApi, PaginatedReviewQueueItemListApi, @@ -541,19 +543,39 @@ export const llmAnalyticsEvaluationReportsGenerateCreate = async ( /** * List report runs (history) for this report. */ -export const getLlmAnalyticsEvaluationReportsRunsRetrieveUrl = (projectId: string, id: string) => { - return `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/` +export const getLlmAnalyticsEvaluationReportsRunsListUrl = ( + projectId: string, + id: string, + params?: LlmAnalyticsEvaluationReportsRunsListParams +) => { + const normalizedParams = new URLSearchParams() + + Object.entries(params || {}).forEach(([key, value]) => { + if (value !== undefined) { + normalizedParams.append(key, value === null ? 'null' : value.toString()) + } + }) + + const stringifiedParams = normalizedParams.toString() + + return stringifiedParams.length > 0 + ? 
`/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/?${stringifiedParams}` + : `/api/environments/${projectId}/llm_analytics/evaluation_reports/${id}/runs/` } -export const llmAnalyticsEvaluationReportsRunsRetrieve = async ( +export const llmAnalyticsEvaluationReportsRunsList = async ( projectId: string, id: string, - options?: RequestInit -): Promise => { - return apiMutator(getLlmAnalyticsEvaluationReportsRunsRetrieveUrl(projectId, id), { - ...options, - method: 'GET', - }) + params?: LlmAnalyticsEvaluationReportsRunsListParams, + options?: RequestInit +): Promise<PaginatedEvaluationReportRunListApi> => { + return apiMutator<PaginatedEvaluationReportRunListApi>( + getLlmAnalyticsEvaluationReportsRunsListUrl(projectId, id, params), + { + ...options, + method: 'GET', + } + ) } /** diff --git a/products/llm_analytics/mcp/tools.yaml b/products/llm_analytics/mcp/tools.yaml index fda008946a05..b6e174b1f890 100644 --- a/products/llm_analytics/mcp/tools.yaml +++ b/products/llm_analytics/mcp/tools.yaml @@ -312,6 +312,6 @@ tools: llm-analytics-evaluation-reports-generate-create: operation: llm_analytics_evaluation_reports_generate_create enabled: false - llm-analytics-evaluation-reports-runs-retrieve: - operation: llm_analytics_evaluation_reports_runs_retrieve + llm-analytics-evaluation-reports-runs-list: + operation: llm_analytics_evaluation_reports_runs_list enabled: false diff --git a/services/mcp/src/api/generated.ts b/services/mcp/src/api/generated.ts index aedcbfb5bd93..0545f1b08471 100644 --- a/services/mcp/src/api/generated.ts +++ b/services/mcp/src/api/generated.ts @@ -12996,6 +12996,22 @@ export namespace Schemas { Frequentist: 'frequentist', } as const; + /** + * * `pending` - Pending + * `delivered` - Delivered + * `partial_failure` - Partial Failure + * `failed` - Failed + */ + export type DeliveryStatusEnum = typeof DeliveryStatusEnum[keyof typeof DeliveryStatusEnum]; + + + export const DeliveryStatusEnum = { + Pending: 'pending', + Delivered: 'delivered', + PartialFailure: 'partial_failure', + Failed:
'failed', + } as const; + export interface DependentFlag { /** Feature flag ID */ id: number; @@ -14527,6 +14543,18 @@ export namespace Schemas { readonly created_at: string; } + export interface EvaluationReportRun { + readonly id: string; + readonly report: string; + readonly content: unknown; + readonly metadata: unknown; + readonly period_start: string; + readonly period_end: string; + readonly delivery_status: DeliveryStatusEnum; + readonly delivery_errors: unknown; + readonly created_at: string; + } + /** * * `all` - all * `pass` - pass @@ -20547,6 +20575,15 @@ export namespace Schemas { results: EvaluationReport[]; } + export interface PaginatedEvaluationReportRunList { + count: number; + /** @nullable */ + next?: string | null; + /** @nullable */ + previous?: string | null; + results: EvaluationReportRun[]; + } + export interface PaginatedEventSchemaList { count: number; /** @nullable */ @@ -33709,6 +33746,17 @@ export namespace Schemas { offset?: number; }; + export type LlmAnalyticsEvaluationReportsRunsListParams = { + /** + * Number of results to return per page. + */ + limit?: number; + /** + * The initial index from which to return the results. + */ + offset?: number; + }; + export type LlmAnalyticsEvaluationSummaryCreate400 = {[key: string]: unknown}; export type LlmAnalyticsEvaluationSummaryCreate403 = {[key: string]: unknown};