2 changes: 2 additions & 0 deletions .semgrep/rules/idor-team-scoped-models.yaml
@@ -183,6 +183,7 @@ rules:
|Task
|TaskRun
|EmailChannel
|EvaluationReport
|Text
|Threshold
|Ticket
@@ -372,6 +373,7 @@ rules:
|Task
|TaskRun
|EmailChannel
|EvaluationReport
|Text
|Threshold
|Ticket
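Both alternations in the team-scoped-models rule gain an EvaluationReport entry, so semgrep should treat the new model like the other tenant-scoped ones. A minimal sketch of what that presumably catches (the rule's exact matching logic lives in the surrounding YAML, which is not shown in this hunk):

# Presumably flagged by the IDOR rule: lookup by id with no team scoping.
report = EvaluationReport.objects.get(id=report_id)

# Presumably acceptable: lookup constrained to the current team.
report = EvaluationReport.objects.get(id=report_id, team_id=team.id)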
9 changes: 5 additions & 4 deletions frontend/src/generated/core/api.schemas.ts

Some generated files are not rendered by default.

8 changes: 8 additions & 0 deletions posthog/api/__init__.py
@@ -62,6 +62,7 @@
DatasetItemViewSet,
DatasetViewSet,
EvaluationConfigViewSet,
EvaluationReportViewSet,
EvaluationRunViewSet,
EvaluationViewSet,
LLMAnalyticsClusteringRunViewSet,
@@ -1380,6 +1381,13 @@ def register_grandfathered_environment_nested_viewset(
["team_id"],
)

environments_router.register(
r"llm_analytics/evaluation_reports",
EvaluationReportViewSet,
"environment_llm_analytics_evaluation_reports",
["team_id"],
)
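Assuming PostHog's usual environment-nested routing, this registration yields routes of roughly this shape (inferred from the viewset's actions and url_path values below, not from a generated route table):

/api/environments/{team_id}/llm_analytics/evaluation_reports/                # GET list, POST create
/api/environments/{team_id}/llm_analytics/evaluation_reports/{id}/           # GET, PATCH, PUT, DELETE
/api/environments/{team_id}/llm_analytics/evaluation_reports/{id}/runs/      # GET run history
/api/environments/{team_id}/llm_analytics/evaluation_reports/{id}/generate/  # POST manual trigger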

environments_router.register(
r"change_requests",
approval_api.ChangeRequestViewSet,
2 changes: 2 additions & 0 deletions products/llm_analytics/backend/api/__init__.py
@@ -3,6 +3,7 @@
from .clustering_job import ClusteringJobViewSet
from .datasets import DatasetItemViewSet, DatasetViewSet
from .evaluation_config import EvaluationConfigViewSet
from .evaluation_reports import EvaluationReportViewSet
from .evaluation_runs import EvaluationRunViewSet
from .evaluation_summary import LLMEvaluationSummaryViewSet
from .evaluations import EvaluationViewSet
@@ -31,6 +32,7 @@
"DatasetViewSet",
"DatasetItemViewSet",
"EvaluationViewSet",
"EvaluationReportViewSet",
"EvaluationRunViewSet",
"EvaluationConfigViewSet",
"LLMProviderKeyViewSet",
201 changes: 201 additions & 0 deletions products/llm_analytics/backend/api/evaluation_reports.py
@@ -0,0 +1,201 @@
"""API endpoints for evaluation report configuration and report run history."""

import datetime as dt

from django.conf import settings
from django.db.models import QuerySet

import structlog
from asgiref.sync import async_to_sync
from drf_spectacular.utils import extend_schema
from rest_framework import serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.request import Request
from rest_framework.response import Response

from posthog.api.routing import TeamAndOrgViewSetMixin
from posthog.permissions import AccessControlPermission

from products.llm_analytics.backend.api.metrics import llma_track_latency
from products.llm_analytics.backend.models.evaluation_reports import EvaluationReport, EvaluationReportRun

logger = structlog.get_logger(__name__)


class EvaluationReportSerializer(serializers.ModelSerializer):
class Meta:
model = EvaluationReport
fields = [
"id",
"evaluation",
"frequency",
"byweekday",
"start_date",
"next_delivery_date",
"delivery_targets",
"max_sample_size",
"enabled",
"deleted",
"last_delivered_at",
"report_prompt_guidance",
"trigger_threshold",
"cooldown_minutes",
"daily_run_cap",
"created_by",
"created_at",
]
read_only_fields = ["id", "next_delivery_date", "last_delivered_at", "created_by", "created_at"]

def validate_evaluation(self, value):
# Prevent creating a report in team A that references team B's evaluation:
# the FK queryset is unscoped, so a user with access to multiple teams could
# otherwise cross tenant boundaries by passing a foreign evaluation id.
team = self.context["get_team"]()
if value.team_id != team.id:
raise serializers.ValidationError("Evaluation does not belong to this team.")
return value

def validate(self, attrs):
attrs = super().validate(attrs)
frequency = attrs.get("frequency") or (self.instance.frequency if self.instance else None)
if frequency == EvaluationReport.Frequency.EVERY_N:
threshold = (
attrs.get("trigger_threshold")
if "trigger_threshold" in attrs
else (self.instance.trigger_threshold if self.instance else None)
)
if threshold is None:
raise serializers.ValidationError({"trigger_threshold": "Required when frequency is 'every_n'."})
if threshold < EvaluationReport.TRIGGER_THRESHOLD_MIN:
raise serializers.ValidationError(
{"trigger_threshold": f"Minimum is {EvaluationReport.TRIGGER_THRESHOLD_MIN}."}
)
if threshold > EvaluationReport.TRIGGER_THRESHOLD_MAX:
raise serializers.ValidationError(
{"trigger_threshold": f"Maximum is {EvaluationReport.TRIGGER_THRESHOLD_MAX}."}
)
cooldown = (
attrs.get("cooldown_minutes")
if "cooldown_minutes" in attrs
else (self.instance.cooldown_minutes if self.instance else EvaluationReport.COOLDOWN_MINUTES_DEFAULT)
)
if cooldown < EvaluationReport.COOLDOWN_MINUTES_MIN:
raise serializers.ValidationError(
{"cooldown_minutes": f"Minimum is {EvaluationReport.COOLDOWN_MINUTES_MIN} minutes."}
)
return attrs

def validate_delivery_targets(self, value: list) -> list:
if not isinstance(value, list):
raise serializers.ValidationError("Delivery targets must be a list.")
for target in value:
if not isinstance(target, dict):
raise serializers.ValidationError("Each delivery target must be an object.")
target_type = target.get("type")
if target_type not in ("email", "slack"):
raise serializers.ValidationError(f"Invalid delivery target type: {target_type}")
if target_type == "email" and not target.get("value"):
raise serializers.ValidationError("Email delivery target must include a 'value' field.")
if target_type == "slack" and (not target.get("integration_id") or not target.get("channel")):
raise serializers.ValidationError("Slack delivery target must include 'integration_id' and 'channel'.")
return value
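For illustration, a delivery_targets value that passes this validator mixes both target shapes; the integration id and channel name here are invented:

delivery_targets = [
    {"type": "email", "value": "llm-reports@example.com"},
    {"type": "slack", "integration_id": 42, "channel": "#eval-reports"},
]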

def create(self, validated_data):
request = self.context["request"]
team = self.context["get_team"]()
validated_data["team"] = team
validated_data["created_by"] = request.user
return super().create(validated_data)
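Putting the serializer together, a hypothetical create payload that should satisfy every validator above (field values invented, and the threshold assumed to fall within the model's TRIGGER_THRESHOLD_MIN/MAX bounds):

{
    "evaluation": "<id of an evaluation in the same team>",
    "frequency": "every_n",
    "trigger_threshold": 100,
    "cooldown_minutes": 60,
    "delivery_targets": [{"type": "email", "value": "llm-reports@example.com"}],
    "enabled": true
}

team and created_by are injected server-side in create(), so clients never supply them.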


class EvaluationReportRunSerializer(serializers.ModelSerializer):
class Meta:
model = EvaluationReportRun
fields = [
"id",
"report",
"content",
"metadata",
"period_start",
"period_end",
"delivery_status",
"delivery_errors",
"created_at",
]
read_only_fields = fields


class EvaluationReportViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet):
"""CRUD for evaluation report configurations + report run history."""

scope_object = "llm_analytics"
permission_classes = [AccessControlPermission]
serializer_class = EvaluationReportSerializer
queryset = EvaluationReport.objects.all()

def safely_get_queryset(self, queryset: QuerySet[EvaluationReport]) -> QuerySet[EvaluationReport]:
queryset = queryset.filter(team_id=self.team_id).order_by("-created_at")
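        # Soft-deleted rows stay visible to update/partial_update only,
        # presumably so a PATCH can set deleted back to False; every other
        # action sees live reports.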
if self.action not in ("update", "partial_update"):
queryset = queryset.filter(deleted=False)
return queryset

@llma_track_latency("llma_evaluation_reports_list")
def list(self, request: Request, *args, **kwargs) -> Response:
return super().list(request, *args, **kwargs)

@llma_track_latency("llma_evaluation_reports_create")
def create(self, request: Request, *args, **kwargs) -> Response:
return super().create(request, *args, **kwargs)

@llma_track_latency("llma_evaluation_reports_retrieve")
def retrieve(self, request: Request, *args, **kwargs) -> Response:
return super().retrieve(request, *args, **kwargs)

@llma_track_latency("llma_evaluation_reports_update")
def update(self, request: Request, *args, **kwargs) -> Response:
return super().update(request, *args, **kwargs)

@llma_track_latency("llma_evaluation_reports_partial_update")
def partial_update(self, request: Request, *args, **kwargs) -> Response:
return super().partial_update(request, *args, **kwargs)

def perform_destroy(self, instance):
instance.deleted = True
instance.save(update_fields=["deleted"])
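Because destroy only flips the flag, DELETE is reversible; under the routing assumed above, a hypothetical round-trip looks like:

DELETE .../evaluation_reports/{id}/                      # 204; row kept, deleted=True
GET    .../evaluation_reports/{id}/                      # 404; filtered out by safely_get_queryset
PATCH  .../evaluation_reports/{id}/ {"deleted": false}   # restores the report (deleted is writable)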

@action(detail=True, methods=["get"], url_path="runs")
@llma_track_latency("llma_evaluation_report_runs_list")
def runs(self, request: Request, **kwargs) -> Response:
"""List report runs (history) for this report."""
report = self.get_object()
runs = EvaluationReportRun.objects.filter(report=report).order_by("-created_at")[:50]
serializer = EvaluationReportRunSerializer(runs, many=True)
return Response(serializer.data)

@extend_schema(request=None, responses={202: None})
@action(detail=True, methods=["post"], url_path="generate")
@llma_track_latency("llma_evaluation_report_generate")
def generate(self, request: Request, **kwargs) -> Response:
"""Trigger immediate report generation."""
report = self.get_object()

try:
from posthog.temporal.common.client import sync_connect
from posthog.temporal.llm_analytics.eval_reports.constants import GENERATE_EVAL_REPORT_WORKFLOW_NAME
from posthog.temporal.llm_analytics.eval_reports.types import GenerateAndDeliverEvalReportWorkflowInput

client = sync_connect()
async_to_sync(client.start_workflow)(

Check failure on line 188 in products/llm_analytics/backend/api/evaluation_reports.py (GitHub Actions / Python code quality): Function gets multiple values for keyword argument "id"
GENERATE_EVAL_REPORT_WORKFLOW_NAME,
GenerateAndDeliverEvalReportWorkflowInput(report_id=str(report.id), manual=True),
id=f"eval-report-manual-{report.id}-{dt.datetime.now(tz=dt.UTC).timestamp():.0f}",
task_queue=settings.GENERAL_PURPOSE_TASK_QUEUE,
)
except Exception:
logger.exception("Failed to trigger evaluation report generation", report_id=str(report.id))
return Response(
{"error": "Failed to trigger report generation"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

return Response(status=status.HTTP_202_ACCEPTED)
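For manual testing, the trigger can be exercised with any authenticated HTTP client; a sketch using python-requests, where the host, token scheme, and ids are all placeholders:

import requests

resp = requests.post(
    "https://posthog.example.com/api/environments/123/llm_analytics/evaluation_reports/<report_id>/generate/",
    headers={"Authorization": "Bearer <personal_api_key>"},  # auth scheme assumed
)
assert resp.status_code == 202  # only queues the Temporal workflow; generation happens async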