Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions eslint.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ export default typescript.config([
'src/sentry/templates/sentry/**/*',
'stylelint.config.js',
'.artifacts/**/*',
// auto-generated by figma code connect
'**/*.figma.tsx',
]),
/**
* Rules are grouped by plugin. If you want to override a specific rule inside
Expand Down Expand Up @@ -466,6 +468,7 @@ export default typescript.config([
rules: {
'@sentry/scraps/no-core-import': 'error',
'@sentry/scraps/no-token-import': 'error',
'@sentry/scraps/prefer-info-text': 'error',
'@sentry/scraps/use-semantic-token': [
'error',
{enabledCategories: ['background', 'border', 'content']},
Expand Down
87 changes: 56 additions & 31 deletions src/sentry/api/endpoints/organization_ai_conversation_details.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import timedelta
from dataclasses import replace
from datetime import datetime, timedelta

import sentry_sdk
from django.utils import timezone
from rest_framework.request import Request
from rest_framework.response import Response
Expand All @@ -14,12 +16,15 @@
from sentry.models.organization import Organization
from sentry.search.eap.occurrences.query_utils import build_escaped_term_filter
from sentry.search.eap.types import SearchResolverConfig
from sentry.search.events.types import SnubaParams
from sentry.snuba.referrer import Referrer
from sentry.snuba.spans_rpc import Spans
from sentry.utils.dates import parse_stats_period

MAX_RETENTION_DAYS = 30

# Base span fields always returned
_WIDENING_STEPS = [timedelta(days=7), timedelta(days=14), timedelta(days=MAX_RETENTION_DAYS)]

AI_CONVERSATION_ATTRIBUTES = [
"span_id",
"trace",
Expand Down Expand Up @@ -68,26 +73,16 @@ def get(self, request: Request, organization: Organization, conversation_id: str
if not features.has("organizations:gen-ai-conversations", organization, actor=request.user):
return Response(status=404)

# Check what date params were passed before calling get_snuba_params
stats_period = request.GET.get("statsPeriod")
has_explicit_range = request.GET.get("start") or request.GET.get("end")

try:
snuba_params = self.get_snuba_params(request, organization)
except NoProjects:
return Response(status=404)

# Enforce 30-day retention limit
max_retention = timedelta(days=MAX_RETENTION_DAYS)
now = timezone.now()
max_retention_cutoff = now - max_retention

if stats_period or not has_explicit_range:
# Always use full 30d range when statsPeriod is passed or no date params
snuba_params.start = max_retention_cutoff
snuba_params.end = now
else:
# Validate explicit start/end aren't older than retention limit
max_retention_cutoff = now - timedelta(days=MAX_RETENTION_DAYS)
has_explicit_range = request.GET.get("start") or request.GET.get("end")

if has_explicit_range:
if snuba_params.start and snuba_params.start < max_retention_cutoff:
return Response(
{"detail": f"start time cannot be older than {MAX_RETENTION_DAYS} days"},
Expand All @@ -99,37 +94,67 @@ def get(self, request: Request, organization: Organization, conversation_id: str
status=400,
)

selected_columns = AI_CONVERSATION_ATTRIBUTES
with handle_query_errors():
if has_explicit_range:
resolved_params = snuba_params
else:
resolved_params = self._resolve_time_window(
snuba_params, request.GET.get("statsPeriod"), now, conversation_id
)

def data_fn(offset: int, limit: int):
return self._fetch_conversation_spans(
snuba_params=snuba_params,
conversation_id=conversation_id,
selected_columns=selected_columns,
offset=offset,
limit=limit,
)
def data_fn(offset: int, limit: int) -> list:
return self._fetch_spans(resolved_params, conversation_id, offset, limit)

with handle_query_errors():
return self.paginate(
request=request,
paginator=GenericOffsetPaginator(data_fn=data_fn),
default_per_page=100,
max_per_page=1000,
)

def _fetch_conversation_spans(
def _resolve_time_window(
self,
base_params: SnubaParams,
stats_period: str | None,
now: datetime,
conversation_id: str,
) -> SnubaParams:
"""Probe progressively wider windows to find which contains the conversation."""
candidates = self._build_widening_params(base_params, stats_period, now)
for params in candidates:
if self._fetch_spans(params, conversation_id, offset=0, limit=1):
return params
return candidates[-1]

def _build_widening_params(
self, base_params: SnubaParams, stats_period: str | None, now: datetime
) -> list[SnubaParams]:
max_retention = timedelta(days=MAX_RETENTION_DAYS)
requested_delta: timedelta | None = (
parse_stats_period(stats_period) if stats_period else None
)

steps: list[timedelta] = []
if requested_delta and requested_delta < max_retention:
steps.append(requested_delta)
for step in _WIDENING_STEPS:
if not steps or step > steps[-1]:
steps.append(step)

return [replace(base_params, start=now - delta, end=now) for delta in steps]

@sentry_sdk.trace
def _fetch_spans(
self,
snuba_params,
snuba_params: SnubaParams,
conversation_id: str,
selected_columns: list[str],
offset: int,
limit: int,
):
) -> list:
result = Spans.run_table_query(
params=snuba_params,
query_string=build_escaped_term_filter("gen_ai.conversation.id", [conversation_id]),
selected_columns=selected_columns,
selected_columns=AI_CONVERSATION_ATTRIBUTES,
orderby=["precise.start_ts"],
offset=offset,
limit=limit,
Expand Down
108 changes: 100 additions & 8 deletions src/sentry/dynamic_sampling/per_org/tasks/queries.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from __future__ import annotations

from collections import defaultdict
from collections.abc import Iterator, Mapping
from dataclasses import dataclass
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from enum import StrEnum
from typing import Any
from typing import Any, Literal

from sentry_protos.snuba.v1.trace_item_attribute_pb2 import ExtrapolationMode

from sentry.dynamic_sampling.per_org.tasks.configuration import BaseDynamicSamplingConfiguration
from sentry.dynamic_sampling.rules.utils import ProjectId
from sentry.dynamic_sampling.tasks.boost_low_volume_transactions import ProjectTransactions
from sentry.dynamic_sampling.tasks.common import (
ACTIVE_ORGS_VOLUMES_DEFAULT_TIME_INTERVAL,
OrganizationDataVolume,
Expand All @@ -27,6 +29,7 @@ class DynamicSamplingQueryFilters(StrEnum):

class DynamicSamplingQueryFields(StrEnum):
DSC_PROJECT_ID = "sentry.dsc.project_id"
DSC_TRANSACTION = "sentry.dsc.transaction"
COUNT = "count()"
COUNT_SAMPLE = "count_sample()"

Expand All @@ -39,32 +42,48 @@ class ProjectVolume:
drop: int


@dataclass
class ProjectTransactionVolumesAccumulator:
transaction_counts: list[tuple[str, float]] = field(default_factory=list)
total_num_transactions: float = 0
num_classes: int = 0


def _get_aggregate_int(row: Mapping[str, Any], column: str) -> int:
return int(row.get(column, 0))


def _get_aggregate_float(row: Mapping[str, Any], column: str) -> float:
return float(row.get(column, 0))


def run_eap_spans_table_query_in_chunks(
query: dict[str, Any],
max_results: int | None = None,
chunk_size: int = 1000,
) -> Iterator[dict[str, Any]]:
offset = 0
current_chunk_size = chunk_size

while True:
result = Spans.run_table_query(**query, offset=offset, limit=chunk_size + 1)
if max_results is not None:
current_chunk_size = min(chunk_size, max_results - offset)

result = Spans.run_table_query(**query, offset=offset, limit=current_chunk_size + 1)
data = result.get("data", [])
more_results = len(data) > chunk_size
more_results = len(data) > current_chunk_size

if more_results:
data = data[:chunk_size]
data = data[:current_chunk_size]

if data:
yield from data
offset += len(data)

if not more_results:
# either we run out of results or we hit the max results limit, in both cases we should stop
if not more_results or (max_results is not None and offset >= max_results):
return

offset += chunk_size


def get_eap_organization_volume(
config: BaseDynamicSamplingConfiguration,
Expand Down Expand Up @@ -155,3 +174,76 @@ def get_eap_project_volumes(
)

return project_volumes


def get_eap_transaction_volumes(
config: BaseDynamicSamplingConfiguration,
time_interval: timedelta = ACTIVE_ORGS_VOLUMES_DEFAULT_TIME_INTERVAL,
order_by_volume: Literal["asc", "desc"] = "asc",
max_transactions: int = 100,
) -> list[ProjectTransactions]:
end_time = datetime.now(UTC)
start_time = end_time - time_interval
volumes_by_project: defaultdict[int, ProjectTransactionVolumesAccumulator] = defaultdict(
ProjectTransactionVolumesAccumulator
)

count_order = (
DynamicSamplingQueryFields.COUNT
if order_by_volume == "asc"
else f"-{DynamicSamplingQueryFields.COUNT}"
)
orderby = [
count_order,
DynamicSamplingQueryFields.DSC_PROJECT_ID,
DynamicSamplingQueryFields.DSC_TRANSACTION,
]

root_project_filter = ",".join(str(project.id) for project in config.projects)
result = Spans.run_table_query(
params=SnubaParams(
start=start_time,
end=end_time,
projects=config.projects,
organization=config.organization,
),
query_string=f"{DynamicSamplingQueryFilters.IS_SEGMENT} {DynamicSamplingQueryFields.DSC_PROJECT_ID}:[{root_project_filter}] has:{DynamicSamplingQueryFields.DSC_TRANSACTION}",
selected_columns=[
DynamicSamplingQueryFields.DSC_PROJECT_ID,
DynamicSamplingQueryFields.DSC_TRANSACTION,
DynamicSamplingQueryFields.COUNT,
],
orderby=orderby,
offset=0,
limit=max_transactions,
referrer=Referrer.DYNAMIC_SAMPLING_PER_ORG_GET_EAP_TRANSACTION_VOLUMES.value,
config=SearchResolverConfig(
auto_fields=True,
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_SERVER_ONLY,
),
sampling_mode=SAMPLING_MODE_HIGHEST_ACCURACY,
)

for row in result.get("data", []):
transaction = row.get(DynamicSamplingQueryFields.DSC_TRANSACTION)
total = _get_aggregate_float(row, DynamicSamplingQueryFields.COUNT)
if total <= 0:
continue

project_id = _get_aggregate_int(row, DynamicSamplingQueryFields.DSC_PROJECT_ID)
project_volumes = volumes_by_project[project_id]

project_volumes.transaction_counts.append((str(transaction), total))
project_volumes.total_num_transactions += total
project_volumes.num_classes += 1

return [
{
"org_id": config.organization.id,
"project_id": project_id,
"transaction_counts": project_volumes.transaction_counts,
"total_num_transactions": project_volumes.total_num_transactions,
"total_num_classes": project_volumes.num_classes,
}
for project_id, project_volumes in sorted(volumes_by_project.items())
]
4 changes: 4 additions & 0 deletions src/sentry/dynamic_sampling/per_org/tasks/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from sentry.dynamic_sampling.per_org.tasks.queries import (
get_eap_organization_volume,
get_eap_project_volumes,
get_eap_transaction_volumes,
)
from sentry.dynamic_sampling.per_org.tasks.telemetry import (
SCHEDULER_BUCKET_ORG_STATUS_METRIC,
Expand Down Expand Up @@ -116,4 +117,7 @@ def run_calculations_per_org_task(org_id: OrganizationId) -> DynamicSamplingStat
if not project_volumes:
return DynamicSamplingStatus.NO_PROJECT_VOLUMES

if not get_eap_transaction_volumes(config):
return DynamicSamplingStatus.NO_TRANSACTION_VOLUMES

return None
1 change: 1 addition & 0 deletions src/sentry/dynamic_sampling/per_org/tasks/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class DynamicSamplingStatus(StrEnum):
NO_SUBSCRIPTION = "no_subscription"
NO_ORG_VOLUME = "no_org_volume"
NO_PROJECT_VOLUMES = "no_project_volumes"
NO_TRANSACTION_VOLUMES = "no_transaction_volumes"
NOT_IN_ROLLOUT = "not_in_rollout"
ORG_HAS_NO_DYNAMIC_SAMPLING = "org_has_no_dynamic_sampling"
ORG_HAS_NO_PROJECTS = "org_has_no_projects"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
OrganizationIntegrationsLoosePermission,
)
from sentry.api.validators.project_codeowners import build_codeowners_associations
from sentry.constants import ObjectStatus
from sentry.integrations.services.integration import integration_service
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.models.projectcodeowners import ProjectCodeOwners


Expand All @@ -30,10 +28,7 @@ def get(self, request: Request, organization: Organization) -> Response:
Returns all ProjectCodeOwners associations for an organization as a dict with projects as keys
e.g. {"projectSlug": {associations: {...}, errors: {...}}, ...]
"""
projects = Project.objects.filter(
organization=organization,
status=ObjectStatus.ACTIVE,
)
projects = self.get_projects(request, organization)
project_code_owners = ProjectCodeOwners.objects.filter(project__in=projects)
provider = request.GET.get("provider")
if provider:
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/snuba/referrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,9 @@ class Referrer(StrEnum):
DYNAMIC_SAMPLING_PER_ORG_GET_EAP_PROJECT_VOLUMES = (
"dynamic_sampling.per_org.get_eap_project_volumes"
)
DYNAMIC_SAMPLING_PER_ORG_GET_EAP_TRANSACTION_VOLUMES = (
"dynamic_sampling.per_org.get_eap_transaction_volumes"
)
DYNAMIC_SAMPLING_COUNTERS_FETCH_PROJECTS_WITH_COUNT_PER_TRANSACTION = (
"dynamic_sampling.counters.fetch_projects_with_count_per_transaction_volumes"
)
Expand Down
Loading
Loading