Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions aai_cli/core/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@
# error line, while bounding the payload if an upstream message embeds a body.
_ERROR_MESSAGE_MAX_CHARS = 500

# Outcomes that are a normal part of CLI use, not failures: a clean exit and a
# cancellation by the user (Ctrl-C) or by SIGTERM, e.g. stopping a long `llm` or
# `transcripts get`. Both ship as ``status: info`` with no ``error`` block, so a
# cancel never lands in Datadog Error Tracking or inflates the crash rate.
_NON_ERROR_OUTCOMES = frozenset({"success", "cancelled"})


def client_token() -> str:
"""The write-only intake token: env override first, then the shipped one."""
Expand Down Expand Up @@ -143,20 +149,22 @@ def build_event(
device id is a random UUID minted locally — no account id, email, or
hostname ever rides along.

A failure additionally sets ``status: error`` and the reserved
A genuine failure additionally sets ``status: error`` and the reserved
``error.kind``/``error.message`` so the event feeds Datadog **Error
Tracking** (issue grouping), not just log search. ``error.kind`` reuses the
anonymous ``outcome`` (the ``CLIError.error_type``); ``error.message`` is
the one-line message the user saw (capped at ``_ERROR_MESSAGE_MAX_CHARS``).
Stack traces are still deliberately omitted.
Stack traces are still deliberately omitted. A ``cancelled`` outcome is *not*
a failure (see ``_NON_ERROR_OUTCOMES``) — it stays an ``info`` log with no
``error`` block, since stopping a command is normal CLI use, not a crash.
"""
succeeded = outcome == "success"
is_failure = outcome not in _NON_ERROR_OUTCOMES
event: dict[str, object] = {
"ddsource": "aai-cli",
"service": "aai-cli",
"ddtags": f"version:{__version__}",
"message": f"{command} {outcome}",
"status": "info" if succeeded else "error",
"status": "error" if is_failure else "info",
"command": command,
"outcome": outcome,
"exit_code": exit_code,
Expand All @@ -167,7 +175,7 @@ def build_event(
"ci": bool(env.get("CI")),
"device_id": config.get_device_id(),
}
if not succeeded:
if is_failure:
error: dict[str, object] = {"kind": outcome}
if error_message:
error["message"] = error_message[:_ERROR_MESSAGE_MAX_CHARS]
Expand Down
15 changes: 13 additions & 2 deletions tests/test_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,15 @@ def test_build_event_success_is_info_with_no_error_attribute():
assert "error" not in event


def test_build_event_cancelled_is_info_with_no_error_attribute():
    """A ``cancelled`` outcome is reported as ``info`` and carries no ``error`` key."""
    # Stopping a command (Ctrl-C / SIGTERM) is ordinary CLI use rather than a crash,
    # so the event must stay out of the `error` namespace: that keeps it out of
    # Datadog Error Tracking and out of the crash rate.
    cancelled_event = telemetry.build_event(
        "aai llm",
        outcome="cancelled",
        exit_code=130,
        duration_ms=1,
    )
    assert cancelled_event["status"] == "info"
    assert "error" not in cancelled_event


def test_build_event_failure_feeds_error_tracking(monkeypatch):
monkeypatch.setenv("CI", "true")
event = telemetry.build_event("aai stream", outcome="api_error", exit_code=1, duration_ms=5)
Expand Down Expand Up @@ -371,8 +380,10 @@ def test_track_typer_exit_maps_code(events, code, outcome):
(event,) = events
assert event["outcome"] == outcome
assert event["exit_code"] == code
# A bare typer.Exit carries no message, so the failure event has only the kind.
assert event.get("error") == ({"kind": outcome} if code else None)
# Only a genuine error (exit 3) feeds Error Tracking; a clean exit (0) and a cancel
# (130) are normal CLI use and carry no `error` block. A bare typer.Exit has no message.
assert event.get("error") == ({"kind": outcome} if outcome == "error" else None)
assert event["status"] == ("error" if outcome == "error" else "info")


def test_track_keyboard_interrupt_is_cancelled(events):
Expand Down
Loading