Skip to content
24 changes: 22 additions & 2 deletions src/google/adk/agents/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
from typing import Union

from google.genai import types
from opentelemetry import context as _otel_context
from opentelemetry import trace as _otel_trace_api
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
Expand All @@ -43,6 +45,7 @@
from ..features import experimental
from ..features import FeatureName
from ..telemetry import tracing
from ..telemetry.tracing import _safe_detach as _safe_detach_span
from ..telemetry.tracing import tracer
from ..utils.context_utils import Aclosing
from .base_agent_config import BaseAgentConfig
Expand Down Expand Up @@ -285,7 +288,13 @@ async def run_async(
Event: the events generated by the agent.
"""

with tracer.start_as_current_span(f'invoke_agent {self.name}') as span:
# Manual span management instead of start_as_current_span() so that
# _safe_detach_span() can tolerate GeneratorExit cleanup from a different
# contextvars.Context (asyncio asyncgen finalizer).
# See: https://github.com/google/adk-python/issues/4894
span = tracer.start_span(f'invoke_agent {self.name}')
_token = _otel_context.attach(_otel_trace_api.set_span_in_context(span))
try:
ctx = self._create_invocation_context(parent_context)
tracing.trace_agent_invocation(span, self, ctx)
if event := await self._handle_before_agent_callback(ctx):
Expand All @@ -302,6 +311,10 @@ async def run_async(

if event := await self._handle_after_agent_callback(ctx):
yield event
finally:
_safe_detach_span(_token)
if span.is_recording():
span.end()

@final
async def run_live(
Expand All @@ -318,7 +331,10 @@ async def run_live(
Event: the events generated by the agent.
"""

with tracer.start_as_current_span(f'invoke_agent {self.name}') as span:
# Same fix as run_async — see comment there for rationale.
span = tracer.start_span(f'invoke_agent {self.name}')
_token = _otel_context.attach(_otel_trace_api.set_span_in_context(span))
try:
ctx = self._create_invocation_context(parent_context)
tracing.trace_agent_invocation(span, self, ctx)
if event := await self._handle_before_agent_callback(ctx):
Expand All @@ -332,6 +348,10 @@ async def run_live(

if event := await self._handle_after_agent_callback(ctx):
yield event
finally:
_safe_detach_span(_token)
if span.is_recording():
span.end()

async def _run_async_impl(
self, ctx: InvocationContext
Expand Down
55 changes: 48 additions & 7 deletions src/google/adk/telemetry/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,14 +658,47 @@ def _set_common_generate_content_attributes(
span.set_attributes(common_attributes)


def _safe_detach(token: object) -> None:
  """Detach an OTel context token, tolerating cross-context cleanup.

  ``ContextVar.reset(token)`` raises ``ValueError`` when it is called from a
  different ``contextvars.Context`` than the one that produced the token.
  That situation arises when an async generator is closed by asyncio's
  asyncgen finalizer hook (scheduled via ``call_soon`` in the event loop's
  base context) instead of in the task context where the span was opened.

  The public ``opentelemetry.context.detach()`` catches that ``ValueError``
  internally and logs it at ERROR level, so the failure cannot be observed
  (or silenced) at the call site. This helper therefore calls
  ``_RUNTIME_CONTEXT.detach()`` directly and downgrades the ``ValueError``
  to a DEBUG log.

  Span data is fully preserved; only the restoration of the context-variable
  state is skipped, which is acceptable for a generator being discarded.

  This function is meant to be called from ``finally`` blocks ahead of
  ``span.end()``, so it must not raise for reasons unrelated to the token:
  if the private ``_RUNTIME_CONTEXT`` symbol cannot be imported (it is not
  part of OpenTelemetry's public API), the public ``detach()`` is used
  instead. In that fallback path a cross-context token is still handled
  without raising, but OpenTelemetry emits its own ERROR log.

  See: https://github.com/google/adk-python/issues/4894

  Args:
    token: The token returned by the matching ``attach()`` call.
  """
  try:
    from opentelemetry.context import _RUNTIME_CONTEXT  # pylint: disable=import-outside-toplevel
  except ImportError:
    # Private symbol moved or removed in the installed OpenTelemetry release.
    # Degrade to the public API rather than raising out of a ``finally``
    # block, which would mask the original exception and skip ``span.end()``.
    from opentelemetry import context as public_context_api  # pylint: disable=import-outside-toplevel

    public_context_api.detach(token)
    return

  try:
    _RUNTIME_CONTEXT.detach(token)
  except ValueError:
    # Token belongs to another contextvars.Context; nothing to restore here.
    logger.debug(
        'OTel context token from a different Context during generator cleanup'
        ' (generator cancelled mid-flight). Span data is preserved.'
    )


@contextmanager
def _use_native_generate_content_span_stable_semconv(
llm_request: LlmRequest,
common_attributes: Mapping[str, AttributeValue],
) -> Iterator[GenerateContentSpan]:
with tracer.start_as_current_span(
f"generate_content {llm_request.model or ''}"
) as span:
# Use manual span management instead of start_as_current_span() so that
# _safe_detach() can handle GeneratorExit cleanup from a different context.
span = tracer.start_span(f"generate_content {llm_request.model or ''}")
token = otel_context.attach(trace.set_span_in_context(span))
try:
span.set_attribute(GEN_AI_SYSTEM, _guess_gemini_system_name())
_set_common_generate_content_attributes(
span, llm_request, common_attributes
Expand Down Expand Up @@ -693,6 +726,10 @@ def _use_native_generate_content_span_stable_semconv(
)

yield gc_span
finally:
_safe_detach(token)
if span.is_recording():
span.end()


@asynccontextmanager
Expand All @@ -707,10 +744,10 @@ async def _use_native_generate_content_span(
yield gc_span
return

with tracer.start_as_current_span(
f"generate_content {llm_request.model or ''}"
) as span:

# Manual span management — see _safe_detach() for rationale.
span = tracer.start_span(f"generate_content {llm_request.model or ''}")
token = otel_context.attach(trace.set_span_in_context(span))
try:
_set_common_generate_content_attributes(
span, llm_request, common_attributes
)
Expand All @@ -720,6 +757,10 @@ async def _use_native_generate_content_span(
gc_span.operation_details_attributes, llm_request
)
yield gc_span
finally:
_safe_detach(token)
if span.is_recording():
span.end()


class GenerateContentSpan:
Expand Down
1 change: 1 addition & 0 deletions tests/unittests/telemetry/test_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def do_replace(tracer):
monkeypatch.setattr(
tracer, 'start_as_current_span', real_tracer.start_as_current_span
)
monkeypatch.setattr(tracer, 'start_span', real_tracer.start_span)

do_replace(tracing.tracer)
do_replace(base_agent.tracer)
Expand Down
Loading