From b998543e75de3abb152589e523191b3c9a95a4ef Mon Sep 17 00:00:00 2001
From: Sapinder Singh <sapinderpal@outlook.com>
Date: Sat, 23 May 2026 00:10:06 +0530
Subject: [PATCH] Updated package versions and some compatibility fixes

---
 .env.template                                 |  2 +-
 requirements.txt                              | 10 ++---
 src/agent_evaluation/agentic_ops/client.py    | 17 +++++---
 .../agent_inference/multi_tool_agent.py       | 39 +++++++++++++------
 4 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/.env.template b/.env.template
index f53445e..d0d75d5 100644
--- a/.env.template
+++ b/.env.template
@@ -19,7 +19,7 @@ AZURE_AI_MODEL_DEPLOYMENT_NAME=""
 APPLICATIONINSIGHTS_CONNECTION_STRING=""
 APPLICATION_INSIGHTS_WORKSPACE_ID=""
 ENABLE_OTEL=true
-ENABLE_SENSITIVE_DATA=true
+ENABLE_SENSITIVE_DATA=false # Set to true to enable collection of sensitive data such as input prompts, model responses, and evaluation results. This is disabled by default to protect user privacy and comply with data protection regulations. Only enable this if you have a clear need for this data and have implemented appropriate safeguards to protect it.
 
 AZURE_OPENAI_ENDPOINT=""
 AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=""""
diff --git a/requirements.txt b/requirements.txt
index 0bb8405..2aca86e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,16 +1,16 @@
 # Azure SDK dependencies
 azure-identity>=1.25.3
 azure-ai-projects>=2.1.0
-azure-ai-evaluation==1.15.0
+azure-ai-evaluation==1.16.8
 azure-ai-inference>=1.0.0b9
 # Core Python packages
 python-dotenv>=1.2.2
 pyyaml>=6.0.3
 pip-system-certs>=5.3
 azure-monitor-query>=2.0.0
-azure-monitor-opentelemetry>=1.8.7
+azure-monitor-opentelemetry>=1.8.8
 aiohttp>=3.13.5
-agent-framework==1.0.1
-streamlit>=1.56.0
-pandas>=2.3.3
+agent-framework==1.5.0
+streamlit>=1.57.0
+pandas==2.3.3
 plotly>=6.7.0
diff --git a/src/agent_evaluation/agentic_ops/client.py b/src/agent_evaluation/agentic_ops/client.py
index b0f84ad..c34aeb1 100644
--- a/src/agent_evaluation/agentic_ops/client.py
+++ b/src/agent_evaluation/agentic_ops/client.py
@@ -9,6 +9,7 @@
 import json
 import logging
 from openai import AzureOpenAI
+from azure.identity import DefaultAzureCredential, get_bearer_token_provider
 import os
 from dotenv import load_dotenv
 import time
@@ -20,7 +21,6 @@
 
 # Azure OpenAI Configuration
 AZURE_ENDPOINT = os.getenv("EVAL_AZURE_OPENAI_ENDPOINT")
-API_KEY = os.getenv("EVAL_AZURE_OPENAI_KEY")
 API_VERSION = os.getenv("EVAL_AZURE_OPENAI_VERSION")
 DEPLOYMENT_NAME = os.getenv("EVAL_AZURE_OPENAI_MODEL")
 
@@ -32,17 +32,22 @@
 
 
 def get_llm_client_instance():
-    """Get an instance of the Azure OpenAI client."""
-    if not all([AZURE_ENDPOINT, API_KEY, API_VERSION]):
+    """Get an instance of the Azure OpenAI client using DefaultAzureCredential."""
+    if not all([AZURE_ENDPOINT, API_VERSION]):
         raise ValueError(
             "Missing required Azure OpenAI configuration. "
-            "Please check EVAL_AZURE_OPENAI_ENDPOINT, EVAL_AZURE_OPENAI_KEY, "
+            "Please check EVAL_AZURE_OPENAI_ENDPOINT "
             "and EVAL_AZURE_OPENAI_VERSION environment variables."
         )
-    
+
+    credential = DefaultAzureCredential()
+    token_provider = get_bearer_token_provider(
+        credential, "https://cognitiveservices.azure.com/.default"
+    )
+
     return AzureOpenAI(
         azure_endpoint=AZURE_ENDPOINT,
-        api_key=API_KEY,
+        azure_ad_token_provider=token_provider,
         api_version=API_VERSION,
     )
 
diff --git a/src/evaluations/offline/pipeline_multi_tool_agent_evaluation/agent_inference/multi_tool_agent.py b/src/evaluations/offline/pipeline_multi_tool_agent_evaluation/agent_inference/multi_tool_agent.py
index d711c9a..bdd05d0 100644
--- a/src/evaluations/offline/pipeline_multi_tool_agent_evaluation/agent_inference/multi_tool_agent.py
+++ b/src/evaluations/offline/pipeline_multi_tool_agent_evaluation/agent_inference/multi_tool_agent.py
@@ -16,7 +16,7 @@
 from agent_framework import Agent, ChatOptions
 from agent_framework.observability import enable_instrumentation, get_tracer
 from azure.monitor.opentelemetry import configure_azure_monitor
-from opentelemetry import trace
+from opentelemetry import context as otel_context
 from opentelemetry.trace import SpanKind
 from opentelemetry.trace.span import format_trace_id
 from agent_framework.openai import OpenAIChatClient
@@ -120,17 +120,17 @@ async def process_query(agent: Agent, query: str, query_id: str) -> tuple[str, s
     Returns:
         Tuple of (response, trace_id)
     """
-    # Create a new root span to get a unique trace ID
-    with trace.use_span(trace.NonRecordingSpan(trace.SpanContext(
-        trace_id=0,
-        span_id=0,
-        is_remote=False,
-        trace_flags=trace.TraceFlags(0)
-    )), end_on_exit=False):
-        with get_tracer().start_as_current_span(f"Query: {query_id}", kind=SpanKind.CLIENT) as span:
-            trace_id = format_trace_id(span.get_span_context().trace_id)
-            response = await agent.run(query)
-            return str(response), trace_id
+    # Start each query as a brand-new root trace by passing an empty Context.
+    # This avoids inheriting any ambient span state and prevents a NonRecordingSpan
+    # from leaking into agent_framework's instrumentation downstream.
+    with get_tracer().start_as_current_span(
+        f"Query: {query_id}",
+        kind=SpanKind.CLIENT,
+        context=otel_context.Context(),
+    ) as span:
+        trace_id = format_trace_id(span.get_span_context().trace_id)
+        response = await agent.run(query)
+        return str(response), trace_id
 
 
 async def run_inference_async(config: dict) -> None:
@@ -163,6 +163,21 @@ async def run_inference_async(config: dict) -> None:
     if connection_string:
         configure_azure_monitor(connection_string=connection_string)
         enable_instrumentation()
+        # Workaround: enable_instrumentation() activates azure-ai-projects'
+        # ResponsesInstrumentor, which crashes with
+        # "'NonRecordingSpan' object has no attribute 'attributes'" when a
+        # single OpenAI Responses call carries multiple tool-result messages
+        # (parallel tool calls). The data needed for evaluation
+        # (gen_ai.tool.definitions / tool calls) is emitted by
+        # agent_framework's own spans, so disabling just this instrumentor
+        # is safe.
+        try:
+            from azure.ai.projects.telemetry._responses_instrumentor import ResponsesInstrumentor
+            if ResponsesInstrumentor().is_instrumented():
+                ResponsesInstrumentor().uninstrument()
+                logger.info("[AGENT] Disabled azure-ai-projects ResponsesInstrumentor (parallel-tool-call bug workaround)")
+        except Exception as ex:  # pragma: no cover - best-effort workaround
+            logger.warning("[AGENT] Could not uninstrument ResponsesInstrumentor: %s", ex)
         logger.info("[AGENT] Azure Monitor configured with instrumentation")
     else:
         logger.warning("[AGENT] APPLICATIONINSIGHTS_CONNECTION_STRING not set — telemetry disabled")