From aef45fd5b0cf521fbc697259d46d3ffa3bc41f78 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:10:28 +0200
Subject: [PATCH 01/11] =?UTF-8?q?feat:=20Phase=202=20=E2=80=94=20add=20cod?=
 =?UTF-8?q?e=20interpreter=20for=20data=20analysis=20capability?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 agents/tech-trends-agent.json |  7 ++++---
 evals/eval-config.json        | 12 ++++++------
 prompts/tech-trends-agent.md  | 12 ++++++++++++
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json
index 9d6b181..efcf175 100644
--- a/agents/tech-trends-agent.json
+++ b/agents/tech-trends-agent.json
@@ -1,16 +1,17 @@
 {
   "agent_name": "tech-trends-agent",
-  "phase": "1",
+  "phase": "2",
   "definition": {
     "model": "${GPT_DEPLOYMENT}",
     "instructions_file": "prompts/tech-trends-agent.md",
     "tools": [
-      { "type": "web_search" }
+      { "type": "bing_grounding" },
+      { "type": "code_interpreter" }
     ]
   },
   "eval": {
     "dataset": "evals/golden-dataset.json",
-    "phase_filter": "1",
+    "phase_filter": null,
     "config": "evals/eval-config.json"
   },
   "_model_history": [
diff --git a/evals/eval-config.json b/evals/eval-config.json
index 5cafb0a..832031e 100644
--- a/evals/eval-config.json
+++ b/evals/eval-config.json
@@ -1,9 +1,9 @@
 {
   "evaluators": [
-    "builtin.task_adherence",
-    "builtin.relevance",
-    "builtin.groundedness",
-    "builtin.coherence"
+    "TaskAdherenceEvaluator",
+    "RelevanceEvaluator",
+    "GroundednessEvaluator",
+    "CoherenceEvaluator"
   ],
   "thresholds": {
     "task_adherence": 0.80,
@@ -11,6 +11,6 @@
     "groundedness": 0.75,
     "coherence": 0.80
   },
-  "phase_filter": "1",
-  "notes": "Phase 1: Only web search queries evaluated. Phase 2 data analysis queries excluded."
+  "phase_filter": null,
+  "notes": "Phase 2: All queries evaluated — both web search (Phase 1) and data analysis (Phase 2)."
 }
diff --git a/prompts/tech-trends-agent.md b/prompts/tech-trends-agent.md
index e01c7e9..da79c14 100644
--- a/prompts/tech-trends-agent.md
+++ b/prompts/tech-trends-agent.md
@@ -27,3 +27,15 @@ Always structure responses as:
 ## Tone
 Professional, objective, and jargon-aware. Assume the user is a technology
 professional who does not need basic concepts explained.
+
+## Data Analysis (Phase 2)
+You now have access to a code interpreter. Use it when:
+- The user asks you to calculate, compare, or rank numerical data
+- You have retrieved structured data (tables, CSVs) and analysis would add value
+- You need to produce a formatted comparison table from raw information
+
+When using code interpreter:
+1. First retrieve the data via web search
+2. Then write and run Python code to process or compare it
+3. Present results with the code output clearly labelled
+4. Always show the source of the raw data alongside the computed result

From 6b3414f2a41e3ea63f5d524cb2954479bd8036ee Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:11:15 +0200
Subject: [PATCH 02/11] docs: update README to reflect web_search tool
 replacing Bing Grounding

---
 README.md | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 6ec7b2c..a987ecc 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ versioned prompts, tool changes, model upgrades, evaluation gates, and rollback.
 
 ## Agent: Technology Trend Research & Analysis
 
-- **Phase 1:** Web search only (Bing Grounding)
+- **Phase 1:** Web search only (`web_search` tool)
 - **Phase 2:** Web search + Code Interpreter for data analysis
 
 ## Repository Structure
@@ -60,9 +60,7 @@ This creates:
 - An App Registration with a Service Principal
 - 3 federated credentials for GitHub OIDC (main branch, pull requests, tags)
 - RBAC role assignments (Azure AI User, Cognitive Services OpenAI User)
-- 7 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`, `BING_CONNECTION_NAME`)
-
-After bootstrap completes, manually configure the Bing Grounding connection in both Foundry projects via the [Azure AI Foundry portal](https://ai.azure.com) (Project > Connections > + New).
+- 6 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`)
 
 State is saved to `.bootstrap-state.json` for use by the teardown script.
 
@@ -88,7 +86,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
 ```
 
 - Creates branch `feature/phase1-web-search`
-- Configures the agent with Bing Grounding (web search) only
+- Configures the agent with the `web_search` tool
 - Evaluation runs 5 Phase 1 test cases
 - Opens a PR — `evaluate.yml` triggers, deploys to TEST, runs eval
 
@@ -101,7 +99,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
 ```
 
 - Creates branch `feature/phase2-code-interpreter` from updated `main`
-- Adds `code_interpreter` tool alongside `bing_grounding`
+- Adds `code_interpreter` tool alongside `web_search`
 - Extends the system prompt with a `## Data Analysis` section
 - Evaluation now runs all 8 test cases (Phase 1 + Phase 2) — checks for regressions
 - Opens a PR
@@ -163,7 +161,7 @@ az login
 
 # 4. Deploy to test
 source .env  # or export vars manually
-python scripts/deploy_agent.py --env test --semver 1.0.0 --tools bing_grounding
+python scripts/deploy_agent.py --env test --semver 1.0.0 --tools web_search
 ```
 
 ## CI/CD Workflows
@@ -194,7 +192,7 @@ Re-deploys the exact prompt, tools, and model from a saved artifact.
 ## Model Comparison
 
 ```bash
-python scripts/compare_models.py --current gpt-4o-2024-11-20 --candidate gpt-4.1 --tools bing_grounding
+python scripts/compare_models.py --current gpt-4o-2024-11-20 --candidate gpt-4.1 --tools web_search
 ```
 
 Deploys both model versions to test for side-by-side evaluation.

From 4a12a78e4ce1343307e41cbccccc75f57974c1dc Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:13:51 +0200
Subject: [PATCH 03/11] =?UTF-8?q?docs:=20fix=20Phase=202=20description=20?=
 =?UTF-8?q?=E2=80=94=20code=5Finterpreter=20replaces=20web=5Fsearch,=20not?=
 =?UTF-8?q?=20additive?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a987ecc..4a1c687 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ versioned prompts, tool changes, model upgrades, evaluation gates, and rollback.
 ## Agent: Technology Trend Research & Analysis
 
 - **Phase 1:** Web search only (`web_search` tool)
-- **Phase 2:** Web search + Code Interpreter for data analysis
+- **Phase 2:** Code Interpreter only (`code_interpreter` tool) for data analysis
 
 ## Repository Structure
 
@@ -99,7 +99,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
 ```
 
 - Creates branch `feature/phase2-code-interpreter` from updated `main`
-- Adds `code_interpreter` tool alongside `web_search`
+- Replaces `web_search` with `code_interpreter` tool
 - Extends the system prompt with a `## Data Analysis` section
 - Evaluation now runs all 8 test cases (Phase 1 + Phase 2) — checks for regressions
 - Opens a PR

From 6d91f99fe6baf14051f4620833d3f10ae0f3fe65 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:23:22 +0200
Subject: [PATCH 04/11] feat: add smoke test, fix tools config, model check in
 bootstrap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- agents/tech-trends-agent.json: replace bing_grounding with code_interpreter
  (Phase 2 uses only code_interpreter, not both tools)
- evaluate.yml: add smoke test step that invokes the agent and validates a
  response before running the full evaluation suite; remove BING_CONNECTION_NAME
- bootstrap.sh: add model availability check (validates current + upgrade target
  gpt-4.1); remove Bing Grounding references and connection variable
- lifecycle/02-phase2-code-interpreter.sh: fix to deploy code_interpreter only
- README: document model upgrade (gpt-4o-2024-11-20 → gpt-4.1), note eval
  naming limitation (action creates new "Agent Evaluation" each run)
---
 .github/workflows/evaluate.yml                | 46 +++++++++-
 README.md                                     | 14 +++
 agents/tech-trends-agent.json                 |  1 -
 scripts/bootstrap.sh                          | 86 ++++++++++++++-----
 .../lifecycle/02-phase2-code-interpreter.sh   |  9 +-
 5 files changed, 126 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 5e81110..8857b57 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -49,7 +49,6 @@ jobs:
         env:
           FOUNDRY_TEST_ENDPOINT:  ${{ vars.FOUNDRY_TEST_ENDPOINT }}
           GPT_DEPLOYMENT:         ${{ vars.GPT_DEPLOYMENT }}
-          BING_CONNECTION_NAME:   ${{ vars.BING_CONNECTION_NAME }}
         run: |
           TOOLS=$(python3 -c "
           import json
@@ -67,6 +66,47 @@ jobs:
             --tools "$TOOLS"
           echo "phase=$PHASE" >> $GITHUB_OUTPUT
 
+      - name: Smoke test — invoke agent and verify response
+        id: smoke
+        env:
+          FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
+        run: |
+          python3 << 'EOF'
+          import os
+          from azure.ai.projects import AIProjectClient
+          from azure.identity import DefaultAzureCredential
+
+          client = AIProjectClient(
+              endpoint=os.environ["FOUNDRY_TEST_ENDPOINT"],
+              credential=DefaultAzureCredential()
+          )
+
+          agent = client.agents.get_agent("tech-trends-agent")
+          thread = client.agents.threads.create()
+          client.agents.messages.create(
+              thread_id=thread.id,
+              role="user",
+              content="What are the latest trends in AI?"
+          )
+          run = client.agents.runs.create_and_process(
+              thread_id=thread.id,
+              agent_id=agent.id
+          )
+
+          if run.status != "completed":
+              print(f"FAIL: Agent run status = {run.status}")
+              raise SystemExit(1)
+
+          messages = client.agents.messages.list(thread_id=thread.id)
+          response = messages.data[0].content[0].text.value
+          if len(response) < 50:
+              print(f"FAIL: Response too short ({len(response)} chars)")
+              raise SystemExit(1)
+
+          print(f"PASS: Agent responded ({len(response)} chars)")
+          print(f"Preview: {response[:200]}...")
+          EOF
+
       - name: Run Foundry evaluation
         id: eval
         uses: microsoft/ai-agent-evals@v3-beta
@@ -82,11 +122,13 @@ jobs:
         uses: actions/github-script@v7
         with:
           script: |
-            const fs = require('fs');
             let body = '## Agent Evaluation Results\n\n';
+            body += `**Agent:** tech-trends-agent:${{ steps.deploy.outputs.agent_version }}\n`;
             body += `**Phase:** ${{ steps.deploy.outputs.phase }}\n`;
             body += `**Model:** ${{ vars.GPT_DEPLOYMENT }}\n`;
             body += `**Commit:** ${context.sha.slice(0,7)}\n\n`;
+            const smokeStatus = '${{ steps.smoke.outcome }}' === 'success' ? '✅' : '❌';
+            body += `**Smoke Test:** ${smokeStatus}\n\n`;
             body += 'Full results are in the [Actions summary](' +
               `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).\n`;
             github.rest.issues.createComment({
diff --git a/README.md b/README.md
index 4a1c687..717a6e9 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,7 @@ This creates:
 - An App Registration with a Service Principal
 - 3 federated credentials for GitHub OIDC (main branch, pull requests, tags)
 - RBAC role assignments (Azure AI User, Cognitive Services OpenAI User)
+- Model availability validation (checks current + upgrade target model)
 - 6 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`)
 
 State is saved to `.bootstrap-state.json` for use by the teardown script.
@@ -113,10 +114,15 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
 ```
 
 - Creates branch `chore/model-upgrade-gpt41`
+- Upgrades model from `gpt-4o-2024-11-20` (default) to `gpt-4.1`
 - Updates the `GPT_DEPLOYMENT` GitHub variable to `gpt-4.1`
 - Adds a model history entry in the agent config
 - Opens a PR — the eval gate verifies the new model scores at or above thresholds
 
+The bootstrap script validates that both the current model and the upgrade target
+(`gpt-4.1`) are available in your chosen Azure region. If `gpt-4.1` is not available,
+the script will list alternatives you can use instead.
+
 **After the eval passes, merge the PR.** The full lifecycle demo is complete.
 
 ### Lifecycle Flow Diagram
@@ -181,6 +187,14 @@ The eval gate uses `microsoft/ai-agent-evals@v3-beta` with four evaluators:
 - **Groundedness** (threshold: 0.75)
 - **Coherence** (threshold: 0.80)
 
+A smoke test step runs before evaluation — it invokes the agent with a test query
+and verifies a valid response is returned.
+
+**Note on evaluation naming:** The `ai-agent-evals` action creates a new evaluation
+group named "Agent Evaluation" on every run (custom names not yet supported). Each run
+is named `Agent tech-trends-agent:<version>`. The PR comment includes the commit SHA
+for traceability.
+
 ## Rollback
 
 ```bash
diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json
index efcf175..1bb67ce 100644
--- a/agents/tech-trends-agent.json
+++ b/agents/tech-trends-agent.json
@@ -5,7 +5,6 @@
     "model": "${GPT_DEPLOYMENT}",
     "instructions_file": "prompts/tech-trends-agent.md",
     "tools": [
-      { "type": "bing_grounding" },
       { "type": "code_interpreter" }
     ]
   },
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 05ef463..9123eeb 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -39,7 +39,6 @@ GPT_MODEL_NAME="gpt-4o"
 GPT_MODEL_VERSION="2024-11-20"
 GPT_DEPLOYMENT_NAME="gpt-4o-2024-11-20"
 GPT_CAPACITY=30
-BING_CONNECTION_NAME="bing-grounding"
 SKIP_FOUNDRY=false
 TEST_ENDPOINT=""
 PROD_ENDPOINT=""
@@ -95,7 +94,7 @@ echo "============================================"
 echo ""
 
 # ---------- Step 1: Resource Group ----------
-echo "[1/7] Creating resource group..."
+echo "[1/8] Creating resource group..."
 az group create \
   --name "$RESOURCE_GROUP" \
   --location "$LOCATION" \
@@ -103,10 +102,10 @@ az group create \
 
 # ---------- Step 2 & 3: Deploy Foundry projects (or skip) ----------
 if [[ "$SKIP_FOUNDRY" == true ]]; then
-  echo "[2/7] Skipping TEST Foundry project (using provided endpoint)"
-  echo "[3/7] Skipping PROD Foundry project (using provided endpoint)"
+  echo "[2/8] Skipping TEST Foundry project (using provided endpoint)"
+  echo "[3/8] Skipping PROD Foundry project (using provided endpoint)"
 else
-  echo "[2/7] Deploying TEST Foundry project..."
+  echo "[2/8] Deploying TEST Foundry project..."
   TEST_OUTPUT=$(az deployment group create \
     --resource-group "$RESOURCE_GROUP" \
     --template-file infra/main.bicep \
@@ -122,7 +121,7 @@ else
   TEST_ENDPOINT=$(echo "$TEST_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])")
   echo "  TEST endpoint: $TEST_ENDPOINT"
 
-  echo "[3/7] Deploying PROD Foundry project..."
+  echo "[3/8] Deploying PROD Foundry project..."
   PROD_OUTPUT=$(az deployment group create \
     --resource-group "$RESOURCE_GROUP" \
     --template-file infra/main.bicep \
@@ -140,7 +139,56 @@ else
 fi
 
-# ---------- Step 4: App Registration + Service Principal ----------
+# ---------- Step 4: Model Availability Validation ----------
-echo "[4/7] Creating App Registration and Service Principal..."
+echo "[4/8] Validating model availability..."
+echo "  Checking if '$GPT_MODEL_NAME' (version: $GPT_MODEL_VERSION) is available in $LOCATION..."
+
+AVAILABLE_MODELS=$(az cognitiveservices model list \
+  --location "$LOCATION" \
+  --query "[?model.name=='$GPT_MODEL_NAME' && model.version=='$GPT_MODEL_VERSION'].model.name" \
+  -o tsv 2>/dev/null || echo "")
+
+if [[ -z "$AVAILABLE_MODELS" ]]; then
+  echo ""
+  echo "  WARNING: Model '$GPT_MODEL_NAME' version '$GPT_MODEL_VERSION' not found in $LOCATION."
+  echo "  Available GPT models in $LOCATION:"
+  az cognitiveservices model list \
+    --location "$LOCATION" \
+    --query "[?model.name.starts_with(@,'gpt')].{name:model.name, version:model.version}" \
+    -o table 2>/dev/null || echo "  (Could not list models — check permissions)"
+  echo ""
+  echo "  The model upgrade lifecycle demo (Phase 3) requires a second model."
+  echo "  You can continue, but ensure GPT_DEPLOYMENT points to a valid model."
+  echo ""
+  read -rp "  Continue anyway? [y/N] " confirm
+  if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
+    echo "Aborted."
+    exit 1
+  fi
+else
+  echo "  ✓ Model '$GPT_MODEL_NAME' version '$GPT_MODEL_VERSION' is available in $LOCATION"
+fi
+
+# Check for upgrade target model (gpt-4.1) availability for Phase 3 demo
+UPGRADE_MODEL="gpt-4.1"
+UPGRADE_AVAILABLE=$(az cognitiveservices model list \
+  --location "$LOCATION" \
+  --query "[?model.name=='$UPGRADE_MODEL'].model.name | [0]" \
+  -o tsv 2>/dev/null || echo "")
+
+if [[ -n "$UPGRADE_AVAILABLE" ]]; then
+  echo "  ✓ Upgrade target '$UPGRADE_MODEL' is also available (Phase 3 model upgrade demo ready)"
+else
+  echo "  ⚠ Upgrade target '$UPGRADE_MODEL' not found in $LOCATION."
+  echo "    Phase 3 model upgrade demo may need a different target model."
+  echo "    Available models:"
+  az cognitiveservices model list \
+    --location "$LOCATION" \
+    --query "[?model.name.starts_with(@,'gpt')].{name:model.name, version:model.version}" \
+    -o table 2>/dev/null || true
+fi
+
+# ---------- Step 5: App Registration + Service Principal ----------
+echo "[5/8] Creating App Registration and Service Principal..."
 APP_ID=$(az ad app create \
   --display-name "$APP_DISPLAY_NAME" \
   --query appId -o tsv)
@@ -149,8 +197,8 @@ SP_OBJ_ID=$(az ad sp create --id "$APP_ID" --query id -o tsv)
 echo "  Client ID:     $APP_ID"
 echo "  SP Object ID:  $SP_OBJ_ID"
 
-# ---------- Step 5: Federated Credentials ----------
-echo "[5/7] Adding federated credentials..."
+# ---------- Step 6: Federated Credentials ----------
+echo "[6/8] Adding federated credentials..."
 
 az ad app federated-credential create \
   --id "$APP_ID" \
@@ -182,8 +230,8 @@ az ad app federated-credential create \
   }" --output none
 echo "  + release tag credential"
 
-# ---------- Step 6: RBAC Role Assignments ----------
-echo "[6/7] Assigning RBAC roles..."
+# ---------- Step 7: RBAC Role Assignments ----------
+echo "[7/8] Assigning RBAC roles..."
 SUBSCRIPTION_ID=$(az account show --query id -o tsv)
 SCOPE="/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP"
 
@@ -219,8 +267,8 @@ if [[ -n "$TEST_ENDPOINT" ]]; then
   fi
 fi
 
-# ---------- Step 7: GitHub Variables ----------
-echo "[7/7] Setting GitHub repository variables..."
+# ---------- Step 8: GitHub Variables ----------
+echo "[8/8] Setting GitHub repository variables..."
 TENANT_ID=$(az account show --query tenantId -o tsv)
 
 gh variable set AZURE_CLIENT_ID       --body "$APP_ID"             --repo "$GITHUB_REPO"
@@ -229,8 +277,7 @@ gh variable set AZURE_SUBSCRIPTION_ID --body "$SUBSCRIPTION_ID"    --repo "$GITH
 gh variable set FOUNDRY_TEST_ENDPOINT --body "$TEST_ENDPOINT"      --repo "$GITHUB_REPO"
 gh variable set FOUNDRY_PROD_ENDPOINT --body "$PROD_ENDPOINT"      --repo "$GITHUB_REPO"
 gh variable set GPT_DEPLOYMENT        --body "$GPT_DEPLOYMENT_NAME" --repo "$GITHUB_REPO"
-gh variable set BING_CONNECTION_NAME  --body "$BING_CONNECTION_NAME" --repo "$GITHUB_REPO"
-echo "  Set 7 variables on $GITHUB_REPO"
+echo "  Set 6 variables on $GITHUB_REPO"
 
 # ---------- Summary ----------
 echo ""
@@ -252,13 +299,11 @@ echo "   Tenant ID:          $TENANT_ID"
 echo "   Subscription ID:    $SUBSCRIPTION_ID"
 echo ""
 echo " GitHub ($GITHUB_REPO):"
-echo "   7 repository variables set"
+echo "   6 repository variables set"
 echo "   3 federated credentials configured"
 echo ""
 echo " Next steps:"
-echo "   1. Configure Bing Grounding connection in both Foundry projects"
-echo "      (Portal: ai.azure.com -> project -> Connections -> + New)"
-echo "   2. Run lifecycle scripts in order:"
+echo "   1. Run lifecycle scripts in order:"
 echo "      ./scripts/lifecycle/01-phase1-web-search.sh"
 echo "      ./scripts/lifecycle/02-phase2-code-interpreter.sh"
 echo "      ./scripts/lifecycle/03-model-upgrade.sh"
@@ -301,9 +346,6 @@ FOUNDRY_PROD_ENDPOINT=$PROD_ENDPOINT
 # Model deployment
 GPT_DEPLOYMENT=$GPT_DEPLOYMENT_NAME
 
-# Bing Grounding connection name
-BING_CONNECTION_NAME=$BING_CONNECTION_NAME
-
 # Resource metadata
 RESOURCE_GROUP=$RESOURCE_GROUP
 LOCATION=$LOCATION
diff --git a/scripts/lifecycle/02-phase2-code-interpreter.sh b/scripts/lifecycle/02-phase2-code-interpreter.sh
index c2df73e..9fad89b 100644
--- a/scripts/lifecycle/02-phase2-code-interpreter.sh
+++ b/scripts/lifecycle/02-phase2-code-interpreter.sh
@@ -26,7 +26,7 @@ git pull origin main
 # Create feature branch
 git checkout -b "$BRANCH"
 
-# --- Agent config: Phase 2, add code_interpreter ---
+# --- Agent config: Phase 2, code_interpreter only ---
 cat > agents/tech-trends-agent.json << 'AGENT_EOF'
 {
   "agent_name": "tech-trends-agent",
@@ -35,7 +35,6 @@ cat > agents/tech-trends-agent.json << 'AGENT_EOF'
     "model": "${GPT_DEPLOYMENT}",
     "instructions_file": "prompts/tech-trends-agent.md",
     "tools": [
-      { "type": "bing_grounding" },
       { "type": "code_interpreter" }
     ]
   },
@@ -126,14 +125,14 @@ PR_URL=$(gh pr create \
   --title "Phase 2: Add Code Interpreter for Data Analysis" \
   --body "$(cat <<'PR_EOF'
 ## Summary
-- Adds `code_interpreter` tool alongside existing `bing_grounding`
+- Replaces `web_search` tool with `code_interpreter` for data analysis
 - Extends system prompt with `## Data Analysis` section
 - Evaluation now runs **all 8 queries** (Phase 1 + Phase 2)
 
 ## Changes
 | File | Change |
 |---|---|
-| `agents/tech-trends-agent.json` | Added `code_interpreter` to tools, phase → `"2"` |
+| `agents/tech-trends-agent.json` | Replaced `web_search` with `code_interpreter`, phase → `"2"` |
 | `prompts/tech-trends-agent.md` | Added `## Data Analysis (Phase 2)` section |
 | `evals/eval-config.json` | `phase_filter` → `null` (run all cases) |
 
@@ -156,7 +155,7 @@ echo " PR created: $PR_URL"
 echo "============================================"
 echo ""
 echo " The evaluate.yml workflow will now:"
-echo "   1. Deploy Phase 2 agent to TEST (both tools)"
+echo "   1. Deploy Phase 2 agent to TEST (code_interpreter only)"
 echo "   2. Run ALL 8 eval queries (Phase 1 + Phase 2)"
 echo "   3. Check for regressions on existing Phase 1 queries"
 echo ""

From 54ec420e8f3ea3f2431e1c30fcc992457bb92045 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:28:32 +0200
Subject: [PATCH 05/11] fix: use correct SDK methods for agent operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- get_agent() → agents.get(agent_name=...)
- Use OpenAI responses API via get_openai_client() for smoke test invocation
- Fix monitor.yml to use agent.latest_version
---
 .github/workflows/evaluate.yml | 33 +++++++++++++--------------------
 .github/workflows/monitor.yml  |  4 ++--
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 8857b57..08a46a9 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -81,30 +81,23 @@ jobs:
               credential=DefaultAzureCredential()
           )
 
-          agent = client.agents.get_agent("tech-trends-agent")
-          thread = client.agents.threads.create()
-          client.agents.messages.create(
-              thread_id=thread.id,
-              role="user",
-              content="What are the latest trends in AI?"
-          )
-          run = client.agents.runs.create_and_process(
-              thread_id=thread.id,
-              agent_id=agent.id
-          )
+          agent = client.agents.get(agent_name="tech-trends-agent")
+          print(f"Agent: {agent.name} (latest version: {agent.latest_version})")
 
-          if run.status != "completed":
-              print(f"FAIL: Agent run status = {run.status}")
-              raise SystemExit(1)
+          openai_client = client.get_openai_client()
+
+          response = openai_client.responses.create(
+              input="What are the latest trends in AI?",
+              extra_body={"agent_reference": {"name": agent.name, "type": "agent_reference"}},
+          )
 
-          messages = client.agents.messages.list(thread_id=thread.id)
-          response = messages.data[0].content[0].text.value
-          if len(response) < 50:
-              print(f"FAIL: Response too short ({len(response)} chars)")
+          output = response.output_text
+          if len(output) < 50:
+              print(f"FAIL: Response too short ({len(output)} chars)")
               raise SystemExit(1)
 
-          print(f"PASS: Agent responded ({len(response)} chars)")
-          print(f"Preview: {response[:200]}...")
+          print(f"PASS: Agent responded ({len(output)} chars)")
+          print(f"Preview: {output[:200]}...")
           EOF
 
       - name: Run Foundry evaluation
diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml
index cd9bbf3..f3d6e80 100644
--- a/.github/workflows/monitor.yml
+++ b/.github/workflows/monitor.yml
@@ -41,8 +41,8 @@ jobs:
           from azure.ai.projects import AIProjectClient
           from azure.identity import DefaultAzureCredential
           client = AIProjectClient(endpoint=os.environ['FOUNDRY_PROD_ENDPOINT'], credential=DefaultAzureCredential())
-          agent = client.agents.get_agent('tech-trends-agent')
-          print(agent.version)
+          agent = client.agents.get(agent_name='tech-trends-agent')
+          print(agent.latest_version)
           ")
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 

From 8bb0fdcb6ce8f8f49e45be3ef471d225062e1add Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:31:40 +0200
Subject: [PATCH 06/11] fix: remove non-existent latest_version attr from
 AgentDetails

- Smoke test: just print agent name (version not needed for invocation)
- Monitor: use list_versions() to find the latest version number
---
 .github/workflows/evaluate.yml | 2 +-
 .github/workflows/monitor.yml  | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 08a46a9..ba77b76 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -82,7 +82,7 @@ jobs:
           )
 
           agent = client.agents.get(agent_name="tech-trends-agent")
-          print(f"Agent: {agent.name} (latest version: {agent.latest_version})")
+          print(f"Agent: {agent.name}")
 
           openai_client = client.get_openai_client()
 
diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml
index f3d6e80..9972715 100644
--- a/.github/workflows/monitor.yml
+++ b/.github/workflows/monitor.yml
@@ -41,8 +41,9 @@ jobs:
           from azure.ai.projects import AIProjectClient
           from azure.identity import DefaultAzureCredential
           client = AIProjectClient(endpoint=os.environ['FOUNDRY_PROD_ENDPOINT'], credential=DefaultAzureCredential())
-          agent = client.agents.get(agent_name='tech-trends-agent')
-          print(agent.latest_version)
+          versions = client.agents.list_versions(agent_name='tech-trends-agent')
+          latest = max(versions, key=lambda v: int(v.version))
+          print(latest.version)
           ")
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 

From 4458f5dcaba49d80cb4a397768d7064033db82df Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 18:36:23 +0200
Subject: [PATCH 07/11] fix: add model param and GPT_DEPLOYMENT env to smoke
 test

The responses.create() call was missing the model parameter, causing
a 404 DeploymentNotFound error in Azure OpenAI.
---
 .github/workflows/evaluate.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index ba77b76..42a20c3 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -70,6 +70,7 @@ jobs:
         id: smoke
         env:
           FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
+          GPT_DEPLOYMENT:        ${{ vars.GPT_DEPLOYMENT }}
         run: |
           python3 << 'EOF'
           import os
@@ -87,6 +88,7 @@ jobs:
           openai_client = client.get_openai_client()
 
           response = openai_client.responses.create(
+              model=os.environ["GPT_DEPLOYMENT"],
               input="What are the latest trends in AI?",
               extra_body={"agent_reference": {"name": agent.name, "type": "agent_reference"}},
           )

From 16eb58899099cff7739c105b95752febfa85a158 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 19:01:00 +0200
Subject: [PATCH 08/11] ci: rework smoke test and PR comment in evaluate.yml

---
 .github/workflows/evaluate.yml | 262 ++++++++++++++++++++++++++++++---
 requirements.txt               |   3 +-
 scripts/bootstrap.sh           |   2 +-
 3 files changed, 244 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 42a20c3..7fc3cd4 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -70,36 +70,52 @@ jobs:
         id: smoke
         env:
           FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
-          GPT_DEPLOYMENT:        ${{ vars.GPT_DEPLOYMENT }}
         run: |
           python3 << 'EOF'
           import os
           from azure.ai.projects import AIProjectClient
           from azure.identity import DefaultAzureCredential
 
-          client = AIProjectClient(
+          # Create the project client
+          project = AIProjectClient(
               endpoint=os.environ["FOUNDRY_TEST_ENDPOINT"],
               credential=DefaultAzureCredential()
           )
 
-          agent = client.agents.get(agent_name="tech-trends-agent")
-          print(f"Agent: {agent.name}")
+          # Get agent metadata for display (version is read defensively: AgentDetails may not expose it)
+          agent = project.agents.get(agent_name="tech-trends-agent")
+          print(f"Agent: {agent.name} (version: {getattr(agent, 'version', 'n/a')})")
 
-          openai_client = client.get_openai_client()
+          # Get the OpenAI client for Responses API
+          openai = project.get_openai_client()
 
-          response = openai_client.responses.create(
-              model=os.environ["GPT_DEPLOYMENT"],
+          # Invoke the agent using the Responses API with agent_reference
+          response = openai.responses.create(
               input="What are the latest trends in AI?",
-              extra_body={"agent_reference": {"name": agent.name, "type": "agent_reference"}},
+              extra_body={
+                  "agent_reference": {
+                      "name": "tech-trends-agent",
+                      "type": "agent_reference",
+                  }
+              },
           )
 
           output = response.output_text
+          print(f"Response ID: {response.id}")
+
           if len(output) < 50:
               print(f"FAIL: Response too short ({len(output)} chars)")
               raise SystemExit(1)
 
           print(f"PASS: Agent responded ({len(output)} chars)")
-          print(f"Preview: {output[:200]}...")
+          print(f"Preview: {output[:300]}...")
+
+          # Export results for downstream steps; GITHUB_OUTPUT is line-based, so collapse preview whitespace
+          gh_output = os.environ.get("GITHUB_OUTPUT", "")
+          if gh_output:
+              with open(gh_output, "a") as f:
+                  f.write(f"response_length={len(output)}\n")
+                  f.write(f"response_preview={' '.join(output[:200].split())}\n")
           EOF
 
       - name: Run Foundry evaluation
@@ -117,18 +133,222 @@ jobs:
         uses: actions/github-script@v7
         with:
           script: |
-            let body = '## Agent Evaluation Results\n\n';
-            body += `**Agent:** tech-trends-agent:${{ steps.deploy.outputs.agent_version }}\n`;
-            body += `**Phase:** ${{ steps.deploy.outputs.phase }}\n`;
-            body += `**Model:** ${{ vars.GPT_DEPLOYMENT }}\n`;
-            body += `**Commit:** ${context.sha.slice(0,7)}\n\n`;
-            const smokeStatus = '${{ steps.smoke.outcome }}' === 'success' ? '✅' : '❌';
-            body += `**Smoke Test:** ${smokeStatus}\n\n`;
-            body += 'Full results are in the [Actions summary](' +
-              `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).\n`;
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
+            const marker = '<!-- agent-eval-bot -->';
+            const smokeOutcome = '${{ steps.smoke.outcome }}';
+            const evalOutcome = '${{ steps.eval.outcome }}';
+            const deployOutcome = '${{ steps.deploy.outcome }}';
+            const agentVersion = '${{ steps.deploy.outputs.agent_version }}' || 'N/A';
+            const phase = '${{ steps.deploy.outputs.phase }}' || 'N/A';
+            const model = '${{ vars.GPT_DEPLOYMENT }}' || 'N/A';
+            const semver = '${{ steps.version.outputs.semver }}';
+            const sha = context.sha.slice(0, 7);
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 19) + ' UTC';
+
+            const icon = (outcome) => outcome === 'success' ? '✅' : outcome === 'failure' ? '❌' : '⚠️';
+            const badge = (outcome) => outcome === 'success'
+              ? '<span style="background:#22c55e;color:#fff;padding:2px 8px;border-radius:4px;font-size:12px;">PASSED</span>'
+              : outcome === 'failure'
+              ? '<span style="background:#ef4444;color:#fff;padding:2px 8px;border-radius:4px;font-size:12px;">FAILED</span>'
+              : '<span style="background:#f59e0b;color:#fff;padding:2px 8px;border-radius:4px;font-size:12px;">SKIPPED</span>';
+
+            const overallStatus = (smokeOutcome === 'success' && evalOutcome === 'success') ? 'PASSED' : 'FAILED';
+            const overallIcon = overallStatus === 'PASSED' ? '✅' : '❌';
+            const overallColor = overallStatus === 'PASSED' ? '#22c55e' : '#ef4444';
+
+            let body = `${marker}\n`;
+            body += `## ${overallIcon} Agent Deployment & Evaluation Report\n\n`;
+
+            // Agent Details Table
+            body += `### 🤖 Agent Details\n\n`;
+            body += `| Property | Value |\n`;
+            body += `|----------|-------|\n`;
+            body += `| **Agent** | \`tech-trends-agent\` |\n`;
+            body += `| **Version** | \`${agentVersion}\` |\n`;
+            body += `| **Semver** | \`${semver}\` |\n`;
+            body += `| **Phase** | ${phase} |\n`;
+            body += `| **Model** | \`${model}\` |\n`;
+            body += `| **Commit** | \`${sha}\` |\n`;
+            body += `| **Timestamp** | ${timestamp} |\n\n`;
+
+            // Pipeline Results
+            body += `### 📊 Pipeline Results\n\n`;
+            body += `| Step | Status | Details |\n`;
+            body += `|------|--------|----------|\n`;
+            body += `| Deploy to TEST | ${icon(deployOutcome)} ${badge(deployOutcome)} | Agent version \`${agentVersion}\` deployed |\n`;
+            body += `| Smoke Test | ${icon(smokeOutcome)} ${badge(smokeOutcome)} | Invoked agent via Responses API |\n`;
+            body += `| Foundry Evaluation | ${icon(evalOutcome)} ${badge(evalOutcome)} | Evaluated with golden dataset |\n\n`;
+
+            // Tools Configuration
+            body += `### 🛠️ Tools Configuration\n\n`;
+            body += `| Tool | Enabled |\n`;
+            body += `|------|----------|\n`;
+            const toolsInPhase = phase === '2' ? ['code_interpreter'] : phase === '1' ? ['web_search'] : ['web_search', 'code_interpreter'];
+            const allTools = ['web_search', 'code_interpreter'];
+            for (const tool of allTools) {
+              const enabled = toolsInPhase.includes(tool) ? '✅' : '—';
+              body += `| \`${tool}\` | ${enabled} |\n`;
+            }
+            body += '\n';
+
+            // Links
+            body += `### 🔗 Links\n\n`;
+            body += `- [📋 Full Actions Run](${runUrl})\n`;
+            body += `- [📁 Artifacts](${runUrl}#artifacts)\n\n`;
+
+            // Footer
+            body += `---\n`;
+            body += `<sub>🤖 Updated automatically by the CI pipeline · ${timestamp}</sub>\n`;
+
+            // Find the existing marker comment (paginated: it may sit beyond the first page of 30), then update or create
+            const comments = await github.paginate(github.rest.issues.listComments, {
               owner: context.repo.owner,
               repo: context.repo.repo,
-              body
+              issue_number: context.issue.number,
             });
+
+            const existingComment = comments.find(c => (c.body || '').includes(marker));
+
+            if (existingComment) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existingComment.id,
+                body,
+              });
+              console.log(`Updated existing comment #${existingComment.id}`);
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+              console.log('Created new evaluation comment');
+            }
+
+      - name: Generate HTML report artifact
+        if: always()
+        run: |
+          python3 << 'EOF'
+          import os, json
+          from datetime import datetime, timezone
+
+          agent_version = os.environ.get("AGENT_VERSION", "N/A")
+          phase = os.environ.get("PHASE", "N/A")
+          model = os.environ.get("MODEL", "N/A")
+          semver = os.environ.get("SEMVER", "N/A")
+          sha = os.environ.get("SHA", "N/A")[:7]
+          smoke_outcome = os.environ.get("SMOKE_OUTCOME", "unknown")
+          eval_outcome = os.environ.get("EVAL_OUTCOME", "unknown")
+          deploy_outcome = os.environ.get("DEPLOY_OUTCOME", "unknown")
+          run_url = os.environ.get("RUN_URL", "#")
+          timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+
+          def status_badge(outcome):
+              colors = {"success": ("#22c55e", "PASSED"), "failure": ("#ef4444", "FAILED")}
+              color, label = colors.get(outcome, ("#f59e0b", "SKIPPED"))
+              return f'<span class="badge" style="background:{color}">{label}</span>'
+
+          def status_icon(outcome):
+              return {"success": "✅", "failure": "❌"}.get(outcome, "⚠️")
+
+          overall = "PASSED" if smoke_outcome == "success" and eval_outcome == "success" else "FAILED"
+          overall_color = "#22c55e" if overall == "PASSED" else "#ef4444"
+
+          html = f"""<!DOCTYPE html>
+          <html lang="en">
+          <head>
+          <meta charset="UTF-8">
+          <title>Agent Evaluation Report — tech-trends-agent v{semver}</title>
+          <style>
+            * {{ margin: 0; padding: 0; box-sizing: border-box; }}
+            body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f8fafc; color: #1e293b; padding: 2rem; }}
+            .container {{ max-width: 800px; margin: 0 auto; }}
+            .header {{ background: linear-gradient(135deg, #1e40af, #7c3aed); color: white; padding: 2rem; border-radius: 12px; margin-bottom: 1.5rem; }}
+            .header h1 {{ font-size: 1.5rem; margin-bottom: 0.5rem; }}
+            .header .overall {{ font-size: 2rem; font-weight: bold; margin-top: 0.5rem; }}
+            .card {{ background: white; border-radius: 12px; padding: 1.5rem; margin-bottom: 1rem; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }}
+            .card h2 {{ font-size: 1.1rem; margin-bottom: 1rem; color: #475569; }}
+            table {{ width: 100%; border-collapse: collapse; }}
+            th, td {{ text-align: left; padding: 0.75rem; border-bottom: 1px solid #e2e8f0; }}
+            th {{ color: #64748b; font-weight: 600; font-size: 0.85rem; text-transform: uppercase; }}
+            .badge {{ color: #fff; padding: 4px 12px; border-radius: 4px; font-size: 0.8rem; font-weight: 600; }}
+            code {{ background: #f1f5f9; padding: 2px 6px; border-radius: 4px; font-size: 0.9rem; }}
+            .footer {{ text-align: center; color: #94a3b8; font-size: 0.8rem; margin-top: 2rem; }}
+            .status-row td:first-child {{ font-weight: 500; }}
+          </style>
+          </head>
+          <body>
+          <div class="container">
+            <div class="header">
+              <h1>🤖 Agent Evaluation Report</h1>
+              <div>tech-trends-agent <code style="background:rgba(255,255,255,0.2);color:white">v{semver}</code></div>
+              <div class="overall" style="color:{overall_color}">{overall}</div>
+            </div>
+
+            <div class="card">
+              <h2>Agent Details</h2>
+              <table>
+                <tr><td><strong>Agent Name</strong></td><td><code>tech-trends-agent</code></td></tr>
+                <tr><td><strong>Foundry Version</strong></td><td><code>{agent_version}</code></td></tr>
+                <tr><td><strong>Semver</strong></td><td><code>{semver}</code></td></tr>
+                <tr><td><strong>Phase</strong></td><td>{phase}</td></tr>
+                <tr><td><strong>Model</strong></td><td><code>{model}</code></td></tr>
+                <tr><td><strong>Commit</strong></td><td><code>{sha}</code></td></tr>
+                <tr><td><strong>Timestamp</strong></td><td>{timestamp}</td></tr>
+              </table>
+            </div>
+
+            <div class="card">
+              <h2>Pipeline Results</h2>
+              <table>
+                <thead><tr><th>Step</th><th>Status</th><th>Details</th></tr></thead>
+                <tbody class="status-row">
+                  <tr><td>Deploy to TEST</td><td>{status_icon(deploy_outcome)} {status_badge(deploy_outcome)}</td><td>Version <code>{agent_version}</code></td></tr>
+                  <tr><td>Smoke Test</td><td>{status_icon(smoke_outcome)} {status_badge(smoke_outcome)}</td><td>Responses API invocation</td></tr>
+                  <tr><td>Foundry Evaluation</td><td>{status_icon(eval_outcome)} {status_badge(eval_outcome)}</td><td>Golden dataset evaluation</td></tr>
+                </tbody>
+              </table>
+            </div>
+
+            <div class="card">
+              <h2>Tools Configuration</h2>
+              <table>
+                <thead><tr><th>Tool</th><th>Enabled</th></tr></thead>
+                <tbody>
+                  <tr><td><code>web_search</code></td><td>{"✅" if phase in ("1", "3") else "—"}</td></tr>
+                  <tr><td><code>code_interpreter</code></td><td>{"✅" if phase in ("2", "3") else "—"}</td></tr>
+                </tbody>
+              </table>
+            </div>
+
+            <div class="footer">
+              <p>Generated by CI pipeline · {timestamp} · <a href="{run_url}">View full run</a></p>
+            </div>
+          </div>
+          </body>
+          </html>"""
+
+          os.makedirs("reports", exist_ok=True)  # artifact upload step expects reports/evaluation-report.html
+          with open("reports/evaluation-report.html", "w", encoding="utf-8") as f:  # match <meta charset="UTF-8">; body has emoji
+              f.write(html)
+          print("Generated reports/evaluation-report.html")
+          EOF
+        env:
+          AGENT_VERSION: ${{ steps.deploy.outputs.agent_version }}
+          PHASE: ${{ steps.deploy.outputs.phase }}
+          MODEL: ${{ vars.GPT_DEPLOYMENT }}
+          SEMVER: ${{ steps.version.outputs.semver }}
+          SHA: ${{ github.sha }}
+          SMOKE_OUTCOME: ${{ steps.smoke.outcome }}
+          EVAL_OUTCOME: ${{ steps.eval.outcome }}
+          DEPLOY_OUTCOME: ${{ steps.deploy.outcome }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+      - name: Upload HTML report artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: evaluation-report
+          path: reports/evaluation-report.html
diff --git a/requirements.txt b/requirements.txt
index 0e556a8..30104b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
-azure-ai-projects>=1.0.0b1
+azure-ai-projects>=2.0.0
 azure-ai-evaluation>=1.0.0b1
 azure-identity>=1.15.0
+openai>=1.66.0
 
 # Dev / Test
 pytest>=8.0
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 9123eeb..c3690c3 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -37,7 +37,7 @@ ACCOUNT_NAME=""
 GITHUB_REPO="san360/agent-devops"
 GPT_MODEL_NAME="gpt-4o"
 GPT_MODEL_VERSION="2024-11-20"
-GPT_DEPLOYMENT_NAME="gpt-4o-2024-11-20"
+GPT_DEPLOYMENT_NAME="gpt-4o"
 GPT_CAPACITY=30
 SKIP_FOUNDRY=false
 TEST_ENDPOINT=""

From 9fd9a9f861f5d697ab5d073aace4f0bb82cdc400 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 21:29:53 +0200
Subject: [PATCH 09/11] fix: use agent.versions instead of agent.version in
 smoke test AgentDetails object returned by project.agents.get() has
 'versions' (list), not 'version' (scalar). Use versions[-1] for the latest
 version.

---
 .github/workflows/evaluate.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 7fc3cd4..8c7cc78 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -84,7 +84,8 @@ jobs:
 
           # Get agent metadata for display
           agent = project.agents.get(agent_name="tech-trends-agent")
-          print(f"Agent: {agent.name} (version: {agent.version})")
+          latest_version = agent.versions[-1] if agent.versions else "unknown"
+          print(f"Agent: {agent.name} (version: {latest_version})")
 
           # Get the OpenAI client for Responses API
           openai = project.get_openai_client()

From 9b46c101661fed27dee59f48bdf2f1e73b77cdb8 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 21:31:32 +0200
Subject: [PATCH 10/11] fix: convert agent.versions to list before negative
 indexing SDK model objects don't support negative indexing directly.

---
 .github/workflows/evaluate.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 8c7cc78..05f10a9 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -84,7 +84,8 @@ jobs:
 
           # Get agent metadata for display
           agent = project.agents.get(agent_name="tech-trends-agent")
-          latest_version = agent.versions[-1] if agent.versions else "unknown"
+          versions = list(agent.versions) if agent.versions else []
+          latest_version = versions[-1] if versions else "unknown"
           print(f"Agent: {agent.name} (version: {latest_version})")
 
           # Get the OpenAI client for Responses API

From b2f8e4bc1ddf7e95076609cbdae21c9699e59195 Mon Sep 17 00:00:00 2001
From: sanjay singh <sanjay.singh360@gmail.com>
Date: Fri, 15 May 2026 21:35:04 +0200
Subject: [PATCH 11/11] fix: access agent.versions.latest.version correctly

AgentDetails.versions is AgentObjectVersions (not a list).
The correct path is agent.versions.latest.version.
Verified locally with SDK model objects.

---
 .github/workflows/evaluate.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 05f10a9..3e5dbf5 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -84,8 +84,7 @@ jobs:
 
           # Get agent metadata for display
           agent = project.agents.get(agent_name="tech-trends-agent")
-          versions = list(agent.versions) if agent.versions else []
-          latest_version = versions[-1] if versions else "unknown"
+          latest_version = agent.versions.latest.version if agent.versions and agent.versions.latest else "unknown"
           print(f"Agent: {agent.name} (version: {latest_version})")
 
           # Get the OpenAI client for Responses API