diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 5e81110..3e5dbf5 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -49,7 +49,6 @@ jobs:
env:
FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
GPT_DEPLOYMENT: ${{ vars.GPT_DEPLOYMENT }}
- BING_CONNECTION_NAME: ${{ vars.BING_CONNECTION_NAME }}
run: |
TOOLS=$(python3 -c "
import json
@@ -67,6 +66,59 @@ jobs:
--tools "$TOOLS"
echo "phase=$PHASE" >> $GITHUB_OUTPUT
+      # Smoke test: invoke the freshly deployed agent once through the Responses API
+      # and fail the job if the reply is implausibly short. Exposes the outputs
+      # `response_length` and `response_preview` for downstream steps.
+      - name: Smoke test — invoke agent and verify response
+        id: smoke
+        env:
+          FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }}
+        run: |
+          python3 << 'EOF'
+          import os
+          from azure.ai.projects import AIProjectClient
+          from azure.identity import DefaultAzureCredential
+
+          # Create the project client
+          project = AIProjectClient(
+              endpoint=os.environ["FOUNDRY_TEST_ENDPOINT"],
+              credential=DefaultAzureCredential()
+          )
+
+          # Get agent metadata for display
+          agent = project.agents.get(agent_name="tech-trends-agent")
+          latest_version = agent.versions.latest.version if agent.versions and agent.versions.latest else "unknown"
+          print(f"Agent: {agent.name} (version: {latest_version})")
+
+          # Get the OpenAI client for Responses API
+          openai = project.get_openai_client()
+
+          # Invoke the agent using the Responses API with agent_reference
+          response = openai.responses.create(
+              input="What are the latest trends in AI?",
+              extra_body={
+                  "agent_reference": {
+                      "name": "tech-trends-agent",
+                      "type": "agent_reference",
+                  }
+              },
+          )
+
+          output = response.output_text
+          print(f"Response ID: {response.id}")
+
+          # Anything under 50 characters is treated as a failed invocation.
+          if len(output) < 50:
+              print(f"FAIL: Response too short ({len(output)} chars)")
+              raise SystemExit(1)
+
+          print(f"PASS: Agent responded ({len(output)} chars)")
+          print(f"Preview: {output[:300]}...")
+
+          # GITHUB_OUTPUT is a line-oriented key=value file: a raw newline inside the
+          # preview would either abort the step ("Invalid format") or let a line of
+          # model text that looks like `key=value` define an unintended output.
+          # Collapse every whitespace run (newlines included) to a single space.
+          preview = " ".join(output[:200].split())
+
+          # Write smoke test result for downstream steps
+          gh_output = os.environ.get("GITHUB_OUTPUT", "")
+          if gh_output:
+              with open(gh_output, "a") as f:
+                  f.write(f"response_length={len(output)}\n")
+                  f.write(f"response_preview={preview}\n")
+          EOF
+
- name: Run Foundry evaluation
id: eval
uses: microsoft/ai-agent-evals@v3-beta
@@ -82,16 +134,222 @@ jobs:
uses: actions/github-script@v7
with:
script: |
- const fs = require('fs');
- let body = '## Agent Evaluation Results\n\n';
- body += `**Phase:** ${{ steps.deploy.outputs.phase }}\n`;
- body += `**Model:** ${{ vars.GPT_DEPLOYMENT }}\n`;
- body += `**Commit:** ${context.sha.slice(0,7)}\n\n`;
- body += 'Full results are in the [Actions summary](' +
- `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).\n`;
- github.rest.issues.createComment({
- issue_number: context.issue.number,
+            // Hidden HTML comment embedded at the top of the report body. It is invisible
+            // in rendered Markdown and is what the lookup further down uses to recognise
+            // this workflow's own comment. It must be non-empty: String.includes('') is
+            // true for every comment, which would make the lookup match (and overwrite)
+            // the first comment on the PR.
+            const marker = '<!-- agent-eval-report -->';
+            // Step outcomes and outputs are injected by the Actions expression engine
+            // before the script runs; empty outputs fall back to 'N/A' where noted.
+            const smokeOutcome = '${{ steps.smoke.outcome }}';
+            const evalOutcome = '${{ steps.eval.outcome }}';
+            const deployOutcome = '${{ steps.deploy.outcome }}';
+            const agentVersion = '${{ steps.deploy.outputs.agent_version }}' || 'N/A';
+            const phase = '${{ steps.deploy.outputs.phase }}' || 'N/A';
+            const model = '${{ vars.GPT_DEPLOYMENT }}' || 'N/A';
+            const semver = '${{ steps.version.outputs.semver }}';
+            const sha = context.sha.slice(0, 7);
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            // ISO timestamp trimmed to seconds, e.g. "2025-01-31 12:34:56 UTC".
+            const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 19) + ' UTC';
+
+ const icon = (outcome) => outcome === 'success' ? '✅' : outcome === 'failure' ? '❌' : '⚠️';
+ const badge = (outcome) => outcome === 'success'
+ ? 'PASSED'
+ : outcome === 'failure'
+ ? 'FAILED'
+ : 'SKIPPED';
+
+ const overallStatus = (smokeOutcome === 'success' && evalOutcome === 'success') ? 'PASSED' : 'FAILED';
+ const overallIcon = overallStatus === 'PASSED' ? '✅' : '❌';
+ const overallColor = overallStatus === 'PASSED' ? '#22c55e' : '#ef4444';
+
+ let body = `${marker}\n`;
+ body += `## ${overallIcon} Agent Deployment & Evaluation Report\n\n`;
+
+ // Agent Details Table
+ body += `### 🤖 Agent Details\n\n`;
+ body += `| Property | Value |\n`;
+ body += `|----------|-------|\n`;
+ body += `| **Agent** | \`tech-trends-agent\` |\n`;
+ body += `| **Version** | \`${agentVersion}\` |\n`;
+ body += `| **Semver** | \`${semver}\` |\n`;
+ body += `| **Phase** | ${phase} |\n`;
+ body += `| **Model** | \`${model}\` |\n`;
+ body += `| **Commit** | \`${sha}\` |\n`;
+ body += `| **Timestamp** | ${timestamp} |\n\n`;
+
+ // Pipeline Results
+ body += `### 📊 Pipeline Results\n\n`;
+ body += `| Step | Status | Details |\n`;
+ body += `|------|--------|----------|\n`;
+ body += `| Deploy to TEST | ${icon(deployOutcome)} ${badge(deployOutcome)} | Agent version \`${agentVersion}\` deployed |\n`;
+ body += `| Smoke Test | ${icon(smokeOutcome)} ${badge(smokeOutcome)} | Invoked agent via Responses API |\n`;
+ body += `| Foundry Evaluation | ${icon(evalOutcome)} ${badge(evalOutcome)} | Evaluated with golden dataset |\n\n`;
+
+ // Tools Configuration
+ body += `### 🛠️ Tools Configuration\n\n`;
+ body += `| Tool | Enabled |\n`;
+ body += `|------|----------|\n`;
+ const toolsInPhase = phase === '2' ? ['code_interpreter'] : phase === '1' ? ['web_search'] : ['web_search', 'code_interpreter'];
+ const allTools = ['web_search', 'code_interpreter'];
+ for (const tool of allTools) {
+ const enabled = toolsInPhase.includes(tool) ? '✅' : '—';
+ body += `| \`${tool}\` | ${enabled} |\n`;
+ }
+ body += '\n';
+
+ // Links
+ body += `### 🔗 Links\n\n`;
+ body += `- [📋 Full Actions Run](${runUrl})\n`;
+ body += `- [📁 Artifacts](${runUrl}#artifacts)\n\n`;
+
+ // Footer
+ body += `---\n`;
+ body += `🤖 Updated automatically by the CI pipeline · ${timestamp}\n`;
+
+            // Find the report comment left by an earlier run (identified by the marker)
+            // and update it in place; otherwise create a new one. github.paginate()
+            // walks every page of results, because a single listComments call returns
+            // only the first page and would miss the report on a busy PR, producing a
+            // duplicate comment on each run.
+            const comments = await github.paginate(github.rest.issues.listComments, {
               owner: context.repo.owner,
               repo: context.repo.repo,
-              body
+              issue_number: context.issue.number,
+              per_page: 100,
             });
+
+            // Guard against comments whose body is missing before searching it.
+            const existingComment = comments.find(c => (c.body || '').includes(marker));
+
+            if (existingComment) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existingComment.id,
+                body,
+              });
+              console.log(`Updated existing comment #${existingComment.id}`);
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+              console.log('Created new evaluation comment');
+            }
+
+ - name: Generate HTML report artifact
+ if: always()
+ run: |
+ python3 << 'EOF'
+ import os, json
+ from datetime import datetime, timezone
+
+ agent_version = os.environ.get("AGENT_VERSION", "N/A")
+ phase = os.environ.get("PHASE", "N/A")
+ model = os.environ.get("MODEL", "N/A")
+ semver = os.environ.get("SEMVER", "N/A")
+ sha = os.environ.get("SHA", "N/A")[:7]
+ smoke_outcome = os.environ.get("SMOKE_OUTCOME", "unknown")
+ eval_outcome = os.environ.get("EVAL_OUTCOME", "unknown")
+ deploy_outcome = os.environ.get("DEPLOY_OUTCOME", "unknown")
+ run_url = os.environ.get("RUN_URL", "#")
+ timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+
+ def status_badge(outcome):
+ colors = {"success": ("#22c55e", "PASSED"), "failure": ("#ef4444", "FAILED")}
+ color, label = colors.get(outcome, ("#f59e0b", "SKIPPED"))
+ return f'{label}'
+
+ def status_icon(outcome):
+ return {"success": "✅", "failure": "❌"}.get(outcome, "⚠️")
+
+ overall = "PASSED" if smoke_outcome == "success" and eval_outcome == "success" else "FAILED"
+ overall_color = "#22c55e" if overall == "PASSED" else "#ef4444"
+
+ html = f"""
+
+
+
+ Agent Evaluation Report — tech-trends-agent v{semver}
+
+
+
+
+
+
+
+
Agent Details
+
+ | Agent Name | tech-trends-agent |
+ | Foundry Version | {agent_version} |
+ | Semver | {semver} |
+ | Phase | {phase} |
+ | Model | {model} |
+ | Commit | {sha} |
+ | Timestamp | {timestamp} |
+
+
+
+
+
Pipeline Results
+
+ | Step | Status | Details |
+
+ | Deploy to TEST | {status_icon(deploy_outcome)} {status_badge(deploy_outcome)} | Version {agent_version} |
+ | Smoke Test | {status_icon(smoke_outcome)} {status_badge(smoke_outcome)} | Responses API invocation |
+ | Foundry Evaluation | {status_icon(eval_outcome)} {status_badge(eval_outcome)} | Golden dataset evaluation |
+
+
+
+
+
+
Tools Configuration
+
+ | Tool | Enabled |
+
+ web_search | {"✅" if phase in ("1", "3") else "—"} |
+ code_interpreter | {"✅" if phase in ("2", "3") else "—"} |
+
+
+
+
+
+
+
+ """
+
+ os.makedirs("reports", exist_ok=True)
+ with open("reports/evaluation-report.html", "w") as f:
+ f.write(html)
+ print("Generated reports/evaluation-report.html")
+ EOF
+ env:
+ AGENT_VERSION: ${{ steps.deploy.outputs.agent_version }}
+ PHASE: ${{ steps.deploy.outputs.phase }}
+ MODEL: ${{ vars.GPT_DEPLOYMENT }}
+ SEMVER: ${{ steps.version.outputs.semver }}
+ SHA: ${{ github.sha }}
+ SMOKE_OUTCOME: ${{ steps.smoke.outcome }}
+ EVAL_OUTCOME: ${{ steps.eval.outcome }}
+ DEPLOY_OUTCOME: ${{ steps.deploy.outcome }}
+ RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+ - name: Upload HTML report artifact
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: evaluation-report
+ path: reports/evaluation-report.html
diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml
index cd9bbf3..9972715 100644
--- a/.github/workflows/monitor.yml
+++ b/.github/workflows/monitor.yml
@@ -41,8 +41,9 @@ jobs:
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
client = AIProjectClient(endpoint=os.environ['FOUNDRY_PROD_ENDPOINT'], credential=DefaultAzureCredential())
- agent = client.agents.get_agent('tech-trends-agent')
- print(agent.version)
+ versions = client.agents.list_versions(agent_name='tech-trends-agent')
+ latest = max(versions, key=lambda v: int(v.version))
+ print(latest.version)
")
echo "version=$VERSION" >> $GITHUB_OUTPUT
diff --git a/README.md b/README.md
index 6ec7b2c..717a6e9 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,8 @@ versioned prompts, tool changes, model upgrades, evaluation gates, and rollback.
## Agent: Technology Trend Research & Analysis
-- **Phase 1:** Web search only (Bing Grounding)
-- **Phase 2:** Web search + Code Interpreter for data analysis
+- **Phase 1:** Web search only (`web_search` tool)
+- **Phase 2:** Code Interpreter only (`code_interpreter` tool) for data analysis
## Repository Structure
@@ -60,9 +60,8 @@ This creates:
- An App Registration with a Service Principal
- 3 federated credentials for GitHub OIDC (main branch, pull requests, tags)
- RBAC role assignments (Azure AI User, Cognitive Services OpenAI User)
-- 7 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`, `BING_CONNECTION_NAME`)
-
-After bootstrap completes, manually configure the Bing Grounding connection in both Foundry projects via the [Azure AI Foundry portal](https://ai.azure.com) (Project > Connections > + New).
+- Model availability validation (checks current + upgrade target model)
+- 6 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`)
State is saved to `.bootstrap-state.json` for use by the teardown script.
@@ -88,7 +87,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
```
- Creates branch `feature/phase1-web-search`
-- Configures the agent with Bing Grounding (web search) only
+- Configures the agent with the `web_search` tool
- Evaluation runs 5 Phase 1 test cases
- Opens a PR — `evaluate.yml` triggers, deploys to TEST, runs eval
@@ -101,7 +100,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
```
- Creates branch `feature/phase2-code-interpreter` from updated `main`
-- Adds `code_interpreter` tool alongside `bing_grounding`
+- Replaces `web_search` with `code_interpreter` tool
- Extends the system prompt with a `## Data Analysis` section
- Evaluation now runs all 8 test cases (Phase 1 + Phase 2) — checks for regressions
- Opens a PR
@@ -115,10 +114,15 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the
```
- Creates branch `chore/model-upgrade-gpt41`
+- Upgrades the model from the default `gpt-4o` deployment (model version `2024-11-20`) to `gpt-4.1`
- Updates the `GPT_DEPLOYMENT` GitHub variable to `gpt-4.1`
- Adds a model history entry in the agent config
- Opens a PR — the eval gate verifies the new model scores at or above thresholds
+The bootstrap script validates that both the current model and the upgrade target
+(`gpt-4.1`) are available in your chosen Azure region. If `gpt-4.1` is not available,
+the script will list alternatives you can use instead.
+
**After the eval passes, merge the PR.** The full lifecycle demo is complete.
### Lifecycle Flow Diagram
@@ -163,7 +167,7 @@ az login
# 4. Deploy to test
source .env # or export vars manually
-python scripts/deploy_agent.py --env test --semver 1.0.0 --tools bing_grounding
+python scripts/deploy_agent.py --env test --semver 1.0.0 --tools web_search
```
## CI/CD Workflows
@@ -183,6 +187,14 @@ The eval gate uses `microsoft/ai-agent-evals@v3-beta` with four evaluators:
- **Groundedness** (threshold: 0.75)
- **Coherence** (threshold: 0.80)
+A smoke test step runs before evaluation — it invokes the agent with a test query
+and verifies a valid response is returned.
+
+**Note on evaluation naming:** The `ai-agent-evals` action creates a new evaluation
+group named "Agent Evaluation" on every run (custom names not yet supported). Each run
+is named `Agent tech-trends-agent:<version>`. The PR comment includes the commit SHA
+for traceability.
+
## Rollback
```bash
@@ -194,7 +206,7 @@ Re-deploys the exact prompt, tools, and model from a saved artifact.
## Model Comparison
```bash
-python scripts/compare_models.py --current gpt-4o-2024-11-20 --candidate gpt-4.1 --tools bing_grounding
+python scripts/compare_models.py --current gpt-4o-2024-11-20 --candidate gpt-4.1 --tools web_search
```
Deploys both model versions to test for side-by-side evaluation.
diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json
index 9d6b181..1bb67ce 100644
--- a/agents/tech-trends-agent.json
+++ b/agents/tech-trends-agent.json
@@ -1,16 +1,16 @@
{
"agent_name": "tech-trends-agent",
- "phase": "1",
+ "phase": "2",
"definition": {
"model": "${GPT_DEPLOYMENT}",
"instructions_file": "prompts/tech-trends-agent.md",
"tools": [
- { "type": "web_search" }
+ { "type": "code_interpreter" }
]
},
"eval": {
"dataset": "evals/golden-dataset.json",
- "phase_filter": "1",
+ "phase_filter": null,
"config": "evals/eval-config.json"
},
"_model_history": [
diff --git a/evals/eval-config.json b/evals/eval-config.json
index 5cafb0a..832031e 100644
--- a/evals/eval-config.json
+++ b/evals/eval-config.json
@@ -1,9 +1,9 @@
{
"evaluators": [
- "builtin.task_adherence",
- "builtin.relevance",
- "builtin.groundedness",
- "builtin.coherence"
+ "TaskAdherenceEvaluator",
+ "RelevanceEvaluator",
+ "GroundednessEvaluator",
+ "CoherenceEvaluator"
],
"thresholds": {
"task_adherence": 0.80,
@@ -11,6 +11,6 @@
"groundedness": 0.75,
"coherence": 0.80
},
- "phase_filter": "1",
- "notes": "Phase 1: Only web search queries evaluated. Phase 2 data analysis queries excluded."
+ "phase_filter": null,
+ "notes": "Phase 2: All queries evaluated — both web search (Phase 1) and data analysis (Phase 2)."
}
diff --git a/prompts/tech-trends-agent.md b/prompts/tech-trends-agent.md
index e01c7e9..da79c14 100644
--- a/prompts/tech-trends-agent.md
+++ b/prompts/tech-trends-agent.md
@@ -27,3 +27,15 @@ Always structure responses as:
## Tone
Professional, objective, and jargon-aware. Assume the user is a technology
professional who does not need basic concepts explained.
+
+## Data Analysis (Phase 2)
+You now have access to a code interpreter. Use it when:
+- The user asks you to calculate, compare, or rank numerical data
+- You have retrieved structured data (tables, CSVs) and analysis would add value
+- You need to produce a formatted comparison table from raw information
+
+When using code interpreter:
+1. First obtain the raw data — from the user's message, or via web search when that tool is enabled
+2. Then write and run Python code to process or compare it
+3. Present results with the code output clearly labelled
+4. Always show the source of the raw data alongside the computed result
diff --git a/requirements.txt b/requirements.txt
index 0e556a8..30104b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
-azure-ai-projects>=1.0.0b1
+azure-ai-projects>=2.0.0
azure-ai-evaluation>=1.0.0b1
azure-identity>=1.15.0
+openai>=1.66.0
# Dev / Test
pytest>=8.0
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 05ef463..c3690c3 100644
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -37,9 +37,8 @@ ACCOUNT_NAME=""
GITHUB_REPO="san360/agent-devops"
GPT_MODEL_NAME="gpt-4o"
GPT_MODEL_VERSION="2024-11-20"
-GPT_DEPLOYMENT_NAME="gpt-4o-2024-11-20"
+GPT_DEPLOYMENT_NAME="gpt-4o"
GPT_CAPACITY=30
-BING_CONNECTION_NAME="bing-grounding"
SKIP_FOUNDRY=false
TEST_ENDPOINT=""
PROD_ENDPOINT=""
@@ -95,7 +94,7 @@ echo "============================================"
echo ""
# ---------- Step 1: Resource Group ----------
-echo "[1/7] Creating resource group..."
+echo "[1/8] Creating resource group..."
az group create \
--name "$RESOURCE_GROUP" \
--location "$LOCATION" \
@@ -103,10 +102,10 @@ az group create \
# ---------- Step 2 & 3: Deploy Foundry projects (or skip) ----------
if [[ "$SKIP_FOUNDRY" == true ]]; then
- echo "[2/7] Skipping TEST Foundry project (using provided endpoint)"
- echo "[3/7] Skipping PROD Foundry project (using provided endpoint)"
+ echo "[2/8] Skipping TEST Foundry project (using provided endpoint)"
+ echo "[3/8] Skipping PROD Foundry project (using provided endpoint)"
else
- echo "[2/7] Deploying TEST Foundry project..."
+ echo "[2/8] Deploying TEST Foundry project..."
TEST_OUTPUT=$(az deployment group create \
--resource-group "$RESOURCE_GROUP" \
--template-file infra/main.bicep \
@@ -122,7 +121,7 @@ else
TEST_ENDPOINT=$(echo "$TEST_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])")
echo " TEST endpoint: $TEST_ENDPOINT"
- echo "[3/7] Deploying PROD Foundry project..."
+ echo "[3/8] Deploying PROD Foundry project..."
PROD_OUTPUT=$(az deployment group create \
--resource-group "$RESOURCE_GROUP" \
--template-file infra/main.bicep \
@@ -140,7 +139,56 @@ else
fi
-# ---------- Step 4: App Registration + Service Principal ----------
-echo "[4/7] Creating App Registration and Service Principal..."
+# ---------- Step 4: Model Availability Validation ----------
+echo "[4/8] Validating model availability..."
+echo " Checking if '$GPT_MODEL_NAME' (version: $GPT_MODEL_VERSION) is available in $LOCATION..."
+
+AVAILABLE_MODELS=$(az cognitiveservices model list \
+ --location "$LOCATION" \
+ --query "[?model.name=='$GPT_MODEL_NAME' && model.version=='$GPT_MODEL_VERSION'].model.name" \
+ -o tsv 2>/dev/null || echo "")
+
+if [[ -z "$AVAILABLE_MODELS" ]]; then
+ echo ""
+ echo " WARNING: Model '$GPT_MODEL_NAME' version '$GPT_MODEL_VERSION' not found in $LOCATION."
+ echo " Available GPT models in $LOCATION:"
+ az cognitiveservices model list \
+ --location "$LOCATION" \
+ --query "[?model.name.starts_with(@,'gpt')].{name:model.name, version:model.version}" \
+ -o table 2>/dev/null || echo " (Could not list models — check permissions)"
+ echo ""
+ echo " The model upgrade lifecycle demo (Phase 3) requires a second model."
+ echo " You can continue, but ensure GPT_DEPLOYMENT points to a valid model."
+ echo ""
+ read -rp " Continue anyway? [y/N] " confirm
+ if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
+ echo "Aborted."
+ exit 1
+ fi
+else
+ echo " ✓ Model '$GPT_MODEL_NAME' version '$GPT_MODEL_VERSION' is available in $LOCATION"
+fi
+
+# Check for upgrade target model (gpt-4.1) availability for Phase 3 demo
+UPGRADE_MODEL="gpt-4.1"
+UPGRADE_AVAILABLE=$(az cognitiveservices model list \
+ --location "$LOCATION" \
+ --query "[?model.name=='$UPGRADE_MODEL'].model.name | [0]" \
+ -o tsv 2>/dev/null || echo "")
+
+if [[ -n "$UPGRADE_AVAILABLE" ]]; then
+ echo " ✓ Upgrade target '$UPGRADE_MODEL' is also available (Phase 3 model upgrade demo ready)"
+else
+ echo " ⚠ Upgrade target '$UPGRADE_MODEL' not found in $LOCATION."
+ echo " Phase 3 model upgrade demo may need a different target model."
+ echo " Available models:"
+ az cognitiveservices model list \
+ --location "$LOCATION" \
+ --query "[?model.name.starts_with(@,'gpt')].{name:model.name, version:model.version}" \
+ -o table 2>/dev/null || true
+fi
+
+# ---------- Step 5: App Registration + Service Principal ----------
+echo "[5/8] Creating App Registration and Service Principal..."
APP_ID=$(az ad app create \
--display-name "$APP_DISPLAY_NAME" \
--query appId -o tsv)
@@ -149,8 +197,8 @@ SP_OBJ_ID=$(az ad sp create --id "$APP_ID" --query id -o tsv)
echo " Client ID: $APP_ID"
echo " SP Object ID: $SP_OBJ_ID"
-# ---------- Step 5: Federated Credentials ----------
-echo "[5/7] Adding federated credentials..."
+# ---------- Step 6: Federated Credentials ----------
+echo "[6/8] Adding federated credentials..."
az ad app federated-credential create \
--id "$APP_ID" \
@@ -182,8 +230,8 @@ az ad app federated-credential create \
}" --output none
echo " + release tag credential"
-# ---------- Step 6: RBAC Role Assignments ----------
-echo "[6/7] Assigning RBAC roles..."
+# ---------- Step 7: RBAC Role Assignments ----------
+echo "[7/8] Assigning RBAC roles..."
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
SCOPE="/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP"
@@ -219,8 +267,8 @@ if [[ -n "$TEST_ENDPOINT" ]]; then
fi
fi
-# ---------- Step 7: GitHub Variables ----------
-echo "[7/7] Setting GitHub repository variables..."
+# ---------- Step 8: GitHub Variables ----------
+echo "[8/8] Setting GitHub repository variables..."
TENANT_ID=$(az account show --query tenantId -o tsv)
gh variable set AZURE_CLIENT_ID --body "$APP_ID" --repo "$GITHUB_REPO"
@@ -229,8 +277,7 @@ gh variable set AZURE_SUBSCRIPTION_ID --body "$SUBSCRIPTION_ID" --repo "$GITH
gh variable set FOUNDRY_TEST_ENDPOINT --body "$TEST_ENDPOINT" --repo "$GITHUB_REPO"
gh variable set FOUNDRY_PROD_ENDPOINT --body "$PROD_ENDPOINT" --repo "$GITHUB_REPO"
gh variable set GPT_DEPLOYMENT --body "$GPT_DEPLOYMENT_NAME" --repo "$GITHUB_REPO"
-gh variable set BING_CONNECTION_NAME --body "$BING_CONNECTION_NAME" --repo "$GITHUB_REPO"
-echo " Set 7 variables on $GITHUB_REPO"
+echo " Set 6 variables on $GITHUB_REPO"
# ---------- Summary ----------
echo ""
@@ -252,13 +299,11 @@ echo " Tenant ID: $TENANT_ID"
echo " Subscription ID: $SUBSCRIPTION_ID"
echo ""
echo " GitHub ($GITHUB_REPO):"
-echo " 7 repository variables set"
+echo " 6 repository variables set"
echo " 3 federated credentials configured"
echo ""
echo " Next steps:"
-echo " 1. Configure Bing Grounding connection in both Foundry projects"
-echo " (Portal: ai.azure.com -> project -> Connections -> + New)"
-echo " 2. Run lifecycle scripts in order:"
+echo " 1. Run lifecycle scripts in order:"
echo " ./scripts/lifecycle/01-phase1-web-search.sh"
echo " ./scripts/lifecycle/02-phase2-code-interpreter.sh"
echo " ./scripts/lifecycle/03-model-upgrade.sh"
@@ -301,9 +346,6 @@ FOUNDRY_PROD_ENDPOINT=$PROD_ENDPOINT
# Model deployment
GPT_DEPLOYMENT=$GPT_DEPLOYMENT_NAME
-# Bing Grounding connection name
-BING_CONNECTION_NAME=$BING_CONNECTION_NAME
-
# Resource metadata
RESOURCE_GROUP=$RESOURCE_GROUP
LOCATION=$LOCATION
diff --git a/scripts/lifecycle/02-phase2-code-interpreter.sh b/scripts/lifecycle/02-phase2-code-interpreter.sh
index c2df73e..9fad89b 100644
--- a/scripts/lifecycle/02-phase2-code-interpreter.sh
+++ b/scripts/lifecycle/02-phase2-code-interpreter.sh
@@ -26,7 +26,7 @@ git pull origin main
# Create feature branch
git checkout -b "$BRANCH"
-# --- Agent config: Phase 2, add code_interpreter ---
+# --- Agent config: Phase 2, code_interpreter only ---
cat > agents/tech-trends-agent.json << 'AGENT_EOF'
{
"agent_name": "tech-trends-agent",
@@ -35,7 +35,6 @@ cat > agents/tech-trends-agent.json << 'AGENT_EOF'
"model": "${GPT_DEPLOYMENT}",
"instructions_file": "prompts/tech-trends-agent.md",
"tools": [
- { "type": "bing_grounding" },
{ "type": "code_interpreter" }
]
},
@@ -126,14 +125,14 @@ PR_URL=$(gh pr create \
--title "Phase 2: Add Code Interpreter for Data Analysis" \
--body "$(cat <<'PR_EOF'
## Summary
-- Adds `code_interpreter` tool alongside existing `bing_grounding`
+- Replaces `web_search` tool with `code_interpreter` for data analysis
- Extends system prompt with `## Data Analysis` section
- Evaluation now runs **all 8 queries** (Phase 1 + Phase 2)
## Changes
| File | Change |
|---|---|
-| `agents/tech-trends-agent.json` | Added `code_interpreter` to tools, phase → `"2"` |
+| `agents/tech-trends-agent.json` | Replaced `web_search` with `code_interpreter`, phase → `"2"` |
| `prompts/tech-trends-agent.md` | Added `## Data Analysis (Phase 2)` section |
| `evals/eval-config.json` | `phase_filter` → `null` (run all cases) |
@@ -156,7 +155,7 @@ echo " PR created: $PR_URL"
echo "============================================"
echo ""
echo " The evaluate.yml workflow will now:"
-echo " 1. Deploy Phase 2 agent to TEST (both tools)"
+echo " 1. Deploy Phase 2 agent to TEST (code_interpreter only)"
echo " 2. Run ALL 8 eval queries (Phase 1 + Phase 2)"
echo " 3. Check for regressions on existing Phase 1 queries"
echo ""