diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml index 5e81110..3e5dbf5 100644 --- a/.github/workflows/evaluate.yml +++ b/.github/workflows/evaluate.yml @@ -49,7 +49,6 @@ jobs: env: FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }} GPT_DEPLOYMENT: ${{ vars.GPT_DEPLOYMENT }} - BING_CONNECTION_NAME: ${{ vars.BING_CONNECTION_NAME }} run: | TOOLS=$(python3 -c " import json @@ -67,6 +66,59 @@ jobs: --tools "$TOOLS" echo "phase=$PHASE" >> $GITHUB_OUTPUT + - name: Smoke test — invoke agent and verify response + id: smoke + env: + FOUNDRY_TEST_ENDPOINT: ${{ vars.FOUNDRY_TEST_ENDPOINT }} + run: | + python3 << 'EOF' + import os + from azure.ai.projects import AIProjectClient + from azure.identity import DefaultAzureCredential + + # Create the project client + project = AIProjectClient( + endpoint=os.environ["FOUNDRY_TEST_ENDPOINT"], + credential=DefaultAzureCredential() + ) + + # Get agent metadata for display + agent = project.agents.get(agent_name="tech-trends-agent") + latest_version = agent.versions.latest.version if agent.versions and agent.versions.latest else "unknown" + print(f"Agent: {agent.name} (version: {latest_version})") + + # Get the OpenAI client for Responses API + openai = project.get_openai_client() + + # Invoke the agent using the Responses API with agent_reference + response = openai.responses.create( + input="What are the latest trends in AI?", + extra_body={ + "agent_reference": { + "name": "tech-trends-agent", + "type": "agent_reference", + } + }, + ) + + output = response.output_text + print(f"Response ID: {response.id}") + + if len(output) < 50: + print(f"FAIL: Response too short ({len(output)} chars)") + raise SystemExit(1) + + print(f"PASS: Agent responded ({len(output)} chars)") + print(f"Preview: {output[:300]}...") + + # Write smoke test result for downstream steps + gh_output = os.environ.get("GITHUB_OUTPUT", "") + if gh_output: + with open(gh_output, "a") as f: + f.write(f"response_length={len(output)}\n") + 
f.write(f"response_preview={' '.join(output[:200].split())}\n") # GITHUB_OUTPUT is line-oriented: collapse newlines/whitespace runs so the preview stays a single name=value record + EOF + - name: Run Foundry evaluation id: eval uses: microsoft/ai-agent-evals@v3-beta @@ -82,16 +134,222 @@ jobs: uses: actions/github-script@v7 with: script: | - const fs = require('fs'); - let body = '## Agent Evaluation Results\n\n'; - body += `**Phase:** ${{ steps.deploy.outputs.phase }}\n`; - body += `**Model:** ${{ vars.GPT_DEPLOYMENT }}\n`; - body += `**Commit:** ${context.sha.slice(0,7)}\n\n`; - body += 'Full results are in the [Actions summary](' + - `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}).\n`; - github.rest.issues.createComment({ - issue_number: context.issue.number, + const marker = '<!-- agent-eval-report -->'; // hidden, non-empty marker that identifies this workflow's comment (an empty marker would match every comment) + const smokeOutcome = '${{ steps.smoke.outcome }}'; + const evalOutcome = '${{ steps.eval.outcome }}'; + const deployOutcome = '${{ steps.deploy.outcome }}'; + const agentVersion = '${{ steps.deploy.outputs.agent_version }}' || 'N/A'; + const phase = '${{ steps.deploy.outputs.phase }}' || 'N/A'; + const model = '${{ vars.GPT_DEPLOYMENT }}' || 'N/A'; + const semver = '${{ steps.version.outputs.semver }}' || 'N/A'; + const sha = context.sha.slice(0, 7); + const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 19) + ' UTC'; + + const icon = (outcome) => outcome === 'success' ? '✅' : outcome === 'failure' ? '❌' : '⚠️'; + const badge = (outcome) => outcome === 'success' + ? 'PASSED' + : outcome === 'failure' + ? 'FAILED' + : 'SKIPPED'; + + const overallStatus = (smokeOutcome === 'success' && evalOutcome === 'success') ? 'PASSED' : 'FAILED'; + const overallIcon = overallStatus === 'PASSED' ? '✅' : '❌'; + const overallColor = overallStatus === 'PASSED' ? 
'#22c55e' : '#ef4444'; + + let body = `${marker}\n`; + body += `## ${overallIcon} Agent Deployment & Evaluation Report\n\n`; + + // Agent Details Table + body += `### 🤖 Agent Details\n\n`; + body += `| Property | Value |\n`; + body += `|----------|-------|\n`; + body += `| **Agent** | \`tech-trends-agent\` |\n`; + body += `| **Version** | \`${agentVersion}\` |\n`; + body += `| **Semver** | \`${semver}\` |\n`; + body += `| **Phase** | ${phase} |\n`; + body += `| **Model** | \`${model}\` |\n`; + body += `| **Commit** | \`${sha}\` |\n`; + body += `| **Timestamp** | ${timestamp} |\n\n`; + + // Pipeline Results + body += `### 📊 Pipeline Results\n\n`; + body += `| Step | Status | Details |\n`; + body += `|------|--------|----------|\n`; + body += `| Deploy to TEST | ${icon(deployOutcome)} ${badge(deployOutcome)} | Agent version \`${agentVersion}\` deployed |\n`; + body += `| Smoke Test | ${icon(smokeOutcome)} ${badge(smokeOutcome)} | Invoked agent via Responses API |\n`; + body += `| Foundry Evaluation | ${icon(evalOutcome)} ${badge(evalOutcome)} | Evaluated with golden dataset |\n\n`; + + // Tools Configuration + body += `### 🛠️ Tools Configuration\n\n`; + body += `| Tool | Enabled |\n`; + body += `|------|----------|\n`; + const toolsInPhase = phase === '2' ? ['code_interpreter'] : phase === '1' ? ['web_search'] : ['web_search', 'code_interpreter']; + const allTools = ['web_search', 'code_interpreter']; + for (const tool of allTools) { + const enabled = toolsInPhase.includes(tool) ? 
'✅' : '—'; + body += `| \`${tool}\` | ${enabled} |\n`; + } + body += '\n'; + + // Links + body += `### 🔗 Links\n\n`; + body += `- [📋 Full Actions Run](${runUrl})\n`; + body += `- [📁 Artifacts](${runUrl}#artifacts)\n\n`; + + // Footer + body += `---\n`; + body += `🤖 Updated automatically by the CI pipeline · ${timestamp}\n`; + + // Find existing comment with marker and update, or create new (paginate: the marker comment may be beyond the first page on long PR threads) + const comments = await github.paginate(github.rest.issues.listComments, { owner: context.repo.owner, repo: context.repo.repo, - body + issue_number: context.issue.number, }); + + const existingComment = comments.find((c) => marker !== '' && (c.body ?? '').includes(marker)); // guard: an empty marker matches every comment, and body may be absent + + if (existingComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existingComment.id, + body, + }); + console.log(`Updated existing comment #${existingComment.id}`); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }); + console.log('Created new evaluation comment'); + } + + - name: Generate HTML report artifact + if: always() + run: | + python3 << 'EOF' + import os, json + from datetime import datetime, timezone + + agent_version = os.environ.get("AGENT_VERSION") or "N/A" # the workflow sets missing outputs to "" (not unset), so use `or` for the fallback + phase = os.environ.get("PHASE") or "N/A" + model = os.environ.get("MODEL") or "N/A" + semver = os.environ.get("SEMVER") or "N/A" + sha = os.environ.get("SHA", "N/A")[:7] + smoke_outcome = os.environ.get("SMOKE_OUTCOME", "unknown") + eval_outcome = os.environ.get("EVAL_OUTCOME", "unknown") + deploy_outcome = os.environ.get("DEPLOY_OUTCOME", "unknown") + run_url = os.environ.get("RUN_URL", "#") + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") + + def status_badge(outcome): + colors = {"success": ("#22c55e", "PASSED"), "failure": ("#ef4444", "FAILED")} + color, label = colors.get(outcome, ("#f59e0b", "SKIPPED")) + return f'{label}' + + def status_icon(outcome): + return 
{"success": "✅", "failure": "❌"}.get(outcome, "⚠️") + + overall = "PASSED" if smoke_outcome == "success" and eval_outcome == "success" else "FAILED" + overall_color = "#22c55e" if overall == "PASSED" else "#ef4444" + + html = f""" + + + + Agent Evaluation Report — tech-trends-agent v{semver} + + + +
+
+

🤖 Agent Evaluation Report

+
tech-trends-agent v{semver}
+
{overall}
+
+ +
+

Agent Details

+ + + + + + + + +
Agent Nametech-trends-agent
Foundry Version{agent_version}
Semver{semver}
Phase{phase}
Model{model}
Commit{sha}
Timestamp{timestamp}
+
+ +
+

Pipeline Results

+ + + + + + + +
StepStatusDetails
Deploy to TEST{status_icon(deploy_outcome)} {status_badge(deploy_outcome)}Version {agent_version}
Smoke Test{status_icon(smoke_outcome)} {status_badge(smoke_outcome)}Responses API invocation
Foundry Evaluation{status_icon(eval_outcome)} {status_badge(eval_outcome)}Golden dataset evaluation
+
+ +
+

Tools Configuration

+ + + + + + +
ToolEnabled
web_search{"✅" if phase in ("1", "3") else "—"}
code_interpreter{"✅" if phase in ("2", "3") else "—"}
+
+ + +
+ + """ + + os.makedirs("reports", exist_ok=True) + with open("reports/evaluation-report.html", "w") as f: + f.write(html) + print("Generated reports/evaluation-report.html") + EOF + env: + AGENT_VERSION: ${{ steps.deploy.outputs.agent_version }} + PHASE: ${{ steps.deploy.outputs.phase }} + MODEL: ${{ vars.GPT_DEPLOYMENT }} + SEMVER: ${{ steps.version.outputs.semver }} + SHA: ${{ github.sha }} + SMOKE_OUTCOME: ${{ steps.smoke.outcome }} + EVAL_OUTCOME: ${{ steps.eval.outcome }} + DEPLOY_OUTCOME: ${{ steps.deploy.outcome }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + + - name: Upload HTML report artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: evaluation-report + path: reports/evaluation-report.html diff --git a/.github/workflows/monitor.yml b/.github/workflows/monitor.yml index cd9bbf3..9972715 100644 --- a/.github/workflows/monitor.yml +++ b/.github/workflows/monitor.yml @@ -41,8 +41,9 @@ jobs: from azure.ai.projects import AIProjectClient from azure.identity import DefaultAzureCredential client = AIProjectClient(endpoint=os.environ['FOUNDRY_PROD_ENDPOINT'], credential=DefaultAzureCredential()) - agent = client.agents.get_agent('tech-trends-agent') - print(agent.version) + versions = client.agents.list_versions(agent_name='tech-trends-agent') + latest = max(versions, key=lambda v: int(v.version)) + print(latest.version) ") echo "version=$VERSION" >> $GITHUB_OUTPUT diff --git a/README.md b/README.md index 6ec7b2c..717a6e9 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ versioned prompts, tool changes, model upgrades, evaluation gates, and rollback. 
## Agent: Technology Trend Research & Analysis -- **Phase 1:** Web search only (Bing Grounding) -- **Phase 2:** Web search + Code Interpreter for data analysis +- **Phase 1:** Web search only (`web_search` tool) +- **Phase 2:** Code Interpreter only (`code_interpreter` tool) for data analysis ## Repository Structure @@ -60,9 +60,8 @@ This creates: - An App Registration with a Service Principal - 3 federated credentials for GitHub OIDC (main branch, pull requests, tags) - RBAC role assignments (Azure AI User, Cognitive Services OpenAI User) -- 7 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`, `BING_CONNECTION_NAME`) - -After bootstrap completes, manually configure the Bing Grounding connection in both Foundry projects via the [Azure AI Foundry portal](https://ai.azure.com) (Project > Connections > + New). +- Model availability validation (checks current + upgrade target model) +- 6 GitHub repository variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_SUBSCRIPTION_ID`, `FOUNDRY_TEST_ENDPOINT`, `FOUNDRY_PROD_ENDPOINT`, `GPT_DEPLOYMENT`) State is saved to `.bootstrap-state.json` for use by the teardown script. 
@@ -88,7 +87,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the ``` - Creates branch `feature/phase1-web-search` -- Configures the agent with Bing Grounding (web search) only +- Configures the agent with the `web_search` tool - Evaluation runs 5 Phase 1 test cases - Opens a PR — `evaluate.yml` triggers, deploys to TEST, runs eval @@ -101,7 +100,7 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the ``` - Creates branch `feature/phase2-code-interpreter` from updated `main` -- Adds `code_interpreter` tool alongside `bing_grounding` +- Replaces `web_search` with `code_interpreter` tool - Extends the system prompt with a `## Data Analysis` section - Evaluation now runs all 8 test cases (Phase 1 + Phase 2) — checks for regressions - Opens a PR @@ -115,10 +114,15 @@ Three scripts simulate the full agent lifecycle by creating PRs that trigger the ``` - Creates branch `chore/model-upgrade-gpt41` +- Upgrades model from `gpt-4o-2024-11-20` (default) to `gpt-4.1` - Updates the `GPT_DEPLOYMENT` GitHub variable to `gpt-4.1` - Adds a model history entry in the agent config - Opens a PR — the eval gate verifies the new model scores at or above thresholds +The bootstrap script validates that both the current model and the upgrade target +(`gpt-4.1`) are available in your chosen Azure region. If `gpt-4.1` is not available, +the script will list alternatives you can use instead. + **After the eval passes, merge the PR.** The full lifecycle demo is complete. ### Lifecycle Flow Diagram @@ -163,7 +167,7 @@ az login # 4. 
Deploy to test source .env # or export vars manually -python scripts/deploy_agent.py --env test --semver 1.0.0 --tools bing_grounding +python scripts/deploy_agent.py --env test --semver 1.0.0 --tools web_search ``` ## CI/CD Workflows @@ -183,6 +187,14 @@ The eval gate uses `microsoft/ai-agent-evals@v3-beta` with four evaluators: - **Groundedness** (threshold: 0.75) - **Coherence** (threshold: 0.80) +A smoke test step runs before evaluation — it invokes the agent with a test query +and verifies a valid response is returned. + +**Note on evaluation naming:** The `ai-agent-evals` action creates a new evaluation +group named "Agent Evaluation" on every run (custom names not yet supported). Each run +is named `Agent tech-trends-agent:`. The PR comment includes the commit SHA +for traceability. + ## Rollback ```bash @@ -194,7 +206,7 @@ Re-deploys the exact prompt, tools, and model from a saved artifact. ## Model Comparison ```bash -python scripts/compare_models.py --current gpt-4o-2024-11-20 --candidate gpt-4.1 --tools bing_grounding +python scripts/compare_models.py --current gpt-4o-2024-11-20 --candidate gpt-4.1 --tools web_search ``` Deploys both model versions to test for side-by-side evaluation. 
diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json index 9d6b181..1bb67ce 100644 --- a/agents/tech-trends-agent.json +++ b/agents/tech-trends-agent.json @@ -1,16 +1,16 @@ { "agent_name": "tech-trends-agent", - "phase": "1", + "phase": "2", "definition": { "model": "${GPT_DEPLOYMENT}", "instructions_file": "prompts/tech-trends-agent.md", "tools": [ - { "type": "web_search" } + { "type": "code_interpreter" } ] }, "eval": { "dataset": "evals/golden-dataset.json", - "phase_filter": "1", + "phase_filter": null, "config": "evals/eval-config.json" }, "_model_history": [ diff --git a/evals/eval-config.json b/evals/eval-config.json index 5cafb0a..832031e 100644 --- a/evals/eval-config.json +++ b/evals/eval-config.json @@ -1,9 +1,9 @@ { "evaluators": [ - "builtin.task_adherence", - "builtin.relevance", - "builtin.groundedness", - "builtin.coherence" + "TaskAdherenceEvaluator", + "RelevanceEvaluator", + "GroundednessEvaluator", + "CoherenceEvaluator" ], "thresholds": { "task_adherence": 0.80, @@ -11,6 +11,6 @@ "groundedness": 0.75, "coherence": 0.80 }, - "phase_filter": "1", - "notes": "Phase 1: Only web search queries evaluated. Phase 2 data analysis queries excluded." + "phase_filter": null, + "notes": "Phase 2: All queries evaluated — both web search (Phase 1) and data analysis (Phase 2)." } diff --git a/prompts/tech-trends-agent.md b/prompts/tech-trends-agent.md index e01c7e9..da79c14 100644 --- a/prompts/tech-trends-agent.md +++ b/prompts/tech-trends-agent.md @@ -27,3 +27,15 @@ Always structure responses as: ## Tone Professional, objective, and jargon-aware. Assume the user is a technology professional who does not need basic concepts explained. + +## Data Analysis (Phase 2) +You now have access to a code interpreter. 
Use it when: +- The user asks you to calculate, compare, or rank numerical data +- You have retrieved structured data (tables, CSVs) and analysis would add value +- You need to produce a formatted comparison table from raw information + +When using code interpreter: +1. First gather the raw data — from the user's message or attached files, or via web search when that tool is enabled +2. Then write and run Python code to process or compare it +3. Present results with the code output clearly labelled +4. Always show the source of the raw data alongside the computed result diff --git a/requirements.txt b/requirements.txt index 0e556a8..30104b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ -azure-ai-projects>=1.0.0b1 +azure-ai-projects>=2.0.0 azure-ai-evaluation>=1.0.0b1 azure-identity>=1.15.0 +openai>=1.66.0 # Dev / Test pytest>=8.0 diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index 05ef463..c3690c3 100644 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -37,9 +37,8 @@ ACCOUNT_NAME="" GITHUB_REPO="san360/agent-devops" GPT_MODEL_NAME="gpt-4o" GPT_MODEL_VERSION="2024-11-20" -GPT_DEPLOYMENT_NAME="gpt-4o-2024-11-20" +GPT_DEPLOYMENT_NAME="gpt-4o" GPT_CAPACITY=30 -BING_CONNECTION_NAME="bing-grounding" SKIP_FOUNDRY=false TEST_ENDPOINT="" PROD_ENDPOINT="" @@ -95,7 +94,7 @@ echo "============================================" echo "" # ---------- Step 1: Resource Group ---------- -echo "[1/7] Creating resource group..." +echo "[1/8] Creating resource group..." az group create \ --name "$RESOURCE_GROUP" \ --location "$LOCATION" \ @@ -103,10 +102,10 @@ az group create \ # ---------- Step 2 & 3: Deploy Foundry projects (or skip) ---------- if [[ "$SKIP_FOUNDRY" == true ]]; then - echo "[2/7] Skipping TEST Foundry project (using provided endpoint)" - echo "[3/7] Skipping PROD Foundry project (using provided endpoint)" + echo "[2/8] Skipping TEST Foundry project (using provided endpoint)" + echo "[3/8] Skipping PROD Foundry project (using provided endpoint)" else - echo "[2/7] Deploying TEST Foundry project..." 
+ echo "[2/8] Deploying TEST Foundry project..." TEST_OUTPUT=$(az deployment group create \ --resource-group "$RESOURCE_GROUP" \ --template-file infra/main.bicep \ @@ -122,7 +121,7 @@ else TEST_ENDPOINT=$(echo "$TEST_OUTPUT" | python3 -c "import sys,json; print(json.load(sys.stdin)['properties']['outputs']['projectEndpoint']['value'])") echo " TEST endpoint: $TEST_ENDPOINT" - echo "[3/7] Deploying PROD Foundry project..." + echo "[3/8] Deploying PROD Foundry project..." PROD_OUTPUT=$(az deployment group create \ --resource-group "$RESOURCE_GROUP" \ --template-file infra/main.bicep \ @@ -140,7 +139,56 @@ else fi -# ---------- Step 4: App Registration + Service Principal ---------- -echo "[4/7] Creating App Registration and Service Principal..." +# ---------- Step 4: Model Availability Validation ---------- +echo "[4/8] Validating model availability..." +echo " Checking if '$GPT_MODEL_NAME' (version: $GPT_MODEL_VERSION) is available in $LOCATION..." + +AVAILABLE_MODELS=$(az cognitiveservices model list \ + --location "$LOCATION" \ + --query "[?model.name=='$GPT_MODEL_NAME' && model.version=='$GPT_MODEL_VERSION'].model.name" \ + -o tsv 2>/dev/null || echo "") + +if [[ -z "$AVAILABLE_MODELS" ]]; then + echo "" + echo " WARNING: Model '$GPT_MODEL_NAME' version '$GPT_MODEL_VERSION' not found in $LOCATION." + echo " Available GPT models in $LOCATION:" + az cognitiveservices model list \ + --location "$LOCATION" \ + --query "[?model.name.starts_with(@,'gpt')].{name:model.name, version:model.version}" \ + -o table 2>/dev/null || echo " (Could not list models — check permissions)" + echo "" + echo " The model upgrade lifecycle demo (Phase 3) requires a second model." + echo " You can continue, but ensure GPT_DEPLOYMENT points to a valid model." + echo "" + read -rp " Continue anyway? [y/N] " confirm + if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then + echo "Aborted." 
+ exit 1 + fi +else + echo " ✓ Model '$GPT_MODEL_NAME' version '$GPT_MODEL_VERSION' is available in $LOCATION" +fi + +# Check for upgrade target model (gpt-4.1) availability for Phase 3 demo +UPGRADE_MODEL="gpt-4.1" +UPGRADE_AVAILABLE=$(az cognitiveservices model list \ + --location "$LOCATION" \ + --query "[?model.name=='$UPGRADE_MODEL'].model.name | [0]" \ + -o tsv 2>/dev/null || echo "") + +if [[ -n "$UPGRADE_AVAILABLE" ]]; then + echo " ✓ Upgrade target '$UPGRADE_MODEL' is also available (Phase 3 model upgrade demo ready)" +else + echo " ⚠ Upgrade target '$UPGRADE_MODEL' not found in $LOCATION." + echo " Phase 3 model upgrade demo may need a different target model." + echo " Available models:" + az cognitiveservices model list \ + --location "$LOCATION" \ + --query "[?model.name.starts_with(@,'gpt')].{name:model.name, version:model.version}" \ + -o table 2>/dev/null || true +fi + +# ---------- Step 5: App Registration + Service Principal ---------- +echo "[5/8] Creating App Registration and Service Principal..." APP_ID=$(az ad app create \ --display-name "$APP_DISPLAY_NAME" \ --query appId -o tsv) @@ -149,8 +197,8 @@ SP_OBJ_ID=$(az ad sp create --id "$APP_ID" --query id -o tsv) echo " Client ID: $APP_ID" echo " SP Object ID: $SP_OBJ_ID" -# ---------- Step 5: Federated Credentials ---------- -echo "[5/7] Adding federated credentials..." +# ---------- Step 6: Federated Credentials ---------- +echo "[6/8] Adding federated credentials..." az ad app federated-credential create \ --id "$APP_ID" \ @@ -182,8 +230,8 @@ az ad app federated-credential create \ }" --output none echo " + release tag credential" -# ---------- Step 6: RBAC Role Assignments ---------- -echo "[6/7] Assigning RBAC roles..." +# ---------- Step 7: RBAC Role Assignments ---------- +echo "[7/8] Assigning RBAC roles..." 
SUBSCRIPTION_ID=$(az account show --query id -o tsv) SCOPE="/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP" @@ -219,8 +267,8 @@ if [[ -n "$TEST_ENDPOINT" ]]; then fi fi -# ---------- Step 7: GitHub Variables ---------- -echo "[7/7] Setting GitHub repository variables..." +# ---------- Step 8: GitHub Variables ---------- +echo "[8/8] Setting GitHub repository variables..." TENANT_ID=$(az account show --query tenantId -o tsv) gh variable set AZURE_CLIENT_ID --body "$APP_ID" --repo "$GITHUB_REPO" @@ -229,8 +277,7 @@ gh variable set AZURE_SUBSCRIPTION_ID --body "$SUBSCRIPTION_ID" --repo "$GITH gh variable set FOUNDRY_TEST_ENDPOINT --body "$TEST_ENDPOINT" --repo "$GITHUB_REPO" gh variable set FOUNDRY_PROD_ENDPOINT --body "$PROD_ENDPOINT" --repo "$GITHUB_REPO" gh variable set GPT_DEPLOYMENT --body "$GPT_DEPLOYMENT_NAME" --repo "$GITHUB_REPO" -gh variable set BING_CONNECTION_NAME --body "$BING_CONNECTION_NAME" --repo "$GITHUB_REPO" -echo " Set 7 variables on $GITHUB_REPO" +echo " Set 6 variables on $GITHUB_REPO" # ---------- Summary ---------- echo "" @@ -252,13 +299,11 @@ echo " Tenant ID: $TENANT_ID" echo " Subscription ID: $SUBSCRIPTION_ID" echo "" echo " GitHub ($GITHUB_REPO):" -echo " 7 repository variables set" +echo " 6 repository variables set" echo " 3 federated credentials configured" echo "" echo " Next steps:" -echo " 1. Configure Bing Grounding connection in both Foundry projects" -echo " (Portal: ai.azure.com -> project -> Connections -> + New)" -echo " 2. Run lifecycle scripts in order:" +echo " 1. 
Run lifecycle scripts in order:" echo " ./scripts/lifecycle/01-phase1-web-search.sh" echo " ./scripts/lifecycle/02-phase2-code-interpreter.sh" echo " ./scripts/lifecycle/03-model-upgrade.sh" @@ -301,9 +346,6 @@ FOUNDRY_PROD_ENDPOINT=$PROD_ENDPOINT # Model deployment GPT_DEPLOYMENT=$GPT_DEPLOYMENT_NAME -# Bing Grounding connection name -BING_CONNECTION_NAME=$BING_CONNECTION_NAME - # Resource metadata RESOURCE_GROUP=$RESOURCE_GROUP LOCATION=$LOCATION diff --git a/scripts/lifecycle/02-phase2-code-interpreter.sh b/scripts/lifecycle/02-phase2-code-interpreter.sh index c2df73e..9fad89b 100644 --- a/scripts/lifecycle/02-phase2-code-interpreter.sh +++ b/scripts/lifecycle/02-phase2-code-interpreter.sh @@ -26,7 +26,7 @@ git pull origin main # Create feature branch git checkout -b "$BRANCH" -# --- Agent config: Phase 2, add code_interpreter --- +# --- Agent config: Phase 2, code_interpreter only --- cat > agents/tech-trends-agent.json << 'AGENT_EOF' { "agent_name": "tech-trends-agent", @@ -35,7 +35,6 @@ cat > agents/tech-trends-agent.json << 'AGENT_EOF' "model": "${GPT_DEPLOYMENT}", "instructions_file": "prompts/tech-trends-agent.md", "tools": [ - { "type": "bing_grounding" }, { "type": "code_interpreter" } ] }, @@ -126,14 +125,14 @@ PR_URL=$(gh pr create \ --title "Phase 2: Add Code Interpreter for Data Analysis" \ --body "$(cat <<'PR_EOF' ## Summary -- Adds `code_interpreter` tool alongside existing `bing_grounding` +- Replaces `web_search` tool with `code_interpreter` for data analysis - Extends system prompt with `## Data Analysis` section - Evaluation now runs **all 8 queries** (Phase 1 + Phase 2) ## Changes | File | Change | |---|---| -| `agents/tech-trends-agent.json` | Added `code_interpreter` to tools, phase → `"2"` | +| `agents/tech-trends-agent.json` | Replaced `web_search` with `code_interpreter`, phase → `"2"` | | `prompts/tech-trends-agent.md` | Added `## Data Analysis (Phase 2)` section | | `evals/eval-config.json` | `phase_filter` → `null` (run all cases) | @@ 
-156,7 +155,7 @@ echo " PR created: $PR_URL" echo "============================================" echo "" echo " The evaluate.yml workflow will now:" -echo " 1. Deploy Phase 2 agent to TEST (both tools)" +echo " 1. Deploy Phase 2 agent to TEST (code_interpreter only)" echo " 2. Run ALL 8 eval queries (Phase 1 + Phase 2)" echo " 3. Check for regressions on existing Phase 1 queries" echo ""