From c470cbd285dcd4cdb64cdfe40d78f3a70d8e36d4 Mon Sep 17 00:00:00 2001 From: sanjay singh Date: Mon, 18 May 2026 16:21:46 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20Phase=202=20=E2=80=94=20add=20code=20in?= =?UTF-8?q?terpreter=20for=20data=20analysis=20capability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/tech-trends-agent.json | 7 ++++--- evals/eval-config.json | 4 ++-- prompts/tech-trends-agent.md | 12 ++++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/agents/tech-trends-agent.json b/agents/tech-trends-agent.json index 9d6b181..27b84c4 100644 --- a/agents/tech-trends-agent.json +++ b/agents/tech-trends-agent.json @@ -1,16 +1,17 @@ { "agent_name": "tech-trends-agent", - "phase": "1", + "phase": "2", "definition": { "model": "${GPT_DEPLOYMENT}", "instructions_file": "prompts/tech-trends-agent.md", "tools": [ - { "type": "web_search" } + { "type": "web_search" }, + { "type": "code_interpreter" } ] }, "eval": { "dataset": "evals/golden-dataset.json", - "phase_filter": "1", + "phase_filter": null, "config": "evals/eval-config.json" }, "_model_history": [ diff --git a/evals/eval-config.json b/evals/eval-config.json index 5cafb0a..dc7cf18 100644 --- a/evals/eval-config.json +++ b/evals/eval-config.json @@ -11,6 +11,6 @@ "groundedness": 0.75, "coherence": 0.80 }, - "phase_filter": "1", - "notes": "Phase 1: Only web search queries evaluated. Phase 2 data analysis queries excluded." + "phase_filter": null, + "notes": "Phase 2: All queries evaluated — both web search (Phase 1) and data analysis (Phase 2)." } diff --git a/prompts/tech-trends-agent.md b/prompts/tech-trends-agent.md index e01c7e9..da79c14 100644 --- a/prompts/tech-trends-agent.md +++ b/prompts/tech-trends-agent.md @@ -27,3 +27,15 @@ Always structure responses as: ## Tone Professional, objective, and jargon-aware. Assume the user is a technology professional who does not need basic concepts explained. 
+ +## Data Analysis (Phase 2) +You now have access to a code interpreter. Use it when: +- The user asks you to calculate, compare, or rank numerical data +- You have retrieved structured data (tables, CSVs) and analysis would add value +- You need to produce a formatted comparison table from raw information + +When using the code interpreter: +1. First retrieve the data via web search +2. Then write and run Python code to process or compare it +3. Present results with the code output clearly labelled +4. Always show the source of the raw data alongside the computed result