From d1b0becc645f44f7869c1c2f016394efd021d758 Mon Sep 17 00:00:00 2001 From: Ashwin Giridharan Date: Wed, 24 Jun 2026 00:10:38 -0700 Subject: [PATCH 1/5] test: add stateful multi-turn tool-call cassettes with previous_response_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace stateless multi-turn cassettes with properly stateful ones recorded against gpt-oss-20b (vLLM 0.22) with VLLM_ENABLE_RESPONSES_API_STORE=1. Each turn chains via previous_response_id with store=true, exercising the accumulator on real server-side conversation state. Cassettes recorded using the existing record_cassette.py (--mode responses): - 3-turn non-streaming: get_job_status → get_error_logs → search_runbook - 5-turn non-streaming: full investigation pipeline (5 different tools) - 3-turn streaming: SSE events with previous_response_id chaining - 3-turn branch: turn 3 diverges from turn 1 (not turn 2) Also includes: - FunctionToolCall.status changed from String to MessageStatus enum - Custom deserializer handles gpt-oss `status: null` → Completed - record.sh + pipeline_tools.json for reproducibility Signed-off-by: Ashwin Giridharan --- .../agentic-core/src/executor/accumulator.rs | 8 +- crates/agentic-core/src/types/io.rs | 18 +- .../tests/accumulator_cassette_test.rs | 439 ++++- .../tool_calls/multi_turn/pipeline_tools.json | 93 + .../cassettes/tool_calls/multi_turn/record.sh | 76 + .../responses_tool_calls_3turn.yaml | 979 ++++++++++ .../responses_tool_calls_3turn_streaming.yaml | 1513 +++++++++++++++ .../responses_tool_calls_5turn.yaml | 1638 +++++++++++++++++ .../responses_tool_calls_branch.yaml | 979 ++++++++++ 9 files changed, 5734 insertions(+), 9 deletions(-) create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json create mode 100755 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh create mode 100644 
crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml diff --git a/crates/agentic-core/src/executor/accumulator.rs b/crates/agentic-core/src/executor/accumulator.rs index bdd5cd0..ccf7cd4 100644 --- a/crates/agentic-core/src/executor/accumulator.rs +++ b/crates/agentic-core/src/executor/accumulator.rs @@ -203,7 +203,7 @@ impl ResponseAccumulator { if !self.accumulated_arguments.is_empty() && fc.arguments.is_empty() { fc.arguments = std::mem::take(&mut self.accumulated_arguments); } - fc.status = "completed".to_string(); + fc.status = MessageStatus::Completed; self.output.push(OutputItem::FunctionCall(fc)); } self.accumulated_arguments.clear(); @@ -277,7 +277,7 @@ impl ResponseAccumulator { call_id: call_id.clone().unwrap_or_default(), name: name.clone().unwrap_or_default(), arguments: String::new(), - status: "in_progress".to_string(), + status: MessageStatus::InProgress, }); } _ => { @@ -776,7 +776,7 @@ mod tests { assert_eq!(fc.call_id, "call_abc"); assert_eq!(fc.name, "get_weather"); assert_eq!(fc.arguments, r#"{"location":"Paris"}"#); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); } else { panic!("expected FunctionCall"); } @@ -1099,7 +1099,7 @@ mod tests { assert_eq!(acc.output.len(), 1); if let OutputItem::FunctionCall(fc) = &acc.output[0] { assert_eq!(fc.arguments, r#"{"x":1}"#); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); } else { panic!("expected FunctionCall"); } diff --git a/crates/agentic-core/src/types/io.rs b/crates/agentic-core/src/types/io.rs index 9e24d8e..b0fe4ef 100644 --- 
a/crates/agentic-core/src/types/io.rs +++ b/crates/agentic-core/src/types/io.rs @@ -1,6 +1,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; +use super::event::MessageStatus; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InputTextContent { #[serde(rename = "type")] @@ -121,7 +123,21 @@ pub struct FunctionToolCall { pub call_id: String, pub name: String, pub arguments: String, - pub status: String, + #[serde(default = "default_completed_status")] + #[serde(deserialize_with = "deserialize_status_or_default")] + pub status: MessageStatus, +} + +fn default_completed_status() -> MessageStatus { + MessageStatus::Completed +} + +fn deserialize_status_or_default<'de, D>(deserializer: D) -> Result<MessageStatus, D::Error> +where + D: serde::Deserializer<'de>, +{ + let opt: Option<MessageStatus> = Option::deserialize(deserializer)?; + Ok(opt.unwrap_or(MessageStatus::Completed)) } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/agentic-core/tests/accumulator_cassette_test.rs b/crates/agentic-core/tests/accumulator_cassette_test.rs index 3a06524..0182e9c 100644 --- a/crates/agentic-core/tests/accumulator_cassette_test.rs +++ b/crates/agentic-core/tests/accumulator_cassette_test.rs @@ -8,6 +8,7 @@ use serde::Deserialize; use agentic_core::executor::accumulator::ResponseAccumulator; +use agentic_core::types::event::MessageStatus; use agentic_core::types::io::OutputItem; const CASSETTE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/events"); @@ -106,7 +107,7 @@ fn test_accumulator_cassette_function_call_vllm_gemma4() { if let OutputItem::FunctionCall(fc) = &payload.output[0] { assert_eq!(fc.name, expected_fc.name); assert_eq!(fc.arguments, expected_fc.arguments); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); assert!(!fc.call_id.is_empty(), "call_id should be populated"); assert!(!fc.id.is_empty(), "id should be populated"); } else { @@ -167,7 +168,7 @@ fn test_tool_calls_cassette_auto_streaming() { if let
OutputItem::FunctionCall(fc) = item { assert!(!fc.name.is_empty(), "function call name must not be empty"); assert!(!fc.arguments.is_empty(), "function call arguments must not be empty"); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); assert!(!fc.call_id.is_empty(), "call_id must be populated"); } } @@ -200,7 +201,7 @@ fn test_tool_calls_cassette_required_streaming() { for item in &function_calls { if let OutputItem::FunctionCall(fc) = item { - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); } } } @@ -292,7 +293,7 @@ fn test_tool_calls_cassette_auto_nonstreaming() { if let OutputItem::FunctionCall(fc) = item { assert!(!fc.name.is_empty()); assert!(!fc.arguments.is_empty()); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); assert!(!fc.call_id.is_empty()); } } @@ -467,3 +468,433 @@ fn test_reasoning_cassette_gpt_oss_streaming() { "reasoning-only cassette should not produce function calls" ); } + +// === Stateful multi-turn cassette tests (previous_response_id chaining) === +// +// These cassettes are recorded against gpt-oss-20b with `store=true` and +// `previous_response_id` chaining (via record_cassette.py --mode responses). +// They exercise realistic multi-turn conversations where the server maintains +// conversation state — the key pattern our accumulator must handle for PR #67. +// +// Scenario: SRE debugging a failed ETL pipeline job-382. +// Tools: get_job_status, get_error_logs, search_runbook, run_analysis, +// restart_job, web_search. 
+ +const MULTI_TURN_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/tool_calls/multi_turn"); + +// --- Helpers --- + +fn process_nonstreaming_turn(cassette: &TurnCassette, turn_idx: usize, model: &str) -> Vec<OutputItem> { + let body = cassette.turns[turn_idx] + .response + .body + .as_ref() + .unwrap_or_else(|| panic!("turn {} must have response body", turn_idx + 1)); + let body_str = serde_json::to_string(body).unwrap(); + let acc = ResponseAccumulator::from_json(&body_str, None).unwrap(); + let payload = acc.finalize(model, None, None); + assert_eq!(payload.status, "completed"); + payload.output +} + +fn process_streaming_turn(cassette: &TurnCassette, turn_idx: usize, model: &str) -> Vec<OutputItem> { + let data_lines = extract_data_lines(&cassette.turns[turn_idx].response.sse); + assert!( + !data_lines.is_empty(), + "streaming turn {} must have SSE data lines", + turn_idx + 1 + ); + let acc = ResponseAccumulator::from_sse_lines(data_lines, None); + let payload = acc.finalize(model, None, None); + assert_eq!(payload.status, "completed"); + payload.output +} + +fn count_function_calls(output: &[OutputItem]) -> usize { + output + .iter() + .filter(|item| matches!(item, OutputItem::FunctionCall(_))) + .count() +} + +fn get_function_call_names(output: &[OutputItem]) -> Vec<String> { + output + .iter() + .filter_map(|item| { + if let OutputItem::FunctionCall(fc) = item { + Some(fc.name.clone()) + } else { + None + } + }) + .collect() +} + +fn has_reasoning(output: &[OutputItem]) -> bool { + output.iter().any(|item| matches!(item, OutputItem::Reasoning(_))) +} + +/// Verifies that the cassette has proper `previous_response_id` chaining: +/// turn 1 has no `prev_id`, subsequent turns reference the prior response.
+fn assert_stateful_chaining(cassette: &TurnCassette) { + for (i, turn) in cassette.turns.iter().enumerate() { + let body = turn.request.as_mapping().expect("request must be a map"); + let req_body = body + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .expect("request.body must be a map"); + let store = req_body + .get(serde_yml::Value::String("store".into())) + .and_then(serde_yml::Value::as_bool) + .unwrap_or(false); + assert!(store, "turn {} must have store=true", i + 1); + + if i == 0 { + let prev_id = req_body.get(serde_yml::Value::String("previous_response_id".into())); + assert!( + prev_id.is_none() || prev_id.unwrap().is_null(), + "turn 1 should have no previous_response_id" + ); + } else { + let prev_id = req_body.get(serde_yml::Value::String("previous_response_id".into())); + assert!( + prev_id.is_some() && !prev_id.unwrap().is_null(), + "turn {} must have a previous_response_id", + i + 1 + ); + } + } +} + +/// Extracts the `arguments` JSON string from the first function call in output items. 
+fn get_first_fc_arguments(output: &[OutputItem]) -> String { + output + .iter() + .find_map(|item| { + if let OutputItem::FunctionCall(fc) = item { + Some(fc.arguments.clone()) + } else { + None + } + }) + .expect("output must contain at least one function call") +} + +// ═══════════════════════════════════════════════════════════════════ +// Stateful 3-turn: get_job_status → get_error_logs → search_runbook +// Non-streaming, store=true, previous_response_id chain +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_3turn_chaining() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + assert_eq!(cassette.turns.len(), 3); + assert_stateful_chaining(&cassette); +} + +#[test] +fn test_stateful_responses_3turn_tool_calls() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + + let t1 = process_nonstreaming_turn(&cassette, 0, "openai/gpt-oss-20b"); + let t1_names = get_function_call_names(&t1); + assert_eq!(count_function_calls(&t1), 1); + assert_eq!(t1_names, vec!["get_job_status"]); + assert!(has_reasoning(&t1)); + + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_names = get_function_call_names(&t2); + assert_eq!(count_function_calls(&t2), 1); + assert_eq!(t2_names, vec!["get_error_logs"]); + + let t3 = process_nonstreaming_turn(&cassette, 2, "openai/gpt-oss-20b"); + let t3_names = get_function_call_names(&t3); + assert_eq!(count_function_calls(&t3), 1); + assert_eq!(t3_names, vec!["search_runbook"]); +} + +/// Context retention proof: turn 2 prompt says "that job" (no explicit job ID), +/// but the model resolves it to "job-382" because `previous_response_id` gives +/// it access to turn 1's conversation state. 
+#[test] +fn test_stateful_responses_3turn_context_retention() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + + // Turn 2 prompt says "that job" — model must resolve from turn 1 context + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "turn 2 must resolve 'that job' to 'job-382' via retained context, got: {t2_args}" + ); + + // Turn 3 prompt says "those errors" — model must recall turn 2's investigation + let t3 = process_nonstreaming_turn(&cassette, 2, "openai/gpt-oss-20b"); + let t3_args = get_first_fc_arguments(&t3); + assert!( + t3_args.contains("job-382") || t3_args.contains("error") || t3_args.contains("ETL"), + "turn 3 must reference context from earlier turns, got: {t3_args}" + ); +} + +#[test] +fn test_stateful_responses_3turn_null_status_deserialization() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + for i in 0..3 { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + for item in &output { + if let OutputItem::FunctionCall(fc) = item { + assert_eq!( + fc.status, + MessageStatus::Completed, + "turn {} function_call status must default to Completed (gpt-oss emits null)", + i + 1 + ); + } + } + } +} + +// ═══════════════════════════════════════════════════════════════════ +// Stateful 5-turn: full investigation pipeline +// get_job_status → get_error_logs → search_runbook → run_analysis → restart_job +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_5turn_chaining() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); + assert_eq!(cassette.turns.len(), 5); + assert_stateful_chaining(&cassette); +} + +#[test] +fn test_stateful_responses_5turn_tool_sequence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, 
"responses_tool_calls_5turn.yaml"); + + let expected_tools = [ + "get_job_status", + "get_error_logs", + "search_runbook", + "run_analysis", + "restart_job", + ]; + for (i, expected) in expected_tools.iter().enumerate() { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + let names = get_function_call_names(&output); + assert_eq!(names.len(), 1, "turn {} should call exactly 1 tool", i + 1); + assert_eq!(&names[0], expected, "turn {} should call {expected}", i + 1); + assert!(has_reasoning(&output), "turn {} should have reasoning", i + 1); + } +} + +/// Context retention proof for 5-turn: turn 5 says "restart it" without naming +/// job-382, but the model resolves correctly because all prior context is retained. +#[test] +fn test_stateful_responses_5turn_context_retention() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); + + // Turn 2: "that failed job" → must resolve to job-382 + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "turn 2 'that failed job' must resolve to job-382, got: {t2_args}" + ); + + // Turn 5: "restart it" → must resolve to job-382 with correct params + let t5 = process_nonstreaming_turn(&cassette, 4, "openai/gpt-oss-20b"); + let t5_args = get_first_fc_arguments(&t5); + assert!( + t5_args.contains("job-382"), + "turn 5 'restart it' must resolve to job-382, got: {t5_args}" + ); + assert!( + t5_args.contains("64"), + "turn 5 must include memory_override_gb=64, got: {t5_args}" + ); +} + +#[test] +fn test_stateful_responses_5turn_function_call_fields() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); + for i in 0..5 { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + for item in &output { + if let OutputItem::FunctionCall(fc) = item { + assert!(!fc.id.is_empty(), "turn {} fc.id must not be 
empty", i + 1); + assert!(!fc.call_id.is_empty(), "turn {} fc.call_id must not be empty", i + 1); + assert!(!fc.name.is_empty(), "turn {} fc.name must not be empty", i + 1); + assert!( + !fc.arguments.is_empty(), + "turn {} fc.arguments must not be empty", + i + 1 + ); + assert_eq!(fc.status, MessageStatus::Completed); + } + } + } +} + +// ═══════════════════════════════════════════════════════════════════ +// Stateful 3-turn streaming: SSE events with previous_response_id +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_streaming_3turn() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn_streaming.yaml"); + assert_eq!(cassette.turns.len(), 3); + + for i in 0..3 { + let output = process_streaming_turn(&cassette, i, "openai/gpt-oss-20b"); + assert!( + count_function_calls(&output) >= 1, + "streaming turn {} must produce at least one function_call", + i + 1 + ); + for item in &output { + if let OutputItem::FunctionCall(fc) = item { + assert!(!fc.call_id.is_empty(), "streaming fc must have call_id"); + assert!(!fc.name.is_empty(), "streaming fc must have name"); + assert!(!fc.arguments.is_empty(), "streaming fc must have arguments"); + assert_eq!(fc.status, MessageStatus::Completed); + } + } + } +} + +/// Context retention in streaming mode: turn 2 says "that job" and the model +/// resolves it to "job-382" even in streaming (SSE) delivery. 
+#[test] +fn test_stateful_responses_streaming_context_retention() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn_streaming.yaml"); + + // Turn 2: "that job" → must resolve to job-382 in streaming mode + let t2 = process_streaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "streaming turn 2 must resolve 'that job' to job-382, got: {t2_args}" + ); +} + +// ═══════════════════════════════════════════════════════════════════ +// Branching: turn 3 diverges from turn 1 (not turn 2) +// Tests previous_response_id pointing back to an earlier response +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_branch_divergence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_branch.yaml"); + assert_eq!(cassette.turns.len(), 3); + + // Turn 1: no prev_id + let body1 = cassette.turns[0].request.as_mapping().unwrap(); + let req1 = body1 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev1 = req1.get(serde_yml::Value::String("previous_response_id".into())); + assert!(prev1.is_none() || prev1.unwrap().is_null()); + + // Turn 2: prev_id = turn 1's response id + let body2 = cassette.turns[1].request.as_mapping().unwrap(); + let req2 = body2 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev2 = req2 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 2 must have prev_id"); + + // Turn 3: prev_id = turn 1's response id (branches back, NOT from turn 2) + let body3 = cassette.turns[2].request.as_mapping().unwrap(); + let req3 = body3 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev3 = req3 + 
.get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 3 must have prev_id"); + + // Turn 2 and Turn 3 both point to the same response (turn 1) + assert_eq!( + prev2, prev3, + "branch: turn 3 should reference same prev_id as turn 2 (turn 1's response)" + ); +} + +#[test] +fn test_stateful_responses_branch_all_turns_parse() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_branch.yaml"); + for i in 0..3 { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + assert!( + count_function_calls(&output) >= 1, + "branch turn {} must produce a function_call", + i + 1 + ); + assert!(has_reasoning(&output), "branch turn {} should have reasoning", i + 1); + } +} + +// ═══════════════════════════════════════════════════════════════════ +// Cross-cassette: all stateful cassettes parse without error +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_all_stateful_cassettes_parse_without_error() { + let nonstreaming = [ + "responses_tool_calls_3turn.yaml", + "responses_tool_calls_5turn.yaml", + "responses_tool_calls_branch.yaml", + ]; + + for filename in &nonstreaming { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, filename); + for i in 0..cassette.turns.len() { + let body = cassette.turns[i] + .response + .body + .as_ref() + .unwrap_or_else(|| panic!("{filename} turn {i} must have body")); + let body_str = serde_json::to_string(body).unwrap(); + let result = ResponseAccumulator::from_json(&body_str, None); + assert!( + result.is_ok(), + "{filename} turn {} failed to parse: {:?}", + i + 1, + result.err() + ); + let payload = result.unwrap().finalize("openai/gpt-oss-20b", None, None); + assert_eq!( + payload.status, + "completed", + "{filename} turn {} status != completed", + i + 1 + ); + } + } + + let streaming = ["responses_tool_calls_3turn_streaming.yaml"]; + for filename in &streaming { + let cassette = 
load_turn_cassette_from(MULTI_TURN_DIR, filename); + for i in 0..cassette.turns.len() { + let data_lines = extract_data_lines(&cassette.turns[i].response.sse); + assert!( + !data_lines.is_empty(), + "{filename} turn {} has no SSE data lines", + i + 1 + ); + let acc = ResponseAccumulator::from_sse_lines(data_lines, None); + let payload = acc.finalize("openai/gpt-oss-20b", None, None); + assert_eq!( + payload.status, + "completed", + "{filename} turn {} status != completed", + i + 1 + ); + } + } +} diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json new file mode 100644 index 0000000..d3df440 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json @@ -0,0 +1,93 @@ +[ + { + "type": "function", + "name": "get_job_status", + "description": "Query the pipeline orchestrator for a job's current state, metadata, and failure info.", + "parameters": { + "type": "object", + "properties": { + "job_id": {"type": "string", "description": "Pipeline job identifier, e.g. 
job-382"} + }, + "required": ["job_id"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "get_error_logs", + "description": "Fetch error logs and stack traces for a failed pipeline job.", + "parameters": { + "type": "object", + "properties": { + "job_id": {"type": "string", "description": "Pipeline job identifier"}, + "severity": {"type": "string", "enum": ["WARN", "ERROR", "FATAL", "ALL"], "description": "Minimum severity filter"}, + "max_entries": {"type": "integer", "description": "Maximum log entries to return (default 50)"} + }, + "required": ["job_id", "severity", "max_entries"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "search_runbook", + "description": "Search internal operations runbooks for troubleshooting procedures.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Natural language search query"}, + "max_results": {"type": "integer", "description": "Maximum results to return (default 5)"} + }, + "required": ["query", "max_results"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "run_analysis", + "description": "Execute a Python analysis script in a sandboxed environment for log parsing or data inspection.", + "parameters": { + "type": "object", + "properties": { + "code": {"type": "string", "description": "Python code to execute"}, + "timeout_seconds": {"type": "integer", "description": "Execution timeout (default 30)"} + }, + "required": ["code", "timeout_seconds"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "restart_job", + "description": "Restart a failed pipeline job with optional configuration overrides.", + "parameters": { + "type": "object", + "properties": { + "job_id": {"type": "string", "description": "Pipeline job identifier to restart"}, + "memory_override_gb": {"type": "integer", 
"description": "Override memory limit in GB"}, + "skip_completed_stages": {"type": "boolean", "description": "Skip stages that completed successfully (default true)"}, + "priority": {"type": "string", "enum": ["low", "normal", "high"], "description": "Queue priority"} + }, + "required": ["job_id", "memory_override_gb", "skip_completed_stages", "priority"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "web_search", + "description": "Search the public web for technical documentation, blog posts, and known issues.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"} + }, + "required": ["query"], + "additionalProperties": false + }, + "strict": true + } +] diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh new file mode 100755 index 0000000..ed004db --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Records stateful multi-turn tool-call cassettes using record_cassette.py +# Backend: vLLM gpt-oss-20b with VLLM_ENABLE_RESPONSES_API_STORE=1 +# Scenario: SRE debugging failed ETL pipeline job-382 +# +# IMPORTANT: These cassettes prove context retention via ambiguous prompts. +# Turns 2+ use pronouns ("that job", "it", "those errors") that can ONLY resolve +# correctly if previous_response_id preserves server-side conversation state. 
+# +# Prerequisites: +# - SSH tunnel to G6e instance: ssh -L 8100:localhost:8100 ubuntu@<instance-host> +# - gpt-oss container running with VLLM_ENABLE_RESPONSES_API_STORE=1 +# - Tools file at /tmp/pipeline_tools.json (6 tools: get_job_status, +# get_error_logs, search_runbook, run_analysis, restart_job, web_search) + +set -euo pipefail + +RECORDER="$(dirname "$0")/../../cassettes/record_cassette.py" +TOOLS="/tmp/pipeline_tools.json" +OUTPUT_DIR="$(dirname "$0")" +VLLM_URL="http://localhost:8100" +MODEL="openai/gpt-oss-20b" + +echo "=== 3-turn non-streaming (context retention: 'that job' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Now pull the error logs for that job. Use severity ERROR and max 10 entries." \ + "Based on those errors, search the runbook for troubleshooting procedures. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --output "$OUTPUT_DIR/responses_tool_calls_3turn.yaml" + +echo "" +echo "=== 5-turn non-streaming (context retention: 'restart it' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. ETL pipeline job-382 failed overnight. What is its current status?" \ + "Pull the error logs for that failed job. Use severity ERROR and max 20 entries." \ + "Search the runbook for how to fix the issue found in those logs. Max 5 results." \ + "Run this analysis code to summarize: import json; print(json.dumps({'job': 'job-382', 'error': 'OOM', 'stage': 'transform', 'recommendation': 'increase memory to 64GB'}))" \ + "Great. Now restart it with 64 GB memory, skip completed stages, and high priority."
\ +| python3 "$RECORDER" \ + --turns 5 --mode responses --no-stream \ + --model "$MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --output "$OUTPUT_DIR/responses_tool_calls_5turn.yaml" + +echo "" +echo "=== 3-turn streaming (context retention in SSE mode: 'that job' resolves) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the status of pipeline job-382." \ + "Get the error logs for that job with severity FATAL and max 5 entries." \ + "Search the web for how to fix that type of error in Spark pipelines." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --stream \ + --model "$MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --output "$OUTPUT_DIR/responses_tool_calls_3turn_streaming.yaml" + +echo "" +echo "=== 3-turn branch (turn 3 diverges from turn 1, skipping turn 2's context) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Get the error logs for that job with severity ERROR and max 10 entries." \ + "Instead of investigating errors, search the runbook for how to increase memory limits for ETL jobs. Max 3 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --branch-from 1 --branch-turn-number 3 \ + --output "$OUTPUT_DIR/responses_tool_calls_branch.yaml" + +echo "" +echo "=== All cassettes recorded ===" +ls -la "$OUTPUT_DIR"/*.yaml diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml new file mode 100644 index 0000000..1c9ac53 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml @@ -0,0 +1,979 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. 
+ model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285886 + frequency_penalty: 0.0 + id: resp_aed8c5ff2dbee0fe + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3660 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to query status. 
+ type: reasoning_text + encrypted_content: null + id: rs_bb29cc3827967d2c + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_ac5da1db6eed3a34 + id: fc_ac5da1db6eed3a34 + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 436 + input_tokens_details: + cached_tokens: 416 + cached_tokens_per_turn: + - 416 + input_tokens_per_turn: + - 436 + output_tokens: 34 + output_tokens_details: + output_tokens_per_turn: + - 34 + reasoning_tokens: 16 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 470 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: Now pull the error logs for that job. Use severity ERROR and max 10 entries. + model: openai/gpt-oss-20b + previous_response_id: resp_aed8c5ff2dbee0fe + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285887 + frequency_penalty: 0.0 + id: resp_b23d3955c1c74ce6 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3602 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Need error logs. + type: reasoning_text + encrypted_content: null + id: rs_b1ad7d4fe3886bad + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_8467ac7d52d0ea0b + id: fc_8467ac7d52d0ea0b + name: get_error_logs + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_aed8c5ff2dbee0fe + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 494 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 494 + output_tokens: 41 + output_tokens_details: + output_tokens_per_turn: + - 41 + reasoning_tokens: 23 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 535 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Based on those errors, search the runbook for troubleshooting procedures. + Max 5 results. + model: openai/gpt-oss-20b + previous_response_id: resp_b23d3955c1c74ce6 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285887 + frequency_penalty: 0.0 + id: resp_b48bd4930aecdef6 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3537 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to search runbook. 
+ type: reasoning_text + encrypted_content: null + id: rs_a76eedf3b8dff018 + status: null + summary: [] + type: reasoning + - arguments: '{"query":"ETL pipeline job-382 error handling", "max_results":5}' + call_id: call_9a6c34c7b699ef2a + id: fc_9a6c34c7b699ef2a + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_b23d3955c1c74ce6 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 559 + input_tokens_details: + cached_tokens: 496 + cached_tokens_per_turn: + - 496 + input_tokens_per_turn: + - 559 + output_tokens: 45 + output_tokens_details: + output_tokens_per_turn: + - 45 + reasoning_tokens: 27 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 604 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml new file mode 100644 index 0000000..6045dab --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml @@ -0,0 +1,1513 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the status of pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"response":{"id":"resp_84fc22eb02e37767","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"response":{"id":"resp_84fc22eb02e37767","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"id":"msg_9f8f443f3d36c276","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.reasoning_part.added + + ' + - 'data: {"content_index":0,"item_id":"msg_9f8f443f3d36c276","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"We","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" 
need","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" to","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" query","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" pipeline","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" orchestr","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"ator","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" for","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" job","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"-","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: 
{"content_index":0,"delta":"382","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" Use","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" functions","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".get","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_job","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_status","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":20,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":21,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.done + + ' + - 'data: {"content_index":0,"item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":22,"text":"We + need to query pipeline orchestrator for job-382. 
Use functions.get_job_status.","type":"response.reasoning_text.done"} + + ' + - ' + + ' + - 'event: response.reasoning_part.done + + ' + - 'data: {"content_index":0,"item_id":"msg_9f8f443f3d36c276","output_index":0,"part":{"text":"We + need to query pipeline orchestrator for job-382. Use functions.get_job_status.","type":"reasoning_text"},"sequence_number":23,"type":"response.reasoning_part.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"id":"msg_9f8f443f3d36c276","summary":[],"type":"reasoning","content":[{"text":"We + need to query pipeline orchestrator for job-382. Use functions.get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":24,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"arguments":"","call_id":"call_bfdbcf3562684b32","name":"get_job_status","type":"function_call","id":"fc_a7e910a365eca0be","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":25,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\"","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_id","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":\"","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} 
+ + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"-","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"382","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\"}","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":33,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_a7e910a365eca0be","name":"get_job_status","output_index":1,"sequence_number":34,"type":"response.function_call_arguments.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_bfdbcf3562684b32","name":"get_job_status","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":35,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"response":{"id":"resp_84fc22eb02e37767","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_b20f9cf797c0fb6d","summary":[],"type":"reasoning","content":[{"text":"We + need to query pipeline orchestrator for job-382. 
Use functions.get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_814e8f02d1cc07ae","name":"get_job_status","type":"function_call","id":"fc_814e8f02d1cc07ae","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 
30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":433,"input_tokens_details":{"cached_tokens":432,"input_tokens_per_turn":[433],"cached_tokens_per_turn":[432]},"output_tokens":46,"output_tokens_details":{"reasoning_tokens":28,"tool_output_tokens":0,"output_tokens_per_turn":[46],"tool_output_tokens_per_turn":[0]},"total_tokens":479},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":36,"type":"response.completed"} + + ' + - 
' + + ' + status_code: 200 +- filename: t2 + request: + body: + input: Get the error logs for that job with severity FATAL and max 5 entries. + model: openai/gpt-oss-20b + previous_response_id: resp_84fc22eb02e37767 + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"response":{"id":"resp_bc7651309b00838d","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3594,"max_tool_calls":null,"previous_response_id":"resp_84fc22eb02e37767","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"response":{"id":"resp_bc7651309b00838d","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3594,"max_tool_calls":null,"previous_response_id":"resp_84fc22eb02e37767","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"id":"msg_a4d66d1800b6b304","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.reasoning_part.added + + ' + - 'data: {"content_index":0,"item_id":"msg_a4d66d1800b6b304","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"Call","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: 
{"content_index":0,"delta":" get","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_error","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_logs","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.done + + ' + - 'data: {"content_index":0,"item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":9,"text":"Call + get_error_logs.","type":"response.reasoning_text.done"} + + ' + - ' + + ' + - 'event: response.reasoning_part.done + + ' + - 'data: {"content_index":0,"item_id":"msg_a4d66d1800b6b304","output_index":0,"part":{"text":"Call + get_error_logs.","type":"reasoning_text"},"sequence_number":10,"type":"response.reasoning_part.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"id":"msg_a4d66d1800b6b304","summary":[],"type":"reasoning","content":[{"text":"Call + get_error_logs.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":11,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"arguments":"","call_id":"call_a7b9ccec2aceeeac","name":"get_error_logs","type":"function_call","id":"fc_bd0b870ba5f84b7f","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":12,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: 
response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_id","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"-","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"382","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\",\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"severity","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: 
response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"F","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"ATAL","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\",\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"max","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_entries","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"5","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"}","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: 
response.function_call_arguments.done + + ' + - 'data: {"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_bd0b870ba5f84b7f","name":"get_error_logs","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_a7b9ccec2aceeeac","name":"get_error_logs","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":32,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"response":{"id":"resp_bc7651309b00838d","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_811e7867685d357b","summary":[],"type":"reasoning","content":[{"text":"Call + get_error_logs.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_b7430f4eccc1208e","name":"get_error_logs","type":"function_call","id":"fc_b7430f4eccc1208e","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3594,"max_tool_calls":null,"previous_response_id":"resp_84fc22eb02e37767","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":502,"input_tokens_details":{"cached_tokens":448,"input_tokens_per_turn":[502],"cached_tokens_per_turn":[448]},"output_tokens":43,"output_tokens_details":{"reasoning_tokens":25,"tool_output_tokens":0,"output_tokens_per_turn":[43],"tool_output_tokens_per_turn":[0]},"total_tokens":545},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":33,"type":"response.completed"} + + ' + - ' + + ' + status_code: 200 +- filename: t3 + request: + body: + input: Search the web for how to fix that type of error in Spark pipelines. + model: openai/gpt-oss-20b + previous_response_id: resp_bc7651309b00838d + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"response":{"id":"resp_94ce25968dc3b4f4","created_at":1782285920,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3530,"max_tool_calls":null,"previous_response_id":"resp_bc7651309b00838d","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"response":{"id":"resp_94ce25968dc3b4f4","created_at":1782285920,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed 
successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3530,"max_tool_calls":null,"previous_response_id":"resp_bc7651309b00838d","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"id":"msg_b211473eff0066b4","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.reasoning_part.added + + ' + - 'data: {"content_index":0,"item_id":"msg_b211473eff0066b4","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"Use","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: 
{"content_index":0,"delta":" web","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_search","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.done + + ' + - 'data: {"content_index":0,"item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":8,"text":"Use + web_search.","type":"response.reasoning_text.done"} + + ' + - ' + + ' + - 'event: response.reasoning_part.done + + ' + - 'data: {"content_index":0,"item_id":"msg_b211473eff0066b4","output_index":0,"part":{"text":"Use + web_search.","type":"reasoning_text"},"sequence_number":9,"type":"response.reasoning_part.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"id":"msg_b211473eff0066b4","summary":[],"type":"reasoning","content":[{"text":"Use + web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":10,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"arguments":"","call_id":"call_a8f833f716bcbc25","name":"web_search","type":"function_call","id":"fc_a389865814a7198c","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":11,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\"","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":12,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: 
response.function_call_arguments.delta + + ' + - 'data: {"delta":"query","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":\"","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"how","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" to","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" fix","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" F","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"ATAL","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" error","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" in","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: 
response.function_call_arguments.delta + + ' + - 'data: {"delta":" Spark","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" pipelines","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\"}","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"arguments":"{\"query\":\"how to fix FATAL error in Spark pipelines\"}","item_id":"fc_a389865814a7198c","name":"web_search","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"arguments":"{\"query\":\"how to fix FATAL error in Spark pipelines\"}","call_id":"call_a8f833f716bcbc25","name":"web_search","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":26,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"response":{"id":"resp_94ce25968dc3b4f4","created_at":1782285920,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_a101c61e5eef4c23","summary":[],"type":"reasoning","content":[{"text":"Use + web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"query\":\"how + to fix FATAL error in Spark 
pipelines\"}","call_id":"call_9abf8aa146f1da2b","name":"web_search","type":"function_call","id":"fc_9abf8aa146f1da2b","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed 
environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3530,"max_tool_calls":null,"previous_response_id":"resp_bc7651309b00838d","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":566,"input_tokens_details":{"cached_tokens":512,"input_tokens_per_turn":[566],"cached_tokens_per_turn":[512]},"output_tokens":36,"output_tokens_details":{"reasoning_tokens":19,"tool_output_tokens":0,"output_tokens_per_turn":[36],"tool_output_tokens_per_turn":[0]},"total_tokens":602},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":27,"type":"response.completed"} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml 
b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml new file mode 100644 index 0000000..3c9b981 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml @@ -0,0 +1,1638 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. ETL pipeline job-382 failed overnight. What + is its current status? + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285901 + frequency_penalty: 0.0 + id: resp_87c0e67a68693479 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3657 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Need to call get_job_status. 
+ type: reasoning_text + encrypted_content: null + id: rs_923202f17e327c43 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_80a4bed3ce07c6e4 + id: fc_80a4bed3ce07c6e4 + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 439 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 439 + output_tokens: 35 + output_tokens_details: + output_tokens_per_turn: + - 35 + reasoning_tokens: 17 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 474 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: Pull the error logs for that failed job. Use severity ERROR and max 20 + entries. + model: openai/gpt-oss-20b + previous_response_id: resp_87c0e67a68693479 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285902 + frequency_penalty: 0.0 + id: resp_ae25eb0ed9f87a15 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3598 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Call get_error_logs. + type: reasoning_text + encrypted_content: null + id: rs_99a65d2fb5f1b2f7 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' + call_id: call_b3fe180eeb3ba93c + id: fc_b3fe180eeb3ba93c + name: get_error_logs + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_87c0e67a68693479 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 498 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 498 + output_tokens: 42 + output_tokens_details: + output_tokens_per_turn: + - 42 + reasoning_tokens: 24 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 540 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Search the runbook for how to fix the issue found in those logs. Max + 5 results. + model: openai/gpt-oss-20b + previous_response_id: resp_ae25eb0ed9f87a15 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285902 + frequency_penalty: 0.0 + id: resp_854c134ceb4f6127 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3530 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Search runbook. 
+ type: reasoning_text + encrypted_content: null + id: rs_a8d24e26d57877e0 + status: null + summary: [] + type: reasoning + - arguments: '{"query":"pipeline job-382 error logs fix","max_results":5}' + call_id: call_8fae155d7b2ca7f9 + id: fc_8fae155d7b2ca7f9 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_ae25eb0ed9f87a15 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 566 + input_tokens_details: + cached_tokens: 496 + cached_tokens_per_turn: + - 496 + input_tokens_per_turn: + - 566 + output_tokens: 40 + output_tokens_details: + output_tokens_per_turn: + - 40 + reasoning_tokens: 22 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 606 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t4 + request: + body: + input: 'Run this analysis code to summarize: import json; print(json.dumps({''job'': + ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': + ''increase memory to 64GB''}))' + model: openai/gpt-oss-20b + previous_response_id: resp_854c134ceb4f6127 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285902 + frequency_penalty: 0.0 + id: resp_addadf40977f7a22 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3438 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Execute run_analysis. + type: reasoning_text + encrypted_content: null + id: rs_930ef0af2fb14041 + status: null + summary: [] + type: reasoning + - arguments: '{"code":"import json; print(json.dumps({''job'': ''job-382'', + ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase + memory to 64GB''}))","timeout_seconds":30}' + call_id: call_81a626f9aa98b83f + id: fc_81a626f9aa98b83f + name: run_analysis + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_854c134ceb4f6127 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 658 + input_tokens_details: + cached_tokens: 576 + cached_tokens_per_turn: + - 576 + input_tokens_per_turn: + - 658 + output_tokens: 71 + output_tokens_details: + output_tokens_per_turn: + - 71 + reasoning_tokens: 54 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 729 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: + input: Great. Now restart it with 64 GB memory, skip completed stages, and high + priority. + model: openai/gpt-oss-20b + previous_response_id: resp_addadf40977f7a22 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285903 + frequency_penalty: 0.0 + id: resp_8d3aac3867ddc3b7 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3342 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Restart_job. 
+ type: reasoning_text + encrypted_content: null + id: rs_9af96e78c16c9ac1 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_935ef947ceb46eec + id: fc_935ef947ceb46eec + name: restart_job + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_addadf40977f7a22 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 754 + input_tokens_details: + cached_tokens: 656 + cached_tokens_per_turn: + - 656 + input_tokens_per_turn: + - 754 + output_tokens: 48 + output_tokens_details: + output_tokens_per_turn: + - 48 + reasoning_tokens: 31 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 802 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml new file mode 100644 index 0000000..bb554a2 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml @@ -0,0 +1,979 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285929 + frequency_penalty: 0.0 + id: resp_887f6295aa43fb96 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3660 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to call get_job_status. 
+ type: reasoning_text + encrypted_content: null + id: rs_91cfc91f69b6f252 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_9e471a12b0e01b14 + id: fc_9e471a12b0e01b14 + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 436 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 436 + output_tokens: 36 + output_tokens_details: + output_tokens_per_turn: + - 36 + reasoning_tokens: 18 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 472 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: Get the error logs for that job with severity ERROR and max 10 entries. + model: openai/gpt-oss-20b + previous_response_id: resp_887f6295aa43fb96 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285930 + frequency_penalty: 0.0 + id: resp_bd929a6ae43e5ff9 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3602 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Use get_error_logs. + type: reasoning_text + encrypted_content: null + id: rs_825ec278d2c86186 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_80c557a803ad1d26 + id: fc_80c557a803ad1d26 + name: get_error_logs + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_887f6295aa43fb96 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 494 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 494 + output_tokens: 42 + output_tokens_details: + output_tokens_per_turn: + - 42 + reasoning_tokens: 24 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 536 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Instead of investigating errors, search the runbook for how to increase + memory limits for ETL jobs. Max 3 results. + model: openai/gpt-oss-20b + previous_response_id: resp_887f6295aa43fb96 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782285930 + frequency_penalty: 0.0 + id: resp_926736eebb257cfd + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3593 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need runbook search. 
+ type: reasoning_text + encrypted_content: null + id: rs_8f6c080dca467649 + status: null + summary: [] + type: reasoning + - arguments: '{"query":"increase memory limits for ETL jobs","max_results":3}' + call_id: call_815e8956def890df + id: fc_815e8956def890df + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_887f6295aa43fb96 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 503 + input_tokens_details: + cached_tokens: 464 + cached_tokens_per_turn: + - 464 + input_tokens_per_turn: + - 503 + output_tokens: 42 + output_tokens_details: + output_tokens_per_turn: + - 42 + reasoning_tokens: 24 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 545 + user: null + headers: + content-type: application/json + status_code: 200 From 4ed6bb87b65116c43bc94c1fd235076e66a5cdc5 Mon Sep 17 00:00:00 2001 From: Ashwin Giridharan Date: Wed, 24 Jun 2026 13:17:04 -0700 Subject: [PATCH 2/5] test: add OpenAI cassettes, parallel calls, and tool-output-only patterns Record 6 new OpenAI (gpt-4o) cassettes structurally aligned with the existing vLLM cassettes. Add 2 new vLLM cassettes for parallel tool calls and tool-output-only turns. Re-record existing vLLM cassettes with function_call_output items in request inputs for realistic flow. Patch record_cassette.py with --tool-outputs support to automatically inject function_call_output items between turns (required by OpenAI, aligned for vLLM). 
New test coverage: - OpenAI responses parse identically (status="completed" not null) - Parallel tool calls (2 function_calls in single response) - Tool-output-only turn (no user message, model continues) - function_call_output items present in request inputs - Context retention verified across both backends Signed-off-by: Ashwin Giridharan --- .../tests/accumulator_cassette_test.rs | 178 +- .../tests/cassettes/record_cassette.py | 115 +- .../openai_responses_tool_calls_3turn.yaml | 966 ++++++++++ ..._responses_tool_calls_3turn_streaming.yaml | 1168 ++++++++++++ .../openai_responses_tool_calls_5turn.yaml | 1619 +++++++++++++++++ .../openai_responses_tool_calls_branch.yaml | 959 ++++++++++ .../openai_responses_tool_calls_parallel.yaml | 981 ++++++++++ ...responses_tool_calls_tool_output_only.yaml | 970 ++++++++++ .../multi_turn/pipeline_tool_outputs.json | 8 + .../responses_tool_calls_3turn.yaml | 117 +- .../responses_tool_calls_3turn_streaming.yaml | 305 ++-- .../responses_tool_calls_5turn.yaml | 202 +- .../responses_tool_calls_branch.yaml | 96 +- .../responses_tool_calls_parallel.yaml | 1002 ++++++++++ ...responses_tool_calls_tool_output_only.yaml | 1014 +++++++++++ 15 files changed, 9381 insertions(+), 319 deletions(-) create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml create mode 100644 
crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml create mode 100644 crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml diff --git a/crates/agentic-core/tests/accumulator_cassette_test.rs b/crates/agentic-core/tests/accumulator_cassette_test.rs index 0182e9c..7e97eb2 100644 --- a/crates/agentic-core/tests/accumulator_cassette_test.rs +++ b/crates/agentic-core/tests/accumulator_cassette_test.rs @@ -843,12 +843,181 @@ fn test_stateful_responses_branch_all_turns_parse() { // Cross-cassette: all stateful cassettes parse without error // ═══════════════════════════════════════════════════════════════════ +// ═══════════════════════════════════════════════════════════════════ +// Tool-output-only turn: model responds autonomously with text +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_tool_output_only_produces_text() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_tool_output_only.yaml"); + assert_eq!(cassette.turns.len(), 3); + + // Turn 2 has tool output only (no user message) → model should produce text + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let has_text = t2.iter().any(|item| matches!(item, OutputItem::Message(_))); + assert!(has_text, "tool-output-only turn should produce a text response"); +} + +// ═══════════════════════════════════════════════════════════════════ +// Parallel tool calls (OpenAI only — gpt-4o reliably produces these) +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_openai_parallel_tool_calls() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_parallel.yaml"); + assert_eq!(cassette.turns.len(), 3); + + // Turn 1 should have 2 parallel function 
calls + let t1 = process_nonstreaming_turn(&cassette, 0, "gpt-4o"); + let t1_names = get_function_call_names(&t1); + assert!( + t1_names.len() >= 2, + "parallel cassette turn 1 must have 2+ function calls, got: {t1_names:?}" + ); + assert!(t1_names.contains(&"get_job_status".to_string())); + assert!(t1_names.contains(&"web_search".to_string())); +} + +/// Verifies that the request input for turn 2 contains multiple `function_call_output` +/// items (one per parallel call from turn 1). +#[test] +fn test_openai_parallel_tool_outputs_in_request() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_parallel.yaml"); + + let body2 = cassette.turns[1].request.as_mapping().unwrap(); + let req2 = body2 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let input2 = req2 + .get(serde_yml::Value::String("input".into())) + .expect("turn 2 must have input"); + let input_seq = input2.as_sequence().expect("turn 2 input must be a list"); + + let tool_outputs: Vec<_> = input_seq + .iter() + .filter(|item| { + item.as_mapping() + .and_then(|m| m.get(serde_yml::Value::String("type".into()))) + .and_then(serde_yml::Value::as_str) + == Some("function_call_output") + }) + .collect(); + + assert!( + tool_outputs.len() >= 2, + "turn 2 input must contain 2+ function_call_output items for parallel calls, got {}", + tool_outputs.len() + ); +} + +// ═══════════════════════════════════════════════════════════════════ +// OpenAI cassettes: verify they parse identically to vLLM +// (status is "completed" string, not null) +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_openai_3turn_parses_and_retains_context() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_3turn.yaml"); + assert_eq!(cassette.turns.len(), 3); + assert_stateful_chaining(&cassette); + + let t1 = process_nonstreaming_turn(&cassette, 0, "gpt-4o"); + 
assert_eq!(get_function_call_names(&t1), vec!["get_job_status"]); + + // Context retention: turn 2 says "that job" + let t2 = process_nonstreaming_turn(&cassette, 1, "gpt-4o"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "OpenAI turn 2 must resolve 'that job' to job-382, got: {t2_args}" + ); +} + +#[test] +fn test_openai_5turn_full_sequence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_5turn.yaml"); + assert_eq!(cassette.turns.len(), 5); + assert_stateful_chaining(&cassette); + + let expected_tools = [ + "get_job_status", + "get_error_logs", + "search_runbook", + "run_analysis", + "restart_job", + ]; + for (i, expected) in expected_tools.iter().enumerate() { + let output = process_nonstreaming_turn(&cassette, i, "gpt-4o"); + let names = get_function_call_names(&output); + assert_eq!(names.len(), 1, "OpenAI turn {} should call 1 tool", i + 1); + assert_eq!(&names[0], expected, "OpenAI turn {} should call {expected}", i + 1); + } +} + +#[test] +fn test_openai_streaming_3turn() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_3turn_streaming.yaml"); + assert_eq!(cassette.turns.len(), 3); + + for i in 0..3 { + let output = process_streaming_turn(&cassette, i, "gpt-4o"); + assert!( + count_function_calls(&output) >= 1, + "OpenAI streaming turn {} must produce a function_call", + i + 1 + ); + } +} + +#[test] +fn test_openai_branch_divergence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_branch.yaml"); + assert_eq!(cassette.turns.len(), 3); + + let body2 = cassette.turns[1].request.as_mapping().unwrap(); + let req2 = body2 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev2 = req2 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 2 must have prev_id"); + + let body3 = 
cassette.turns[2].request.as_mapping().unwrap(); + let req3 = body3 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev3 = req3 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 3 must have prev_id"); + + assert_eq!( + prev2, prev3, + "OpenAI branch: turn 3 must branch from turn 1 (same prev_id as turn 2)" + ); +} + +// ═══════════════════════════════════════════════════════════════════ +// Cross-cassette: ALL stateful cassettes parse without error +// ═══════════════════════════════════════════════════════════════════ + #[test] fn test_all_stateful_cassettes_parse_without_error() { let nonstreaming = [ "responses_tool_calls_3turn.yaml", "responses_tool_calls_5turn.yaml", "responses_tool_calls_branch.yaml", + "responses_tool_calls_parallel.yaml", + "responses_tool_calls_tool_output_only.yaml", + "openai_responses_tool_calls_3turn.yaml", + "openai_responses_tool_calls_5turn.yaml", + "openai_responses_tool_calls_branch.yaml", + "openai_responses_tool_calls_parallel.yaml", + "openai_responses_tool_calls_tool_output_only.yaml", ]; for filename in &nonstreaming { @@ -867,7 +1036,7 @@ fn test_all_stateful_cassettes_parse_without_error() { i + 1, result.err() ); - let payload = result.unwrap().finalize("openai/gpt-oss-20b", None, None); + let payload = result.unwrap().finalize("gpt-4o", None, None); assert_eq!( payload.status, "completed", @@ -877,7 +1046,10 @@ fn test_all_stateful_cassettes_parse_without_error() { } } - let streaming = ["responses_tool_calls_3turn_streaming.yaml"]; + let streaming = [ + "responses_tool_calls_3turn_streaming.yaml", + "openai_responses_tool_calls_3turn_streaming.yaml", + ]; for filename in &streaming { let cassette = load_turn_cassette_from(MULTI_TURN_DIR, filename); for i in 0..cassette.turns.len() { @@ -888,7 +1060,7 @@ fn test_all_stateful_cassettes_parse_without_error() { i + 1 ); let acc = 
ResponseAccumulator::from_sse_lines(data_lines, None); - let payload = acc.finalize("openai/gpt-oss-20b", None, None); + let payload = acc.finalize("gpt-4o", None, None); assert_eq!( payload.status, "completed", diff --git a/crates/agentic-core/tests/cassettes/record_cassette.py b/crates/agentic-core/tests/cassettes/record_cassette.py index d95390f..41caa0f 100644 --- a/crates/agentic-core/tests/cassettes/record_cassette.py +++ b/crates/agentic-core/tests/cassettes/record_cassette.py @@ -279,16 +279,16 @@ def _create_conversation(client: httpx.Client, proxy_url: str) -> str: return conv_id -def _send_nonstreaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None: +def _send_nonstreaming(client: httpx.Client, body: dict, proxy_url: str) -> dict | None: resp = client.post(f"{proxy_url}/v1/responses", json=body, timeout=300) resp.raise_for_status() data = resp.json() print(f"\n[Response]\n{json.dumps(data, indent=2)}\n") - return data.get("id") + return data -def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None: - response_id = None +def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> dict | None: + response_data = None print("\n[Streaming response]") with client.stream( "POST", f"{proxy_url}/v1/responses", json=body, timeout=300 @@ -302,14 +302,14 @@ def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> str | N try: payload = json.loads(line[5:].strip()) if payload.get("type") == "response.completed": - response_id = payload.get("response", {}).get("id") + response_data = payload.get("response") except Exception: pass print() - return response_id + return response_data -def _send(client: httpx.Client, body: dict, stream: bool, proxy_url: str) -> str | None: +def _send(client: httpx.Client, body: dict, stream: bool, proxy_url: str) -> dict | None: return ( _send_streaming(client, body, proxy_url) if stream @@ -332,6 +332,48 @@ def _inject_tools(body: dict, tools: list | None, 
tool_choice: Any) -> None: body["tool_choice"] = tool_choice +def _extract_function_calls(response_data: dict | None) -> list[dict]: + """Extract function_call items from a response's output array.""" + if not response_data: + return [] + output = response_data.get("output", []) + return [item for item in output if item.get("type") == "function_call"] + + +def _build_tool_output_input( + function_calls: list[dict], + tool_outputs: dict[str, str], + user_prompt: str | None, +) -> list[dict]: + """Build an input list with function_call_output items followed by optional user message. + + Args: + function_calls: function_call items from the previous response. + tool_outputs: mapping of tool name -> fake JSON output string. + user_prompt: the next user message (None for tool-output-only turns). + + Returns: + A list suitable for the `input` field of the next request. + """ + input_items: list[dict] = [] + for fc in function_calls: + call_id = fc.get("call_id", "") + name = fc.get("name", "") + output = tool_outputs.get(name, json.dumps({"result": f"mock output for {name}"})) + input_items.append({ + "type": "function_call_output", + "call_id": call_id, + "output": output, + }) + if user_prompt: + input_items.append({ + "type": "message", + "role": "user", + "content": user_prompt, + }) + return input_items + + def run_conv( client: httpx.Client, turns: int, @@ -371,7 +413,8 @@ def run_conv( body["previous_response_id"] = previous_response_id else: body["conversation"] = conv_id - response_id = _send(client, body, stream, proxy_url) + response_data = _send(client, body, stream, proxy_url) + response_id = response_data.get("id") if response_data else None if response_id: response_ids[turn] = response_id previous_response_id = response_id @@ -466,7 +509,8 @@ def run_mixed( body["previous_response_id"] = previous_response_id else: body["conversation"] = conv_id - previous_response_id = _send(client, body, stream, proxy_url) + response_data = _send(client, body, stream, 
proxy_url) + previous_response_id = response_data.get("id") if response_data else None def run_responses( @@ -479,8 +523,10 @@ def run_responses( proxy_url: str, tools: list | None = None, tool_choice: Any = None, + tool_outputs: dict[str, str] | None = None, ) -> None: response_ids: dict[int, str] = {} + responses: dict[int, dict] = {} branch_map: dict[int, int] = {} extra_branches: list[int] = [] for branch_from, branch_turn_number in branches: @@ -490,6 +536,7 @@ def run_responses( extra_branches.append(branch_from) previous_response_id: str | None = None + last_response: dict | None = None for turn in range(1, turns + 1): if turn in branch_map: branch_from = branch_map[turn] @@ -499,18 +546,32 @@ def run_responses( f"(available: {sorted(response_ids)})" ) previous_response_id = response_ids[branch_from] + last_response = responses.get(branch_from) click.echo( f"\n[Branch] turn {turn} chains from turn {branch_from} (response_id={previous_response_id})" ) prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ") - body: dict = {"model": model, "input": prompt, "stream": stream, "store": store} + + # Build input: if previous response had function calls and we have tool_outputs, + # inject function_call_output items before the user message. 
+ pending_calls = _extract_function_calls(last_response) if tool_outputs else [] + if pending_calls and tool_outputs: + input_value: Any = _build_tool_output_input(pending_calls, tool_outputs, prompt if prompt else None) + click.echo(f" [injecting {len(pending_calls)} tool output(s) before user message]") + else: + input_value = prompt + + body: dict = {"model": model, "input": input_value, "stream": stream, "store": store} if previous_response_id and store: body["previous_response_id"] = previous_response_id _inject_tools(body, tools, tool_choice) - response_id = _send(client, body, stream, proxy_url) + response_data = _send(client, body, stream, proxy_url) + response_id = response_data.get("id") if response_data else None previous_response_id = response_id if store else None + last_response = response_data if response_id: response_ids[turn] = response_id + responses[turn] = response_data for b_idx, branch_from in enumerate(extra_branches, start=1): if branch_from not in response_ids: @@ -519,15 +580,24 @@ def run_responses( f"(available: {sorted(response_ids)})" ) branch_resp_id = response_ids[branch_from] + branch_response = responses.get(branch_from) click.echo( f"\n[Extra branch {b_idx}] from turn {branch_from} (response_id={branch_resp_id}), turn {turns + 1}" ) prompt = _prompt( f"Turn {turns + 1} (extra branch from turn {branch_from}) — enter prompt: " ) + + pending_calls = _extract_function_calls(branch_response) if tool_outputs else [] + if pending_calls and tool_outputs: + input_value = _build_tool_output_input(pending_calls, tool_outputs, prompt if prompt else None) + click.echo(f" [injecting {len(pending_calls)} tool output(s) before user message]") + else: + input_value = prompt + body = { "model": model, - "input": prompt, + "input": input_value, "stream": stream, "store": store, "previous_response_id": branch_resp_id, @@ -619,6 +689,16 @@ def run_responses( default=None, help='tool_choice value: "auto", "none", "required", or JSON e.g. 
\'{"type":"function","name":"foo"}\'.', ) +@click.option( + "--tool-outputs", + "tool_outputs_file", + metavar="FILE", + default=None, + type=click.Path(exists=True), + help="Path to a JSON file mapping tool names to fake output strings. " + "When provided, function_call_output items are automatically injected " + "between turns (required for OpenAI Responses API).", +) def main( turns: int, output: str, @@ -633,6 +713,7 @@ def main( vllm_url: str | None, tools_file: str | None, tool_choice_raw: str | None, + tool_outputs_file: str | None, ) -> None: """Interactive multi-turn cassette recorder (proxy embedded).""" if branch_turn_number and not branch_from: @@ -668,6 +749,14 @@ def main( else: tool_choice = stripped + tool_outputs: dict[str, str] | None = None + if tool_outputs_file: + with open(tool_outputs_file, encoding="utf-8") as f: + tool_outputs = json.load(f) + if not isinstance(tool_outputs, dict): + raise click.UsageError("--tool-outputs file must contain a JSON object (name -> output string).") + click.echo(f"Tool outputs: {list(tool_outputs.keys())}") + if vllm_url: target = vllm_url.rstrip("/") headers: dict = {} @@ -703,7 +792,7 @@ def main( elif mode == "mixed": run_mixed(client, turns, model, stream, store, proxy_url) elif mode == "responses": - run_responses(client, turns, model, stream, store, branches, proxy_url, tools, tool_choice) + run_responses(client, turns, model, stream, store, branches, proxy_url, tools, tool_choice, tool_outputs) elif mode == "store_true_then_store_false": run_store_true_then_store_false(client, turns, model, stream, proxy_url) finally: diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml new file mode 100644 index 0000000..ae27810 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml @@ -0,0 +1,966 @@ 
+turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331467 + created_at: 1782331466 + error: null + frequency_penalty: 0.0 + id: resp_049b5e7584cd0867006a3c384ab9a88199a7bb53c011a2c1fa + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_WHuX19CANJfWBjbT8lqdEiya + id: fc_049b5e7584cd0867006a3c384bc20481999a4ed6ff6b79fac6 + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + 
verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 355 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 374 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_WHuX19CANJfWBjbT8lqdEiya + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Now pull the error logs for that job. Use severity ERROR and max + 10 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_049b5e7584cd0867006a3c384ab9a88199a7bb53c011a2c1fa + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331468 + created_at: 1782331468 + error: null + frequency_penalty: 0.0 + id: resp_049b5e7584cd0867006a3c384c03748199949f46ad0cd9b7f2 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_43JRhsN6hGDMwwMx06hK3SWY + id: fc_049b5e7584cd0867006a3c384caea881999eba9bcd4ec6ee9b + name: get_error_logs + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_049b5e7584cd0867006a3c384ab9a88199a7bb53c011a2c1fa + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default 
+ status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 493 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 521 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_43JRhsN6hGDMwwMx06hK3SWY + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Based on those 
errors, search the runbook for troubleshooting procedures. + Max 5 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_049b5e7584cd0867006a3c384c03748199949f46ad0cd9b7f2 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331470 + created_at: 1782331469 + error: null + frequency_penalty: 0.0 + id: resp_049b5e7584cd0867006a3c384d13d48199a2782d3f3d47efe8 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"OutOfMemoryError in ETL transform stage","max_results":5}' + call_id: call_tSIYLWKg6qeX4YLQC9jBLctZ + id: fc_049b5e7584cd0867006a3c384e2488819995cd62e88ea95a97 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_049b5e7584cd0867006a3c384c03748199949f46ad0cd9b7f2 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 746 + input_tokens_details: + cached_tokens: 0 + output_tokens: 29 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 775 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml new file mode 100644 index 0000000..cb4220e --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml @@ -0,0 +1,1168 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the status of pipeline job-382. + model: gpt-4o + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","object":"response","created_at":1782331495,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for 
a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in 
GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","object":"response","created_at":1782331495,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","type":"function_call","status":"in_progress","arguments":"","call_id":"call_oGw30yoywjhkYA1B6t8FesEm","name":"get_job_status"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"Qdc2CisBnggcQe","output_index":0,"sequence_number":3} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"iOdjIZMB0OV6i","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"lrEcFJPqAFPgc","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: 
{"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"xgi6zjbPoF8B8","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"wSK1VaNG3cTqV","output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"4Aj06mCLUT4Azme","output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"I567UCFBL57QD","output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"GDdy5wrzzwwyCu","output_index":0,"sequence_number":10} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","output_index":0,"sequence_number":11} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_oGw30yoywjhkYA1B6t8FesEm","name":"get_job_status"},"output_index":0,"sequence_number":12} + + ' + - ' + + ' + - 
'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","object":"response","created_at":1782331495,"status":"completed","background":false,"completed_at":1782331497,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_oGw30yoywjhkYA1B6t8FesEm","name":"get_job_status"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":352,"input_tokens_details":{"cached_tokens":0},"output_tokens":19,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":371},"user":null,"metadata":{}},"sequence_number":13} + + ' + - ' + + ' + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_oGw30yoywjhkYA1B6t8FesEm + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity FATAL and max 5 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1 + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: 
{"type":"response.created","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","object":"response","created_at":1782331497,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","object":"response","created_at":1782331497,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","type":"function_call","status":"in_progress","arguments":"","call_id":"call_Q2WtyAJvbr2BZpKoheDRCVxc","name":"get_error_logs"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"ivfOJuey5E1Pck","output_index":0,"sequence_number":3} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"mZ9WgWBZQ8s3n","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"9n8hZdvelCbVb","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: 
{"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"1fkQ64Rf2n8Z8","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"S4M9mxe7MFnLv","output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"LH7tUz1Q6TYE7lN","output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"8FO8FbM4VZow2","output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"xCmrJJc0eji3d","output_index":0,"sequence_number":10} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"severity","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"zZ6JjI99","output_index":0,"sequence_number":11} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"vihOScwv3S8mO","output_index":0,"sequence_number":12} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: 
{"type":"response.function_call_arguments.delta","delta":"F","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"AKW9KVdNA7Nk8WI","output_index":0,"sequence_number":13} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"ATAL","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"MJsY6dviJiIf","output_index":0,"sequence_number":14} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"uVztKDB8l8Kgf","output_index":0,"sequence_number":15} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"max","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"qjTsrWviOXxJY","output_index":0,"sequence_number":16} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"_entries","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"OMYy2WHN","output_index":0,"sequence_number":17} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"9pmHbITBfejkZc","output_index":0,"sequence_number":18} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"Pm5LRPb5UBK5TfH","output_index":0,"sequence_number":19} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: 
{"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"3W9IodMx0hFrinv","output_index":0,"sequence_number":20} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","output_index":0,"sequence_number":21} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_Q2WtyAJvbr2BZpKoheDRCVxc","name":"get_error_logs"},"output_index":0,"sequence_number":22} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","object":"response","created_at":1782331497,"status":"completed","background":false,"completed_at":1782331498,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_Q2WtyAJvbr2BZpKoheDRCVxc","name":"get_error_logs"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":
"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration 
overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":489,"input_tokens_details":{"cached_tokens":0},"output_tokens":29,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":518},"user":null,"metadata":{}},"sequence_number":23} + + ' + - ' + + ' + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_Q2WtyAJvbr2BZpKoheDRCVxc + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - 
content: Search the web for how to fix that type of error in Spark pipelines. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9 + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386afa84819a97b6538b6b5ac51c","object":"response","created_at":1782331499,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386afa84819a97b6538b6b5ac51c","object":"response","created_at":1782331499,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","type":"function_call","status":"in_progress","arguments":"","call_id":"call_zkoVfNgi8D69hLqAlPnAUwpr","name":"web_search"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"prGEfgSrbWSDln","output_index":0,"sequence_number":3} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"query","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"YtrXZtJHOJ3","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"TaglTitGTDkkp","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: 
{"type":"response.function_call_arguments.delta","delta":"Fix","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"kknaGTpAd0BwF","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" Out","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"m7Tk3lSDqTfC","output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"Of","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"HlNdJCWpCaYMe8","output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"Memory","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"85sF8zSf4k","output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"Error","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"LhCNyjZcqT5","output_index":0,"sequence_number":10} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" in","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"eqbHtHEBIHxQ2","output_index":0,"sequence_number":11} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" Apache","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"RekdgKeND","output_index":0,"sequence_number":12} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: 
{"type":"response.function_call_arguments.delta","delta":" Spark","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"qp6u77FnUu","output_index":0,"sequence_number":13} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" pipeline","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"k2x2AHy","output_index":0,"sequence_number":14} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" ET","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"oaZaL8JDvMiXi","output_index":0,"sequence_number":15} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"L","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"QUV1RyPNtooXDOD","output_index":0,"sequence_number":16} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"C3WNpsIiwNPvFr","output_index":0,"sequence_number":17} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"query\":\"Fix + OutOfMemoryError in Apache Spark pipeline ETL\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","output_index":0,"sequence_number":18} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","type":"function_call","status":"completed","arguments":"{\"query\":\"Fix + OutOfMemoryError in Apache Spark pipeline 
ETL\"}","call_id":"call_zkoVfNgi8D69hLqAlPnAUwpr","name":"web_search"},"output_index":0,"sequence_number":19} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386afa84819a97b6538b6b5ac51c","object":"response","created_at":1782331499,"status":"completed","background":false,"completed_at":1782331499,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","type":"function_call","status":"completed","arguments":"{\"query\":\"Fix + OutOfMemoryError in Apache Spark pipeline ETL\"}","call_id":"call_zkoVfNgi8D69hLqAlPnAUwpr","name":"web_search"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + 
priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":740,"input_tokens_details":{"cached_tokens":0},"output_tokens":25,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":765},"user":null,"metadata":{}},"sequence_number":20} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml new file mode 100644 index 0000000..6e11708 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml @@ -0,0 +1,1619 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. ETL pipeline job-382 failed overnight. What + is its current status? + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331481 + created_at: 1782331481 + error: null + frequency_penalty: 0.0 + id: resp_03fd10c22367f14c006a3c38590640819981d8e689417d6da2 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_KvqKd7Oqx9UM0kV4BIRAyLEi + id: fc_03fd10c22367f14c006a3c385990e481999a7ef70c7dd82761 + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + 
verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 358 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 377 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_KvqKd7Oqx9UM0kV4BIRAyLEi + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Pull the error logs for that failed job. Use severity ERROR and max + 20 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_03fd10c22367f14c006a3c38590640819981d8e689417d6da2 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331482 + created_at: 1782331482 + error: null + frequency_penalty: 0.0 + id: resp_03fd10c22367f14c006a3c3859dfe4819982720510863bf1fa + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' + call_id: call_LhB0vE8IH2AIMglFujcSYxwk + id: fc_03fd10c22367f14c006a3c385a9efc8199bc2db350998c1ea9 + name: get_error_logs + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_03fd10c22367f14c006a3c38590640819981d8e689417d6da2 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default 
+ status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 496 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 524 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_LhB0vE8IH2AIMglFujcSYxwk + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the runbook 
for how to fix the issue found in those logs. + Max 5 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_03fd10c22367f14c006a3c3859dfe4819982720510863bf1fa + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331484 + created_at: 1782331483 + error: null + frequency_penalty: 0.0 + id: resp_03fd10c22367f14c006a3c385b44a48199af24641819516d31 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"OutOfMemoryError in ETL pipeline transform stage","max_results":5}' + call_id: call_Uuq0Gf92pAL1YAUEgnbr8p6B + id: fc_03fd10c22367f14c006a3c385c0a248199a0f952e077038d08 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_03fd10c22367f14c006a3c3859dfe4819982720510863bf1fa + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 751 + input_tokens_details: + cached_tokens: 0 + output_tokens: 30 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 781 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t4 + request: + body: + input: + - call_id: call_Uuq0Gf92pAL1YAUEgnbr8p6B + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. 
Requires approval above 128GB."}]' + type: function_call_output + - content: 'Run this analysis code to summarize: import json; print(json.dumps({''job'': + ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': + ''increase memory to 64GB''}))' + role: user + type: message + model: gpt-4o + previous_response_id: resp_03fd10c22367f14c006a3c385b44a48199af24641819516d31 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331485 + created_at: 1782331484 + error: null + frequency_penalty: 0.0 + id: resp_03fd10c22367f14c006a3c385c88788199bfcfc70ff88ce28f + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"code":"import json\nprint(json.dumps({''job'': ''job-382'', + ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase + memory to 64GB''}))","timeout_seconds":30}' + call_id: call_hy09mPn87sDCueIq8Mxd3JBo + id: fc_03fd10c22367f14c006a3c385d69c0819980fcbf225cde945f + name: run_analysis + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_03fd10c22367f14c006a3c385b44a48199af24641819516d31 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 972 + input_tokens_details: + cached_tokens: 0 + output_tokens: 58 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 1030 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: + input: + - call_id: call_hy09mPn87sDCueIq8Mxd3JBo + output: '{"stdout": "Failed stage: transform, count: 2\nPattern: consistent + OOM at partition-47 (2.1M rows)\nRecommendation: increase memory to 64GB + or repartition", "stderr": "", "exit_code": 0, "duration_ms": 120}' + type: function_call_output + - content: Great. Now restart it with 64 GB memory, skip completed stages, and + high priority. + role: user + type: message + model: gpt-4o + previous_response_id: resp_03fd10c22367f14c006a3c385c88788199bfcfc70ff88ce28f + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331486 + created_at: 1782331486 + error: null + frequency_penalty: 0.0 + id: resp_03fd10c22367f14c006a3c385e0a38819983064ba1cbb83062 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_EgLYkmLEPhzgZl9drilwu1B7 + id: fc_03fd10c22367f14c006a3c385eafa88199b729b7ba313d626c + name: restart_job + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_03fd10c22367f14c006a3c385c88788199bfcfc70ff88ce28f + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + 
safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 1122 + input_tokens_details: + cached_tokens: 0 + output_tokens: 36 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 1158 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml new file mode 100644 index 0000000..0c831ba --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml @@ -0,0 +1,959 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331511 + created_at: 1782331509 + error: null + frequency_penalty: 0.0 + id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_NWcEmKRN0AkUr4ZmlHWaqcQn + id: fc_05bbd2f607378fa0006a3c3877757c81999eedf1f10d0b4309 + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + 
verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 355 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 374 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_NWcEmKRN0AkUr4ZmlHWaqcQn + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity ERROR and max 10 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331512 + created_at: 1782331511 + error: null + frequency_penalty: 0.0 + id: resp_05bbd2f607378fa0006a3c3877c0f0819983a3f1411a2242b9 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_8SfacO4G0hZKrTnfTI76zeAa + id: fc_05bbd2f607378fa0006a3c38789348819980949dfc83474c4b + name: get_error_logs + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default 
+ status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 491 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 519 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_NWcEmKRN0AkUr4ZmlHWaqcQn + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Instead of investigating errors, search the runbook for how to increase + memory limits for ETL jobs. Max 3 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331514 + created_at: 1782331512 + error: null + frequency_penalty: 0.0 + id: resp_05bbd2f607378fa0006a3c3878eb5481999b020abb1bbaaa0f + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"increase memory limits for ETL jobs","max_results":3}' + call_id: call_XVIOIR7ZtfV0urAs3C4ZXPK8 + id: fc_05bbd2f607378fa0006a3c3879e768819997294f712fc6012c + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: 
default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 500 + input_tokens_details: + cached_tokens: 0 + output_tokens: 27 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 527 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml new file mode 100644 index 0000000..ba6a486 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml @@ -0,0 +1,981 @@ +turns: +- filename: t1 + request: + body: + input: 'You are an SRE assistant. I need you to do TWO things in parallel: 1) + call get_job_status for job-382, and 2) call web_search for ''Spark ETL OOM + causes''. Do both now.' + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331523 + created_at: 1782331523 + error: null + frequency_penalty: 0.0 + id: resp_0862f67eff09df0d006a3c38830c6881998c50cd3fe21a1e49 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_wsICgbUO4BlbKuK1o6fGzNSp + id: fc_0862f67eff09df0d006a3c3883c1bc8199993f62d77975f8dc + name: get_job_status + status: completed + type: function_call + - arguments: '{"query":"Spark ETL OOM causes"}' + call_id: call_kDwkX53Mu7EbLjLOkeymIMya + id: fc_0862f67eff09df0d006a3c3883c1d48199949f18556c62e1ec + name: web_search + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + 
prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 385 + input_tokens_details: + cached_tokens: 0 + output_tokens: 54 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 439 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_wsICgbUO4BlbKuK1o6fGzNSp + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - call_id: call_kDwkX53Mu7EbLjLOkeymIMya + output: '[{"title": "Spark OOM in ETL: Root Causes and Fixes - Databricks + Blog", "url": "https://docs.databricks.com/en/optimizations/oom.html", "snippet": + "Common causes: data skew in shuffle partitions, broadcast join threshold + too high, insufficient executor memory. Fix: spark.sql.adaptive.enabled=true + + increase spark.executor.memory"}, {"title": "Troubleshooting OutOfMemoryError + in Apache Spark", "url": "https://spark.apache.org/docs/latest/tuning.html#memory-tuning", + "snippet": "Increase spark.executor.memory or spark.driver.memory. For shuffle-heavy + workloads, also increase spark.sql.shuffle.partitions."}]' + type: function_call_output + - content: Based on those results, search the runbook for memory increase procedures. + Max 3 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0862f67eff09df0d006a3c38830c6881998c50cd3fe21a1e49 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331524 + created_at: 1782331524 + error: null + frequency_penalty: 0.0 + id: resp_0862f67eff09df0d006a3c3883f6108199bf628e4a2c6469dd + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"memory increase procedures for Spark","max_results":3}' + call_id: call_l2MaoNnfIGtswWuyJUBJ3G3G + id: fc_0862f67eff09df0d006a3c38849ba4819991a40c71a014ca1f + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0862f67eff09df0d006a3c38830c6881998c50cd3fe21a1e49 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: 
default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 686 + input_tokens_details: + cached_tokens: 0 + output_tokens: 25 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 711 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_l2MaoNnfIGtswWuyJUBJ3G3G + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: Now restart that job with 64 GB memory, skip completed stages, high + priority. 
+ role: user + type: message + model: gpt-4o + previous_response_id: resp_0862f67eff09df0d006a3c3883f6108199bf628e4a2c6469dd + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331525 + created_at: 1782331525 + error: null + frequency_penalty: 0.0 + id: resp_0862f67eff09df0d006a3c3884f8cc81998a1d105f46eb9e69 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_JLVwHJ1DWkQvlsvhJqL5DuF5 + id: fc_0862f67eff09df0d006a3c3885bf5081999e5a54327b5f629b + name: restart_job + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0862f67eff09df0d006a3c3883f6108199bf628e4a2c6469dd + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + 
safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 873 + input_tokens_details: + cached_tokens: 0 + output_tokens: 36 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 909 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml new file mode 100644 index 0000000..a1d5b80 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml @@ -0,0 +1,970 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331535 + created_at: 1782331534 + error: null + frequency_penalty: 0.0 + id: resp_0ec7c4ddd5fc13e2006a3c388edc30819ab696d5b8584fb6c6 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_xuHSO4xB7M872HdpG65mhXWb + id: fc_0ec7c4ddd5fc13e2006a3c388f7f64819aab560a66e56e0f41 + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + 
verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 355 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 374 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_xuHSO4xB7M872HdpG65mhXWb + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + model: gpt-4o + previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388edc30819ab696d5b8584fb6c6 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331537 + created_at: 1782331535 + error: null + frequency_penalty: 0.0 + id: resp_0ec7c4ddd5fc13e2006a3c388fd0cc819a9407adfbb0c2c208 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: 'The ETL pipeline job `job-382` has failed. Here are the details: + + + - **Status**: Failed + + - **Error Type**: OutOfMemoryError + + - **Failed At**: 2024-01-15T03:22:11Z + + - **Stage**: Transform + + - **Memory Used**: 31.2 GB + + - **Memory Limit**: 32 GB + + - **Pipeline**: etl-daily-ingest + + - **Owner**: Data Platform Team + + + Would you like me to investigate further or take any action?' 
+ type: output_text + id: msg_0ec7c4ddd5fc13e2006a3c38909204819ab534a8798edadde1 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388edc30819ab696d5b8584fb6c6 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. 
+ name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 471 + input_tokens_details: + cached_tokens: 0 + output_tokens: 121 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 592 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Great, now search the runbook for how to fix that OOM error. Max 5 results. + model: gpt-4o + previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388fd0cc819a9407adfbb0c2c208 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782331538 + created_at: 1782331537 + error: null + frequency_penalty: 0.0 + id: resp_0ec7c4ddd5fc13e2006a3c3891a958819a8bf49dba0916fef0 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"OutOfMemoryError ETL pipeline","max_results":5}' + call_id: call_wmlJBIMN1WijA0cAJMjjes0X + id: fc_0ec7c4ddd5fc13e2006a3c38922b24819ab44bfbe155a4ecb2 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388fd0cc819a9407adfbb0c2c208 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default 
+ status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 619 + input_tokens_details: + cached_tokens: 0 + output_tokens: 27 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 646 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json new file mode 100644 index 0000000..8c6ec54 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json @@ -0,0 +1,8 @@ +{ + "get_job_status": "{\"job_id\": \"job-382\", \"status\": \"failed\", \"error_type\": \"OutOfMemoryError\", \"failed_at\": \"2024-01-15T03:22:11Z\", \"stage\": \"transform\", \"memory_used_gb\": 31.2, \"memory_limit_gb\": 32, \"pipeline\": \"etl-daily-ingest\", \"owner\": \"data-platform-team\"}", + "get_error_logs": "[{\"timestamp\": \"2024-01-15T03:22:09Z\", \"severity\": \"ERROR\", \"message\": \"OutOfMemoryError at stage 'transform': heap space exhausted (31.2GB/32GB)\", 
\"stack_trace\": \"java.lang.OutOfMemoryError: Java heap space\\n at org.apache.spark.memory.TaskMemoryManager.allocatePage\"}, {\"timestamp\": \"2024-01-15T03:22:10Z\", \"severity\": \"ERROR\", \"message\": \"Task failed: transform-partition-47 OOM after processing 2.1M rows\", \"stack_trace\": \"org.apache.spark.SparkException: Task failed while writing rows\"}, {\"timestamp\": \"2024-01-15T03:22:11Z\", \"severity\": \"ERROR\", \"message\": \"Job job-382 marked FAILED after 3 consecutive OOM errors in transform stage\", \"stack_trace\": null}]", + "search_runbook": "[{\"title\": \"OOM Troubleshooting for ETL Pipelines\", \"path\": \"/runbooks/data-platform/oom-etl.md\", \"snippet\": \"When transform stage OOMs: 1) Check partition skew with get_partition_stats, 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query execution\"}, {\"title\": \"Memory Limit Override Procedures\", \"path\": \"/runbooks/data-platform/memory-overrides.md\", \"snippet\": \"To increase memory: use restart_job with memory_override_gb. Standard escalation: 32->64->128. Requires approval above 128GB.\"}]", + "run_analysis": "{\"stdout\": \"Failed stage: transform, count: 2\\nPattern: consistent OOM at partition-47 (2.1M rows)\\nRecommendation: increase memory to 64GB or repartition\", \"stderr\": \"\", \"exit_code\": 0, \"duration_ms\": 120}", + "restart_job": "{\"job_id\": \"job-382\", \"new_run_id\": \"run-8847\", \"status\": \"queued\", \"memory_gb\": 64, \"priority\": \"high\", \"skipped_stages\": [\"extract\", \"validate\"], \"estimated_start\": \"2024-01-15T03:30:00Z\"}", + "web_search": "[{\"title\": \"Spark OOM in ETL: Root Causes and Fixes - Databricks Blog\", \"url\": \"https://docs.databricks.com/en/optimizations/oom.html\", \"snippet\": \"Common causes: data skew in shuffle partitions, broadcast join threshold too high, insufficient executor memory. 
Fix: spark.sql.adaptive.enabled=true + increase spark.executor.memory\"}, {\"title\": \"Troubleshooting OutOfMemoryError in Apache Spark\", \"url\": \"https://spark.apache.org/docs/latest/tuning.html#memory-tuning\", \"snippet\": \"Increase spark.executor.memory or spark.driver.memory. For shuffle-heavy workloads, also increase spark.sql.shuffle.partitions.\"}]" +} diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml index 1c9ac53..ef33c6c 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml @@ -136,9 +136,9 @@ turns: response: body: background: false - created_at: 1782285886 + created_at: 1782331360 frequency_penalty: 0.0 - id: resp_aed8c5ff2dbee0fe + id: resp_99ff5424f2627e43 incomplete_details: null input_messages: null instructions: null @@ -150,16 +150,16 @@ turns: object: response output: - content: - - text: We need to query status. + - text: We need to call get_job_status. 
type: reasoning_text encrypted_content: null - id: rs_bb29cc3827967d2c + id: rs_901adb851069f6c7 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_ac5da1db6eed3a34 - id: fc_ac5da1db6eed3a34 + call_id: call_af81636bdbc9dec5 + id: fc_af81636bdbc9dec5 name: get_job_status namespace: null status: null @@ -306,20 +306,20 @@ turns: usage: input_tokens: 436 input_tokens_details: - cached_tokens: 416 + cached_tokens: 432 cached_tokens_per_turn: - - 416 + - 432 input_tokens_per_turn: - 436 - output_tokens: 34 + output_tokens: 36 output_tokens_details: output_tokens_per_turn: - - 34 - reasoning_tokens: 16 + - 36 + reasoning_tokens: 18 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 470 + total_tokens: 472 user: null headers: content-type: application/json @@ -327,9 +327,18 @@ turns: - filename: t2 request: body: - input: Now pull the error logs for that job. Use severity ERROR and max 10 entries. + input: + - call_id: call_af81636bdbc9dec5 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Now pull the error logs for that job. Use severity ERROR and max + 10 entries. 
+ role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_aed8c5ff2dbee0fe + previous_response_id: resp_99ff5424f2627e43 store: true stream: false tool_choice: auto @@ -462,30 +471,30 @@ turns: response: body: background: false - created_at: 1782285887 + created_at: 1782331361 frequency_penalty: 0.0 - id: resp_b23d3955c1c74ce6 + id: resp_b44e3326e81a597c incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3602 + max_output_tokens: 3499 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Need error logs. + - text: Call get_error_logs. type: reasoning_text encrypted_content: null - id: rs_b1ad7d4fe3886bad + id: rs_8f36d456b69a137a status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' - call_id: call_8467ac7d52d0ea0b - id: fc_8467ac7d52d0ea0b + call_id: call_961a299b68e2f0d9 + id: fc_961a299b68e2f0d9 name: get_error_logs namespace: null status: null @@ -493,7 +502,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_aed8c5ff2dbee0fe + previous_response_id: resp_99ff5424f2627e43 prompt: null reasoning: null service_tier: auto @@ -630,22 +639,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 494 + input_tokens: 597 input_tokens_details: - cached_tokens: 448 + cached_tokens: 464 cached_tokens_per_turn: - - 448 + - 464 input_tokens_per_turn: - - 494 - output_tokens: 41 + - 597 + output_tokens: 42 output_tokens_details: output_tokens_per_turn: - - 41 - reasoning_tokens: 23 + - 42 + reasoning_tokens: 24 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 535 + total_tokens: 639 user: null headers: content-type: application/json @@ -653,10 +662,24 @@ turns: - filename: t3 request: body: - input: Based on those errors, search the runbook for troubleshooting procedures. - Max 5 results. 
+ input: + - call_id: call_961a299b68e2f0d9 + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Based on those errors, search the runbook for troubleshooting procedures. + Max 5 results. + role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_b23d3955c1c74ce6 + previous_response_id: resp_b44e3326e81a597c store: true stream: false tool_choice: auto @@ -789,30 +812,30 @@ turns: response: body: background: false - created_at: 1782285887 + created_at: 1782331361 frequency_penalty: 0.0 - id: resp_b48bd4930aecdef6 + id: resp_8dfe29723818b937 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3537 + max_output_tokens: 3226 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: We need to search runbook. + - text: Use search_runbook. 
type: reasoning_text encrypted_content: null - id: rs_a76eedf3b8dff018 + id: rs_8682e2c6b269a60c status: null summary: [] type: reasoning - - arguments: '{"query":"ETL pipeline job-382 error handling", "max_results":5}' - call_id: call_9a6c34c7b699ef2a - id: fc_9a6c34c7b699ef2a + - arguments: '{"query":"OutOfMemoryError transform stage ETL daily ingest troubleshoot","max_results":5}' + call_id: call_847a146847337019 + id: fc_847a146847337019 name: search_runbook namespace: null status: null @@ -820,7 +843,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_b23d3955c1c74ce6 + previous_response_id: resp_b44e3326e81a597c prompt: null reasoning: null service_tier: auto @@ -957,13 +980,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 559 + input_tokens: 870 input_tokens_details: - cached_tokens: 496 + cached_tokens: 608 cached_tokens_per_turn: - - 496 + - 608 input_tokens_per_turn: - - 559 + - 870 output_tokens: 45 output_tokens_details: output_tokens_per_turn: @@ -972,7 +995,7 @@ turns: tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 604 + total_tokens: 915 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml index 6045dab..f428f78 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml @@ -140,7 +140,7 @@ turns: - 'event: response.created ' - - 'data: 
{"response":{"id":"resp_84fc22eb02e37767","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_ac3c1315a91f31a2","created_at":1782331382,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -170,7 +170,7 @@ turns: - 'event: response.in_progress ' - - 'data: {"response":{"id":"resp_84fc22eb02e37767","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_ac3c1315a91f31a2","created_at":1782331382,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job 
identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -200,7 +200,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"id":"msg_9f8f443f3d36c276","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + - 'data: {"item":{"id":"msg_b13a0755fc859b6a","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} ' - ' @@ -209,7 +209,7 @@ turns: - 'event: response.reasoning_part.added ' - - 'data: {"content_index":0,"item_id":"msg_9f8f443f3d36c276","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + - 'data: {"content_index":0,"item_id":"msg_b13a0755fc859b6a","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} ' - ' @@ -218,7 +218,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"We","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"We","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} ' - ' @@ -227,7 +227,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" need","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" 
need","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} ' - ' @@ -236,7 +236,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" to","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" to","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} ' - ' @@ -245,7 +245,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" query","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" call","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} ' - ' @@ -254,7 +254,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" pipeline","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" get","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} ' - ' @@ -263,7 +263,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" orchestr","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_job","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} ' - ' @@ -272,7 +272,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"ator","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} + - 'data: 
{"content_index":0,"delta":"_status","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} ' - ' @@ -281,7 +281,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" for","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" with","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} ' - ' @@ -290,7 +290,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" job","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" job","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} ' - ' @@ -299,7 +299,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"-","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_id","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} ' - ' @@ -308,7 +308,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"382","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":":","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} ' - ' @@ -317,7 +317,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":".","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" 
\"","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} ' - ' @@ -326,7 +326,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" Use","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"job","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} ' - ' @@ -335,7 +335,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" functions","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"-","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} ' - ' @@ -344,7 +344,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":".get","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"382","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} ' - ' @@ -353,25 +353,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_job","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} - - ' - - ' - - ' - - 'event: response.reasoning_text.delta - - ' - - 'data: {"content_index":0,"delta":"_status","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":20,"type":"response.reasoning_text.delta"} - - ' - - ' - - ' - - 'event: response.reasoning_text.delta - - ' - - 'data: {"content_index":0,"delta":".","item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":21,"type":"response.reasoning_text.delta"} + - 'data: 
{"content_index":0,"delta":"\".","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} ' - ' @@ -380,8 +362,8 @@ turns: - 'event: response.reasoning_text.done ' - - 'data: {"content_index":0,"item_id":"msg_9f8f443f3d36c276","output_index":0,"sequence_number":22,"text":"We - need to query pipeline orchestrator for job-382. Use functions.get_job_status.","type":"response.reasoning_text.done"} + - 'data: {"content_index":0,"item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":20,"text":"We + need to call get_job_status with job_id: \"job-382\".","type":"response.reasoning_text.done"} ' - ' @@ -390,8 +372,8 @@ turns: - 'event: response.reasoning_part.done ' - - 'data: {"content_index":0,"item_id":"msg_9f8f443f3d36c276","output_index":0,"part":{"text":"We - need to query pipeline orchestrator for job-382. Use functions.get_job_status.","type":"reasoning_text"},"sequence_number":23,"type":"response.reasoning_part.done"} + - 'data: {"content_index":0,"item_id":"msg_b13a0755fc859b6a","output_index":0,"part":{"text":"We + need to call get_job_status with job_id: \"job-382\".","type":"reasoning_text"},"sequence_number":21,"type":"response.reasoning_part.done"} ' - ' @@ -400,8 +382,8 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"id":"msg_9f8f443f3d36c276","summary":[],"type":"reasoning","content":[{"text":"We - need to query pipeline orchestrator for job-382. 
Use functions.get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":24,"type":"response.output_item.done"} + - 'data: {"item":{"id":"msg_b13a0755fc859b6a","summary":[],"type":"reasoning","content":[{"text":"We + need to call get_job_status with job_id: \"job-382\".","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":22,"type":"response.output_item.done"} ' - ' @@ -410,7 +392,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"arguments":"","call_id":"call_bfdbcf3562684b32","name":"get_job_status","type":"function_call","id":"fc_a7e910a365eca0be","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":25,"type":"response.output_item.added"} + - 'data: {"item":{"arguments":"","call_id":"call_be4f9dfe0052d170","name":"get_job_status","type":"function_call","id":"fc_820d82db029349a4","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":23,"type":"response.output_item.added"} ' - ' @@ -419,7 +401,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"{\"","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"{\"","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} ' - ' @@ -428,7 +410,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"job","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} ' - ' @@ -437,7 +419,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"_id","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"_id","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} ' - ' @@ -446,7 +428,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":\"","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} ' - ' @@ -455,7 +437,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"job","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} ' - ' @@ -464,7 +446,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"-","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"-","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} ' - ' @@ -473,7 +455,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"382","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"382","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} ' - ' @@ -482,7 +464,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"\"}","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":33,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\"}","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} ' - ' @@ -491,7 +473,7 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_a7e910a365eca0be","name":"get_job_status","output_index":1,"sequence_number":34,"type":"response.function_call_arguments.done"} + - 'data: {"arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_820d82db029349a4","name":"get_job_status","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.done"} ' - ' @@ -500,7 +482,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_bfdbcf3562684b32","name":"get_job_status","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_a7e910a365eca0be","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":35,"type":"response.output_item.done"} + - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_be4f9dfe0052d170","name":"get_job_status","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":33,"type":"response.output_item.done"} ' - ' @@ -509,8 +491,8 @@ turns: - 'event: response.completed ' - - 'data: {"response":{"id":"resp_84fc22eb02e37767","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_b20f9cf797c0fb6d","summary":[],"type":"reasoning","content":[{"text":"We - need to query pipeline orchestrator for job-382. 
Use functions.get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_814e8f02d1cc07ae","name":"get_job_status","type":"function_call","id":"fc_814e8f02d1cc07ae","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_ac3c1315a91f31a2","created_at":1782331382,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_b661a3bbd2228d16","summary":[],"type":"reasoning","content":[{"text":"We + need to call get_job_status with job_id: \"job-382\".","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_8ce70f6987366936","name":"get_job_status","type":"function_call","id":"fc_8ce70f6987366936","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -531,7 +513,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":433,"input_tokens_details":{"cached_tokens":432,"input_tokens_per_turn":[433],"cached_tokens_per_turn":[432]},"output_tokens":46,"output_tokens_details":{"reasoning_tokens":28,"tool_output_tokens":0,"output_tokens_per_turn":[46],"tool_output_tokens_per_turn":[0]},"total_tokens":479},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":36,"type":"response.completed"} + the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":433,"input_tokens_details":{"cached_tokens":432,"input_tokens_per_turn":[433],"cached_tokens_per_turn":[432]},"output_tokens":44,"output_tokens_details":{"reasoning_tokens":26,"tool_output_tokens":0,"output_tokens_per_turn":[44],"tool_output_tokens_per_turn":[0]},"total_tokens":477},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":34,"type":"response.completed"} ' - ' @@ -541,9 +523,17 @@ turns: - filename: t2 request: body: - input: Get the error logs for that job with severity FATAL and max 5 entries. + input: + - call_id: call_8ce70f6987366936 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity FATAL and max 5 entries. 
+ role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_84fc22eb02e37767 + previous_response_id: resp_ac3c1315a91f31a2 store: true stream: true tool_choice: auto @@ -680,7 +670,7 @@ turns: - 'event: response.created ' - - 'data: {"response":{"id":"resp_bc7651309b00838d","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_8f993b44ca8e52e3","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -701,7 +691,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3594,"max_tool_calls":null,"previous_response_id":"resp_84fc22eb02e37767","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3495,"max_tool_calls":null,"previous_response_id":"resp_ac3c1315a91f31a2","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} ' - ' @@ -710,7 +700,7 @@ turns: - 'event: response.in_progress ' - - 
'data: {"response":{"id":"resp_bc7651309b00838d","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_8f993b44ca8e52e3","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -731,7 +721,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3594,"max_tool_calls":null,"previous_response_id":"resp_84fc22eb02e37767","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3495,"max_tool_calls":null,"previous_response_id":"resp_ac3c1315a91f31a2","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} ' - ' @@ -740,7 +730,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"id":"msg_a4d66d1800b6b304","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + - 'data: {"item":{"id":"msg_85bbe0941f1ebf9d","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} ' - ' @@ -749,7 +739,16 @@ turns: - 'event: response.reasoning_part.added ' - - 'data: {"content_index":0,"item_id":"msg_a4d66d1800b6b304","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + - 'data: {"content_index":0,"item_id":"msg_85bbe0941f1ebf9d","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: 
response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"Need","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} ' - ' @@ -758,7 +757,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"Call","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" get","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} ' - ' @@ -767,7 +766,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" get","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_error","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} ' - ' @@ -776,7 +775,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_error","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_logs","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} ' - ' @@ -785,7 +784,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_logs","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" function","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} ' - ' @@ -794,7 +793,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":".","item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + - 'data: 
{"content_index":0,"delta":".","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} ' - ' @@ -803,8 +802,8 @@ turns: - 'event: response.reasoning_text.done ' - - 'data: {"content_index":0,"item_id":"msg_a4d66d1800b6b304","output_index":0,"sequence_number":9,"text":"Call - get_error_logs.","type":"response.reasoning_text.done"} + - 'data: {"content_index":0,"item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":10,"text":"Need + get_error_logs function.","type":"response.reasoning_text.done"} ' - ' @@ -813,8 +812,8 @@ turns: - 'event: response.reasoning_part.done ' - - 'data: {"content_index":0,"item_id":"msg_a4d66d1800b6b304","output_index":0,"part":{"text":"Call - get_error_logs.","type":"reasoning_text"},"sequence_number":10,"type":"response.reasoning_part.done"} + - 'data: {"content_index":0,"item_id":"msg_85bbe0941f1ebf9d","output_index":0,"part":{"text":"Need + get_error_logs function.","type":"reasoning_text"},"sequence_number":11,"type":"response.reasoning_part.done"} ' - ' @@ -823,8 +822,8 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"id":"msg_a4d66d1800b6b304","summary":[],"type":"reasoning","content":[{"text":"Call - get_error_logs.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":11,"type":"response.output_item.done"} + - 'data: {"item":{"id":"msg_85bbe0941f1ebf9d","summary":[],"type":"reasoning","content":[{"text":"Need + get_error_logs function.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":12,"type":"response.output_item.done"} ' - ' @@ -833,7 +832,7 @@ turns: - 'event: response.output_item.added ' - - 'data: 
{"item":{"arguments":"","call_id":"call_a7b9ccec2aceeeac","name":"get_error_logs","type":"function_call","id":"fc_bd0b870ba5f84b7f","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":12,"type":"response.output_item.added"} + - 'data: {"item":{"arguments":"","call_id":"call_a12afd5ebc0e5edd","name":"get_error_logs","type":"function_call","id":"fc_b5165a0ca873fb55","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":13,"type":"response.output_item.added"} ' - ' @@ -842,7 +841,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"{\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"{\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} ' - ' @@ -851,7 +850,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"job","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} ' - ' @@ -860,7 +859,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"_id","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"_id","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} ' - ' @@ -869,7 +868,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"\":\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} ' - ' @@ -878,7 +877,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"job","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} ' - ' @@ -887,7 +886,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"-","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"-","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} ' - ' @@ -896,7 +895,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"382","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"382","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} ' - ' @@ -905,7 +904,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\",\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\",\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} ' - ' @@ -914,7 +913,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"severity","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"severity","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} ' - ' @@ -923,7 +922,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} ' - ' @@ -932,7 +931,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"F","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"F","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} ' - ' @@ -941,7 +940,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"ATAL","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"ATAL","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} ' - ' @@ -950,7 +949,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\",\"","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\",\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} ' - ' @@ -959,7 +958,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"max","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"max","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} ' - ' @@ -968,7 +967,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"_entries","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"_entries","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} ' - ' @@ -977,7 +976,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} ' - ' @@ -986,7 +985,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"5","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"5","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} ' - ' @@ -995,7 +994,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"}","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"}","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} ' - ' @@ -1004,7 +1003,7 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_bd0b870ba5f84b7f","name":"get_error_logs","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.done"} + - 'data: 
{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_b5165a0ca873fb55","name":"get_error_logs","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.done"} ' - ' @@ -1013,7 +1012,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_a7b9ccec2aceeeac","name":"get_error_logs","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_bd0b870ba5f84b7f","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":32,"type":"response.output_item.done"} + - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_a12afd5ebc0e5edd","name":"get_error_logs","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":33,"type":"response.output_item.done"} ' - ' @@ -1022,8 +1021,8 @@ turns: - 'event: response.completed ' - - 'data: {"response":{"id":"resp_bc7651309b00838d","created_at":1782285919,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_811e7867685d357b","summary":[],"type":"reasoning","content":[{"text":"Call - get_error_logs.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_b7430f4eccc1208e","name":"get_error_logs","type":"function_call","id":"fc_b7430f4eccc1208e","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: 
{"response":{"id":"resp_8f993b44ca8e52e3","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_ac77fd030765f30a","summary":[],"type":"reasoning","content":[{"text":"Need + get_error_logs function.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_82bf682ac0599fd9","name":"get_error_logs","type":"function_call","id":"fc_82bf682ac0599fd9","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1044,7 +1043,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3594,"max_tool_calls":null,"previous_response_id":"resp_84fc22eb02e37767","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":502,"input_tokens_details":{"cached_tokens":448,"input_tokens_per_turn":[502],"cached_tokens_per_turn":[448]},"output_tokens":43,"output_tokens_details":{"reasoning_tokens":25,"tool_output_tokens":0,"output_tokens_per_turn":[43],"tool_output_tokens_per_turn":[0]},"total_tokens":545},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":33,"type":"response.completed"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3495,"max_tool_calls":null,"previous_response_id":"resp_ac3c1315a91f31a2","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":601,"input_tokens_details":{"cached_tokens":448,"input_tokens_per_turn":[601],"cached_tokens_per_turn":[448]},"output_tokens":44,"output_tokens_details":{"reasoning_tokens":26,"tool_output_tokens":0,"output_tokens_per_turn":[44],"tool_output_tokens_per_turn":[0]},"total_tokens":645},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":34,"type":"response.completed"} ' - ' @@ -1054,9 +1053,23 @@ turns: - filename: t3 request: body: - input: Search the web for how to fix that type of error in Spark pipelines. 
+ input: + - call_id: call_82bf682ac0599fd9 + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the web for how to fix that type of error in Spark pipelines. + role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_bc7651309b00838d + previous_response_id: resp_8f993b44ca8e52e3 store: true stream: true tool_choice: auto @@ -1193,7 +1206,7 @@ turns: - 'event: response.created ' - - 'data: {"response":{"id":"resp_94ce25968dc3b4f4","created_at":1782285920,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_8d9428f89b5bbe23","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1214,7 +1227,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3530,"max_tool_calls":null,"previous_response_id":"resp_bc7651309b00838d","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3223,"max_tool_calls":null,"previous_response_id":"resp_8f993b44ca8e52e3","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} ' - ' @@ -1223,7 +1236,7 @@ turns: - 'event: response.in_progress ' - 
- 'data: {"response":{"id":"resp_94ce25968dc3b4f4","created_at":1782285920,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_8d9428f89b5bbe23","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1244,7 +1257,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3530,"max_tool_calls":null,"previous_response_id":"resp_bc7651309b00838d","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3223,"max_tool_calls":null,"previous_response_id":"resp_8f993b44ca8e52e3","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} ' - ' @@ -1253,7 +1266,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"id":"msg_b211473eff0066b4","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + - 'data: {"item":{"id":"msg_b5d30a9e826b60fb","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} ' - ' @@ -1262,7 +1275,7 @@ turns: - 'event: response.reasoning_part.added ' - - 'data: {"content_index":0,"item_id":"msg_b211473eff0066b4","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + - 'data: {"content_index":0,"item_id":"msg_b5d30a9e826b60fb","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} ' - ' @@ -1271,7 +1284,7 @@ turns: - 'event: 
response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"Use","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"Using","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} ' - ' @@ -1280,7 +1293,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" web","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" web","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} ' - ' @@ -1289,7 +1302,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_search","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_search","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} ' - ' @@ -1298,7 +1311,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":".","item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":".","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} ' - ' @@ -1307,7 +1320,7 @@ turns: - 'event: response.reasoning_text.done ' - - 'data: {"content_index":0,"item_id":"msg_b211473eff0066b4","output_index":0,"sequence_number":8,"text":"Use + - 'data: {"content_index":0,"item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":8,"text":"Using web_search.","type":"response.reasoning_text.done"} ' @@ -1317,7 +1330,7 @@ turns: - 'event: response.reasoning_part.done ' - - 'data: 
{"content_index":0,"item_id":"msg_b211473eff0066b4","output_index":0,"part":{"text":"Use + - 'data: {"content_index":0,"item_id":"msg_b5d30a9e826b60fb","output_index":0,"part":{"text":"Using web_search.","type":"reasoning_text"},"sequence_number":9,"type":"response.reasoning_part.done"} ' @@ -1327,7 +1340,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"id":"msg_b211473eff0066b4","summary":[],"type":"reasoning","content":[{"text":"Use + - 'data: {"item":{"id":"msg_b5d30a9e826b60fb","summary":[],"type":"reasoning","content":[{"text":"Using web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":10,"type":"response.output_item.done"} ' @@ -1337,7 +1350,25 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"arguments":"","call_id":"call_a8f833f716bcbc25","name":"web_search","type":"function_call","id":"fc_a389865814a7198c","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":11,"type":"response.output_item.added"} + - 'data: {"item":{"arguments":"","call_id":"call_b5ab6fded3b63402","name":"web_search","type":"function_call","id":"fc_b107336aafb717b5","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":11,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\"","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":12,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"query","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} ' - ' @@ -1346,7 +1377,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"{\"","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":12,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"\":\"","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} ' - ' @@ -1355,7 +1386,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"query","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"Spark","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} ' - ' @@ -1364,7 +1395,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" Out","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} ' - ' @@ -1373,7 +1404,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"how","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"Of","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} ' - ' @@ -1382,7 +1413,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" to","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"Memory","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} ' - ' @@ -1391,7 +1422,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" fix","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"Error","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} ' - ' @@ -1400,7 +1431,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" F","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" transform","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} ' - ' @@ -1409,7 +1440,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"ATAL","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" stage","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} ' - ' @@ -1418,7 +1449,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" error","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" fix","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} ' - ' @@ -1427,7 +1458,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" in","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" memory","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} ' - ' @@ -1436,7 +1467,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" Spark","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" 
limits","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} ' - ' @@ -1445,7 +1476,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" pipelines","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" solve","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} ' - ' @@ -1454,7 +1485,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\"}","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\"}","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} ' - ' @@ -1463,7 +1494,8 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"arguments":"{\"query\":\"how to fix FATAL error in Spark pipelines\"}","item_id":"fc_a389865814a7198c","name":"web_search","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.done"} + - 'data: {"arguments":"{\"query\":\"Spark OutOfMemoryError transform stage fix + memory limits solve\"}","item_id":"fc_b107336aafb717b5","name":"web_search","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.done"} ' - ' @@ -1472,7 +1504,8 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"arguments":"{\"query\":\"how to fix FATAL error in Spark pipelines\"}","call_id":"call_a8f833f716bcbc25","name":"web_search","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_a389865814a7198c","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":26,"type":"response.output_item.done"} + - 'data: {"item":{"arguments":"{\"query\":\"Spark OutOfMemoryError transform stage + fix memory limits 
solve\"}","call_id":"call_b5ab6fded3b63402","name":"web_search","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":28,"type":"response.output_item.done"} ' - ' @@ -1481,9 +1514,9 @@ turns: - 'event: response.completed ' - - 'data: {"response":{"id":"resp_94ce25968dc3b4f4","created_at":1782285920,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_a101c61e5eef4c23","summary":[],"type":"reasoning","content":[{"text":"Use - web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"query\":\"how - to fix FATAL error in Spark pipelines\"}","call_id":"call_9abf8aa146f1da2b","name":"web_search","type":"function_call","id":"fc_9abf8aa146f1da2b","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_8d9428f89b5bbe23","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_abc5949f81a3505f","summary":[],"type":"reasoning","content":[{"text":"Using + web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"query\":\"Spark + OutOfMemoryError transform stage fix memory limits solve\"}","call_id":"call_8682d661d7f3a9fd","name":"web_search","type":"function_call","id":"fc_8682d661d7f3a9fd","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1504,7 +1537,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3530,"max_tool_calls":null,"previous_response_id":"resp_bc7651309b00838d","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":566,"input_tokens_details":{"cached_tokens":512,"input_tokens_per_turn":[566],"cached_tokens_per_turn":[512]},"output_tokens":36,"output_tokens_details":{"reasoning_tokens":19,"tool_output_tokens":0,"output_tokens_per_turn":[36],"tool_output_tokens_per_turn":[0]},"total_tokens":602},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":27,"type":"response.completed"} + the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3223,"max_tool_calls":null,"previous_response_id":"resp_8f993b44ca8e52e3","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":873,"input_tokens_details":{"cached_tokens":608,"input_tokens_per_turn":[873],"cached_tokens_per_turn":[608]},"output_tokens":38,"output_tokens_details":{"reasoning_tokens":21,"tool_output_tokens":0,"output_tokens_per_turn":[38],"tool_output_tokens_per_turn":[0]},"total_tokens":911},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":29,"type":"response.completed"} ' - ' diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml index 3c9b981..c8af8e1 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml @@ -137,9 +137,9 @@ turns: response: body: background: false - created_at: 1782285901 + created_at: 1782331372 frequency_penalty: 0.0 - id: resp_87c0e67a68693479 + id: resp_9951741be0cfd0aa incomplete_details: null input_messages: null instructions: null @@ -151,16 +151,16 @@ turns: object: response output: - content: - - text: Need to call get_job_status. + - text: We should use get_job_status. 
type: reasoning_text encrypted_content: null - id: rs_923202f17e327c43 + id: rs_b9b65102428a224d status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_80a4bed3ce07c6e4 - id: fc_80a4bed3ce07c6e4 + call_id: call_b9bae339250e867f + id: fc_b9bae339250e867f name: get_job_status namespace: null status: null @@ -328,10 +328,18 @@ turns: - filename: t2 request: body: - input: Pull the error logs for that failed job. Use severity ERROR and max 20 - entries. + input: + - call_id: call_b9bae339250e867f + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Pull the error logs for that failed job. Use severity ERROR and max + 20 entries. + role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_87c0e67a68693479 + previous_response_id: resp_9951741be0cfd0aa store: true stream: false tool_choice: auto @@ -464,30 +472,30 @@ turns: response: body: background: false - created_at: 1782285902 + created_at: 1782331372 frequency_penalty: 0.0 - id: resp_ae25eb0ed9f87a15 + id: resp_a0d45c68cfdf0dc3 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3598 + max_output_tokens: 3497 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Call get_error_logs. + - text: Use get_error_logs. 
type: reasoning_text encrypted_content: null - id: rs_99a65d2fb5f1b2f7 + id: rs_81a485924cbf2205 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' - call_id: call_b3fe180eeb3ba93c - id: fc_b3fe180eeb3ba93c + call_id: call_b2ab544b587ed933 + id: fc_b2ab544b587ed933 name: get_error_logs namespace: null status: null @@ -495,7 +503,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_87c0e67a68693479 + previous_response_id: resp_9951741be0cfd0aa prompt: null reasoning: null service_tier: auto @@ -632,13 +640,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 498 + input_tokens: 599 input_tokens_details: cached_tokens: 448 cached_tokens_per_turn: - 448 input_tokens_per_turn: - - 498 + - 599 output_tokens: 42 output_tokens_details: output_tokens_per_turn: @@ -647,7 +655,7 @@ turns: tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 540 + total_tokens: 641 user: null headers: content-type: application/json @@ -655,10 +663,24 @@ turns: - filename: t3 request: body: - input: Search the runbook for how to fix the issue found in those logs. Max - 5 results. 
+ input: + - call_id: call_b2ab544b587ed933 + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the runbook for how to fix the issue found in those logs. + Max 5 results. + role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_ae25eb0ed9f87a15 + previous_response_id: resp_a0d45c68cfdf0dc3 store: true stream: false tool_choice: auto @@ -791,30 +813,31 @@ turns: response: body: background: false - created_at: 1782285902 + created_at: 1782331372 frequency_penalty: 0.0 - id: resp_854c134ceb4f6127 + id: resp_942924910373a289 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3530 + max_output_tokens: 3222 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Search runbook. + - text: Use search_runbook. 
type: reasoning_text encrypted_content: null - id: rs_a8d24e26d57877e0 + id: rs_99598f9b7ee05702 status: null summary: [] type: reasoning - - arguments: '{"query":"pipeline job-382 error logs fix","max_results":5}' - call_id: call_8fae155d7b2ca7f9 - id: fc_8fae155d7b2ca7f9 + - arguments: '{"query":"OutOfMemoryError transform stage spark partition 20 + entries","max_results":5}' + call_id: call_bd3e5a22c5de5a46 + id: fc_bd3e5a22c5de5a46 name: search_runbook namespace: null status: null @@ -822,7 +845,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_ae25eb0ed9f87a15 + previous_response_id: resp_a0d45c68cfdf0dc3 prompt: null reasoning: null service_tier: auto @@ -959,22 +982,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 566 + input_tokens: 874 input_tokens_details: - cached_tokens: 496 + cached_tokens: 608 cached_tokens_per_turn: - - 496 + - 608 input_tokens_per_turn: - - 566 - output_tokens: 40 + - 874 + output_tokens: 45 output_tokens_details: output_tokens_per_turn: - - 40 - reasoning_tokens: 22 + - 45 + reasoning_tokens: 27 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 606 + total_tokens: 919 user: null headers: content-type: application/json @@ -982,11 +1005,22 @@ turns: - filename: t4 request: body: - input: 'Run this analysis code to summarize: import json; print(json.dumps({''job'': - ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': - ''increase memory to 64GB''}))' + input: + - call_id: call_bd3e5a22c5de5a46 + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase 
memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: 'Run this analysis code to summarize: import json; print(json.dumps({''job'': + ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': + ''increase memory to 64GB''}))' + role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_854c134ceb4f6127 + previous_response_id: resp_942924910373a289 store: true stream: false tool_choice: auto @@ -1119,32 +1153,32 @@ turns: response: body: background: false - created_at: 1782285902 + created_at: 1782331373 frequency_penalty: 0.0 - id: resp_addadf40977f7a22 + id: resp_ad167af2a228ea80 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3438 + max_output_tokens: 2980 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Execute run_analysis. + - text: We need to call run_analysis. 
type: reasoning_text encrypted_content: null - id: rs_930ef0af2fb14041 + id: rs_a7f023b8835b666c status: null summary: [] type: reasoning - - arguments: '{"code":"import json; print(json.dumps({''job'': ''job-382'', + - arguments: '{"code":"import json\nprint(json.dumps({''job'': ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase memory to 64GB''}))","timeout_seconds":30}' - call_id: call_81a626f9aa98b83f - id: fc_81a626f9aa98b83f + call_id: call_9972b03347027652 + id: fc_9972b03347027652 name: run_analysis namespace: null status: null @@ -1152,7 +1186,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_854c134ceb4f6127 + previous_response_id: resp_942924910373a289 prompt: null reasoning: null service_tier: auto @@ -1289,22 +1323,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 658 + input_tokens: 1116 input_tokens_details: - cached_tokens: 576 + cached_tokens: 880 cached_tokens_per_turn: - - 576 + - 880 input_tokens_per_turn: - - 658 - output_tokens: 71 + - 1116 + output_tokens: 74 output_tokens_details: output_tokens_per_turn: - - 71 - reasoning_tokens: 54 + - 74 + reasoning_tokens: 57 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 729 + total_tokens: 1190 user: null headers: content-type: application/json @@ -1312,10 +1346,18 @@ turns: - filename: t5 request: body: - input: Great. Now restart it with 64 GB memory, skip completed stages, and high - priority. + input: + - call_id: call_9972b03347027652 + output: '{"stdout": "Failed stage: transform, count: 2\nPattern: consistent + OOM at partition-47 (2.1M rows)\nRecommendation: increase memory to 64GB + or repartition", "stderr": "", "exit_code": 0, "duration_ms": 120}' + type: function_call_output + - content: Great. Now restart it with 64 GB memory, skip completed stages, and + high priority. 
+ role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_addadf40977f7a22 + previous_response_id: resp_ad167af2a228ea80 store: true stream: false tool_choice: auto @@ -1448,30 +1490,30 @@ turns: response: body: background: false - created_at: 1782285903 + created_at: 1782331373 frequency_penalty: 0.0 - id: resp_8d3aac3867ddc3b7 + id: resp_ba8c42b503457f18 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3342 + max_output_tokens: 2808 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Restart_job. + - text: Use restart_job. type: reasoning_text encrypted_content: null - id: rs_9af96e78c16c9ac1 + id: rs_9a2415f2829c554b status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' - call_id: call_935ef947ceb46eec - id: fc_935ef947ceb46eec + call_id: call_945de04a969d9596 + id: fc_945de04a969d9596 name: restart_job namespace: null status: null @@ -1479,7 +1521,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_addadf40977f7a22 + previous_response_id: resp_ad167af2a228ea80 prompt: null reasoning: null service_tier: auto @@ -1616,22 +1658,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 754 + input_tokens: 1288 input_tokens_details: - cached_tokens: 656 + cached_tokens: 1120 cached_tokens_per_turn: - - 656 + - 1120 input_tokens_per_turn: - - 754 - output_tokens: 48 + - 1288 + output_tokens: 49 output_tokens_details: output_tokens_per_turn: - - 48 - reasoning_tokens: 31 + - 49 + reasoning_tokens: 32 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 802 + total_tokens: 1337 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml 
b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml index bb554a2..fd6cd6e 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml @@ -136,9 +136,9 @@ turns: response: body: background: false - created_at: 1782285929 + created_at: 1782331392 frequency_penalty: 0.0 - id: resp_887f6295aa43fb96 + id: resp_bcc40e72c8d9c69d incomplete_details: null input_messages: null instructions: null @@ -153,13 +153,13 @@ turns: - text: We need to call get_job_status. type: reasoning_text encrypted_content: null - id: rs_91cfc91f69b6f252 + id: rs_be52377bf2edcc74 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_9e471a12b0e01b14 - id: fc_9e471a12b0e01b14 + call_id: call_bb5389175f60b859 + id: fc_bb5389175f60b859 name: get_job_status namespace: null status: null @@ -327,9 +327,17 @@ turns: - filename: t2 request: body: - input: Get the error logs for that job with severity ERROR and max 10 entries. + input: + - call_id: call_bb5389175f60b859 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity ERROR and max 10 entries. 
+ role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_887f6295aa43fb96 + previous_response_id: resp_bcc40e72c8d9c69d store: true stream: false tool_choice: auto @@ -462,30 +470,30 @@ turns: response: body: background: false - created_at: 1782285930 + created_at: 1782331392 frequency_penalty: 0.0 - id: resp_bd929a6ae43e5ff9 + id: resp_b6f737f9fa6c4cf8 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3602 + max_output_tokens: 3501 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Use get_error_logs. + - text: Call get_error_logs. type: reasoning_text encrypted_content: null - id: rs_825ec278d2c86186 + id: rs_b726441da4677400 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' - call_id: call_80c557a803ad1d26 - id: fc_80c557a803ad1d26 + call_id: call_88a9e761caa65554 + id: fc_88a9e761caa65554 name: get_error_logs namespace: null status: null @@ -493,7 +501,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_887f6295aa43fb96 + previous_response_id: resp_bcc40e72c8d9c69d prompt: null reasoning: null service_tier: auto @@ -630,13 +638,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 494 + input_tokens: 595 input_tokens_details: - cached_tokens: 448 + cached_tokens: 576 cached_tokens_per_turn: - - 448 + - 576 input_tokens_per_turn: - - 494 + - 595 output_tokens: 42 output_tokens_details: output_tokens_per_turn: @@ -645,7 +653,7 @@ turns: tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 536 + total_tokens: 637 user: null headers: content-type: application/json @@ -653,10 +661,18 @@ turns: - filename: t3 request: body: - input: Instead of investigating errors, search the runbook for how to increase - memory limits for ETL jobs. Max 3 results. 
+ input: + - call_id: call_bb5389175f60b859 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Instead of investigating errors, search the runbook for how to increase + memory limits for ETL jobs. Max 3 results. + role: user + type: message model: openai/gpt-oss-20b - previous_response_id: resp_887f6295aa43fb96 + previous_response_id: resp_bcc40e72c8d9c69d store: true stream: false tool_choice: auto @@ -789,30 +805,30 @@ turns: response: body: background: false - created_at: 1782285930 + created_at: 1782331393 frequency_penalty: 0.0 - id: resp_926736eebb257cfd + id: resp_991eab30a6ad96c8 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3593 + max_output_tokens: 3492 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: We need runbook search. + - text: We need to call search_runbook. 
type: reasoning_text encrypted_content: null - id: rs_8f6c080dca467649 + id: rs_978d79d63851ced6 status: null summary: [] type: reasoning - - arguments: '{"query":"increase memory limits for ETL jobs","max_results":3}' - call_id: call_815e8956def890df - id: fc_815e8956def890df + - arguments: '{"query":"increase memory limits for ETL jobs runbook","max_results":3}' + call_id: call_9339bb4ac308c0fc + id: fc_9339bb4ac308c0fc name: search_runbook namespace: null status: null @@ -820,7 +836,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_887f6295aa43fb96 + previous_response_id: resp_bcc40e72c8d9c69d prompt: null reasoning: null service_tier: auto @@ -957,22 +973,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 503 + input_tokens: 604 input_tokens_details: - cached_tokens: 464 + cached_tokens: 576 cached_tokens_per_turn: - - 464 + - 576 input_tokens_per_turn: - - 503 - output_tokens: 42 + - 604 + output_tokens: 46 output_tokens_details: output_tokens_per_turn: - - 42 - reasoning_tokens: 24 + - 46 + reasoning_tokens: 28 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 545 + total_tokens: 650 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml new file mode 100644 index 0000000..d1cc276 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml @@ -0,0 +1,1002 @@ +turns: +- filename: t1 + request: + body: + input: 'You are an SRE assistant. I need you to do TWO things in parallel: 1) + call get_job_status for job-382, and 2) call web_search for ''Spark ETL OOM + causes''. Do both now.' 
+ model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782331421 + frequency_penalty: 0.0 + id: resp_8d2ba1fd0c04698e + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3630 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We have two tasks. Need to issue function calls. In parallel but we + can do sequential calls. We'll return a response with both calls. 
+ type: reasoning_text + encrypted_content: null + id: rs_80890552364f4920 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_8e475b02312e69a7 + id: fc_8e475b02312e69a7 + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 466 + input_tokens_details: + cached_tokens: 416 + cached_tokens_per_turn: + - 416 + input_tokens_per_turn: + - 466 + output_tokens: 56 + output_tokens_details: + output_tokens_per_turn: + - 56 + reasoning_tokens: 38 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 522 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_8e475b02312e69a7 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Based on those results, search the runbook for memory increase procedures. + Max 3 results. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_8d2ba1fd0c04698e + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782331422 + frequency_penalty: 0.0 + id: resp_aba6b58081c2af00 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3448 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: 'We use search_runbook with query: "increase memory in spark etl job".' 
+ type: reasoning_text + encrypted_content: null + id: rs_bdf90e9ca5f2cfe1 + status: null + summary: [] + type: reasoning + - arguments: '{"query":"increase memory in spark etl job","max_results":3}' + call_id: call_bdca8527c50ba7c3 + id: fc_bdca8527c50ba7c3 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_8d2ba1fd0c04698e + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 648 + input_tokens_details: + cached_tokens: 496 + cached_tokens_per_turn: + - 496 + input_tokens_per_turn: + - 648 + output_tokens: 53 + output_tokens_details: + output_tokens_per_turn: + - 53 + reasoning_tokens: 35 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 701 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_bdca8527c50ba7c3 + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: Now restart that job with 64 GB memory, skip completed stages, high + priority. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_aba6b58081c2af00 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782331422 + frequency_penalty: 0.0 + id: resp_ad6b4fc0f049ab2c + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3227 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We call restart_job. 
+ type: reasoning_text + encrypted_content: null + id: rs_bdb91c841cb8efe5 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_ab53b40d671c09d9 + id: fc_ab53b40d671c09d9 + name: restart_job + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_aba6b58081c2af00 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 869 + input_tokens_details: + cached_tokens: 656 + cached_tokens_per_turn: + - 656 + input_tokens_per_turn: + - 869 + output_tokens: 50 + output_tokens_details: + output_tokens_per_turn: + - 50 + reasoning_tokens: 33 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 919 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml new file mode 100644 index 0000000..15afd2a --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml @@ -0,0 +1,1014 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782331440 + frequency_penalty: 0.0 + id: resp_8a090f3a77b00e12 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3660 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to call get_job_status function. 
+ type: reasoning_text + encrypted_content: null + id: rs_817b973ff481e19e + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_b565b3670d5793e9 + id: fc_b565b3670d5793e9 + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. 
+ name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. 
+ name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 436 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 436 + output_tokens: 37 + output_tokens_details: + output_tokens_per_turn: + - 37 + reasoning_tokens: 19 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 473 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_b565b3670d5793e9 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + model: openai/gpt-oss-20b + previous_response_id: resp_8a090f3a77b00e12 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782331440 + frequency_penalty: 0.0 + id: resp_a7852d7a7444baf5 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3520 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: The job failed due to OutOfMemoryError in transform stage. We need + to provide troubleshoot advice or offer restart. Let's provide status + and recommended next steps. 
+ type: reasoning_text + encrypted_content: null + id: rs_b963ebd664a8f5b9 + status: null + summary: [] + type: reasoning + - content: + - annotations: [] + logprobs: null + text: "**ETL Pipeline – Job `job-382`**\n\n| Metric | Value |\n|--------|-------|\n\ + | **Status** | Failed |\n| **Pipeline** | `etl-daily-ingest` |\n| **Stage**\ + \ | `transform` |\n| **Error** | `OutOfMemoryError` |\n| **Timestamp**\ + \ | 2024‑01‑15T03:22:11Z |\n| **Memory used** | 31.2 GB |\n| **Memory\ + \ limit** | 32 GB |\n| **Owner** | `data-platform-team` |\n\n---\n\n##\ + \ Likely Cause\nThe `transform` stage exceeded the allocated 32 GB RAM,\ + \ triggering the JVM/worker process to kill the job. Possible root causes:\n\ + \n| Factor | How it impacts |\n|--------|----------------|\n| **Large\ + \ intermediate dataset** | Consumes a lot of RAM before writing to disk.\ + \ |\n| **Inefficient transformations** | e.g., `groupBy`, `collect_list`,\ + \ missing `repartition` or `broadcast`. |\n| **Missing shuffle savings**\ + \ | Repartitioning to reduce shuffles can lower per‑task memory usage.\ + \ |\n| **Insufficient worker memory** | The job was allowed only 32 GB,\ + \ but the dataset size grew beyond that. |\n\n---\n\n## Next Steps\n\n\ + 1. **Inspect the Logs** \n Pull the detailed logs to see exactly which\ + \ transform caused the spike.\n\n ```bash\n get_error_logs(job_id=\"\ + job-382\", severity=\"ERROR\", max_entries=200)\n ```\n\n2. **Check\ + \ the Spark UI** \n Look at the shuffle and storage tab for the transform\ + \ stage. \n - Any tasks spilling to disk? \n - Are there many small\ + \ partitions? \n\n3. **Tune Memory Configuration** \n *You can add\ + \ a job restart with higher memory:*\n\n ```bash\n restart_job(job_id=\"\ + job-382\", memory_override_gb=48, skip_completed_stages=true, priority=\"\ + high\")\n ```\n\n *or add a memory hint in the job definition for\ + \ subsequent runs.*\n\n4. 
**Optimize the Transform** \n - Use `repartition`\ + \ or `coalesce` to reduce partition count before heavy ops. \n - Offload\ + \ non‑critical columns to a temporary dataset (`select`). \n - If using\ + \ `groupBy`, check if a `join` can reduce the data size first. \n -\ + \ Cache / persist intermediate results only when necessary.\n\n5. **Run\ + \ a Dry‑Run** \n After changes, submit a small subset of data (e.g.,\ + \ 1 % or a specific date range) to confirm the memory usage stays within\ + \ limits.\n\n---\n\n### Recommended Immediate Action\n\nIf you need a\ + \ quick fix, I suggest restarting the job with 48 GB memory:\n\n```json\n\ + {\n \"job_id\": \"job-382\",\n \"memory_override_gb\": 48,\n \"skip_completed_stages\"\ + : true,\n \"priority\": \"high\"\n}\n```\n\nWould you like me to initiate\ + \ that restart or fetch the logs for deeper analysis first?" + type: output_text + id: msg_96929b0b2ac2ca7f + phase: null + role: assistant + status: completed + type: message + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_8a090f3a77b00e12 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. 
+ name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 576 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 576 + output_tokens: 675 + output_tokens_details: + output_tokens_per_turn: + - 675 + reasoning_tokens: 32 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 1251 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Great, now search the runbook for how to fix that OOM error. Max 5 results. + model: openai/gpt-oss-20b + previous_response_id: resp_a7852d7a7444baf5 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. 
job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782331445 + frequency_penalty: 0.0 + id: resp_a89e4ab94e1181a6 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 2870 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - arguments: '{"query":"ETL OutOfMemoryError transform stage runbook steps","max_results":5}' + call_id: call_88528befdb8e146c + id: fc_88528befdb8e146c + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_a7852d7a7444baf5 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. 
+ name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. 
+ name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 1226 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 1226 + output_tokens: 34 + output_tokens_details: + output_tokens_per_turn: + - 34 + reasoning_tokens: 21 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 1260 + user: null + headers: + content-type: application/json + status_code: 200 From cb86e14e07446729e8cf2fb5d360b8f521a380af Mon Sep 17 00:00:00 2001 From: Ashwin Giridharan Date: Wed, 24 Jun 2026 13:26:10 -0700 Subject: [PATCH 3/5] docs: update record.sh with all 12 cassette recording commands Add missing vLLM recordings (parallel, tool-output-only) and all 6 OpenAI recordings. Support ./record.sh [vllm|openai|all] to select backend. All commands now include --tool-outputs for realistic flow. 
Signed-off-by: Ashwin Giridharan --- .../cassettes/tool_calls/multi_turn/record.sh | 184 ++++++++++++++++-- 1 file changed, 172 insertions(+), 12 deletions(-) diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh index ed004db..e17ddeb 100755 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh @@ -1,26 +1,49 @@ #!/bin/bash # Records stateful multi-turn tool-call cassettes using record_cassette.py -# Backend: vLLM gpt-oss-20b with VLLM_ENABLE_RESPONSES_API_STORE=1 +# # Scenario: SRE debugging failed ETL pipeline job-382 +# Tools: get_job_status, get_error_logs, search_runbook, run_analysis, +# restart_job, web_search # # IMPORTANT: These cassettes prove context retention via ambiguous prompts. # Turns 2+ use pronouns ("that job", "it", "those errors") that can ONLY resolve # correctly if previous_response_id preserves server-side conversation state. 
# -# Prerequisites: +# Prerequisites (vLLM): # - SSH tunnel to G6e instance: ssh -L 8100:localhost:8100 ubuntu@ # - gpt-oss container running with VLLM_ENABLE_RESPONSES_API_STORE=1 -# - Tools file at /tmp/pipeline_tools.json (6 tools: get_job_status, -# get_error_logs, search_runbook, run_analysis, restart_job, web_search) +# +# Prerequisites (OpenAI): +# - OPENAI_API_KEY env var set (or ~/.openai_api_key file) +# +# Usage: +# ./record.sh # Record all (vLLM + OpenAI) +# ./record.sh vllm # Record vLLM only +# ./record.sh openai # Record OpenAI only set -euo pipefail RECORDER="$(dirname "$0")/../../cassettes/record_cassette.py" -TOOLS="/tmp/pipeline_tools.json" +TOOLS="$(dirname "$0")/pipeline_tools.json" +TOOL_OUTPUTS="$(dirname "$0")/pipeline_tool_outputs.json" OUTPUT_DIR="$(dirname "$0")" VLLM_URL="http://localhost:8100" -MODEL="openai/gpt-oss-20b" +VLLM_MODEL="openai/gpt-oss-20b" +OPENAI_MODEL="gpt-4o" + +TARGET="${1:-all}" + +# ═══════════════════════════════════════════════════════════════════ +# vLLM cassettes (gpt-oss-20b) +# ═══════════════════════════════════════════════════════════════════ + +if [[ "$TARGET" == "all" || "$TARGET" == "vllm" ]]; then +echo "══════════════════════════════════════════════════════════════" +echo " Recording vLLM cassettes (gpt-oss-20b)" +echo "══════════════════════════════════════════════════════════════" + +echo "" echo "=== 3-turn non-streaming (context retention: 'that job' resolves to job-382) ===" printf '%s\n' \ "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ @@ -28,8 +51,9 @@ printf '%s\n' \ "Based on those errors, search the runbook for troubleshooting procedures. Max 5 results." 
\ | python3 "$RECORDER" \ --turns 3 --mode responses --no-stream \ - --model "$MODEL" --vllm "$VLLM_URL" \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ --output "$OUTPUT_DIR/responses_tool_calls_3turn.yaml" echo "" @@ -42,8 +66,9 @@ printf '%s\n' \ "Great. Now restart it with 64 GB memory, skip completed stages, and high priority." \ | python3 "$RECORDER" \ --turns 5 --mode responses --no-stream \ - --model "$MODEL" --vllm "$VLLM_URL" \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ --output "$OUTPUT_DIR/responses_tool_calls_5turn.yaml" echo "" @@ -54,8 +79,9 @@ printf '%s\n' \ "Search the web for how to fix that type of error in Spark pipelines." \ | python3 "$RECORDER" \ --turns 3 --mode responses --stream \ - --model "$MODEL" --vllm "$VLLM_URL" \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ --output "$OUTPUT_DIR/responses_tool_calls_3turn_streaming.yaml" echo "" @@ -66,11 +92,145 @@ printf '%s\n' \ "Instead of investigating errors, search the runbook for how to increase memory limits for ETL jobs. Max 3 results." \ | python3 "$RECORDER" \ --turns 3 --mode responses --no-stream \ - --model "$MODEL" --vllm "$VLLM_URL" \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ --branch-from 1 --branch-turn-number 3 \ --output "$OUTPUT_DIR/responses_tool_calls_branch.yaml" echo "" -echo "=== All cassettes recorded ===" -ls -la "$OUTPUT_DIR"/*.yaml +echo "=== 3-turn parallel (attempts 2 tools in one turn) ===" +printf '%s\n' \ + "You are an SRE assistant. Do TWO things in parallel: 1) check the status of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now." \ + "Based on those results, search the runbook for memory override procedures. Max 3 results." 
\ + "Now restart that job with 64GB memory and high priority." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/responses_tool_calls_parallel.yaml" + +echo "" +echo "=== 3-turn tool-output-only (turn 2 has no user message, just tool output) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "" \ + "Based on what you found, search the runbook for how to fix it. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --tool-output-only-turn 2 \ + --output "$OUTPUT_DIR/responses_tool_calls_tool_output_only.yaml" + +echo "" +echo "=== vLLM cassettes done ===" +ls -la "$OUTPUT_DIR"/responses_*.yaml + +fi + +# ═══════════════════════════════════════════════════════════════════ +# OpenAI cassettes (gpt-4o) +# ═══════════════════════════════════════════════════════════════════ + +if [[ "$TARGET" == "all" || "$TARGET" == "openai" ]]; then + +echo "" +echo "══════════════════════════════════════════════════════════════" +echo " Recording OpenAI cassettes (gpt-4o)" +echo "══════════════════════════════════════════════════════════════" + +echo "" +echo "=== 3-turn non-streaming (context retention: 'that job' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Now pull the error logs for that job. Use severity ERROR and max 10 entries." \ + "Based on those errors, search the runbook for troubleshooting procedures. Max 5 results." 
\ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_3turn.yaml" + +echo "" +echo "=== 5-turn non-streaming (context retention: 'restart it' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. ETL pipeline job-382 failed overnight. What is its current status?" \ + "Pull the error logs for that failed job. Use severity ERROR and max 20 entries." \ + "Search the runbook for how to fix the issue found in those logs. Max 5 results." \ + "Run this analysis code to summarize: import json; print(json.dumps({'job': 'job-382', 'error': 'OOM', 'stage': 'transform', 'recommendation': 'increase memory to 64GB'}))" \ + "Great. Now restart it with 64 GB memory, skip completed stages, and high priority." \ +| python3 "$RECORDER" \ + --turns 5 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_5turn.yaml" + +echo "" +echo "=== 3-turn streaming (context retention in SSE mode) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the status of pipeline job-382." \ + "Get the error logs for that job with severity FATAL and max 5 entries." \ + "Search the web for how to fix that type of error in Spark pipelines." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_3turn_streaming.yaml" + +echo "" +echo "=== 3-turn branch (turn 3 diverges from turn 1) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Get the error logs for that job with severity ERROR and max 10 entries." 
\ + "Instead of investigating errors, search the runbook for how to increase memory limits for ETL jobs. Max 3 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --branch-from 1 --branch-turn-number 3 \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_branch.yaml" + +echo "" +echo "=== 3-turn parallel (2 tools in one turn — gpt-4o reliably does this) ===" +printf '%s\n' \ + "You are an SRE assistant. Do TWO things in parallel: 1) check the status of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now." \ + "Based on those results, search the runbook for memory override procedures. Max 3 results." \ + "Now restart that job with 64GB memory and high priority." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_parallel.yaml" + +echo "" +echo "=== 3-turn tool-output-only (turn 2 has no user message) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "" \ + "Based on what you found, search the runbook for how to fix it. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --tool-output-only-turn 2 \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_tool_output_only.yaml" + +echo "" +echo "=== OpenAI cassettes done ===" +ls -la "$OUTPUT_DIR"/openai_*.yaml + +fi + +echo "" +echo "══════════════════════════════════════════════════════════════" +echo " All done. 12 cassettes total." 
+echo "══════════════════════════════════════════════════════════════" From 7f4d2cbc2a41bd8098627e8364e789afa283b1a4 Mon Sep 17 00:00:00 2001 From: Ashwin Giridharan Date: Wed, 24 Jun 2026 13:33:39 -0700 Subject: [PATCH 4/5] test: re-record all 12 cassettes from record.sh for determinism Re-recorded all vLLM and OpenAI cassettes using the updated record.sh script to ensure reproducibility. Fixed record.sh path resolution and removed nonexistent --tool-output-only-turn flag (empty stdin line already achieves this). Final summary now shows actual file count. Signed-off-by: Ashwin Giridharan --- .../openai_responses_tool_calls_3turn.yaml | 44 +- ..._responses_tool_calls_3turn_streaming.yaml | 141 ++--- .../openai_responses_tool_calls_5turn.yaml | 88 +-- .../openai_responses_tool_calls_branch.yaml | 42 +- .../openai_responses_tool_calls_parallel.yaml | 78 ++- ...responses_tool_calls_tool_output_only.yaml | 74 +-- .../cassettes/tool_calls/multi_turn/record.sh | 9 +- .../responses_tool_calls_3turn.yaml | 77 +-- .../responses_tool_calls_3turn_streaming.yaml | 568 ++++++++++++++---- .../responses_tool_calls_5turn.yaml | 148 ++--- .../responses_tool_calls_branch.yaml | 90 +-- .../responses_tool_calls_parallel.yaml | 97 ++- ...responses_tool_calls_tool_output_only.yaml | 139 ++--- 13 files changed, 938 insertions(+), 657 deletions(-) diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml index ae27810..936d837 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml @@ -139,11 +139,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331467 - created_at: 1782331466 + completed_at: 1782332973 + created_at: 1782332972 error: null frequency_penalty: 
0.0 - id: resp_049b5e7584cd0867006a3c384ab9a88199a7bb53c011a2c1fa + id: resp_053fd4e04cde0a13006a3c3e2cb15481999f99b43d8b7ba0eb incomplete_details: null instructions: null max_output_tokens: null @@ -154,8 +154,8 @@ turns: object: response output: - arguments: '{"job_id":"job-382"}' - call_id: call_WHuX19CANJfWBjbT8lqdEiya - id: fc_049b5e7584cd0867006a3c384bc20481999a4ed6ff6b79fac6 + call_id: call_3X1PVcQ6ehCZzHs06wCQYUKS + id: fc_053fd4e04cde0a13006a3c3e2d915c8199bbe2658507aca5a3 name: get_job_status status: completed type: function_call @@ -316,7 +316,7 @@ turns: request: body: input: - - call_id: call_WHuX19CANJfWBjbT8lqdEiya + - call_id: call_3X1PVcQ6ehCZzHs06wCQYUKS output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -326,7 +326,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_049b5e7584cd0867006a3c384ab9a88199a7bb53c011a2c1fa + previous_response_id: resp_053fd4e04cde0a13006a3c3e2cb15481999f99b43d8b7ba0eb store: true stream: false tool_choice: auto @@ -462,11 +462,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331468 - created_at: 1782331468 + completed_at: 1782332974 + created_at: 1782332974 error: null frequency_penalty: 0.0 - id: resp_049b5e7584cd0867006a3c384c03748199949f46ad0cd9b7f2 + id: resp_053fd4e04cde0a13006a3c3e2e09fc8199b946b967b1807204 incomplete_details: null instructions: null max_output_tokens: null @@ -477,14 +477,14 @@ turns: object: response output: - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' - call_id: call_43JRhsN6hGDMwwMx06hK3SWY - id: fc_049b5e7584cd0867006a3c384caea881999eba9bcd4ec6ee9b + call_id: call_fOWZK3bqWYpKzLkZZ43j8CkC + id: fc_053fd4e04cde0a13006a3c3e2ea0148199bca9b82969b56771 name: get_error_logs status: completed type: function_call 
parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_049b5e7584cd0867006a3c384ab9a88199a7bb53c011a2c1fa + previous_response_id: resp_053fd4e04cde0a13006a3c3e2cb15481999f99b43d8b7ba0eb prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -639,7 +639,7 @@ turns: request: body: input: - - call_id: call_43JRhsN6hGDMwwMx06hK3SWY + - call_id: call_fOWZK3bqWYpKzLkZZ43j8CkC output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, @@ -655,7 +655,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_049b5e7584cd0867006a3c384c03748199949f46ad0cd9b7f2 + previous_response_id: resp_053fd4e04cde0a13006a3c3e2e09fc8199b946b967b1807204 store: true stream: false tool_choice: auto @@ -791,11 +791,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331470 - created_at: 1782331469 + completed_at: 1782332976 + created_at: 1782332975 error: null frequency_penalty: 0.0 - id: resp_049b5e7584cd0867006a3c384d13d48199a2782d3f3d47efe8 + id: resp_053fd4e04cde0a13006a3c3e2f21b481999e17dd868d5c3d17 incomplete_details: null instructions: null max_output_tokens: null @@ -805,15 +805,15 @@ turns: moderation: null object: response output: - - arguments: '{"query":"OutOfMemoryError in ETL transform stage","max_results":5}' - call_id: call_tSIYLWKg6qeX4YLQC9jBLctZ - id: fc_049b5e7584cd0867006a3c384e2488819995cd62e88ea95a97 + - arguments: '{"query":"OutOfMemoryError in Spark ETL pipeline","max_results":5}' + call_id: call_YDBEHTdaQUL2SLAoITTufq5z + id: fc_053fd4e04cde0a13006a3c3e3070e8819985588ede678d1791 name: search_runbook status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_049b5e7584cd0867006a3c384c03748199949f46ad0cd9b7f2 + 
previous_response_id: resp_053fd4e04cde0a13006a3c3e2e09fc8199b946b967b1807204 prompt_cache_key: null prompt_cache_retention: in_memory reasoning: diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml index cb4220e..f501914 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml @@ -141,7 +141,7 @@ turns: - 'event: response.created ' - - 'data: {"type":"response.created","response":{"id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","object":"response","created_at":1782331495,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: 
{"type":"response.created","response":{"id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","object":"response","created_at":1782332992,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -171,7 +171,7 @@ turns: - 'event: response.in_progress ' - - 'data: {"type":"response.in_progress","response":{"id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","object":"response","created_at":1782331495,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: {"type":"response.in_progress","response":{"id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","object":"response","created_at":1782332992,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure 
info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -201,7 +201,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"type":"response.output_item.added","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","type":"function_call","status":"in_progress","arguments":"","call_id":"call_oGw30yoywjhkYA1B6t8FesEm","name":"get_job_status"},"output_index":0,"sequence_number":2} + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","type":"function_call","status":"in_progress","arguments":"","call_id":"call_m0qbDty5eurPBYlBk4SYj56W","name":"get_job_status"},"output_index":0,"sequence_number":2} ' - ' @@ -210,7 +210,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"Qdc2CisBnggcQe","output_index":0,"sequence_number":3} + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"WODcq0wkHt5FMF","output_index":0,"sequence_number":3} ' - ' @@ -219,7 +219,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"iOdjIZMB0OV6i","output_index":0,"sequence_number":4} + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"Jskf7BfO75Zhk","output_index":0,"sequence_number":4} ' - ' @@ -228,7 +228,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"lrEcFJPqAFPgc","output_index":0,"sequence_number":5} + - 'data: {"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"llConHT5qKDTS","output_index":0,"sequence_number":5} ' - ' @@ -237,7 +237,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"xgi6zjbPoF8B8","output_index":0,"sequence_number":6} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"dCtjoUPsYOrEJ","output_index":0,"sequence_number":6} ' - ' @@ -246,7 +246,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"wSK1VaNG3cTqV","output_index":0,"sequence_number":7} + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"FKScgiPaaGmzx","output_index":0,"sequence_number":7} ' - ' @@ -255,7 +255,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"4Aj06mCLUT4Azme","output_index":0,"sequence_number":8} + - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"JNBPd8KZl6rYnqi","output_index":0,"sequence_number":8} ' - ' @@ -264,7 +264,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 
'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"I567UCFBL57QD","output_index":0,"sequence_number":9} + - 'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"WtumUsbY9K08L","output_index":0,"sequence_number":9} ' - ' @@ -273,7 +273,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","obfuscation":"GDdy5wrzzwwyCu","output_index":0,"sequence_number":10} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"7A1QatmnZVJGk8","output_index":0,"sequence_number":10} ' - ' @@ -282,7 +282,7 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","output_index":0,"sequence_number":11} + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","output_index":0,"sequence_number":11} ' - ' @@ -291,7 +291,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"type":"response.output_item.done","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_oGw30yoywjhkYA1B6t8FesEm","name":"get_job_status"},"output_index":0,"sequence_number":12} + - 'data: 
{"type":"response.output_item.done","item":{"id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_m0qbDty5eurPBYlBk4SYj56W","name":"get_job_status"},"output_index":0,"sequence_number":12} ' - ' @@ -300,7 +300,7 @@ turns: - 'event: response.completed ' - - 'data: {"type":"response.completed","response":{"id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","object":"response","created_at":1782331495,"status":"completed","background":false,"completed_at":1782331497,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0fc8e0e2ea46fc95006a3c386915f8819abdbaed3d24918512","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_oGw30yoywjhkYA1B6t8FesEm","name":"get_job_status"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: 
{"type":"response.completed","response":{"id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","object":"response","created_at":1782332992,"status":"completed","background":false,"completed_at":1782332993,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_m0qbDty5eurPBYlBk4SYj56W","name":"get_job_status"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -332,7 +332,7 @@ turns: request: body: input: - - call_id: call_oGw30yoywjhkYA1B6t8FesEm + - call_id: call_m0qbDty5eurPBYlBk4SYj56W output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -341,7 +341,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1 + previous_response_id: resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7 store: true stream: true tool_choice: auto @@ -479,7 +479,7 @@ turns: - 'event: response.created ' - - 'data: {"type":"response.created","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","object":"response","created_at":1782331497,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: 
{"type":"response.created","response":{"id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","object":"response","created_at":1782332993,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -509,7 +509,7 @@ turns: - 'event: response.in_progress ' - - 'data: {"type":"response.in_progress","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","object":"response","created_at":1782331497,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: {"type":"response.in_progress","response":{"id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","object":"response","created_at":1782332993,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure 
info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -539,7 +539,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"type":"response.output_item.added","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","type":"function_call","status":"in_progress","arguments":"","call_id":"call_Q2WtyAJvbr2BZpKoheDRCVxc","name":"get_error_logs"},"output_index":0,"sequence_number":2} + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","type":"function_call","status":"in_progress","arguments":"","call_id":"call_aq4lgp6yhETf6w1Zpj2SHntC","name":"get_error_logs"},"output_index":0,"sequence_number":2} ' - ' @@ -548,7 +548,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"ivfOJuey5E1Pck","output_index":0,"sequence_number":3} + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"iLw0NMihFPMApr","output_index":0,"sequence_number":3} ' - ' @@ -557,7 +557,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"mZ9WgWBZQ8s3n","output_index":0,"sequence_number":4} + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"FjGAyVccdn2gu","output_index":0,"sequence_number":4} ' - ' @@ -566,7 +566,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"9n8hZdvelCbVb","output_index":0,"sequence_number":5} + - 'data: {"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"Aa3qC3zt8w6mm","output_index":0,"sequence_number":5} ' - ' @@ -575,7 +575,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"1fkQ64Rf2n8Z8","output_index":0,"sequence_number":6} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"EtsFDF4zrqZ0g","output_index":0,"sequence_number":6} ' - ' @@ -584,7 +584,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"S4M9mxe7MFnLv","output_index":0,"sequence_number":7} + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"HIZZBI73fLyOf","output_index":0,"sequence_number":7} ' - ' @@ -593,7 +593,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"LH7tUz1Q6TYE7lN","output_index":0,"sequence_number":8} + - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"yhGCenW9GMXRXZe","output_index":0,"sequence_number":8} ' - ' @@ -602,7 +602,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 
'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"8FO8FbM4VZow2","output_index":0,"sequence_number":9} + - 'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"XE8ryIoPINJff","output_index":0,"sequence_number":9} ' - ' @@ -611,7 +611,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"xCmrJJc0eji3d","output_index":0,"sequence_number":10} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"zSI6VXrNICvg3","output_index":0,"sequence_number":10} ' - ' @@ -620,7 +620,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"severity","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"zZ6JjI99","output_index":0,"sequence_number":11} + - 'data: {"type":"response.function_call_arguments.delta","delta":"severity","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"a53mC2Si","output_index":0,"sequence_number":11} ' - ' @@ -629,7 +629,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"vihOScwv3S8mO","output_index":0,"sequence_number":12} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"aLEgybiscXeEF","output_index":0,"sequence_number":12} ' - ' @@ -638,7 +638,7 @@ turns: - 'event: 
response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"F","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"AKW9KVdNA7Nk8WI","output_index":0,"sequence_number":13} + - 'data: {"type":"response.function_call_arguments.delta","delta":"F","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"99FsVG7EViie3lm","output_index":0,"sequence_number":13} ' - ' @@ -647,7 +647,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"ATAL","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"MJsY6dviJiIf","output_index":0,"sequence_number":14} + - 'data: {"type":"response.function_call_arguments.delta","delta":"ATAL","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"ijPqMdZnQWIX","output_index":0,"sequence_number":14} ' - ' @@ -656,7 +656,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"uVztKDB8l8Kgf","output_index":0,"sequence_number":15} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"jskgORycqIzNC","output_index":0,"sequence_number":15} ' - ' @@ -665,7 +665,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"max","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"qjTsrWviOXxJY","output_index":0,"sequence_number":16} + - 'data: {"type":"response.function_call_arguments.delta","delta":"max","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"DHRNHXN9lsOCk","output_index":0,"sequence_number":16} ' - ' @@ -674,7 +674,7 @@ 
turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"_entries","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"OMYy2WHN","output_index":0,"sequence_number":17} + - 'data: {"type":"response.function_call_arguments.delta","delta":"_entries","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"y1xNGfLu","output_index":0,"sequence_number":17} ' - ' @@ -683,7 +683,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"9pmHbITBfejkZc","output_index":0,"sequence_number":18} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"ckcPggGOsxFHbN","output_index":0,"sequence_number":18} ' - ' @@ -692,7 +692,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"Pm5LRPb5UBK5TfH","output_index":0,"sequence_number":19} + - 'data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"h0gQDgoxY4rBOoo","output_index":0,"sequence_number":19} ' - ' @@ -701,7 +701,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","obfuscation":"3W9IodMx0hFrinv","output_index":0,"sequence_number":20} + - 'data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"GeNkUbiOsYTk0uI","output_index":0,"sequence_number":20} ' - ' @@ -710,7 
+710,7 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","output_index":0,"sequence_number":21} + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","output_index":0,"sequence_number":21} ' - ' @@ -719,7 +719,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"type":"response.output_item.done","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_Q2WtyAJvbr2BZpKoheDRCVxc","name":"get_error_logs"},"output_index":0,"sequence_number":22} + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_aq4lgp6yhETf6w1Zpj2SHntC","name":"get_error_logs"},"output_index":0,"sequence_number":22} ' - ' @@ -728,7 +728,7 @@ turns: - 'event: response.completed ' - - 'data: 
{"type":"response.completed","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","object":"response","created_at":1782331497,"status":"completed","background":false,"completed_at":1782331498,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0fc8e0e2ea46fc95006a3c386a8880819a8064195a73e8fea7","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_Q2WtyAJvbr2BZpKoheDRCVxc","name":"get_error_logs"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c3867c1d0819a8d8faf02ad45a9f1","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: 
{"type":"response.completed","response":{"id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","object":"response","created_at":1782332993,"status":"completed","background":false,"completed_at":1782332995,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_aq4lgp6yhETf6w1Zpj2SHntC","name":"get_error_logs"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -760,7 +760,7 @@ turns: request: body: input: - - call_id: call_Q2WtyAJvbr2BZpKoheDRCVxc + - call_id: call_aq4lgp6yhETf6w1Zpj2SHntC output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, @@ -775,7 +775,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9 + previous_response_id: resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e store: true stream: true tool_choice: auto @@ -913,7 +913,7 @@ turns: - 'event: response.created ' - - 'data: {"type":"response.created","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386afa84819a97b6538b6b5ac51c","object":"response","created_at":1782331499,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: 
{"type":"response.created","response":{"id":"resp_0e42cef53189ada4006a3c3e437b4c819bb1448a01a9362641","object":"response","created_at":1782332995,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -943,7 +943,7 @@ turns: - 'event: response.in_progress ' - - 'data: {"type":"response.in_progress","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386afa84819a97b6538b6b5ac51c","object":"response","created_at":1782331499,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: {"type":"response.in_progress","response":{"id":"resp_0e42cef53189ada4006a3c3e437b4c819bb1448a01a9362641","object":"response","created_at":1782332995,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure 
info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -973,7 +973,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"type":"response.output_item.added","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","type":"function_call","status":"in_progress","arguments":"","call_id":"call_zkoVfNgi8D69hLqAlPnAUwpr","name":"web_search"},"output_index":0,"sequence_number":2} + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","type":"function_call","status":"in_progress","arguments":"","call_id":"call_qsJ9rSGOQCBFJB5BS1GNhiE5","name":"web_search"},"output_index":0,"sequence_number":2} ' - ' @@ -982,7 +982,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"prGEfgSrbWSDln","output_index":0,"sequence_number":3} + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"cMDEYfYOr0JfYj","output_index":0,"sequence_number":3} ' - ' @@ -991,7 +991,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"query","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"YtrXZtJHOJ3","output_index":0,"sequence_number":4} + - 'data: {"type":"response.function_call_arguments.delta","delta":"query","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"Ef5ze955AFg","output_index":0,"sequence_number":4} ' - ' @@ -1000,7 +1000,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"TaglTitGTDkkp","output_index":0,"sequence_number":5} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"0Dbr6cTYwR2JZ","output_index":0,"sequence_number":5} ' - ' @@ -1009,7 +1009,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"Fix","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"kknaGTpAd0BwF","output_index":0,"sequence_number":6} + - 'data: {"type":"response.function_call_arguments.delta","delta":"how","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"gcfzzZLIVwMCZ","output_index":0,"sequence_number":6} ' - ' @@ -1018,7 +1018,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":" Out","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"m7Tk3lSDqTfC","output_index":0,"sequence_number":7} + - 'data: {"type":"response.function_call_arguments.delta","delta":" to","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"4UwsnTZlgYlMc","output_index":0,"sequence_number":7} ' - ' @@ -1027,7 +1027,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"Of","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"HlNdJCWpCaYMe8","output_index":0,"sequence_number":8} + - 'data: {"type":"response.function_call_arguments.delta","delta":" fix","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"pLBDJkxRXVD5","output_index":0,"sequence_number":8} ' - ' @@ -1036,7 +1036,7 @@ turns: - 'event: response.function_call_arguments.delta 
' - - 'data: {"type":"response.function_call_arguments.delta","delta":"Memory","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"85sF8zSf4k","output_index":0,"sequence_number":9} + - 'data: {"type":"response.function_call_arguments.delta","delta":" Out","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"c9Tfq6056S7M","output_index":0,"sequence_number":9} ' - ' @@ -1045,7 +1045,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"Error","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"LhCNyjZcqT5","output_index":0,"sequence_number":10} + - 'data: {"type":"response.function_call_arguments.delta","delta":"Of","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"D2vVlttcDF40sT","output_index":0,"sequence_number":10} ' - ' @@ -1054,7 +1054,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":" in","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"eqbHtHEBIHxQ2","output_index":0,"sequence_number":11} + - 'data: {"type":"response.function_call_arguments.delta","delta":"Memory","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"wF7wqcvXQC","output_index":0,"sequence_number":11} ' - ' @@ -1063,7 +1063,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":" Apache","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"RekdgKeND","output_index":0,"sequence_number":12} + - 'data: {"type":"response.function_call_arguments.delta","delta":"Error","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"onxosRL9d1Q","output_index":0,"sequence_number":12} ' - ' @@ -1072,7 +1072,7 @@ turns: - 'event: 
response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":" Spark","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"qp6u77FnUu","output_index":0,"sequence_number":13} + - 'data: {"type":"response.function_call_arguments.delta","delta":" in","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"q62JXm0Jsdm2y","output_index":0,"sequence_number":13} ' - ' @@ -1081,7 +1081,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":" pipeline","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"k2x2AHy","output_index":0,"sequence_number":14} + - 'data: {"type":"response.function_call_arguments.delta","delta":" Spark","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"WJIU4dpRMI","output_index":0,"sequence_number":14} ' - ' @@ -1090,7 +1090,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":" ET","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"oaZaL8JDvMiXi","output_index":0,"sequence_number":15} + - 'data: {"type":"response.function_call_arguments.delta","delta":" pipeline","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"SuIqBKP","output_index":0,"sequence_number":15} ' - ' @@ -1099,16 +1099,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"type":"response.function_call_arguments.delta","delta":"L","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"QUV1RyPNtooXDOD","output_index":0,"sequence_number":16} - - ' - - ' - - ' - - 'event: response.function_call_arguments.delta - - ' - - 'data: 
{"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","obfuscation":"C3WNpsIiwNPvFr","output_index":0,"sequence_number":17} + - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"G3bqmOIR56xYlS","output_index":0,"sequence_number":16} ' - ' @@ -1117,8 +1108,8 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"query\":\"Fix - OutOfMemoryError in Apache Spark pipeline ETL\"}","item_id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","output_index":0,"sequence_number":18} + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"query\":\"how + to fix OutOfMemoryError in Spark pipeline\"}","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","output_index":0,"sequence_number":17} ' - ' @@ -1127,8 +1118,8 @@ turns: - 'event: response.output_item.done ' - - 'data: {"type":"response.output_item.done","item":{"id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","type":"function_call","status":"completed","arguments":"{\"query\":\"Fix - OutOfMemoryError in Apache Spark pipeline ETL\"}","call_id":"call_zkoVfNgi8D69hLqAlPnAUwpr","name":"web_search"},"output_index":0,"sequence_number":19} + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","type":"function_call","status":"completed","arguments":"{\"query\":\"how + to fix OutOfMemoryError in Spark pipeline\"}","call_id":"call_qsJ9rSGOQCBFJB5BS1GNhiE5","name":"web_search"},"output_index":0,"sequence_number":18} ' - ' @@ -1137,8 +1128,8 @@ turns: - 'event: response.completed ' - - 'data: 
{"type":"response.completed","response":{"id":"resp_0fc8e0e2ea46fc95006a3c386afa84819a97b6538b6b5ac51c","object":"response","created_at":1782331499,"status":"completed","background":false,"completed_at":1782331499,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0fc8e0e2ea46fc95006a3c386ba180819aaa23f095627ed8d3","type":"function_call","status":"completed","arguments":"{\"query\":\"Fix - OutOfMemoryError in Apache Spark pipeline ETL\"}","call_id":"call_zkoVfNgi8D69hLqAlPnAUwpr","name":"web_search"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0fc8e0e2ea46fc95006a3c386968f8819a98462765fe2e3fb9","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + - 'data: {"type":"response.completed","response":{"id":"resp_0e42cef53189ada4006a3c3e437b4c819bb1448a01a9362641","object":"response","created_at":1782332995,"status":"completed","background":false,"completed_at":1782332996,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","type":"function_call","status":"completed","arguments":"{\"query\":\"how + to fix OutOfMemoryError in Spark 
pipeline\"}","call_id":"call_qsJ9rSGOQCBFJB5BS1GNhiE5","name":"web_search"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch @@ -1159,7 +1150,7 @@ turns: stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search - query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":740,"input_tokens_details":{"cached_tokens":0},"output_tokens":25,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":765},"user":null,"metadata":{}},"sequence_number":20} + 
query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":740,"input_tokens_details":{"cached_tokens":0},"output_tokens":24,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":764},"user":null,"metadata":{}},"sequence_number":19} ' - ' diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml index 6e11708..f65edc5 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml @@ -140,11 +140,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331481 - created_at: 1782331481 + completed_at: 1782332981 + created_at: 1782332978 error: null frequency_penalty: 0.0 - id: resp_03fd10c22367f14c006a3c38590640819981d8e689417d6da2 + id: resp_05ffdb1c1fbb4099006a3c3e321f04819b8f5178e57393bdbb incomplete_details: null instructions: null max_output_tokens: null @@ -155,8 +155,8 @@ turns: object: response output: - arguments: '{"job_id":"job-382"}' - call_id: call_KvqKd7Oqx9UM0kV4BIRAyLEi - id: fc_03fd10c22367f14c006a3c385990e481999a7ef70c7dd82761 + call_id: call_zUYtpSPtiZDIA0xgaf9MKQsL + id: fc_05ffdb1c1fbb4099006a3c3e3569f8819b8303dec8c3c61e7f name: get_job_status status: completed type: function_call @@ -317,7 +317,7 @@ turns: request: body: input: - - call_id: call_KvqKd7Oqx9UM0kV4BIRAyLEi + - call_id: call_zUYtpSPtiZDIA0xgaf9MKQsL output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -327,7 +327,7 @@ turns: role: user type: message model: gpt-4o - 
previous_response_id: resp_03fd10c22367f14c006a3c38590640819981d8e689417d6da2 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e321f04819b8f5178e57393bdbb store: true stream: false tool_choice: auto @@ -463,11 +463,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331482 - created_at: 1782331482 + completed_at: 1782332984 + created_at: 1782332981 error: null frequency_penalty: 0.0 - id: resp_03fd10c22367f14c006a3c3859dfe4819982720510863bf1fa + id: resp_05ffdb1c1fbb4099006a3c3e35df20819b8b6fb4d784425cbd incomplete_details: null instructions: null max_output_tokens: null @@ -478,14 +478,14 @@ turns: object: response output: - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' - call_id: call_LhB0vE8IH2AIMglFujcSYxwk - id: fc_03fd10c22367f14c006a3c385a9efc8199bc2db350998c1ea9 + call_id: call_bO7EyX3P64ApXE1D3og5t24S + id: fc_05ffdb1c1fbb4099006a3c3e37da80819bab37e6b91f212f18 name: get_error_logs status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_03fd10c22367f14c006a3c38590640819981d8e689417d6da2 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e321f04819b8f5178e57393bdbb prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -640,7 +640,7 @@ turns: request: body: input: - - call_id: call_LhB0vE8IH2AIMglFujcSYxwk + - call_id: call_bO7EyX3P64ApXE1D3og5t24S output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, @@ -656,7 +656,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_03fd10c22367f14c006a3c3859dfe4819982720510863bf1fa + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e35df20819b8b6fb4d784425cbd store: true stream: false tool_choice: auto @@ -792,11 +792,11 @@ turns: background: 
false billing: payer: developer - completed_at: 1782331484 - created_at: 1782331483 + completed_at: 1782332987 + created_at: 1782332984 error: null frequency_penalty: 0.0 - id: resp_03fd10c22367f14c006a3c385b44a48199af24641819516d31 + id: resp_05ffdb1c1fbb4099006a3c3e38693c819bb3f436b554202a58 incomplete_details: null instructions: null max_output_tokens: null @@ -806,15 +806,15 @@ turns: moderation: null object: response output: - - arguments: '{"query":"OutOfMemoryError in ETL pipeline transform stage","max_results":5}' - call_id: call_Uuq0Gf92pAL1YAUEgnbr8p6B - id: fc_03fd10c22367f14c006a3c385c0a248199a0f952e077038d08 + - arguments: '{"query":"OutOfMemoryError heap space transform stage","max_results":5}' + call_id: call_05MIRpeAmcRcfWqIo2fNo0qL + id: fc_05ffdb1c1fbb4099006a3c3e3aca44819b90eed8647106b319 name: search_runbook status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_03fd10c22367f14c006a3c3859dfe4819982720510863bf1fa + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e35df20819b8b6fb4d784425cbd prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -957,10 +957,10 @@ turns: input_tokens: 751 input_tokens_details: cached_tokens: 0 - output_tokens: 30 + output_tokens: 28 output_tokens_details: reasoning_tokens: 0 - total_tokens: 781 + total_tokens: 779 user: null headers: content-type: application/json @@ -969,7 +969,7 @@ turns: request: body: input: - - call_id: call_Uuq0Gf92pAL1YAUEgnbr8p6B + - call_id: call_05MIRpeAmcRcfWqIo2fNo0qL output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query @@ -983,7 +983,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_03fd10c22367f14c006a3c385b44a48199af24641819516d31 + previous_response_id: 
resp_05ffdb1c1fbb4099006a3c3e38693c819bb3f436b554202a58 store: true stream: false tool_choice: auto @@ -1119,11 +1119,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331485 - created_at: 1782331484 + completed_at: 1782332989 + created_at: 1782332987 error: null frequency_penalty: 0.0 - id: resp_03fd10c22367f14c006a3c385c88788199bfcfc70ff88ce28f + id: resp_05ffdb1c1fbb4099006a3c3e3ba4b0819ba847aac847612c9c incomplete_details: null instructions: null max_output_tokens: null @@ -1136,14 +1136,14 @@ turns: - arguments: '{"code":"import json\nprint(json.dumps({''job'': ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase memory to 64GB''}))","timeout_seconds":30}' - call_id: call_hy09mPn87sDCueIq8Mxd3JBo - id: fc_03fd10c22367f14c006a3c385d69c0819980fcbf225cde945f + call_id: call_aLDbBvIdBv8z49RXYjC0bpk7 + id: fc_05ffdb1c1fbb4099006a3c3e3d4dbc819b86d79c1e871caafc name: run_analysis status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_03fd10c22367f14c006a3c385b44a48199af24641819516d31 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e38693c819bb3f436b554202a58 prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -1283,13 +1283,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 972 + input_tokens: 970 input_tokens_details: cached_tokens: 0 output_tokens: 58 output_tokens_details: reasoning_tokens: 0 - total_tokens: 1030 + total_tokens: 1028 user: null headers: content-type: application/json @@ -1298,7 +1298,7 @@ turns: request: body: input: - - call_id: call_hy09mPn87sDCueIq8Mxd3JBo + - call_id: call_aLDbBvIdBv8z49RXYjC0bpk7 output: '{"stdout": "Failed stage: transform, count: 2\nPattern: consistent OOM at partition-47 (2.1M rows)\nRecommendation: increase memory to 64GB or repartition", "stderr": "", "exit_code": 0, "duration_ms": 120}' @@ -1308,7 +1308,7 @@ turns: role: user type: message model: gpt-4o - 
previous_response_id: resp_03fd10c22367f14c006a3c385c88788199bfcfc70ff88ce28f + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e3ba4b0819ba847aac847612c9c store: true stream: false tool_choice: auto @@ -1444,11 +1444,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331486 - created_at: 1782331486 + completed_at: 1782332991 + created_at: 1782332990 error: null frequency_penalty: 0.0 - id: resp_03fd10c22367f14c006a3c385e0a38819983064ba1cbb83062 + id: resp_05ffdb1c1fbb4099006a3c3e3e2ed8819b977b9a9f508f2751 incomplete_details: null instructions: null max_output_tokens: null @@ -1459,14 +1459,14 @@ turns: object: response output: - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' - call_id: call_EgLYkmLEPhzgZl9drilwu1B7 - id: fc_03fd10c22367f14c006a3c385eafa88199b729b7ba313d626c + call_id: call_he6F2Xnx5vqdxnN2LS3rc5Dj + id: fc_05ffdb1c1fbb4099006a3c3e3ed060819b82206a634d7c8dd3 name: restart_job status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_03fd10c22367f14c006a3c385c88788199bfcfc70ff88ce28f + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e3ba4b0819ba847aac847612c9c prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -1606,13 +1606,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 1122 + input_tokens: 1120 input_tokens_details: cached_tokens: 0 output_tokens: 36 output_tokens_details: reasoning_tokens: 0 - total_tokens: 1158 + total_tokens: 1156 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml index 0c831ba..3a946cd 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml +++ 
b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml @@ -139,11 +139,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331511 - created_at: 1782331509 + completed_at: 1782333000 + created_at: 1782332998 error: null frequency_penalty: 0.0 - id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 incomplete_details: null instructions: null max_output_tokens: null @@ -154,8 +154,8 @@ turns: object: response output: - arguments: '{"job_id":"job-382"}' - call_id: call_NWcEmKRN0AkUr4ZmlHWaqcQn - id: fc_05bbd2f607378fa0006a3c3877757c81999eedf1f10d0b4309 + call_id: call_pn7qCsEVkAGhDdjzZLCH9cWT + id: fc_060930d6abfe49c7006a3c3e47edc0819ab59873692fe0f672 name: get_job_status status: completed type: function_call @@ -316,7 +316,7 @@ turns: request: body: input: - - call_id: call_NWcEmKRN0AkUr4ZmlHWaqcQn + - call_id: call_pn7qCsEVkAGhDdjzZLCH9cWT output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -325,7 +325,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 store: true stream: false tool_choice: auto @@ -461,11 +461,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331512 - created_at: 1782331511 + completed_at: 1782333001 + created_at: 1782333000 error: null frequency_penalty: 0.0 - id: resp_05bbd2f607378fa0006a3c3877c0f0819983a3f1411a2242b9 + id: resp_060930d6abfe49c7006a3c3e48529c819a944ed725bbe55cd0 incomplete_details: null instructions: null max_output_tokens: null @@ -476,14 +476,14 @@ turns: object: response output: - arguments: 
'{"job_id":"job-382","severity":"ERROR","max_entries":10}' - call_id: call_8SfacO4G0hZKrTnfTI76zeAa - id: fc_05bbd2f607378fa0006a3c38789348819980949dfc83474c4b + call_id: call_nDUFTXwJnhbqZjpaQws3XdGa + id: fc_060930d6abfe49c7006a3c3e491504819aa6c312a6b9216ce9 name: get_error_logs status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -638,7 +638,7 @@ turns: request: body: input: - - call_id: call_NWcEmKRN0AkUr4ZmlHWaqcQn + - call_id: call_pn7qCsEVkAGhDdjzZLCH9cWT output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -648,7 +648,7 @@ turns: role: user type: message model: gpt-4o - previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 store: true stream: false tool_choice: auto @@ -784,11 +784,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331514 - created_at: 1782331512 + completed_at: 1782333002 + created_at: 1782333001 error: null frequency_penalty: 0.0 - id: resp_05bbd2f607378fa0006a3c3878eb5481999b020abb1bbaaa0f + id: resp_060930d6abfe49c7006a3c3e497860819aa2284f65a4dcb398 incomplete_details: null instructions: null max_output_tokens: null @@ -799,14 +799,14 @@ turns: object: response output: - arguments: '{"query":"increase memory limits for ETL jobs","max_results":3}' - call_id: call_XVIOIR7ZtfV0urAs3C4ZXPK8 - id: fc_05bbd2f607378fa0006a3c3879e768819997294f712fc6012c + call_id: call_o2XvimyEnM5NRIzViu7DEDsN + id: fc_060930d6abfe49c7006a3c3e4a093c819aa7b19626148ad151 
name: search_runbook status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_05bbd2f607378fa0006a3c387599cc81998df1b061f645e587 + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 prompt_cache_key: null prompt_cache_retention: in_memory reasoning: diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml index ba6a486..224a68d 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml @@ -2,9 +2,8 @@ turns: - filename: t1 request: body: - input: 'You are an SRE assistant. I need you to do TWO things in parallel: 1) - call get_job_status for job-382, and 2) call web_search for ''Spark ETL OOM - causes''. Do both now.' + input: 'You are an SRE assistant. Do TWO things in parallel: 1) check the status + of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now.' 
model: gpt-4o store: true stream: false @@ -141,11 +140,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331523 - created_at: 1782331523 + completed_at: 1782333004 + created_at: 1782333003 error: null frequency_penalty: 0.0 - id: resp_0862f67eff09df0d006a3c38830c6881998c50cd3fe21a1e49 + id: resp_0d6f4d0f7ecfbcbe006a3c3e4b94b08199adefa3fb3b9e6032 incomplete_details: null instructions: null max_output_tokens: null @@ -156,14 +155,14 @@ turns: object: response output: - arguments: '{"job_id":"job-382"}' - call_id: call_wsICgbUO4BlbKuK1o6fGzNSp - id: fc_0862f67eff09df0d006a3c3883c1bc8199993f62d77975f8dc + call_id: call_qsZfyBw1H87UfhQzVJUoQ7Zg + id: fc_0d6f4d0f7ecfbcbe006a3c3e4c67b08199b20c7ffffd19dfd1 name: get_job_status status: completed type: function_call - - arguments: '{"query":"Spark ETL OOM causes"}' - call_id: call_kDwkX53Mu7EbLjLOkeymIMya - id: fc_0862f67eff09df0d006a3c3883c1d48199949f18556c62e1ec + - arguments: '{"query":"Spark OOM fixes"}' + call_id: call_9NgvfTtmjW3F1zy9oUmM0Wna + id: fc_0d6f4d0f7ecfbcbe006a3c3e4c67bc8199a5d2ff4e72a5e3b5 name: web_search status: completed type: function_call @@ -309,13 +308,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 385 + input_tokens: 377 input_tokens_details: cached_tokens: 0 - output_tokens: 54 + output_tokens: 52 output_tokens_details: reasoning_tokens: 0 - total_tokens: 439 + total_tokens: 429 user: null headers: content-type: application/json @@ -324,12 +323,12 @@ turns: request: body: input: - - call_id: call_wsICgbUO4BlbKuK1o6fGzNSp + - call_id: call_qsZfyBw1H87UfhQzVJUoQ7Zg output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' type: function_call_output - - call_id: call_kDwkX53Mu7EbLjLOkeymIMya + - call_id: call_9NgvfTtmjW3F1zy9oUmM0Wna output: '[{"title": "Spark 
OOM in ETL: Root Causes and Fixes - Databricks Blog", "url": "https://docs.databricks.com/en/optimizations/oom.html", "snippet": "Common causes: data skew in shuffle partitions, broadcast join threshold @@ -339,12 +338,12 @@ turns: "snippet": "Increase spark.executor.memory or spark.driver.memory. For shuffle-heavy workloads, also increase spark.sql.shuffle.partitions."}]' type: function_call_output - - content: Based on those results, search the runbook for memory increase procedures. + - content: Based on those results, search the runbook for memory override procedures. Max 3 results. role: user type: message model: gpt-4o - previous_response_id: resp_0862f67eff09df0d006a3c38830c6881998c50cd3fe21a1e49 + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4b94b08199adefa3fb3b9e6032 store: true stream: false tool_choice: auto @@ -480,11 +479,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331524 - created_at: 1782331524 + completed_at: 1782333006 + created_at: 1782333004 error: null frequency_penalty: 0.0 - id: resp_0862f67eff09df0d006a3c3883f6108199bf628e4a2c6469dd + id: resp_0d6f4d0f7ecfbcbe006a3c3e4ca1708199a0c174c7eca6dd03 incomplete_details: null instructions: null max_output_tokens: null @@ -494,15 +493,15 @@ turns: moderation: null object: response output: - - arguments: '{"query":"memory increase procedures for Spark","max_results":3}' - call_id: call_l2MaoNnfIGtswWuyJUBJ3G3G - id: fc_0862f67eff09df0d006a3c38849ba4819991a40c71a014ca1f + - arguments: '{"query":"memory override procedures","max_results":3}' + call_id: call_RaHQ2EJUGrXRWTgM7qUlfdhg + id: fc_0d6f4d0f7ecfbcbe006a3c3e4dfe488199abf55a5d0896e72f name: search_runbook status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_0862f67eff09df0d006a3c38830c6881998c50cd3fe21a1e49 + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4b94b08199adefa3fb3b9e6032 prompt_cache_key: null prompt_cache_retention: in_memory 
reasoning: @@ -642,13 +641,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 686 + input_tokens: 676 input_tokens_details: cached_tokens: 0 - output_tokens: 25 + output_tokens: 23 output_tokens_details: reasoning_tokens: 0 - total_tokens: 711 + total_tokens: 699 user: null headers: content-type: application/json @@ -657,7 +656,7 @@ turns: request: body: input: - - call_id: call_l2MaoNnfIGtswWuyJUBJ3G3G + - call_id: call_RaHQ2EJUGrXRWTgM7qUlfdhg output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query @@ -665,12 +664,11 @@ turns: "snippet": "To increase memory: use restart_job with memory_override_gb. Standard escalation: 32->64->128. Requires approval above 128GB."}]' type: function_call_output - - content: Now restart that job with 64 GB memory, skip completed stages, high - priority. + - content: Now restart that job with 64GB memory and high priority. 
role: user type: message model: gpt-4o - previous_response_id: resp_0862f67eff09df0d006a3c3883f6108199bf628e4a2c6469dd + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4ca1708199a0c174c7eca6dd03 store: true stream: false tool_choice: auto @@ -806,11 +804,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331525 - created_at: 1782331525 + completed_at: 1782333006 + created_at: 1782333006 error: null frequency_penalty: 0.0 - id: resp_0862f67eff09df0d006a3c3884f8cc81998a1d105f46eb9e69 + id: resp_0d6f4d0f7ecfbcbe006a3c3e4e530481999b6062fbd63f7df9 incomplete_details: null instructions: null max_output_tokens: null @@ -821,14 +819,14 @@ turns: object: response output: - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' - call_id: call_JLVwHJ1DWkQvlsvhJqL5DuF5 - id: fc_0862f67eff09df0d006a3c3885bf5081999e5a54327b5f629b + call_id: call_141clJIyorJeRB9bew8mY8Hf + id: fc_0d6f4d0f7ecfbcbe006a3c3e4ebc748199875c7dc7d0025c1e name: restart_job status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_0862f67eff09df0d006a3c3883f6108199bf628e4a2c6469dd + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4ca1708199a0c174c7eca6dd03 prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -968,13 +966,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 873 + input_tokens: 857 input_tokens_details: cached_tokens: 0 output_tokens: 36 output_tokens_details: reasoning_tokens: 0 - total_tokens: 909 + total_tokens: 893 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml index a1d5b80..4eb796e 100644 --- 
a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml @@ -139,11 +139,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331535 - created_at: 1782331534 + completed_at: 1782333011 + created_at: 1782333008 error: null frequency_penalty: 0.0 - id: resp_0ec7c4ddd5fc13e2006a3c388edc30819ab696d5b8584fb6c6 + id: resp_0cd8ef891c0f4c71006a3c3e507b68819b84f7f3f1a42a599a incomplete_details: null instructions: null max_output_tokens: null @@ -154,8 +154,8 @@ turns: object: response output: - arguments: '{"job_id":"job-382"}' - call_id: call_xuHSO4xB7M872HdpG65mhXWb - id: fc_0ec7c4ddd5fc13e2006a3c388f7f64819aab560a66e56e0f41 + call_id: call_Be8nCBDD2O0m7GSL5ATsZn1i + id: fc_0cd8ef891c0f4c71006a3c3e52f520819b96aa59c35fff1b5d name: get_job_status status: completed type: function_call @@ -316,13 +316,13 @@ turns: request: body: input: - - call_id: call_xuHSO4xB7M872HdpG65mhXWb + - call_id: call_Be8nCBDD2O0m7GSL5ATsZn1i output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' type: function_call_output model: gpt-4o - previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388edc30819ab696d5b8584fb6c6 + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e507b68819b84f7f3f1a42a599a store: true stream: false tool_choice: auto @@ -458,11 +458,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331537 - created_at: 1782331535 + completed_at: 1782333013 + created_at: 1782333012 error: null frequency_penalty: 0.0 - id: resp_0ec7c4ddd5fc13e2006a3c388fd0cc819a9407adfbb0c2c208 + id: resp_0cd8ef891c0f4c71006a3c3e53eef8819bbcac061137bbf660 incomplete_details: null instructions: null 
max_output_tokens: null @@ -475,35 +475,34 @@ turns: - content: - annotations: [] logprobs: [] - text: 'The ETL pipeline job `job-382` has failed. Here are the details: + text: 'The ETL pipeline job **job-382** has failed. Here are the details: - - **Status**: Failed + - **Error Type:** OutOfMemoryError - - **Error Type**: OutOfMemoryError + - **Failed At:** 2024-01-15T03:22:11Z - - **Failed At**: 2024-01-15T03:22:11Z + - **Stage:** Transform - - **Stage**: Transform + - **Memory Used:** 31.2 GB - - **Memory Used**: 31.2 GB + - **Memory Limit:** 32 GB - - **Memory Limit**: 32 GB + - **Pipeline:** etl-daily-ingest - - **Pipeline**: etl-daily-ingest + - **Owner:** Data Platform Team - - **Owner**: Data Platform Team - - Would you like me to investigate further or take any action?' + Would you like me to retrieve error logs or attempt to restart the job + with adjusted parameters?' type: output_text - id: msg_0ec7c4ddd5fc13e2006a3c38909204819ab534a8798edadde1 + id: msg_0cd8ef891c0f4c71006a3c3e5478c4819bbab78f4548e94859 role: assistant status: completed type: message parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388edc30819ab696d5b8584fb6c6 + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e507b68819b84f7f3f1a42a599a prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -646,10 +645,10 @@ turns: input_tokens: 471 input_tokens_details: cached_tokens: 0 - output_tokens: 121 + output_tokens: 113 output_tokens_details: reasoning_tokens: 0 - total_tokens: 592 + total_tokens: 584 user: null headers: content-type: application/json @@ -657,9 +656,10 @@ turns: - filename: t3 request: body: - input: Great, now search the runbook for how to fix that OOM error. Max 5 results. + input: Based on what you found, search the runbook for how to fix it. Max 5 + results. 
model: gpt-4o - previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388fd0cc819a9407adfbb0c2c208 + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e53eef8819bbcac061137bbf660 store: true stream: false tool_choice: auto @@ -795,11 +795,11 @@ turns: background: false billing: payer: developer - completed_at: 1782331538 - created_at: 1782331537 + completed_at: 1782333014 + created_at: 1782333014 error: null frequency_penalty: 0.0 - id: resp_0ec7c4ddd5fc13e2006a3c3891a958819a8bf49dba0916fef0 + id: resp_0cd8ef891c0f4c71006a3c3e561858819bba80615c21372611 incomplete_details: null instructions: null max_output_tokens: null @@ -809,15 +809,15 @@ turns: moderation: null object: response output: - - arguments: '{"query":"OutOfMemoryError ETL pipeline","max_results":5}' - call_id: call_wmlJBIMN1WijA0cAJMjjes0X - id: fc_0ec7c4ddd5fc13e2006a3c38922b24819ab44bfbe155a4ecb2 + - arguments: '{"query":"OutOfMemoryError in ETL pipeline","max_results":5}' + call_id: call_I0IVG6tBnGhK5zZGic76mhPo + id: fc_0cd8ef891c0f4c71006a3c3e568658819b82e90c7976061f87 name: search_runbook status: completed type: function_call parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_0ec7c4ddd5fc13e2006a3c388fd0cc819a9407adfbb0c2c208 + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e53eef8819bbcac061137bbf660 prompt_cache_key: null prompt_cache_retention: in_memory reasoning: @@ -957,13 +957,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 619 + input_tokens: 611 input_tokens_details: cached_tokens: 0 - output_tokens: 27 + output_tokens: 28 output_tokens_details: reasoning_tokens: 0 - total_tokens: 646 + total_tokens: 639 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh index e17ddeb..2f4e54f 100755 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh +++ 
b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh @@ -23,7 +23,7 @@ set -euo pipefail -RECORDER="$(dirname "$0")/../../cassettes/record_cassette.py" +RECORDER="$(dirname "$0")/../../record_cassette.py" TOOLS="$(dirname "$0")/pipeline_tools.json" TOOL_OUTPUTS="$(dirname "$0")/pipeline_tool_outputs.json" OUTPUT_DIR="$(dirname "$0")" @@ -113,6 +113,7 @@ printf '%s\n' \ echo "" echo "=== 3-turn tool-output-only (turn 2 has no user message, just tool output) ===" +# Turn 2 prompt is empty string → _build_tool_output_input omits user message printf '%s\n' \ "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ "" \ @@ -122,7 +123,6 @@ printf '%s\n' \ --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ --tools "$TOOLS" --tool-choice auto \ --tool-outputs "$TOOL_OUTPUTS" \ - --tool-output-only-turn 2 \ --output "$OUTPUT_DIR/responses_tool_calls_tool_output_only.yaml" echo "" @@ -212,6 +212,7 @@ printf '%s\n' \ echo "" echo "=== 3-turn tool-output-only (turn 2 has no user message) ===" +# Turn 2 prompt is empty string → _build_tool_output_input omits user message printf '%s\n' \ "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ "" \ @@ -221,7 +222,6 @@ printf '%s\n' \ --model "$OPENAI_MODEL" \ --tools "$TOOLS" --tool-choice auto \ --tool-outputs "$TOOL_OUTPUTS" \ - --tool-output-only-turn 2 \ --output "$OUTPUT_DIR/openai_responses_tool_calls_tool_output_only.yaml" echo "" @@ -232,5 +232,6 @@ fi echo "" echo "══════════════════════════════════════════════════════════════" -echo " All done. 12 cassettes total." +CASSETTE_COUNT=$(ls "$OUTPUT_DIR"/*.yaml 2>/dev/null | wc -l | tr -d ' ') +echo " All done. ${CASSETTE_COUNT} cassettes in ${OUTPUT_DIR}." 
echo "══════════════════════════════════════════════════════════════" diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml index ef33c6c..388373b 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml @@ -136,9 +136,9 @@ turns: response: body: background: false - created_at: 1782331360 + created_at: 1782332947 frequency_penalty: 0.0 - id: resp_99ff5424f2627e43 + id: resp_800ce8df847a299d incomplete_details: null input_messages: null instructions: null @@ -150,16 +150,16 @@ turns: object: response output: - content: - - text: We need to call get_job_status. + - text: We need to query get_job_status. type: reasoning_text encrypted_content: null - id: rs_901adb851069f6c7 + id: rs_8586850ba9dcee47 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_af81636bdbc9dec5 - id: fc_af81636bdbc9dec5 + call_id: call_80b2f4f754dbba6b + id: fc_80b2f4f754dbba6b name: get_job_status namespace: null status: null @@ -328,7 +328,7 @@ turns: request: body: input: - - call_id: call_af81636bdbc9dec5 + - call_id: call_80b2f4f754dbba6b output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -338,7 +338,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_99ff5424f2627e43 + previous_response_id: resp_800ce8df847a299d store: true stream: false tool_choice: auto @@ -471,9 +471,9 @@ turns: response: body: background: false - created_at: 1782331361 + created_at: 1782332947 frequency_penalty: 0.0 - id: resp_b44e3326e81a597c + id: 
resp_96a3170ed78e87b3 incomplete_details: null input_messages: null instructions: null @@ -485,16 +485,16 @@ turns: object: response output: - content: - - text: Call get_error_logs. + - text: We need get_error_logs. type: reasoning_text encrypted_content: null - id: rs_8f36d456b69a137a + id: rs_b35e1283a0afe646 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' - call_id: call_961a299b68e2f0d9 - id: fc_961a299b68e2f0d9 + call_id: call_a4919ceec5777dd0 + id: fc_a4919ceec5777dd0 name: get_error_logs namespace: null status: null @@ -502,7 +502,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_99ff5424f2627e43 + previous_response_id: resp_800ce8df847a299d prompt: null reasoning: null service_tier: auto @@ -641,20 +641,20 @@ turns: usage: input_tokens: 597 input_tokens_details: - cached_tokens: 464 + cached_tokens: 448 cached_tokens_per_turn: - - 464 + - 448 input_tokens_per_turn: - 597 - output_tokens: 42 + output_tokens: 43 output_tokens_details: output_tokens_per_turn: - - 42 - reasoning_tokens: 24 + - 43 + reasoning_tokens: 25 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 639 + total_tokens: 640 user: null headers: content-type: application/json @@ -663,7 +663,7 @@ turns: request: body: input: - - call_id: call_961a299b68e2f0d9 + - call_id: call_a4919ceec5777dd0 output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, @@ -679,7 +679,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_b44e3326e81a597c + previous_response_id: resp_96a3170ed78e87b3 store: true stream: false tool_choice: auto @@ -812,30 +812,31 @@ turns: response: body: background: false - 
created_at: 1782331361 + created_at: 1782332947 frequency_penalty: 0.0 - id: resp_8dfe29723818b937 + id: resp_a89ba667cd7e86df incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3226 + max_output_tokens: 3225 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Use search_runbook. + - text: Search runbook. type: reasoning_text encrypted_content: null - id: rs_8682e2c6b269a60c + id: rs_81269817d22797d0 status: null summary: [] type: reasoning - - arguments: '{"query":"OutOfMemoryError transform stage ETL daily ingest troubleshoot","max_results":5}' - call_id: call_847a146847337019 - id: fc_847a146847337019 + - arguments: '{"query":"OutOfMemoryError ETL transform stage Spark restart job + memory limit", "max_results":5}' + call_id: call_bb883df29ae253d1 + id: fc_bb883df29ae253d1 name: search_runbook namespace: null status: null @@ -843,7 +844,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_b44e3326e81a597c + previous_response_id: resp_96a3170ed78e87b3 prompt: null reasoning: null service_tier: auto @@ -980,22 +981,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 870 + input_tokens: 871 input_tokens_details: cached_tokens: 608 cached_tokens_per_turn: - 608 input_tokens_per_turn: - - 870 - output_tokens: 45 + - 871 + output_tokens: 47 output_tokens_details: output_tokens_per_turn: - - 45 - reasoning_tokens: 27 + - 47 + reasoning_tokens: 29 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 915 + total_tokens: 918 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml index f428f78..ea9d95f 100644 --- 
a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml @@ -140,7 +140,7 @@ turns: - 'event: response.created ' - - 'data: {"response":{"id":"resp_ac3c1315a91f31a2","created_at":1782331382,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_a2613187feeed7eb","created_at":1782332952,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -170,7 +170,7 @@ turns: - 'event: response.in_progress ' - - 'data: {"response":{"id":"resp_ac3c1315a91f31a2","created_at":1782331382,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_a2613187feeed7eb","created_at":1782332952,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -200,7 +200,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"id":"msg_b13a0755fc859b6a","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + - 'data: {"item":{"id":"msg_a8e6149a1e573d1b","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} ' - ' @@ -209,7 +209,7 @@ turns: - 'event: response.reasoning_part.added ' - - 'data: {"content_index":0,"item_id":"msg_b13a0755fc859b6a","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + - 'data: {"content_index":0,"item_id":"msg_a8e6149a1e573d1b","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} ' - ' @@ -218,7 +218,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"We","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"The","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} ' - ' @@ -227,7 +227,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" need","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" 
user","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} ' - ' @@ -236,7 +236,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" to","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" wants","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} ' - ' @@ -245,7 +245,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" call","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" to","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} ' - ' @@ -254,7 +254,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" get","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" check","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} ' - ' @@ -263,7 +263,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_job","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" status","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} ' - ' @@ -272,7 +272,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_status","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" 
of","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} ' - ' @@ -281,7 +281,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" with","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" job","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} ' - ' @@ -290,7 +290,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" job","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"-","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} ' - ' @@ -299,7 +299,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_id","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"382","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} ' - ' @@ -308,7 +308,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":":","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":".","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} ' - ' @@ -317,7 +317,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" \"","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" 
We","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} ' - ' @@ -326,7 +326,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"job","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" should","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} ' - ' @@ -335,7 +335,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"-","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" call","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} ' - ' @@ -344,7 +344,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"382","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" get","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} ' - ' @@ -353,7 +353,25 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"\".","item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_job","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_status","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":20,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: 
{"content_index":0,"delta":".","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":21,"type":"response.reasoning_text.delta"} ' - ' @@ -362,8 +380,8 @@ turns: - 'event: response.reasoning_text.done ' - - 'data: {"content_index":0,"item_id":"msg_b13a0755fc859b6a","output_index":0,"sequence_number":20,"text":"We - need to call get_job_status with job_id: \"job-382\".","type":"response.reasoning_text.done"} + - 'data: {"content_index":0,"item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":22,"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"response.reasoning_text.done"} ' - ' @@ -372,8 +390,8 @@ turns: - 'event: response.reasoning_part.done ' - - 'data: {"content_index":0,"item_id":"msg_b13a0755fc859b6a","output_index":0,"part":{"text":"We - need to call get_job_status with job_id: \"job-382\".","type":"reasoning_text"},"sequence_number":21,"type":"response.reasoning_part.done"} + - 'data: {"content_index":0,"item_id":"msg_a8e6149a1e573d1b","output_index":0,"part":{"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"reasoning_text"},"sequence_number":23,"type":"response.reasoning_part.done"} ' - ' @@ -382,8 +400,8 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"id":"msg_b13a0755fc859b6a","summary":[],"type":"reasoning","content":[{"text":"We - need to call get_job_status with job_id: \"job-382\".","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":22,"type":"response.output_item.done"} + - 'data: {"item":{"id":"msg_a8e6149a1e573d1b","summary":[],"type":"reasoning","content":[{"text":"The + user wants to check status of job-382. 
We should call get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":24,"type":"response.output_item.done"} ' - ' @@ -392,7 +410,43 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"arguments":"","call_id":"call_be4f9dfe0052d170","name":"get_job_status","type":"function_call","id":"fc_820d82db029349a4","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":23,"type":"response.output_item.added"} + - 'data: {"item":{"arguments":"","call_id":"call_8febbe99b4e4e607","name":"get_job_status","type":"function_call","id":"fc_9d06e8f43a16a23b","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":25,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\n","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} ' - ' @@ -401,7 +455,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"{\"","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"_id","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} ' - ' @@ -410,7 +464,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} ' - ' @@ -419,7 +473,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"_id","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" \"","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.delta"} ' - ' @@ -428,7 +482,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"job","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":33,"type":"response.function_call_arguments.delta"} ' - ' @@ -437,7 +491,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"-","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":34,"type":"response.function_call_arguments.delta"} ' - ' @@ -446,7 +500,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"-","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} + - 'data: 
{"delta":"382","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":35,"type":"response.function_call_arguments.delta"} ' - ' @@ -455,7 +509,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"382","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\"\n","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":36,"type":"response.function_call_arguments.delta"} ' - ' @@ -464,7 +518,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\"}","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"}","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":37,"type":"response.function_call_arguments.delta"} ' - ' @@ -473,7 +527,7 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_820d82db029349a4","name":"get_job_status","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.done"} + - 'data: {"arguments":"{\n \"job_id\": \"job-382\"\n}","item_id":"fc_9d06e8f43a16a23b","name":"get_job_status","output_index":1,"sequence_number":38,"type":"response.function_call_arguments.done"} ' - ' @@ -482,7 +536,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_be4f9dfe0052d170","name":"get_job_status","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_820d82db029349a4","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":33,"type":"response.output_item.done"} + - 'data: {"item":{"arguments":"{\n \"job_id\": 
\"job-382\"\n}","call_id":"call_8febbe99b4e4e607","name":"get_job_status","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":39,"type":"response.output_item.done"} ' - ' @@ -491,8 +545,9 @@ turns: - 'event: response.completed ' - - 'data: {"response":{"id":"resp_ac3c1315a91f31a2","created_at":1782331382,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_b661a3bbd2228d16","summary":[],"type":"reasoning","content":[{"text":"We - need to call get_job_status with job_id: \"job-382\".","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\"}","call_id":"call_8ce70f6987366936","name":"get_job_status","type":"function_call","id":"fc_8ce70f6987366936","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_a2613187feeed7eb","created_at":1782332952,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_9c1f68eb64edc98c","summary":[],"type":"reasoning","content":[{"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\n \"job_id\": + \"job-382\"\n}","call_id":"call_b6631e5373a38e91","name":"get_job_status","type":"function_call","id":"fc_b6631e5373a38e91","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -513,7 +568,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":433,"input_tokens_details":{"cached_tokens":432,"input_tokens_per_turn":[433],"cached_tokens_per_turn":[432]},"output_tokens":44,"output_tokens_details":{"reasoning_tokens":26,"tool_output_tokens":0,"output_tokens_per_turn":[44],"tool_output_tokens_per_turn":[0]},"total_tokens":477},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":34,"type":"response.completed"} + the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":433,"input_tokens_details":{"cached_tokens":432,"input_tokens_per_turn":[433],"cached_tokens_per_turn":[432]},"output_tokens":50,"output_tokens_details":{"reasoning_tokens":32,"tool_output_tokens":0,"output_tokens_per_turn":[50],"tool_output_tokens_per_turn":[0]},"total_tokens":483},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":40,"type":"response.completed"} ' - ' @@ -524,7 +579,7 @@ turns: request: body: input: - - call_id: call_8ce70f6987366936 + - call_id: call_b6631e5373a38e91 output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -533,7 +588,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_ac3c1315a91f31a2 + previous_response_id: resp_a2613187feeed7eb store: true stream: true tool_choice: auto @@ -670,7 +725,7 @@ turns: - 'event: response.created ' - - 'data: {"response":{"id":"resp_8f993b44ca8e52e3","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: 
{"response":{"id":"resp_86f66db434f30025","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -691,7 +746,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3495,"max_tool_calls":null,"previous_response_id":"resp_ac3c1315a91f31a2","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3489,"max_tool_calls":null,"previous_response_id":"resp_a2613187feeed7eb","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} ' - ' @@ -700,7 +755,7 @@ turns: - 'event: response.in_progress ' - - 'data: {"response":{"id":"resp_8f993b44ca8e52e3","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_86f66db434f30025","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -721,7 +776,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3495,"max_tool_calls":null,"previous_response_id":"resp_ac3c1315a91f31a2","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3489,"max_tool_calls":null,"previous_response_id":"resp_a2613187feeed7eb","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} ' - ' @@ -730,7 +785,7 @@ turns: - 'event: 
response.output_item.added ' - - 'data: {"item":{"id":"msg_85bbe0941f1ebf9d","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + - 'data: {"item":{"id":"msg_92b62cd073692e7b","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} ' - ' @@ -739,7 +794,160 @@ turns: - 'event: response.reasoning_part.added ' - - 'data: {"content_index":0,"item_id":"msg_85bbe0941f1ebf9d","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + - 'data: {"content_index":0,"item_id":"msg_92b62cd073692e7b","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"We","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" need","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" to","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" call","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" 
get","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_error","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_logs","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" with","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" job","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_id","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" \"","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"job","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"-","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: 
{"content_index":0,"delta":"382","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"\",","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" severity","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" F","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":20,"type":"response.reasoning_text.delta"} ' - ' @@ -748,7 +956,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"Need","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"ATAL","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":21,"type":"response.reasoning_text.delta"} ' - ' @@ -757,7 +965,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" get","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":",","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":22,"type":"response.reasoning_text.delta"} ' - ' @@ -766,7 +974,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_error","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" max","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":23,"type":"response.reasoning_text.delta"} ' - ' @@ -775,7 +983,7 @@ 
turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_logs","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_entries","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":24,"type":"response.reasoning_text.delta"} ' - ' @@ -784,7 +992,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" function","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" ","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":25,"type":"response.reasoning_text.delta"} ' - ' @@ -793,7 +1001,16 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":".","item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"5","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":26,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":27,"type":"response.reasoning_text.delta"} ' - ' @@ -802,8 +1019,9 @@ turns: - 'event: response.reasoning_text.done ' - - 'data: {"content_index":0,"item_id":"msg_85bbe0941f1ebf9d","output_index":0,"sequence_number":10,"text":"Need - get_error_logs function.","type":"response.reasoning_text.done"} + - 'data: {"content_index":0,"item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":28,"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"response.reasoning_text.done"} ' - ' @@ -812,8 +1030,9 @@ turns: - 'event: response.reasoning_part.done ' - - 'data: 
{"content_index":0,"item_id":"msg_85bbe0941f1ebf9d","output_index":0,"part":{"text":"Need - get_error_logs function.","type":"reasoning_text"},"sequence_number":11,"type":"response.reasoning_part.done"} + - 'data: {"content_index":0,"item_id":"msg_92b62cd073692e7b","output_index":0,"part":{"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"reasoning_text"},"sequence_number":29,"type":"response.reasoning_part.done"} ' - ' @@ -822,8 +1041,9 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"id":"msg_85bbe0941f1ebf9d","summary":[],"type":"reasoning","content":[{"text":"Need - get_error_logs function.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":12,"type":"response.output_item.done"} + - 'data: {"item":{"id":"msg_92b62cd073692e7b","summary":[],"type":"reasoning","content":[{"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":30,"type":"response.output_item.done"} ' - ' @@ -832,7 +1052,79 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"arguments":"","call_id":"call_a12afd5ebc0e5edd","name":"get_error_logs","type":"function_call","id":"fc_b5165a0ca873fb55","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":13,"type":"response.output_item.added"} + - 'data: {"item":{"arguments":"","call_id":"call_9b2372a2800336e0","name":"get_error_logs","type":"function_call","id":"fc_8b3ba0677d2822bc","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":31,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + 
- 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":33,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":34,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":35,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_id","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":36,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":37,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":38,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":39,"type":"response.function_call_arguments.delta"} ' - ' @@ -841,7 +1133,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"{\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"-","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":40,"type":"response.function_call_arguments.delta"} ' - ' @@ -850,7 +1142,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"job","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"382","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":41,"type":"response.function_call_arguments.delta"} ' - ' @@ -859,7 +1151,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"_id","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\",\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":42,"type":"response.function_call_arguments.delta"} ' - ' @@ -868,7 +1160,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":43,"type":"response.function_call_arguments.delta"} ' - ' @@ -877,7 +1169,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"job","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":44,"type":"response.function_call_arguments.delta"} ' - ' @@ -886,7 +1178,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"-","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"severity","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":45,"type":"response.function_call_arguments.delta"} ' - ' @@ -895,7 +1187,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"382","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":46,"type":"response.function_call_arguments.delta"} ' - ' @@ -904,7 +1196,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\",\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":47,"type":"response.function_call_arguments.delta"} ' - ' @@ -913,7 +1205,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"severity","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"F","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":48,"type":"response.function_call_arguments.delta"} ' - ' @@ -922,7 +1214,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"ATAL","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":49,"type":"response.function_call_arguments.delta"} ' - ' @@ -931,7 +1223,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"F","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\",\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":50,"type":"response.function_call_arguments.delta"} ' - ' @@ -940,7 +1232,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"ATAL","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":51,"type":"response.function_call_arguments.delta"} ' - ' @@ -949,7 +1241,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\",\"","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":52,"type":"response.function_call_arguments.delta"} ' - ' @@ -958,7 +1250,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"max","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"max","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":53,"type":"response.function_call_arguments.delta"} ' - ' @@ -967,7 +1259,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"_entries","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"_entries","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":54,"type":"response.function_call_arguments.delta"} ' - ' @@ -976,7 +1268,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":55,"type":"response.function_call_arguments.delta"} ' - ' @@ -985,7 +1277,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"5","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":56,"type":"response.function_call_arguments.delta"} ' - ' @@ -994,7 +1286,25 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"}","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"5","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":57,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":58,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"}","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":59,"type":"response.function_call_arguments.delta"} ' - ' @@ -1003,7 +1313,8 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_b5165a0ca873fb55","name":"get_error_logs","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.done"} + - 'data: {"arguments":"{\n \"job_id\": \"job-382\",\n \"severity\": \"FATAL\",\n \"max_entries\": + 5\n}","item_id":"fc_8b3ba0677d2822bc","name":"get_error_logs","output_index":1,"sequence_number":60,"type":"response.function_call_arguments.done"} ' - ' @@ -1012,7 +1323,8 @@ turns: - 'event: response.output_item.done ' - - 'data: 
{"item":{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_a12afd5ebc0e5edd","name":"get_error_logs","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_b5165a0ca873fb55","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":33,"type":"response.output_item.done"} + - 'data: {"item":{"arguments":"{\n \"job_id\": \"job-382\",\n \"severity\": + \"FATAL\",\n \"max_entries\": 5\n}","call_id":"call_9b2372a2800336e0","name":"get_error_logs","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":61,"type":"response.output_item.done"} ' - ' @@ -1021,8 +1333,10 @@ turns: - 'event: response.completed ' - - 'data: {"response":{"id":"resp_8f993b44ca8e52e3","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_ac77fd030765f30a","summary":[],"type":"reasoning","content":[{"text":"Need - get_error_logs function.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_82bf682ac0599fd9","name":"get_error_logs","type":"function_call","id":"fc_82bf682ac0599fd9","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_86f66db434f30025","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_aeaf98915a49d703","summary":[],"type":"reasoning","content":[{"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 
5.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\n \"job_id\": + \"job-382\",\n \"severity\": \"FATAL\",\n \"max_entries\": 5\n}","call_id":"call_bb83b6c9e24d7a4a","name":"get_error_logs","type":"function_call","id":"fc_bb83b6c9e24d7a4a","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1043,7 +1357,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3495,"max_tool_calls":null,"previous_response_id":"resp_ac3c1315a91f31a2","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":601,"input_tokens_details":{"cached_tokens":448,"input_tokens_per_turn":[601],"cached_tokens_per_turn":[448]},"output_tokens":44,"output_tokens_details":{"reasoning_tokens":26,"tool_output_tokens":0,"output_tokens_per_turn":[44],"tool_output_tokens_per_turn":[0]},"total_tokens":645},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":34,"type":"response.completed"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3489,"max_tool_calls":null,"previous_response_id":"resp_a2613187feeed7eb","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":607,"input_tokens_details":{"cached_tokens":448,"input_tokens_per_turn":[607],"cached_tokens_per_turn":[448]},"output_tokens":72,"output_tokens_details":{"reasoning_tokens":54,"tool_output_tokens":0,"output_tokens_per_turn":[72],"tool_output_tokens_per_turn":[0]},"total_tokens":679},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":62,"type":"response.completed"} ' - ' @@ -1054,7 +1368,7 @@ turns: request: body: input: - - call_id: call_82bf682ac0599fd9 + - call_id: call_bb83b6c9e24d7a4a output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, @@ -1069,7 
+1383,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_8f993b44ca8e52e3 + previous_response_id: resp_86f66db434f30025 store: true stream: true tool_choice: auto @@ -1206,7 +1520,7 @@ turns: - 'event: response.created ' - - 'data: {"response":{"id":"resp_8d9428f89b5bbe23","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_a58eda69543e3142","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. 
job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1227,7 +1541,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3223,"max_tool_calls":null,"previous_response_id":"resp_8f993b44ca8e52e3","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3189,"max_tool_calls":null,"previous_response_id":"resp_86f66db434f30025","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} ' - ' @@ -1236,7 +1550,7 @@ turns: - 'event: response.in_progress ' - 
- 'data: {"response":{"id":"resp_8d9428f89b5bbe23","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_a58eda69543e3142","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1257,7 +1571,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3223,"max_tool_calls":null,"previous_response_id":"resp_8f993b44ca8e52e3","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3189,"max_tool_calls":null,"previous_response_id":"resp_86f66db434f30025","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} ' - ' @@ -1266,7 +1580,7 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"id":"msg_b5d30a9e826b60fb","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + - 'data: {"item":{"id":"msg_b1ed6fcc9b2f9086","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} ' - ' @@ -1275,7 +1589,7 @@ turns: - 'event: response.reasoning_part.added ' - - 'data: {"content_index":0,"item_id":"msg_b5d30a9e826b60fb","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + - 'data: {"content_index":0,"item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} ' - ' @@ -1284,7 +1598,7 @@ turns: - 'event: 
response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"Using","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"Call","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} ' - ' @@ -1293,7 +1607,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":" web","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":" web","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} ' - ' @@ -1302,7 +1616,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":"_search","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":"_search","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} ' - ' @@ -1311,7 +1625,7 @@ turns: - 'event: response.reasoning_text.delta ' - - 'data: {"content_index":0,"delta":".","item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + - 'data: {"content_index":0,"delta":".","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} ' - ' @@ -1320,7 +1634,7 @@ turns: - 'event: response.reasoning_text.done ' - - 'data: {"content_index":0,"item_id":"msg_b5d30a9e826b60fb","output_index":0,"sequence_number":8,"text":"Using + - 'data: {"content_index":0,"item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":8,"text":"Call web_search.","type":"response.reasoning_text.done"} ' @@ -1330,7 +1644,7 @@ turns: - 'event: response.reasoning_part.done ' - - 'data: 
{"content_index":0,"item_id":"msg_b5d30a9e826b60fb","output_index":0,"part":{"text":"Using + - 'data: {"content_index":0,"item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"part":{"text":"Call web_search.","type":"reasoning_text"},"sequence_number":9,"type":"response.reasoning_part.done"} ' @@ -1340,7 +1654,7 @@ turns: - 'event: response.output_item.done ' - - 'data: {"item":{"id":"msg_b5d30a9e826b60fb","summary":[],"type":"reasoning","content":[{"text":"Using + - 'data: {"item":{"id":"msg_b1ed6fcc9b2f9086","summary":[],"type":"reasoning","content":[{"text":"Call web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":10,"type":"response.output_item.done"} ' @@ -1350,7 +1664,16 @@ turns: - 'event: response.output_item.added ' - - 'data: {"item":{"arguments":"","call_id":"call_b5ab6fded3b63402","name":"web_search","type":"function_call","id":"fc_b107336aafb717b5","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":11,"type":"response.output_item.added"} + - 'data: {"item":{"arguments":"","call_id":"call_a7d34f6d95f8ff9a","name":"web_search","type":"function_call","id":"fc_aa8e3909617fdeeb","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":11,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\n","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":12,"type":"response.function_call_arguments.delta"} ' - ' @@ -1359,7 +1682,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"{\"","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":12,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" ","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} ' - ' @@ -1368,7 +1691,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 
'data: {"delta":"query","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" \"","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} ' - ' @@ -1377,7 +1700,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\":\"","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"query","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} ' - ' @@ -1386,7 +1709,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"Spark","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\":","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} ' - ' @@ -1395,7 +1718,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" Out","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" \"","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} ' - ' @@ -1404,7 +1727,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"Of","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"spark","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} ' - ' @@ -1413,7 +1736,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: 
{"delta":"Memory","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" out","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} ' - ' @@ -1422,7 +1745,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"Error","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"of","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} ' - ' @@ -1431,7 +1754,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" transform","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"memory","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} ' - ' @@ -1440,7 +1763,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" stage","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"error","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} ' - ' @@ -1449,7 +1772,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" fix","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" how","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} ' - ' @@ -1458,7 +1781,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" 
memory","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" to","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} ' - ' @@ -1467,7 +1790,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" limits","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":" fix","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} ' - ' @@ -1476,7 +1799,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":" solve","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"\"\n","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} ' - ' @@ -1485,7 +1808,7 @@ turns: - 'event: response.function_call_arguments.delta ' - - 'data: {"delta":"\"}","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + - 'data: {"delta":"}","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} ' - ' @@ -1494,8 +1817,7 @@ turns: - 'event: response.function_call_arguments.done ' - - 'data: {"arguments":"{\"query\":\"Spark OutOfMemoryError transform stage fix - memory limits solve\"}","item_id":"fc_b107336aafb717b5","name":"web_search","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.done"} + - 'data: {"arguments":"{\n \"query\": \"spark outofmemoryerror how to fix\"\n}","item_id":"fc_aa8e3909617fdeeb","name":"web_search","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.done"} ' - ' @@ -1504,8 +1826,8 @@ turns: - 
'event: response.output_item.done ' - - 'data: {"item":{"arguments":"{\"query\":\"Spark OutOfMemoryError transform stage - fix memory limits solve\"}","call_id":"call_b5ab6fded3b63402","name":"web_search","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_b107336aafb717b5","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":28,"type":"response.output_item.done"} + - 'data: {"item":{"arguments":"{\n \"query\": \"spark outofmemoryerror how to + fix\"\n}","call_id":"call_a7d34f6d95f8ff9a","name":"web_search","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":29,"type":"response.output_item.done"} ' - ' @@ -1514,9 +1836,9 @@ turns: - 'event: response.completed ' - - 'data: {"response":{"id":"resp_8d9428f89b5bbe23","created_at":1782331383,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_abc5949f81a3505f","summary":[],"type":"reasoning","content":[{"text":"Using - web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\"query\":\"Spark - OutOfMemoryError transform stage fix memory limits solve\"}","call_id":"call_8682d661d7f3a9fd","name":"web_search","type":"function_call","id":"fc_8682d661d7f3a9fd","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + - 'data: {"response":{"id":"resp_a58eda69543e3142","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_8d22ca49bb7cce52","summary":[],"type":"reasoning","content":[{"text":"Call + 
web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\n \"query\": + \"spark outofmemoryerror how to fix\"\n}","call_id":"call_a385d854446fb281","name":"web_search","type":"function_call","id":"fc_a385d854446fb281","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query the pipeline orchestrator for a job''s current state, metadata, and failure info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline @@ -1537,7 +1859,7 @@ turns: priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search - the public web for technical documentation, blog posts, and known 
issues."}],"top_p":1.0,"background":false,"max_output_tokens":3223,"max_tool_calls":null,"previous_response_id":"resp_8f993b44ca8e52e3","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":873,"input_tokens_details":{"cached_tokens":608,"input_tokens_per_turn":[873],"cached_tokens_per_turn":[608]},"output_tokens":38,"output_tokens_details":{"reasoning_tokens":21,"tool_output_tokens":0,"output_tokens_per_turn":[38],"tool_output_tokens_per_turn":[0]},"total_tokens":911},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":29,"type":"response.completed"} + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3189,"max_tool_calls":null,"previous_response_id":"resp_86f66db434f30025","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":907,"input_tokens_details":{"cached_tokens":624,"input_tokens_per_turn":[907],"cached_tokens_per_turn":[624]},"output_tokens":39,"output_tokens_details":{"reasoning_tokens":22,"tool_output_tokens":0,"output_tokens_per_turn":[39],"tool_output_tokens_per_turn":[0]},"total_tokens":946},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":30,"type":"response.completed"} ' - ' diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml index c8af8e1..418aac7 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml +++ 
b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml @@ -137,9 +137,9 @@ turns: response: body: background: false - created_at: 1782331372 + created_at: 1782332949 frequency_penalty: 0.0 - id: resp_9951741be0cfd0aa + id: resp_bdd5b90cf69f193e incomplete_details: null input_messages: null instructions: null @@ -151,16 +151,16 @@ turns: object: response output: - content: - - text: We should use get_job_status. + - text: We need to use get_job_status. type: reasoning_text encrypted_content: null - id: rs_b9b65102428a224d + id: rs_99dcd006b8361f42 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_b9bae339250e867f - id: fc_b9bae339250e867f + call_id: call_a8ec7f8606f82fcb + id: fc_a8ec7f8606f82fcb name: get_job_status namespace: null status: null @@ -312,15 +312,15 @@ turns: - 432 input_tokens_per_turn: - 439 - output_tokens: 35 + output_tokens: 36 output_tokens_details: output_tokens_per_turn: - - 35 - reasoning_tokens: 17 + - 36 + reasoning_tokens: 18 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 474 + total_tokens: 475 user: null headers: content-type: application/json @@ -329,7 +329,7 @@ turns: request: body: input: - - call_id: call_b9bae339250e867f + - call_id: call_a8ec7f8606f82fcb output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -339,7 +339,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_9951741be0cfd0aa + previous_response_id: resp_bdd5b90cf69f193e store: true stream: false tool_choice: auto @@ -472,30 +472,30 @@ turns: response: body: background: false - created_at: 1782331372 + created_at: 1782332949 frequency_penalty: 0.0 - id: resp_a0d45c68cfdf0dc3 + id: resp_b333098339569097 incomplete_details: null 
input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3497 + max_output_tokens: 3496 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Use get_error_logs. + - text: We will get_error_logs. type: reasoning_text encrypted_content: null - id: rs_81a485924cbf2205 + id: rs_8475b754aec8bc57 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' - call_id: call_b2ab544b587ed933 - id: fc_b2ab544b587ed933 + call_id: call_8473140fc968508b + id: fc_8473140fc968508b name: get_error_logs namespace: null status: null @@ -503,7 +503,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_9951741be0cfd0aa + previous_response_id: resp_bdd5b90cf69f193e prompt: null reasoning: null service_tier: auto @@ -640,22 +640,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 599 + input_tokens: 600 input_tokens_details: cached_tokens: 448 cached_tokens_per_turn: - 448 input_tokens_per_turn: - - 599 - output_tokens: 42 + - 600 + output_tokens: 43 output_tokens_details: output_tokens_per_turn: - - 42 - reasoning_tokens: 24 + - 43 + reasoning_tokens: 25 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 641 + total_tokens: 643 user: null headers: content-type: application/json @@ -664,7 +664,7 @@ turns: request: body: input: - - call_id: call_b2ab544b587ed933 + - call_id: call_8473140fc968508b output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, @@ -680,7 +680,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_a0d45c68cfdf0dc3 + previous_response_id: resp_b333098339569097 store: true stream: 
false tool_choice: auto @@ -813,31 +813,31 @@ turns: response: body: background: false - created_at: 1782331372 + created_at: 1782332950 frequency_penalty: 0.0 - id: resp_942924910373a289 + id: resp_965a415718fbf3d6 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3222 + max_output_tokens: 3220 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Use search_runbook. + - text: Call search_runbook. type: reasoning_text encrypted_content: null - id: rs_99598f9b7ee05702 + id: rs_9d57d334a763e6cd status: null summary: [] type: reasoning - - arguments: '{"query":"OutOfMemoryError transform stage spark partition 20 - entries","max_results":5}' - call_id: call_bd3e5a22c5de5a46 - id: fc_bd3e5a22c5de5a46 + - arguments: '{"query":"OutOfMemoryError in transform stage spark job-382 fix + runbook", "max_results":5}' + call_id: call_9f8fb58fca2c18a3 + id: fc_9f8fb58fca2c18a3 name: search_runbook namespace: null status: null @@ -845,7 +845,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_a0d45c68cfdf0dc3 + previous_response_id: resp_b333098339569097 prompt: null reasoning: null service_tier: auto @@ -982,22 +982,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 874 + input_tokens: 876 input_tokens_details: cached_tokens: 608 cached_tokens_per_turn: - 608 input_tokens_per_turn: - - 874 - output_tokens: 45 + - 876 + output_tokens: 49 output_tokens_details: output_tokens_per_turn: - - 45 - reasoning_tokens: 27 + - 49 + reasoning_tokens: 31 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 919 + total_tokens: 925 user: null headers: content-type: application/json @@ -1006,7 +1006,7 @@ turns: request: body: input: - - call_id: call_bd3e5a22c5de5a46 + - call_id: call_9f8fb58fca2c18a3 output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": 
"/runbooks/data-platform/oom-etl.md", "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query @@ -1020,7 +1020,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_942924910373a289 + previous_response_id: resp_965a415718fbf3d6 store: true stream: false tool_choice: auto @@ -1153,32 +1153,32 @@ turns: response: body: background: false - created_at: 1782331373 + created_at: 1782332950 frequency_penalty: 0.0 - id: resp_ad167af2a228ea80 + id: resp_bd9e5303a45132a7 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 2980 + max_output_tokens: 2974 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: We need to call run_analysis. + - text: We call run_analysis. type: reasoning_text encrypted_content: null - id: rs_a7f023b8835b666c + id: rs_98daf1dc1d378213 status: null summary: [] type: reasoning - - arguments: '{"code":"import json\nprint(json.dumps({''job'': ''job-382'', + - arguments: '{"code":"import json; print(json.dumps({''job'': ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase memory to 64GB''}))","timeout_seconds":30}' - call_id: call_9972b03347027652 - id: fc_9972b03347027652 + call_id: call_87c2a52cfc83b243 + id: fc_87c2a52cfc83b243 name: run_analysis namespace: null status: null @@ -1186,7 +1186,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_942924910373a289 + previous_response_id: resp_965a415718fbf3d6 prompt: null reasoning: null service_tier: auto @@ -1323,22 +1323,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 1116 + input_tokens: 1122 input_tokens_details: cached_tokens: 880 cached_tokens_per_turn: - 880 input_tokens_per_turn: - - 1116 - output_tokens: 74 + - 1122 + 
output_tokens: 72 output_tokens_details: output_tokens_per_turn: - - 74 - reasoning_tokens: 57 + - 72 + reasoning_tokens: 55 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 1190 + total_tokens: 1194 user: null headers: content-type: application/json @@ -1347,7 +1347,7 @@ turns: request: body: input: - - call_id: call_9972b03347027652 + - call_id: call_87c2a52cfc83b243 output: '{"stdout": "Failed stage: transform, count: 2\nPattern: consistent OOM at partition-47 (2.1M rows)\nRecommendation: increase memory to 64GB or repartition", "stderr": "", "exit_code": 0, "duration_ms": 120}' @@ -1357,7 +1357,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_ad167af2a228ea80 + previous_response_id: resp_bd9e5303a45132a7 store: true stream: false tool_choice: auto @@ -1490,30 +1490,30 @@ turns: response: body: background: false - created_at: 1782331373 + created_at: 1782332951 frequency_penalty: 0.0 - id: resp_ba8c42b503457f18 + id: resp_82fb53818571c25a incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 2808 + max_output_tokens: 2804 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Use restart_job. + - text: Call restart_job. 
type: reasoning_text encrypted_content: null - id: rs_9a2415f2829c554b + id: rs_83a9d72111ec93c4 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' - call_id: call_945de04a969d9596 - id: fc_945de04a969d9596 + call_id: call_a255f9a0224bf7d9 + id: fc_a255f9a0224bf7d9 name: restart_job namespace: null status: null @@ -1521,7 +1521,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_ad167af2a228ea80 + previous_response_id: resp_bd9e5303a45132a7 prompt: null reasoning: null service_tier: auto @@ -1658,13 +1658,13 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 1288 + input_tokens: 1292 input_tokens_details: cached_tokens: 1120 cached_tokens_per_turn: - 1120 input_tokens_per_turn: - - 1288 + - 1292 output_tokens: 49 output_tokens_details: output_tokens_per_turn: @@ -1673,7 +1673,7 @@ turns: tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 1337 + total_tokens: 1341 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml index fd6cd6e..7301589 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml @@ -136,9 +136,9 @@ turns: response: body: background: false - created_at: 1782331392 + created_at: 1782332955 frequency_penalty: 0.0 - id: resp_bcc40e72c8d9c69d + id: resp_88c245c603a1d2c5 incomplete_details: null input_messages: null instructions: null @@ -150,16 +150,16 @@ turns: object: response output: - content: - - text: We need to call get_job_status. + - text: We need to query the pipeline orchestrator. Use get_job_status. 
type: reasoning_text encrypted_content: null - id: rs_be52377bf2edcc74 + id: rs_b8144bee19fa21a2 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_bb5389175f60b859 - id: fc_bb5389175f60b859 + call_id: call_a8129fdfb7970a9c + id: fc_a8129fdfb7970a9c name: get_job_status namespace: null status: null @@ -311,15 +311,15 @@ turns: - 432 input_tokens_per_turn: - 436 - output_tokens: 36 + output_tokens: 42 output_tokens_details: output_tokens_per_turn: - - 36 - reasoning_tokens: 18 + - 42 + reasoning_tokens: 24 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 472 + total_tokens: 478 user: null headers: content-type: application/json @@ -328,7 +328,7 @@ turns: request: body: input: - - call_id: call_bb5389175f60b859 + - call_id: call_a8129fdfb7970a9c output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -337,7 +337,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_bcc40e72c8d9c69d + previous_response_id: resp_88c245c603a1d2c5 store: true stream: false tool_choice: auto @@ -470,30 +470,30 @@ turns: response: body: background: false - created_at: 1782331392 + created_at: 1782332955 frequency_penalty: 0.0 - id: resp_b6f737f9fa6c4cf8 + id: resp_b4b63e6cf26bac63 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3501 + max_output_tokens: 3495 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: Call get_error_logs. + - text: We call get_error_logs. 
type: reasoning_text encrypted_content: null - id: rs_b726441da4677400 + id: rs_9fe2fe7333bbe17b status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' - call_id: call_88a9e761caa65554 - id: fc_88a9e761caa65554 + call_id: call_a8d716a0954a6f32 + id: fc_a8d716a0954a6f32 name: get_error_logs namespace: null status: null @@ -501,7 +501,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_bcc40e72c8d9c69d + previous_response_id: resp_88c245c603a1d2c5 prompt: null reasoning: null service_tier: auto @@ -638,22 +638,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 595 + input_tokens: 601 input_tokens_details: - cached_tokens: 576 + cached_tokens: 448 cached_tokens_per_turn: - - 576 + - 448 input_tokens_per_turn: - - 595 - output_tokens: 42 + - 601 + output_tokens: 43 output_tokens_details: output_tokens_per_turn: - - 42 - reasoning_tokens: 24 + - 43 + reasoning_tokens: 25 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 637 + total_tokens: 644 user: null headers: content-type: application/json @@ -662,7 +662,7 @@ turns: request: body: input: - - call_id: call_bb5389175f60b859 + - call_id: call_a8129fdfb7970a9c output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' @@ -672,7 +672,7 @@ turns: role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_bcc40e72c8d9c69d + previous_response_id: resp_88c245c603a1d2c5 store: true stream: false tool_choice: auto @@ -805,30 +805,30 @@ turns: response: body: background: false - created_at: 1782331393 + created_at: 1782332956 frequency_penalty: 0.0 - id: resp_991eab30a6ad96c8 + id: resp_8a4dcb6fa1e203ed incomplete_details: null input_messages: null instructions: 
null kv_transfer_params: null - max_output_tokens: 3492 + max_output_tokens: 3486 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: We need to call search_runbook. + - text: We search runbook. type: reasoning_text encrypted_content: null - id: rs_978d79d63851ced6 + id: rs_bbd8fccb1a5f6fe6 status: null summary: [] type: reasoning - - arguments: '{"query":"increase memory limits for ETL jobs runbook","max_results":3}' - call_id: call_9339bb4ac308c0fc - id: fc_9339bb4ac308c0fc + - arguments: '{"query":"increase memory limits for ETL jobs", "max_results":3}' + call_id: call_bb3377332bb7ed73 + id: fc_bb3377332bb7ed73 name: search_runbook namespace: null status: null @@ -836,7 +836,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_bcc40e72c8d9c69d + previous_response_id: resp_88c245c603a1d2c5 prompt: null reasoning: null service_tier: auto @@ -973,22 +973,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 604 + input_tokens: 610 input_tokens_details: cached_tokens: 576 cached_tokens_per_turn: - 576 input_tokens_per_turn: - - 604 - output_tokens: 46 + - 610 + output_tokens: 42 output_tokens_details: output_tokens_per_turn: - - 46 - reasoning_tokens: 28 + - 42 + reasoning_tokens: 24 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 650 + total_tokens: 652 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml index d1cc276..4c2c560 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml @@ -2,9 +2,8 @@ turns: - filename: t1 request: body: - input: 'You are an SRE assistant. 
I need you to do TWO things in parallel: 1) - call get_job_status for job-382, and 2) call web_search for ''Spark ETL OOM - causes''. Do both now.' + input: 'You are an SRE assistant. Do TWO things in parallel: 1) check the status + of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now.' model: openai/gpt-oss-20b store: true stream: false @@ -138,31 +137,31 @@ turns: response: body: background: false - created_at: 1782331421 + created_at: 1782332957 frequency_penalty: 0.0 - id: resp_8d2ba1fd0c04698e + id: resp_9bfc4dacc826288b incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3630 + max_output_tokens: 3638 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: We have two tasks. Need to issue function calls. In parallel but we - can do sequential calls. We'll return a response with both calls. + - text: 'We need to call two tools: get_job_status with job_id "job-382" and + web_search for "Spark OOM fixes". We''ll output both. We''ll use tools.' 
type: reasoning_text encrypted_content: null - id: rs_80890552364f4920 + id: rs_9e89d5f70cbf5c3b status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_8e475b02312e69a7 - id: fc_8e475b02312e69a7 + call_id: call_adbd2bbc5203b3d4 + id: fc_adbd2bbc5203b3d4 name: get_job_status namespace: null status: null @@ -307,18 +306,18 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 466 + input_tokens: 458 input_tokens_details: cached_tokens: 416 cached_tokens_per_turn: - 416 input_tokens_per_turn: - - 466 - output_tokens: 56 + - 458 + output_tokens: 64 output_tokens_details: output_tokens_per_turn: - - 56 - reasoning_tokens: 38 + - 64 + reasoning_tokens: 46 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 @@ -331,17 +330,17 @@ turns: request: body: input: - - call_id: call_8e475b02312e69a7 + - call_id: call_adbd2bbc5203b3d4 output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' type: function_call_output - - content: Based on those results, search the runbook for memory increase procedures. + - content: Based on those results, search the runbook for memory override procedures. Max 3 results. role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_8d2ba1fd0c04698e + previous_response_id: resp_9bfc4dacc826288b store: true stream: false tool_choice: auto @@ -474,9 +473,9 @@ turns: response: body: background: false - created_at: 1782331422 + created_at: 1782332958 frequency_penalty: 0.0 - id: resp_aba6b58081c2af00 + id: resp_9e29702b26e8d17f incomplete_details: null input_messages: null instructions: null @@ -488,16 +487,17 @@ turns: object: response output: - content: - - text: 'We use search_runbook with query: "increase memory in spark etl job".' 
+ - text: Call search_runbook with query "memory override procedure pipeline + job" maybe. type: reasoning_text encrypted_content: null - id: rs_bdf90e9ca5f2cfe1 + id: rs_8564e8af0bfe516e status: null summary: [] type: reasoning - - arguments: '{"query":"increase memory in spark etl job","max_results":3}' - call_id: call_bdca8527c50ba7c3 - id: fc_bdca8527c50ba7c3 + - arguments: '{"query":"memory override procedure pipeline job", "max_results":3}' + call_id: call_8641aa37edcc8a37 + id: fc_8641aa37edcc8a37 name: search_runbook namespace: null status: null @@ -505,7 +505,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_8d2ba1fd0c04698e + previous_response_id: resp_9bfc4dacc826288b prompt: null reasoning: null service_tier: auto @@ -649,15 +649,15 @@ turns: - 496 input_tokens_per_turn: - 648 - output_tokens: 53 + output_tokens: 50 output_tokens_details: output_tokens_per_turn: - - 53 - reasoning_tokens: 35 + - 50 + reasoning_tokens: 32 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 701 + total_tokens: 698 user: null headers: content-type: application/json @@ -666,7 +666,7 @@ turns: request: body: input: - - call_id: call_bdca8527c50ba7c3 + - call_id: call_8641aa37edcc8a37 output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query @@ -674,12 +674,11 @@ turns: "snippet": "To increase memory: use restart_job with memory_override_gb. Standard escalation: 32->64->128. Requires approval above 128GB."}]' type: function_call_output - - content: Now restart that job with 64 GB memory, skip completed stages, high - priority. + - content: Now restart that job with 64GB memory and high priority. 
role: user type: message model: openai/gpt-oss-20b - previous_response_id: resp_aba6b58081c2af00 + previous_response_id: resp_9e29702b26e8d17f store: true stream: false tool_choice: auto @@ -812,30 +811,30 @@ turns: response: body: background: false - created_at: 1782331422 + created_at: 1782332958 frequency_penalty: 0.0 - id: resp_ad6b4fc0f049ab2c + id: resp_9d863819516f196f incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3227 + max_output_tokens: 3234 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - content: - - text: We call restart_job. + - text: Call restart_job. type: reasoning_text encrypted_content: null - id: rs_bdb91c841cb8efe5 + id: rs_ae5f6f0e38b5f7d4 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' - call_id: call_ab53b40d671c09d9 - id: fc_ab53b40d671c09d9 + call_id: call_b231aec112209369 + id: fc_b231aec112209369 name: restart_job namespace: null status: null @@ -843,7 +842,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_aba6b58081c2af00 + previous_response_id: resp_9e29702b26e8d17f prompt: null reasoning: null service_tier: auto @@ -980,22 +979,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 869 + input_tokens: 862 input_tokens_details: cached_tokens: 656 cached_tokens_per_turn: - 656 input_tokens_per_turn: - - 869 - output_tokens: 50 + - 862 + output_tokens: 49 output_tokens_details: output_tokens_per_turn: - - 50 - reasoning_tokens: 33 + - 49 + reasoning_tokens: 32 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 919 + total_tokens: 911 user: null headers: content-type: application/json diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml 
b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml index 15afd2a..ab6bc38 100644 --- a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml @@ -136,9 +136,9 @@ turns: response: body: background: false - created_at: 1782331440 + created_at: 1782332960 frequency_penalty: 0.0 - id: resp_8a090f3a77b00e12 + id: resp_a4885b37a5cc8f6a incomplete_details: null input_messages: null instructions: null @@ -150,16 +150,16 @@ turns: object: response output: - content: - - text: We need to call get_job_status function. + - text: Need to call function get_job_status. type: reasoning_text encrypted_content: null - id: rs_817b973ff481e19e + id: rs_abbd07ed9c3d37f1 status: null summary: [] type: reasoning - arguments: '{"job_id":"job-382"}' - call_id: call_b565b3670d5793e9 - id: fc_b565b3670d5793e9 + call_id: call_abfcd3d8ebc4441c + id: fc_abfcd3d8ebc4441c name: get_job_status namespace: null status: null @@ -311,15 +311,15 @@ turns: - 432 input_tokens_per_turn: - 436 - output_tokens: 37 + output_tokens: 36 output_tokens_details: output_tokens_per_turn: - - 37 - reasoning_tokens: 19 + - 36 + reasoning_tokens: 18 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 473 + total_tokens: 472 user: null headers: content-type: application/json @@ -328,13 +328,13 @@ turns: request: body: input: - - call_id: call_b565b3670d5793e9 + - call_id: call_abfcd3d8ebc4441c output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' type: function_call_output model: openai/gpt-oss-20b - previous_response_id: resp_8a090f3a77b00e12 + previous_response_id: resp_a4885b37a5cc8f6a store: true 
stream: false tool_choice: auto @@ -467,69 +467,36 @@ turns: response: body: background: false - created_at: 1782331440 + created_at: 1782332960 frequency_penalty: 0.0 - id: resp_a7852d7a7444baf5 + id: resp_a5ee681c0f39cf97 incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 3520 + max_output_tokens: 3521 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - - content: - - text: The job failed due to OutOfMemoryError in transform stage. We need - to provide troubleshoot advice or offer restart. Let's provide status - and recommended next steps. - type: reasoning_text - encrypted_content: null - id: rs_b963ebd664a8f5b9 - status: null - summary: [] - type: reasoning - content: - annotations: [] logprobs: null - text: "**ETL Pipeline – Job `job-382`**\n\n| Metric | Value |\n|--------|-------|\n\ - | **Status** | Failed |\n| **Pipeline** | `etl-daily-ingest` |\n| **Stage**\ - \ | `transform` |\n| **Error** | `OutOfMemoryError` |\n| **Timestamp**\ - \ | 2024‑01‑15T03:22:11Z |\n| **Memory used** | 31.2 GB |\n| **Memory\ - \ limit** | 32 GB |\n| **Owner** | `data-platform-team` |\n\n---\n\n##\ - \ Likely Cause\nThe `transform` stage exceeded the allocated 32 GB RAM,\ - \ triggering the JVM/worker process to kill the job. Possible root causes:\n\ - \n| Factor | How it impacts |\n|--------|----------------|\n| **Large\ - \ intermediate dataset** | Consumes a lot of RAM before writing to disk.\ - \ |\n| **Inefficient transformations** | e.g., `groupBy`, `collect_list`,\ - \ missing `repartition` or `broadcast`. |\n| **Missing shuffle savings**\ - \ | Repartitioning to reduce shuffles can lower per‑task memory usage.\ - \ |\n| **Insufficient worker memory** | The job was allowed only 32 GB,\ - \ but the dataset size grew beyond that. |\n\n---\n\n## Next Steps\n\n\ - 1. 
**Inspect the Logs** \n Pull the detailed logs to see exactly which\ - \ transform caused the spike.\n\n ```bash\n get_error_logs(job_id=\"\ - job-382\", severity=\"ERROR\", max_entries=200)\n ```\n\n2. **Check\ - \ the Spark UI** \n Look at the shuffle and storage tab for the transform\ - \ stage. \n - Any tasks spilling to disk? \n - Are there many small\ - \ partitions? \n\n3. **Tune Memory Configuration** \n *You can add\ - \ a job restart with higher memory:*\n\n ```bash\n restart_job(job_id=\"\ - job-382\", memory_override_gb=48, skip_completed_stages=true, priority=\"\ - high\")\n ```\n\n *or add a memory hint in the job definition for\ - \ subsequent runs.*\n\n4. **Optimize the Transform** \n - Use `repartition`\ - \ or `coalesce` to reduce partition count before heavy ops. \n - Offload\ - \ non‑critical columns to a temporary dataset (`select`). \n - If using\ - \ `groupBy`, check if a `join` can reduce the data size first. \n -\ - \ Cache / persist intermediate results only when necessary.\n\n5. **Run\ - \ a Dry‑Run** \n After changes, submit a small subset of data (e.g.,\ - \ 1 % or a specific date range) to confirm the memory usage stays within\ - \ limits.\n\n---\n\n### Recommended Immediate Action\n\nIf you need a\ - \ quick fix, I suggest restarting the job with 48 GB memory:\n\n```json\n\ - {\n \"job_id\": \"job-382\",\n \"memory_override_gb\": 48,\n \"skip_completed_stages\"\ - : true,\n \"priority\": \"high\"\n}\n```\n\nWould you like me to initiate\ - \ that restart or fetch the logs for deeper analysis first?" 
+ text: "**Job status (job‑382)** \n\n| Field | Value |\n|-------|-------|\n\ + | **Status** | **failed** |\n| **Error type** | *OutOfMemoryError* |\n\ + | **Failed at** | 2024‑01‑15 T03:22:11 Z |\n| **Stage** | transform |\n\ + | **Memory used** | 31.2 GB |\n| **Memory limit** | 32 GB |\n| **Pipeline**\ + \ | etl‑daily‑ingest |\n| **Owner** | data‑platform‑team |\n\n---\n\n\ + ### Next steps\n\n* The pipeline failed because it exceeded the memory\ + \ budget during the **transform** stage. \n* You could \n 1. **Increase\ + \ the memory limit** (e.g., to 36 GB) and **restart** the job. \n 2.\ + \ **Skip completed stages** so it doesn’t re‑run the data ingestion step.\ + \ \n 3. Optionally lower the queue priority if you want a quick retry.\n\ + \nLet me know how you’d like to proceed (e.g., `restart_job`, view error\ + \ logs, or investigate further)." type: output_text - id: msg_96929b0b2ac2ca7f + id: msg_80b908ea16ee9e3c phase: null role: assistant status: completed @@ -537,7 +504,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_8a090f3a77b00e12 + previous_response_id: resp_a4885b37a5cc8f6a prompt: null reasoning: null service_tier: auto @@ -674,22 +641,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 576 + input_tokens: 575 input_tokens_details: cached_tokens: 448 cached_tokens_per_turn: - 448 input_tokens_per_turn: - - 576 - output_tokens: 675 + - 575 + output_tokens: 242 output_tokens_details: output_tokens_per_turn: - - 675 - reasoning_tokens: 32 + - 242 + reasoning_tokens: 0 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 1251 + total_tokens: 817 user: null headers: content-type: application/json @@ -697,9 +664,10 @@ turns: - filename: t3 request: body: - input: Great, now search the runbook for how to fix that OOM error. Max 5 results. + input: Based on what you found, search the runbook for how to fix it. Max 5 + results. 
model: openai/gpt-oss-20b - previous_response_id: resp_a7852d7a7444baf5 + previous_response_id: resp_a5ee681c0f39cf97 store: true stream: false tool_choice: auto @@ -832,22 +800,23 @@ turns: response: body: background: false - created_at: 1782331445 + created_at: 1782332962 frequency_penalty: 0.0 - id: resp_a89e4ab94e1181a6 + id: resp_9394e8e449e8e40b incomplete_details: null input_messages: null instructions: null kv_transfer_params: null - max_output_tokens: 2870 + max_output_tokens: 3266 max_tool_calls: null metadata: null model: openai/gpt-oss-20b object: response output: - - arguments: '{"query":"ETL OutOfMemoryError transform stage runbook steps","max_results":5}' - call_id: call_88528befdb8e146c - id: fc_88528befdb8e146c + - arguments: '{"query":"etl-daily-ingest OutOfMemoryError transform stage retry + runbook","max_results":5}' + call_id: call_af062f63e1fe3d72 + id: fc_af062f63e1fe3d72 name: search_runbook namespace: null status: null @@ -855,7 +824,7 @@ turns: output_messages: null parallel_tool_calls: true presence_penalty: 0.0 - previous_response_id: resp_a7852d7a7444baf5 + previous_response_id: resp_a5ee681c0f39cf97 prompt: null reasoning: null service_tier: auto @@ -992,22 +961,22 @@ turns: top_p: 1.0 truncation: disabled usage: - input_tokens: 1226 + input_tokens: 830 input_tokens_details: - cached_tokens: 432 + cached_tokens: 560 cached_tokens_per_turn: - - 432 + - 560 input_tokens_per_turn: - - 1226 - output_tokens: 34 + - 830 + output_tokens: 37 output_tokens_details: output_tokens_per_turn: - - 34 - reasoning_tokens: 21 + - 37 + reasoning_tokens: 24 tool_output_tokens: 0 tool_output_tokens_per_turn: - 0 - total_tokens: 1260 + total_tokens: 867 user: null headers: content-type: application/json From 8ca36bb9e5b76c6be7c599fe649972d7c4fd858a Mon Sep 17 00:00:00 2001 From: Ashwin Giridharan Date: Wed, 24 Jun 2026 22:32:40 -0700 Subject: [PATCH 5/5] refactor: remove assert_stateful_chaining from accumulator tests MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The accumulator only processes response bodies — request structure (previous_response_id, store) is not its concern. Context retention is already proven by content assertions (e.g. "that job" resolving to job-382). Remove the helper and 2 tests that only validated recording integrity rather than accumulator behavior. Signed-off-by: Ashwin Giridharan --- .../tests/accumulator_cassette_test.rs | 48 ------------------- 1 file changed, 48 deletions(-) diff --git a/crates/agentic-core/tests/accumulator_cassette_test.rs b/crates/agentic-core/tests/accumulator_cassette_test.rs index 7e97eb2..cecc22e 100644 --- a/crates/agentic-core/tests/accumulator_cassette_test.rs +++ b/crates/agentic-core/tests/accumulator_cassette_test.rs @@ -534,38 +534,6 @@ fn has_reasoning(output: &[OutputItem]) -> bool { output.iter().any(|item| matches!(item, OutputItem::Reasoning(_))) } -/// Verifies that the cassette has proper `previous_response_id` chaining: -/// turn 1 has no `prev_id`, subsequent turns reference the prior response. 
-fn assert_stateful_chaining(cassette: &TurnCassette) { - for (i, turn) in cassette.turns.iter().enumerate() { - let body = turn.request.as_mapping().expect("request must be a map"); - let req_body = body - .get(serde_yml::Value::String("body".into())) - .and_then(serde_yml::Value::as_mapping) - .expect("request.body must be a map"); - let store = req_body - .get(serde_yml::Value::String("store".into())) - .and_then(serde_yml::Value::as_bool) - .unwrap_or(false); - assert!(store, "turn {} must have store=true", i + 1); - - if i == 0 { - let prev_id = req_body.get(serde_yml::Value::String("previous_response_id".into())); - assert!( - prev_id.is_none() || prev_id.unwrap().is_null(), - "turn 1 should have no previous_response_id" - ); - } else { - let prev_id = req_body.get(serde_yml::Value::String("previous_response_id".into())); - assert!( - prev_id.is_some() && !prev_id.unwrap().is_null(), - "turn {} must have a previous_response_id", - i + 1 - ); - } - } -} - /// Extracts the `arguments` JSON string from the first function call in output items. 
fn get_first_fc_arguments(output: &[OutputItem]) -> String { output @@ -585,13 +553,6 @@ fn get_first_fc_arguments(output: &[OutputItem]) -> String { // Non-streaming, store=true, previous_response_id chain // ═══════════════════════════════════════════════════════════════════ -#[test] -fn test_stateful_responses_3turn_chaining() { - let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); - assert_eq!(cassette.turns.len(), 3); - assert_stateful_chaining(&cassette); -} - #[test] fn test_stateful_responses_3turn_tool_calls() { let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); @@ -660,13 +621,6 @@ fn test_stateful_responses_3turn_null_status_deserialization() { // get_job_status → get_error_logs → search_runbook → run_analysis → restart_job // ═══════════════════════════════════════════════════════════════════ -#[test] -fn test_stateful_responses_5turn_chaining() { - let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); - assert_eq!(cassette.turns.len(), 5); - assert_stateful_chaining(&cassette); -} - #[test] fn test_stateful_responses_5turn_tool_sequence() { let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); @@ -920,7 +874,6 @@ fn test_openai_parallel_tool_outputs_in_request() { fn test_openai_3turn_parses_and_retains_context() { let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_3turn.yaml"); assert_eq!(cassette.turns.len(), 3); - assert_stateful_chaining(&cassette); let t1 = process_nonstreaming_turn(&cassette, 0, "gpt-4o"); assert_eq!(get_function_call_names(&t1), vec!["get_job_status"]); @@ -938,7 +891,6 @@ fn test_openai_3turn_parses_and_retains_context() { fn test_openai_5turn_full_sequence() { let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_5turn.yaml"); assert_eq!(cassette.turns.len(), 5); - assert_stateful_chaining(&cassette); let 
expected_tools = [ "get_job_status",