diff --git a/crates/agentic-core/src/executor/accumulator.rs b/crates/agentic-core/src/executor/accumulator.rs index bdd5cd0..ccf7cd4 100644 --- a/crates/agentic-core/src/executor/accumulator.rs +++ b/crates/agentic-core/src/executor/accumulator.rs @@ -203,7 +203,7 @@ impl ResponseAccumulator { if !self.accumulated_arguments.is_empty() && fc.arguments.is_empty() { fc.arguments = std::mem::take(&mut self.accumulated_arguments); } - fc.status = "completed".to_string(); + fc.status = MessageStatus::Completed; self.output.push(OutputItem::FunctionCall(fc)); } self.accumulated_arguments.clear(); @@ -277,7 +277,7 @@ impl ResponseAccumulator { call_id: call_id.clone().unwrap_or_default(), name: name.clone().unwrap_or_default(), arguments: String::new(), - status: "in_progress".to_string(), + status: MessageStatus::InProgress, }); } _ => { @@ -776,7 +776,7 @@ mod tests { assert_eq!(fc.call_id, "call_abc"); assert_eq!(fc.name, "get_weather"); assert_eq!(fc.arguments, r#"{"location":"Paris"}"#); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); } else { panic!("expected FunctionCall"); } @@ -1099,7 +1099,7 @@ mod tests { assert_eq!(acc.output.len(), 1); if let OutputItem::FunctionCall(fc) = &acc.output[0] { assert_eq!(fc.arguments, r#"{"x":1}"#); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); } else { panic!("expected FunctionCall"); } diff --git a/crates/agentic-core/src/types/io.rs b/crates/agentic-core/src/types/io.rs index 9e24d8e..b0fe4ef 100644 --- a/crates/agentic-core/src/types/io.rs +++ b/crates/agentic-core/src/types/io.rs @@ -1,6 +1,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; +use super::event::MessageStatus; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InputTextContent { #[serde(rename = "type")] @@ -121,7 +123,21 @@ pub struct FunctionToolCall { pub call_id: String, pub name: String, pub arguments: String, - pub status: String, + #[serde(default = "default_completed_status")] + #[serde(deserialize_with = "deserialize_status_or_default")] + pub status: MessageStatus, +} + +fn default_completed_status() -> MessageStatus { + MessageStatus::Completed +} + +fn deserialize_status_or_default<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let opt: Option = Option::deserialize(deserializer)?; + Ok(opt.unwrap_or(MessageStatus::Completed)) } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/agentic-core/tests/accumulator_cassette_test.rs b/crates/agentic-core/tests/accumulator_cassette_test.rs index 3a06524..cecc22e 100644 --- a/crates/agentic-core/tests/accumulator_cassette_test.rs +++ b/crates/agentic-core/tests/accumulator_cassette_test.rs @@ -8,6 +8,7 @@ use serde::Deserialize; use agentic_core::executor::accumulator::ResponseAccumulator; +use agentic_core::types::event::MessageStatus; use agentic_core::types::io::OutputItem; const CASSETTE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/events"); @@ -106,7 +107,7 @@ fn test_accumulator_cassette_function_call_vllm_gemma4() { if let OutputItem::FunctionCall(fc) = &payload.output[0] { assert_eq!(fc.name, expected_fc.name); assert_eq!(fc.arguments, expected_fc.arguments); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); assert!(!fc.call_id.is_empty(), "call_id should be populated"); assert!(!fc.id.is_empty(), "id should be populated"); } else { @@ -167,7 +168,7 @@ fn test_tool_calls_cassette_auto_streaming() { if let OutputItem::FunctionCall(fc) = item { assert!(!fc.name.is_empty(), "function call name must not be empty"); assert!(!fc.arguments.is_empty(), "function call arguments must not be empty"); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); assert!(!fc.call_id.is_empty(), "call_id must be populated"); } } @@ -200,7 +201,7 @@ fn test_tool_calls_cassette_required_streaming() { for item in &function_calls { if let OutputItem::FunctionCall(fc) = item { - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); } } } @@ -292,7 +293,7 @@ fn test_tool_calls_cassette_auto_nonstreaming() { if let OutputItem::FunctionCall(fc) = item { assert!(!fc.name.is_empty()); assert!(!fc.arguments.is_empty()); - assert_eq!(fc.status, "completed"); + assert_eq!(fc.status, MessageStatus::Completed); assert!(!fc.call_id.is_empty()); } } @@ -467,3 +468,557 @@ fn test_reasoning_cassette_gpt_oss_streaming() { "reasoning-only cassette should not produce function calls" ); } + +// === Stateful multi-turn cassette tests (previous_response_id chaining) === +// +// These cassettes are recorded against gpt-oss-20b with `store=true` and +// `previous_response_id` chaining (via record_cassette.py --mode responses). +// They exercise realistic multi-turn conversations where the server maintains +// conversation state — the key pattern our accumulator must handle for PR #67. +// +// Scenario: SRE debugging a failed ETL pipeline job-382. +// Tools: get_job_status, get_error_logs, search_runbook, run_analysis, +// restart_job, web_search. + +const MULTI_TURN_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/tool_calls/multi_turn"); + +// --- Helpers --- + +fn process_nonstreaming_turn(cassette: &TurnCassette, turn_idx: usize, model: &str) -> Vec { + let body = cassette.turns[turn_idx] + .response + .body + .as_ref() + .unwrap_or_else(|| panic!("turn {} must have response body", turn_idx + 1)); + let body_str = serde_json::to_string(body).unwrap(); + let acc = ResponseAccumulator::from_json(&body_str, None).unwrap(); + let payload = acc.finalize(model, None, None); + assert_eq!(payload.status, "completed"); + payload.output +} + +fn process_streaming_turn(cassette: &TurnCassette, turn_idx: usize, model: &str) -> Vec { + let data_lines = extract_data_lines(&cassette.turns[turn_idx].response.sse); + assert!( + !data_lines.is_empty(), + "streaming turn {} must have SSE data lines", + turn_idx + 1 + ); + let acc = ResponseAccumulator::from_sse_lines(data_lines, None); + let payload = acc.finalize(model, None, None); + assert_eq!(payload.status, "completed"); + payload.output +} + +fn count_function_calls(output: &[OutputItem]) -> usize { + output + .iter() + .filter(|item| matches!(item, OutputItem::FunctionCall(_))) + .count() +} + +fn get_function_call_names(output: &[OutputItem]) -> Vec { + output + .iter() + .filter_map(|item| { + if let OutputItem::FunctionCall(fc) = item { + Some(fc.name.clone()) + } else { + None + } + }) + .collect() +} + +fn has_reasoning(output: &[OutputItem]) -> bool { + output.iter().any(|item| matches!(item, OutputItem::Reasoning(_))) +} + +/// Extracts the `arguments` JSON string from the first function call in output items. +fn get_first_fc_arguments(output: &[OutputItem]) -> String { + output + .iter() + .find_map(|item| { + if let OutputItem::FunctionCall(fc) = item { + Some(fc.arguments.clone()) + } else { + None + } + }) + .expect("output must contain at least one function call") +} + +// ═══════════════════════════════════════════════════════════════════ +// Stateful 3-turn: get_job_status → get_error_logs → search_runbook +// Non-streaming, store=true, previous_response_id chain +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_3turn_tool_calls() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + + let t1 = process_nonstreaming_turn(&cassette, 0, "openai/gpt-oss-20b"); + let t1_names = get_function_call_names(&t1); + assert_eq!(count_function_calls(&t1), 1); + assert_eq!(t1_names, vec!["get_job_status"]); + assert!(has_reasoning(&t1)); + + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_names = get_function_call_names(&t2); + assert_eq!(count_function_calls(&t2), 1); + assert_eq!(t2_names, vec!["get_error_logs"]); + + let t3 = process_nonstreaming_turn(&cassette, 2, "openai/gpt-oss-20b"); + let t3_names = get_function_call_names(&t3); + assert_eq!(count_function_calls(&t3), 1); + assert_eq!(t3_names, vec!["search_runbook"]); +} + +/// Context retention proof: turn 2 prompt says "that job" (no explicit job ID), +/// but the model resolves it to "job-382" because `previous_response_id` gives +/// it access to turn 1's conversation state. +#[test] +fn test_stateful_responses_3turn_context_retention() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + + // Turn 2 prompt says "that job" — model must resolve from turn 1 context + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "turn 2 must resolve 'that job' to 'job-382' via retained context, got: {t2_args}" + ); + + // Turn 3 prompt says "those errors" — model must recall turn 2's investigation + let t3 = process_nonstreaming_turn(&cassette, 2, "openai/gpt-oss-20b"); + let t3_args = get_first_fc_arguments(&t3); + assert!( + t3_args.contains("job-382") || t3_args.contains("error") || t3_args.contains("ETL"), + "turn 3 must reference context from earlier turns, got: {t3_args}" + ); +} + +#[test] +fn test_stateful_responses_3turn_null_status_deserialization() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn.yaml"); + for i in 0..3 { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + for item in &output { + if let OutputItem::FunctionCall(fc) = item { + assert_eq!( + fc.status, + MessageStatus::Completed, + "turn {} function_call status must default to Completed (gpt-oss emits null)", + i + 1 + ); + } + } + } +} + +// ═══════════════════════════════════════════════════════════════════ +// Stateful 5-turn: full investigation pipeline +// get_job_status → get_error_logs → search_runbook → run_analysis → restart_job +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_5turn_tool_sequence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); + + let expected_tools = [ + "get_job_status", + "get_error_logs", + "search_runbook", + "run_analysis", + "restart_job", + ]; + for (i, expected) in expected_tools.iter().enumerate() { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + let names = get_function_call_names(&output); + assert_eq!(names.len(), 1, "turn {} should call exactly 1 tool", i + 1); + assert_eq!(&names[0], expected, "turn {} should call {expected}", i + 1); + assert!(has_reasoning(&output), "turn {} should have reasoning", i + 1); + } +} + +/// Context retention proof for 5-turn: turn 5 says "restart it" without naming +/// job-382, but the model resolves correctly because all prior context is retained. +#[test] +fn test_stateful_responses_5turn_context_retention() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); + + // Turn 2: "that failed job" → must resolve to job-382 + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "turn 2 'that failed job' must resolve to job-382, got: {t2_args}" + ); + + // Turn 5: "restart it" → must resolve to job-382 with correct params + let t5 = process_nonstreaming_turn(&cassette, 4, "openai/gpt-oss-20b"); + let t5_args = get_first_fc_arguments(&t5); + assert!( + t5_args.contains("job-382"), + "turn 5 'restart it' must resolve to job-382, got: {t5_args}" + ); + assert!( + t5_args.contains("64"), + "turn 5 must include memory_override_gb=64, got: {t5_args}" + ); +} + +#[test] +fn test_stateful_responses_5turn_function_call_fields() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_5turn.yaml"); + for i in 0..5 { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + for item in &output { + if let OutputItem::FunctionCall(fc) = item { + assert!(!fc.id.is_empty(), "turn {} fc.id must not be empty", i + 1); + assert!(!fc.call_id.is_empty(), "turn {} fc.call_id must not be empty", i + 1); + assert!(!fc.name.is_empty(), "turn {} fc.name must not be empty", i + 1); + assert!( + !fc.arguments.is_empty(), + "turn {} fc.arguments must not be empty", + i + 1 + ); + assert_eq!(fc.status, MessageStatus::Completed); + } + } + } +} + +// ═══════════════════════════════════════════════════════════════════ +// Stateful 3-turn streaming: SSE events with previous_response_id +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_streaming_3turn() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn_streaming.yaml"); + assert_eq!(cassette.turns.len(), 3); + + for i in 0..3 { + let output = process_streaming_turn(&cassette, i, "openai/gpt-oss-20b"); + assert!( + count_function_calls(&output) >= 1, + "streaming turn {} must produce at least one function_call", + i + 1 + ); + for item in &output { + if let OutputItem::FunctionCall(fc) = item { + assert!(!fc.call_id.is_empty(), "streaming fc must have call_id"); + assert!(!fc.name.is_empty(), "streaming fc must have name"); + assert!(!fc.arguments.is_empty(), "streaming fc must have arguments"); + assert_eq!(fc.status, MessageStatus::Completed); + } + } + } +} + +/// Context retention in streaming mode: turn 2 says "that job" and the model +/// resolves it to "job-382" even in streaming (SSE) delivery. +#[test] +fn test_stateful_responses_streaming_context_retention() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_3turn_streaming.yaml"); + + // Turn 2: "that job" → must resolve to job-382 in streaming mode + let t2 = process_streaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "streaming turn 2 must resolve 'that job' to job-382, got: {t2_args}" + ); +} + +// ═══════════════════════════════════════════════════════════════════ +// Branching: turn 3 diverges from turn 1 (not turn 2) +// Tests previous_response_id pointing back to an earlier response +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_branch_divergence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_branch.yaml"); + assert_eq!(cassette.turns.len(), 3); + + // Turn 1: no prev_id + let body1 = cassette.turns[0].request.as_mapping().unwrap(); + let req1 = body1 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev1 = req1.get(serde_yml::Value::String("previous_response_id".into())); + assert!(prev1.is_none() || prev1.unwrap().is_null()); + + // Turn 2: prev_id = turn 1's response id + let body2 = cassette.turns[1].request.as_mapping().unwrap(); + let req2 = body2 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev2 = req2 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 2 must have prev_id"); + + // Turn 3: prev_id = turn 1's response id (branches back, NOT from turn 2) + let body3 = cassette.turns[2].request.as_mapping().unwrap(); + let req3 = body3 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev3 = req3 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 3 must have prev_id"); + + // Turn 2 and Turn 3 both point to the same response (turn 1) + assert_eq!( + prev2, prev3, + "branch: turn 3 should reference same prev_id as turn 2 (turn 1's response)" + ); +} + +#[test] +fn test_stateful_responses_branch_all_turns_parse() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_branch.yaml"); + for i in 0..3 { + let output = process_nonstreaming_turn(&cassette, i, "openai/gpt-oss-20b"); + assert!( + count_function_calls(&output) >= 1, + "branch turn {} must produce a function_call", + i + 1 + ); + assert!(has_reasoning(&output), "branch turn {} should have reasoning", i + 1); + } +} + +// ═══════════════════════════════════════════════════════════════════ +// Cross-cassette: all stateful cassettes parse without error +// ═══════════════════════════════════════════════════════════════════ + +// ═══════════════════════════════════════════════════════════════════ +// Tool-output-only turn: model responds autonomously with text +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_stateful_responses_tool_output_only_produces_text() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "responses_tool_calls_tool_output_only.yaml"); + assert_eq!(cassette.turns.len(), 3); + + // Turn 2 has tool output only (no user message) → model should produce text + let t2 = process_nonstreaming_turn(&cassette, 1, "openai/gpt-oss-20b"); + let has_text = t2.iter().any(|item| matches!(item, OutputItem::Message(_))); + assert!(has_text, "tool-output-only turn should produce a text response"); +} + +// ═══════════════════════════════════════════════════════════════════ +// Parallel tool calls (OpenAI only — gpt-4o reliably produces these) +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_openai_parallel_tool_calls() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_parallel.yaml"); + assert_eq!(cassette.turns.len(), 3); + + // Turn 1 should have 2 parallel function calls + let t1 = process_nonstreaming_turn(&cassette, 0, "gpt-4o"); + let t1_names = get_function_call_names(&t1); + assert!( + t1_names.len() >= 2, + "parallel cassette turn 1 must have 2+ function calls, got: {t1_names:?}" + ); + assert!(t1_names.contains(&"get_job_status".to_string())); + assert!(t1_names.contains(&"web_search".to_string())); +} + +/// Verifies that the request input for turn 2 contains multiple `function_call_output` +/// items (one per parallel call from turn 1). +#[test] +fn test_openai_parallel_tool_outputs_in_request() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_parallel.yaml"); + + let body2 = cassette.turns[1].request.as_mapping().unwrap(); + let req2 = body2 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let input2 = req2 + .get(serde_yml::Value::String("input".into())) + .expect("turn 2 must have input"); + let input_seq = input2.as_sequence().expect("turn 2 input must be a list"); + + let tool_outputs: Vec<_> = input_seq + .iter() + .filter(|item| { + item.as_mapping() + .and_then(|m| m.get(serde_yml::Value::String("type".into()))) + .and_then(serde_yml::Value::as_str) + == Some("function_call_output") + }) + .collect(); + + assert!( + tool_outputs.len() >= 2, + "turn 2 input must contain 2+ function_call_output items for parallel calls, got {}", + tool_outputs.len() + ); +} + +// ═══════════════════════════════════════════════════════════════════ +// OpenAI cassettes: verify they parse identically to vLLM +// (status is "completed" string, not null) +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_openai_3turn_parses_and_retains_context() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_3turn.yaml"); + assert_eq!(cassette.turns.len(), 3); + + let t1 = process_nonstreaming_turn(&cassette, 0, "gpt-4o"); + assert_eq!(get_function_call_names(&t1), vec!["get_job_status"]); + + // Context retention: turn 2 says "that job" + let t2 = process_nonstreaming_turn(&cassette, 1, "gpt-4o"); + let t2_args = get_first_fc_arguments(&t2); + assert!( + t2_args.contains("job-382"), + "OpenAI turn 2 must resolve 'that job' to job-382, got: {t2_args}" + ); +} + +#[test] +fn test_openai_5turn_full_sequence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_5turn.yaml"); + assert_eq!(cassette.turns.len(), 5); + + let expected_tools = [ + "get_job_status", + "get_error_logs", + "search_runbook", + "run_analysis", + "restart_job", + ]; + for (i, expected) in expected_tools.iter().enumerate() { + let output = process_nonstreaming_turn(&cassette, i, "gpt-4o"); + let names = get_function_call_names(&output); + assert_eq!(names.len(), 1, "OpenAI turn {} should call 1 tool", i + 1); + assert_eq!(&names[0], expected, "OpenAI turn {} should call {expected}", i + 1); + } +} + +#[test] +fn test_openai_streaming_3turn() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_3turn_streaming.yaml"); + assert_eq!(cassette.turns.len(), 3); + + for i in 0..3 { + let output = process_streaming_turn(&cassette, i, "gpt-4o"); + assert!( + count_function_calls(&output) >= 1, + "OpenAI streaming turn {} must produce a function_call", + i + 1 + ); + } +} + +#[test] +fn test_openai_branch_divergence() { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, "openai_responses_tool_calls_branch.yaml"); + assert_eq!(cassette.turns.len(), 3); + + let body2 = cassette.turns[1].request.as_mapping().unwrap(); + let req2 = body2 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev2 = req2 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 2 must have prev_id"); + + let body3 = cassette.turns[2].request.as_mapping().unwrap(); + let req3 = body3 + .get(serde_yml::Value::String("body".into())) + .and_then(serde_yml::Value::as_mapping) + .unwrap(); + let prev3 = req3 + .get(serde_yml::Value::String("previous_response_id".into())) + .and_then(serde_yml::Value::as_str) + .expect("turn 3 must have prev_id"); + + assert_eq!( + prev2, prev3, + "OpenAI branch: turn 3 must branch from turn 1 (same prev_id as turn 2)" + ); +} + +// ═══════════════════════════════════════════════════════════════════ +// Cross-cassette: ALL stateful cassettes parse without error +// ═══════════════════════════════════════════════════════════════════ + +#[test] +fn test_all_stateful_cassettes_parse_without_error() { + let nonstreaming = [ + "responses_tool_calls_3turn.yaml", + "responses_tool_calls_5turn.yaml", + "responses_tool_calls_branch.yaml", + "responses_tool_calls_parallel.yaml", + "responses_tool_calls_tool_output_only.yaml", + "openai_responses_tool_calls_3turn.yaml", + "openai_responses_tool_calls_5turn.yaml", + "openai_responses_tool_calls_branch.yaml", + "openai_responses_tool_calls_parallel.yaml", + "openai_responses_tool_calls_tool_output_only.yaml", + ]; + + for filename in &nonstreaming { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, filename); + for i in 0..cassette.turns.len() { + let body = cassette.turns[i] + .response + .body + .as_ref() + .unwrap_or_else(|| panic!("{filename} turn {i} must have body")); + let body_str = serde_json::to_string(body).unwrap(); + let result = ResponseAccumulator::from_json(&body_str, None); + assert!( + result.is_ok(), + "{filename} turn {} failed to parse: {:?}", + i + 1, + result.err() + ); + let payload = result.unwrap().finalize("gpt-4o", None, None); + assert_eq!( + payload.status, + "completed", + "{filename} turn {} status != completed", + i + 1 + ); + } + } + + let streaming = [ + "responses_tool_calls_3turn_streaming.yaml", + "openai_responses_tool_calls_3turn_streaming.yaml", + ]; + for filename in &streaming { + let cassette = load_turn_cassette_from(MULTI_TURN_DIR, filename); + for i in 0..cassette.turns.len() { + let data_lines = extract_data_lines(&cassette.turns[i].response.sse); + assert!( + !data_lines.is_empty(), + "{filename} turn {} has no SSE data lines", + i + 1 + ); + let acc = ResponseAccumulator::from_sse_lines(data_lines, None); + let payload = acc.finalize("gpt-4o", None, None); + assert_eq!( + payload.status, + "completed", + "{filename} turn {} status != completed", + i + 1 + ); + } + } +} diff --git a/crates/agentic-core/tests/cassettes/record_cassette.py b/crates/agentic-core/tests/cassettes/record_cassette.py index d95390f..41caa0f 100644 --- a/crates/agentic-core/tests/cassettes/record_cassette.py +++ b/crates/agentic-core/tests/cassettes/record_cassette.py @@ -279,16 +279,16 @@ def _create_conversation(client: httpx.Client, proxy_url: str) -> str: return conv_id -def _send_nonstreaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None: +def _send_nonstreaming(client: httpx.Client, body: dict, proxy_url: str) -> dict | None: resp = client.post(f"{proxy_url}/v1/responses", json=body, timeout=300) resp.raise_for_status() data = resp.json() print(f"\n[Response]\n{json.dumps(data, indent=2)}\n") - return data.get("id") + return data -def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> str | None: - response_id = None +def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> dict | None: + response_data = None print("\n[Streaming response]") with client.stream( "POST", f"{proxy_url}/v1/responses", json=body, timeout=300 @@ -302,14 +302,14 @@ def _send_streaming(client: httpx.Client, body: dict, proxy_url: str) -> str | N try: payload = json.loads(line[5:].strip()) if payload.get("type") == "response.completed": - response_id = payload.get("response", {}).get("id") + response_data = payload.get("response") except Exception: pass print() - return response_id + return response_data -def _send(client: httpx.Client, body: dict, stream: bool, proxy_url: str) -> str | None: +def _send(client: httpx.Client, body: dict, stream: bool, proxy_url: str) -> dict | None: return ( _send_streaming(client, body, proxy_url) if stream @@ -332,6 +332,48 @@ def _inject_tools(body: dict, tools: list | None, tool_choice: Any) -> None: body["tool_choice"] = tool_choice +def _extract_function_calls(response_data: dict | None) -> list[dict]: + """Extract function_call items from a response's output array.""" + if not response_data: + return [] + output = response_data.get("output", []) + return [item for item in output if item.get("type") == "function_call"] + + +def _build_tool_output_input( + function_calls: list[dict], + tool_outputs: dict[str, str], + user_prompt: str | None, +) -> list[dict]: + """Build an input list with function_call_output items followed by optional user message. + + Args: + function_calls: function_call items from the previous response. + tool_outputs: mapping of tool name -> fake JSON output string. + user_prompt: the next user message (None for tool-output-only turns). + + Returns: + A list suitable for the `input` field of the next request. + """ + input_items: list[dict] = [] + for fc in function_calls: + call_id = fc.get("call_id", "") + name = fc.get("name", "") + output = tool_outputs.get(name, json.dumps({"result": f"mock output for {name}"})) + input_items.append({ + "type": "function_call_output", + "call_id": call_id, + "output": output, + }) + if user_prompt: + input_items.append({ + "type": "message", + "role": "user", + "content": user_prompt, + }) + return input_items + + def run_conv( client: httpx.Client, turns: int, @@ -371,7 +413,8 @@ def run_conv( body["previous_response_id"] = previous_response_id else: body["conversation"] = conv_id - response_id = _send(client, body, stream, proxy_url) + response_data = _send(client, body, stream, proxy_url) + response_id = response_data.get("id") if response_data else None if response_id: response_ids[turn] = response_id previous_response_id = response_id @@ -466,7 +509,8 @@ def run_mixed( body["previous_response_id"] = previous_response_id else: body["conversation"] = conv_id - previous_response_id = _send(client, body, stream, proxy_url) + response_data = _send(client, body, stream, proxy_url) + previous_response_id = response_data.get("id") if response_data else None def run_responses( @@ -479,8 +523,10 @@ def run_responses( proxy_url: str, tools: list | None = None, tool_choice: Any = None, + tool_outputs: dict[str, str] | None = None, ) -> None: response_ids: dict[int, str] = {} + responses: dict[int, dict] = {} branch_map: dict[int, int] = {} extra_branches: list[int] = [] for branch_from, branch_turn_number in branches: @@ -490,6 +536,7 @@ def run_responses( extra_branches.append(branch_from) previous_response_id: str | None = None + last_response: dict | None = None for turn in range(1, turns + 1): if turn in branch_map: branch_from = branch_map[turn] @@ -499,18 +546,32 @@ def run_responses( f"(available: {sorted(response_ids)})" ) previous_response_id = response_ids[branch_from] + last_response = responses.get(branch_from) click.echo( f"\n[Branch] turn {turn} chains from turn {branch_from} (response_id={previous_response_id})" ) prompt = _prompt(f"Turn {turn}/{turns} — enter prompt: ") - body: dict = {"model": model, "input": prompt, "stream": stream, "store": store} + + # Build input: if previous response had function calls and we have tool_outputs, + # inject function_call_output items before the user message. + pending_calls = _extract_function_calls(last_response) if tool_outputs else [] + if pending_calls and tool_outputs: + input_value: Any = _build_tool_output_input(pending_calls, tool_outputs, prompt if prompt else None) + click.echo(f" [injecting {len(pending_calls)} tool output(s) before user message]") + else: + input_value = prompt + + body: dict = {"model": model, "input": input_value, "stream": stream, "store": store} if previous_response_id and store: body["previous_response_id"] = previous_response_id _inject_tools(body, tools, tool_choice) - response_id = _send(client, body, stream, proxy_url) + response_data = _send(client, body, stream, proxy_url) + response_id = response_data.get("id") if response_data else None previous_response_id = response_id if store else None + last_response = response_data if response_id: response_ids[turn] = response_id + responses[turn] = response_data for b_idx, branch_from in enumerate(extra_branches, start=1): if branch_from not in response_ids: @@ -519,15 +580,24 @@ def run_responses( f"(available: {sorted(response_ids)})" ) branch_resp_id = response_ids[branch_from] + branch_response = responses.get(branch_from) click.echo( f"\n[Extra branch {b_idx}] from turn {branch_from} (response_id={branch_resp_id}), turn {turns + 1}" ) prompt = _prompt( f"Turn {turns + 1} (extra branch from turn {branch_from}) — enter prompt: " ) + + pending_calls = _extract_function_calls(branch_response) if tool_outputs else [] + if pending_calls and tool_outputs: + input_value = _build_tool_output_input(pending_calls, tool_outputs, prompt if prompt else None) + click.echo(f" [injecting {len(pending_calls)} tool output(s) before user message]") + else: + input_value = prompt + body = { "model": model, - "input": prompt, + "input": input_value, "stream": stream, "store": store, "previous_response_id": branch_resp_id, @@ -619,6 +689,16 @@ def run_responses( default=None, help='tool_choice value: "auto", "none", "required", or JSON e.g. \'{"type":"function","name":"foo"}\'.', ) +@click.option( + "--tool-outputs", + "tool_outputs_file", + metavar="FILE", + default=None, + type=click.Path(exists=True), + help="Path to a JSON file mapping tool names to fake output strings. " + "When provided, function_call_output items are automatically injected " + "between turns (required for OpenAI Responses API).", +) def main( turns: int, output: str, @@ -633,6 +713,7 @@ def main( vllm_url: str | None, tools_file: str | None, tool_choice_raw: str | None, + tool_outputs_file: str | None, ) -> None: """Interactive multi-turn cassette recorder (proxy embedded).""" if branch_turn_number and not branch_from: @@ -668,6 +749,14 @@ def main( else: tool_choice = stripped + tool_outputs: dict[str, str] | None = None + if tool_outputs_file: + with open(tool_outputs_file, encoding="utf-8") as f: + tool_outputs = json.load(f) + if not isinstance(tool_outputs, dict): + raise click.UsageError("--tool-outputs file must contain a JSON object (name -> output string).") + click.echo(f"Tool outputs: {list(tool_outputs.keys())}") + if vllm_url: target = vllm_url.rstrip("/") headers: dict = {} @@ -703,7 +792,7 @@ def main( elif mode == "mixed": run_mixed(client, turns, model, stream, store, proxy_url) elif mode == "responses": - run_responses(client, turns, model, stream, store, branches, proxy_url, tools, tool_choice) + run_responses(client, turns, model, stream, store, branches, proxy_url, tools, tool_choice, tool_outputs) elif mode == "store_true_then_store_false": run_store_true_then_store_false(client, turns, model, stream, proxy_url) finally: diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml new file mode 100644 index 0000000..936d837 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn.yaml @@ -0,0 +1,966 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332973 + created_at: 1782332972 + error: null + frequency_penalty: 0.0 + id: resp_053fd4e04cde0a13006a3c3e2cb15481999f99b43d8b7ba0eb + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_3X1PVcQ6ehCZzHs06wCQYUKS + id: fc_053fd4e04cde0a13006a3c3e2d915c8199bbe2658507aca5a3 + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 355 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 374 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_3X1PVcQ6ehCZzHs06wCQYUKS + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Now pull the error logs for that job. Use severity ERROR and max + 10 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_053fd4e04cde0a13006a3c3e2cb15481999f99b43d8b7ba0eb + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332974 + created_at: 1782332974 + error: null + frequency_penalty: 0.0 + id: resp_053fd4e04cde0a13006a3c3e2e09fc8199b946b967b1807204 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_fOWZK3bqWYpKzLkZZ43j8CkC + id: fc_053fd4e04cde0a13006a3c3e2ea0148199bca9b82969b56771 + name: get_error_logs + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_053fd4e04cde0a13006a3c3e2cb15481999f99b43d8b7ba0eb + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 493 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 521 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_fOWZK3bqWYpKzLkZZ43j8CkC + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Based on those errors, search the runbook for troubleshooting procedures. + Max 5 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_053fd4e04cde0a13006a3c3e2e09fc8199b946b967b1807204 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332976 + created_at: 1782332975 + error: null + frequency_penalty: 0.0 + id: resp_053fd4e04cde0a13006a3c3e2f21b481999e17dd868d5c3d17 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"OutOfMemoryError in Spark ETL pipeline","max_results":5}' + call_id: call_YDBEHTdaQUL2SLAoITTufq5z + id: fc_053fd4e04cde0a13006a3c3e3070e8819985588ede678d1791 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_053fd4e04cde0a13006a3c3e2e09fc8199b946b967b1807204 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 746 + input_tokens_details: + cached_tokens: 0 + output_tokens: 29 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 775 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml new file mode 100644 index 0000000..f501914 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_3turn_streaming.yaml @@ -0,0 +1,1159 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the status of pipeline job-382. + model: gpt-4o + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","object":"response","created_at":1782332992,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","object":"response","created_at":1782332992,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","type":"function_call","status":"in_progress","arguments":"","call_id":"call_m0qbDty5eurPBYlBk4SYj56W","name":"get_job_status"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"WODcq0wkHt5FMF","output_index":0,"sequence_number":3} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"Jskf7BfO75Zhk","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"llConHT5qKDTS","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"dCtjoUPsYOrEJ","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"FKScgiPaaGmzx","output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"JNBPd8KZl6rYnqi","output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"WtumUsbY9K08L","output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","obfuscation":"7A1QatmnZVJGk8","output_index":0,"sequence_number":10} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\"}","item_id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","output_index":0,"sequence_number":11} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_m0qbDty5eurPBYlBk4SYj56W","name":"get_job_status"},"output_index":0,"sequence_number":12} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","object":"response","created_at":1782332992,"status":"completed","background":false,"completed_at":1782332993,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0e42cef53189ada4006a3c3e413cf0819baecedc47e8a4b589","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\"}","call_id":"call_m0qbDty5eurPBYlBk4SYj56W","name":"get_job_status"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":352,"input_tokens_details":{"cached_tokens":0},"output_tokens":19,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":371},"user":null,"metadata":{}},"sequence_number":13} + + ' + - ' + + ' + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_m0qbDty5eurPBYlBk4SYj56W + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity FATAL and max 5 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7 + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","object":"response","created_at":1782332993,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","object":"response","created_at":1782332993,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","type":"function_call","status":"in_progress","arguments":"","call_id":"call_aq4lgp6yhETf6w1Zpj2SHntC","name":"get_error_logs"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"iLw0NMihFPMApr","output_index":0,"sequence_number":3} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"FjGAyVccdn2gu","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"_id","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"Aa3qC3zt8w6mm","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"EtsFDF4zrqZ0g","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"job","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"HIZZBI73fLyOf","output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"-","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"yhGCenW9GMXRXZe","output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"382","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"XE8ryIoPINJff","output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"zSI6VXrNICvg3","output_index":0,"sequence_number":10} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"severity","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"a53mC2Si","output_index":0,"sequence_number":11} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"aLEgybiscXeEF","output_index":0,"sequence_number":12} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"F","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"99FsVG7EViie3lm","output_index":0,"sequence_number":13} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"ATAL","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"ijPqMdZnQWIX","output_index":0,"sequence_number":14} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\",\"","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"jskgORycqIzNC","output_index":0,"sequence_number":15} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"max","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"DHRNHXN9lsOCk","output_index":0,"sequence_number":16} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"_entries","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"y1xNGfLu","output_index":0,"sequence_number":17} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"ckcPggGOsxFHbN","output_index":0,"sequence_number":18} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"5","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"h0gQDgoxY4rBOoo","output_index":0,"sequence_number":19} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"}","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","obfuscation":"GeNkUbiOsYTk0uI","output_index":0,"sequence_number":20} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","item_id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","output_index":0,"sequence_number":21} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_aq4lgp6yhETf6w1Zpj2SHntC","name":"get_error_logs"},"output_index":0,"sequence_number":22} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","object":"response","created_at":1782332993,"status":"completed","background":false,"completed_at":1782332995,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0e42cef53189ada4006a3c3e42f9dc819bbf2e41d26f4a6f5c","type":"function_call","status":"completed","arguments":"{\"job_id\":\"job-382\",\"severity\":\"FATAL\",\"max_entries\":5}","call_id":"call_aq4lgp6yhETf6w1Zpj2SHntC","name":"get_error_logs"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e409e88819bae4c63a2de75dad7","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":489,"input_tokens_details":{"cached_tokens":0},"output_tokens":29,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":518},"user":null,"metadata":{}},"sequence_number":23} + + ' + - ' + + ' + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_aq4lgp6yhETf6w1Zpj2SHntC + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the web for how to fix that type of error in Spark pipelines. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"type":"response.created","response":{"id":"resp_0e42cef53189ada4006a3c3e437b4c819bb1448a01a9362641","object":"response","created_at":1782332995,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"type":"response.in_progress","response":{"id":"resp_0e42cef53189ada4006a3c3e437b4c819bb1448a01a9362641","object":"response","created_at":1782332995,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"type":"response.output_item.added","item":{"id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","type":"function_call","status":"in_progress","arguments":"","call_id":"call_qsJ9rSGOQCBFJB5BS1GNhiE5","name":"web_search"},"output_index":0,"sequence_number":2} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"{\"","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"cMDEYfYOr0JfYj","output_index":0,"sequence_number":3} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"query","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"Ef5ze955AFg","output_index":0,"sequence_number":4} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\":\"","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"0Dbr6cTYwR2JZ","output_index":0,"sequence_number":5} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"how","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"gcfzzZLIVwMCZ","output_index":0,"sequence_number":6} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" to","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"4UwsnTZlgYlMc","output_index":0,"sequence_number":7} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" fix","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"pLBDJkxRXVD5","output_index":0,"sequence_number":8} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" Out","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"c9Tfq6056S7M","output_index":0,"sequence_number":9} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"Of","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"D2vVlttcDF40sT","output_index":0,"sequence_number":10} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"Memory","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"wF7wqcvXQC","output_index":0,"sequence_number":11} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"Error","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"onxosRL9d1Q","output_index":0,"sequence_number":12} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" in","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"q62JXm0Jsdm2y","output_index":0,"sequence_number":13} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" Spark","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"WJIU4dpRMI","output_index":0,"sequence_number":14} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":" pipeline","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"SuIqBKP","output_index":0,"sequence_number":15} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"type":"response.function_call_arguments.delta","delta":"\"}","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","obfuscation":"G3bqmOIR56xYlS","output_index":0,"sequence_number":16} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"type":"response.function_call_arguments.done","arguments":"{\"query\":\"how + to fix OutOfMemoryError in Spark pipeline\"}","item_id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","output_index":0,"sequence_number":17} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"type":"response.output_item.done","item":{"id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","type":"function_call","status":"completed","arguments":"{\"query\":\"how + to fix OutOfMemoryError in Spark pipeline\"}","call_id":"call_qsJ9rSGOQCBFJB5BS1GNhiE5","name":"web_search"},"output_index":0,"sequence_number":18} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"type":"response.completed","response":{"id":"resp_0e42cef53189ada4006a3c3e437b4c819bb1448a01a9362641","object":"response","created_at":1782332995,"status":"completed","background":false,"completed_at":1782332996,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-2024-08-06","moderation":null,"output":[{"id":"fc_0e42cef53189ada4006a3c3e449634819bb12e15903e5c908c","type":"function_call","status":"completed","arguments":"{\"query\":\"how + to fix OutOfMemoryError in Spark pipeline\"}","call_id":"call_qsJ9rSGOQCBFJB5BS1GNhiE5","name":"web_search"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":"resp_0e42cef53189ada4006a3c3e41a210819b9302803f31f7ce3e","prompt_cache_key":null,"prompt_cache_retention":"in_memory","reasoning":{"context":null,"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[{"type":"function","description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info.","name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true},{"type":"function","description":"Fetch + error logs and stack traces for a failed pipeline job.","name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + internal operations runbooks for troubleshooting procedures.","name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true},{"type":"function","description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection.","name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true},{"type":"function","description":"Restart + a failed pipeline job with optional configuration overrides.","name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true},{"type":"function","description":"Search + the public web for technical documentation, blog posts, and known issues.","name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true}],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":740,"input_tokens_details":{"cached_tokens":0},"output_tokens":24,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":764},"user":null,"metadata":{}},"sequence_number":19} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml new file mode 100644 index 0000000..f65edc5 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_5turn.yaml @@ -0,0 +1,1619 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. ETL pipeline job-382 failed overnight. What + is its current status? + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332981 + created_at: 1782332978 + error: null + frequency_penalty: 0.0 + id: resp_05ffdb1c1fbb4099006a3c3e321f04819b8f5178e57393bdbb + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_zUYtpSPtiZDIA0xgaf9MKQsL + id: fc_05ffdb1c1fbb4099006a3c3e3569f8819b8303dec8c3c61e7f + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 358 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 377 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_zUYtpSPtiZDIA0xgaf9MKQsL + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Pull the error logs for that failed job. Use severity ERROR and max + 20 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e321f04819b8f5178e57393bdbb + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332984 + created_at: 1782332981 + error: null + frequency_penalty: 0.0 + id: resp_05ffdb1c1fbb4099006a3c3e35df20819b8b6fb4d784425cbd + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' + call_id: call_bO7EyX3P64ApXE1D3og5t24S + id: fc_05ffdb1c1fbb4099006a3c3e37da80819bab37e6b91f212f18 + name: get_error_logs + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e321f04819b8f5178e57393bdbb + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 496 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 524 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_bO7EyX3P64ApXE1D3og5t24S + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the runbook for how to fix the issue found in those logs. + Max 5 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e35df20819b8b6fb4d784425cbd + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332987 + created_at: 1782332984 + error: null + frequency_penalty: 0.0 + id: resp_05ffdb1c1fbb4099006a3c3e38693c819bb3f436b554202a58 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"OutOfMemoryError heap space transform stage","max_results":5}' + call_id: call_05MIRpeAmcRcfWqIo2fNo0qL + id: fc_05ffdb1c1fbb4099006a3c3e3aca44819b90eed8647106b319 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e35df20819b8b6fb4d784425cbd + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 751 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 779 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t4 + request: + body: + input: + - call_id: call_05MIRpeAmcRcfWqIo2fNo0qL + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: 'Run this analysis code to summarize: import json; print(json.dumps({''job'': + ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': + ''increase memory to 64GB''}))' + role: user + type: message + model: gpt-4o + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e38693c819bb3f436b554202a58 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332989 + created_at: 1782332987 + error: null + frequency_penalty: 0.0 + id: resp_05ffdb1c1fbb4099006a3c3e3ba4b0819ba847aac847612c9c + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"code":"import json\nprint(json.dumps({''job'': ''job-382'', + ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase + memory to 64GB''}))","timeout_seconds":30}' + call_id: call_aLDbBvIdBv8z49RXYjC0bpk7 + id: fc_05ffdb1c1fbb4099006a3c3e3d4dbc819b86d79c1e871caafc + name: run_analysis + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e38693c819bb3f436b554202a58 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 970 + input_tokens_details: + cached_tokens: 0 + output_tokens: 58 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 1028 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: + input: + - call_id: call_aLDbBvIdBv8z49RXYjC0bpk7 + output: '{"stdout": "Failed stage: transform, count: 2\nPattern: consistent + OOM at partition-47 (2.1M rows)\nRecommendation: increase memory to 64GB + or repartition", "stderr": "", "exit_code": 0, "duration_ms": 120}' + type: function_call_output + - content: Great. Now restart it with 64 GB memory, skip completed stages, and + high priority. + role: user + type: message + model: gpt-4o + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e3ba4b0819ba847aac847612c9c + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782332991 + created_at: 1782332990 + error: null + frequency_penalty: 0.0 + id: resp_05ffdb1c1fbb4099006a3c3e3e2ed8819b977b9a9f508f2751 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_he6F2Xnx5vqdxnN2LS3rc5Dj + id: fc_05ffdb1c1fbb4099006a3c3e3ed060819b82206a634d7c8dd3 + name: restart_job + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_05ffdb1c1fbb4099006a3c3e3ba4b0819ba847aac847612c9c + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 1120 + input_tokens_details: + cached_tokens: 0 + output_tokens: 36 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 1156 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml new file mode 100644 index 0000000..3a946cd --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_branch.yaml @@ -0,0 +1,959 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333000 + created_at: 1782332998 + error: null + frequency_penalty: 0.0 + id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_pn7qCsEVkAGhDdjzZLCH9cWT + id: fc_060930d6abfe49c7006a3c3e47edc0819ab59873692fe0f672 + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 355 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 374 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_pn7qCsEVkAGhDdjzZLCH9cWT + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity ERROR and max 10 entries. + role: user + type: message + model: gpt-4o + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333001 + created_at: 1782333000 + error: null + frequency_penalty: 0.0 + id: resp_060930d6abfe49c7006a3c3e48529c819a944ed725bbe55cd0 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_nDUFTXwJnhbqZjpaQws3XdGa + id: fc_060930d6abfe49c7006a3c3e491504819aa6c312a6b9216ce9 + name: get_error_logs + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 491 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 519 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_pn7qCsEVkAGhDdjzZLCH9cWT + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Instead of investigating errors, search the runbook for how to increase + memory limits for ETL jobs. Max 3 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333002 + created_at: 1782333001 + error: null + frequency_penalty: 0.0 + id: resp_060930d6abfe49c7006a3c3e497860819aa2284f65a4dcb398 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"increase memory limits for ETL jobs","max_results":3}' + call_id: call_o2XvimyEnM5NRIzViu7DEDsN + id: fc_060930d6abfe49c7006a3c3e4a093c819aa7b19626148ad151 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_060930d6abfe49c7006a3c3e468914819aa9bba6693b0b3070 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 500 + input_tokens_details: + cached_tokens: 0 + output_tokens: 27 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 527 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml new file mode 100644 index 0000000..224a68d --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_parallel.yaml @@ -0,0 +1,979 @@ +turns: +- filename: t1 + request: + body: + input: 'You are an SRE assistant. Do TWO things in parallel: 1) check the status + of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now.' + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333004 + created_at: 1782333003 + error: null + frequency_penalty: 0.0 + id: resp_0d6f4d0f7ecfbcbe006a3c3e4b94b08199adefa3fb3b9e6032 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_qsZfyBw1H87UfhQzVJUoQ7Zg + id: fc_0d6f4d0f7ecfbcbe006a3c3e4c67b08199b20c7ffffd19dfd1 + name: get_job_status + status: completed + type: function_call + - arguments: '{"query":"Spark OOM fixes"}' + call_id: call_9NgvfTtmjW3F1zy9oUmM0Wna + id: fc_0d6f4d0f7ecfbcbe006a3c3e4c67bc8199a5d2ff4e72a5e3b5 + name: web_search + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 377 + input_tokens_details: + cached_tokens: 0 + output_tokens: 52 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 429 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_qsZfyBw1H87UfhQzVJUoQ7Zg + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - call_id: call_9NgvfTtmjW3F1zy9oUmM0Wna + output: '[{"title": "Spark OOM in ETL: Root Causes and Fixes - Databricks + Blog", "url": "https://docs.databricks.com/en/optimizations/oom.html", "snippet": + "Common causes: data skew in shuffle partitions, broadcast join threshold + too high, insufficient executor memory. Fix: spark.sql.adaptive.enabled=true + + increase spark.executor.memory"}, {"title": "Troubleshooting OutOfMemoryError + in Apache Spark", "url": "https://spark.apache.org/docs/latest/tuning.html#memory-tuning", + "snippet": "Increase spark.executor.memory or spark.driver.memory. For shuffle-heavy + workloads, also increase spark.sql.shuffle.partitions."}]' + type: function_call_output + - content: Based on those results, search the runbook for memory override procedures. + Max 3 results. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4b94b08199adefa3fb3b9e6032 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333006 + created_at: 1782333004 + error: null + frequency_penalty: 0.0 + id: resp_0d6f4d0f7ecfbcbe006a3c3e4ca1708199a0c174c7eca6dd03 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"memory override procedures","max_results":3}' + call_id: call_RaHQ2EJUGrXRWTgM7qUlfdhg + id: fc_0d6f4d0f7ecfbcbe006a3c3e4dfe488199abf55a5d0896e72f + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4b94b08199adefa3fb3b9e6032 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 676 + input_tokens_details: + cached_tokens: 0 + output_tokens: 23 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 699 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_RaHQ2EJUGrXRWTgM7qUlfdhg + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: Now restart that job with 64GB memory and high priority. + role: user + type: message + model: gpt-4o + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4ca1708199a0c174c7eca6dd03 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333006 + created_at: 1782333006 + error: null + frequency_penalty: 0.0 + id: resp_0d6f4d0f7ecfbcbe006a3c3e4e530481999b6062fbd63f7df9 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_141clJIyorJeRB9bew8mY8Hf + id: fc_0d6f4d0f7ecfbcbe006a3c3e4ebc748199875c7dc7d0025c1e + name: restart_job + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0d6f4d0f7ecfbcbe006a3c3e4ca1708199a0c174c7eca6dd03 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 857 + input_tokens_details: + cached_tokens: 0 + output_tokens: 36 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 893 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml new file mode 100644 index 0000000..4eb796e --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml @@ -0,0 +1,970 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: gpt-4o + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333011 + created_at: 1782333008 + error: null + frequency_penalty: 0.0 + id: resp_0cd8ef891c0f4c71006a3c3e507b68819b84f7f3f1a42a599a + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"job_id":"job-382"}' + call_id: call_Be8nCBDD2O0m7GSL5ATsZn1i + id: fc_0cd8ef891c0f4c71006a3c3e52f520819b96aa59c35fff1b5d + name: get_job_status + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 355 + input_tokens_details: + cached_tokens: 0 + output_tokens: 19 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 374 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_Be8nCBDD2O0m7GSL5ATsZn1i + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + model: gpt-4o + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e507b68819b84f7f3f1a42a599a + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333013 + created_at: 1782333012 + error: null + frequency_penalty: 0.0 + id: resp_0cd8ef891c0f4c71006a3c3e53eef8819bbcac061137bbf660 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: 'The ETL pipeline job **job-382** has failed. Here are the details: + + + - **Error Type:** OutOfMemoryError + + - **Failed At:** 2024-01-15T03:22:11Z + + - **Stage:** Transform + + - **Memory Used:** 31.2 GB + + - **Memory Limit:** 32 GB + + - **Pipeline:** etl-daily-ingest + + - **Owner:** Data Platform Team + + + Would you like me to retrieve error logs or attempt to restart the job + with adjusted parameters?' + type: output_text + id: msg_0cd8ef891c0f4c71006a3c3e5478c4819bbab78f4548e94859 + role: assistant + status: completed + type: message + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e507b68819b84f7f3f1a42a599a + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 471 + input_tokens_details: + cached_tokens: 0 + output_tokens: 113 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 584 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Based on what you found, search the runbook for how to fix it. Max 5 + results. + model: gpt-4o + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e53eef8819bbcac061137bbf660 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + authorization: Bearer *** + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + billing: + payer: developer + completed_at: 1782333014 + created_at: 1782333014 + error: null + frequency_penalty: 0.0 + id: resp_0cd8ef891c0f4c71006a3c3e561858819bba80615c21372611 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4o-2024-08-06 + moderation: null + object: response + output: + - arguments: '{"query":"OutOfMemoryError in ETL pipeline","max_results":5}' + call_id: call_I0IVG6tBnGhK5zZGic76mhPo + id: fc_0cd8ef891c0f4c71006a3c3e568658819b82e90c7976061f87 + name: search_runbook + status: completed + type: function_call + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_0cd8ef891c0f4c71006a3c3e53eef8819bbcac061137bbf660 + prompt_cache_key: null + prompt_cache_retention: in_memory + reasoning: + context: null + effort: null + summary: null + safety_identifier: null + service_tier: default + status: completed + store: true + temperature: 1.0 + text: + format: + type: text + verbosity: medium + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: 0 + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 611 + input_tokens_details: + cached_tokens: 0 + output_tokens: 28 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 639 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json new file mode 100644 index 0000000..8c6ec54 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tool_outputs.json @@ -0,0 +1,8 @@ +{ + "get_job_status": "{\"job_id\": \"job-382\", \"status\": \"failed\", \"error_type\": \"OutOfMemoryError\", \"failed_at\": \"2024-01-15T03:22:11Z\", \"stage\": \"transform\", \"memory_used_gb\": 31.2, \"memory_limit_gb\": 32, \"pipeline\": \"etl-daily-ingest\", \"owner\": \"data-platform-team\"}", + "get_error_logs": "[{\"timestamp\": \"2024-01-15T03:22:09Z\", \"severity\": \"ERROR\", \"message\": \"OutOfMemoryError at stage 'transform': heap space exhausted (31.2GB/32GB)\", \"stack_trace\": \"java.lang.OutOfMemoryError: Java heap space\\n at org.apache.spark.memory.TaskMemoryManager.allocatePage\"}, {\"timestamp\": \"2024-01-15T03:22:10Z\", \"severity\": \"ERROR\", \"message\": \"Task failed: transform-partition-47 OOM after processing 2.1M rows\", \"stack_trace\": \"org.apache.spark.SparkException: Task failed while writing rows\"}, {\"timestamp\": \"2024-01-15T03:22:11Z\", \"severity\": \"ERROR\", \"message\": \"Job job-382 marked FAILED after 3 consecutive OOM errors in transform stage\", \"stack_trace\": null}]", + "search_runbook": "[{\"title\": \"OOM Troubleshooting for ETL Pipelines\", \"path\": \"/runbooks/data-platform/oom-etl.md\", \"snippet\": \"When transform stage OOMs: 1) Check partition skew with get_partition_stats, 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query execution\"}, {\"title\": \"Memory Limit Override Procedures\", \"path\": \"/runbooks/data-platform/memory-overrides.md\", \"snippet\": \"To increase memory: use restart_job with memory_override_gb. Standard escalation: 32->64->128. Requires approval above 128GB.\"}]", + "run_analysis": "{\"stdout\": \"Failed stage: transform, count: 2\\nPattern: consistent OOM at partition-47 (2.1M rows)\\nRecommendation: increase memory to 64GB or repartition\", \"stderr\": \"\", \"exit_code\": 0, \"duration_ms\": 120}", + "restart_job": "{\"job_id\": \"job-382\", \"new_run_id\": \"run-8847\", \"status\": \"queued\", \"memory_gb\": 64, \"priority\": \"high\", \"skipped_stages\": [\"extract\", \"validate\"], \"estimated_start\": \"2024-01-15T03:30:00Z\"}", + "web_search": "[{\"title\": \"Spark OOM in ETL: Root Causes and Fixes - Databricks Blog\", \"url\": \"https://docs.databricks.com/en/optimizations/oom.html\", \"snippet\": \"Common causes: data skew in shuffle partitions, broadcast join threshold too high, insufficient executor memory. Fix: spark.sql.adaptive.enabled=true + increase spark.executor.memory\"}, {\"title\": \"Troubleshooting OutOfMemoryError in Apache Spark\", \"url\": \"https://spark.apache.org/docs/latest/tuning.html#memory-tuning\", \"snippet\": \"Increase spark.executor.memory or spark.driver.memory. For shuffle-heavy workloads, also increase spark.sql.shuffle.partitions.\"}]" +} diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json new file mode 100644 index 0000000..d3df440 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/pipeline_tools.json @@ -0,0 +1,93 @@ +[ + { + "type": "function", + "name": "get_job_status", + "description": "Query the pipeline orchestrator for a job's current state, metadata, and failure info.", + "parameters": { + "type": "object", + "properties": { + "job_id": {"type": "string", "description": "Pipeline job identifier, e.g. job-382"} + }, + "required": ["job_id"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "get_error_logs", + "description": "Fetch error logs and stack traces for a failed pipeline job.", + "parameters": { + "type": "object", + "properties": { + "job_id": {"type": "string", "description": "Pipeline job identifier"}, + "severity": {"type": "string", "enum": ["WARN", "ERROR", "FATAL", "ALL"], "description": "Minimum severity filter"}, + "max_entries": {"type": "integer", "description": "Maximum log entries to return (default 50)"} + }, + "required": ["job_id", "severity", "max_entries"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "search_runbook", + "description": "Search internal operations runbooks for troubleshooting procedures.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Natural language search query"}, + "max_results": {"type": "integer", "description": "Maximum results to return (default 5)"} + }, + "required": ["query", "max_results"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "run_analysis", + "description": "Execute a Python analysis script in a sandboxed environment for log parsing or data inspection.", + "parameters": { + "type": "object", + "properties": { + "code": {"type": "string", "description": "Python code to execute"}, + "timeout_seconds": {"type": "integer", "description": "Execution timeout (default 30)"} + }, + "required": ["code", "timeout_seconds"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "restart_job", + "description": "Restart a failed pipeline job with optional configuration overrides.", + "parameters": { + "type": "object", + "properties": { + "job_id": {"type": "string", "description": "Pipeline job identifier to restart"}, + "memory_override_gb": {"type": "integer", "description": "Override memory limit in GB"}, + "skip_completed_stages": {"type": "boolean", "description": "Skip stages that completed successfully (default true)"}, + "priority": {"type": "string", "enum": ["low", "normal", "high"], "description": "Queue priority"} + }, + "required": ["job_id", "memory_override_gb", "skip_completed_stages", "priority"], + "additionalProperties": false + }, + "strict": true + }, + { + "type": "function", + "name": "web_search", + "description": "Search the public web for technical documentation, blog posts, and known issues.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"} + }, + "required": ["query"], + "additionalProperties": false + }, + "strict": true + } +] diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh new file mode 100755 index 0000000..2f4e54f --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/record.sh @@ -0,0 +1,237 @@ +#!/bin/bash +# Records stateful multi-turn tool-call cassettes using record_cassette.py +# +# Scenario: SRE debugging failed ETL pipeline job-382 +# Tools: get_job_status, get_error_logs, search_runbook, run_analysis, +# restart_job, web_search +# +# IMPORTANT: These cassettes prove context retention via ambiguous prompts. +# Turns 2+ use pronouns ("that job", "it", "those errors") that can ONLY resolve +# correctly if previous_response_id preserves server-side conversation state. +# +# Prerequisites (vLLM): +# - SSH tunnel to G6e instance: ssh -L 8100:localhost:8100 ubuntu@ +# - gpt-oss container running with VLLM_ENABLE_RESPONSES_API_STORE=1 +# +# Prerequisites (OpenAI): +# - OPENAI_API_KEY env var set (or ~/.openai_api_key file) +# +# Usage: +# ./record.sh # Record all (vLLM + OpenAI) +# ./record.sh vllm # Record vLLM only +# ./record.sh openai # Record OpenAI only + +set -euo pipefail + +RECORDER="$(dirname "$0")/../../record_cassette.py" +TOOLS="$(dirname "$0")/pipeline_tools.json" +TOOL_OUTPUTS="$(dirname "$0")/pipeline_tool_outputs.json" +OUTPUT_DIR="$(dirname "$0")" +VLLM_URL="http://localhost:8100" +VLLM_MODEL="openai/gpt-oss-20b" +OPENAI_MODEL="gpt-4o" + +TARGET="${1:-all}" + +# ═══════════════════════════════════════════════════════════════════ +# vLLM cassettes (gpt-oss-20b) +# ═══════════════════════════════════════════════════════════════════ + +if [[ "$TARGET" == "all" || "$TARGET" == "vllm" ]]; then + +echo "══════════════════════════════════════════════════════════════" +echo " Recording vLLM cassettes (gpt-oss-20b)" +echo "══════════════════════════════════════════════════════════════" + +echo "" +echo "=== 3-turn non-streaming (context retention: 'that job' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Now pull the error logs for that job. Use severity ERROR and max 10 entries." \ + "Based on those errors, search the runbook for troubleshooting procedures. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/responses_tool_calls_3turn.yaml" + +echo "" +echo "=== 5-turn non-streaming (context retention: 'restart it' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. ETL pipeline job-382 failed overnight. What is its current status?" \ + "Pull the error logs for that failed job. Use severity ERROR and max 20 entries." \ + "Search the runbook for how to fix the issue found in those logs. Max 5 results." \ + "Run this analysis code to summarize: import json; print(json.dumps({'job': 'job-382', 'error': 'OOM', 'stage': 'transform', 'recommendation': 'increase memory to 64GB'}))" \ + "Great. Now restart it with 64 GB memory, skip completed stages, and high priority." \ +| python3 "$RECORDER" \ + --turns 5 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/responses_tool_calls_5turn.yaml" + +echo "" +echo "=== 3-turn streaming (context retention in SSE mode: 'that job' resolves) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the status of pipeline job-382." \ + "Get the error logs for that job with severity FATAL and max 5 entries." \ + "Search the web for how to fix that type of error in Spark pipelines." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/responses_tool_calls_3turn_streaming.yaml" + +echo "" +echo "=== 3-turn branch (turn 3 diverges from turn 1, skipping turn 2's context) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Get the error logs for that job with severity ERROR and max 10 entries." \ + "Instead of investigating errors, search the runbook for how to increase memory limits for ETL jobs. Max 3 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --branch-from 1 --branch-turn-number 3 \ + --output "$OUTPUT_DIR/responses_tool_calls_branch.yaml" + +echo "" +echo "=== 3-turn parallel (attempts 2 tools in one turn) ===" +printf '%s\n' \ + "You are an SRE assistant. Do TWO things in parallel: 1) check the status of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now." \ + "Based on those results, search the runbook for memory override procedures. Max 3 results." \ + "Now restart that job with 64GB memory and high priority." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/responses_tool_calls_parallel.yaml" + +echo "" +echo "=== 3-turn tool-output-only (turn 2 has no user message, just tool output) ===" +# Turn 2 prompt is empty string → _build_tool_output_input omits user message +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "" \ + "Based on what you found, search the runbook for how to fix it. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$VLLM_MODEL" --vllm "$VLLM_URL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/responses_tool_calls_tool_output_only.yaml" + +echo "" +echo "=== vLLM cassettes done ===" +ls -la "$OUTPUT_DIR"/responses_*.yaml + +fi + +# ═══════════════════════════════════════════════════════════════════ +# OpenAI cassettes (gpt-4o) +# ═══════════════════════════════════════════════════════════════════ + +if [[ "$TARGET" == "all" || "$TARGET" == "openai" ]]; then + +echo "" +echo "══════════════════════════════════════════════════════════════" +echo " Recording OpenAI cassettes (gpt-4o)" +echo "══════════════════════════════════════════════════════════════" + +echo "" +echo "=== 3-turn non-streaming (context retention: 'that job' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Now pull the error logs for that job. Use severity ERROR and max 10 entries." \ + "Based on those errors, search the runbook for troubleshooting procedures. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_3turn.yaml" + +echo "" +echo "=== 5-turn non-streaming (context retention: 'restart it' resolves to job-382) ===" +printf '%s\n' \ + "You are an SRE assistant. ETL pipeline job-382 failed overnight. What is its current status?" \ + "Pull the error logs for that failed job. Use severity ERROR and max 20 entries." \ + "Search the runbook for how to fix the issue found in those logs. Max 5 results." \ + "Run this analysis code to summarize: import json; print(json.dumps({'job': 'job-382', 'error': 'OOM', 'stage': 'transform', 'recommendation': 'increase memory to 64GB'}))" \ + "Great. Now restart it with 64 GB memory, skip completed stages, and high priority." \ +| python3 "$RECORDER" \ + --turns 5 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_5turn.yaml" + +echo "" +echo "=== 3-turn streaming (context retention in SSE mode) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the status of pipeline job-382." \ + "Get the error logs for that job with severity FATAL and max 5 entries." \ + "Search the web for how to fix that type of error in Spark pipelines." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_3turn_streaming.yaml" + +echo "" +echo "=== 3-turn branch (turn 3 diverges from turn 1) ===" +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "Get the error logs for that job with severity ERROR and max 10 entries." \ + "Instead of investigating errors, search the runbook for how to increase memory limits for ETL jobs. Max 3 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --branch-from 1 --branch-turn-number 3 \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_branch.yaml" + +echo "" +echo "=== 3-turn parallel (2 tools in one turn — gpt-4o reliably does this) ===" +printf '%s\n' \ + "You are an SRE assistant. Do TWO things in parallel: 1) check the status of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now." \ + "Based on those results, search the runbook for memory override procedures. Max 3 results." \ + "Now restart that job with 64GB memory and high priority." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_parallel.yaml" + +echo "" +echo "=== 3-turn tool-output-only (turn 2 has no user message) ===" +# Turn 2 prompt is empty string → _build_tool_output_input omits user message +printf '%s\n' \ + "You are an SRE assistant. Check the current status of ETL pipeline job-382." \ + "" \ + "Based on what you found, search the runbook for how to fix it. Max 5 results." \ +| python3 "$RECORDER" \ + --turns 3 --mode responses --no-stream \ + --model "$OPENAI_MODEL" \ + --tools "$TOOLS" --tool-choice auto \ + --tool-outputs "$TOOL_OUTPUTS" \ + --output "$OUTPUT_DIR/openai_responses_tool_calls_tool_output_only.yaml" + +echo "" +echo "=== OpenAI cassettes done ===" +ls -la "$OUTPUT_DIR"/openai_*.yaml + +fi + +echo "" +echo "══════════════════════════════════════════════════════════════" +CASSETTE_COUNT=$(ls "$OUTPUT_DIR"/*.yaml 2>/dev/null | wc -l | tr -d ' ') +echo " All done. ${CASSETTE_COUNT} cassettes in ${OUTPUT_DIR}." +echo "══════════════════════════════════════════════════════════════" diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml new file mode 100644 index 0000000..388373b --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn.yaml @@ -0,0 +1,1003 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332947 + frequency_penalty: 0.0 + id: resp_800ce8df847a299d + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3660 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to query get_job_status. + type: reasoning_text + encrypted_content: null + id: rs_8586850ba9dcee47 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_80b2f4f754dbba6b + id: fc_80b2f4f754dbba6b + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 436 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 436 + output_tokens: 36 + output_tokens_details: + output_tokens_per_turn: + - 36 + reasoning_tokens: 18 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 472 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_80b2f4f754dbba6b + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Now pull the error logs for that job. Use severity ERROR and max + 10 entries. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_800ce8df847a299d + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332947 + frequency_penalty: 0.0 + id: resp_96a3170ed78e87b3 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3499 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need get_error_logs. + type: reasoning_text + encrypted_content: null + id: rs_b35e1283a0afe646 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_a4919ceec5777dd0 + id: fc_a4919ceec5777dd0 + name: get_error_logs + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_800ce8df847a299d + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 597 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 597 + output_tokens: 43 + output_tokens_details: + output_tokens_per_turn: + - 43 + reasoning_tokens: 25 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 640 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_a4919ceec5777dd0 + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Based on those errors, search the runbook for troubleshooting procedures. + Max 5 results. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_96a3170ed78e87b3 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332947 + frequency_penalty: 0.0 + id: resp_a89ba667cd7e86df + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3225 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Search runbook. + type: reasoning_text + encrypted_content: null + id: rs_81269817d22797d0 + status: null + summary: [] + type: reasoning + - arguments: '{"query":"OutOfMemoryError ETL transform stage Spark restart job + memory limit", "max_results":5}' + call_id: call_bb883df29ae253d1 + id: fc_bb883df29ae253d1 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_96a3170ed78e87b3 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 871 + input_tokens_details: + cached_tokens: 608 + cached_tokens_per_turn: + - 608 + input_tokens_per_turn: + - 871 + output_tokens: 47 + output_tokens_details: + output_tokens_per_turn: + - 47 + reasoning_tokens: 29 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 918 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml new file mode 100644 index 0000000..ea9d95f --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_3turn_streaming.yaml @@ -0,0 +1,1868 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the status of pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"response":{"id":"resp_a2613187feeed7eb","created_at":1782332952,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"response":{"id":"resp_a2613187feeed7eb","created_at":1782332952,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"id":"msg_a8e6149a1e573d1b","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.reasoning_part.added + + ' + - 'data: {"content_index":0,"item_id":"msg_a8e6149a1e573d1b","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"The","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" user","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" wants","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" to","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" check","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" status","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" of","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" job","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"-","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"382","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" We","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" should","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" call","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" get","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_job","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_status","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":20,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":21,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.done + + ' + - 'data: {"content_index":0,"item_id":"msg_a8e6149a1e573d1b","output_index":0,"sequence_number":22,"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"response.reasoning_text.done"} + + ' + - ' + + ' + - 'event: response.reasoning_part.done + + ' + - 'data: {"content_index":0,"item_id":"msg_a8e6149a1e573d1b","output_index":0,"part":{"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"reasoning_text"},"sequence_number":23,"type":"response.reasoning_part.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"id":"msg_a8e6149a1e573d1b","summary":[],"type":"reasoning","content":[{"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":24,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"arguments":"","call_id":"call_8febbe99b4e4e607","name":"get_job_status","type":"function_call","id":"fc_9d06e8f43a16a23b","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":25,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\n","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":29,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_id","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":30,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":31,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":33,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"-","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":34,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"382","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":35,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\"\n","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":36,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"}","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":37,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"arguments":"{\n \"job_id\": \"job-382\"\n}","item_id":"fc_9d06e8f43a16a23b","name":"get_job_status","output_index":1,"sequence_number":38,"type":"response.function_call_arguments.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"arguments":"{\n \"job_id\": \"job-382\"\n}","call_id":"call_8febbe99b4e4e607","name":"get_job_status","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_9d06e8f43a16a23b","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":39,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"response":{"id":"resp_a2613187feeed7eb","created_at":1782332952,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_9c1f68eb64edc98c","summary":[],"type":"reasoning","content":[{"text":"The + user wants to check status of job-382. We should call get_job_status.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\n \"job_id\": + \"job-382\"\n}","call_id":"call_b6631e5373a38e91","name":"get_job_status","type":"function_call","id":"fc_b6631e5373a38e91","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3663,"max_tool_calls":null,"previous_response_id":null,"prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":433,"input_tokens_details":{"cached_tokens":432,"input_tokens_per_turn":[433],"cached_tokens_per_turn":[432]},"output_tokens":50,"output_tokens_details":{"reasoning_tokens":32,"tool_output_tokens":0,"output_tokens_per_turn":[50],"tool_output_tokens_per_turn":[0]},"total_tokens":483},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":40,"type":"response.completed"} + + ' + - ' + + ' + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_b6631e5373a38e91 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity FATAL and max 5 entries. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_a2613187feeed7eb + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"response":{"id":"resp_86f66db434f30025","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3489,"max_tool_calls":null,"previous_response_id":"resp_a2613187feeed7eb","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"response":{"id":"resp_86f66db434f30025","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3489,"max_tool_calls":null,"previous_response_id":"resp_a2613187feeed7eb","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"id":"msg_92b62cd073692e7b","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.reasoning_part.added + + ' + - 'data: {"content_index":0,"item_id":"msg_92b62cd073692e7b","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"We","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" need","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" to","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" call","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" get","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":8,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_error","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":9,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_logs","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":10,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" with","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":11,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" job","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":12,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_id","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":13,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" \"","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":14,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"job","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":15,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"-","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":16,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"382","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":17,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"\",","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":18,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" severity","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":19,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" F","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":20,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"ATAL","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":21,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":",","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":22,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" max","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":23,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_entries","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":24,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" ","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":25,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"5","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":26,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":27,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.done + + ' + - 'data: {"content_index":0,"item_id":"msg_92b62cd073692e7b","output_index":0,"sequence_number":28,"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"response.reasoning_text.done"} + + ' + - ' + + ' + - 'event: response.reasoning_part.done + + ' + - 'data: {"content_index":0,"item_id":"msg_92b62cd073692e7b","output_index":0,"part":{"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"reasoning_text"},"sequence_number":29,"type":"response.reasoning_part.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"id":"msg_92b62cd073692e7b","summary":[],"type":"reasoning","content":[{"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":30,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"arguments":"","call_id":"call_9b2372a2800336e0","name":"get_error_logs","type":"function_call","id":"fc_8b3ba0677d2822bc","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":31,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":32,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":33,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":34,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":35,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_id","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":36,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":37,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":38,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"job","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":39,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"-","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":40,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"382","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":41,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\",\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":42,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":43,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":44,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"severity","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":45,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":46,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":47,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"F","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":48,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"ATAL","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":49,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\",\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":50,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":51,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":52,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"max","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":53,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"_entries","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":54,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":55,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":56,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"5","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":57,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\n","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":58,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"}","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":59,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"arguments":"{\n \"job_id\": \"job-382\",\n \"severity\": \"FATAL\",\n \"max_entries\": + 5\n}","item_id":"fc_8b3ba0677d2822bc","name":"get_error_logs","output_index":1,"sequence_number":60,"type":"response.function_call_arguments.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"arguments":"{\n \"job_id\": \"job-382\",\n \"severity\": + \"FATAL\",\n \"max_entries\": 5\n}","call_id":"call_9b2372a2800336e0","name":"get_error_logs","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_8b3ba0677d2822bc","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":61,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"response":{"id":"resp_86f66db434f30025","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_aeaf98915a49d703","summary":[],"type":"reasoning","content":[{"text":"We + need to call get_error_logs with job_id \"job-382\", severity FATAL, max_entries + 5.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\n \"job_id\": + \"job-382\",\n \"severity\": \"FATAL\",\n \"max_entries\": 5\n}","call_id":"call_bb83b6c9e24d7a4a","name":"get_error_logs","type":"function_call","id":"fc_bb83b6c9e24d7a4a","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3489,"max_tool_calls":null,"previous_response_id":"resp_a2613187feeed7eb","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":607,"input_tokens_details":{"cached_tokens":448,"input_tokens_per_turn":[607],"cached_tokens_per_turn":[448]},"output_tokens":72,"output_tokens_details":{"reasoning_tokens":54,"tool_output_tokens":0,"output_tokens_per_turn":[72],"tool_output_tokens_per_turn":[0]},"total_tokens":679},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":62,"type":"response.completed"} + + ' + - ' + + ' + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_bb83b6c9e24d7a4a + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the web for how to fix that type of error in Spark pipelines. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_86f66db434f30025 + store: true + stream: true + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + headers: + content-type: text/event-stream; charset=utf-8 + sse: + - 'event: response.created + + ' + - 'data: {"response":{"id":"resp_a58eda69543e3142","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3189,"max_tool_calls":null,"previous_response_id":"resp_86f66db434f30025","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":0,"type":"response.created"} + + ' + - ' + + ' + - 'event: response.in_progress + + ' + - 'data: {"response":{"id":"resp_a58eda69543e3142","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3189,"max_tool_calls":null,"previous_response_id":"resp_86f66db434f30025","prompt":null,"reasoning":null,"service_tier":"auto","status":"in_progress","text":null,"top_logprobs":null,"truncation":"disabled","usage":null,"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":1,"type":"response.in_progress"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"id":"msg_b1ed6fcc9b2f9086","summary":[],"type":"reasoning","content":null,"encrypted_content":null,"status":"in_progress"},"output_index":0,"sequence_number":2,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.reasoning_part.added + + ' + - 'data: {"content_index":0,"item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"part":{"text":"","type":"reasoning_text"},"sequence_number":3,"type":"response.reasoning_part.added"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"Call","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":4,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":" web","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":5,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":"_search","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":6,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.delta + + ' + - 'data: {"content_index":0,"delta":".","item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":7,"type":"response.reasoning_text.delta"} + + ' + - ' + + ' + - 'event: response.reasoning_text.done + + ' + - 'data: {"content_index":0,"item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"sequence_number":8,"text":"Call + web_search.","type":"response.reasoning_text.done"} + + ' + - ' + + ' + - 'event: response.reasoning_part.done + + ' + - 'data: {"content_index":0,"item_id":"msg_b1ed6fcc9b2f9086","output_index":0,"part":{"text":"Call + web_search.","type":"reasoning_text"},"sequence_number":9,"type":"response.reasoning_part.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"id":"msg_b1ed6fcc9b2f9086","summary":[],"type":"reasoning","content":[{"text":"Call + web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":"completed"},"output_index":0,"sequence_number":10,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.output_item.added + + ' + - 'data: {"item":{"arguments":"","call_id":"call_a7d34f6d95f8ff9a","name":"web_search","type":"function_call","id":"fc_aa8e3909617fdeeb","namespace":null,"status":"in_progress"},"output_index":1,"sequence_number":11,"type":"response.output_item.added"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"{\n","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":12,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" ","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":13,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":14,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"query","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":15,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\":","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":16,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" \"","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":17,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"spark","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":18,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" out","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":19,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"of","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":20,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"memory","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":21,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"error","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":22,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" how","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":23,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" to","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":24,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":" fix","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":25,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"\"\n","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":26,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.delta + + ' + - 'data: {"delta":"}","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":27,"type":"response.function_call_arguments.delta"} + + ' + - ' + + ' + - 'event: response.function_call_arguments.done + + ' + - 'data: {"arguments":"{\n \"query\": \"spark outofmemoryerror how to fix\"\n}","item_id":"fc_aa8e3909617fdeeb","name":"web_search","output_index":1,"sequence_number":28,"type":"response.function_call_arguments.done"} + + ' + - ' + + ' + - 'event: response.output_item.done + + ' + - 'data: {"item":{"arguments":"{\n \"query\": \"spark outofmemoryerror how to + fix\"\n}","call_id":"call_a7d34f6d95f8ff9a","name":"web_search","type":"function_call","id":null,"namespace":null,"status":"completed","item_id":"fc_aa8e3909617fdeeb","output_index":1,"sequence_number":-1},"output_index":1,"sequence_number":29,"type":"response.output_item.done"} + + ' + - ' + + ' + - 'event: response.completed + + ' + - 'data: {"response":{"id":"resp_a58eda69543e3142","created_at":1782332953,"incomplete_details":null,"instructions":null,"metadata":null,"model":"openai/gpt-oss-20b","object":"response","output":[{"id":"rs_8d22ca49bb7cce52","summary":[],"type":"reasoning","content":[{"text":"Call + web_search.","type":"reasoning_text"}],"encrypted_content":null,"status":null},{"arguments":"{\n \"query\": + \"spark outofmemoryerror how to fix\"\n}","call_id":"call_a385d854446fb281","name":"web_search","type":"function_call","id":"fc_a385d854446fb281","namespace":null,"status":null}],"parallel_tool_calls":true,"temperature":1.0,"tool_choice":"auto","tools":[{"name":"get_job_status","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier, e.g. job-382"}},"required":["job_id"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Query + the pipeline orchestrator for a job''s current state, metadata, and failure + info."},{"name":"get_error_logs","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier"},"severity":{"type":"string","enum":["WARN","ERROR","FATAL","ALL"],"description":"Minimum + severity filter"},"max_entries":{"type":"integer","description":"Maximum log + entries to return (default 50)"}},"required":["job_id","severity","max_entries"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Fetch + error logs and stack traces for a failed pipeline job."},{"name":"search_runbook","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Natural + language search query"},"max_results":{"type":"integer","description":"Maximum + results to return (default 5)"}},"required":["query","max_results"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + internal operations runbooks for troubleshooting procedures."},{"name":"run_analysis","parameters":{"type":"object","properties":{"code":{"type":"string","description":"Python + code to execute"},"timeout_seconds":{"type":"integer","description":"Execution + timeout (default 30)"}},"required":["code","timeout_seconds"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Execute + a Python analysis script in a sandboxed environment for log parsing or data + inspection."},{"name":"restart_job","parameters":{"type":"object","properties":{"job_id":{"type":"string","description":"Pipeline + job identifier to restart"},"memory_override_gb":{"type":"integer","description":"Override + memory limit in GB"},"skip_completed_stages":{"type":"boolean","description":"Skip + stages that completed successfully (default true)"},"priority":{"type":"string","enum":["low","normal","high"],"description":"Queue + priority"}},"required":["job_id","memory_override_gb","skip_completed_stages","priority"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Restart + a failed pipeline job with optional configuration overrides."},{"name":"web_search","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search + query"}},"required":["query"],"additionalProperties":false},"strict":true,"type":"function","defer_loading":null,"description":"Search + the public web for technical documentation, blog posts, and known issues."}],"top_p":1.0,"background":false,"max_output_tokens":3189,"max_tool_calls":null,"previous_response_id":"resp_86f66db434f30025","prompt":null,"reasoning":null,"service_tier":"auto","status":"completed","text":null,"top_logprobs":null,"truncation":"disabled","usage":{"input_tokens":907,"input_tokens_details":{"cached_tokens":624,"input_tokens_per_turn":[907],"cached_tokens_per_turn":[624]},"output_tokens":39,"output_tokens_details":{"reasoning_tokens":22,"tool_output_tokens":0,"output_tokens_per_turn":[39],"tool_output_tokens_per_turn":[0]},"total_tokens":946},"user":null,"presence_penalty":0.0,"frequency_penalty":0.0,"kv_transfer_params":null,"input_messages":null,"output_messages":null},"sequence_number":30,"type":"response.completed"} + + ' + - ' + + ' + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml new file mode 100644 index 0000000..418aac7 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_5turn.yaml @@ -0,0 +1,1680 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. ETL pipeline job-382 failed overnight. What + is its current status? + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332949 + frequency_penalty: 0.0 + id: resp_bdd5b90cf69f193e + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3657 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to use get_job_status. + type: reasoning_text + encrypted_content: null + id: rs_99dcd006b8361f42 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_a8ec7f8606f82fcb + id: fc_a8ec7f8606f82fcb + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 439 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 439 + output_tokens: 36 + output_tokens_details: + output_tokens_per_turn: + - 36 + reasoning_tokens: 18 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 475 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_a8ec7f8606f82fcb + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Pull the error logs for that failed job. Use severity ERROR and max + 20 entries. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_bdd5b90cf69f193e + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332949 + frequency_penalty: 0.0 + id: resp_b333098339569097 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3496 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We will get_error_logs. + type: reasoning_text + encrypted_content: null + id: rs_8475b754aec8bc57 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":20}' + call_id: call_8473140fc968508b + id: fc_8473140fc968508b + name: get_error_logs + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_bdd5b90cf69f193e + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 600 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 600 + output_tokens: 43 + output_tokens_details: + output_tokens_per_turn: + - 43 + reasoning_tokens: 25 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 643 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_8473140fc968508b + output: '[{"timestamp": "2024-01-15T03:22:09Z", "severity": "ERROR", "message": + "OutOfMemoryError at stage ''transform'': heap space exhausted (31.2GB/32GB)", + "stack_trace": "java.lang.OutOfMemoryError: Java heap space\n at org.apache.spark.memory.TaskMemoryManager.allocatePage"}, + {"timestamp": "2024-01-15T03:22:10Z", "severity": "ERROR", "message": "Task + failed: transform-partition-47 OOM after processing 2.1M rows", "stack_trace": + "org.apache.spark.SparkException: Task failed while writing rows"}, {"timestamp": + "2024-01-15T03:22:11Z", "severity": "ERROR", "message": "Job job-382 marked + FAILED after 3 consecutive OOM errors in transform stage", "stack_trace": + null}]' + type: function_call_output + - content: Search the runbook for how to fix the issue found in those logs. + Max 5 results. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_b333098339569097 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332950 + frequency_penalty: 0.0 + id: resp_965a415718fbf3d6 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3220 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Call search_runbook. + type: reasoning_text + encrypted_content: null + id: rs_9d57d334a763e6cd + status: null + summary: [] + type: reasoning + - arguments: '{"query":"OutOfMemoryError in transform stage spark job-382 fix + runbook", "max_results":5}' + call_id: call_9f8fb58fca2c18a3 + id: fc_9f8fb58fca2c18a3 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_b333098339569097 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 876 + input_tokens_details: + cached_tokens: 608 + cached_tokens_per_turn: + - 608 + input_tokens_per_turn: + - 876 + output_tokens: 49 + output_tokens_details: + output_tokens_per_turn: + - 49 + reasoning_tokens: 31 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 925 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t4 + request: + body: + input: + - call_id: call_9f8fb58fca2c18a3 + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: 'Run this analysis code to summarize: import json; print(json.dumps({''job'': + ''job-382'', ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': + ''increase memory to 64GB''}))' + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_965a415718fbf3d6 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332950 + frequency_penalty: 0.0 + id: resp_bd9e5303a45132a7 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 2974 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We call run_analysis. + type: reasoning_text + encrypted_content: null + id: rs_98daf1dc1d378213 + status: null + summary: [] + type: reasoning + - arguments: '{"code":"import json; print(json.dumps({''job'': ''job-382'', + ''error'': ''OOM'', ''stage'': ''transform'', ''recommendation'': ''increase + memory to 64GB''}))","timeout_seconds":30}' + call_id: call_87c2a52cfc83b243 + id: fc_87c2a52cfc83b243 + name: run_analysis + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_965a415718fbf3d6 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 1122 + input_tokens_details: + cached_tokens: 880 + cached_tokens_per_turn: + - 880 + input_tokens_per_turn: + - 1122 + output_tokens: 72 + output_tokens_details: + output_tokens_per_turn: + - 72 + reasoning_tokens: 55 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 1194 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t5 + request: + body: + input: + - call_id: call_87c2a52cfc83b243 + output: '{"stdout": "Failed stage: transform, count: 2\nPattern: consistent + OOM at partition-47 (2.1M rows)\nRecommendation: increase memory to 64GB + or repartition", "stderr": "", "exit_code": 0, "duration_ms": 120}' + type: function_call_output + - content: Great. Now restart it with 64 GB memory, skip completed stages, and + high priority. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_bd9e5303a45132a7 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332951 + frequency_penalty: 0.0 + id: resp_82fb53818571c25a + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 2804 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Call restart_job. + type: reasoning_text + encrypted_content: null + id: rs_83a9d72111ec93c4 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_a255f9a0224bf7d9 + id: fc_a255f9a0224bf7d9 + name: restart_job + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_bd9e5303a45132a7 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 1292 + input_tokens_details: + cached_tokens: 1120 + cached_tokens_per_turn: + - 1120 + input_tokens_per_turn: + - 1292 + output_tokens: 49 + output_tokens_details: + output_tokens_per_turn: + - 49 + reasoning_tokens: 32 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 1341 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml new file mode 100644 index 0000000..7301589 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_branch.yaml @@ -0,0 +1,995 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332955 + frequency_penalty: 0.0 + id: resp_88c245c603a1d2c5 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3660 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We need to query the pipeline orchestrator. Use get_job_status. + type: reasoning_text + encrypted_content: null + id: rs_b8144bee19fa21a2 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_a8129fdfb7970a9c + id: fc_a8129fdfb7970a9c + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 436 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 436 + output_tokens: 42 + output_tokens_details: + output_tokens_per_turn: + - 42 + reasoning_tokens: 24 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 478 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_a8129fdfb7970a9c + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Get the error logs for that job with severity ERROR and max 10 entries. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_88c245c603a1d2c5 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332955 + frequency_penalty: 0.0 + id: resp_b4b63e6cf26bac63 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3495 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We call get_error_logs. + type: reasoning_text + encrypted_content: null + id: rs_9fe2fe7333bbe17b + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","severity":"ERROR","max_entries":10}' + call_id: call_a8d716a0954a6f32 + id: fc_a8d716a0954a6f32 + name: get_error_logs + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_88c245c603a1d2c5 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 601 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 601 + output_tokens: 43 + output_tokens_details: + output_tokens_per_turn: + - 43 + reasoning_tokens: 25 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 644 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_a8129fdfb7970a9c + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Instead of investigating errors, search the runbook for how to increase + memory limits for ETL jobs. Max 3 results. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_88c245c603a1d2c5 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332956 + frequency_penalty: 0.0 + id: resp_8a4dcb6fa1e203ed + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3486 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: We search runbook. + type: reasoning_text + encrypted_content: null + id: rs_bbd8fccb1a5f6fe6 + status: null + summary: [] + type: reasoning + - arguments: '{"query":"increase memory limits for ETL jobs", "max_results":3}' + call_id: call_bb3377332bb7ed73 + id: fc_bb3377332bb7ed73 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_88c245c603a1d2c5 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 610 + input_tokens_details: + cached_tokens: 576 + cached_tokens_per_turn: + - 576 + input_tokens_per_turn: + - 610 + output_tokens: 42 + output_tokens_details: + output_tokens_per_turn: + - 42 + reasoning_tokens: 24 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 652 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml new file mode 100644 index 0000000..4c2c560 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_parallel.yaml @@ -0,0 +1,1001 @@ +turns: +- filename: t1 + request: + body: + input: 'You are an SRE assistant. Do TWO things in parallel: 1) check the status + of job-382 AND 2) search the web for Spark OOM fixes. Call BOTH tools now.' + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332957 + frequency_penalty: 0.0 + id: resp_9bfc4dacc826288b + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3638 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: 'We need to call two tools: get_job_status with job_id "job-382" and + web_search for "Spark OOM fixes". We''ll output both. We''ll use tools.' + type: reasoning_text + encrypted_content: null + id: rs_9e89d5f70cbf5c3b + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_adbd2bbc5203b3d4 + id: fc_adbd2bbc5203b3d4 + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 458 + input_tokens_details: + cached_tokens: 416 + cached_tokens_per_turn: + - 416 + input_tokens_per_turn: + - 458 + output_tokens: 64 + output_tokens_details: + output_tokens_per_turn: + - 64 + reasoning_tokens: 46 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 522 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_adbd2bbc5203b3d4 + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + - content: Based on those results, search the runbook for memory override procedures. + Max 3 results. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_9bfc4dacc826288b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332958 + frequency_penalty: 0.0 + id: resp_9e29702b26e8d17f + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3448 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Call search_runbook with query "memory override procedure pipeline + job" maybe. + type: reasoning_text + encrypted_content: null + id: rs_8564e8af0bfe516e + status: null + summary: [] + type: reasoning + - arguments: '{"query":"memory override procedure pipeline job", "max_results":3}' + call_id: call_8641aa37edcc8a37 + id: fc_8641aa37edcc8a37 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_9bfc4dacc826288b + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 648 + input_tokens_details: + cached_tokens: 496 + cached_tokens_per_turn: + - 496 + input_tokens_per_turn: + - 648 + output_tokens: 50 + output_tokens_details: + output_tokens_per_turn: + - 50 + reasoning_tokens: 32 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 698 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: + - call_id: call_8641aa37edcc8a37 + output: '[{"title": "OOM Troubleshooting for ETL Pipelines", "path": "/runbooks/data-platform/oom-etl.md", + "snippet": "When transform stage OOMs: 1) Check partition skew with get_partition_stats, + 2) Increase executor memory (default 32GB -> 64GB), 3) Enable adaptive query + execution"}, {"title": "Memory Limit Override Procedures", "path": "/runbooks/data-platform/memory-overrides.md", + "snippet": "To increase memory: use restart_job with memory_override_gb. + Standard escalation: 32->64->128. Requires approval above 128GB."}]' + type: function_call_output + - content: Now restart that job with 64GB memory and high priority. + role: user + type: message + model: openai/gpt-oss-20b + previous_response_id: resp_9e29702b26e8d17f + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332958 + frequency_penalty: 0.0 + id: resp_9d863819516f196f + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3234 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Call restart_job. + type: reasoning_text + encrypted_content: null + id: rs_ae5f6f0e38b5f7d4 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382","memory_override_gb":64,"skip_completed_stages":true,"priority":"high"}' + call_id: call_b231aec112209369 + id: fc_b231aec112209369 + name: restart_job + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_9e29702b26e8d17f + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 862 + input_tokens_details: + cached_tokens: 656 + cached_tokens_per_turn: + - 656 + input_tokens_per_turn: + - 862 + output_tokens: 49 + output_tokens_details: + output_tokens_per_turn: + - 49 + reasoning_tokens: 32 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 911 + user: null + headers: + content-type: application/json + status_code: 200 diff --git a/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml new file mode 100644 index 0000000..ab6bc38 --- /dev/null +++ b/crates/agentic-core/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml @@ -0,0 +1,983 @@ +turns: +- filename: t1 + request: + body: + input: You are an SRE assistant. Check the current status of ETL pipeline job-382. + model: openai/gpt-oss-20b + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332960 + frequency_penalty: 0.0 + id: resp_a4885b37a5cc8f6a + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3660 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - text: Need to call function get_job_status. + type: reasoning_text + encrypted_content: null + id: rs_abbd07ed9c3d37f1 + status: null + summary: [] + type: reasoning + - arguments: '{"job_id":"job-382"}' + call_id: call_abfcd3d8ebc4441c + id: fc_abfcd3d8ebc4441c + name: get_job_status + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: null + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 436 + input_tokens_details: + cached_tokens: 432 + cached_tokens_per_turn: + - 432 + input_tokens_per_turn: + - 436 + output_tokens: 36 + output_tokens_details: + output_tokens_per_turn: + - 36 + reasoning_tokens: 18 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 472 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t2 + request: + body: + input: + - call_id: call_abfcd3d8ebc4441c + output: '{"job_id": "job-382", "status": "failed", "error_type": "OutOfMemoryError", + "failed_at": "2024-01-15T03:22:11Z", "stage": "transform", "memory_used_gb": + 31.2, "memory_limit_gb": 32, "pipeline": "etl-daily-ingest", "owner": "data-platform-team"}' + type: function_call_output + model: openai/gpt-oss-20b + previous_response_id: resp_a4885b37a5cc8f6a + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332960 + frequency_penalty: 0.0 + id: resp_a5ee681c0f39cf97 + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3521 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - content: + - annotations: [] + logprobs: null + text: "**Job status (job‑382)** \n\n| Field | Value |\n|-------|-------|\n\ + | **Status** | **failed** |\n| **Error type** | *OutOfMemoryError* |\n\ + | **Failed at** | 2024‑01‑15 T03:22:11 Z |\n| **Stage** | transform |\n\ + | **Memory used** | 31.2 GB |\n| **Memory limit** | 32 GB |\n| **Pipeline**\ + \ | etl‑daily‑ingest |\n| **Owner** | data‑platform‑team |\n\n---\n\n\ + ### Next steps\n\n* The pipeline failed because it exceeded the memory\ + \ budget during the **transform** stage. \n* You could \n 1. **Increase\ + \ the memory limit** (e.g., to 36 GB) and **restart** the job. \n 2.\ + \ **Skip completed stages** so it doesn’t re‑run the data ingestion step.\ + \ \n 3. Optionally lower the queue priority if you want a quick retry.\n\ + \nLet me know how you’d like to proceed (e.g., `restart_job`, view error\ + \ logs, or investigate further)." + type: output_text + id: msg_80b908ea16ee9e3c + phase: null + role: assistant + status: completed + type: message + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_a4885b37a5cc8f6a + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 575 + input_tokens_details: + cached_tokens: 448 + cached_tokens_per_turn: + - 448 + input_tokens_per_turn: + - 575 + output_tokens: 242 + output_tokens_details: + output_tokens_per_turn: + - 242 + reasoning_tokens: 0 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 817 + user: null + headers: + content-type: application/json + status_code: 200 +- filename: t3 + request: + body: + input: Based on what you found, search the runbook for how to fix it. Max 5 + results. + model: openai/gpt-oss-20b + previous_response_id: resp_a5ee681c0f39cf97 + store: true + stream: false + tool_choice: auto + tools: + - description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + headers: + accept: '*/*' + content-type: application/json + user-agent: python-httpx/0.28.1 + method: POST + path: /v1/responses + query_params: {} + response: + body: + background: false + created_at: 1782332962 + frequency_penalty: 0.0 + id: resp_9394e8e449e8e40b + incomplete_details: null + input_messages: null + instructions: null + kv_transfer_params: null + max_output_tokens: 3266 + max_tool_calls: null + metadata: null + model: openai/gpt-oss-20b + object: response + output: + - arguments: '{"query":"etl-daily-ingest OutOfMemoryError transform stage retry + runbook","max_results":5}' + call_id: call_af062f63e1fe3d72 + id: fc_af062f63e1fe3d72 + name: search_runbook + namespace: null + status: null + type: function_call + output_messages: null + parallel_tool_calls: true + presence_penalty: 0.0 + previous_response_id: resp_a5ee681c0f39cf97 + prompt: null + reasoning: null + service_tier: auto + status: completed + temperature: 1.0 + text: null + tool_choice: auto + tools: + - defer_loading: null + description: Query the pipeline orchestrator for a job's current state, metadata, + and failure info. + name: get_job_status + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier, e.g. job-382 + type: string + required: + - job_id + type: object + strict: true + type: function + - defer_loading: null + description: Fetch error logs and stack traces for a failed pipeline job. + name: get_error_logs + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier + type: string + max_entries: + description: Maximum log entries to return (default 50) + type: integer + severity: + description: Minimum severity filter + enum: + - WARN + - ERROR + - FATAL + - ALL + type: string + required: + - job_id + - severity + - max_entries + type: object + strict: true + type: function + - defer_loading: null + description: Search internal operations runbooks for troubleshooting procedures. + name: search_runbook + parameters: + additionalProperties: false + properties: + max_results: + description: Maximum results to return (default 5) + type: integer + query: + description: Natural language search query + type: string + required: + - query + - max_results + type: object + strict: true + type: function + - defer_loading: null + description: Execute a Python analysis script in a sandboxed environment for + log parsing or data inspection. + name: run_analysis + parameters: + additionalProperties: false + properties: + code: + description: Python code to execute + type: string + timeout_seconds: + description: Execution timeout (default 30) + type: integer + required: + - code + - timeout_seconds + type: object + strict: true + type: function + - defer_loading: null + description: Restart a failed pipeline job with optional configuration overrides. + name: restart_job + parameters: + additionalProperties: false + properties: + job_id: + description: Pipeline job identifier to restart + type: string + memory_override_gb: + description: Override memory limit in GB + type: integer + priority: + description: Queue priority + enum: + - low + - normal + - high + type: string + skip_completed_stages: + description: Skip stages that completed successfully (default true) + type: boolean + required: + - job_id + - memory_override_gb + - skip_completed_stages + - priority + type: object + strict: true + type: function + - defer_loading: null + description: Search the public web for technical documentation, blog posts, + and known issues. + name: web_search + parameters: + additionalProperties: false + properties: + query: + description: Search query + type: string + required: + - query + type: object + strict: true + type: function + top_logprobs: null + top_p: 1.0 + truncation: disabled + usage: + input_tokens: 830 + input_tokens_details: + cached_tokens: 560 + cached_tokens_per_turn: + - 560 + input_tokens_per_turn: + - 830 + output_tokens: 37 + output_tokens_details: + output_tokens_per_turn: + - 37 + reasoning_tokens: 24 + tool_output_tokens: 0 + tool_output_tokens_per_turn: + - 0 + total_tokens: 867 + user: null + headers: + content-type: application/json + status_code: 200