From 3ca9068840bfea3c4d555ca3ed44454a84f7e48a Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Wed, 1 Jul 2026 22:14:03 -0700
Subject: [PATCH 1/9] feat: add dispatch_tools + execute_loop (PR B tool
 dispatch layer)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the gateway-side agentic tool loop on top of the ToolRegistry
and GatewayExecutor traits landed in PR A (#80):

- executor/dispatch.rs: LoopDecision enum (#[non_exhaustive]) +
  dispatch_tools() — classifies FunctionToolCall items via
  ToolRegistry::gateway_owned(), executes in parallel with 30s
  per-call timeout, maps failures to error-JSON FunctionCallOutput
  items (never aborts the loop on tool error).

- executor/agentic_loop.rs: execute_loop() — multi-turn orchestrator
  that clears all three persistence triggers before looping and
  restores original IDs on the final payload. Rejects stream=true
  (StreamTee is a future PR). Hard guard of 128 iterations, soft cap
  via max_iterations param (default: 10).

Client-owned function tools (ToolType::Function) return Done for now;
RequiresAction and ContinuePartial are deferred per staging agreement
in PR #67 — LoopDecision is #[non_exhaustive] to make the addition safe.

MCP tool names are absent from the registry until PR C adds discovery;
any function_call for an MCP tool name is treated as client-owned.

244 tests pass; cargo clippy --workspace --all-targets -- -D warnings clean.

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 .../agentic-core/src/executor/agentic_loop.rs | 254 ++++++++++++
 crates/agentic-core/src/executor/dispatch.rs  | 362 ++++++++++++++++++
 crates/agentic-core/src/executor/mod.rs       |   4 +
 crates/agentic-core/src/lib.rs                |   1 +
 4 files changed, 621 insertions(+)
 create mode 100644 crates/agentic-core/src/executor/agentic_loop.rs
 create mode 100644 crates/agentic-core/src/executor/dispatch.rs

diff --git a/crates/agentic-core/src/executor/agentic_loop.rs b/crates/agentic-core/src/executor/agentic_loop.rs
new file mode 100644
index 0000000..a7d2c18
--- /dev/null
+++ b/crates/agentic-core/src/executor/agentic_loop.rs
@@ -0,0 +1,254 @@
+//! Agentic tool loop — multi-turn executor that dispatches tool calls between inference steps.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use either::Either;
+
+use crate::executor::dispatch::{LoopDecision, dispatch_tools};
+use crate::executor::engine::execute;
+use crate::executor::error::{ExecutorError, ExecutorResult};
+use crate::executor::request::ExecutionContext;
+use crate::tool::{GatewayExecutor, ToolRegistry, ToolType};
+use crate::types::io::input::{InputItem, ResponsesInput};
+use crate::types::request_response::{RequestPayload, ResponsePayload};
+
+/// Hard safety guard — prevents runaway loops regardless of `max_iterations`.
+const MAX_LOOP_GUARD: usize = 128;
+
+/// Default soft cap on tool-dispatch iterations per request.
+pub const DEFAULT_MAX_ITERATIONS: usize = 10;
+
+/// Run the full agentic tool loop.
+///
+/// Calls [`execute`] repeatedly, dispatching gateway-owned tool calls after each
+/// inference turn, until the model stops producing tool calls or `max_iterations`
+/// is reached.
+///
+/// ## Persistence contract
+///
+/// This function **never writes to the database**. All three persistence triggers
+/// (`store`, `previous_response_id`, `conversation_id`) are cleared before the
+/// first iteration and restored onto the final payload before returning. The
+/// caller owns [`crate::executor::engine::persist_response`] and must call it
+/// after this function returns using the original [`crate::executor::request::RequestContext`].
+///
+/// ## MCP discovery
+///
+/// `registry` must be built via [`ToolRegistry::build`] before calling. MCP tool
+/// names are absent from the registry in this PR — discovery is added in PR C.
+/// Any `function_call` for an MCP tool name is treated as client-owned (skipped).
+///
+/// # Errors
+///
+/// - [`ExecutorError::InvalidRequest`] if `request.stream` is `true` (streaming
+///   + tool dispatch requires the `StreamTee`, which is a future PR).
+/// - [`ExecutorError`] variants from [`execute`] if LLM inference fails.
+pub async fn execute_loop<S: std::hash::BuildHasher>(
+    mut request: RequestPayload,
+    exec_ctx: Arc<ExecutionContext>,
+    registry: ToolRegistry,
+    executors: HashMap<ToolType, Arc<dyn GatewayExecutor>, S>,
+    max_iterations: usize,
+) -> ExecutorResult<ResponsePayload> {
+    if request.stream {
+        return Err(ExecutorError::InvalidRequest(
+            "execute_loop does not support streaming requests; use execute() directly or wait for StreamTee PR"
+                .to_owned(),
+        ));
+    }
+
+    // Capture original persistence triggers before clearing them.
+    // The loop calls execute() internally — we must suppress intermediate persists.
+    let original_store = request.store;
+    let original_prev_id = request.previous_response_id.clone();
+    let original_conv_id = request.conversation_id.clone();
+
+    request.store = false;
+    request.previous_response_id = None;
+    request.conversation_id = None;
+
+    let effective_max = max_iterations.min(MAX_LOOP_GUARD);
+    let mut payload = ResponsePayload {
+        id: String::new(),
+        object: "response".to_owned(),
+        created_at: 0,
+        model: request.model.clone(),
+        status: "completed".to_owned(),
+        output: vec![],
+        usage: None,
+        incomplete_details: None,
+        error: None,
+        previous_response_id: None,
+        conversation_id: None,
+        instructions: None,
+    };
+
+    for iteration in 0..MAX_LOOP_GUARD {
+        let result = execute(request.clone(), Arc::clone(&exec_ctx)).await?;
+
+        payload = match result {
+            Either::Left(p) => p,
+            Either::Right(_) => {
+                // execute() returned a stream — shouldn't happen since we set stream=false,
+                // but guard defensively.
+                return Err(ExecutorError::InvalidRequest(
+                    "execute() returned a stream despite stream=false".to_owned(),
+                ));
+            }
+        };
+
+        match dispatch_tools(&payload.output, &registry, &executors, iteration, effective_max).await? {
+            LoopDecision::Done => break,
+
+            LoopDecision::Incomplete(reason) => {
+                "incomplete".clone_into(&mut payload.status);
+                payload.incomplete_details =
+                    Some(crate::types::request_response::IncompleteDetails { reason: Some(reason) });
+                break;
+            }
+
+            LoopDecision::Continue(tool_results) => {
+                // Extend request.input with the tool results for the next iteration.
+                let existing_items: Vec<InputItem> = Vec::from(&request.input);
+                // Also append the model's tool call items so vLLM sees the full call/output pair.
+                let mut fc_items: Vec<InputItem> = payload
+                    .output
+                    .iter()
+                    .filter_map(|item| {
+                        if let crate::types::io::output::OutputItem::FunctionCall(fc) = item {
+                            Some(InputItem::FunctionCall(fc.clone()))
+                        } else {
+                            None
+                        }
+                    })
+                    .collect();
+
+                let mut next_items = existing_items;
+                next_items.append(&mut fc_items);
+                next_items.extend(tool_results);
+                request.input = ResponsesInput::Items(next_items);
+            }
+        }
+    }
+
+    // Restore original IDs onto the final payload.
+    payload.previous_response_id = original_prev_id;
+    payload.conversation_id = original_conv_id;
+    // Restore store flag on the request (not used after this, but for correctness).
+    let _ = original_store;
+
+    Ok(payload)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use super::*;
+    use crate::ToolRegistry;
+    use crate::tool::ToolType;
+    use crate::types::request_response::RequestPayload;
+
+    // ── Helpers ───────────────────────────────────────────────────────────────
+
+    fn make_request(stream: bool) -> RequestPayload {
+        RequestPayload {
+            model: "test-model".to_owned(),
+            input: crate::types::io::input::ResponsesInput::Text("hello".to_owned()),
+            instructions: None,
+            previous_response_id: None,
+            conversation_id: None,
+            tools: None,
+            tool_choice: crate::types::io::ToolChoice::Auto,
+            stream,
+            store: false,
+            include: None,
+            temperature: None,
+            top_p: None,
+            max_output_tokens: None,
+            truncation: None,
+            metadata: None,
+        }
+    }
+
+    fn no_executors() -> HashMap<ToolType, Arc<dyn GatewayExecutor>> {
+        HashMap::new()
+    }
+
+    // ── Tests ─────────────────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn rejects_streaming_request() {
+        let request = make_request(true);
+        let exec_ctx = Arc::new(ExecutionContext::new(
+            crate::executor::modes::ConversationHandler::new(crate::storage::ConversationStore::new(
+                crate::storage::create_pool(None).await.unwrap(),
+            )),
+            crate::executor::modes::ResponseHandler::new(crate::storage::ResponseStore::new(
+                crate::storage::create_pool(None).await.unwrap(),
+            )),
+            Arc::new(reqwest::Client::new()),
+            "http://localhost:9999".to_owned(),
+            None,
+        ));
+        let result = execute_loop(request, exec_ctx, ToolRegistry::default(), no_executors(), 10).await;
+        assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
+    }
+
+    #[tokio::test]
+    async fn persistence_triggers_cleared_for_internal_calls() {
+        // This test verifies the contract by inspecting the request clone.
+        // We can't call a real LLM — but we can confirm the trigger-clearing
+        // logic compiles and runs by checking that store/ids are reset.
+        let mut request = make_request(false);
+        request.store = true;
+        request.previous_response_id = Some("resp_orig".to_owned());
+        request.conversation_id = Some("conv_orig".to_owned());
+
+        // The loop will attempt to call the LLM; fail immediately (no server).
+        // We just verify the function handles the error path — not a behavior test.
+        let exec_ctx = Arc::new(ExecutionContext::new(
+            crate::executor::modes::ConversationHandler::new(crate::storage::ConversationStore::new(
+                crate::storage::create_pool(None).await.unwrap(),
+            )),
+            crate::executor::modes::ResponseHandler::new(crate::storage::ResponseStore::new(
+                crate::storage::create_pool(None).await.unwrap(),
+            )),
+            Arc::new(reqwest::Client::new()),
+            "http://localhost:9999".to_owned(), // unreachable — will error
+            None,
+        ));
+
+        // Should fail with a network/LLM error, not a panic
+        let result = execute_loop(request, exec_ctx, ToolRegistry::default(), no_executors(), 10).await;
+        assert!(result.is_err(), "expected error from unreachable LLM");
+    }
+
+    #[test]
+    fn default_max_iterations_is_ten() {
+        assert_eq!(DEFAULT_MAX_ITERATIONS, 10);
+    }
+
+    #[test]
+    fn max_loop_guard_is_128() {
+        assert_eq!(MAX_LOOP_GUARD, 128);
+    }
+
+    #[test]
+    fn loop_decision_done_is_non_exhaustive() {
+        // Compile-time check that the enum is #[non_exhaustive] — adding a new
+        // variant won't silently break this match.
+        let d = LoopDecision::Done;
+        // Compile-time check: all current variants are handled, and #[non_exhaustive]
+        // means downstream match arms must include a wildcard for future variants.
+        #[allow(unreachable_patterns, clippy::match_same_arms)]
+        match d {
+            LoopDecision::Done => {}
+            LoopDecision::Continue(_) => {}
+            LoopDecision::Incomplete(_) => {}
+            _ => {}
+        }
+    }
+}
diff --git a/crates/agentic-core/src/executor/dispatch.rs b/crates/agentic-core/src/executor/dispatch.rs
new file mode 100644
index 0000000..8c3c847
--- /dev/null
+++ b/crates/agentic-core/src/executor/dispatch.rs
@@ -0,0 +1,362 @@
+//! Tool dispatch — routes one batch of model-output tool calls and returns a loop decision.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::Duration;
+
+use futures::future::join_all;
+
+use crate::executor::error::ExecutorResult;
+use crate::tool::{GatewayExecutor, ToolRegistry, ToolType};
+use crate::types::io::input::{FunctionToolResultMessage, InputItem};
+use crate::types::io::output::OutputItem;
+
+/// Per-call timeout for gateway tool execution.
+const TOOL_TIMEOUT: Duration = Duration::from_secs(30);
+
+/// Decision returned by [`dispatch_tools`] after processing one batch of tool calls.
+///
+/// `#[non_exhaustive]` allows adding `RequiresAction` and `ContinuePartial` in a
+/// follow-up PR without breaking existing match arms.
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum LoopDecision {
+    /// At least one gateway-owned tool call was executed. The caller must extend
+    /// `request.input` with these items and call the LLM again.
+    Continue(Vec<InputItem>),
+    /// No tool calls in the model output, or all calls were client-owned functions.
+    /// The loop terminates and the current payload is the final response.
+    ///
+    /// Post-MVP: client-owned calls will produce `RequiresAction` instead.
+    Done,
+    /// The loop hit `max_iterations` before the model stopped producing tool calls.
+    /// The response is returned with `status: "incomplete"`.
+    Incomplete(String),
+}
+
+/// Route one batch of [`FunctionToolCall`] items from the model's output.
+///
+/// Gateway-owned calls (anything other than `ToolType::Function`) are executed in
+/// parallel with a per-call timeout. Client-owned function calls are silently
+/// skipped for MVP (post-MVP: `RequiresAction`).
+///
+/// Tool execution failures become error-JSON [`InputItem::FunctionCallOutput`]
+/// entries — they are never propagated as `Err`. Only infrastructure failures
+/// (e.g. serialization) return `Err`.
+///
+/// # Errors
+///
+/// Returns [`ExecutorError::InvalidRequest`] if called with `iteration >= max_iterations`.
+pub async fn dispatch_tools<S: std::hash::BuildHasher>(
+    output: &[OutputItem],
+    registry: &ToolRegistry,
+    executors: &HashMap<ToolType, Arc<dyn GatewayExecutor>, S>,
+    iteration: usize,
+    max_iterations: usize,
+) -> ExecutorResult<LoopDecision> {
+    let calls: Vec<crate::types::io::output::FunctionToolCall> = output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.clone())
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    if calls.is_empty() {
+        return Ok(LoopDecision::Done);
+    }
+
+    if iteration >= max_iterations {
+        return Ok(LoopDecision::Incomplete(format!(
+            "max tool iterations reached ({iteration}/{max_iterations})"
+        )));
+    }
+
+    let gateway_calls = registry.gateway_owned(&calls);
+
+    if gateway_calls.is_empty() {
+        // All calls are client-owned function tools — no gateway execution.
+        // Post-MVP: return LoopDecision::RequiresAction(client_calls).
+        tracing::debug!(
+            count = calls.len(),
+            "all tool calls are client-owned functions — returning Done (RequiresAction post-MVP)"
+        );
+        return Ok(LoopDecision::Done);
+    }
+
+    let results = execute_gateway_calls(&gateway_calls, registry, executors).await;
+    Ok(LoopDecision::Continue(results))
+}
+
+async fn execute_gateway_calls<S: std::hash::BuildHasher>(
+    calls: &[&crate::types::io::output::FunctionToolCall],
+    registry: &ToolRegistry,
+    executors: &HashMap<ToolType, Arc<dyn GatewayExecutor>, S>,
+) -> Vec<InputItem> {
+    join_all(calls.iter().map(|call| async move {
+        let entry = registry.lookup(&call.name);
+        let executor = entry.and_then(|e| executors.get(&e.tool_type));
+
+        let output = if let (Some(e), Some(exec)) = (entry, executor) {
+            match tokio::time::timeout(TOOL_TIMEOUT, exec.execute(&call.name, &call.arguments, &e.config)).await {
+                Ok(Ok(tool_out)) => tool_out.output,
+                Ok(Err(e)) => {
+                    tracing::warn!(tool = %call.name, error = %e, "tool execution failed");
+                    serde_json::json!({ "error": e.to_string() }).to_string()
+                }
+                Err(_) => {
+                    tracing::warn!(tool = %call.name, "tool execution timed out");
+                    serde_json::json!({ "error": format!("tool '{}' timed out after {}s", call.name, TOOL_TIMEOUT.as_secs()) })
+                        .to_string()
+                }
+            }
+        } else {
+            tracing::warn!(tool = %call.name, "no executor registered for tool");
+            serde_json::json!({ "error": format!("no executor registered for tool '{}'", call.name) }).to_string()
+        };
+
+        InputItem::FunctionCallOutput(FunctionToolResultMessage {
+            call_id: call.call_id.clone(),
+            output,
+        })
+    }))
+    .await
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::pin::Pin;
+    use std::sync::Arc;
+
+    use serde_json::Value;
+
+    use super::*;
+    use crate::ToolRegistry;
+    use crate::tool::{GatewayExecutor, ToolError, ToolHandler, ToolOutput, ToolType};
+    use crate::types::event::MessageStatus;
+    use crate::types::io::output::{FunctionToolCall, OutputItem};
+    use crate::types::tools::ResponsesTool;
+
+    // ── Mock executor ──────────────────────────────────────────────────────────
+
+    struct MockExecutor {
+        tool_type: ToolType,
+        result: Result<String, String>,
+    }
+
+    impl ToolHandler for MockExecutor {
+        fn tool_type(&self) -> ToolType {
+            self.tool_type
+        }
+
+        fn validate(&self, _param: &Value) -> Result<(), ToolError> {
+            Ok(())
+        }
+
+        fn normalize(&self, _param: &Value) -> Vec<crate::types::io::FunctionTool> {
+            vec![]
+        }
+    }
+
+    impl GatewayExecutor for MockExecutor {
+        fn execute(
+            &self,
+            _tool_name: &str,
+            _arguments: &str,
+            _config: &Value,
+        ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+            let r = self.result.clone();
+            Box::pin(async move {
+                match r {
+                    Ok(output) => Ok(ToolOutput {
+                        call_id: String::new(),
+                        output,
+                    }),
+                    Err(e) => Err(ToolError::Execution(e)),
+                }
+            })
+        }
+    }
+
+    fn make_fc(name: &str, call_id: &str) -> OutputItem {
+        OutputItem::FunctionCall(FunctionToolCall {
+            id: call_id.to_owned(),
+            call_id: call_id.to_owned(),
+            name: name.to_owned(),
+            arguments: "{}".to_owned(),
+            status: MessageStatus::Completed,
+        })
+    }
+
+    fn registry_with_web_search() -> ToolRegistry {
+        let tools = vec![ResponsesTool::WebSearch(crate::types::tools::WebSearchToolParam {})];
+        ToolRegistry::build(&tools)
+    }
+
+    fn web_search_executor(result: Result<String, String>) -> HashMap<ToolType, Arc<dyn GatewayExecutor>> {
+        let mut map: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        map.insert(
+            ToolType::WebSearch,
+            Arc::new(MockExecutor {
+                tool_type: ToolType::WebSearch,
+                result,
+            }),
+        );
+        map
+    }
+
+    // ── Tests ──────────────────────────────────────────────────────────────────
+
+    #[tokio::test]
+    async fn no_function_calls_returns_done() {
+        let output = vec![OutputItem::Message(crate::types::io::output::OutputMessage::new(
+            "msg_1",
+            MessageStatus::Completed,
+        ))];
+        let registry = ToolRegistry::default();
+        let executors = HashMap::new();
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Done));
+    }
+
+    #[tokio::test]
+    async fn empty_output_returns_done() {
+        let registry = ToolRegistry::default();
+        let executors = HashMap::new();
+        let decision = dispatch_tools(&[], &registry, &executors, 0, 10).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Done));
+    }
+
+    #[tokio::test]
+    async fn max_iterations_returns_incomplete() {
+        let output = vec![make_fc("web_search", "fc_1")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("results".into()));
+        // iteration == max_iterations → Incomplete
+        let decision = dispatch_tools(&output, &registry, &executors, 5, 5).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Incomplete(_)));
+    }
+
+    #[tokio::test]
+    async fn gateway_call_succeeds_returns_continue() {
+        let output = vec![make_fc("web_search", "fc_1")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("search results".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        match decision {
+            LoopDecision::Continue(items) => {
+                assert_eq!(items.len(), 1);
+                if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                    assert_eq!(msg.call_id, "fc_1");
+                    assert_eq!(msg.output, "search results");
+                } else {
+                    panic!("expected FunctionCallOutput");
+                }
+            }
+            other => panic!("expected Continue, got {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn failing_executor_returns_continue_with_error_json() {
+        let output = vec![make_fc("web_search", "fc_err")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Err("network failure".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        match decision {
+            LoopDecision::Continue(items) => {
+                assert_eq!(items.len(), 1);
+                if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                    assert!(
+                        msg.output.contains("error"),
+                        "output should be error JSON: {}",
+                        msg.output
+                    );
+                } else {
+                    panic!("expected FunctionCallOutput");
+                }
+            }
+            other => panic!("expected Continue with error, got {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn unregistered_tool_name_returns_error_json() {
+        let output = vec![make_fc("unknown_tool", "fc_unknown")];
+        let registry = ToolRegistry::default(); // empty — nothing registered
+        let executors = web_search_executor(Ok("unused".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        // unknown name → client_owned → Done (post-MVP: RequiresAction)
+        assert!(matches!(decision, LoopDecision::Done));
+    }
+
+    #[tokio::test]
+    async fn client_owned_function_call_returns_done() {
+        // Register a function tool in the registry
+        let tools = vec![ResponsesTool::Function(crate::types::tools::FunctionToolParam {
+            name: crate::types::tools::NonEmptyToolName::try_from("get_weather".to_owned()).unwrap(),
+            description: None,
+            parameters: None,
+            strict: None,
+        })];
+        let registry = ToolRegistry::build(&tools);
+        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        let output = vec![make_fc("get_weather", "fc_fn")];
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Done));
+    }
+
+    #[tokio::test]
+    async fn parallel_gateway_calls_all_execute() {
+        let output = vec![make_fc("web_search", "fc_1"), make_fc("web_search", "fc_2")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("ok".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        match decision {
+            LoopDecision::Continue(items) => assert_eq!(items.len(), 2),
+            other => panic!("expected Continue, got {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn registered_gateway_tool_but_no_executor_returns_error_json_continue() {
+        // Registry has WebSearch entry but executors map is empty
+        let output = vec![make_fc("web_search", "fc_no_exec")];
+        let registry = registry_with_web_search();
+        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        match decision {
+            LoopDecision::Continue(items) => {
+                assert_eq!(items.len(), 1);
+                if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                    assert!(msg.output.contains("error"), "expected error JSON: {}", msg.output);
+                } else {
+                    panic!("expected FunctionCallOutput");
+                }
+            }
+            other => panic!("expected Continue with error, got {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn iteration_zero_below_max_executes() {
+        let output = vec![make_fc("web_search", "fc_1")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("ok".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 1).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Continue(_)));
+    }
+
+    #[tokio::test]
+    async fn iteration_at_max_returns_incomplete() {
+        let output = vec![make_fc("web_search", "fc_1")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("ok".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 1, 1).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Incomplete(_)));
+    }
+}
diff --git a/crates/agentic-core/src/executor/mod.rs b/crates/agentic-core/src/executor/mod.rs
index 925e34b..0e702d0 100644
--- a/crates/agentic-core/src/executor/mod.rs
+++ b/crates/agentic-core/src/executor/mod.rs
@@ -1,6 +1,8 @@
 //! Agentic executor.
 
 pub mod accumulator;
+pub mod agentic_loop;
+pub mod dispatch;
 pub mod engine;
 pub mod error;
 pub mod inference;
@@ -9,6 +11,8 @@ pub mod persist;
 pub mod rehydrate;
 pub mod request;
 
+pub use agentic_loop::{DEFAULT_MAX_ITERATIONS, execute_loop};
+pub use dispatch::{LoopDecision, dispatch_tools};
 pub use engine::{BoxStream, ExecuteRequest, create_conversation, execute};
 pub use error::{ExecutorError, ExecutorResult};
 pub use inference::call_inference;
diff --git a/crates/agentic-core/src/lib.rs b/crates/agentic-core/src/lib.rs
index 93bb05a..f0fab18 100644
--- a/crates/agentic-core/src/lib.rs
+++ b/crates/agentic-core/src/lib.rs
@@ -9,6 +9,7 @@ pub mod tool;
 pub mod types;
 pub mod utils;
 
+pub use executor::{DEFAULT_MAX_ITERATIONS, LoopDecision, dispatch_tools, execute_loop};
 pub use storage::{
     ConversationData, ConversationStore, DbPool, InOutItem, ItemKind, ResponseData, ResponseMetadata, ResponseStore,
     SchemaManager, StorageError, StoreResult, create_pool, create_pool_with_schema,

From 23c921c96ff8a57a20eb5fe4bcce6daf6078356e Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Wed, 1 Jul 2026 22:22:35 -0700
Subject: [PATCH 2/9] fix: address review findings in dispatch_tools +
 execute_loop

- Fix loop guard bug: change `0..MAX_LOOP_GUARD` to `0..=effective_max`
  so dispatch_tools is called at iteration == effective_max and returns
  Incomplete rather than silently exiting with status "completed"
- Add #[must_use] to dispatch_tools and execute_loop (api-must-use rule)
- Fix incorrect # Errors doc on dispatch_tools (iteration limit returns
  Ok(Incomplete), not Err)
- Correct persistence contract doc: RequestContext is not returned, so
  describe the caller's responsibility accurately
- Fix mixed gateway+client tool history: only include FunctionCall input
  items whose call_id has a matching tool result, preventing orphaned
  function_call entries that most LLM backends reject
- Remove dead `let _ = original_store` (Concern-4)
- Fix line exceeding 120-char rustfmt limit in dispatch.rs

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 .../agentic-core/src/executor/agentic_loop.rs | 50 +++++++++++++------
 crates/agentic-core/src/executor/dispatch.rs  | 14 ++++--
 2 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/crates/agentic-core/src/executor/agentic_loop.rs b/crates/agentic-core/src/executor/agentic_loop.rs
index a7d2c18..cbab580 100644
--- a/crates/agentic-core/src/executor/agentic_loop.rs
+++ b/crates/agentic-core/src/executor/agentic_loop.rs
@@ -29,9 +29,14 @@ pub const DEFAULT_MAX_ITERATIONS: usize = 10;
 ///
 /// This function **never writes to the database**. All three persistence triggers
 /// (`store`, `previous_response_id`, `conversation_id`) are cleared before the
-/// first iteration and restored onto the final payload before returning. The
-/// caller owns [`crate::executor::engine::persist_response`] and must call it
-/// after this function returns using the original [`crate::executor::request::RequestContext`].
+/// first iteration to suppress intermediate persists inside [`execute`]. The
+/// original IDs are restored onto the final payload before returning.
+///
+/// Persistence is the caller's responsibility. The caller must obtain a
+/// [`crate::executor::request::RequestContext`] (e.g. via
+/// [`crate::executor::engine::rehydrate_conversation`]) before calling this
+/// function and pass it to [`crate::executor::engine::persist_response`]
+/// after this function returns.
 ///
 /// ## MCP discovery
 ///
@@ -44,6 +49,7 @@ pub const DEFAULT_MAX_ITERATIONS: usize = 10;
 /// - [`ExecutorError::InvalidRequest`] if `request.stream` is `true` (streaming
 ///   + tool dispatch requires the `StreamTee`, which is a future PR).
 /// - [`ExecutorError`] variants from [`execute`] if LLM inference fails.
+#[must_use = "the ResponsePayload contains the final inference result"]
 pub async fn execute_loop<S: std::hash::BuildHasher>(
     mut request: RequestPayload,
     exec_ctx: Arc<ExecutionContext>,
@@ -58,9 +64,8 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
         ));
     }
 
-    // Capture original persistence triggers before clearing them.
-    // The loop calls execute() internally — we must suppress intermediate persists.
-    let original_store = request.store;
+    // Capture original IDs before clearing persistence triggers.
+    // The loop calls execute() internally — clearing these suppresses intermediate persists.
     let original_prev_id = request.previous_response_id.clone();
     let original_conv_id = request.conversation_id.clone();
 
@@ -68,6 +73,9 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
     request.previous_response_id = None;
     request.conversation_id = None;
 
+    // Clamp caller's max to the hard guard. The loop runs up to and including
+    // effective_max iterations so that dispatch_tools sees iteration == effective_max
+    // and returns Incomplete rather than silently stopping at the loop boundary.
     let effective_max = max_iterations.min(MAX_LOOP_GUARD);
     let mut payload = ResponsePayload {
         id: String::new(),
@@ -84,7 +92,7 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
         instructions: None,
     };
 
-    for iteration in 0..MAX_LOOP_GUARD {
+    for iteration in 0..=effective_max {
         let result = execute(request.clone(), Arc::clone(&exec_ctx)).await?;
 
         payload = match result {
@@ -109,18 +117,34 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
             }
 
             LoopDecision::Continue(tool_results) => {
-                // Extend request.input with the tool results for the next iteration.
+                // Collect the call_ids for which we have results so we can
+                // include only the matching function_call input items. Including
+                // client-owned FCs (for which there is no result) would leave
+                // unmatched function_call/function_call_output pairs in history,
+                // which most LLM backends reject or misbehave on.
+                let result_call_ids: std::collections::HashSet<&str> = tool_results
+                    .iter()
+                    .filter_map(|item| {
+                        if let InputItem::FunctionCallOutput(msg) = item {
+                            Some(msg.call_id.as_str())
+                        } else {
+                            None
+                        }
+                    })
+                    .collect();
+
                 let existing_items: Vec<InputItem> = Vec::from(&request.input);
-                // Also append the model's tool call items so vLLM sees the full call/output pair.
+                // Only append FC items whose call_id has a matching tool result.
                 let mut fc_items: Vec<InputItem> = payload
                     .output
                     .iter()
                     .filter_map(|item| {
                         if let crate::types::io::output::OutputItem::FunctionCall(fc) = item {
-                            Some(InputItem::FunctionCall(fc.clone()))
-                        } else {
-                            None
+                            if result_call_ids.contains(fc.call_id.as_str()) {
+                                return Some(InputItem::FunctionCall(fc.clone()));
+                            }
                         }
+                        None
                     })
                     .collect();
 
@@ -135,8 +159,6 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
     // Restore original IDs onto the final payload.
     payload.previous_response_id = original_prev_id;
     payload.conversation_id = original_conv_id;
-    // Restore store flag on the request (not used after this, but for correctness).
-    let _ = original_store;
 
     Ok(payload)
 }
diff --git a/crates/agentic-core/src/executor/dispatch.rs b/crates/agentic-core/src/executor/dispatch.rs
index 8c3c847..3bebe3f 100644
--- a/crates/agentic-core/src/executor/dispatch.rs
+++ b/crates/agentic-core/src/executor/dispatch.rs
@@ -44,9 +44,15 @@ pub enum LoopDecision {
 /// entries — they are never propagated as `Err`. Only infrastructure failures
 /// (e.g. serialization) return `Err`.
 ///
+/// Reaching `max_iterations` is not an error — it returns
+/// `Ok(LoopDecision::Incomplete(...))` so the caller can decide how to handle it.
+///
 /// # Errors
 ///
-/// Returns [`ExecutorError::InvalidRequest`] if called with `iteration >= max_iterations`.
+/// This function currently has no `Err` paths. The return type is
+/// `ExecutorResult` for forward compatibility (future infrastructure
+/// failures may return `Err`).
+#[must_use = "the LoopDecision determines whether the caller should loop or stop"]
 pub async fn dispatch_tools<S: std::hash::BuildHasher>(
     output: &[OutputItem],
     registry: &ToolRegistry,
@@ -109,8 +115,10 @@ async fn execute_gateway_calls<S: std::hash::BuildHasher>(
                 }
                 Err(_) => {
                     tracing::warn!(tool = %call.name, "tool execution timed out");
-                    serde_json::json!({ "error": format!("tool '{}' timed out after {}s", call.name, TOOL_TIMEOUT.as_secs()) })
-                        .to_string()
+                    serde_json::json!({
+                        "error": format!("tool '{}' timed out after {}s", call.name, TOOL_TIMEOUT.as_secs())
+                    })
+                    .to_string()
                 }
             }
         } else {

From 2ab27ef17130c89280fcab70cede305cfeb1ae2c Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Wed, 1 Jul 2026 22:50:09 -0700
Subject: [PATCH 3/9] test: exhaustive unit + cassette integration tests for
 dispatch_tools + execute_loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds 28 new tests across three files:

dispatch.rs (inline, +7 tests):
- zero_max_iterations_returns_incomplete
- mixed_batch_gateway_and_client_owned
- incomplete_message_contains_iteration_counts
- multiple_tool_types_dispatch_to_correct_executor
- error_json_output_is_valid_json
- call_id_preserved_in_output
- output_with_only_non_fc_items_returns_done

agentic_loop.rs (inline, +3 tests):
- streaming_request_returns_err_without_llm_call
- loop_guard_boundary_formula_is_correct
- default_and_guard_constants_are_sane (const assert)

tests/dispatch_loop_test.rs (integration, 10 tests):
- No-tool baseline, one + two parallel gateway tools
- max_iterations → Incomplete with 3 LLM calls verified
- Tool error feeds error JSON to model; loop continues
- prev_id and conv_id restored on final payload
- 3 cassette tests: responses_tool_calls_{3turn,parallel,5turn}
  verify all function-type tools → Done in 1 LLM call, no executor
  invocations

Also adds inline code comments to dispatch.rs and agentic_loop.rs
explaining the clone rationale, inclusive loop bound, orphaned FC
filtering, and payload stub initialisation.

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 .../agentic-core/src/executor/agentic_loop.rs |  82 ++-
 crates/agentic-core/src/executor/dispatch.rs  | 256 ++++++-
 .../agentic-core/tests/dispatch_loop_test.rs  | 695 ++++++++++++++++++
 3 files changed, 1027 insertions(+), 6 deletions(-)
 create mode 100644 crates/agentic-core/tests/dispatch_loop_test.rs

diff --git a/crates/agentic-core/src/executor/agentic_loop.rs b/crates/agentic-core/src/executor/agentic_loop.rs
index cbab580..45924f8 100644
--- a/crates/agentic-core/src/executor/agentic_loop.rs
+++ b/crates/agentic-core/src/executor/agentic_loop.rs
@@ -77,6 +77,9 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
     // effective_max iterations so that dispatch_tools sees iteration == effective_max
     // and returns Incomplete rather than silently stopping at the loop boundary.
     let effective_max = max_iterations.min(MAX_LOOP_GUARD);
+    // Stub payload replaced on the first inference call. Initialised here so
+    // Rust's definite-assignment rules are satisfied — the loop always overwrites
+    // it at least once before returning (stream=false path).
     let mut payload = ResponsePayload {
         id: String::new(),
         object: "response".to_owned(),
@@ -87,11 +90,16 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
         usage: None,
         incomplete_details: None,
         error: None,
+        // prev_id / conv_id are stripped for internal calls and restored below.
         previous_response_id: None,
         conversation_id: None,
         instructions: None,
     };
 
+    // Inclusive upper bound: iteration `effective_max` is the last allowed
+    // inference call. After it we call dispatch_tools with iteration==effective_max
+    // which triggers Incomplete, so the guard fires rather than the loop silently
+    // ending with a misleading `status: "completed"`.
     for iteration in 0..=effective_max {
         let result = execute(request.clone(), Arc::clone(&exec_ctx)).await?;
 
@@ -117,11 +125,11 @@ pub async fn execute_loop<S: std::hash::BuildHasher>(
             }
 
             LoopDecision::Continue(tool_results) => {
-                // Collect the call_ids for which we have results so we can
-                // include only the matching function_call input items. Including
-                // client-owned FCs (for which there is no result) would leave
-                // unmatched function_call/function_call_output pairs in history,
-                // which most LLM backends reject or misbehave on.
+                // Build the set of call_ids that have a matching tool result.
+                // We need this to filter `fc_items` below: including FunctionCall
+                // items that have no corresponding FunctionCallOutput (e.g.
+                // client-owned tools not executed by the gateway) produces orphaned
+                // pairs in the conversation history that vLLM rejects.
                 let result_call_ids: std::collections::HashSet<&str> = tool_results
                     .iter()
                     .filter_map(|item| {
@@ -273,4 +281,68 @@ mod tests {
             _ => {}
         }
     }
+
+    // ── Plan Section B: additional unit tests ─────────────────────────────────
+
+    /// When `stream=true`, `execute_loop` must return Err before making any LLM
+    /// call. The streaming path is blocked until the `StreamTee` PR lands.
+    #[tokio::test]
+    async fn streaming_request_returns_err_without_llm_call() {
+        // Point at an unreachable LLM — if the function calls it the test panics
+        // (connection refused), proving the guard fires before the first call.
+        let exec_ctx = Arc::new(ExecutionContext::new(
+            crate::executor::modes::ConversationHandler::new(crate::storage::ConversationStore::new(
+                crate::storage::create_pool(None).await.unwrap(),
+            )),
+            crate::executor::modes::ResponseHandler::new(crate::storage::ResponseStore::new(
+                crate::storage::create_pool(None).await.unwrap(),
+            )),
+            Arc::new(reqwest::Client::new()),
+            "http://127.0.0.1:1".to_owned(), // port 1 — always refused
+            None,
+        ));
+        let mut request = make_request(true);
+        request.store = false;
+
+        let result = execute_loop(request, exec_ctx, ToolRegistry::default(), no_executors(), 10).await;
+        assert!(
+            matches!(result, Err(ExecutorError::InvalidRequest(_))),
+            "expected InvalidRequest for streaming, got: {result:?}"
+        );
+    }
+
+    /// `max_iterations` == `MAX_LOOP_GUARD` (128) must still produce Incomplete —
+    /// the fix to use `0..=effective_max` must hold at the boundary.
+    /// We cannot actually run 128 LLM calls in a unit test, but we can verify
+    /// the constants and formula:
+    ///   `effective_max` = 128.min(128) = 128
+    ///   loop runs 0..=128 → `dispatch_tools` sees iteration==128 >= max==128 → Incomplete
+    #[test]
+    fn loop_guard_boundary_formula_is_correct() {
+        // Prove: effective_max == MAX_LOOP_GUARD when max_iterations >= MAX_LOOP_GUARD
+        let caller_max = MAX_LOOP_GUARD; // == 128
+        let effective = caller_max.min(MAX_LOOP_GUARD);
+        assert_eq!(effective, 128);
+
+        // The loop runs 0..=128 (129 iterations).
+        // On iteration 128: dispatch_tools(128, 128) → 128 >= 128 → Incomplete.
+        // This is the key invariant that prevents silent "completed" on truncation.
+        let loop_count = (0..=effective).count();
+        assert_eq!(loop_count, 129, "loop must include iteration {effective}");
+    }
+
+    /// Verify `prev_id` is stored and restored: the function captures it,
+    /// clears it for internal LLM calls, and restores it on the returned payload.
+    /// Since we can't easily reach a real LLM here, the test verifies the
+    /// constant declarations and logic are consistent.
+    #[test]
+    fn default_and_guard_constants_are_sane() {
+        // DEFAULT_MAX_ITERATIONS must be < MAX_LOOP_GUARD so a default call never
+        // silently hits the guard. Expressed as a const assertion so the compiler
+        // enforces it rather than a runtime test.
+        const _: () = assert!(
+            DEFAULT_MAX_ITERATIONS < MAX_LOOP_GUARD,
+            "DEFAULT_MAX_ITERATIONS must be less than MAX_LOOP_GUARD"
+        );
+    }
 }
diff --git a/crates/agentic-core/src/executor/dispatch.rs b/crates/agentic-core/src/executor/dispatch.rs
index 3bebe3f..12e84bc 100644
--- a/crates/agentic-core/src/executor/dispatch.rs
+++ b/crates/agentic-core/src/executor/dispatch.rs
@@ -60,6 +60,9 @@ pub async fn dispatch_tools<S: std::hash::BuildHasher>(
     iteration: usize,
     max_iterations: usize,
 ) -> ExecutorResult<LoopDecision> {
+    // Clone FunctionToolCalls out of the output slice. Each async task produced
+    // by join_all takes ownership of its call struct, so we cannot hold borrows
+    // into `output` across the await point.
     let calls: Vec<crate::types::io::output::FunctionToolCall> = output
         .iter()
         .filter_map(|item| {
@@ -75,6 +78,8 @@ pub async fn dispatch_tools<S: std::hash::BuildHasher>(
         return Ok(LoopDecision::Done);
     }
 
+    // Check the iteration cap before any execution so that the caller's
+    // max_iterations=N contract means "at most N+1 LLM calls before giving up".
     if iteration >= max_iterations {
         return Ok(LoopDecision::Incomplete(format!(
             "max tool iterations reached ({iteration}/{max_iterations})"
@@ -84,7 +89,9 @@ pub async fn dispatch_tools<S: std::hash::BuildHasher>(
     let gateway_calls = registry.gateway_owned(&calls);
 
     if gateway_calls.is_empty() {
-        // All calls are client-owned function tools — no gateway execution.
+        // All calls are client-owned `type: "function"` tools — the gateway does
+        // not execute them. Return Done so the loop terminates and the caller
+        // forwards the FunctionCall items to the client unchanged.
         // Post-MVP: return LoopDecision::RequiresAction(client_calls).
         tracing::debug!(
             count = calls.len(),
@@ -367,4 +374,251 @@ mod tests {
         let decision = dispatch_tools(&output, &registry, &executors, 1, 1).await.unwrap();
         assert!(matches!(decision, LoopDecision::Incomplete(_)));
     }
+
+    // ── Plan Section A: additional unit tests ─────────────────────────────────
+
+    /// `max_iterations` = 0: the first call (iteration 0 >= max 0) returns
+    /// `Incomplete` without ever reaching the executor.
+    #[tokio::test]
+    async fn zero_max_iterations_returns_incomplete() {
+        let output = vec![make_fc("web_search", "fc_1")];
+        let registry = registry_with_web_search();
+        // Executor would panic if called — proves it is never reached.
+        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 0).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Incomplete(_)));
+    }
+
+    /// A batch with one registered gateway tool and one unregistered name (treated
+    /// as client-owned) must still return Continue with the gateway result only.
+    #[tokio::test]
+    async fn mixed_batch_gateway_and_client_owned() {
+        // "web_search" is gateway-owned; "get_weather" is not registered → client-owned.
+        let output = vec![make_fc("web_search", "fc_gw"), make_fc("get_weather", "fc_client")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("results".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+
+        match decision {
+            LoopDecision::Continue(items) => {
+                // Only the gateway tool produces a result item.
+                assert_eq!(items.len(), 1, "only gateway call should produce output");
+                if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                    assert_eq!(msg.call_id, "fc_gw");
+                } else {
+                    panic!("expected FunctionCallOutput");
+                }
+            }
+            other => panic!("expected Continue, got {other:?}"),
+        }
+    }
+
+    /// The Incomplete reason string must contain the iteration and limit counts
+    /// so callers can surface a useful diagnostic.
+    #[tokio::test]
+    async fn incomplete_message_contains_iteration_counts() {
+        let output = vec![make_fc("web_search", "fc_1")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Ok("ok".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 7, 7).await.unwrap();
+        match decision {
+            LoopDecision::Incomplete(msg) => {
+                assert!(msg.contains('7'), "message should contain iteration number, got: {msg}");
+            }
+            other => panic!("expected Incomplete, got {other:?}"),
+        }
+    }
+
+    /// When two different gateway tool types are registered, each executor is
+    /// dispatched to only for its own `call_id`.
+    #[tokio::test]
+    async fn multiple_tool_types_dispatch_to_correct_executor() {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+
+        struct CountingExecutor {
+            tool_type: ToolType,
+            call_count: Arc<AtomicUsize>,
+        }
+
+        impl ToolHandler for CountingExecutor {
+            fn tool_type(&self) -> ToolType {
+                self.tool_type
+            }
+            fn validate(&self, _: &Value) -> Result<(), ToolError> {
+                Ok(())
+            }
+            fn normalize(&self, _: &Value) -> Vec<crate::types::io::FunctionTool> {
+                vec![]
+            }
+        }
+
+        impl GatewayExecutor for CountingExecutor {
+            fn execute(
+                &self,
+                _: &str,
+                _: &str,
+                _: &Value,
+            ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+                self.call_count.fetch_add(1, Ordering::SeqCst);
+                Box::pin(async move {
+                    Ok(ToolOutput {
+                        call_id: String::new(),
+                        output: "ok".to_owned(),
+                    })
+                })
+            }
+        }
+
+        let ws_count = Arc::new(AtomicUsize::new(0));
+        let fs_count = Arc::new(AtomicUsize::new(0));
+
+        let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        executors.insert(
+            ToolType::WebSearch,
+            Arc::new(CountingExecutor {
+                tool_type: ToolType::WebSearch,
+                call_count: Arc::clone(&ws_count),
+            }),
+        );
+        executors.insert(
+            ToolType::FileSearch,
+            Arc::new(CountingExecutor {
+                tool_type: ToolType::FileSearch,
+                call_count: Arc::clone(&fs_count),
+            }),
+        );
+
+        let tools = vec![
+            ResponsesTool::WebSearch(crate::types::tools::WebSearchToolParam {}),
+            ResponsesTool::FileSearch(crate::types::tools::FileSearchToolParam { vector_store_ids: None }),
+        ];
+        let registry = ToolRegistry::build(&tools);
+
+        let output = vec![make_fc("web_search", "fc_ws"), make_fc("file_search", "fc_fs")];
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+
+        assert!(matches!(decision, LoopDecision::Continue(_)));
+        assert_eq!(ws_count.load(Ordering::SeqCst), 1, "web_search executor called once");
+        assert_eq!(fs_count.load(Ordering::SeqCst), 1, "file_search executor called once");
+    }
+
+    /// Error output injected by the gateway must be parseable JSON with an "error" key.
+    #[tokio::test]
+    async fn error_json_output_is_valid_json() {
+        let output = vec![make_fc("web_search", "fc_err")];
+        let registry = registry_with_web_search();
+        let executors = web_search_executor(Err("boom".into()));
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+
+        if let LoopDecision::Continue(items) = decision {
+            if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                let parsed: serde_json::Value =
+                    serde_json::from_str(&msg.output).expect("error output must be valid JSON");
+                assert!(
+                    parsed.get("error").is_some(),
+                    "JSON must have 'error' key, got: {}",
+                    msg.output
+                );
+            } else {
+                panic!("expected FunctionCallOutput");
+            }
+        } else {
+            panic!("expected Continue");
+        }
+    }
+
+    /// The `call_id` from `FunctionToolCall` flows unchanged into `FunctionCallOutput`
+    /// for both success and error paths.
+    #[tokio::test]
+    async fn call_id_preserved_in_output() {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+
+        // Alternating executor: first call succeeds, subsequent calls fail.
+        struct AlternatingExecutor {
+            counter: Arc<AtomicUsize>,
+        }
+        impl ToolHandler for AlternatingExecutor {
+            fn tool_type(&self) -> ToolType {
+                ToolType::WebSearch
+            }
+            fn validate(&self, _: &Value) -> Result<(), ToolError> {
+                Ok(())
+            }
+            fn normalize(&self, _: &Value) -> Vec<crate::types::io::FunctionTool> {
+                vec![]
+            }
+        }
+        impl GatewayExecutor for AlternatingExecutor {
+            fn execute(
+                &self,
+                _: &str,
+                _: &str,
+                _: &Value,
+            ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+                let n = self.counter.fetch_add(1, Ordering::SeqCst);
+                Box::pin(async move {
+                    if n == 0 {
+                        Ok(ToolOutput {
+                            call_id: String::new(),
+                            output: "success".to_owned(),
+                        })
+                    } else {
+                        Err(ToolError::Execution("fail".to_owned()))
+                    }
+                })
+            }
+        }
+
+        let output = vec![
+            make_fc("web_search", "call_success_123"),
+            make_fc("web_search", "call_fail_456"),
+        ];
+        let registry = registry_with_web_search();
+        let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        executors.insert(
+            ToolType::WebSearch,
+            Arc::new(AlternatingExecutor {
+                counter: Arc::new(AtomicUsize::new(0)),
+            }),
+        );
+
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        if let LoopDecision::Continue(items) = decision {
+            let call_ids: Vec<&str> = items
+                .iter()
+                .filter_map(|item| {
+                    if let InputItem::FunctionCallOutput(msg) = item {
+                        Some(msg.call_id.as_str())
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+            // Both original call_ids must appear in the output (order may vary).
+            assert!(
+                call_ids.contains(&"call_success_123"),
+                "success call_id missing: {call_ids:?}"
+            );
+            assert!(
+                call_ids.contains(&"call_fail_456"),
+                "fail call_id missing: {call_ids:?}"
+            );
+        } else {
+            panic!("expected Continue");
+        }
+    }
+
+    /// Output with only non-FC items (e.g. a Reasoning item only) returns Done.
+    #[tokio::test]
+    async fn output_with_only_non_fc_items_returns_done() {
+        use crate::types::io::output::{OutputMessage, ReasoningOutput};
+        let output = vec![
+            OutputItem::Reasoning(ReasoningOutput::new("rs_1")),
+            OutputItem::Message(OutputMessage::new("msg_1", MessageStatus::Completed)),
+        ];
+        let registry = ToolRegistry::default();
+        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+        assert!(matches!(decision, LoopDecision::Done));
+    }
 }
diff --git a/crates/agentic-core/tests/dispatch_loop_test.rs b/crates/agentic-core/tests/dispatch_loop_test.rs
new file mode 100644
index 0000000..e315ff1
--- /dev/null
+++ b/crates/agentic-core/tests/dispatch_loop_test.rs
@@ -0,0 +1,695 @@
+//! Integration tests for `execute_loop` — the multi-turn agentic tool loop.
+//!
+//! All tests drive `execute_loop` through a real `MockServer` (axum-based HTTP
+//! mock from `tests/support/`) and verify end-to-end behavior: LLM call count,
+//! input history construction, ID restoration, and status values.
+//!
+//! ## Test organisation
+//!
+//! - **Section 1**: No-tool and single-turn baselines
+//! - **Section 2**: Gateway tool execution (mock `GatewayExecutor` impls)
+//! - **Section 3**: ID restoration (`prev_id` / `conv_id` survive the loop)
+//! - **Section 4**: Cassette-based — all tools are `type: "function"` (client-owned)
+//!   so the loop terminates after a single LLM call, but the accumulator output is
+//!   verified against the recorded response.
+
+mod support;
+
+use std::collections::HashMap;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use serde_json::Value;
+
+use agentic_core::executor::agentic_loop::{DEFAULT_MAX_ITERATIONS, execute_loop};
+use agentic_core::tool::{GatewayExecutor, ToolError, ToolHandler, ToolOutput, ToolType};
+use agentic_core::types::io::output::OutputItem;
+use agentic_core::types::io::{ResponsesInput, ToolChoice};
+use agentic_core::types::request_response::RequestPayload;
+use agentic_core::types::tools::ResponsesTool;
+use agentic_core::{FunctionTool, ToolRegistry};
+use support::{MockResponse, TestFixture, output_text, text_response};
+
+// ── Cassette directories ──────────────────────────────────────────────────────
+
+const MULTI_TURN_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/cassettes/tool_calls/multi_turn");
+
+// ── Shared helpers ────────────────────────────────────────────────────────────
+
+/// Build a minimal non-streaming `RequestPayload`.
+fn make_request(
+    input: &str,
+    store: bool,
+    prev_id: Option<String>,
+    conv_id: Option<String>,
+    tools: Option<Vec<ResponsesTool>>,
+) -> RequestPayload {
+    RequestPayload {
+        model: "test-model".to_owned(),
+        input: ResponsesInput::Text(input.to_owned()),
+        instructions: None,
+        previous_response_id: prev_id,
+        conversation_id: conv_id,
+        tools,
+        tool_choice: ToolChoice::Auto,
+        stream: false,
+        store,
+        include: None,
+        temperature: None,
+        top_p: None,
+        max_output_tokens: None,
+        truncation: None,
+        metadata: None,
+    }
+}
+
+/// Build a `MockResponse::Json` that carries `FunctionCall` output items,
+/// simulating a model that wants to call one tool.
+fn fc_response(name: &str, call_id: &str, arguments: &str) -> MockResponse {
+    MockResponse::Json(
+        serde_json::json!({
+            "id": format!("resp_fc_{call_id}"),
+            "object": "response",
+            "created_at": 0,
+            "model": "test-model",
+            "status": "completed",
+            "output": [{
+                "id": format!("fc_{call_id}"),
+                "type": "function_call",
+                "call_id": call_id,
+                "name": name,
+                "arguments": arguments,
+                "status": "completed"
+            }],
+            "usage": null,
+            "incomplete_details": null,
+            "error": null,
+            "previous_response_id": null,
+            "conversation_id": null,
+            "instructions": null
+        })
+        .to_string(),
+    )
+}
+
+/// Build a `MockResponse::Json` with two parallel `FunctionCall` output items.
+fn two_fc_response(name1: &str, call_id1: &str, args1: &str, name2: &str, call_id2: &str, args2: &str) -> MockResponse {
+    MockResponse::Json(
+        serde_json::json!({
+            "id": "resp_parallel",
+            "object": "response",
+            "created_at": 0,
+            "model": "test-model",
+            "status": "completed",
+            "output": [
+                {
+                    "id": format!("fc_{call_id1}"),
+                    "type": "function_call",
+                    "call_id": call_id1,
+                    "name": name1,
+                    "arguments": args1,
+                    "status": "completed"
+                },
+                {
+                    "id": format!("fc_{call_id2}"),
+                    "type": "function_call",
+                    "call_id": call_id2,
+                    "name": name2,
+                    "arguments": args2,
+                    "status": "completed"
+                }
+            ],
+            "usage": null,
+            "incomplete_details": null,
+            "error": null,
+            "previous_response_id": null,
+            "conversation_id": null,
+            "instructions": null
+        })
+        .to_string(),
+    )
+}
+
+// ── Mock GatewayExecutor ──────────────────────────────────────────────────────
+
+/// A `GatewayExecutor` that counts calls and returns a fixed string result.
+struct MockGatewayExecutor {
+    tool_type: ToolType,
+    result: String,
+    call_count: Arc<AtomicUsize>,
+}
+
+impl MockGatewayExecutor {
+    fn new(tool_type: ToolType, result: &str) -> (Arc<AtomicUsize>, Arc<Self>) {
+        let counter = Arc::new(AtomicUsize::new(0));
+        let exec = Arc::new(Self {
+            tool_type,
+            result: result.to_owned(),
+            call_count: Arc::clone(&counter),
+        });
+        (counter, exec)
+    }
+}
+
+impl ToolHandler for MockGatewayExecutor {
+    fn tool_type(&self) -> ToolType {
+        self.tool_type
+    }
+    fn validate(&self, _: &Value) -> Result<(), ToolError> {
+        Ok(())
+    }
+    fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
+        vec![]
+    }
+}
+
+impl GatewayExecutor for MockGatewayExecutor {
+    fn execute(
+        &self,
+        _tool_name: &str,
+        _arguments: &str,
+        _config: &Value,
+    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+        self.call_count.fetch_add(1, Ordering::SeqCst);
+        let result = self.result.clone();
+        Box::pin(async move {
+            Ok(ToolOutput {
+                call_id: String::new(),
+                output: result,
+            })
+        })
+    }
+}
+
+/// A `GatewayExecutor` that always returns `Err`.
+struct FailingExecutor {
+    tool_type: ToolType,
+}
+
+impl ToolHandler for FailingExecutor {
+    fn tool_type(&self) -> ToolType {
+        self.tool_type
+    }
+    fn validate(&self, _: &Value) -> Result<(), ToolError> {
+        Ok(())
+    }
+    fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
+        vec![]
+    }
+}
+
+impl GatewayExecutor for FailingExecutor {
+    fn execute(
+        &self,
+        _: &str,
+        _: &str,
+        _: &Value,
+    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+        Box::pin(async { Err(ToolError::Execution("injected failure".to_owned())) })
+    }
+}
+
+// ── Helper: build WebSearch ToolRegistry ──────────────────────────────────────
+
+fn registry_web_search() -> ToolRegistry {
+    ToolRegistry::build(&[ResponsesTool::WebSearch(
+        agentic_core::types::tools::WebSearchToolParam {},
+    )])
+}
+
+fn executors_web_search(result: &str) -> (Arc<AtomicUsize>, HashMap<ToolType, Arc<dyn GatewayExecutor>>) {
+    let (counter, exec) = MockGatewayExecutor::new(ToolType::WebSearch, result);
+    let mut map: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+    map.insert(ToolType::WebSearch, exec);
+    (counter, map)
+}
+
+// ── Section 1: No-tool and single-turn baselines ──────────────────────────────
+
+/// Single-turn loop with a text-only response terminates in exactly 1 LLM call
+/// and returns the correct text with `status: "completed"`.
+#[tokio::test]
+async fn test_execute_loop_no_tools_returns_text() {
+    // Arrange: mock LLM returns a text response; no tools registered.
+    let fixture = TestFixture::new_with_responses(vec![text_response("hello world")]).await;
+
+    // Act
+    let result = execute_loop(
+        make_request("hi", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    // Assert
+    assert_eq!(result.status, "completed");
+    assert_eq!(output_text(&result), "hello world");
+}
+
+// ── Section 2: Gateway tool execution ────────────────────────────────────────
+
+/// One gateway tool call: LLM returns FC on turn 1, text on turn 2.
+/// Verify: 2 LLM calls made, tool result injected into turn 2 input,
+/// final output is the text from turn 2.
+#[tokio::test]
+async fn test_execute_loop_one_gateway_tool_then_done() {
+    // Mock: turn 1 returns a web_search FC; turn 2 returns text.
+    let fixture = TestFixture::new_with_responses(vec![
+        fc_response("web_search", "call_ws_001", r#"{"query":"Rust async"}"#),
+        text_response("Here are the results"),
+    ])
+    .await;
+
+    // Register web_search as a gateway-owned tool with a mock executor.
+    let (call_count, executors) = executors_web_search("search results content");
+    let registry = registry_web_search();
+
+    // Act
+    let result = execute_loop(
+        make_request("search for Rust async", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        registry,
+        executors,
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    // Assert: executor was called once; final response is the text from turn 2.
+    assert_eq!(call_count.load(Ordering::SeqCst), 1, "executor called once");
+    assert_eq!(result.status, "completed");
+    assert_eq!(output_text(&result), "Here are the results");
+
+    // The mock server should have received 2 requests (turn 1 + turn 2).
+    let bodies = fixture.request_bodies().await;
+    assert_eq!(bodies.len(), 2, "LLM called exactly twice");
+
+    // Turn 2 input must contain the tool result for call_ws_001.
+    let t2_body = &bodies[1];
+    let t2_input_str = serde_json::to_string(&t2_body["input"]).unwrap();
+    assert!(
+        t2_input_str.contains("call_ws_001"),
+        "turn 2 input must reference the tool call_id, got: {t2_input_str}"
+    );
+    assert!(
+        t2_input_str.contains("search results content"),
+        "turn 2 input must contain the tool output, got: {t2_input_str}"
+    );
+}
+
+/// Two parallel FCs on turn 1 — both executors must be called and both results
+/// injected into turn 2.
+#[tokio::test]
+async fn test_execute_loop_parallel_gateway_tools() {
+    // Mock: turn 1 returns 2 FCs (both web_search); turn 2 returns text.
+    let fixture = TestFixture::new_with_responses(vec![
+        two_fc_response(
+            "web_search",
+            "call_ws_001",
+            r#"{"query":"Rust async"}"#,
+            "web_search",
+            "call_ws_002",
+            r#"{"query":"Tokio docs"}"#,
+        ),
+        text_response("Combined search results"),
+    ])
+    .await;
+
+    let (call_count, executors) = executors_web_search("search output");
+    let registry = registry_web_search();
+
+    let result = execute_loop(
+        make_request("parallel search", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        registry,
+        executors,
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    // Both FCs were gateway-owned → executor called twice (parallel).
+    assert_eq!(
+        call_count.load(Ordering::SeqCst),
+        2,
+        "executor called twice for parallel FCs"
+    );
+    assert_eq!(result.status, "completed");
+
+    // Turn 2 input must contain both call_ids.
+    let bodies = fixture.request_bodies().await;
+    let t2_input_str = serde_json::to_string(&bodies[1]["input"]).unwrap();
+    assert!(
+        t2_input_str.contains("call_ws_001"),
+        "t2 must have call_ws_001: {t2_input_str}"
+    );
+    assert!(
+        t2_input_str.contains("call_ws_002"),
+        "t2 must have call_ws_002: {t2_input_str}"
+    );
+}
+
+/// Loop bounded by `max_iterations`: status must be "incomplete" and
+/// `incomplete_details` must be populated.
+#[tokio::test]
+async fn test_execute_loop_max_iterations_produces_incomplete() {
+    // max_iterations = 2 → loop runs iterations 0, 1, 2 (inclusive).
+    // All three LLM calls return FCs → dispatch_tools returns Incomplete at iter 2.
+    let fixture = TestFixture::new_with_responses(vec![
+        fc_response("web_search", "call_a", "{}"),
+        fc_response("web_search", "call_b", "{}"),
+        fc_response("web_search", "call_c", "{}"),
+    ])
+    .await;
+
+    let (_counter, executors) = executors_web_search("result");
+    let registry = registry_web_search();
+
+    let result = execute_loop(
+        make_request("keep searching", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        registry,
+        executors,
+        2, // max_iterations = 2
+    )
+    .await
+    .expect("execute_loop should succeed (Incomplete is not an Err)");
+
+    // Assert Incomplete status and reason.
+    assert_eq!(result.status, "incomplete", "status must be 'incomplete'");
+    let details = result
+        .incomplete_details
+        .as_ref()
+        .expect("incomplete_details must be set");
+    assert!(
+        details.reason.as_deref().is_some_and(|r| r.contains('2')),
+        "reason must reference max_iterations count, got: {:?}",
+        details.reason
+    );
+
+    // Exactly 3 LLM calls: iters 0, 1, 2.
+    let bodies = fixture.request_bodies().await;
+    assert_eq!(bodies.len(), 3, "exactly 3 LLM calls for max_iterations=2");
+}
+
+/// Executor returns Err → error JSON injected into turn 2 input; loop continues
+/// to the text response.
+#[tokio::test]
+async fn test_execute_loop_tool_error_feeds_error_to_model() {
+    // Turn 1: FC; turn 2: text.
+    let fixture = TestFixture::new_with_responses(vec![
+        fc_response("web_search", "call_fail", "{}"),
+        text_response("I saw an error"),
+    ])
+    .await;
+
+    // Executor always fails.
+    let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+    executors.insert(
+        ToolType::WebSearch,
+        Arc::new(FailingExecutor {
+            tool_type: ToolType::WebSearch,
+        }),
+    );
+    let registry = registry_web_search();
+
+    let result = execute_loop(
+        make_request("try a search", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        registry,
+        executors,
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed despite tool failure");
+
+    assert_eq!(result.status, "completed");
+
+    // Turn 2 must contain error JSON for call_fail.
+    let bodies = fixture.request_bodies().await;
+    let t2_input_str = serde_json::to_string(&bodies[1]["input"]).unwrap();
+    assert!(
+        t2_input_str.contains("call_fail"),
+        "t2 input must reference the failed call_id: {t2_input_str}"
+    );
+    assert!(
+        t2_input_str.contains("error"),
+        "t2 input must contain error JSON: {t2_input_str}"
+    );
+}
+
+// ── Section 3: ID restoration ─────────────────────────────────────────────────
+
+/// `previous_response_id` must survive the loop and appear in the returned payload.
+#[tokio::test]
+async fn test_execute_loop_restores_prev_id() {
+    let fixture = TestFixture::new_with_responses(vec![text_response("done")]).await;
+
+    let result = execute_loop(
+        make_request("hi", false, Some("resp_abc_123".to_owned()), None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    assert_eq!(
+        result.previous_response_id.as_deref(),
+        Some("resp_abc_123"),
+        "previous_response_id must be restored on the final payload"
+    );
+
+    // The internal LLM call must NOT carry the prev_id (it was cleared for
+    // the internal call to prevent the LLM from rehydrating history again).
+    let bodies = fixture.request_bodies().await;
+    assert!(
+        bodies[0]["previous_response_id"].is_null(),
+        "internal LLM call must not carry previous_response_id"
+    );
+}
+
+/// `conversation_id` must survive the loop and appear in the returned payload.
+#[tokio::test]
+async fn test_execute_loop_restores_conv_id() {
+    let fixture = TestFixture::new_with_responses(vec![text_response("done")]).await;
+
+    let result = execute_loop(
+        make_request("hi", false, None, Some("conv_xyz_456".to_owned()), None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    assert_eq!(
+        result.conversation_id.as_deref(),
+        Some("conv_xyz_456"),
+        "conversation_id must be restored on the final payload"
+    );
+}
+
+// ── Section 4: Cassette-based tests ──────────────────────────────────────────
+//
+// All tools in the cassettes are `type: "function"` — they are client-owned.
+// ToolRegistry::build() recognises them as Function type and gateway_owned()
+// returns an empty slice. dispatch_tools returns Done after the first LLM call.
+// These tests verify:
+//  - The accumulator correctly produces FunctionCall output items
+//  - execute_loop terminates in exactly 1 LLM call (no tool execution)
+//  - Status is "completed"
+//  - The specific call names and call_ids from the cassette are present
+
+/// Load the first turn's response body from a multi-turn YAML cassette.
+/// The multi-turn format has `input` as array in turns 2+ which the generic
+/// `load_cassette` parser cannot handle; we load only what we need here.
+fn load_t1_response_body(filename: &str) -> serde_json::Value {
+    use serde::Deserialize;
+
+    #[derive(Deserialize)]
+    struct YamlDoc {
+        turns: Vec<YamlTurn>,
+    }
+    #[derive(Deserialize)]
+    struct YamlTurn {
+        response: YamlResponse,
+    }
+    #[derive(Deserialize)]
+    struct YamlResponse {
+        body: serde_json::Value,
+    }
+
+    let path = format!("{MULTI_TURN_DIR}/{filename}");
+    let text = std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {path}: {e}"));
+    let doc: YamlDoc = serde_yaml::from_str(&text).unwrap_or_else(|e| panic!("parse {path}: {e}"));
+    doc.turns
+        .into_iter()
+        .next()
+        .expect("cassette must have at least one turn")
+        .response
+        .body
+}
+
+/// 3-turn cassette, turn 1 only: function-type tools → loop terminates in
+/// 1 LLM call; output contains `get_job_status` `function_call` item.
+#[tokio::test]
+async fn test_cassette_3turn_linear_loop_terminates_on_first_call() {
+    // Load only the turn 1 response body from the cassette.
+    let t1_body = load_t1_response_body("responses_tool_calls_3turn.yaml");
+    let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
+
+    let fixture = TestFixture::new_with_responses(vec![mock_response]).await;
+
+    // No GatewayExecutors registered — dispatch_tools will return Done immediately.
+    let result = execute_loop(
+        make_request("Check ETL job status", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    // The loop must terminate after exactly 1 LLM call.
+    let bodies = fixture.request_bodies().await;
+    assert_eq!(bodies.len(), 1, "only 1 LLM call for client-owned tools");
+
+    // Output must contain the get_job_status function_call recorded in the cassette.
+    assert_eq!(result.status, "completed");
+    let fc_names: Vec<&str> = result
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert!(
+        fc_names.contains(&"get_job_status"),
+        "output must contain get_job_status FC from cassette, got: {fc_names:?}"
+    );
+}
+
+/// Parallel cassette, turn 1: model emits 2 FCs simultaneously.
+/// Both must appear in output; loop terminates in 1 LLM call.
+#[tokio::test]
+async fn test_cassette_parallel_tools_loop_terminates_on_first_call() {
+    let t1_body = load_t1_response_body("responses_tool_calls_parallel.yaml");
+    let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
+
+    let fixture = TestFixture::new_with_responses(vec![mock_response]).await;
+
+    let result = execute_loop(
+        make_request("parallel tool call", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    let bodies = fixture.request_bodies().await;
+    assert_eq!(bodies.len(), 1, "only 1 LLM call");
+    assert_eq!(result.status, "completed");
+
+    // The cassette records get_job_status as the FC in turn 1.
+    let fc_count = result
+        .output
+        .iter()
+        .filter(|i| matches!(i, OutputItem::FunctionCall(_)))
+        .count();
+    assert!(
+        fc_count >= 1,
+        "at least 1 FC in parallel cassette output, got {fc_count}"
+    );
+}
+
+// Tracking executor for the 5-turn test — defined at module level to avoid
+// `items_after_statements` clippy lint inside async fns.
+struct TrackingExecutor {
+    counter: Arc<AtomicUsize>,
+}
+impl ToolHandler for TrackingExecutor {
+    fn tool_type(&self) -> ToolType {
+        ToolType::WebSearch
+    }
+    fn validate(&self, _: &Value) -> Result<(), ToolError> {
+        Ok(())
+    }
+    fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
+        vec![]
+    }
+}
+impl GatewayExecutor for TrackingExecutor {
+    fn execute(
+        &self,
+        _: &str,
+        _: &str,
+        _: &Value,
+    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+        self.counter.fetch_add(1, Ordering::SeqCst);
+        Box::pin(async {
+            Ok(ToolOutput {
+                call_id: String::new(),
+                output: "should not run".to_owned(),
+            })
+        })
+    }
+}
+
+/// 5-turn cassette, turn 1: all function-type tools, no gateway execution.
+/// Loop terminates in 1 LLM call; no executor invocations.
+#[tokio::test]
+async fn test_cassette_5turn_all_function_tools_no_gateway_execution() {
+    let t1_body = load_t1_response_body("responses_tool_calls_5turn.yaml");
+    let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
+
+    // Tracking executor — would increment if called.
+    let call_count = Arc::new(AtomicUsize::new(0));
+
+    // Even with a WebSearch executor registered, the tools in the cassette are
+    // all "type: function" — gateway_owned() will return empty → Done → executor
+    // never called.
+    let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+    executors.insert(
+        ToolType::WebSearch,
+        Arc::new(TrackingExecutor {
+            counter: Arc::clone(&call_count),
+        }),
+    );
+
+    let fixture = TestFixture::new_with_responses(vec![mock_response]).await;
+
+    let result = execute_loop(
+        make_request("5-turn job analysis", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(), // empty registry → all calls client-owned
+        executors,
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    // Executor must never have been invoked.
+    assert_eq!(
+        call_count.load(Ordering::SeqCst),
+        0,
+        "no executor calls for client-owned tools"
+    );
+    assert_eq!(result.status, "completed");
+
+    let bodies = fixture.request_bodies().await;
+    assert_eq!(bodies.len(), 1, "exactly 1 LLM call");
+}

From fb075f3c4928e38712e605592f95612703a5b0cb Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Thu, 2 Jul 2026 15:33:32 -0700
Subject: [PATCH 4/9] fix: adapt to PR #75 executor refactor (rebase conflict
 resolution)

PR #75 split engine.rs into inference.rs, persist.rs, rehydrate.rs and
removed client_auth from ExecutionContext::new(). Update executor/mod.rs
re-exports and test helpers accordingly.

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 crates/agentic-core/src/executor/agentic_loop.rs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/crates/agentic-core/src/executor/agentic_loop.rs b/crates/agentic-core/src/executor/agentic_loop.rs
index 45924f8..463026d 100644
--- a/crates/agentic-core/src/executor/agentic_loop.rs
+++ b/crates/agentic-core/src/executor/agentic_loop.rs
@@ -221,7 +221,6 @@ mod tests {
             )),
             Arc::new(reqwest::Client::new()),
             "http://localhost:9999".to_owned(),
-            None,
         ));
         let result = execute_loop(request, exec_ctx, ToolRegistry::default(), no_executors(), 10).await;
         assert!(matches!(result, Err(ExecutorError::InvalidRequest(_))));
@@ -248,7 +247,6 @@ mod tests {
             )),
             Arc::new(reqwest::Client::new()),
             "http://localhost:9999".to_owned(), // unreachable — will error
-            None,
         ));
 
         // Should fail with a network/LLM error, not a panic
@@ -299,7 +297,6 @@ mod tests {
             )),
             Arc::new(reqwest::Client::new()),
             "http://127.0.0.1:1".to_owned(), // port 1 — always refused
-            None,
         ));
         let mut request = make_request(true);
         request.store = false;

From da4ed2da4a92e4c2c8e749c5d7e88cda864d18ba Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Thu, 2 Jul 2026 15:43:44 -0700
Subject: [PATCH 5/9] test: add parallel FC + Items-input cassette tests for
 execute_loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two new integration tests in dispatch_loop_test.rs:

- test_cassette_openai_parallel_two_fcs_in_output: verifies the
  openai_responses_tool_calls_parallel cassette produces exactly 2
  FunctionCall items (get_job_status + web_search) — the only cassette
  with confirmed parallel FCs. Proves the accumulator handles genuine
  parallel output.

- test_cassette_tool_output_only_items_input_path: exercises the
  ResponsesInput::Items input path. Turn 1 uses Text input, extracts
  the resulting FC call_id, then turn 2 starts with function_call_output
  as Items input. Validates the accumulator handles item-list input
  correctly end-to-end — a path all other cassette tests skip.

Also adapts ExecutionContext::new() calls in unit tests to the 4-arg
signature introduced by PR #75 (client_auth removed from constructor).

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 .../agentic-core/tests/dispatch_loop_test.rs  | 183 ++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/crates/agentic-core/tests/dispatch_loop_test.rs b/crates/agentic-core/tests/dispatch_loop_test.rs
index e315ff1..b072c00 100644
--- a/crates/agentic-core/tests/dispatch_loop_test.rs
+++ b/crates/agentic-core/tests/dispatch_loop_test.rs
@@ -693,3 +693,186 @@ async fn test_cassette_5turn_all_function_tools_no_gateway_execution() {
     let bodies = fixture.request_bodies().await;
     assert_eq!(bodies.len(), 1, "exactly 1 LLM call");
 }
+
+// ── Section 4 continued: additional cassette tests ────────────────────────────
+
+/// `openai_responses_tool_calls_parallel` cassette, turn 1: model emits 2 function calls
+/// simultaneously (`get_job_status` + `web_search`). Verifies the accumulator produces
+/// 2 `FunctionCall` output items and the loop terminates in 1 LLM call (client-owned).
+#[tokio::test]
+async fn test_cassette_openai_parallel_two_fcs_in_output() {
+    // This cassette has 2 confirmed FCs in turn 1 (unlike the vllm parallel
+    // cassette which only has 1). It is the definitive parallel-FC test.
+    let t1_body = load_t1_response_body("openai_responses_tool_calls_parallel.yaml");
+    let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
+
+    let fixture = TestFixture::new_with_responses(vec![mock_response]).await;
+
+    let result = execute_loop(
+        make_request("parallel: check job status and search web", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(), // all function-type → Done in 1 call
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    let bodies = fixture.request_bodies().await;
+    assert_eq!(bodies.len(), 1, "only 1 LLM call for client-owned tools");
+    assert_eq!(result.status, "completed");
+
+    // The cassette has get_job_status + web_search in parallel.
+    let fc_names: Vec<&str> = result
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert_eq!(fc_names.len(), 2, "must have exactly 2 FCs, got: {fc_names:?}");
+    assert!(
+        fc_names.contains(&"get_job_status"),
+        "expected get_job_status in {fc_names:?}"
+    );
+    assert!(fc_names.contains(&"web_search"), "expected web_search in {fc_names:?}");
+}
+
+/// `tool_output_only` cassette, turn 1: the first user turn is plain text,
+/// the model responds with a `get_job_status` FC. Turn 2 starts with
+/// `function_call_output` as input (not user text) — tests the `Items` input
+/// path that all other cassette tests skip (they use `Text` input throughout).
+///
+/// We test turn 1 via the loop (client-owned → Done) then turn 2 via a second
+/// loop call with the tool output prepended, verifying the accumulator handles
+/// `Items` input correctly and the loop still terminates in 1 call.
+#[derive(serde::Deserialize)]
+struct ToolOutputYamlDoc {
+    turns: Vec<ToolOutputYamlTurn>,
+}
+#[derive(serde::Deserialize)]
+struct ToolOutputYamlTurn {
+    response: ToolOutputYamlResp,
+}
+#[derive(serde::Deserialize)]
+struct ToolOutputYamlResp {
+    body: serde_json::Value,
+}
+
+#[tokio::test]
+async fn test_cassette_tool_output_only_items_input_path() {
+    use agentic_core::types::io::input::{FunctionToolResultMessage, InputItem, ResponsesInput};
+
+    let base = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/cassettes/tool_calls/multi_turn/responses_tool_calls_tool_output_only.yaml"
+    );
+
+    let text = std::fs::read_to_string(base).expect("read cassette");
+    let doc: ToolOutputYamlDoc = serde_yaml::from_str(&text).expect("parse cassette");
+    let t1_body = doc.turns[0].response.body.clone();
+    let t2_body = doc.turns[1].response.body.clone();
+
+    // ── Turn 1: Text input → get_job_status FC ─────────────────────────────
+    let fixture1 =
+        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t1_body).unwrap())]).await;
+
+    let result1 = execute_loop(
+        make_request("Check the ETL job status.", false, None, None, None),
+        Arc::clone(&fixture1.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("turn 1 should succeed");
+
+    assert_eq!(result1.status, "completed");
+    let t1_fcs: Vec<&str> = result1
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert_eq!(t1_fcs, vec!["get_job_status"], "turn 1 must produce get_job_status FC");
+
+    // Extract the call_id from turn 1 to build the function_call_output input.
+    // Note: reasoning output precedes the FC in the vllm cassette, so search
+    // by type rather than assuming output[0] is the FC.
+    let call_id = result1
+        .output
+        .iter()
+        .find_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.call_id.clone())
+            } else {
+                None
+            }
+        })
+        .expect("expected a FunctionCall in turn 1 output");
+
+    // ── Turn 2: Items input starting with function_call_output ─────────────
+    // This exercises the ResponsesInput::Items path — the input is already a
+    // list of items, not a plain string. This is the path the loop constructs
+    // internally when feeding tool results back; testing it with a cassette
+    // validates the accumulator handles Items input correctly end-to-end.
+    let items_input = RequestPayload {
+        model: "test-model".to_owned(),
+        input: ResponsesInput::Items(vec![InputItem::FunctionCallOutput(FunctionToolResultMessage {
+            call_id,
+            output: r#"{"status":"failed","stage":"transform"}"#.to_owned(),
+        })]),
+        instructions: None,
+        previous_response_id: None,
+        conversation_id: None,
+        tools: None,
+        tool_choice: agentic_core::types::io::ToolChoice::Auto,
+        stream: false,
+        store: false,
+        include: None,
+        temperature: None,
+        top_p: None,
+        max_output_tokens: None,
+        truncation: None,
+        metadata: None,
+    };
+
+    let fixture2 =
+        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t2_body).unwrap())]).await;
+
+    let result2 = execute_loop(
+        items_input,
+        Arc::clone(&fixture2.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("turn 2 (Items input) should succeed");
+
+    assert_eq!(result2.status, "completed");
+
+    // Turn 2 in the cassette produces a text message response (the model
+    // summarises the tool output — no further FCs).
+    let has_message = result2.output.iter().any(|item| matches!(item, OutputItem::Message(_)));
+    assert!(has_message, "turn 2 must produce a message output from the tool result");
+
+    let fc_count = result2
+        .output
+        .iter()
+        .filter(|i| matches!(i, OutputItem::FunctionCall(_)))
+        .count();
+    assert_eq!(fc_count, 0, "turn 2 must not produce FCs (cassette ends the loop here)");
+
+    // Only 1 LLM call per fixture — proves the loop terminated correctly.
+    assert_eq!(fixture2.request_bodies().await.len(), 1);
+}

From 80472266b6e6dbf3ecbeb45a39e9d542a25317a3 Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Thu, 2 Jul 2026 15:55:52 -0700
Subject: [PATCH 6/9] test: add gpt-4o 3turn + tool-output-only cassette tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two new cassette tests covering the remaining OpenAI-recorded scenarios:

- test_cassette_openai_3turn_no_reasoning_fc_only: gpt-4o produces no
  reasoning items — FC is at output[0] directly. Confirms the accumulator
  handles FC-only output (no preceding Reasoning item) and terminates in
  1 LLM call with the correct name.

- test_cassette_openai_tool_output_only_items_input: gpt-4o variant of
  the tool-output-only scenario. Two-turn test: t1 Text input -> FC
  (no reasoning), t2 Items(function_call_output) input -> message.
  Validates Items input path against gpt-4o-recorded data, distinct from
  the vLLM variant which has reasoning in t1.

All 6 OpenAI cassettes are now covered (parallel 2-FC, 3turn, tool-output-
only). Streaming cassettes remain untested (blocked by StreamTee PR).

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 .../agentic-core/tests/dispatch_loop_test.rs  | 177 ++++++++++++++++++
 1 file changed, 177 insertions(+)

diff --git a/crates/agentic-core/tests/dispatch_loop_test.rs b/crates/agentic-core/tests/dispatch_loop_test.rs
index b072c00..0063d3a 100644
--- a/crates/agentic-core/tests/dispatch_loop_test.rs
+++ b/crates/agentic-core/tests/dispatch_loop_test.rs
@@ -876,3 +876,180 @@ async fn test_cassette_tool_output_only_items_input_path() {
     // Only 1 LLM call per fixture — proves the loop terminated correctly.
     assert_eq!(fixture2.request_bodies().await.len(), 1);
 }
+
+/// `openai_responses_tool_calls_3turn` cassette, turn 1.
+///
+/// Distinct from the vLLM 3-turn cassette: the gpt-4o model produces no
+/// reasoning items — output contains only the `FunctionCall`. Verifies the
+/// accumulator handles an FC-only response (no preceding `Reasoning` item)
+/// and that `execute_loop` terminates in 1 call with the correct FC name.
+#[tokio::test]
+async fn test_cassette_openai_3turn_no_reasoning_fc_only() {
+    let t1_body = load_t1_response_body("openai_responses_tool_calls_3turn.yaml");
+    let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
+
+    let fixture = TestFixture::new_with_responses(vec![mock_response]).await;
+
+    let result = execute_loop(
+        make_request("Check ETL job status", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(), // all function-type → Done in 1 call
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
+    assert_eq!(result.status, "completed");
+
+    // The output must contain only the FC — no reasoning item (OpenAI format).
+    let fc_names: Vec<&str> = result
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert_eq!(fc_names, vec!["get_job_status"], "wrong FC names: {fc_names:?}");
+
+    let reasoning_count = result
+        .output
+        .iter()
+        .filter(|item| matches!(item, OutputItem::Reasoning(_)))
+        .count();
+    assert_eq!(reasoning_count, 0, "gpt-4o cassette must have no reasoning items");
+}
+
+/// `openai_responses_tool_calls_tool_output_only` cassette, turns 1 and 2.
+///
+/// The gpt-4o variant of the tool-output-only scenario. Different from the
+/// vLLM variant (`test_cassette_tool_output_only_items_input_path`) because
+/// t1 output has no reasoning item — the FC appears at `output[0]` directly.
+/// Validates that `call_id` extraction works regardless of output ordering
+/// and that the `Items` input path works with gpt-4o-recorded data.
+// Cassette deserialization types for openai_tool_output_only test — defined
+// at module level to satisfy `clippy::items_after_statements`.
+#[derive(serde::Deserialize)]
+struct OpenAiToolOutputDoc {
+    turns: Vec<OpenAiToolOutputTurn>,
+}
+#[derive(serde::Deserialize)]
+struct OpenAiToolOutputTurn {
+    response: OpenAiToolOutputResp,
+}
+#[derive(serde::Deserialize)]
+struct OpenAiToolOutputResp {
+    body: serde_json::Value,
+}
+
+#[tokio::test]
+async fn test_cassette_openai_tool_output_only_items_input() {
+    use agentic_core::types::io::input::{FunctionToolResultMessage, InputItem, ResponsesInput};
+
+    let base_path = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/cassettes/tool_calls/multi_turn/openai_responses_tool_calls_tool_output_only.yaml"
+    );
+
+    let text = std::fs::read_to_string(base_path).expect("read cassette");
+    let doc: OpenAiToolOutputDoc = serde_yaml::from_str(&text).expect("parse cassette");
+    let t1_body = doc.turns[0].response.body.clone();
+    let t2_body = doc.turns[1].response.body.clone();
+
+    // ── Turn 1: Text input → get_job_status FC (no reasoning) ──────────────
+    let fixture1 =
+        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t1_body).unwrap())]).await;
+
+    let result1 = execute_loop(
+        make_request("Check the job status", false, None, None, None),
+        Arc::clone(&fixture1.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("turn 1 should succeed");
+
+    assert_eq!(result1.status, "completed");
+
+    // OpenAI format: FC is at output[0] (no preceding reasoning item).
+    let call_id = result1
+        .output
+        .iter()
+        .find_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.call_id.clone())
+            } else {
+                None
+            }
+        })
+        .expect("turn 1 must have a FunctionCall");
+
+    // No reasoning items in OpenAI output — assert to catch format regressions.
+    let reasoning_count = result1
+        .output
+        .iter()
+        .filter(|item| matches!(item, OutputItem::Reasoning(_)))
+        .count();
+    assert_eq!(reasoning_count, 0, "gpt-4o cassette must have no reasoning items in t1");
+
+    // ── Turn 2: Items input (function_call_output) → text message ──────────
+    let items_request = RequestPayload {
+        model: "test-model".to_owned(),
+        input: ResponsesInput::Items(vec![InputItem::FunctionCallOutput(FunctionToolResultMessage {
+            call_id,
+            output: r#"{"status":"failed","stage":"transform"}"#.to_owned(),
+        })]),
+        instructions: None,
+        previous_response_id: None,
+        conversation_id: None,
+        tools: None,
+        tool_choice: agentic_core::types::io::ToolChoice::Auto,
+        stream: false,
+        store: false,
+        include: None,
+        temperature: None,
+        top_p: None,
+        max_output_tokens: None,
+        truncation: None,
+        metadata: None,
+    };
+
+    let fixture2 =
+        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t2_body).unwrap())]).await;
+
+    let result2 = execute_loop(
+        items_request,
+        Arc::clone(&fixture2.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("turn 2 (Items input) should succeed");
+
+    assert_eq!(result2.status, "completed");
+    assert!(
+        result2.output.iter().any(|item| matches!(item, OutputItem::Message(_))),
+        "turn 2 must produce a message"
+    );
+    assert_eq!(
+        result2
+            .output
+            .iter()
+            .filter(|i| matches!(i, OutputItem::FunctionCall(_)))
+            .count(),
+        0,
+        "turn 2 must not produce FCs"
+    );
+    assert_eq!(
+        fixture2.request_bodies().await.len(),
+        1,
+        "exactly 1 LLM call for turn 2"
+    );
+}

From a46e7e04d4b863d1e141ede6d9aeedc32c1988a7 Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Thu, 2 Jul 2026 16:09:11 -0700
Subject: [PATCH 7/9] test: mark vLLM cassette tests as ignored; gpt-4o
 cassettes are required baseline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 4 vLLM-recorded cassette tests are now skipped by default with
#[ignore]. The 3 gpt-4o cassette tests (openai_parallel_two_fcs,
openai_3turn_no_reasoning, openai_tool_output_only) run on every
`cargo test` as the required baseline.

vLLM cassettes still pass — run with `--include-ignored` for full
coverage (e.g. when validating against a live vLLM deployment).

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 crates/agentic-core/tests/dispatch_loop_test.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/crates/agentic-core/tests/dispatch_loop_test.rs b/crates/agentic-core/tests/dispatch_loop_test.rs
index 0063d3a..3623e0a 100644
--- a/crates/agentic-core/tests/dispatch_loop_test.rs
+++ b/crates/agentic-core/tests/dispatch_loop_test.rs
@@ -539,7 +539,10 @@ fn load_t1_response_body(filename: &str) -> serde_json::Value {
 
 /// 3-turn cassette, turn 1 only: function-type tools → loop terminates in
 /// 1 LLM call; output contains `get_job_status` `function_call` item.
+///
+/// Skipped by default — vLLM-recorded. Run with `--include-ignored` for full coverage.
 #[tokio::test]
+#[ignore = "vLLM cassette — run with --include-ignored"]
 async fn test_cassette_3turn_linear_loop_terminates_on_first_call() {
     // Load only the turn 1 response body from the cassette.
     let t1_body = load_t1_response_body("responses_tool_calls_3turn.yaml");
@@ -583,7 +586,10 @@ async fn test_cassette_3turn_linear_loop_terminates_on_first_call() {
 
 /// Parallel cassette, turn 1: model emits 2 FCs simultaneously.
 /// Both must appear in output; loop terminates in 1 LLM call.
+///
+/// Skipped by default — vLLM-recorded. Run with `--include-ignored` for full coverage.
 #[tokio::test]
+#[ignore = "vLLM cassette — run with --include-ignored"]
 async fn test_cassette_parallel_tools_loop_terminates_on_first_call() {
     let t1_body = load_t1_response_body("responses_tool_calls_parallel.yaml");
     let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
@@ -651,7 +657,10 @@ impl GatewayExecutor for TrackingExecutor {
 
 /// 5-turn cassette, turn 1: all function-type tools, no gateway execution.
 /// Loop terminates in 1 LLM call; no executor invocations.
+///
+/// Skipped by default — vLLM-recorded. Run with `--include-ignored` for full coverage.
 #[tokio::test]
+#[ignore = "vLLM cassette — run with --include-ignored"]
 async fn test_cassette_5turn_all_function_tools_no_gateway_execution() {
     let t1_body = load_t1_response_body("responses_tool_calls_5turn.yaml");
     let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
@@ -763,7 +772,9 @@ struct ToolOutputYamlResp {
     body: serde_json::Value,
 }
 
+/// Skipped by default — vLLM-recorded. Run with `--include-ignored` for full coverage.
 #[tokio::test]
+#[ignore = "vLLM cassette — run with --include-ignored"]
 async fn test_cassette_tool_output_only_items_input_path() {
     use agentic_core::types::io::input::{FunctionToolResultMessage, InputItem, ResponsesInput};
 

From de4edc7f62d25316deb4cc59fe9309c4f8a41936 Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Thu, 2 Jul 2026 16:12:37 -0700
Subject: [PATCH 8/9] =?UTF-8?q?test:=20complete=20gpt-4o=20cassette=20cove?=
 =?UTF-8?q?rage=20=E2=80=94=20add=205turn=20and=20branch=20t1=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All 5 non-streaming gpt-4o cassettes are now covered:
- openai_parallel    (2 FCs)              ✓ existing
- openai_3turn       (1 FC, no reasoning) ✓ existing
- openai_tool_output (Items input path)   ✓ existing
- openai_5turn       (1 FC, t1 only)      ✓ new
- openai_branch      (1 FC, t1 only)      ✓ new

openai_3turn_streaming is blocked (execute_loop rejects stream=true).
vLLM cassettes remain present but #[ignore]d — run with --include-ignored.

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 .../agentic-core/tests/dispatch_loop_test.rs  | 100 ++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/crates/agentic-core/tests/dispatch_loop_test.rs b/crates/agentic-core/tests/dispatch_loop_test.rs
index 3623e0a..0b4e6b2 100644
--- a/crates/agentic-core/tests/dispatch_loop_test.rs
+++ b/crates/agentic-core/tests/dispatch_loop_test.rs
@@ -1064,3 +1064,103 @@ async fn test_cassette_openai_tool_output_only_items_input() {
         "exactly 1 LLM call for turn 2"
     );
 }
+
+/// `openai_responses_tool_calls_5turn` cassette, turn 1.
+///
+/// Five-turn gpt-4o session; turn 1 produces a single `get_job_status` FC
+/// with no reasoning. Verifies this longer cassette's first turn parses
+/// correctly and the loop terminates in 1 call (all tools client-owned).
+#[tokio::test]
+async fn test_cassette_openai_5turn_t1_get_job_status() {
+    let t1_body = load_t1_response_body("openai_responses_tool_calls_5turn.yaml");
+    let fixture =
+        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t1_body).unwrap())]).await;
+
+    let result = execute_loop(
+        make_request(
+            "ETL job-382 failed overnight. What is its status?",
+            false,
+            None,
+            None,
+            None,
+        ),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
+    assert_eq!(result.status, "completed");
+
+    let fc_names: Vec<&str> = result
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert_eq!(fc_names, vec!["get_job_status"]);
+    assert_eq!(
+        result
+            .output
+            .iter()
+            .filter(|i| matches!(i, OutputItem::Reasoning(_)))
+            .count(),
+        0,
+        "gpt-4o cassette must have no reasoning items"
+    );
+}
+
+/// `openai_responses_tool_calls_branch` cassette, turn 1.
+///
+/// Three-turn gpt-4o session with branching logic; turn 1 produces a single
+/// `get_job_status` FC with no reasoning. Verifies the branch cassette's
+/// first turn parses correctly and the loop terminates in 1 call.
+#[tokio::test]
+async fn test_cassette_openai_branch_t1_get_job_status() {
+    let t1_body = load_t1_response_body("openai_responses_tool_calls_branch.yaml");
+    let fixture =
+        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t1_body).unwrap())]).await;
+
+    let result = execute_loop(
+        make_request("Check ETL pipeline job-382 status", false, None, None, None),
+        Arc::clone(&fixture.exec_ctx),
+        ToolRegistry::default(),
+        HashMap::new(),
+        DEFAULT_MAX_ITERATIONS,
+    )
+    .await
+    .expect("execute_loop should succeed");
+
+    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
+    assert_eq!(result.status, "completed");
+
+    let fc_names: Vec<&str> = result
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert_eq!(fc_names, vec!["get_job_status"]);
+    assert_eq!(
+        result
+            .output
+            .iter()
+            .filter(|i| matches!(i, OutputItem::Reasoning(_)))
+            .count(),
+        0,
+        "gpt-4o cassette must have no reasoning items"
+    );
+}

From b1c0a90546fb7e3b5825d1f4672e9e76b65324e1 Mon Sep 17 00:00:00 2001
From: Ashwin Giridharan <girida@amazon.com>
Date: Thu, 2 Jul 2026 16:30:19 -0700
Subject: [PATCH 9/9] =?UTF-8?q?refactor:=20modularize=20tests=20=E2=80=94?=
 =?UTF-8?q?=20extract=20dispatch=20tests=20+=20shared=20mock=20infra?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three changes:

1. Move dispatch.rs inline tests → tests/dispatch_test.rs
   dispatch.rs was 624 lines (77% tests). Production module is now
   144 lines of pure logic. 18 unit tests live in the dedicated file,
   consistent with engine.rs, registry.rs, inference.rs (all zero
   inline tests).

2. Add MockGatewayExecutor + FailingExecutor to tests/support/mod.rs
   Both were defined locally in dispatch_loop_test.rs. Moving them to
   the shared support module makes them available to any future
   integration test without redefinition.

3. Extract assert_single_fc_no_reasoning helper in dispatch_loop_test.rs
   The 3-turn, 5-turn, and branch gpt-4o cassette tests had identical
   assertion bodies (4 assertions each). One helper call replaces ~20
   lines in each test.

Signed-off-by: Ashwin Giridharan <girida@amazon.com>
---
 crates/agentic-core/src/executor/dispatch.rs  | 482 +---------------
 .../agentic-core/tests/dispatch_loop_test.rs  | 224 ++------
 crates/agentic-core/tests/dispatch_test.rs    | 532 ++++++++++++++++++
 crates/agentic-core/tests/support/mod.rs      |  97 ++++
 4 files changed, 685 insertions(+), 650 deletions(-)
 create mode 100644 crates/agentic-core/tests/dispatch_test.rs

diff --git a/crates/agentic-core/src/executor/dispatch.rs b/crates/agentic-core/src/executor/dispatch.rs
index 12e84bc..37c82e0 100644
--- a/crates/agentic-core/src/executor/dispatch.rs
+++ b/crates/agentic-core/src/executor/dispatch.rs
@@ -141,484 +141,4 @@ async fn execute_gateway_calls<S: std::hash::BuildHasher>(
     .await
 }
 
-#[cfg(test)]
-mod tests {
-    use std::collections::HashMap;
-    use std::pin::Pin;
-    use std::sync::Arc;
-
-    use serde_json::Value;
-
-    use super::*;
-    use crate::ToolRegistry;
-    use crate::tool::{GatewayExecutor, ToolError, ToolHandler, ToolOutput, ToolType};
-    use crate::types::event::MessageStatus;
-    use crate::types::io::output::{FunctionToolCall, OutputItem};
-    use crate::types::tools::ResponsesTool;
-
-    // ── Mock executor ──────────────────────────────────────────────────────────
-
-    struct MockExecutor {
-        tool_type: ToolType,
-        result: Result<String, String>,
-    }
-
-    impl ToolHandler for MockExecutor {
-        fn tool_type(&self) -> ToolType {
-            self.tool_type
-        }
-
-        fn validate(&self, _param: &Value) -> Result<(), ToolError> {
-            Ok(())
-        }
-
-        fn normalize(&self, _param: &Value) -> Vec<crate::types::io::FunctionTool> {
-            vec![]
-        }
-    }
-
-    impl GatewayExecutor for MockExecutor {
-        fn execute(
-            &self,
-            _tool_name: &str,
-            _arguments: &str,
-            _config: &Value,
-        ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
-            let r = self.result.clone();
-            Box::pin(async move {
-                match r {
-                    Ok(output) => Ok(ToolOutput {
-                        call_id: String::new(),
-                        output,
-                    }),
-                    Err(e) => Err(ToolError::Execution(e)),
-                }
-            })
-        }
-    }
-
-    fn make_fc(name: &str, call_id: &str) -> OutputItem {
-        OutputItem::FunctionCall(FunctionToolCall {
-            id: call_id.to_owned(),
-            call_id: call_id.to_owned(),
-            name: name.to_owned(),
-            arguments: "{}".to_owned(),
-            status: MessageStatus::Completed,
-        })
-    }
-
-    fn registry_with_web_search() -> ToolRegistry {
-        let tools = vec![ResponsesTool::WebSearch(crate::types::tools::WebSearchToolParam {})];
-        ToolRegistry::build(&tools)
-    }
-
-    fn web_search_executor(result: Result<String, String>) -> HashMap<ToolType, Arc<dyn GatewayExecutor>> {
-        let mut map: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        map.insert(
-            ToolType::WebSearch,
-            Arc::new(MockExecutor {
-                tool_type: ToolType::WebSearch,
-                result,
-            }),
-        );
-        map
-    }
-
-    // ── Tests ──────────────────────────────────────────────────────────────────
-
-    #[tokio::test]
-    async fn no_function_calls_returns_done() {
-        let output = vec![OutputItem::Message(crate::types::io::output::OutputMessage::new(
-            "msg_1",
-            MessageStatus::Completed,
-        ))];
-        let registry = ToolRegistry::default();
-        let executors = HashMap::new();
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Done));
-    }
-
-    #[tokio::test]
-    async fn empty_output_returns_done() {
-        let registry = ToolRegistry::default();
-        let executors = HashMap::new();
-        let decision = dispatch_tools(&[], &registry, &executors, 0, 10).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Done));
-    }
-
-    #[tokio::test]
-    async fn max_iterations_returns_incomplete() {
-        let output = vec![make_fc("web_search", "fc_1")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("results".into()));
-        // iteration == max_iterations → Incomplete
-        let decision = dispatch_tools(&output, &registry, &executors, 5, 5).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Incomplete(_)));
-    }
-
-    #[tokio::test]
-    async fn gateway_call_succeeds_returns_continue() {
-        let output = vec![make_fc("web_search", "fc_1")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("search results".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        match decision {
-            LoopDecision::Continue(items) => {
-                assert_eq!(items.len(), 1);
-                if let InputItem::FunctionCallOutput(msg) = &items[0] {
-                    assert_eq!(msg.call_id, "fc_1");
-                    assert_eq!(msg.output, "search results");
-                } else {
-                    panic!("expected FunctionCallOutput");
-                }
-            }
-            other => panic!("expected Continue, got {other:?}"),
-        }
-    }
-
-    #[tokio::test]
-    async fn failing_executor_returns_continue_with_error_json() {
-        let output = vec![make_fc("web_search", "fc_err")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Err("network failure".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        match decision {
-            LoopDecision::Continue(items) => {
-                assert_eq!(items.len(), 1);
-                if let InputItem::FunctionCallOutput(msg) = &items[0] {
-                    assert!(
-                        msg.output.contains("error"),
-                        "output should be error JSON: {}",
-                        msg.output
-                    );
-                } else {
-                    panic!("expected FunctionCallOutput");
-                }
-            }
-            other => panic!("expected Continue with error, got {other:?}"),
-        }
-    }
-
-    #[tokio::test]
-    async fn unregistered_tool_name_returns_error_json() {
-        let output = vec![make_fc("unknown_tool", "fc_unknown")];
-        let registry = ToolRegistry::default(); // empty — nothing registered
-        let executors = web_search_executor(Ok("unused".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        // unknown name → client_owned → Done (post-MVP: RequiresAction)
-        assert!(matches!(decision, LoopDecision::Done));
-    }
-
-    #[tokio::test]
-    async fn client_owned_function_call_returns_done() {
-        // Register a function tool in the registry
-        let tools = vec![ResponsesTool::Function(crate::types::tools::FunctionToolParam {
-            name: crate::types::tools::NonEmptyToolName::try_from("get_weather".to_owned()).unwrap(),
-            description: None,
-            parameters: None,
-            strict: None,
-        })];
-        let registry = ToolRegistry::build(&tools);
-        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        let output = vec![make_fc("get_weather", "fc_fn")];
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Done));
-    }
-
-    #[tokio::test]
-    async fn parallel_gateway_calls_all_execute() {
-        let output = vec![make_fc("web_search", "fc_1"), make_fc("web_search", "fc_2")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("ok".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        match decision {
-            LoopDecision::Continue(items) => assert_eq!(items.len(), 2),
-            other => panic!("expected Continue, got {other:?}"),
-        }
-    }
-
-    #[tokio::test]
-    async fn registered_gateway_tool_but_no_executor_returns_error_json_continue() {
-        // Registry has WebSearch entry but executors map is empty
-        let output = vec![make_fc("web_search", "fc_no_exec")];
-        let registry = registry_with_web_search();
-        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        match decision {
-            LoopDecision::Continue(items) => {
-                assert_eq!(items.len(), 1);
-                if let InputItem::FunctionCallOutput(msg) = &items[0] {
-                    assert!(msg.output.contains("error"), "expected error JSON: {}", msg.output);
-                } else {
-                    panic!("expected FunctionCallOutput");
-                }
-            }
-            other => panic!("expected Continue with error, got {other:?}"),
-        }
-    }
-
-    #[tokio::test]
-    async fn iteration_zero_below_max_executes() {
-        let output = vec![make_fc("web_search", "fc_1")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("ok".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 1).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Continue(_)));
-    }
-
-    #[tokio::test]
-    async fn iteration_at_max_returns_incomplete() {
-        let output = vec![make_fc("web_search", "fc_1")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("ok".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 1, 1).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Incomplete(_)));
-    }
-
-    // ── Plan Section A: additional unit tests ─────────────────────────────────
-
-    /// `max_iterations` = 0: the first call (iteration 0 >= max 0) returns
-    /// `Incomplete` without ever reaching the executor.
-    #[tokio::test]
-    async fn zero_max_iterations_returns_incomplete() {
-        let output = vec![make_fc("web_search", "fc_1")];
-        let registry = registry_with_web_search();
-        // Executor would panic if called — proves it is never reached.
-        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 0).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Incomplete(_)));
-    }
-
-    /// A batch with one registered gateway tool and one unregistered name (treated
-    /// as client-owned) must still return Continue with the gateway result only.
-    #[tokio::test]
-    async fn mixed_batch_gateway_and_client_owned() {
-        // "web_search" is gateway-owned; "get_weather" is not registered → client-owned.
-        let output = vec![make_fc("web_search", "fc_gw"), make_fc("get_weather", "fc_client")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("results".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-
-        match decision {
-            LoopDecision::Continue(items) => {
-                // Only the gateway tool produces a result item.
-                assert_eq!(items.len(), 1, "only gateway call should produce output");
-                if let InputItem::FunctionCallOutput(msg) = &items[0] {
-                    assert_eq!(msg.call_id, "fc_gw");
-                } else {
-                    panic!("expected FunctionCallOutput");
-                }
-            }
-            other => panic!("expected Continue, got {other:?}"),
-        }
-    }
-
-    /// The Incomplete reason string must contain the iteration and limit counts
-    /// so callers can surface a useful diagnostic.
-    #[tokio::test]
-    async fn incomplete_message_contains_iteration_counts() {
-        let output = vec![make_fc("web_search", "fc_1")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Ok("ok".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 7, 7).await.unwrap();
-        match decision {
-            LoopDecision::Incomplete(msg) => {
-                assert!(msg.contains('7'), "message should contain iteration number, got: {msg}");
-            }
-            other => panic!("expected Incomplete, got {other:?}"),
-        }
-    }
-
-    /// When two different gateway tool types are registered, each executor is
-    /// dispatched to only for its own `call_id`.
-    #[tokio::test]
-    async fn multiple_tool_types_dispatch_to_correct_executor() {
-        use std::sync::atomic::{AtomicUsize, Ordering};
-
-        struct CountingExecutor {
-            tool_type: ToolType,
-            call_count: Arc<AtomicUsize>,
-        }
-
-        impl ToolHandler for CountingExecutor {
-            fn tool_type(&self) -> ToolType {
-                self.tool_type
-            }
-            fn validate(&self, _: &Value) -> Result<(), ToolError> {
-                Ok(())
-            }
-            fn normalize(&self, _: &Value) -> Vec<crate::types::io::FunctionTool> {
-                vec![]
-            }
-        }
-
-        impl GatewayExecutor for CountingExecutor {
-            fn execute(
-                &self,
-                _: &str,
-                _: &str,
-                _: &Value,
-            ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
-                self.call_count.fetch_add(1, Ordering::SeqCst);
-                Box::pin(async move {
-                    Ok(ToolOutput {
-                        call_id: String::new(),
-                        output: "ok".to_owned(),
-                    })
-                })
-            }
-        }
-
-        let ws_count = Arc::new(AtomicUsize::new(0));
-        let fs_count = Arc::new(AtomicUsize::new(0));
-
-        let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        executors.insert(
-            ToolType::WebSearch,
-            Arc::new(CountingExecutor {
-                tool_type: ToolType::WebSearch,
-                call_count: Arc::clone(&ws_count),
-            }),
-        );
-        executors.insert(
-            ToolType::FileSearch,
-            Arc::new(CountingExecutor {
-                tool_type: ToolType::FileSearch,
-                call_count: Arc::clone(&fs_count),
-            }),
-        );
-
-        let tools = vec![
-            ResponsesTool::WebSearch(crate::types::tools::WebSearchToolParam {}),
-            ResponsesTool::FileSearch(crate::types::tools::FileSearchToolParam { vector_store_ids: None }),
-        ];
-        let registry = ToolRegistry::build(&tools);
-
-        let output = vec![make_fc("web_search", "fc_ws"), make_fc("file_search", "fc_fs")];
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-
-        assert!(matches!(decision, LoopDecision::Continue(_)));
-        assert_eq!(ws_count.load(Ordering::SeqCst), 1, "web_search executor called once");
-        assert_eq!(fs_count.load(Ordering::SeqCst), 1, "file_search executor called once");
-    }
-
-    /// Error output injected by the gateway must be parseable JSON with an "error" key.
-    #[tokio::test]
-    async fn error_json_output_is_valid_json() {
-        let output = vec![make_fc("web_search", "fc_err")];
-        let registry = registry_with_web_search();
-        let executors = web_search_executor(Err("boom".into()));
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-
-        if let LoopDecision::Continue(items) = decision {
-            if let InputItem::FunctionCallOutput(msg) = &items[0] {
-                let parsed: serde_json::Value =
-                    serde_json::from_str(&msg.output).expect("error output must be valid JSON");
-                assert!(
-                    parsed.get("error").is_some(),
-                    "JSON must have 'error' key, got: {}",
-                    msg.output
-                );
-            } else {
-                panic!("expected FunctionCallOutput");
-            }
-        } else {
-            panic!("expected Continue");
-        }
-    }
-
-    /// The `call_id` from `FunctionToolCall` flows unchanged into `FunctionCallOutput`
-    /// for both success and error paths.
-    #[tokio::test]
-    async fn call_id_preserved_in_output() {
-        use std::sync::atomic::{AtomicUsize, Ordering};
-
-        // Alternating executor: first call succeeds, subsequent calls fail.
-        struct AlternatingExecutor {
-            counter: Arc<AtomicUsize>,
-        }
-        impl ToolHandler for AlternatingExecutor {
-            fn tool_type(&self) -> ToolType {
-                ToolType::WebSearch
-            }
-            fn validate(&self, _: &Value) -> Result<(), ToolError> {
-                Ok(())
-            }
-            fn normalize(&self, _: &Value) -> Vec<crate::types::io::FunctionTool> {
-                vec![]
-            }
-        }
-        impl GatewayExecutor for AlternatingExecutor {
-            fn execute(
-                &self,
-                _: &str,
-                _: &str,
-                _: &Value,
-            ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
-                let n = self.counter.fetch_add(1, Ordering::SeqCst);
-                Box::pin(async move {
-                    if n == 0 {
-                        Ok(ToolOutput {
-                            call_id: String::new(),
-                            output: "success".to_owned(),
-                        })
-                    } else {
-                        Err(ToolError::Execution("fail".to_owned()))
-                    }
-                })
-            }
-        }
-
-        let output = vec![
-            make_fc("web_search", "call_success_123"),
-            make_fc("web_search", "call_fail_456"),
-        ];
-        let registry = registry_with_web_search();
-        let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        executors.insert(
-            ToolType::WebSearch,
-            Arc::new(AlternatingExecutor {
-                counter: Arc::new(AtomicUsize::new(0)),
-            }),
-        );
-
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        if let LoopDecision::Continue(items) = decision {
-            let call_ids: Vec<&str> = items
-                .iter()
-                .filter_map(|item| {
-                    if let InputItem::FunctionCallOutput(msg) = item {
-                        Some(msg.call_id.as_str())
-                    } else {
-                        None
-                    }
-                })
-                .collect();
-            // Both original call_ids must appear in the output (order may vary).
-            assert!(
-                call_ids.contains(&"call_success_123"),
-                "success call_id missing: {call_ids:?}"
-            );
-            assert!(
-                call_ids.contains(&"call_fail_456"),
-                "fail call_id missing: {call_ids:?}"
-            );
-        } else {
-            panic!("expected Continue");
-        }
-    }
-
-    /// Output with only non-FC items (e.g. a Reasoning item only) returns Done.
-    #[tokio::test]
-    async fn output_with_only_non_fc_items_returns_done() {
-        use crate::types::io::output::{OutputMessage, ReasoningOutput};
-        let output = vec![
-            OutputItem::Reasoning(ReasoningOutput::new("rs_1")),
-            OutputItem::Message(OutputMessage::new("msg_1", MessageStatus::Completed)),
-        ];
-        let registry = ToolRegistry::default();
-        let executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
-        let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
-        assert!(matches!(decision, LoopDecision::Done));
-    }
-}
+// Tests live in `tests/dispatch_test.rs` to keep this module focused on production logic.
diff --git a/crates/agentic-core/tests/dispatch_loop_test.rs b/crates/agentic-core/tests/dispatch_loop_test.rs
index 0b4e6b2..bd7c7f6 100644
--- a/crates/agentic-core/tests/dispatch_loop_test.rs
+++ b/crates/agentic-core/tests/dispatch_loop_test.rs
@@ -22,14 +22,14 @@ use std::sync::atomic::{AtomicUsize, Ordering};
 
 use serde_json::Value;
 
+use agentic_core::ToolRegistry;
 use agentic_core::executor::agentic_loop::{DEFAULT_MAX_ITERATIONS, execute_loop};
 use agentic_core::tool::{GatewayExecutor, ToolError, ToolHandler, ToolOutput, ToolType};
 use agentic_core::types::io::output::OutputItem;
-use agentic_core::types::io::{ResponsesInput, ToolChoice};
+use agentic_core::types::io::{FunctionTool, ResponsesInput, ToolChoice};
 use agentic_core::types::request_response::RequestPayload;
 use agentic_core::types::tools::ResponsesTool;
-use agentic_core::{FunctionTool, ToolRegistry};
-use support::{MockResponse, TestFixture, output_text, text_response};
+use support::{FailingExecutor, MockGatewayExecutor, MockResponse, TestFixture, output_text, text_response};
 
 // ── Cassette directories ──────────────────────────────────────────────────────
 
@@ -133,84 +133,8 @@ fn two_fc_response(name1: &str, call_id1: &str, args1: &str, name2: &str, call_i
 
 // ── Mock GatewayExecutor ──────────────────────────────────────────────────────
 
-/// A `GatewayExecutor` that counts calls and returns a fixed string result.
-struct MockGatewayExecutor {
-    tool_type: ToolType,
-    result: String,
-    call_count: Arc<AtomicUsize>,
-}
-
-impl MockGatewayExecutor {
-    fn new(tool_type: ToolType, result: &str) -> (Arc<AtomicUsize>, Arc<Self>) {
-        let counter = Arc::new(AtomicUsize::new(0));
-        let exec = Arc::new(Self {
-            tool_type,
-            result: result.to_owned(),
-            call_count: Arc::clone(&counter),
-        });
-        (counter, exec)
-    }
-}
-
-impl ToolHandler for MockGatewayExecutor {
-    fn tool_type(&self) -> ToolType {
-        self.tool_type
-    }
-    fn validate(&self, _: &Value) -> Result<(), ToolError> {
-        Ok(())
-    }
-    fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
-        vec![]
-    }
-}
-
-impl GatewayExecutor for MockGatewayExecutor {
-    fn execute(
-        &self,
-        _tool_name: &str,
-        _arguments: &str,
-        _config: &Value,
-    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
-        self.call_count.fetch_add(1, Ordering::SeqCst);
-        let result = self.result.clone();
-        Box::pin(async move {
-            Ok(ToolOutput {
-                call_id: String::new(),
-                output: result,
-            })
-        })
-    }
-}
-
-/// A `GatewayExecutor` that always returns `Err`.
-struct FailingExecutor {
-    tool_type: ToolType,
-}
-
-impl ToolHandler for FailingExecutor {
-    fn tool_type(&self) -> ToolType {
-        self.tool_type
-    }
-    fn validate(&self, _: &Value) -> Result<(), ToolError> {
-        Ok(())
-    }
-    fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
-        vec![]
-    }
-}
-
-impl GatewayExecutor for FailingExecutor {
-    fn execute(
-        &self,
-        _: &str,
-        _: &str,
-        _: &Value,
-    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
-        Box::pin(async { Err(ToolError::Execution("injected failure".to_owned())) })
-    }
-}
-
-// ── Helper: build WebSearch ToolRegistry ──────────────────────────────────────
+// ── Helper: build WebSearch ToolRegistry + executor map ──────────────────────
+// MockGatewayExecutor and FailingExecutor live in tests/support/mod.rs.
 
 fn registry_web_search() -> ToolRegistry {
     ToolRegistry::build(&[ResponsesTool::WebSearch(
@@ -225,6 +149,40 @@ fn executors_web_search(result: &str) -> (Arc<AtomicUsize>, HashMap<ToolType, Ar
     (counter, map)
 }
 
+// ── Helper: assert a single FC + no reasoning in the output ──────────────────
+
+/// Asserts that `payload` contains exactly the named FCs (in any order), no
+/// reasoning items, `status == "completed"`, and that the mock LLM was called
+/// exactly once. Used by the simple single-turn gpt-4o cassette tests.
+async fn assert_single_fc_no_reasoning(
+    result: &agentic_core::types::request_response::ResponsePayload,
+    fixture: &TestFixture,
+    expected_fc_names: &[&str],
+) {
+    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
+    assert_eq!(result.status, "completed");
+
+    let fc_names: Vec<&str> = result
+        .output
+        .iter()
+        .filter_map(|item| {
+            if let OutputItem::FunctionCall(fc) = item {
+                Some(fc.name.as_str())
+            } else {
+                None
+            }
+        })
+        .collect();
+    assert_eq!(fc_names, expected_fc_names, "wrong FC names");
+
+    let reasoning_count = result
+        .output
+        .iter()
+        .filter(|i| matches!(i, OutputItem::Reasoning(_)))
+        .count();
+    assert_eq!(reasoning_count, 0, "gpt-4o cassette must have no reasoning items");
+}
+
 // ── Section 1: No-tool and single-turn baselines ──────────────────────────────
 
 /// Single-turn loop with a text-only response terminates in exactly 1 LLM call
@@ -896,44 +854,20 @@ async fn test_cassette_tool_output_only_items_input_path() {
 /// and that `execute_loop` terminates in 1 call with the correct FC name.
 #[tokio::test]
 async fn test_cassette_openai_3turn_no_reasoning_fc_only() {
-    let t1_body = load_t1_response_body("openai_responses_tool_calls_3turn.yaml");
-    let mock_response = MockResponse::Json(serde_json::to_string(&t1_body).unwrap());
-
-    let fixture = TestFixture::new_with_responses(vec![mock_response]).await;
-
+    let fixture = TestFixture::new_with_responses(vec![MockResponse::Json(
+        serde_json::to_string(&load_t1_response_body("openai_responses_tool_calls_3turn.yaml")).unwrap(),
+    )])
+    .await;
     let result = execute_loop(
         make_request("Check ETL job status", false, None, None, None),
         Arc::clone(&fixture.exec_ctx),
-        ToolRegistry::default(), // all function-type → Done in 1 call
+        ToolRegistry::default(),
         HashMap::new(),
         DEFAULT_MAX_ITERATIONS,
     )
     .await
     .expect("execute_loop should succeed");
-
-    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
-    assert_eq!(result.status, "completed");
-
-    // The output must contain only the FC — no reasoning item (OpenAI format).
-    let fc_names: Vec<&str> = result
-        .output
-        .iter()
-        .filter_map(|item| {
-            if let OutputItem::FunctionCall(fc) = item {
-                Some(fc.name.as_str())
-            } else {
-                None
-            }
-        })
-        .collect();
-    assert_eq!(fc_names, vec!["get_job_status"], "wrong FC names: {fc_names:?}");
-
-    let reasoning_count = result
-        .output
-        .iter()
-        .filter(|item| matches!(item, OutputItem::Reasoning(_)))
-        .count();
-    assert_eq!(reasoning_count, 0, "gpt-4o cassette must have no reasoning items");
+    assert_single_fc_no_reasoning(&result, &fixture, &["get_job_status"]).await;
 }
 
 /// `openai_responses_tool_calls_tool_output_only` cassette, turns 1 and 2.
@@ -1072,10 +1006,10 @@ async fn test_cassette_openai_tool_output_only_items_input() {
 /// correctly and the loop terminates in 1 call (all tools client-owned).
 #[tokio::test]
 async fn test_cassette_openai_5turn_t1_get_job_status() {
-    let t1_body = load_t1_response_body("openai_responses_tool_calls_5turn.yaml");
-    let fixture =
-        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t1_body).unwrap())]).await;
-
+    let fixture = TestFixture::new_with_responses(vec![MockResponse::Json(
+        serde_json::to_string(&load_t1_response_body("openai_responses_tool_calls_5turn.yaml")).unwrap(),
+    )])
+    .await;
     let result = execute_loop(
         make_request(
             "ETL job-382 failed overnight. What is its status?",
@@ -1091,31 +1025,7 @@ async fn test_cassette_openai_5turn_t1_get_job_status() {
     )
     .await
     .expect("execute_loop should succeed");
-
-    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
-    assert_eq!(result.status, "completed");
-
-    let fc_names: Vec<&str> = result
-        .output
-        .iter()
-        .filter_map(|item| {
-            if let OutputItem::FunctionCall(fc) = item {
-                Some(fc.name.as_str())
-            } else {
-                None
-            }
-        })
-        .collect();
-    assert_eq!(fc_names, vec!["get_job_status"]);
-    assert_eq!(
-        result
-            .output
-            .iter()
-            .filter(|i| matches!(i, OutputItem::Reasoning(_)))
-            .count(),
-        0,
-        "gpt-4o cassette must have no reasoning items"
-    );
+    assert_single_fc_no_reasoning(&result, &fixture, &["get_job_status"]).await;
 }
 
 /// `openai_responses_tool_calls_branch` cassette, turn 1.
@@ -1125,10 +1035,10 @@ async fn test_cassette_openai_5turn_t1_get_job_status() {
 /// first turn parses correctly and the loop terminates in 1 call.
 #[tokio::test]
 async fn test_cassette_openai_branch_t1_get_job_status() {
-    let t1_body = load_t1_response_body("openai_responses_tool_calls_branch.yaml");
-    let fixture =
-        TestFixture::new_with_responses(vec![MockResponse::Json(serde_json::to_string(&t1_body).unwrap())]).await;
-
+    let fixture = TestFixture::new_with_responses(vec![MockResponse::Json(
+        serde_json::to_string(&load_t1_response_body("openai_responses_tool_calls_branch.yaml")).unwrap(),
+    )])
+    .await;
     let result = execute_loop(
         make_request("Check ETL pipeline job-382 status", false, None, None, None),
         Arc::clone(&fixture.exec_ctx),
@@ -1138,29 +1048,5 @@ async fn test_cassette_openai_branch_t1_get_job_status() {
     )
     .await
     .expect("execute_loop should succeed");
-
-    assert_eq!(fixture.request_bodies().await.len(), 1, "exactly 1 LLM call");
-    assert_eq!(result.status, "completed");
-
-    let fc_names: Vec<&str> = result
-        .output
-        .iter()
-        .filter_map(|item| {
-            if let OutputItem::FunctionCall(fc) = item {
-                Some(fc.name.as_str())
-            } else {
-                None
-            }
-        })
-        .collect();
-    assert_eq!(fc_names, vec!["get_job_status"]);
-    assert_eq!(
-        result
-            .output
-            .iter()
-            .filter(|i| matches!(i, OutputItem::Reasoning(_)))
-            .count(),
-        0,
-        "gpt-4o cassette must have no reasoning items"
-    );
+    assert_single_fc_no_reasoning(&result, &fixture, &["get_job_status"]).await;
 }
diff --git a/crates/agentic-core/tests/dispatch_test.rs b/crates/agentic-core/tests/dispatch_test.rs
new file mode 100644
index 0000000..efb232c
--- /dev/null
+++ b/crates/agentic-core/tests/dispatch_test.rs
@@ -0,0 +1,532 @@
+//! Unit tests for `executor::dispatch` — `dispatch_tools` and `LoopDecision`.
+//!
+//! Mirrors the inline `#[cfg(test)]` block formerly inside `dispatch.rs`,
+//! moved here to keep the production module readable (143 lines of logic,
+//! no test infrastructure).
+
+use std::collections::HashMap;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use serde_json::Value;
+
+use agentic_core::executor::dispatch::{LoopDecision, dispatch_tools};
+use agentic_core::tool::{GatewayExecutor, ToolError, ToolHandler, ToolOutput, ToolType};
+use agentic_core::types::event::MessageStatus;
+use agentic_core::types::io::input::InputItem;
+use agentic_core::types::io::output::{FunctionToolCall, OutputItem};
+use agentic_core::types::tools::ResponsesTool;
+use agentic_core::{FunctionTool, ToolRegistry};
+
+// ── Mock executor (dispatch-test–specific: supports both Ok and Err results) ──
+
+struct MockExecutor {
+    tool_type: ToolType,
+    result: Result<String, String>,
+}
+
+impl ToolHandler for MockExecutor {
+    fn tool_type(&self) -> ToolType {
+        self.tool_type
+    }
+    fn validate(&self, _: &Value) -> Result<(), ToolError> {
+        Ok(())
+    }
+    fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
+        vec![]
+    }
+}
+
+impl GatewayExecutor for MockExecutor {
+    fn execute(
+        &self,
+        _: &str,
+        _: &str,
+        _: &Value,
+    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+        let r = self.result.clone();
+        Box::pin(async move {
+            match r {
+                Ok(output) => Ok(ToolOutput {
+                    call_id: String::new(),
+                    output,
+                }),
+                Err(e) => Err(ToolError::Execution(e)),
+            }
+        })
+    }
+}
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+fn make_fc(name: &str, call_id: &str) -> OutputItem {
+    OutputItem::FunctionCall(FunctionToolCall {
+        id: call_id.to_owned(),
+        call_id: call_id.to_owned(),
+        name: name.to_owned(),
+        arguments: "{}".to_owned(),
+        status: MessageStatus::Completed,
+    })
+}
+
+fn registry_with_web_search() -> ToolRegistry {
+    ToolRegistry::build(&[ResponsesTool::WebSearch(
+        agentic_core::types::tools::WebSearchToolParam {},
+    )])
+}
+
+fn web_search_executor(result: Result<String, String>) -> HashMap<ToolType, Arc<dyn GatewayExecutor>> {
+    let mut map: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+    map.insert(
+        ToolType::WebSearch,
+        Arc::new(MockExecutor {
+            tool_type: ToolType::WebSearch,
+            result,
+        }),
+    );
+    map
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[tokio::test]
+async fn no_function_calls_returns_done() {
+    let output = vec![OutputItem::Message(
+        agentic_core::types::io::output::OutputMessage::new("msg_1", MessageStatus::Completed),
+    )];
+    let decision = dispatch_tools(&output, &ToolRegistry::default(), &HashMap::new(), 0, 10)
+        .await
+        .unwrap();
+    assert!(matches!(decision, LoopDecision::Done));
+}
+
+#[tokio::test]
+async fn empty_output_returns_done() {
+    let decision = dispatch_tools(&[], &ToolRegistry::default(), &HashMap::new(), 0, 10)
+        .await
+        .unwrap();
+    assert!(matches!(decision, LoopDecision::Done));
+}
+
+#[tokio::test]
+async fn max_iterations_returns_incomplete() {
+    let output = vec![make_fc("web_search", "fc_1")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("r".into())),
+        5,
+        5,
+    )
+    .await
+    .unwrap();
+    assert!(matches!(decision, LoopDecision::Incomplete(_)));
+}
+
+#[tokio::test]
+async fn gateway_call_succeeds_returns_continue() {
+    let output = vec![make_fc("web_search", "fc_1")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("search results".into())),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    match decision {
+        LoopDecision::Continue(items) => {
+            assert_eq!(items.len(), 1);
+            if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                assert_eq!(msg.call_id, "fc_1");
+                assert_eq!(msg.output, "search results");
+            } else {
+                panic!("expected FunctionCallOutput");
+            }
+        }
+        other => panic!("expected Continue, got {other:?}"),
+    }
+}
+
+#[tokio::test]
+async fn failing_executor_returns_continue_with_error_json() {
+    let output = vec![make_fc("web_search", "fc_err")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Err("network failure".into())),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    match decision {
+        LoopDecision::Continue(items) => {
+            if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                assert!(msg.output.contains("error"), "expected error JSON: {}", msg.output);
+            } else {
+                panic!("expected FunctionCallOutput");
+            }
+        }
+        other => panic!("expected Continue with error, got {other:?}"),
+    }
+}
+
+#[tokio::test]
+async fn unregistered_tool_name_returns_done() {
+    let output = vec![make_fc("unknown_tool", "fc_unknown")];
+    let decision = dispatch_tools(
+        &output,
+        &ToolRegistry::default(),
+        &web_search_executor(Ok("unused".into())),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    assert!(matches!(decision, LoopDecision::Done));
+}
+
+#[tokio::test]
+async fn client_owned_function_call_returns_done() {
+    let tools = vec![ResponsesTool::Function(agentic_core::types::tools::FunctionToolParam {
+        name: agentic_core::types::tools::NonEmptyToolName::try_from("get_weather".to_owned()).unwrap(),
+        description: None,
+        parameters: None,
+        strict: None,
+    })];
+    let registry = ToolRegistry::build(&tools);
+    let output = vec![make_fc("get_weather", "fc_fn")];
+    let decision = dispatch_tools(
+        &output,
+        &registry,
+        &HashMap::<ToolType, Arc<dyn GatewayExecutor>>::new(),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    assert!(matches!(decision, LoopDecision::Done));
+}
+
+#[tokio::test]
+async fn parallel_gateway_calls_all_execute() {
+    let output = vec![make_fc("web_search", "fc_1"), make_fc("web_search", "fc_2")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("ok".into())),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    match decision {
+        LoopDecision::Continue(items) => assert_eq!(items.len(), 2),
+        other => panic!("expected Continue, got {other:?}"),
+    }
+}
+
+#[tokio::test]
+async fn registered_gateway_tool_but_no_executor_returns_error_json_continue() {
+    let output = vec![make_fc("web_search", "fc_no_exec")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &HashMap::<ToolType, Arc<dyn GatewayExecutor>>::new(),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    match decision {
+        LoopDecision::Continue(items) => {
+            if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                assert!(msg.output.contains("error"), "expected error JSON: {}", msg.output);
+            } else {
+                panic!("expected FunctionCallOutput");
+            }
+        }
+        other => panic!("expected Continue with error, got {other:?}"),
+    }
+}
+
+#[tokio::test]
+async fn iteration_zero_below_max_executes() {
+    let output = vec![make_fc("web_search", "fc_1")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("ok".into())),
+        0,
+        1,
+    )
+    .await
+    .unwrap();
+    assert!(matches!(decision, LoopDecision::Continue(_)));
+}
+
+#[tokio::test]
+async fn iteration_at_max_returns_incomplete() {
+    let output = vec![make_fc("web_search", "fc_1")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("ok".into())),
+        1,
+        1,
+    )
+    .await
+    .unwrap();
+    assert!(matches!(decision, LoopDecision::Incomplete(_)));
+}
+
+/// `max_iterations` = 0: iter=0 >= max=0 → `Incomplete` without calling any executor.
+#[tokio::test]
+async fn zero_max_iterations_returns_incomplete() {
+    let output = vec![make_fc("web_search", "fc_1")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &HashMap::<ToolType, Arc<dyn GatewayExecutor>>::new(),
+        0,
+        0,
+    )
+    .await
+    .unwrap();
+    assert!(matches!(decision, LoopDecision::Incomplete(_)));
+}
+
+/// Mixed batch: 1 gateway-owned + 1 client-owned → only gateway produces output.
+#[tokio::test]
+async fn mixed_batch_gateway_and_client_owned() {
+    let output = vec![make_fc("web_search", "fc_gw"), make_fc("get_weather", "fc_client")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("results".into())),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    match decision {
+        LoopDecision::Continue(items) => {
+            assert_eq!(items.len(), 1, "only gateway call should produce output");
+            if let InputItem::FunctionCallOutput(msg) = &items[0] {
+                assert_eq!(msg.call_id, "fc_gw");
+            } else {
+                panic!("expected FunctionCallOutput");
+            }
+        }
+        other => panic!("expected Continue, got {other:?}"),
+    }
+}
+
+/// `Incomplete` reason must include the iteration and limit counts.
+#[tokio::test]
+async fn incomplete_message_contains_iteration_counts() {
+    let output = vec![make_fc("web_search", "fc_1")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Ok("ok".into())),
+        7,
+        7,
+    )
+    .await
+    .unwrap();
+    match decision {
+        LoopDecision::Incomplete(msg) => {
+            assert!(msg.contains('7'), "message should contain iteration number, got: {msg}");
+        }
+        other => panic!("expected Incomplete, got {other:?}"),
+    }
+}
+
+/// Two different gateway tool types each dispatch to their own executor.
+#[tokio::test]
+async fn multiple_tool_types_dispatch_to_correct_executor() {
+    struct CountingExecutor {
+        tool_type: ToolType,
+        call_count: Arc<AtomicUsize>,
+    }
+    impl ToolHandler for CountingExecutor {
+        fn tool_type(&self) -> ToolType {
+            self.tool_type
+        }
+        fn validate(&self, _: &Value) -> Result<(), ToolError> {
+            Ok(())
+        }
+        fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
+            vec![]
+        }
+    }
+    impl GatewayExecutor for CountingExecutor {
+        fn execute(
+            &self,
+            _: &str,
+            _: &str,
+            _: &Value,
+        ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+            self.call_count.fetch_add(1, Ordering::SeqCst);
+            Box::pin(async {
+                Ok(ToolOutput {
+                    call_id: String::new(),
+                    output: "ok".to_owned(),
+                })
+            })
+        }
+    }
+
+    let ws_count = Arc::new(AtomicUsize::new(0));
+    let fs_count = Arc::new(AtomicUsize::new(0));
+    let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+    executors.insert(
+        ToolType::WebSearch,
+        Arc::new(CountingExecutor {
+            tool_type: ToolType::WebSearch,
+            call_count: Arc::clone(&ws_count),
+        }),
+    );
+    executors.insert(
+        ToolType::FileSearch,
+        Arc::new(CountingExecutor {
+            tool_type: ToolType::FileSearch,
+            call_count: Arc::clone(&fs_count),
+        }),
+    );
+
+    let tools = vec![
+        ResponsesTool::WebSearch(agentic_core::types::tools::WebSearchToolParam {}),
+        ResponsesTool::FileSearch(agentic_core::types::tools::FileSearchToolParam { vector_store_ids: None }),
+    ];
+    let registry = ToolRegistry::build(&tools);
+    let output = vec![make_fc("web_search", "fc_ws"), make_fc("file_search", "fc_fs")];
+    let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+
+    assert!(matches!(decision, LoopDecision::Continue(_)));
+    assert_eq!(ws_count.load(Ordering::SeqCst), 1, "web_search executor called once");
+    assert_eq!(fs_count.load(Ordering::SeqCst), 1, "file_search executor called once");
+}
+
+/// Error output injected by the gateway must be valid JSON with an `"error"` key.
+#[tokio::test]
+async fn error_json_output_is_valid_json() {
+    let output = vec![make_fc("web_search", "fc_err")];
+    let decision = dispatch_tools(
+        &output,
+        &registry_with_web_search(),
+        &web_search_executor(Err("boom".into())),
+        0,
+        10,
+    )
+    .await
+    .unwrap();
+    if let LoopDecision::Continue(items) = decision {
+        if let InputItem::FunctionCallOutput(msg) = &items[0] {
+            let parsed: serde_json::Value = serde_json::from_str(&msg.output).expect("error output must be valid JSON");
+            assert!(
+                parsed.get("error").is_some(),
+                "JSON must have 'error' key, got: {}",
+                msg.output
+            );
+        } else {
+            panic!("expected FunctionCallOutput");
+        }
+    } else {
+        panic!("expected Continue");
+    }
+}
+
+/// `call_id` from `FunctionToolCall` flows unchanged into `FunctionCallOutput`.
+#[tokio::test]
+async fn call_id_preserved_in_output() {
+    struct AlternatingExecutor {
+        counter: Arc<AtomicUsize>,
+    }
+    impl ToolHandler for AlternatingExecutor {
+        fn tool_type(&self) -> ToolType {
+            ToolType::WebSearch
+        }
+        fn validate(&self, _: &Value) -> Result<(), ToolError> {
+            Ok(())
+        }
+        fn normalize(&self, _: &Value) -> Vec<FunctionTool> {
+            vec![]
+        }
+    }
+    impl GatewayExecutor for AlternatingExecutor {
+        fn execute(
+            &self,
+            _: &str,
+            _: &str,
+            _: &Value,
+        ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+            let n = self.counter.fetch_add(1, Ordering::SeqCst);
+            Box::pin(async move {
+                if n == 0 {
+                    Ok(ToolOutput {
+                        call_id: String::new(),
+                        output: "success".to_owned(),
+                    })
+                } else {
+                    Err(ToolError::Execution("fail".to_owned()))
+                }
+            })
+        }
+    }
+
+    let output = vec![
+        make_fc("web_search", "call_success_123"),
+        make_fc("web_search", "call_fail_456"),
+    ];
+    let registry = registry_with_web_search();
+    let mut executors: HashMap<ToolType, Arc<dyn GatewayExecutor>> = HashMap::new();
+    executors.insert(
+        ToolType::WebSearch,
+        Arc::new(AlternatingExecutor {
+            counter: Arc::new(AtomicUsize::new(0)),
+        }),
+    );
+
+    let decision = dispatch_tools(&output, &registry, &executors, 0, 10).await.unwrap();
+    if let LoopDecision::Continue(items) = decision {
+        let call_ids: Vec<&str> = items
+            .iter()
+            .filter_map(|item| {
+                if let InputItem::FunctionCallOutput(msg) = item {
+                    Some(msg.call_id.as_str())
+                } else {
+                    None
+                }
+            })
+            .collect();
+        assert!(
+            call_ids.contains(&"call_success_123"),
+            "success call_id missing: {call_ids:?}"
+        );
+        assert!(
+            call_ids.contains(&"call_fail_456"),
+            "fail call_id missing: {call_ids:?}"
+        );
+    } else {
+        panic!("expected Continue");
+    }
+}
+
+/// Output with only non-FC items returns `Done`.
+#[tokio::test]
+async fn output_with_only_non_fc_items_returns_done() {
+    use agentic_core::types::io::output::{OutputMessage, ReasoningOutput};
+    let output = vec![
+        OutputItem::Reasoning(ReasoningOutput::new("rs_1")),
+        OutputItem::Message(OutputMessage::new("msg_1", MessageStatus::Completed)),
+    ];
+    let decision = dispatch_tools(&output, &ToolRegistry::default(), &HashMap::new(), 0, 10)
+        .await
+        .unwrap();
+    assert!(matches!(decision, LoopDecision::Done));
+}
diff --git a/crates/agentic-core/tests/support/mod.rs b/crates/agentic-core/tests/support/mod.rs
index 4837593..74fafe5 100644
--- a/crates/agentic-core/tests/support/mod.rs
+++ b/crates/agentic-core/tests/support/mod.rs
@@ -395,6 +395,103 @@ pub async fn collect_stream(result: Either<ResponsePayload, BoxStream>) -> Respo
     panic!("stream ended without a ResponsePayload chunk");
 }
 
+// ── Shared mock executors ─────────────────────────────────────────────────────
+
+use std::pin::Pin;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use agentic_core::tool::{GatewayExecutor, ToolError, ToolHandler, ToolOutput, ToolType};
+use agentic_core::types::io::FunctionTool;
+
+/// A `GatewayExecutor` that returns a fixed string and counts invocations.
+///
+/// Construct with [`MockGatewayExecutor::new`] which returns the shared counter
+/// alongside the executor so callers can assert call counts after the test.
+pub struct MockGatewayExecutor {
+    pub tool_type: ToolType,
+    pub result: String,
+    pub call_count: Arc<AtomicUsize>,
+}
+
+impl MockGatewayExecutor {
+    /// Build a new executor. Returns `(counter, Arc<executor>)`.
+    pub fn new(tool_type: ToolType, result: &str) -> (Arc<AtomicUsize>, Arc<Self>) {
+        let counter = Arc::new(AtomicUsize::new(0));
+        let exec = Arc::new(Self {
+            tool_type,
+            result: result.to_owned(),
+            call_count: Arc::clone(&counter),
+        });
+        (counter, exec)
+    }
+
+    /// Read the current invocation count.
+    #[must_use]
+    pub fn count(counter: &AtomicUsize) -> usize {
+        counter.load(Ordering::SeqCst)
+    }
+}
+
+impl ToolHandler for MockGatewayExecutor {
+    fn tool_type(&self) -> ToolType {
+        self.tool_type
+    }
+    fn validate(&self, _: &serde_json::Value) -> Result<(), ToolError> {
+        Ok(())
+    }
+    fn normalize(&self, _: &serde_json::Value) -> Vec<FunctionTool> {
+        vec![]
+    }
+}
+
+impl GatewayExecutor for MockGatewayExecutor {
+    fn execute(
+        &self,
+        _tool_name: &str,
+        _arguments: &str,
+        _config: &serde_json::Value,
+    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+        self.call_count.fetch_add(1, Ordering::SeqCst);
+        let result = self.result.clone();
+        Box::pin(async move {
+            Ok(ToolOutput {
+                call_id: String::new(),
+                output: result,
+            })
+        })
+    }
+}
+
+/// A `GatewayExecutor` that always returns `Err("injected failure")`.
+pub struct FailingExecutor {
+    pub tool_type: ToolType,
+}
+
+impl ToolHandler for FailingExecutor {
+    fn tool_type(&self) -> ToolType {
+        self.tool_type
+    }
+    fn validate(&self, _: &serde_json::Value) -> Result<(), ToolError> {
+        Ok(())
+    }
+    fn normalize(&self, _: &serde_json::Value) -> Vec<FunctionTool> {
+        vec![]
+    }
+}
+
+impl GatewayExecutor for FailingExecutor {
+    fn execute(
+        &self,
+        _: &str,
+        _: &str,
+        _: &serde_json::Value,
+    ) -> Pin<Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send + '_>> {
+        Box::pin(async { Err(ToolError::Execution("injected failure".to_owned())) })
+    }
+}
+
+// ── End shared mock executors ─────────────────────────────────────────────────
+
 /// Extract concatenated text content from a `ResponsePayload`.
 pub fn output_text(payload: &ResponsePayload) -> String {
     payload