From 5e78015f8913b8914726724c8ca04049031790be Mon Sep 17 00:00:00 2001 From: Sky Moore Date: Mon, 16 Mar 2026 13:49:55 +0000 Subject: [PATCH 1/8] feat: programmatic tool calling --- crates/openfang-kernel/src/kernel.rs | 3 + crates/openfang-kernel/src/registry.rs | 1 + crates/openfang-kernel/src/wizard.rs | 1 + crates/openfang-runtime/src/agent_loop.rs | 301 +++++++++- crates/openfang-runtime/src/lib.rs | 1 + crates/openfang-runtime/src/ptc/executor.rs | 172 ++++++ crates/openfang-runtime/src/ptc/ipc_server.rs | 390 +++++++++++++ crates/openfang-runtime/src/ptc/mod.rs | 208 +++++++ .../openfang-runtime/src/ptc/sdk_generator.rs | 538 ++++++++++++++++++ .../src/ptc/tool_classifier.rs | 85 +++ crates/openfang-types/src/agent.rs | 6 + crates/openfang-types/src/config.rs | 34 ++ 12 files changed, 1735 insertions(+), 5 deletions(-) create mode 100644 crates/openfang-runtime/src/ptc/executor.rs create mode 100644 crates/openfang-runtime/src/ptc/ipc_server.rs create mode 100644 crates/openfang-runtime/src/ptc/mod.rs create mode 100644 crates/openfang-runtime/src/ptc/sdk_generator.rs create mode 100644 crates/openfang-runtime/src/ptc/tool_classifier.rs diff --git a/crates/openfang-kernel/src/kernel.rs b/crates/openfang-kernel/src/kernel.rs index f449addac..0df65dd1d 100644 --- a/crates/openfang-kernel/src/kernel.rs +++ b/crates/openfang-kernel/src/kernel.rs @@ -3370,6 +3370,7 @@ impl OpenFangKernel { None }, tool_blocklist: Vec::new(), + ptc_enabled: None, // Custom profile avoids ToolProfile-based expansion overriding the // explicit tool list. 
profile: if !def.tools.is_empty() { @@ -6631,6 +6632,7 @@ mod tests { exec_policy: None, tool_allowlist: vec![], tool_blocklist: vec![], + ptc_enabled: None, }; manifest.capabilities.tools = vec!["file_read".to_string(), "web_fetch".to_string()]; manifest.capabilities.agent_spawn = true; @@ -6668,6 +6670,7 @@ mod tests { exec_policy: None, tool_allowlist: vec![], tool_blocklist: vec![], + ptc_enabled: None, } } diff --git a/crates/openfang-kernel/src/registry.rs b/crates/openfang-kernel/src/registry.rs index 841085ad3..fbb947c0c 100644 --- a/crates/openfang-kernel/src/registry.rs +++ b/crates/openfang-kernel/src/registry.rs @@ -395,6 +395,7 @@ mod tests { exec_policy: None, tool_allowlist: vec![], tool_blocklist: vec![], + ptc_enabled: None, }, state: AgentState::Created, mode: AgentMode::default(), diff --git a/crates/openfang-kernel/src/wizard.rs b/crates/openfang-kernel/src/wizard.rs index ad6dafe84..c6cce6b42 100644 --- a/crates/openfang-kernel/src/wizard.rs +++ b/crates/openfang-kernel/src/wizard.rs @@ -182,6 +182,7 @@ impl SetupWizard { exec_policy: None, tool_allowlist: vec![], tool_blocklist: vec![], + ptc_enabled: None, }; let skills_to_install: Vec = intent diff --git a/crates/openfang-runtime/src/agent_loop.rs b/crates/openfang-runtime/src/agent_loop.rs index f773def41..42ef4f384 100644 --- a/crates/openfang-runtime/src/agent_loop.rs +++ b/crates/openfang-runtime/src/agent_loop.rs @@ -102,6 +102,39 @@ fn append_tool_error_guidance(tool_result_blocks: &mut Vec) { } } +/// System prompt supplement appended when Programmatic Tool Calling (PTC) is enabled. 
+const PTC_SYSTEM_PROMPT_SUPPLEMENT: &str = "\n\n\ +## Programmatic Tool Calling (execute_code)\n\n\ +You have access to an `execute_code` tool that runs Python code with tool functions.\n\ +Use `execute_code` whenever you need to:\n\ +- Read or edit multiple files (loop instead of N separate calls)\n\ +- Search and filter results, then print only relevant data\n\ +- Perform any workflow with 2+ tool calls where intermediate data can be filtered\n\ +- Batch operations (create tasks, check endpoints, process items)\n\n\ +**How it works:** Tool functions are plain synchronous Python functions (no async/await).\n\ +Call them directly: `result = file_read(path=\"src/main.ts\")`.\n\ +Tool results go to your code, NOT your context window. Only `print()` output enters\n\ +your context. This dramatically reduces context usage.\n\n\ +**Important rules:**\n\ +- All tool functions are **synchronous** — call them directly, no `await`, no `asyncio`.\n\ +- `print()` is the ONLY way to return data to your context. Slice large output: `print(result[:2000])`\n\ +- Always use try/except for error handling.\n\ +- Some params are renamed to avoid Python reserved words: `type` -> `type_`, `class` -> `class_`, `from` -> `from_`\n\ +- All tool functions return `str`. Parse JSON results with `json.loads(result)` if needed.\n\n\ +**Example — reading and filtering files:**\n\ +```python\n\ +import json\n\ +try:\n\ + files = [\"src/main.ts\", \"src/config.ts\", \"src/utils.ts\"]\n\ + for f in files:\n\ + content = file_read(path=f)\n\ + if \"TODO\" in content:\n\ + print(f\"Found TODO in {f}\")\n\ + print(content[:500])\n\ +except Exception as e:\n\ + print(f\"Error: {e}\")\n\ +```\n"; + /// Strip a provider prefix from a model ID before sending to the API. /// /// Many models are stored as `provider/org/model` (e.g. 
`openrouter/google/gemini-2.5-flash`) @@ -326,6 +359,39 @@ pub async fn run_agent_loop( let mut total_usage = TokenUsage::default(); let final_response; + // ── Programmatic Tool Calling (PTC) ───────────────────────────────── + // If PTC is enabled, replace the tool list with: direct tools + execute_code. + // PTC tools get compact Python function signatures instead of full JSON schemas. + let ptc_global_enabled = manifest.ptc_enabled.unwrap_or(true); + let ptc_config = crate::ptc::PtcConfig::default(); + + let mut ptc_instance: Option = + if ptc_global_enabled && !available_tools.is_empty() { + match crate::ptc::init_ptc(available_tools).await { + Ok(instance) => Some(instance), + Err(e) => { + warn!("PTC initialization failed, falling back to direct tools: {e}"); + None + } + } + } else { + None + }; + + // If PTC is active, swap the tool list: direct tools + execute_code + let ptc_tools_vec: Vec; + let available_tools = if let Some(ref ptc) = ptc_instance { + ptc_tools_vec = ptc.agent_tools(); + &ptc_tools_vec[..] + } else { + available_tools + }; + + // Append PTC system prompt supplement if PTC is active + if ptc_instance.is_some() { + system_prompt.push_str(PTC_SYSTEM_PROMPT_SUPPLEMENT); + } + // Safety valve: trim excessively long message histories to prevent context overflow. // The full compaction system handles sophisticated summarization, but this prevents // the catastrophic case where 200+ messages cause instant context overflow. @@ -663,9 +729,18 @@ pub async fn run_agent_loop( content: MessageContent::Blocks(assistant_blocks), }); - // Build allowed tool names list for capability enforcement - let allowed_tool_names: Vec = + // Build allowed tool names list for capability enforcement. + // When PTC is active, include all PTC tools too (they're called + // from the IPC server, not directly by the LLM). 
+ let mut allowed_tool_names: Vec = available_tools.iter().map(|t| t.name.clone()).collect(); + if let Some(ref ptc) = ptc_instance { + for t in &ptc.ptc_tools { + if !allowed_tool_names.contains(&t.name) { + allowed_tool_names.push(t.name.clone()); + } + } + } let caller_id_str = session.agent_id.to_string(); // Execute each tool call with loop guard, timeout, and truncation @@ -752,9 +827,84 @@ pub async fn run_agent_loop( let effective_exec_policy = manifest.exec_policy.as_ref(); // Timeout-wrapped execution + // PTC interception: if this is execute_code and PTC is active, + // run Python and concurrently dispatch tool calls from the IPC channel. + let result = if let (true, Some(ptc)) = ( + tool_call.name == "execute_code", + ptc_instance.as_mut(), + ) { + let code = tool_call.input["code"].as_str().unwrap_or(""); + let ptc_timeout = tool_call.input["timeout"] + .as_u64() + .unwrap_or(ptc_config.timeout_secs) + .clamp(10, 600); + + // Generate SDK and run Python + let sdk = crate::ptc::generate_python_sdk(&ptc.ptc_tools, ptc.ipc_server.port()); + let full_script = crate::ptc::wrap_user_code(&sdk, code); + + // Spawn the Python subprocess as a future + let ws = workspace_root.map(|p| p.to_path_buf()); + let mut python_fut = tokio::spawn(async move { + crate::ptc::execute_python(&full_script, ptc_timeout, ws.as_deref()).await + }); + + // Concurrently handle IPC tool requests while Python runs. + // JoinHandle is Unpin so we can select! on &mut directly. + let python_result: Option = loop { + tokio::select! 
{ + // Python finished + py_result = &mut python_fut => { + break py_result.ok(); + } + // IPC tool request from Python + Some(req) = ptc.ipc_server.request_rx.recv() => { + let eff_exec_policy = manifest.exec_policy.as_ref(); + let tool_result = tool_runner::execute_tool( + &req.tool_call_id, + &req.tool_name, + &req.input, + kernel.as_ref(), + Some(&allowed_tool_names), + Some(&caller_id_str), + skill_registry, + mcp_connections, + web_ctx, + browser_ctx, + if hand_allowed_env.is_empty() { + None + } else { + Some(&hand_allowed_env) + }, + workspace_root, + media_engine, + eff_exec_policy, + tts_engine, + docker_config, + process_manager, + ) + .await; + let _ = req.response_tx.send(tool_result); + } + } + }; + + match python_result { + Some(py) => ptc_python_result_to_tool_result( + py, + &tool_call.id, + ptc_config.max_stdout_bytes, + ), + None => openfang_types::tool::ToolResult { + tool_use_id: tool_call.id.clone(), + content: "execute_code: Python subprocess failed".to_string(), + is_error: true, + }, + } + } else { let timeout = tool_timeout_for(&tool_call.name); let timeout_secs = timeout.as_secs(); - let result = match tokio::time::timeout( + match tokio::time::timeout( timeout, tool_runner::execute_tool( &tool_call.id, @@ -794,6 +944,7 @@ pub async fn run_agent_loop( is_error: true, } } + } // end else (non-execute_code tool dispatch) }; // Fire AfterToolCall hook @@ -1490,6 +1641,36 @@ pub async fn run_agent_loop_streaming( let mut total_usage = TokenUsage::default(); let final_response; + // ── Programmatic Tool Calling (PTC) — streaming ───────────────────── + let ptc_global_enabled = manifest.ptc_enabled.unwrap_or(true); + let ptc_config = crate::ptc::PtcConfig::default(); + + let mut ptc_instance: Option = + if ptc_global_enabled && !available_tools.is_empty() { + match crate::ptc::init_ptc(available_tools).await { + Ok(instance) => Some(instance), + Err(e) => { + warn!("PTC initialization failed (streaming), falling back to direct tools: {e}"); + 
None + } + } + } else { + None + }; + + let ptc_tools_vec: Vec; + let available_tools = if let Some(ref ptc) = ptc_instance { + ptc_tools_vec = ptc.agent_tools(); + &ptc_tools_vec[..] + } else { + available_tools + }; + + // Append PTC system prompt supplement if PTC is active (streaming) + if ptc_instance.is_some() { + system_prompt.push_str(PTC_SYSTEM_PROMPT_SUPPLEMENT); + } + // Safety valve: trim excessively long message histories to prevent context overflow. if messages.len() > MAX_HISTORY_MESSAGES { let trim_count = messages.len() - MAX_HISTORY_MESSAGES; @@ -1818,8 +1999,16 @@ pub async fn run_agent_loop_streaming( content: MessageContent::Blocks(assistant_blocks), }); - let allowed_tool_names: Vec = + // Include PTC tools in allowed names (they're callable from IPC, not the LLM) + let mut allowed_tool_names: Vec = available_tools.iter().map(|t| t.name.clone()).collect(); + if let Some(ref ptc) = ptc_instance { + for t in &ptc.ptc_tools { + if !allowed_tool_names.contains(&t.name) { + allowed_tool_names.push(t.name.clone()); + } + } + } let caller_id_str = session.agent_id.to_string(); // Execute each tool call with loop guard, timeout, and truncation @@ -1905,9 +2094,73 @@ pub async fn run_agent_loop_streaming( let effective_exec_policy = manifest.exec_policy.as_ref(); // Timeout-wrapped execution + // PTC interception (streaming): same as non-streaming path. 
+ let result = if let (true, Some(ptc)) = ( + tool_call.name == "execute_code", + ptc_instance.as_mut(), + ) { + let code = tool_call.input["code"].as_str().unwrap_or(""); + let ptc_timeout = tool_call.input["timeout"] + .as_u64() + .unwrap_or(ptc_config.timeout_secs) + .clamp(10, 600); + + let sdk = crate::ptc::generate_python_sdk(&ptc.ptc_tools, ptc.ipc_server.port()); + let full_script = crate::ptc::wrap_user_code(&sdk, code); + + let ws = workspace_root.map(|p| p.to_path_buf()); + let mut python_fut = tokio::spawn(async move { + crate::ptc::execute_python(&full_script, ptc_timeout, ws.as_deref()).await + }); + + let python_result: Option = loop { + tokio::select! { + py_result = &mut python_fut => { + break py_result.ok(); + } + Some(req) = ptc.ipc_server.request_rx.recv() => { + let eff_exec_policy = manifest.exec_policy.as_ref(); + let tool_result = tool_runner::execute_tool( + &req.tool_call_id, + &req.tool_name, + &req.input, + kernel.as_ref(), + Some(&allowed_tool_names), + Some(&caller_id_str), + skill_registry, + mcp_connections, + web_ctx, + browser_ctx, + if hand_allowed_env.is_empty() { None } else { Some(&hand_allowed_env) }, + workspace_root, + media_engine, + eff_exec_policy, + tts_engine, + docker_config, + process_manager, + ) + .await; + let _ = req.response_tx.send(tool_result); + } + } + }; + + match python_result { + Some(py) => ptc_python_result_to_tool_result( + py, + &tool_call.id, + ptc_config.max_stdout_bytes, + ), + None => openfang_types::tool::ToolResult { + tool_use_id: tool_call.id.clone(), + content: "execute_code: Python subprocess failed".to_string(), + is_error: true, + }, + } + } else { let timeout = tool_timeout_for(&tool_call.name); let timeout_secs = timeout.as_secs(); - let result = match tokio::time::timeout( + match tokio::time::timeout( timeout, tool_runner::execute_tool( &tool_call.id, @@ -1947,6 +2200,7 @@ pub async fn run_agent_loop_streaming( is_error: true, } } + } // end else (non-execute_code tool dispatch, 
streaming) }; // Fire AfterToolCall hook @@ -2141,6 +2395,64 @@ pub async fn run_agent_loop_streaming( /// 12. `tool_name\n{"key":"value"}` — bare name + JSON on next line (Llama 4 Scout) /// 13. `{"name":"tool","arguments":{...}}` — Llama 3.1+ variant ///
+/// Build a `ToolResult` from a finished `execute_code` Python run.
+///
+/// The result content is assembled from, in order:
+/// - stdout (trimmed), or, when it is longer than `max_stdout_bytes`, its leading
+///   bytes followed by an `[output truncated at N bytes]` marker;
+/// - for a non-zero exit code only: the trimmed stderr (if any) and the exit code.
+///
+/// When none of these produce text the content is `"(no output)"`. `is_error` is set
+/// exactly when the exit code is non-zero.
+fn ptc_python_result_to_tool_result(
+    py: crate::ptc::executor::PythonResult,
+    tool_use_id: &str,
+    max_stdout_bytes: usize,
+) -> openfang_types::tool::ToolResult {
+    let failed = py.exit_code != 0;
+    let mut parts: Vec<String> = Vec::new();
+
+    if !py.stdout.trim().is_empty() {
+        let stdout = if py.stdout.len() > max_stdout_bytes {
+            // Slicing a `str` at a byte offset inside a multi-byte UTF-8 character
+            // panics, so back the cut off to the nearest character boundary.
+            // Offset 0 is always a boundary, so the loop terminates.
+            let mut cut = max_stdout_bytes;
+            while !py.stdout.is_char_boundary(cut) {
+                cut -= 1;
+            }
+            format!(
+                "{}\n\n[output truncated at {} bytes]",
+                &py.stdout[..cut],
+                max_stdout_bytes
+            )
+        } else {
+            py.stdout.trim().to_string()
+        };
+        parts.push(stdout);
+    }
+
+    if failed {
+        let stderr = py.stderr.trim();
+        if !stderr.is_empty() {
+            parts.push(format!("\n[stderr]\n{stderr}"));
+        }
+        parts.push(format!("\n[exit code: {}]", py.exit_code));
+    }
+
+    let content = if parts.is_empty() {
+        "(no output)".to_string()
+    } else {
+        parts.join("\n")
+    };
+
+    openfang_types::tool::ToolResult {
+        tool_use_id: tool_use_id.to_string(),
+        content,
+        is_error: failed,
+    }
+}
+
 /// Validates tool names against available tools and returns synthetic `ToolCall` entries.
fn recover_text_tool_calls(text: &str, available_tools: &[ToolDefinition]) -> Vec { let mut calls = Vec::new(); diff --git a/crates/openfang-runtime/src/lib.rs b/crates/openfang-runtime/src/lib.rs index 9e88fe8b9..bd811113b 100644 --- a/crates/openfang-runtime/src/lib.rs +++ b/crates/openfang-runtime/src/lib.rs @@ -36,6 +36,7 @@ pub mod media_understanding; pub mod model_catalog; pub mod process_manager; pub mod prompt_builder; +pub mod ptc; pub mod provider_health; pub mod python_runtime; pub mod reply_directives; diff --git a/crates/openfang-runtime/src/ptc/executor.rs b/crates/openfang-runtime/src/ptc/executor.rs new file mode 100644 index 000000000..863bb1c4d --- /dev/null +++ b/crates/openfang-runtime/src/ptc/executor.rs @@ -0,0 +1,172 @@ +//! Python subprocess executor for Programmatic Tool Calling. +//! +//! Spawns `python3 -u -c