diff --git a/CHANGELOG.md b/CHANGELOG.md index 086fde103..e5ec1f73f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `zeph-orchestration`: add `Llm(#[from] zeph_llm::LlmError)` typed variant to `OrchestrationError` so callers can pattern-match on root LLM error kinds without string comparison (closes #3842). +### Changed + +- `zeph-orchestration`: add `#[tracing::instrument]` spans to `LlmPlanner::plan`, + `LlmPlanner::plan_with_hint`, and `LlmAggregator::aggregate`. Span names follow the + `orchestration.<module>.<operation>` convention with `goal_len` / `task_count` fields + for Perfetto trace analysis (closes #3850). +- `zeph-subagent`: add `#[tracing::instrument]` spans to `SubAgentManager::spawn`, + `SubAgentManager::collect`, `SubAgentManager::shutdown_all`, `run_agent_loop`, and + `run_turn`. Span names follow the `subagent.<module>.<operation>` convention with + `def_name` / `task_id` / `agent_name` / `turn` fields (closes #3851). + ### Performance - `zeph-memory`: replace serial `embed()` calls with a single `embed_batch()` call in diff --git a/crates/zeph-orchestration/src/aggregator.rs b/crates/zeph-orchestration/src/aggregator.rs index 4be556361..b95880b1b 100644 --- a/crates/zeph-orchestration/src/aggregator.rs +++ b/crates/zeph-orchestration/src/aggregator.rs @@ -76,6 +76,7 @@ impl LlmAggregator

{ } impl Aggregator for LlmAggregator

{ + #[tracing::instrument(name = "orchestration.aggregator.aggregate", skip_all, fields(task_count = graph.tasks.len()))] async fn aggregate( &self, graph: &TaskGraph, diff --git a/crates/zeph-orchestration/src/planner.rs b/crates/zeph-orchestration/src/planner.rs index 331de742e..2525d0f6f 100644 --- a/crates/zeph-orchestration/src/planner.rs +++ b/crates/zeph-orchestration/src/planner.rs @@ -139,6 +139,7 @@ pub(crate) struct PlannedTask { } impl Planner for LlmPlanner

{ + #[tracing::instrument(name = "orchestration.planner.plan_with_hint", skip_all, fields(goal_len = goal.len()))] async fn plan_with_hint( &self, goal: &str, @@ -171,6 +172,7 @@ impl Planner for LlmPlanner

{ Ok((graph, usage)) } + #[tracing::instrument(name = "orchestration.planner.plan", skip_all, fields(goal_len = goal.len()))] async fn plan( &self, goal: &str, diff --git a/crates/zeph-subagent/src/agent_loop.rs b/crates/zeph-subagent/src/agent_loop.rs index 1f6f1a4a4..c75c3da40 100644 --- a/crates/zeph-subagent/src/agent_loop.rs +++ b/crates/zeph-subagent/src/agent_loop.rs @@ -370,6 +370,7 @@ enum TurnOutcome { /// Returns a [`TurnOutcome`] that drives the loop control flow in /// [`run_agent_loop`]. #[allow(clippy::too_many_arguments)] +#[tracing::instrument(name = "subagent.agent_loop.run_turn", skip_all, fields(task_id = task_id, turn = *turns))] async fn run_turn( provider: &AnyProvider, executor: &FilteredToolExecutor, @@ -560,6 +561,7 @@ async fn handle_tool_step( } } +#[tracing::instrument(name = "subagent.agent_loop.run", skip_all, fields(task_id = %args.task_id, agent_name = %args.agent_name))] pub(super) async fn run_agent_loop( args: AgentLoopArgs, ) -> Result { diff --git a/crates/zeph-subagent/src/manager.rs b/crates/zeph-subagent/src/manager.rs index b820aa63e..9bf48679e 100644 --- a/crates/zeph-subagent/src/manager.rs +++ b/crates/zeph-subagent/src/manager.rs @@ -701,7 +701,9 @@ impl SubAgentManager { /// [`SubAgentError::ConcurrencyLimit`] if the concurrency limit is exceeded, or /// [`SubAgentError::Invalid`] if the agent requests `bypass_permissions` but the config /// does not allow it (`allow_bypass_permissions: false`). 
- #[allow(clippy::too_many_arguments, clippy::too_many_lines)] // complex algorithm function; both suppressions justified until the function is decomposed in a future refactor + #[allow(clippy::too_many_arguments, clippy::too_many_lines)] + // complex algorithm function; both suppressions justified until the function is decomposed in a future refactor + #[tracing::instrument(name = "subagent.manager.spawn", skip_all, fields(def_name = def_name))] pub fn spawn( &mut self, def_name: &str, @@ -938,6 +940,7 @@ impl SubAgentManager { /// Iterates every agent ID and calls [`cancel`][Self::cancel] on each. /// Unlike [`cancel_all`][Self::cancel_all], this method goes through the normal /// cancel path including hook firing. Prefer this during planned shutdown. + #[tracing::instrument(name = "subagent.manager.shutdown_all", skip_all)] pub fn shutdown_all(&mut self) { let ids: Vec = self.agents.keys().cloned().collect(); for id in ids { @@ -1096,6 +1099,7 @@ impl SubAgentManager { /// /// Returns [`SubAgentError::NotFound`] if the task ID is unknown, /// [`SubAgentError::Spawn`] if the task panicked. + #[tracing::instrument(name = "subagent.manager.collect", skip_all, fields(task_id = task_id))] pub async fn collect(&mut self, task_id: &str) -> Result { let mut handle = self .agents