Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- `zeph-orchestration`: add `Llm(#[from] zeph_llm::LlmError)` typed variant to `OrchestrationError`
so callers can pattern-match on root LLM error kinds without string comparison (closes #3842).

### Changed

- `zeph-orchestration`: add `#[tracing::instrument]` spans to `LlmPlanner::plan`,
`LlmPlanner::plan_with_hint`, and `LlmAggregator::aggregate`. Span names follow the
`orchestration.<component>.<operation>` convention with `goal_len` / `task_count` fields
for Perfetto trace analysis (closes #3850).
- `zeph-subagent`: add `#[tracing::instrument]` spans to `SubAgentManager::spawn`,
`SubAgentManager::collect`, `SubAgentManager::shutdown_all`, `run_agent_loop`, and
`run_turn`. Span names follow the `subagent.<component>.<operation>` convention with
`def_name` / `task_id` / `turn` fields (closes #3851).

### Performance

- `zeph-memory`: replace serial `embed()` calls with a single `embed_batch()` call in
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-orchestration/src/aggregator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ impl<P: LlmProvider> LlmAggregator<P> {
}

impl<P: LlmProvider + Send + Sync> Aggregator for LlmAggregator<P> {
#[tracing::instrument(name = "orchestration.aggregator.aggregate", skip_all, fields(task_count = graph.tasks.len()))]
async fn aggregate(
&self,
graph: &TaskGraph,
Expand Down
2 changes: 2 additions & 0 deletions crates/zeph-orchestration/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ pub(crate) struct PlannedTask {
}

impl<P: LlmProvider + Send + Sync> Planner for LlmPlanner<P> {
#[tracing::instrument(name = "orchestration.planner.plan_with_hint", skip_all, fields(goal_len = goal.len()))]
async fn plan_with_hint(
&self,
goal: &str,
Expand Down Expand Up @@ -171,6 +172,7 @@ impl<P: LlmProvider + Send + Sync> Planner for LlmPlanner<P> {
Ok((graph, usage))
}

#[tracing::instrument(name = "orchestration.planner.plan", skip_all, fields(goal_len = goal.len()))]
async fn plan(
&self,
goal: &str,
Expand Down
2 changes: 2 additions & 0 deletions crates/zeph-subagent/src/agent_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ enum TurnOutcome {
/// Returns a [`TurnOutcome`] that drives the loop control flow in
/// [`run_agent_loop`].
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(name = "subagent.agent_loop.run_turn", skip_all, fields(task_id = task_id, turn = *turns))]
async fn run_turn(
provider: &AnyProvider,
executor: &FilteredToolExecutor,
Expand Down Expand Up @@ -560,6 +561,7 @@ async fn handle_tool_step(
}
}

#[tracing::instrument(name = "subagent.agent_loop.run", skip_all, fields(task_id = %args.task_id, agent_name = %args.agent_name))]
pub(super) async fn run_agent_loop(
args: AgentLoopArgs,
) -> Result<String, super::error::SubAgentError> {
Expand Down
6 changes: 5 additions & 1 deletion crates/zeph-subagent/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,9 @@ impl SubAgentManager {
/// [`SubAgentError::ConcurrencyLimit`] if the concurrency limit is exceeded, or
/// [`SubAgentError::Invalid`] if the agent requests `bypass_permissions` but the config
/// does not allow it (`allow_bypass_permissions: false`).
#[allow(clippy::too_many_arguments, clippy::too_many_lines)] // complex algorithm function; both suppressions justified until the function is decomposed in a future refactor
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
// complex algorithm function; both suppressions justified until the function is decomposed in a future refactor
#[tracing::instrument(name = "subagent.manager.spawn", skip_all, fields(def_name = def_name))]
pub fn spawn(
&mut self,
def_name: &str,
Expand Down Expand Up @@ -938,6 +940,7 @@ impl SubAgentManager {
/// Iterates every agent ID and calls [`cancel`][Self::cancel] on each.
/// Unlike [`cancel_all`][Self::cancel_all], this method goes through the normal
/// cancel path including hook firing. Prefer this during planned shutdown.
#[tracing::instrument(name = "subagent.manager.shutdown_all", skip_all)]
pub fn shutdown_all(&mut self) {
let ids: Vec<String> = self.agents.keys().cloned().collect();
for id in ids {
Expand Down Expand Up @@ -1096,6 +1099,7 @@ impl SubAgentManager {
///
/// Returns [`SubAgentError::NotFound`] if the task ID is unknown,
/// [`SubAgentError::Spawn`] if the task panicked.
#[tracing::instrument(name = "subagent.manager.collect", skip_all, fields(task_id = task_id))]
pub async fn collect(&mut self, task_id: &str) -> Result<String, SubAgentError> {
let mut handle = self
.agents
Expand Down
Loading