From 486ed041d2a0c7abff636301d07718ac20b9b1e4 Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 17:35:40 +0800 Subject: [PATCH 01/12] feat(nudges): pending-nudges queue + SDK enqueue/list API (#37, storage layer) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lands the storage + SDK surface for graff-memd's out-of-process system / user-message injection queue. Hermes does this inline because it's a single Python process; we need a queue because graff-memd is a sidecar. This PR is the **storage layer**. The conversation-loop drain hook is a separate follow-up so this can ship + be reviewed in isolation; the acceptance criterion that's still open is "Enqueue → next user turn includes the nudge → consumed flag flips" (drain integration). New surface: - `forge_domain::PendingNudge` — `(id, conversation_id, role, content, created_at, consumed_at?)` + `NudgeRole` enum (`system`, `user_visible`, `user_hidden`) with wire-stable `as_str` / `from_str` round-trip + JSON rename matching SQL value. - `forge_app::NudgeRepo` — async trait: `enqueue`, `next_unconsumed`, `mark_consumed`, `list_for_conversation`. - `forge_repo::NudgeRepositoryImpl` — diesel-backed; FIFO drain ordered by `(created_at asc, id asc)` so same-ms enqueues are still totally ordered. Atomic INSERT + `last_insert_rowid()` in a single transaction so a concurrent enqueue can't slot a row between insert and id read. - Migration `2026-05-21-180000_create_pending_nudges_table` with a composite drain index on `(conversation_id, consumed_at, created_at, id)` so the unconsumed-FIFO query covers the whole filter without a sort. - `forge_api::API`: `enqueue_nudge`, `list_nudges`. The drain path (`next_unconsumed`, `mark_consumed`) is intentionally NOT in the public API — it's an internal orchestrator concern. 8 new tests: - 3 domain tests for `NudgeRole` round-trip + visibility helpers - 5 repo-level integration tests against in-memory SQLite: - `enqueue_then_next_unconsumed_returns_in_fifo_order` — FIFO order + monotonic ids - `mark_consumed_is_idempotent_and_drops_from_unconsumed_set` — second `mark_consumed` returns `Ok(false)` - `next_unconsumed_is_scoped_by_conversation` — isolation across conversations - `list_for_conversation_returns_consumed_and_unconsumed` — debug path sees both states, fresh-first - `mark_consumed_for_missing_id_returns_false` — idempotent for unknown ids Disambiguation: both `TrajectoryRepo` and `NudgeRepo` define `list_for_conversation` with the same signature, so the `forge_api::ForgeAPI::list_trajectory` call site now uses the explicit `TrajectoryRepo::list_for_conversation(...)` form. Same pattern as the user-facts PR. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: blackfloofie-a codegraff agent <265516171+blackfloofie@users.noreply.github.com> --- crates/forge_api/src/api.rs | 13 +- crates/forge_api/src/forge_api.rs | 23 +- crates/forge_app/src/infra.rs | 40 +++ crates/forge_domain/src/lib.rs | 2 + crates/forge_domain/src/pending_nudges.rs | 159 +++++++++++ .../down.sql | 2 + .../up.sql | 24 ++ crates/forge_repo/src/database/schema.rs | 18 +- crates/forge_repo/src/forge_repo.rs | 31 +++ crates/forge_repo/src/lib.rs | 1 + crates/forge_repo/src/pending_nudges/mod.rs | 4 + .../pending_nudges/pending_nudge_record.rs | 52 ++++ .../src/pending_nudges/pending_nudges_repo.rs | 254 ++++++++++++++++++ 13 files changed, 615 insertions(+), 8 deletions(-) create mode 100644 crates/forge_domain/src/pending_nudges.rs create mode 100644 crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/down.sql create mode 100644 crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/up.sql create mode 100644 crates/forge_repo/src/pending_nudges/mod.rs create mode 100644 crates/forge_repo/src/pending_nudges/pending_nudge_record.rs create mode 100644 crates/forge_repo/src/pending_nudges/pending_nudges_repo.rs diff --git a/crates/forge_api/src/api.rs b/crates/forge_api/src/api.rs index c9c99df4..2112576b 100644 --- a/crates/forge_api/src/api.rs +++ b/crates/forge_api/src/api.rs @@ -4,7 +4,8 @@ use anyhow::Result; use forge_app::dto::ToolsOverview; use forge_app::{User, UserUsage}; use forge_domain::{ - AgentId, Effort, ModelId, ProviderModels, TrajectoryEvent, TrajectorySubscribeError, UserFact, + AgentId, Effort, ModelId, PendingNudge, ProviderModels, TrajectoryEvent, + TrajectorySubscribeError, UserFact, }; use forge_stream::MpscStream; use futures::stream::BoxStream; @@ -122,6 +123,16 @@ pub trait API: Sync + Send { /// deleted, `Ok(false)` when nothing matched (idempotent). async fn delete_user_fact(&self, key: &str) -> Result; + /// Enqueues a pending nudge to be drained at the start of the next + /// user turn for `nudge.conversation_id`. Returns the assigned row + /// id. Issue #37. + async fn enqueue_nudge(&self, nudge: PendingNudge) -> Result; + + /// Lists every nudge for `conversation_id`, consumed or not, in + /// reverse-chronological order. Used for inspection / debug — the + /// hot drain path is internal to the orchestrator (`next_unconsumed`). + async fn list_nudges(&self, conversation_id: &ConversationId) -> Result>; + /// Permanently deletes a conversation /// /// # Arguments diff --git a/crates/forge_api/src/forge_api.rs b/crates/forge_api/src/forge_api.rs index 30d7b818..9e8d7173 100644 --- a/crates/forge_api/src/forge_api.rs +++ b/crates/forge_api/src/forge_api.rs @@ -8,8 +8,8 @@ use forge_app::{ AgentProviderResolver, AgentRegistry, AppConfigService, AuthService, CommandInfra, CommandLoaderService, ConversationService, DataGenerationApp, EnvironmentInfra, FileDiscoveryService, ForgeApp, GitApp, GrpcInfra, McpConfigManager, McpService, - ProviderAuthService, ProviderService, Services, TrajectoryRepo, User, UserFactsRepo, - UserUsage, Walker, WorkspaceService, + NudgeRepo, ProviderAuthService, ProviderService, Services, TrajectoryRepo, User, + UserFactsRepo, UserUsage, Walker, WorkspaceService, }; use forge_config::ForgeConfig; use forge_domain::{Agent, ConsoleWriter, *}; @@ -80,6 +80,7 @@ impl< + GrpcInfra + TrajectoryRepo + UserFactsRepo + + NudgeRepo + 'static, > API for ForgeAPI { @@ -242,10 +243,9 @@ impl< ) -> anyhow::Result> { // Walk every agent's events for this conversation in one query so // /trace can render the parent → child agent tree without needing - // to know each child agent_id ahead of time. - self.infra - .list_for_conversation(&conversation_id.to_string()) - .await + // to know each child agent_id ahead of time. Disambiguated against + // `NudgeRepo::list_for_conversation` which has the same signature. + TrajectoryRepo::list_for_conversation(&*self.infra, &conversation_id.to_string()).await } async fn subscribe_trajectory( @@ -305,6 +305,17 @@ impl< UserFactsRepo::delete(&*self.infra, key).await } + async fn enqueue_nudge(&self, nudge: PendingNudge) -> anyhow::Result { + NudgeRepo::enqueue(&*self.infra, nudge).await + } + + async fn list_nudges( + &self, + conversation_id: &ConversationId, + ) -> anyhow::Result> { + NudgeRepo::list_for_conversation(&*self.infra, &conversation_id.to_string()).await + } + async fn delete_conversation(&self, conversation_id: &ConversationId) -> anyhow::Result<()> { self.services.delete_conversation(conversation_id).await } diff --git a/crates/forge_app/src/infra.rs b/crates/forge_app/src/infra.rs index ab368c91..0606ba28 100644 --- a/crates/forge_app/src/infra.rs +++ b/crates/forge_app/src/infra.rs @@ -461,6 +461,46 @@ pub trait TrajectoryRepo: Send + Sync { ) -> anyhow::Result>; } +/// Persistence for the pending-nudges queue — out-of-process system / +/// user message injection into an active conversation. +/// +/// Backed by the `pending_nudges` SQLite table. graff-memd enqueues +/// nudges from a sidecar; the orchestrator drains them at the start of +/// each user turn, prepends them as messages with the role indicated by +/// [`forge_domain::NudgeRole`], and stamps `consumed_at` so the same +/// nudge isn't replayed. +/// +/// Issue #37. +#[async_trait::async_trait] +pub trait NudgeRepo: Send + Sync { + /// Enqueue a new nudge. The repo stamps `created_at` server-side and + /// returns the assigned `id`. The caller's `created_at` value is + /// ignored (single source of truth). + async fn enqueue(&self, nudge: forge_domain::PendingNudge) -> anyhow::Result; + + /// Returns every unconsumed nudge for `conversation_id` in FIFO order + /// (created_at asc, id asc). Empty vec when none are pending — the + /// orchestrator can call this on every user turn without a hot path + /// even when graff-memd is dormant. + async fn next_unconsumed( + &self, + conversation_id: &str, + ) -> anyhow::Result>; + + /// Marks the nudge identified by `id` as consumed. Idempotent: a + /// row already marked is a no-op. Returns `Ok(true)` when the row + /// existed and was unconsumed, `Ok(false)` otherwise. + async fn mark_consumed(&self, id: i32) -> anyhow::Result; + + /// Returns every nudge for `conversation_id`, consumed or not, in + /// reverse-chronological order. Used for inspection / debug — the + /// hot drain path is `next_unconsumed`. + async fn list_for_conversation( + &self, + conversation_id: &str, + ) -> anyhow::Result>; +} + /// Persistence for the user-profile facts store — one row per durable, /// typed fact about the user, mined from session digests by graff-memd or /// supplied directly via the SDK. diff --git a/crates/forge_domain/src/lib.rs b/crates/forge_domain/src/lib.rs index 4d76f974..c57aad3e 100644 --- a/crates/forge_domain/src/lib.rs +++ b/crates/forge_domain/src/lib.rs @@ -43,6 +43,7 @@ mod suggestion; mod system_context; mod temperature; mod template; +mod pending_nudges; mod terminal_context; mod tools; mod trajectory; @@ -104,6 +105,7 @@ pub use template::*; pub use terminal_context::*; pub use tool_order::*; pub use tools::*; +pub use pending_nudges::*; pub use trajectory::*; pub use user_facts::*; pub use top_k::*; diff --git a/crates/forge_domain/src/pending_nudges.rs b/crates/forge_domain/src/pending_nudges.rs new file mode 100644 index 00000000..b313affc --- /dev/null +++ b/crates/forge_domain/src/pending_nudges.rs @@ -0,0 +1,159 @@ +use serde::{Deserialize, Serialize}; + +/// A queued message to inject into an active conversation at the start of +/// the next user turn. graff-memd uses this queue to ask the agent to +/// (e.g.) "review whether this turn warrants a skill update" without +/// living in the same process as the orchestrator. +/// +/// The orchestrator's drain hook reads every unconsumed nudge for a +/// conversation in FIFO order, prepends them as messages with the role +/// indicated by [`NudgeRole`], then stamps `consumed_at` so the same +/// nudge isn't replayed. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PendingNudge { + /// Auto-incrementing primary key. `None` before the row is persisted. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub id: Option, + /// Conversation this nudge is targeting. The drain query is scoped by + /// `(conversation_id, consumed_at IS NULL)`. + pub conversation_id: String, + /// Wire-stable role discriminator. See [`NudgeRole`]. + pub role: NudgeRole, + /// Body of the nudge. Free-form string — callers that need structure + /// (JSON, markdown, …) embed it client-side. + pub content: String, + /// Unix milliseconds the row was enqueued. Stamped server-side on + /// every enqueue (single source of truth). + pub created_at: i64, + /// Unix milliseconds the orchestrator consumed the nudge. `None` for + /// rows still pending; transitions to `Some(now_ms)` on drain. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub consumed_at: Option, +} + +/// Wire-stable nudge-role discriminator. Determines how the orchestrator +/// surfaces the nudge to the model + the user: +/// +/// - [`NudgeRole::System`] — injected as a system message; not rendered +/// in the TUI. Used for "internal" prompts like graff-memd's review +/// trigger. +/// - [`NudgeRole::UserVisible`] — injected as a user message and shown +/// in the TUI. Used when the nudge is something the human should see +/// (e.g. an SDK call mid-session). +/// - [`NudgeRole::UserHidden`] — injected as a system message **and** +/// never rendered in the TUI. Lets memd push context the agent needs +/// without polluting the user's transcript. +/// +/// Stored as a lower_snake_case string in SQLite; the serde `rename_all` +/// keeps JSON wire format identical to the SQL value. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum NudgeRole { + System, + UserVisible, + UserHidden, +} + +impl NudgeRole { + /// Wire-stable string form written to the SQLite `role` column. Must + /// match the serde `rename_all = "snake_case"` so JSON `role` and SQL + /// `role` agree. + pub fn as_str(&self) -> &'static str { + match self { + NudgeRole::System => "system", + NudgeRole::UserVisible => "user_visible", + NudgeRole::UserHidden => "user_hidden", + } + } + + /// Parses the wire-stable string form. Returns `None` for unknown + /// values rather than erroring so callers can decide how to react to + /// a forward-compatible role they don't recognise. + pub fn from_str(s: &str) -> Option { + match s { + "system" => Some(NudgeRole::System), + "user_visible" => Some(NudgeRole::UserVisible), + "user_hidden" => Some(NudgeRole::UserHidden), + _ => None, + } + } + + /// Whether the orchestrator should treat this role as user-visible + /// (rendered in the TUI). `system` and `user_hidden` are model-only. + pub fn is_user_visible(&self) -> bool { + matches!(self, NudgeRole::UserVisible) + } + + /// Whether the orchestrator should inject this nudge as a system + /// message. `system` + `user_hidden` both inject as system; only + /// `user_visible` injects as user. + pub fn injects_as_system(&self) -> bool { + matches!(self, NudgeRole::System | NudgeRole::UserHidden) + } +} + +impl PendingNudge { + /// Constructor for a freshly enqueued nudge. `id` is `None` and + /// `consumed_at` is `None`; both are populated by the repo on + /// persistence and drain respectively. `created_at` defaults to the + /// current wall-clock millisecond and is overwritten by the repo on + /// enqueue (single source of truth). + pub fn new( + conversation_id: impl Into, + role: NudgeRole, + content: impl Into, + ) -> Self { + Self { + id: None, + conversation_id: conversation_id.into(), + role, + content: content.into(), + created_at: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as i64) + .unwrap_or(0), + consumed_at: None, + } + } +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + + use super::*; + + #[test] + fn role_wire_strings_round_trip() { + // Lock in the invariant that as_str / from_str are inverses for + // every variant. If the serde rename diverges from these, JSON + // wire format silently breaks. + for role in [ + NudgeRole::System, + NudgeRole::UserVisible, + NudgeRole::UserHidden, + ] { + let s = role.as_str(); + assert_eq!(NudgeRole::from_str(s), Some(role)); + // serde rename matches as_str so SQL ↔ JSON agree. + let json = serde_json::to_string(&role).unwrap(); + assert_eq!(json, format!("\"{s}\"")); + } + } + + #[test] + fn role_from_str_returns_none_for_unknown() { + assert_eq!(NudgeRole::from_str("nonexistent_role"), None); + } + + #[test] + fn role_visibility_helpers_match_spec() { + assert!(NudgeRole::UserVisible.is_user_visible()); + assert!(!NudgeRole::System.is_user_visible()); + assert!(!NudgeRole::UserHidden.is_user_visible()); + + assert!(NudgeRole::System.injects_as_system()); + assert!(NudgeRole::UserHidden.injects_as_system()); + assert!(!NudgeRole::UserVisible.injects_as_system()); + } +} diff --git a/crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/down.sql b/crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/down.sql new file mode 100644 index 00000000..98363593 --- /dev/null +++ b/crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/down.sql @@ -0,0 +1,2 @@ +DROP INDEX IF EXISTS idx_pending_nudges_drain; +DROP TABLE IF EXISTS pending_nudges; diff --git a/crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/up.sql b/crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/up.sql new file mode 100644 index 00000000..b7a49df9 --- /dev/null +++ b/crates/forge_repo/src/database/migrations/2026-05-21-180000_create_pending_nudges_table/up.sql @@ -0,0 +1,24 @@ +-- Pending nudges: out-of-process queue of system messages to inject into +-- an active conversation. graff-memd uses this to ask the agent to e.g. +-- "review whether this turn warrants a skill update" without needing to +-- live in the same process as the orchestrator (Hermes does the inline +-- equivalent because it's a single Python process; we need the queue +-- because graff-memd is a sidecar). +-- +-- Drained at the start of each user turn (FIFO by `created_at` then `id`) +-- and stamped with `consumed_at` once injected. Rows are kept around after +-- consumption so callers can inspect what was injected — purge is left to +-- a separate retention policy. +CREATE TABLE IF NOT EXISTS pending_nudges ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + conversation_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT NOT NULL, + created_at BIGINT NOT NULL, + consumed_at BIGINT +); + +-- Drain query is keyed on (conversation_id, consumed_at IS NULL); the +-- index covers both the FIFO order and the unconsumed filter. +CREATE INDEX IF NOT EXISTS idx_pending_nudges_drain + ON pending_nudges(conversation_id, consumed_at, created_at, id); diff --git a/crates/forge_repo/src/database/schema.rs b/crates/forge_repo/src/database/schema.rs index 89e8456a..f09bd00f 100644 --- a/crates/forge_repo/src/database/schema.rs +++ b/crates/forge_repo/src/database/schema.rs @@ -35,4 +35,20 @@ diesel::table! { } } -diesel::allow_tables_to_appear_in_same_query!(conversations, trajectory_events, user_facts,); +diesel::table! { + pending_nudges (id) { + id -> Integer, + conversation_id -> Text, + role -> Text, + content -> Text, + created_at -> BigInt, + consumed_at -> Nullable, + } +} + +diesel::allow_tables_to_appear_in_same_query!( + conversations, + trajectory_events, + user_facts, + pending_nudges, +); diff --git a/crates/forge_repo/src/forge_repo.rs b/crates/forge_repo/src/forge_repo.rs index 560855ef..e3a14bc7 100644 --- a/crates/forge_repo/src/forge_repo.rs +++ b/crates/forge_repo/src/forge_repo.rs @@ -31,6 +31,7 @@ use crate::database::{DatabasePool, PoolConfig}; use crate::fs_snap::ForgeFileSnapshotService; use crate::fuzzy_search::ForgeFuzzySearchRepository; use crate::provider::{ForgeChatRepository, ForgeProviderRepository}; +use crate::pending_nudges::NudgeRepositoryImpl; use crate::skill::ForgeSkillRepository; use crate::trajectory::TrajectoryRepositoryImpl; use crate::user_facts::UserFactsRepositoryImpl; @@ -55,6 +56,7 @@ pub struct ForgeRepo { fuzzy_search_repository: Arc>, trajectory_repository: Arc, user_facts_repository: Arc, + nudge_repository: Arc, } impl< @@ -90,6 +92,7 @@ impl< let fuzzy_search_repository = Arc::new(ForgeFuzzySearchRepository::new(infra.clone())); let trajectory_repository = Arc::new(TrajectoryRepositoryImpl::new(db_pool.clone())); let user_facts_repository = Arc::new(UserFactsRepositoryImpl::new(db_pool.clone())); + let nudge_repository = Arc::new(NudgeRepositoryImpl::new(db_pool.clone())); Self { infra, file_snapshot_service, @@ -104,6 +107,7 @@ impl< fuzzy_search_repository, trajectory_repository, user_facts_repository, + nudge_repository, } } } @@ -740,3 +744,30 @@ impl forge_app::UserFactsRepo for ForgeRepo { self.user_facts_repository.delete(key).await } } + +#[async_trait::async_trait] +impl forge_app::NudgeRepo for ForgeRepo { + async fn enqueue(&self, nudge: forge_domain::PendingNudge) -> anyhow::Result { + self.nudge_repository.enqueue(nudge).await + } + + async fn next_unconsumed( + &self, + conversation_id: &str, + ) -> anyhow::Result> { + self.nudge_repository.next_unconsumed(conversation_id).await + } + + async fn mark_consumed(&self, id: i32) -> anyhow::Result { + self.nudge_repository.mark_consumed(id).await + } + + async fn list_for_conversation( + &self, + conversation_id: &str, + ) -> anyhow::Result> { + self.nudge_repository + .list_for_conversation(conversation_id) + .await + } +} diff --git a/crates/forge_repo/src/lib.rs b/crates/forge_repo/src/lib.rs index c87d4678..01a08dae 100644 --- a/crates/forge_repo/src/lib.rs +++ b/crates/forge_repo/src/lib.rs @@ -7,6 +7,7 @@ mod forge_repo; mod fs_snap; mod fuzzy_search; mod provider; +mod pending_nudges; mod skill; mod trajectory; mod user_facts; diff --git a/crates/forge_repo/src/pending_nudges/mod.rs b/crates/forge_repo/src/pending_nudges/mod.rs new file mode 100644 index 00000000..277e44a8 --- /dev/null +++ b/crates/forge_repo/src/pending_nudges/mod.rs @@ -0,0 +1,4 @@ +mod pending_nudge_record; +mod pending_nudges_repo; + +pub use pending_nudges_repo::*; diff --git a/crates/forge_repo/src/pending_nudges/pending_nudge_record.rs b/crates/forge_repo/src/pending_nudges/pending_nudge_record.rs new file mode 100644 index 00000000..e13237f5 --- /dev/null +++ b/crates/forge_repo/src/pending_nudges/pending_nudge_record.rs @@ -0,0 +1,52 @@ +use diesel::prelude::*; +use forge_domain::{NudgeRole, PendingNudge}; + +use crate::database::schema::pending_nudges; + +/// Diesel-side row shape for an enqueue. `id` is auto-assigned by SQLite +/// and `consumed_at` is `None` at enqueue time, so the insertable row only +/// carries the columns the caller is responsible for. +#[derive(Debug, Clone, Insertable)] +#[diesel(table_name = pending_nudges)] +pub struct NewPendingNudgeRecord { + pub conversation_id: String, + pub role: String, + pub content: String, + pub created_at: i64, +} + +/// Read-side row shape — mirrors the table schema 1:1. Translation to +/// the domain type happens via `TryFrom`; an unknown role string surfaces +/// as a `forward-compatible` error rather than silently swallowing rows. +#[derive(Debug, Clone, Queryable, Selectable)] +#[diesel(table_name = pending_nudges)] +pub struct PendingNudgeRecord { + pub id: i32, + pub conversation_id: String, + pub role: String, + pub content: String, + pub created_at: i64, + pub consumed_at: Option, +} + +impl TryFrom for PendingNudge { + type Error = anyhow::Error; + + fn try_from(record: PendingNudgeRecord) -> Result { + let role = NudgeRole::from_str(&record.role).ok_or_else(|| { + anyhow::anyhow!( + "pending_nudges.role for id={} is not a known NudgeRole: {}", + record.id, + record.role + ) + })?; + Ok(Self { + id: Some(record.id), + conversation_id: record.conversation_id, + role, + content: record.content, + created_at: record.created_at, + consumed_at: record.consumed_at, + }) + } +} diff --git a/crates/forge_repo/src/pending_nudges/pending_nudges_repo.rs b/crates/forge_repo/src/pending_nudges/pending_nudges_repo.rs new file mode 100644 index 00000000..618c8f3c --- /dev/null +++ b/crates/forge_repo/src/pending_nudges/pending_nudges_repo.rs @@ -0,0 +1,254 @@ +use std::sync::Arc; + +use diesel::prelude::*; +use forge_app::NudgeRepo; +use forge_domain::PendingNudge; + +use crate::database::schema::pending_nudges; +use crate::database::{DatabasePool, PooledSqliteConnection}; +use crate::pending_nudges::pending_nudge_record::{ + NewPendingNudgeRecord, PendingNudgeRecord, +}; + +pub struct NudgeRepositoryImpl { + pool: Arc, +} + +impl NudgeRepositoryImpl { + pub fn new(pool: Arc) -> Self { + Self { pool } + } + + async fn run_with_connection(&self, operation: F) -> anyhow::Result + where + F: FnOnce(&mut PooledSqliteConnection) -> anyhow::Result + Send + 'static, + T: Send + 'static, + { + let pool = self.pool.clone(); + tokio::task::spawn_blocking(move || { + let mut connection = pool.get_connection()?; + operation(&mut connection) + }) + .await + .map_err(|e| anyhow::anyhow!("Nudge repository task failed: {e}"))? + } +} + +fn now_ms() -> i64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as i64) + .unwrap_or(0) +} + +#[async_trait::async_trait] +impl NudgeRepo for NudgeRepositoryImpl { + async fn enqueue(&self, nudge: PendingNudge) -> anyhow::Result { + let new_record = NewPendingNudgeRecord { + conversation_id: nudge.conversation_id, + role: nudge.role.as_str().to_string(), + content: nudge.content, + created_at: now_ms(), + }; + self.run_with_connection(move |connection| { + // Insert + read back the assigned id in a single transaction so + // a concurrent enqueue can't slot a row between the INSERT and + // the last_insert_rowid() probe. + connection.transaction::<_, anyhow::Error, _>(|conn| { + diesel::insert_into(pending_nudges::table) + .values(&new_record) + .execute(conn)?; + use diesel::sql_types::Integer; + let id: i32 = diesel::dsl::sql::("SELECT last_insert_rowid()") + .get_result(conn)?; + Ok(id) + }) + }) + .await + } + + async fn next_unconsumed( + &self, + conversation_id: &str, + ) -> anyhow::Result> { + let conv_id = conversation_id.to_string(); + self.run_with_connection(move |connection| { + let records: Vec = pending_nudges::table + .filter(pending_nudges::conversation_id.eq(conv_id)) + .filter(pending_nudges::consumed_at.is_null()) + .order(( + pending_nudges::created_at.asc(), + pending_nudges::id.asc(), + )) + .select(PendingNudgeRecord::as_select()) + .load(connection)?; + records + .into_iter() + .map(PendingNudge::try_from) + .collect::, _>>() + }) + .await + } + + async fn mark_consumed(&self, id: i32) -> anyhow::Result { + let consumed = now_ms(); + self.run_with_connection(move |connection| { + // Only flip the flag for rows that are still unconsumed — + // re-marking an already-consumed row is a no-op rather than + // resetting `consumed_at` to a fresh timestamp. + let rows = diesel::update( + pending_nudges::table + .filter(pending_nudges::id.eq(id)) + .filter(pending_nudges::consumed_at.is_null()), + ) + .set(pending_nudges::consumed_at.eq(Some(consumed))) + .execute(connection)?; + Ok(rows > 0) + }) + .await + } + + async fn list_for_conversation( + &self, + conversation_id: &str, + ) -> anyhow::Result> { + let conv_id = conversation_id.to_string(); + self.run_with_connection(move |connection| { + let records: Vec = pending_nudges::table + .filter(pending_nudges::conversation_id.eq(conv_id)) + .order(( + pending_nudges::created_at.desc(), + pending_nudges::id.desc(), + )) + .select(PendingNudgeRecord::as_select()) + .load(connection)?; + records + .into_iter() + .map(PendingNudge::try_from) + .collect::, _>>() + }) + .await + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use forge_domain::{NudgeRole, PendingNudge}; + use pretty_assertions::assert_eq; + + use super::*; + use crate::database::DatabasePool; + + fn in_memory_repo() -> NudgeRepositoryImpl { + let pool = Arc::new(DatabasePool::in_memory().expect("in-memory pool")); + NudgeRepositoryImpl::new(pool) + } + + /// Round-trip an enqueue + drain. The drain order must be FIFO + /// (created_at asc, id asc), even when enqueues happen in the same + /// millisecond — `id` breaks the tie. + #[tokio::test] + async fn enqueue_then_next_unconsumed_returns_in_fifo_order() { + let repo = in_memory_repo(); + let conv = "conv-1"; + + let id_a = repo + .enqueue(PendingNudge::new(conv, NudgeRole::System, "first")) + .await + .unwrap(); + let id_b = repo + .enqueue(PendingNudge::new(conv, NudgeRole::UserVisible, "second")) + .await + .unwrap(); + + assert!(id_a < id_b, "ids must be monotonically increasing"); + + let pending = repo.next_unconsumed(conv).await.unwrap(); + assert_eq!(pending.len(), 2); + assert_eq!(pending[0].content, "first"); + assert_eq!(pending[1].content, "second"); + // Both rows are still unconsumed. + assert!(pending.iter().all(|n| n.consumed_at.is_none())); + assert_eq!(pending[0].role, NudgeRole::System); + assert_eq!(pending[1].role, NudgeRole::UserVisible); + } + + /// `mark_consumed` flips the flag, removes the row from + /// `next_unconsumed`, and is idempotent on re-call. + #[tokio::test] + async fn mark_consumed_is_idempotent_and_drops_from_unconsumed_set() { + let repo = in_memory_repo(); + let conv = "conv-1"; + let id = repo + .enqueue(PendingNudge::new(conv, NudgeRole::UserHidden, "memo")) + .await + .unwrap(); + + let first = repo.mark_consumed(id).await.unwrap(); + assert!(first, "first mark_consumed must report success"); + let second = repo.mark_consumed(id).await.unwrap(); + assert!(!second, "second mark_consumed must be idempotent"); + + let pending = repo.next_unconsumed(conv).await.unwrap(); + assert!(pending.is_empty(), "consumed nudge must drop from unconsumed set"); + } + + /// Two conversations' queues are isolated — `next_unconsumed` for + /// `conv-A` must not see `conv-B`'s nudges. + #[tokio::test] + async fn next_unconsumed_is_scoped_by_conversation() { + let repo = in_memory_repo(); + repo.enqueue(PendingNudge::new("conv-A", NudgeRole::System, "for A")) + .await + .unwrap(); + repo.enqueue(PendingNudge::new("conv-B", NudgeRole::System, "for B")) + .await + .unwrap(); + + let a = repo.next_unconsumed("conv-A").await.unwrap(); + let b = repo.next_unconsumed("conv-B").await.unwrap(); + assert_eq!(a.len(), 1); + assert_eq!(b.len(), 1); + assert_eq!(a[0].content, "for A"); + assert_eq!(b[0].content, "for B"); + } + + /// `list_for_conversation` returns BOTH consumed and unconsumed rows + /// (debug / inspection use-case), in reverse-chronological order so + /// the freshest activity surfaces first. + #[tokio::test] + async fn list_for_conversation_returns_consumed_and_unconsumed() { + let repo = in_memory_repo(); + let conv = "conv-1"; + let id_a = repo + .enqueue(PendingNudge::new(conv, NudgeRole::System, "old")) + .await + .unwrap(); + // Sleep so the second nudge has a strictly larger created_at. + tokio::time::sleep(std::time::Duration::from_millis(2)).await; + repo.enqueue(PendingNudge::new(conv, NudgeRole::System, "fresh")) + .await + .unwrap(); + repo.mark_consumed(id_a).await.unwrap(); + + let all = repo.list_for_conversation(conv).await.unwrap(); + assert_eq!(all.len(), 2); + // Fresh first. + assert_eq!(all[0].content, "fresh"); + assert_eq!(all[1].content, "old"); + assert!(all[0].consumed_at.is_none()); + assert!(all[1].consumed_at.is_some()); + } + + /// `mark_consumed` for an unknown id returns `Ok(false)` rather than + /// erroring — callers shouldn't have to distinguish "row missing" + /// from a real error. + #[tokio::test] + async fn mark_consumed_for_missing_id_returns_false() { + let repo = in_memory_repo(); + let actual = repo.mark_consumed(999_999).await.unwrap(); + assert!(!actual); + } +} From 21cc9ce4346cf6130a78daf6c3af5a40b07394cb Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Mon, 25 May 2026 17:09:25 +0800 Subject: [PATCH 02/12] feat(meta-tools): replace full tool definitions with 3 meta-tools on provider requests Introduces a meta-tool protocol that replaces sending all tool definitions to the LLM provider with just 3 small meta-tool definitions: - tools_list: discover available tool names and descriptions - tools_info: inspect the full schema for a specific tool - call_tool: invoke a tool by name with arguments This saves significant tokens on every request since tool schemas are no longer sent repeatedly. Key changes: - Add CallToolInput, ToolsListInput, ToolsInfoInput domain types - Add CallTool, ToolsList, ToolsInfo variants to ToolCatalog enum - Implement meta-tool dispatch in ToolRegistry (tools_list returns names, tools_info returns schema, call_tool delegates to the real tool) - Modify ApplyTunableParameters to pass only meta-tool definitions to providers - Update system prompt with meta-tool protocol instructions - Add SummaryTool::MetaTools and Operation::MetaTool to compat layers - Add 8 unit tests + 2 integration tests for parsing, dispatch, and tool filtering Co-Authored-By: blackfloofie-a codegraff agent <265516171+blackfloofie@users.noreply.github.com> --- .../forge_app/src/apply_tunable_parameters.rs | 32 +- crates/forge_app/src/fmt/fmt_input.rs | 4 + ...spec__orch_system_spec__system_prompt.snap | 7 +- ...em_spec__system_prompt_tool_supported.snap | 7 +- ...m_spec__system_prompt_with_extensions.snap | 7 +- ...stem_prompt_with_extensions_truncated.snap | 7 +- ...istry__all_rendered_tool_descriptions.snap | 18 ++ crates/forge_app/src/tool_executor.rs | 9 +- crates/forge_app/src/tool_registry.rs | 90 +++++- .../src/transformers/strip_working_dir.rs | 3 +- .../src/transformers/trim_context_summary.rs | 3 + crates/forge_domain/src/compact/summary.rs | 10 + crates/forge_domain/src/tools/catalog.rs | 300 +++++++++++++++++- ..._definition__usage__tests__tool_usage.snap | 5 +- ..._catalog__tests__tool_definition_json.snap | 40 ++- ...s__openai_responses_all_catalog_tools.snap | 63 +++- templates/forge-custom-agent-template.md | 7 +- 17 files changed, 587 insertions(+), 25 deletions(-) diff --git a/crates/forge_app/src/apply_tunable_parameters.rs b/crates/forge_app/src/apply_tunable_parameters.rs index 37cf999d..61b757ec 100644 --- a/crates/forge_app/src/apply_tunable_parameters.rs +++ b/crates/forge_app/src/apply_tunable_parameters.rs @@ -1,15 +1,20 @@ -use forge_domain::{Agent, Conversation, ToolDefinition}; +use forge_domain::{Agent, Conversation, ToolCatalog, ToolDefinition}; /// Applies tunable parameters from agent to conversation context +/// +/// Instead of sending ALL tool definitions (with their full JSON schemas) to +/// the LLM provider — which burns significant tokens — we send only the three +/// meta-tool definitions (`tools_list`, `tools_info`, `call_tool`). The LLM +/// uses these meta-tools to discover, inspect, and invoke the actual tools +/// dynamically. #[derive(Debug, Clone)] pub struct ApplyTunableParameters { agent: Agent, - tool_definitions: Vec, } impl ApplyTunableParameters { - pub const fn new(agent: Agent, tool_definitions: Vec) -> Self { - Self { agent, tool_definitions } + pub fn new(agent: Agent, _tool_definitions: Vec) -> Self { + Self { agent } } pub fn apply(self, mut conversation: Conversation) -> Conversation { @@ -37,19 +42,23 @@ impl ApplyTunableParameters { ctx = ctx.verbosity(verbosity); } - conversation.context(ctx.tools(self.tool_definitions)) + // Send only meta-tool definitions to the provider instead of all tool definitions. + // Meta-tools (tools_list, tools_info, call_tool) allow the LLM to dynamically + // discover, inspect, and invoke the actual tools, saving significant tokens + // that would otherwise be spent on full tool JSON schemas. + conversation.context(ctx.tools(ToolCatalog::meta_tool_definitions())) } } #[cfg(test)] mod tests { use forge_domain::{ - AgentId, Context, ConversationId, MaxTokens, ModelId, ProviderId, ReasoningConfig, - Temperature, ToolDefinition, TopK, TopP, Verbosity, + Agent, AgentId, Context, Conversation, ConversationId, MaxTokens, ModelId, ProviderId, + ReasoningConfig, Temperature, ToolDefinition, ToolName, TopK, TopP, Verbosity, }; use pretty_assertions::assert_eq; - use super::*; + use crate::apply_tunable_parameters::ApplyTunableParameters; #[derive(schemars::JsonSchema)] struct TestToolInput; @@ -86,7 +95,12 @@ mod tests { assert_eq!(ctx.top_p, Some(TopP::new(0.9).unwrap())); assert_eq!(ctx.reasoning, Some(reasoning)); assert_eq!(ctx.verbosity, Some(Verbosity::Low)); - assert_eq!(ctx.tools, vec![tool_def]); + // Only meta-tool definitions are sent to the provider + assert!(ctx + .tools + .iter() + .all(|t| t.name.as_str().starts_with("tools_") || t.name == ToolName::new("call_tool"))); + assert!(ctx.tools.len() >= 3); } /// When the agent doesn't pin a verbosity, the context's existing diff --git a/crates/forge_app/src/fmt/fmt_input.rs b/crates/forge_app/src/fmt/fmt_input.rs index f483f1d5..3352da5d 100644 --- a/crates/forge_app/src/fmt/fmt_input.rs +++ b/crates/forge_app/src/fmt/fmt_input.rs @@ -136,6 +136,10 @@ impl FormatContent for ToolCatalog { ToolCatalog::Task(input) => { Some(TitleFormat::debug("Task").sub_title(&input.agent_id).into()) } + // Meta-tools don't produce visual tool input output + ToolCatalog::ToolsList(_) | ToolCatalog::ToolsInfo(_) | ToolCatalog::CallTool(_) => { + None + } } } } diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap index 5257e65d..1a44d47b 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap @@ -13,7 +13,12 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than sequentially. +- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. +- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. + - Use `tools_list` to discover which tools are available and their brief descriptions. + - Use `tools_info` to get the full JSON schema of a specific tool before calling it. + - Use `call_tool` to execute any tool by name with the appropriate arguments. +- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap index bef63fe3..e1398178 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap @@ -17,7 +17,12 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than sequentially. +- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. +- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. + - Use `tools_list` to discover which tools are available and their brief descriptions. + - Use `tools_info` to get the full JSON schema of a specific tool before calling it. + - Use `call_tool` to execute any tool by name with the appropriate arguments. +- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap index 8dbc7b93..137d141c 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap @@ -19,7 +19,12 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than sequentially. +- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. +- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. + - Use `tools_list` to discover which tools are available and their brief descriptions. + - Use `tools_info` to get the full JSON schema of a specific tool before calling it. + - Use `call_tool` to execute any tool by name with the appropriate arguments. +- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap index e72bb117..21f51101 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap @@ -31,7 +31,12 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than sequentially. +- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. +- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. + - Use `tools_list` to discover which tools are available and their brief descriptions. + - Use `tools_info` to get the full JSON schema of a specific tool before calling it. + - Use `call_tool` to execute any tool by name with the appropriate arguments. +- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap index 9ff97b35..4732a31d 100644 --- a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap +++ b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap @@ -512,3 +512,21 @@ Since the user is greeting, use the greeting-responder agent to respond with a f assistant: "I'm going to use the task tool to launch the greeting-responder agent" + +--- + +### tools_list + +Lists all available tool names with brief descriptions. Use this tool to discover which tools are available to call. Returns a JSON object with tool names and brief descriptions. + +--- + +### tools_info + +Returns the full ToolDefinition (including input schema, description, and name) for a specific tool. Use this after 'tools_list' to get the complete schema for a tool you want to call. The tool_name should match one of the names returned by 'tools_list'. + +--- + +### call_tool + +Calls any tool by name with the provided arguments. Use this to execute a tool after looking up its schema via 'tools_info'. Pass the tool_name (the name of the tool to call) and arguments (a JSON object matching the tool's input schema). The tool will be executed and its output will be returned. diff --git a/crates/forge_app/src/tool_executor.rs b/crates/forge_app/src/tool_executor.rs index 0fa02482..e40892ea 100644 --- a/crates/forge_app/src/tool_executor.rs +++ b/crates/forge_app/src/tool_executor.rs @@ -332,9 +332,12 @@ impl< let todos = context.get_todos()?; ToolOperation::TodoRead { output: todos } } - ToolCatalog::Task(_) => { - // Task tools are handled in ToolRegistry before reaching here - unreachable!("Task tool should be handled in ToolRegistry") + ToolCatalog::Task(_) + | ToolCatalog::ToolsList(_) + | ToolCatalog::ToolsInfo(_) + | ToolCatalog::CallTool(_) => { + // Task and meta-tools are handled in ToolRegistry before reaching here + unreachable!("Meta-tools should be handled in ToolRegistry") } }) } diff --git a/crates/forge_app/src/tool_registry.rs b/crates/forge_app/src/tool_registry.rs index 18f02f43..97eb70bc 100644 --- a/crates/forge_app/src/tool_registry.rs +++ b/crates/forge_app/src/tool_registry.rs @@ -168,6 +168,45 @@ impl> ToolReg return Ok(ToolOutput::from(outputs.into_iter())); } + // Meta-tool: tools_list - returns all available tool names with descriptions + if let ToolCatalog::ToolsList(_) = &tool_input { + let definitions = self.list().await?; + let tools_json: Vec = definitions + .iter() + .map(|d| { + serde_json::json!({ + "name": d.name.as_str(), + "description": d.description, + }) + }) + .collect(); + let output = serde_json::to_string_pretty(&tools_json)?; + return Ok(ToolOutput::text(output)); + } + + // Meta-tool: tools_info - returns full definition of a specific tool + if let ToolCatalog::ToolsInfo(input) = &tool_input { + let definitions = self.list().await?; + let def = definitions + .into_iter() + .find(|d| d.name.as_str() == input.tool_name) + .ok_or_else(|| Error::NotFound(ToolName::new(&input.tool_name)))?; + let output = serde_json::to_string_pretty(&def)?; + return Ok(ToolOutput::text(output)); + } + + // Meta-tool: call_tool - routes to an actual tool by name + if let ToolCatalog::CallTool(input) = &tool_input { + let tool_name = ToolName::new(&input.tool_name); + let tool_call = ToolCallFull { + name: tool_name, + arguments: forge_domain::ToolCallArguments::from(input.arguments.clone()), + call_id: None, + thought_signature: None, + }; + return Box::pin(self.call_inner(agent, tool_call, context)).await; + } + let env = self.services.get_environment(); if let Some(content) = tool_input.to_content(&env) { context.send(content).await?; @@ -394,11 +433,24 @@ impl ToolRegistry { .collect::>() } + /// Returns true if the tool name is a meta-tool (tools_list, tools_info, call_tool). + /// Meta-tools are always available regardless of agent tool configuration. + fn is_meta_tool(tool_name: &ToolName) -> bool { + let name = tool_name.as_str().trim().to_lowercase(); + matches!(name.as_str(), "tools_list" | "tools_info" | "call_tool") + } + /// Validates if a tool is supported by both the agent and the system. /// /// # Validation Process - /// Verifies the tool is supported by the agent specified in the context + /// Verifies the tool is supported by the agent specified in the context. + /// Meta-tools (tools_list, tools_info, call_tool) are always allowed. fn validate_tool_call(agent: &Agent, tool_name: &ToolName) -> Result<(), Error> { + // Meta-tools are always allowed regardless of agent configuration + if Self::is_meta_tool(tool_name) { + return Ok(()); + } + // Check if tool matches any pattern (supports globs like "mcp_*") let matches = ToolResolver::is_allowed(agent, tool_name); if !matches { @@ -691,6 +743,42 @@ mod tests { assert!(actual.is_err()); } + #[test] + fn test_meta_tools_bypass_validate_tool_call() { + // Meta-tools should bypass tool validation even when the agent has no tools + let fixture = Agent::new( + AgentId::new("test_agent"), + ProviderId::ANTHROPIC, + ModelId::new("claude-3-5-sonnet-20241022"), + ); + + assert!(ToolRegistry::<()>::validate_tool_call(&fixture, &ToolName::new("tools_list")).is_ok()); + assert!(ToolRegistry::<()>::validate_tool_call(&fixture, &ToolName::new("tools_info")).is_ok()); + assert!(ToolRegistry::<()>::validate_tool_call(&fixture, &ToolName::new("call_tool")).is_ok()); + + // Regular tool should still fail + assert!(ToolRegistry::<()>::validate_tool_call(&fixture, &ToolName::new("read")).is_err()); + } + + #[test] + fn test_is_meta_tool_detects_all_meta_tools() { + assert!(ToolRegistry::<()>::is_meta_tool(&ToolName::new("tools_list"))); + assert!(ToolRegistry::<()>::is_meta_tool(&ToolName::new("tools_info"))); + assert!(ToolRegistry::<()>::is_meta_tool(&ToolName::new("call_tool"))); + + // Non-meta tools should not be detected + assert!(!ToolRegistry::<()>::is_meta_tool(&ToolName::new("read"))); + assert!(!ToolRegistry::<()>::is_meta_tool(&ToolName::new("write"))); + assert!(!ToolRegistry::<()>::is_meta_tool(&ToolName::new("shell"))); + } + + #[test] + fn test_meta_tools_case_insensitive() { + assert!(ToolRegistry::<()>::is_meta_tool(&ToolName::new("TOOLS_LIST"))); + assert!(ToolRegistry::<()>::is_meta_tool(&ToolName::new("Tools_Info"))); + assert!(ToolRegistry::<()>::is_meta_tool(&ToolName::new("Call_Tool"))); + } + #[test] fn test_validate_tool_call_glob_with_prefix_suffix() { let fixture = Agent::new( diff --git a/crates/forge_app/src/transformers/strip_working_dir.rs b/crates/forge_app/src/transformers/strip_working_dir.rs index dcb69112..0cad8978 100644 --- a/crates/forge_app/src/transformers/strip_working_dir.rs +++ b/crates/forge_app/src/transformers/strip_working_dir.rs @@ -87,7 +87,8 @@ impl Transformer for StripWorkingDir { | SummaryTool::Task { .. } | SummaryTool::Mcp { .. } | SummaryTool::TodoWrite { .. } - | SummaryTool::TodoRead => { + | SummaryTool::TodoRead + | SummaryTool::MetaTools { .. } => { // These tools don't have paths to strip } } diff --git a/crates/forge_app/src/transformers/trim_context_summary.rs b/crates/forge_app/src/transformers/trim_context_summary.rs index 9207c77b..cc2262ee 100644 --- a/crates/forge_app/src/transformers/trim_context_summary.rs +++ b/crates/forge_app/src/transformers/trim_context_summary.rs @@ -39,6 +39,8 @@ enum Operation<'a> { Mcp(&'a str), /// Todo operation - each todo_write is unique and won't be deduplicated Todo, + /// Meta-tool operation (tools_list, tools_info, call_tool) - each is unique + MetaTool, } /// Converts the tool call to its operation type for comparison. @@ -62,6 +64,7 @@ fn to_op(tool: &SummaryTool) -> Operation<'_> { SummaryTool::Mcp { name } => Operation::Mcp(name), SummaryTool::TodoWrite { .. } => Operation::Todo, SummaryTool::TodoRead => Operation::Todo, + SummaryTool::MetaTools { .. } => Operation::MetaTool, } } diff --git a/crates/forge_domain/src/compact/summary.rs b/crates/forge_domain/src/compact/summary.rs index 3416dfdb..16741a8e 100644 --- a/crates/forge_domain/src/compact/summary.rs +++ b/crates/forge_domain/src/compact/summary.rs @@ -195,6 +195,7 @@ pub enum SummaryTool { Mcp { name: String }, TodoWrite { changes: Vec }, TodoRead, + MetaTools { action: String }, } /// The kind of change applied to a todo item @@ -407,6 +408,15 @@ fn extract_tool_info(call: &ToolCallFull, current_todos: &[Todo]) -> Option Some(SummaryTool::TodoRead), ToolCatalog::Task(input) => Some(SummaryTool::Task { agent_id: input.agent_id }), + ToolCatalog::ToolsList(_) => Some(SummaryTool::MetaTools { + action: "list".to_string(), + }), + ToolCatalog::ToolsInfo(input) => Some(SummaryTool::MetaTools { + action: format!("info: {}", input.tool_name), + }), + ToolCatalog::CallTool(input) => Some(SummaryTool::MetaTools { + action: format!("call: {}", input.tool_name), + }), }; } diff --git a/crates/forge_domain/src/tools/catalog.rs b/crates/forge_domain/src/tools/catalog.rs index 5a0c4f92..56a7c0b3 100644 --- a/crates/forge_domain/src/tools/catalog.rs +++ b/crates/forge_domain/src/tools/catalog.rs @@ -58,6 +58,64 @@ pub enum ToolCatalog { TodoRead(TodoRead), #[serde(alias = "Task")] Task(TaskInput), + /// Meta-tool: Lists all available tool names with brief descriptions. + /// Use this to discover which tools are available before calling one. + #[serde(alias = "ToolsList")] + ToolsList(ToolsListInput), + /// Meta-tool: Returns the full ToolDefinition (including input schema) for + /// a specific tool. Use this after `tools_list` to get the complete schema + /// for a tool you want to call. + #[serde(alias = "ToolsInfo")] + ToolsInfo(ToolsInfoInput), + /// Meta-tool: Calls any tool by name with the provided arguments. + /// Use this to execute a tool after looking up its schema via `tools_info`. + /// The `arguments` field must match the input schema of the tool being called. + #[serde(alias = "CallTool")] + CallTool(CallToolInput), +} + +/// Meta-tool: Lists all available tool names with brief descriptions. +/// Use this to discover which tools are available before calling one. +#[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct ToolsListInput {} + +/// Meta-tool: Returns the full ToolDefinition (including input schema) for +/// a specific tool. Use this after `tools_list` to get the complete schema +/// for a tool you want to call. +#[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct ToolsInfoInput { + /// The name of the tool to get information about (e.g., "read", "write", + /// "shell") + pub tool_name: String, +} + +/// Meta-tool: Calls any tool by name with the provided arguments. +/// Use this to execute a tool after looking up its schema via `tools_info`. +/// The `arguments` field must match the input schema of the tool being called. +#[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct CallToolInput { + /// The name of the tool to call (e.g., "read", "write", "shell") + pub tool_name: String, + /// The arguments to pass to the tool, matching its input schema. + pub arguments: serde_json::Value, +} + +impl ToolDescription for ToolsListInput { + fn description(&self) -> String { + "Lists all available tool names with brief descriptions. Use this tool to discover which tools are available to call. Returns a JSON object with tool names and brief descriptions.".to_string() + } +} + +impl ToolDescription for ToolsInfoInput { + fn description(&self) -> String { + "Returns the full ToolDefinition (including input schema, description, and name) for a specific tool. Use this after 'tools_list' to get the complete schema for a tool you want to call. The tool_name should match one of the names returned by 'tools_list'.".to_string() + } +} + +impl ToolDescription for CallToolInput { + fn description(&self) -> String { + "Calls any tool by name with the provided arguments. Use this to execute a tool after looking up its schema via 'tools_info'. Pass the tool_name (the name of the tool to call) and arguments (a JSON object matching the tool's input schema). The tool will be executed and its output will be returned.".to_string() + } } /// Input structure for agent tool calls. This serves as the generic schema @@ -95,7 +153,7 @@ pub struct TaskInput { /// Optional model override for this subagent run. When set, the spawned /// agent will use this model id instead of its configured default. The /// model must belong to a provider the user has already authenticated - /// with — unauthenticated models are rejected. Use this when the user + /// with — unaunthenticated models are rejected. Use this when the user /// explicitly requests a specific model for a subagent (e.g. "spawn a /// subagent with gpt-5.4-medium to do X"). #[serde(skip_serializing_if = "Option::is_none")] @@ -835,6 +893,9 @@ impl ToolDescription for ToolCatalog { ToolCatalog::TodoWrite(v) => v.description(), ToolCatalog::TodoRead(v) => v.description(), ToolCatalog::Task(v) => v.description(), + ToolCatalog::ToolsList(v) => v.description(), + ToolCatalog::ToolsInfo(v) => v.description(), + ToolCatalog::CallTool(v) => v.description(), } } } @@ -894,6 +955,9 @@ impl ToolCatalog { ToolCatalog::Task(_) => r#gen.into_root_schema_for::(), ToolCatalog::TodoWrite(_) => r#gen.into_root_schema_for::(), ToolCatalog::TodoRead(_) => r#gen.into_root_schema_for::(), + ToolCatalog::ToolsList(_) => r#gen.into_root_schema_for::(), + ToolCatalog::ToolsInfo(_) => r#gen.into_root_schema_for::(), + ToolCatalog::CallTool(_) => r#gen.into_root_schema_for::(), }; // Apply transform to add nullable property and remove null from type @@ -1019,7 +1083,10 @@ impl ToolCatalog { | ToolCatalog::Skill(_) | ToolCatalog::TodoWrite(_) | ToolCatalog::TodoRead(_) - | ToolCatalog::Task(_) => None, + | ToolCatalog::Task(_) + | ToolCatalog::ToolsList(_) + | ToolCatalog::ToolsInfo(_) + | ToolCatalog::CallTool(_) => None, } } @@ -1136,6 +1203,24 @@ impl ToolCatalog { pub fn kind(&self) -> ToolKind { self.clone().into() } + + /// Returns only the meta-tool definitions (tools_list, tools_info, call_tool). + /// These are the tool definitions that should be sent to LLM providers + /// instead of the full tool list, saving tokens on tool schema definitions. + pub fn meta_tool_definitions() -> Vec { + use strum::IntoEnumIterator; + ToolCatalog::iter() + .filter(|tool| { + matches!( + tool, + ToolCatalog::ToolsList(_) + | ToolCatalog::ToolsInfo(_) + | ToolCatalog::CallTool(_) + ) + }) + .map(|tool| tool.definition()) + .collect() + } } fn format_display_path(path: &Path, cwd: &Path) -> String { @@ -1925,4 +2010,215 @@ mod tests { ); assert!(matches!(actual.unwrap(), ToolCatalog::Patch(_))); } + + #[test] + fn test_meta_tool_definitions_are_correct() { + let definitions = ToolCatalog::meta_tool_definitions(); + + // Should have exactly 3 meta-tools + assert_eq!(definitions.len(), 3); + + // All should have non-empty descriptions + let names: Vec<&str> = definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"tools_list")); + assert!(names.contains(&"tools_info")); + assert!(names.contains(&"call_tool")); + + for def in &definitions { + assert!(!def.description.is_empty(), "Meta-tool '{}' should have a description", def.name); + } + } + + #[test] + fn test_meta_tool_definitions_have_schemas() { + let definitions = ToolCatalog::meta_tool_definitions(); + + for def in &definitions { + let schema = &def.input_schema; + let schema_str = serde_json::to_string(schema).unwrap(); + + assert!( + schema_str.contains("\"type\""), + "Meta-tool '{}' should have a JSON schema with a 'type' field: {}", + def.name, + schema_str + ); + } + } + + /// Functional test: simulate the LLM sending a `tools_list` call via the + /// standard ToolCallFull -> TryFrom pipeline. + #[test] + fn test_meta_tool_tools_list_parse() { + use crate::{ToolCallArguments, ToolCallFull}; + + // This is exactly what the LLM sends to call tools_list + let tool_call = ToolCallFull { + name: ToolName::new("tools_list"), + call_id: Some("call_1".into()), + arguments: ToolCallArguments::from_json(r#"{}"#), + thought_signature: None, + }; + + let actual = ToolCatalog::try_from(tool_call); + assert!(actual.is_ok(), "tools_list should parse successfully"); + assert!(matches!(actual.unwrap(), ToolCatalog::ToolsList(_))); + } + + /// Functional test: simulate the LLM sending a `tools_info` call via the + /// standard pipeline. + #[test] + fn test_meta_tool_tools_info_parse() { + use crate::{ToolCallArguments, ToolCallFull}; + + let tool_call = ToolCallFull { + name: ToolName::new("tools_info"), + call_id: Some("call_2".into()), + arguments: ToolCallArguments::from_json(r#"{"tool_name": "read"}"#), + thought_signature: None, + }; + + let actual = ToolCatalog::try_from(tool_call); + assert!(actual.is_ok(), "tools_info should parse successfully"); + + if let Ok(ToolCatalog::ToolsInfo(info)) = actual { + assert_eq!(info.tool_name, "read"); + } else { + panic!("Expected ToolsInfo variant"); + } + } + + /// Functional test: simulate the LLM sending a `call_tool` invocation via + /// the standard pipeline. This is the critical path — the LLM calls + /// `call_tool` to invoke an underlying tool. + #[test] + fn test_meta_tool_call_tool_parse() { + use crate::{ToolCallArguments, ToolCallFull}; + + let tool_call = ToolCallFull { + name: ToolName::new("call_tool"), + call_id: Some("call_3".into()), + arguments: ToolCallArguments::from_json( + r#"{"tool_name": "read", "arguments": {"file_path": "/test/file.rs", "show_line_numbers": true}}"#, + ), + thought_signature: None, + }; + + let actual = ToolCatalog::try_from(tool_call); + assert!(actual.is_ok(), "call_tool should parse successfully"); + + if let Ok(ToolCatalog::CallTool(input)) = actual { + assert_eq!(input.tool_name, "read", "Should extract tool_name"); + assert_eq!( + input.arguments.get("file_path").and_then(|v| v.as_str()), + Some("/test/file.rs"), + "Should extract inner arguments" + ); + assert_eq!( + input.arguments.get("show_line_numbers").and_then(|v| v.as_bool()), + Some(true), + "Should extract boolean arguments" + ); + } else { + panic!("Expected CallTool variant"); + } + } + + /// Functional test: test that call_tool can round-trip with complex nested + /// arguments matching real tool schemas (e.g. multi_patch edits). + #[test] + fn test_meta_tool_call_tool_complex_arguments() { + use crate::{ToolCallArguments, ToolCallFull}; + + let tool_call = ToolCallFull { + name: ToolName::new("call_tool"), + call_id: Some("call_4".into()), + arguments: ToolCallArguments::from_json( + r#"{ + "tool_name": "multi_patch", + "arguments": { + "file_path": "/test/file.rs", + "edits": [ + {"old_string": "foo", "new_string": "bar"}, + {"old_string": "baz", "new_string": "qux", "replace_all": true} + ] + } + }"#, + ), + thought_signature: None, + }; + + let actual = ToolCatalog::try_from(tool_call); + assert!(actual.is_ok(), "call_tool with complex args should parse"); + + if let Ok(ToolCatalog::CallTool(input)) = actual { + assert_eq!(input.tool_name, "multi_patch"); + // Verify the inner arguments are preserved as a JSON object + let inner = &input.arguments; + assert_eq!( + inner.get("file_path").and_then(|v| v.as_str()), + Some("/test/file.rs") + ); + let edits = inner.get("edits").and_then(|v| v.as_array()); + assert!(edits.is_some(), "Should have edits array"); + assert_eq!(edits.unwrap().len(), 2, "Should have 2 edits"); + } else { + panic!("Expected CallTool variant"); + } + } + + /// Functional test: verify the serde tag-based deserialization of + /// ToolCatalog works for meta-tools directly from JSON (like what the + /// provider sends). + #[test] + fn test_meta_tool_serde_deserialize() { + // Direct JSON deserialization (simulating provider parsing) + let tools_list: ToolCatalog = + serde_json::from_str(r#"{"name": "tools_list", "arguments": {}}"#).unwrap(); + assert!(matches!(tools_list, ToolCatalog::ToolsList(_))); + + let tools_info: ToolCatalog = + serde_json::from_str(r#"{"name": "tools_info", "arguments": {"tool_name": "shell"}}"#) + .unwrap(); + if let ToolCatalog::ToolsInfo(info) = tools_info { + assert_eq!(info.tool_name, "shell"); + } else { + panic!("Expected ToolsInfo"); + } + + let call_tool: ToolCatalog = serde_json::from_str( + r#"{"name": "call_tool", "arguments": {"tool_name": "write", "arguments": {"file_path": "/test.txt", "content": "hi"}}}"#, + ) + .unwrap(); + if let ToolCatalog::CallTool(input) = call_tool { + assert_eq!(input.tool_name, "write"); + assert_eq!( + input.arguments.get("content").and_then(|v| v.as_str()), + Some("hi") + ); + } else { + panic!("Expected CallTool"); + } + } + + /// Functional test: verify `meta_tool_definitions()` definitions can be + /// serialized to wire format and back. + #[test] + fn test_meta_tool_definition_round_trip() { + let definitions = ToolCatalog::meta_tool_definitions(); + + for def in &definitions { + // Serialize the full definition to JSON (what gets sent to provider) + let json = serde_json::to_string(def).expect("Should serialize"); + // Deserialize back + let back: crate::ToolDefinition = + serde_json::from_str(&json).expect("Should deserialize"); + assert_eq!(def.name, back.name, "Name should round-trip for {}", def.name); + assert_eq!( + def.description, back.description, + "Description should round-trip for {}", + def.name + ); + } + } } diff --git a/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap b/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap index 5c20b9c8..43a97d62 100644 --- a/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap +++ b/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap @@ -17,4 +17,7 @@ expression: prompt {"name":"skill","description":"Fetches detailed information about a specific skill. Use this tool to load skill content and instructions when you need to understand how to perform a specialized task. Skills provide domain-specific knowledge, workflows, and best practices. Only invoke skills that are listed in the available skills section. Do not invoke a skill that is already active.","arguments":{"name":{"description":"The name of the skill to fetch (e.g., \"pdf\", \"code_review\")","type":"string","is_required":true}}} {"name":"todo_write","description":"Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.\nIt also helps the user understand the progress of the task and overall progress of their requests.\n\n## How It Works\n\nEach call sends only the items that changed — you do not need to repeat the whole list.\n\nEach item has two required fields:\n- `content`: The task description. This is the **unique key** — the server matches on content to decide whether to add or update.\n- `status`: One of `pending`, `in_progress`, `completed`, or `cancelled`.\n\n**Rules:**\n- Item with this `content` does **not** exist yet → **added** as a new task.\n- Item with this `content` already exists → its `status` is **updated**.\n- `status: cancelled` → the item is **removed** from the list entirely.\n- Items you do not mention are **left unchanged**.\n\nIDs are managed internally by the system and are never exposed to you.\n\n## When to Use This Tool\nUse this tool proactively in these scenarios:\n\n1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions\n2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations\n3. User explicitly requests todo list - When the user directly asks you to use the todo list\n4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated)\n5. After receiving new instructions - Immediately capture user requirements as todos\n6. When you start working on a task - Mark it as in_progress BEFORE beginning work. Ideally you should only have one todo as in_progress at a time\n7. After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation\n\n## When NOT to Use This Tool\n\nSkip using this tool when:\n1. There is only a single, straightforward task\n2. The task is trivial and tracking it provides no organizational benefit\n3. The task can be completed in less than 3 trivial steps\n4. The task is purely conversational or informational\n\nNOTE that you should not use this tool if there is only one trivial task to do. In this case you are better off just doing the task directly.\n\n## Examples of When to Use the Todo List\n\n\nUser: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done!\nAssistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation.\n*Creates todo list with the following items:*\n1. Creating dark mode toggle component in Settings page\n2. Adding dark mode state management (context/store)\n3. Implementing CSS-in-JS styles for dark theme\n4. Updating existing components to support theme switching\n5. Running tests and build process, addressing any failures or errors that occur\n*Begins working on the first task*\n\n\nThe assistant used the todo list because:\n1. Adding dark mode is a multi-step feature requiring UI, state management, and styling changes\n2. The user explicitly requested tests and build be run afterward\n3. The assistant inferred that tests and build need to pass by adding \"Ensure tests and build succeed\" as the final task\n\n\n\n\nUser: Help me rename the function getCwd to getCurrentWorkingDirectory across my project\nAssistant: Let me first search through your codebase to find all occurrences of 'getCwd'.\n*Uses grep or search tools to locate all instances of getCwd in the codebase*\nAssistant: I've found 15 instances of 'getCwd' across 8 different files. Let me create a todo list to track these changes.\n*Creates todo list with specific items for each file that needs updating*\n\n\nThe assistant used the todo list because:\n1. First, the assistant searched to understand the scope of the task\n2. Upon finding multiple occurrences across different files, it determined this was a complex task with multiple steps\n3. The todo list helps ensure every instance is tracked and updated systematically\n4. This approach prevents missing any occurrences and maintains code consistency\n\n\n\n\n\nUser: I need to implement these features for my e-commerce site: user registration, product catalog, shopping cart, and checkout flow.\nAssistant: I'll help implement these features. First, let's add all the features to the todo list.\n*Creates a todo list breaking down each feature into specific tasks based on the project architecture*\nAssistant: Let's start with user registration. This will involve creating database models, API endpoints, and frontend forms that integrate with your existing authentication system.\n\n\nThe assistant used the todo list because:\n1. The user provided multiple complex features to implement in a comma separated list\n2. The todo list helps organize these large features into manageable tasks\n3. This approach allows for tracking progress across the entire implementation\n\n\n\n\nUser: Can you help optimize my React application? It's rendering slowly and has performance issues.\nAssistant: I'll help optimize your React application. First, let me examine your codebase to identify potential performance bottlenecks.\n*Reviews component structure, render patterns, state management, and data fetching*\nAssistant: After analyzing your codebase, I've identified several performance issues. Let me create a todo list to track our optimization efforts.\n*Creates todo list with items like: 1) Implementing memoization for expensive calculations in ProductList, 2) Adding virtualization for long lists in Dashboard, 3) Optimizing image loading in Gallery component, 4) Fixing state update loops in ShoppingCart, 5) Reviewing bundle size and implementing code splitting*\nLet's start by implementing memoization for the expensive calculations in your ProductList component.\n\n\nThe assistant used the todo list because:\n1. First, the assistant examined the codebase to identify specific performance issues\n2. Based on this analysis, it identified multiple optimization opportunities\n3. Performance optimization is a non-trivial task requiring multiple steps\n4. The todo list helps methodically track improvements across different components\n5. This systematic approach ensures all performance bottlenecks are addressed\n\n\n\n## Examples of When NOT to Use the Todo List\n\n\nUser: How do I print 'Hello World' in Python?\nAssistant: In Python, you can print \"Hello World\" with this simple code:\n\npython\nprint(\"Hello World\")\n\nThis will output the text \"Hello World\" to the console when executed.\n\n\nThe assistant did not use the todo list because this is a single, trivial task that can be completed in one step. There's no need to track multiple tasks or steps for such a straightforward request.\n\n\n\n\nUser: What does the git status command do?\nAssistant: The git status command shows the current state of your working directory and staging area. It displays which changes have been staged, which haven't, and which files aren't being tracked by Git. This is a fundamental command for understanding the current state of your repository before making commits.\n\n\nThe assistant did not use the todo list because this is an informational request with no actual coding task to complete. The user is simply asking for an explanation, not for the assistant to perform multiple steps or tasks.\n\n\n\n\nUser: Can you add a comment to the calculateTotal function to explain what it does?\nAssistant: Sure, let me add a comment to the calculateTotal function to explain what it does.\n* Uses the patch tool to add a comment to the calculateTotal function *\n\n\nThe assistant did not use the todo list because this is a single, straightforward task confined to one location in the code. Adding a comment doesn't require tracking multiple steps or systematic organization.\n\n\n\n\nUser: Run npm install for me and tell me what happens.\nAssistant: I'll run the npm install command for you.\n\n*Executes: npm install*\n\nThe command completed successfully. Here's the output:\n[Output of npm install command]\n\nAll dependencies have been installed according to your package.json file.\n\n\nThe assistant did not use the todo list because this is a single command execution with immediate results. There are no multiple steps to track or organize, making the todo list unnecessary for this straightforward task.\n\n\n\n## Task States and Management\n\n1. **Task States**: Use these states to track progress:\n - `pending`: Task not yet started\n - `in_progress`: Currently working on (limit to ONE task at a time)\n - `completed`: Task finished successfully\n - `cancelled`: Task is no longer relevant — this removes it from the list\n\n2. **Task Management**:\n - Only send the items that changed — do not repeat unchanged items\n - Mark tasks `in_progress` BEFORE beginning work\n - Mark tasks `completed` IMMEDIATELY after finishing (don't batch completions)\n - Exactly ONE task must be `in_progress` at any time\n - Use `cancelled` to remove tasks that are no longer relevant\n - Complete current tasks before starting new ones\n\n3. **Task Completion Requirements**:\n - ONLY mark a task as `completed` when you have FULLY accomplished it\n - If you encounter errors, blockers, or cannot finish, keep the task as `in_progress`\n - When blocked, create a new task describing what needs to be resolved\n - Never mark a task as `completed` if:\n - Tests are failing\n - Implementation is partial\n - You encountered unresolved errors\n - You couldn't find necessary files or dependencies\n\n4. **Task Breakdown**:\n - Create specific, actionable items\n - Break complex tasks into smaller, manageable steps\n - Use clear, descriptive task names\n\nWhen in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.","arguments":{"todos":{"description":"List of todo items to create or update. Each item must have `content`\nand `status`. The server matches on `content` — if an item with the\nsame content exists it is updated; otherwise a new item is added.\nSet `status` to `cancelled` to remove an item.","type":"array","is_required":true}}} {"name":"todo_read","description":"Retrieves the current todo list for this coding session. Use this tool to check existing todos before making updates, or to review the current state of tasks at any point during the session.\n\n## When to Use This Tool\n\n- Before calling `todo_write`, to understand which tasks already exist and avoid duplicates\n- When you need to know what tasks are pending, in progress, or completed\n- To resume work after a break and understand the current state of tasks\n- When the user asks about the current task list or progress\n\n## Output\n\nReturns all current todos with their IDs, content, and status (`pending`, `in_progress`, `completed`). If no todos exist yet, returns an empty list.","arguments":{}} -{"name":"task","description":"Launch a new agent to handle complex, multi-step tasks autonomously. \n\nThe {{tool_names.task}} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it.\n\nAvailable agent types and the tools they have access to:\n{{#each agents}}\n- **{{id}}**{{#if description}}: {{description}}{{/if}}{{#if tools}}\n - Tools: {{#each tools}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}}\n{{/each}}\n\nWhen using the {{tool_names.task}} tool, you must specify a agent_id parameter to select which agent type to use.\n\nWhen NOT to use the {{tool_names.task}} tool:\n- If you want to read a specific file path, use the {{tool_names.read}} or {{tool_names.fs_search}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- If you are searching for a specific class definition like \"class Foo\", use the {{tool_names.fs_search}} tool instead, to find the match more quickly\n- If you are searching for code within a specific file or set of 2-3 files, use the {{tool_names.read}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- Other tasks that are not related to the agent descriptions above\n\n\nUsage notes:\n- Always include a short description (3-5 words) summarizing what the agent will do\n- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses\n- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.\n- Agents can be resumed using the \\`session_id\\` parameter by passing the agent ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context.\n- When the agent is done, it will return a single message back to you along with its agent ID. You can use this ID to resume the agent later if needed for follow-up work.\n- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need.\n- Agents with \"access to current context\" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., \"investigate the error discussed above\") instead of repeating information. The agent will receive all prior messages and understand the context.\n- The agent's outputs should generally be trusted\n- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent\n- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.\n- If the user specifies that they want you to run agents \"in parallel\", you MUST send a single message with multiple {{tool_names.task}} tool use content blocks. For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls.\n- Optional `model` field: when the user explicitly asks for a specific model for the subagent (e.g. \"spawn a subagent with gpt-5.4-medium\" or \"use sonnet-4.6 for this delegation\"), set `model` to the requested model id. Otherwise OMIT it and let the agent use its default. The override is rejected if the model isn't on the agent's currently-authenticated provider.\n\nExample usage:\n\n\n\"test-runner\": use this agent after you are done writing code to run tests\n\"greeting-responder\": use this agent when to respond to user greetings with a friendly joke\n\n\n\nuser: \"Please write a function that checks if a number is prime\"\nassistant: Sure let me write a function that checks if a number is prime\nassistant: First let me use the {{tool_names.write}} tool to write a function that checks if a number is prime\nassistant: I'm going to use the {{tool_names.write}} tool to write the following code:\n\nfunction isPrime(n) {\n if (n <= 1) return false\n for (let i = 2; i * i <= n; i++) {\n if (n % i === 0) return false\n }\n return true\n}\n\n\nSince a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests\n\nassistant: Now let me use the test-runner agent to run the tests\nassistant: Uses the {{tool_names.task}} tool to launch the test-runner agent\n\n\n\nuser: \"Hello\"\n\nSince the user is greeting, use the greeting-responder agent to respond with a friendly joke\n\nassistant: \"I'm going to use the {{tool_names.task}} tool to launch the greeting-responder agent\"\n","arguments":{"agent_id":{"description":"The ID of the specialized agent to delegate to (e.g., \"forge\", \"muse\",\n\"sage\")","type":"string","is_required":true},"model":{"description":"Optional model override for this subagent run. When set, the spawned\nagent will use this model id instead of its configured default. The\nmodel must belong to a provider the user has already authenticated\nwith — unauthenticated models are rejected. Use this when the user\nexplicitly requests a specific model for a subagent (e.g. \"spawn a\nsubagent with gpt-5.4-medium to do X\").","type":"string","is_required":false},"session_id":{"description":"Optional session ID to continue an existing agent session. If not\nprovided, a new stateless session will be created. Use this to\nmaintain context across multiple task invocations with the same\nagent.","type":"string","is_required":false},"tasks":{"description":"A list of clear and detailed descriptions of the tasks to be performed\nby the agent in parallel. Provide sufficient context and specific\nrequirements to enable the agent to understand and execute the work\naccurately.","type":"array","is_required":true}}} +{"name":"task","description":"Launch a new agent to handle complex, multi-step tasks autonomously. \n\nThe {{tool_names.task}} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it.\n\nAvailable agent types and the tools they have access to:\n{{#each agents}}\n- **{{id}}**{{#if description}}: {{description}}{{/if}}{{#if tools}}\n - Tools: {{#each tools}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}}\n{{/each}}\n\nWhen using the {{tool_names.task}} tool, you must specify a agent_id parameter to select which agent type to use.\n\nWhen NOT to use the {{tool_names.task}} tool:\n- If you want to read a specific file path, use the {{tool_names.read}} or {{tool_names.fs_search}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- If you are searching for a specific class definition like \"class Foo\", use the {{tool_names.fs_search}} tool instead, to find the match more quickly\n- If you are searching for code within a specific file or set of 2-3 files, use the {{tool_names.read}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- Other tasks that are not related to the agent descriptions above\n\n\nUsage notes:\n- Always include a short description (3-5 words) summarizing what the agent will do\n- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses\n- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.\n- Agents can be resumed using the \\`session_id\\` parameter by passing the agent ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context.\n- When the agent is done, it will return a single message back to you along with its agent ID. You can use this ID to resume the agent later if needed for follow-up work.\n- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need.\n- Agents with \"access to current context\" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., \"investigate the error discussed above\") instead of repeating information. The agent will receive all prior messages and understand the context.\n- The agent's outputs should generally be trusted\n- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent\n- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.\n- If the user specifies that they want you to run agents \"in parallel\", you MUST send a single message with multiple {{tool_names.task}} tool use content blocks. For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls.\n- Optional `model` field: when the user explicitly asks for a specific model for the subagent (e.g. \"spawn a subagent with gpt-5.4-medium\" or \"use sonnet-4.6 for this delegation\"), set `model` to the requested model id. Otherwise OMIT it and let the agent use its default. The override is rejected if the model isn't on the agent's currently-authenticated provider.\n\nExample usage:\n\n\n\"test-runner\": use this agent after you are done writing code to run tests\n\"greeting-responder\": use this agent when to respond to user greetings with a friendly joke\n\n\n\nuser: \"Please write a function that checks if a number is prime\"\nassistant: Sure let me write a function that checks if a number is prime\nassistant: First let me use the {{tool_names.write}} tool to write a function that checks if a number is prime\nassistant: I'm going to use the {{tool_names.write}} tool to write the following code:\n\nfunction isPrime(n) {\n if (n <= 1) return false\n for (let i = 2; i * i <= n; i++) {\n if (n % i === 0) return false\n }\n return true\n}\n\n\nSince a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests\n\nassistant: Now let me use the test-runner agent to run the tests\nassistant: Uses the {{tool_names.task}} tool to launch the test-runner agent\n\n\n\nuser: \"Hello\"\n\nSince the user is greeting, use the greeting-responder agent to respond with a friendly joke\n\nassistant: \"I'm going to use the {{tool_names.task}} tool to launch the greeting-responder agent\"\n","arguments":{"agent_id":{"description":"The ID of the specialized agent to delegate to (e.g., \"forge\", \"muse\",\n\"sage\")","type":"string","is_required":true},"model":{"description":"Optional model override for this subagent run. When set, the spawned\nagent will use this model id instead of its configured default. The\nmodel must belong to a provider the user has already authenticated\nwith — unaunthenticated models are rejected. Use this when the user\nexplicitly requests a specific model for a subagent (e.g. \"spawn a\nsubagent with gpt-5.4-medium to do X\").","type":"string","is_required":false},"session_id":{"description":"Optional session ID to continue an existing agent session. If not\nprovided, a new stateless session will be created. Use this to\nmaintain context across multiple task invocations with the same\nagent.","type":"string","is_required":false},"tasks":{"description":"A list of clear and detailed descriptions of the tasks to be performed\nby the agent in parallel. Provide sufficient context and specific\nrequirements to enable the agent to understand and execute the work\naccurately.","type":"array","is_required":true}}} +{"name":"tools_list","description":"Lists all available tool names with brief descriptions. Use this tool to discover which tools are available to call. Returns a JSON object with tool names and brief descriptions.","arguments":{}} +{"name":"tools_info","description":"Returns the full ToolDefinition (including input schema, description, and name) for a specific tool. Use this after 'tools_list' to get the complete schema for a tool you want to call. The tool_name should match one of the names returned by 'tools_list'.","arguments":{"tool_name":{"description":"The name of the tool to get information about (e.g., \"read\", \"write\",\n\"shell\")","type":"string","is_required":true}}} +{"name":"call_tool","description":"Calls any tool by name with the provided arguments. Use this to execute a tool after looking up its schema via 'tools_info'. Pass the tool_name (the name of the tool to call) and arguments (a JSON object matching the tool's input schema). The tool will be executed and its output will be returned.","arguments":{"arguments":{"description":"The arguments to pass to the tool, matching its input schema.","is_required":true},"tool_name":{"description":"The name of the tool to call (e.g., \"read\", \"write\", \"shell\")","type":"string","is_required":true}}} diff --git a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap index 91c25157..e128d60e 100644 --- a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap +++ b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap @@ -1,6 +1,5 @@ --- source: crates/forge_domain/src/tools/catalog.rs -assertion_line: 1268 expression: tools --- { @@ -475,7 +474,7 @@ expression: tools "nullable": true }, "model": { - "description": "Optional model override for this subagent run. When set, the spawned\nagent will use this model id instead of its configured default. The\nmodel must belong to a provider the user has already authenticated\nwith — unauthenticated models are rejected. Use this when the user\nexplicitly requests a specific model for a subagent (e.g. \"spawn a\nsubagent with gpt-5.4-medium to do X\").", + "description": "Optional model override for this subagent run. When set, the spawned\nagent will use this model id instead of its configured default. The\nmodel must belong to a provider the user has already authenticated\nwith — unaunthenticated models are rejected. Use this when the user\nexplicitly requests a specific model for a subagent (e.g. \"spawn a\nsubagent with gpt-5.4-medium to do X\").", "type": "string", "nullable": true } @@ -485,3 +484,40 @@ expression: tools "agent_id" ] } +{ + "title": "ToolsListInput", + "description": "Meta-tool: Lists all available tool names with brief descriptions.\nUse this to discover which tools are available before calling one.", + "type": "object" +} +{ + "title": "ToolsInfoInput", + "description": "Meta-tool: Returns the full ToolDefinition (including input schema) for\na specific tool. Use this after `tools_list` to get the complete schema\nfor a tool you want to call.", + "type": "object", + "properties": { + "tool_name": { + "description": "The name of the tool to get information about (e.g., \"read\", \"write\",\n\"shell\")", + "type": "string" + } + }, + "required": [ + "tool_name" + ] +} +{ + "title": "CallToolInput", + "description": "Meta-tool: Calls any tool by name with the provided arguments.\nUse this to execute a tool after looking up its schema via `tools_info`.\nThe `arguments` field must match the input schema of the tool being called.", + "type": "object", + "properties": { + "tool_name": { + "description": "The name of the tool to call (e.g., \"read\", \"write\", \"shell\")", + "type": "string" + }, + "arguments": { + "description": "The arguments to pass to the tool, matching its input schema." + } + }, + "required": [ + "tool_name", + "arguments" + ] +} diff --git a/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap b/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap index cafb5f15..e5ed6e73 100644 --- a/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap +++ b/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap @@ -774,7 +774,7 @@ expression: actual.tools ] }, "model": { - "description": "Optional model override for this subagent run. When set, the spawned\nagent will use this model id instead of its configured default. The\nmodel must belong to a provider the user has already authenticated\nwith — unauthenticated models are rejected. Use this when the user\nexplicitly requests a specific model for a subagent (e.g. \"spawn a\nsubagent with gpt-5.4-medium to do X\").", + "description": "Optional model override for this subagent run. When set, the spawned\nagent will use this model id instead of its configured default. The\nmodel must belong to a provider the user has already authenticated\nwith — unaunthenticated models are rejected. Use this when the user\nexplicitly requests a specific model for a subagent (e.g. \"spawn a\nsubagent with gpt-5.4-medium to do X\").", "anyOf": [ { "type": "string" @@ -788,5 +788,66 @@ expression: actual.tools }, "strict": true, "description": "Launch a new agent to handle complex, multi-step tasks autonomously. \n\nThe {{tool_names.task}} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it.\n\nAvailable agent types and the tools they have access to:\n{{#each agents}}\n- **{{id}}**{{#if description}}: {{description}}{{/if}}{{#if tools}}\n - Tools: {{#each tools}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}}\n{{/each}}\n\nWhen using the {{tool_names.task}} tool, you must specify a agent_id parameter to select which agent type to use.\n\nWhen NOT to use the {{tool_names.task}} tool:\n- If you want to read a specific file path, use the {{tool_names.read}} or {{tool_names.fs_search}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- If you are searching for a specific class definition like \"class Foo\", use the {{tool_names.fs_search}} tool instead, to find the match more quickly\n- If you are searching for code within a specific file or set of 2-3 files, use the {{tool_names.read}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- Other tasks that are not related to the agent descriptions above\n\n\nUsage notes:\n- Always include a short description (3-5 words) summarizing what the agent will do\n- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses\n- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.\n- Agents can be resumed using the \\`session_id\\` parameter by passing the agent ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context.\n- When the agent is done, it will return a single message back to you along with its agent ID. You can use this ID to resume the agent later if needed for follow-up work.\n- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need.\n- Agents with \"access to current context\" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., \"investigate the error discussed above\") instead of repeating information. The agent will receive all prior messages and understand the context.\n- The agent's outputs should generally be trusted\n- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent\n- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.\n- If the user specifies that they want you to run agents \"in parallel\", you MUST send a single message with multiple {{tool_names.task}} tool use content blocks. For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls.\n- Optional `model` field: when the user explicitly asks for a specific model for the subagent (e.g. \"spawn a subagent with gpt-5.4-medium\" or \"use sonnet-4.6 for this delegation\"), set `model` to the requested model id. Otherwise OMIT it and let the agent use its default. The override is rejected if the model isn't on the agent's currently-authenticated provider.\n\nExample usage:\n\n\n\"test-runner\": use this agent after you are done writing code to run tests\n\"greeting-responder\": use this agent when to respond to user greetings with a friendly joke\n\n\n\nuser: \"Please write a function that checks if a number is prime\"\nassistant: Sure let me write a function that checks if a number is prime\nassistant: First let me use the {{tool_names.write}} tool to write a function that checks if a number is prime\nassistant: I'm going to use the {{tool_names.write}} tool to write the following code:\n\nfunction isPrime(n) {\n if (n <= 1) return false\n for (let i = 2; i * i <= n; i++) {\n if (n % i === 0) return false\n }\n return true\n}\n\n\nSince a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests\n\nassistant: Now let me use the test-runner agent to run the tests\nassistant: Uses the {{tool_names.task}} tool to launch the test-runner agent\n\n\n\nuser: \"Hello\"\n\nSince the user is greeting, use the greeting-responder agent to respond with a friendly joke\n\nassistant: \"I'm going to use the {{tool_names.task}} tool to launch the greeting-responder agent\"\n" + }, + { + "type": "function", + "name": "tools_list", + "parameters": { + "title": "ToolsListInput", + "description": "Meta-tool: Lists all available tool names with brief descriptions.\nUse this to discover which tools are available before calling one.", + "type": "object", + "additionalProperties": false, + "required": [], + "properties": {} + }, + "strict": true, + "description": "Lists all available tool names with brief descriptions. Use this tool to discover which tools are available to call. Returns a JSON object with tool names and brief descriptions." + }, + { + "type": "function", + "name": "tools_info", + "parameters": { + "title": "ToolsInfoInput", + "description": "Meta-tool: Returns the full ToolDefinition (including input schema) for\na specific tool. Use this after `tools_list` to get the complete schema\nfor a tool you want to call.", + "type": "object", + "additionalProperties": false, + "required": [ + "tool_name" + ], + "properties": { + "tool_name": { + "description": "The name of the tool to get information about (e.g., \"read\", \"write\",\n\"shell\")", + "type": "string" + } + } + }, + "strict": true, + "description": "Returns the full ToolDefinition (including input schema, description, and name) for a specific tool. Use this after 'tools_list' to get the complete schema for a tool you want to call. The tool_name should match one of the names returned by 'tools_list'." + }, + { + "type": "function", + "name": "call_tool", + "parameters": { + "title": "CallToolInput", + "description": "Meta-tool: Calls any tool by name with the provided arguments.\nUse this to execute a tool after looking up its schema via `tools_info`.\nThe `arguments` field must match the input schema of the tool being called.", + "type": "object", + "additionalProperties": false, + "required": [ + "arguments", + "tool_name" + ], + "properties": { + "tool_name": { + "description": "The name of the tool to call (e.g., \"read\", \"write\", \"shell\")", + "type": "string" + }, + "arguments": { + "description": "The arguments to pass to the tool, matching its input schema.", + "type": "string" + } + } + }, + "strict": true, + "description": "Calls any tool by name with the provided arguments. Use this to execute a tool after looking up its schema via 'tools_info'. Pass the tool_name (the name of the tool to call) and arguments (a JSON object matching the tool's input schema). The tool will be executed and its output will be returned." } ] diff --git a/templates/forge-custom-agent-template.md b/templates/forge-custom-agent-template.md index 8544b0ba..47f68eaf 100644 --- a/templates/forge-custom-agent-template.md +++ b/templates/forge-custom-agent-template.md @@ -17,7 +17,12 @@ - You can use one tool per message, and will receive the result of that tool use in the user's response. - You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use. {{else}} -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than sequentially. +- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. +- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. + - Use `tools_list` to discover which tools are available and their brief descriptions. + - Use `tools_info` to get the full JSON schema of a specific tool before calling it. + - Use `call_tool` to execute any tool by name with the appropriate arguments. +- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. {{/if}} - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. From 36166759928483d938928416eae0c0b513ba5d13 Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Wed, 20 May 2026 11:15:32 +0530 Subject: [PATCH 03/12] fix: strip schema titles via schemars SchemaSettings transform (#3366) Co-authored-by: ForgeCode --- crates/forge_domain/src/agent.rs | 6 +- crates/forge_domain/src/tools/catalog.rs | 1 + .../src/tools/definition/tool_definition.rs | 134 +++++++++++++++++- ..._catalog__tests__tool_definition_json.snap | 16 --- 4 files changed, 134 insertions(+), 23 deletions(-) diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs index 200a463b..5d7d8bec 100644 --- a/crates/forge_domain/src/agent.rs +++ b/crates/forge_domain/src/agent.rs @@ -326,10 +326,8 @@ impl From for ToolDefinition { ToolDefinition { name, description, - input_schema: schemars::schema_for!(crate::AgentInput), - output_schema: None, - annotations: None, - title: None, + input_schema: crate::tool_schema_generator() + .into_root_schema_for::(), } } } diff --git a/crates/forge_domain/src/tools/catalog.rs b/crates/forge_domain/src/tools/catalog.rs index 56a7c0b3..8cc97f66 100644 --- a/crates/forge_domain/src/tools/catalog.rs +++ b/crates/forge_domain/src/tools/catalog.rs @@ -935,6 +935,7 @@ impl ToolCatalog { .with(|s| { s.meta_schema = None; s.inline_subschemas = true; + s.transforms.push(Box::new(crate::RemoveSchemaTitles)); }) .into_generator(); diff --git a/crates/forge_domain/src/tools/definition/tool_definition.rs b/crates/forge_domain/src/tools/definition/tool_definition.rs index 0ca9969e..c43f49bc 100644 --- a/crates/forge_domain/src/tools/definition/tool_definition.rs +++ b/crates/forge_domain/src/tools/definition/tool_definition.rs @@ -1,9 +1,43 @@ use derive_setters::Setters; use schemars::Schema; +use schemars::generate::SchemaGenerator; +use schemars::transform::{Transform, transform_subschemas}; use serde::{Deserialize, Serialize}; use crate::ToolName; +/// A schemars [`Transform`] that recursively removes the `title` field from +/// every schema node. +/// +/// Rust type names are emitted as `title` by the `JsonSchema` derive. These +/// are internal implementation details and must not be forwarded to LLM +/// provider APIs. +#[derive(Debug, Clone, Default)] +pub struct RemoveSchemaTitles; + +impl Transform for RemoveSchemaTitles { + fn transform(&mut self, schema: &mut Schema) { + if let Some(map) = schema.as_object_mut() { + map.remove("title"); + } + + transform_subschemas(self, schema); + } +} + +/// Returns a [`SchemaGenerator`] whose settings include [`RemoveSchemaTitles`] +/// as a registered transform. +/// +/// All schemas produced via this generator will never contain `title` fields, +/// eliminating the need for any post-hoc stripping. +pub fn tool_schema_generator() -> SchemaGenerator { + schemars::generate::SchemaSettings::default() + .with(|s| { + s.transforms.push(Box::new(RemoveSchemaTitles)); + }) + .into_generator() +} + /// Optional MCP tool annotations (mirrors the MCP `ToolAnnotations` shape). /// /// All fields are advisory hints from the server about how the tool behaves. @@ -22,15 +56,17 @@ pub struct ToolAnnotations { #[serde(default, skip_serializing_if = "Option::is_none")] pub open_world_hint: Option, } +} /// /// Refer to the specification over here: -/// https://glama.ai/blog/2024-11-25-model-context-protocol-quickstart#server +/// https://glama.ai/blog/2024-11-25-model-context-protocol-quickstart #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Setters)] #[setters(into, strip_option)] pub struct ToolDefinition { pub name: ToolName, pub description: String, + #[setters(skip)] pub input_schema: Schema, /// Optional output schema (MCP `outputSchema`) describing the structure of /// `structured_content` results. Populated for MCP tools that advertise it; @@ -47,19 +83,111 @@ pub struct ToolDefinition { } impl ToolDefinition { - /// Create a new ToolDefinition + /// Create a new ToolDefinition with an empty input schema. pub fn new(name: N) -> Self { ToolDefinition { name: ToolName::new(name), description: String::new(), - input_schema: schemars::schema_for!(()), // Empty input schema + input_schema: tool_schema_generator().into_root_schema_for::<()>(), // Empty input schema output_schema: None, annotations: None, title: None, } } + + /// Sets the input schema. + /// + /// # Arguments + /// * `input_schema` - The JSON schema describing accepted tool input + pub fn input_schema(mut self, input_schema: impl Into) -> Self { + self.input_schema = input_schema.into(); + self + } } pub trait ToolDescription { fn description(&self) -> String; } + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + use schemars::JsonSchema; + use serde::Deserialize as SerdeDeserialize; + + use super::*; + + /// A struct with a Rust type name that schemars would emit as `title`. + #[derive(SerdeDeserialize, JsonSchema)] + #[allow(dead_code)] + struct InternalPatchInput { + old_string: String, + nested: NestedInput, + } + + #[derive(SerdeDeserialize, JsonSchema)] + #[allow(dead_code)] + struct NestedInput { + value: String, + } + + #[test] + fn test_tool_schema_generator_strips_titles() { + let r#gen = tool_schema_generator(); + let actual = + serde_json::to_value(r#gen.into_root_schema_for::()).unwrap(); + + assert_eq!( + actual.pointer("/title"), + None, + "root title should be absent" + ); + assert_eq!( + actual.pointer("/properties/nested/title"), + None, + "nested title should be absent" + ); + } + + #[test] + fn test_tool_definition_new_has_no_title() { + let fixture = ToolDefinition::new("patch"); + let actual = serde_json::to_value(&fixture.input_schema).unwrap(); + assert_eq!(actual.pointer("/title"), None); + } + + #[test] + fn test_tool_definition_round_trip_preserves_no_title() { + let r#gen = tool_schema_generator(); + let schema = r#gen.into_root_schema_for::(); + let fixture = ToolDefinition::new("patch") + .description("Patch a file") + .input_schema(schema); + + // Serialise then deserialise and confirm no title leaks in + let json_str = serde_json::to_string(&fixture).unwrap(); + let roundtripped: ToolDefinition = serde_json::from_str(&json_str).unwrap(); + let actual = serde_json::to_value(roundtripped.input_schema).unwrap(); + assert_eq!(actual.pointer("/title"), None); + assert_eq!(actual.pointer("/properties/nested/title"), None); + } + + #[test] + fn test_tool_definition_serialization_has_no_title() { + let r#gen = tool_schema_generator(); + let schema = r#gen.into_root_schema_for::(); + let fixture = ToolDefinition { + name: ToolName::new("patch"), + description: "Patch a file".to_string(), + input_schema: schema, + }; + let actual = serde_json::to_value(&fixture).unwrap(); + + // Titles must be absent at every level regardless of the schema structure + assert_eq!(actual.pointer("/input_schema/title"), None); + assert_eq!( + actual.pointer("/input_schema/$defs/NestedInput/title"), + None + ); + } +} diff --git a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap index e128d60e..a224081b 100644 --- a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap +++ b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap @@ -3,7 +3,6 @@ source: crates/forge_domain/src/tools/catalog.rs expression: tools --- { - "title": "FSRead", "type": "object", "properties": { "file_path": { @@ -42,7 +41,6 @@ expression: tools ] } { - "title": "FSWrite", "type": "object", "properties": { "file_path": { @@ -64,7 +62,6 @@ expression: tools ] } { - "title": "FSSearch", "type": "object", "properties": { "pattern": { @@ -153,7 +150,6 @@ expression: tools ] } { - "title": "SemanticSearch", "type": "object", "properties": { "queries": { @@ -184,7 +180,6 @@ expression: tools ] } { - "title": "FSRemove", "type": "object", "properties": { "path": { @@ -197,7 +192,6 @@ expression: tools ] } { - "title": "FSPatch", "type": "object", "properties": { "file_path": { @@ -225,7 +219,6 @@ expression: tools ] } { - "title": "FSMultiPatch", "type": "object", "properties": { "file_path": { @@ -266,7 +259,6 @@ expression: tools ] } { - "title": "FSUndo", "type": "object", "properties": { "path": { @@ -279,7 +271,6 @@ expression: tools ] } { - "title": "Shell", "type": "object", "properties": { "command": { @@ -314,7 +305,6 @@ expression: tools ] } { - "title": "NetFetch", "description": "Input type for the net fetch tool", "type": "object", "properties": { @@ -333,7 +323,6 @@ expression: tools ] } { - "title": "Followup", "type": "object", "properties": { "question": { @@ -376,7 +365,6 @@ expression: tools ] } { - "title": "PlanCreate", "type": "object", "properties": { "plan_name": { @@ -399,7 +387,6 @@ expression: tools ] } { - "title": "SkillFetch", "type": "object", "properties": { "name": { @@ -412,7 +399,6 @@ expression: tools ] } { - "title": "TodoWrite", "type": "object", "properties": { "todos": { @@ -449,11 +435,9 @@ expression: tools ] } { - "title": "TodoRead", "type": "object" } { - "title": "TaskInput", "description": "Input structure for the Task tool - delegates work to specialized agents", "type": "object", "properties": { From 8c9200de046139c36307e8e5d6dd8c50b89d80e9 Mon Sep 17 00:00:00 2001 From: Tushar Mathur Date: Sat, 16 May 2026 08:47:01 +0530 Subject: [PATCH 04/12] fix(kv_storage): handle cache clearing with regular files (#3343) --- crates/forge_infra/src/kv_storage.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/forge_infra/src/kv_storage.rs b/crates/forge_infra/src/kv_storage.rs index 4890253c..75fa54ee 100644 --- a/crates/forge_infra/src/kv_storage.rs +++ b/crates/forge_infra/src/kv_storage.rs @@ -132,9 +132,18 @@ impl forge_app::KVStore for CacacheStorage { } async fn cache_clear(&self) -> Result<()> { - cacache::clear(&self.cache_dir) + if !self.cache_dir.exists() { + return Ok(()); + } + // Use remove_dir_all + create_dir_all instead of cacache::clear because + // cacache::clear calls remove_dir_all on every directory entry, which + // fails with ENOTDIR when regular files (e.g. .mcp.json) are present. + tokio::fs::remove_dir_all(&self.cache_dir) + .await + .with_context(|| format!("Failed to clear cache at {}", self.cache_dir.display()))?; + tokio::fs::create_dir_all(&self.cache_dir) .await - .context("Failed to clear cache")?; + .with_context(|| format!("Failed to recreate cache at {}", self.cache_dir.display()))?; Ok(()) } } From c2d6dae6aa732929af3b78c1d20325071b67a51a Mon Sep 17 00:00:00 2001 From: laststylebender <43403528+laststylebender14@users.noreply.github.com> Date: Thu, 21 May 2026 11:49:55 +0530 Subject: [PATCH 05/12] feat(mcp-trust): launch mcp trust prompt on startup (#3265) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Tushar Mathur Co-authored-by: Amit Singh --- crates/forge_api/src/api.rs | 5 + crates/forge_api/src/forge_api.rs | 4 + crates/forge_app/src/services.rs | 17 ++++ crates/forge_domain/src/env.rs | 5 + crates/forge_domain/src/mcp.rs | 73 ++++++++++++-- crates/forge_main/src/ui.rs | 3 + crates/forge_services/src/mcp/manager.rs | 123 ++++++++++++++++++++++- crates/forge_services/src/mcp/service.rs | 53 +++++++--- 8 files changed, 258 insertions(+), 25 deletions(-) diff --git a/crates/forge_api/src/api.rs b/crates/forge_api/src/api.rs index 2112576b..e58f26dc 100644 --- a/crates/forge_api/src/api.rs +++ b/crates/forge_api/src/api.rs @@ -264,6 +264,11 @@ pub trait API: Sync + Send { /// Refresh MCP caches by fetching fresh data async fn reload_mcp(&self) -> Result<()>; + /// Applies the interactive trust gate for any project-local MCP config. + /// Servers are NOT connected here — connections remain lazy and happen on + /// first tool use. Must be called once at startup. + async fn init_mcp(&self) -> Result<()>; + /// List of commands defined in .md file(s) async fn get_commands(&self) -> Result>; diff --git a/crates/forge_api/src/forge_api.rs b/crates/forge_api/src/forge_api.rs index 9e8d7173..f00d7e42 100644 --- a/crates/forge_api/src/forge_api.rs +++ b/crates/forge_api/src/forge_api.rs @@ -438,6 +438,10 @@ impl< async fn reload_mcp(&self) -> Result<()> { self.services.mcp_service().reload_mcp().await } + + async fn init_mcp(&self) -> Result<()> { + self.services.mcp_service().init_mcp().await + } async fn get_commands(&self) -> Result> { self.services.get_commands().await } diff --git a/crates/forge_app/src/services.rs b/crates/forge_app/src/services.rs index 6db29c4e..e0c2e399 100644 --- a/crates/forge_app/src/services.rs +++ b/crates/forge_app/src/services.rs @@ -224,6 +224,11 @@ pub trait McpConfigManager: Send + Sync { /// Responsible for writing the McpConfig on disk. async fn write_mcp_config(&self, config: &McpConfig, scope: &Scope) -> anyhow::Result<()>; + + /// Returns the trusted subset of MCP servers, prompting interactively for + /// any project-local config file not yet approved. Must be called once at + /// the startup boundary, never on pure config-read paths. + async fn filter_trusted(&self, raw: McpConfig) -> anyhow::Result; } #[async_trait::async_trait] @@ -232,6 +237,10 @@ pub trait McpService: Send + Sync { async fn execute_mcp(&self, call: ToolCallFull) -> anyhow::Result; /// Refresh the MCP cache by fetching fresh data async fn reload_mcp(&self) -> anyhow::Result<()>; + /// Applies the interactive trust gate for any project-local MCP config. + /// Servers are NOT connected here — connections remain lazy and happen on + /// first tool use. Must be called once at startup. + async fn init_mcp(&self) -> anyhow::Result<()>; } #[async_trait::async_trait] @@ -711,6 +720,10 @@ impl McpConfigManager for I { .write_mcp_config(config, scope) .await } + + async fn filter_trusted(&self, raw: McpConfig) -> anyhow::Result { + self.mcp_config_manager().filter_trusted(raw).await + } } #[async_trait::async_trait] @@ -726,6 +739,10 @@ impl McpService for I { async fn reload_mcp(&self) -> anyhow::Result<()> { self.mcp_service().reload_mcp().await } + + async fn init_mcp(&self) -> anyhow::Result<()> { + self.mcp_service().init_mcp().await + } } #[async_trait::async_trait] diff --git a/crates/forge_domain/src/env.rs b/crates/forge_domain/src/env.rs index e571561f..555dcafa 100644 --- a/crates/forge_domain/src/env.rs +++ b/crates/forge_domain/src/env.rs @@ -84,6 +84,11 @@ impl Environment { self.base_path.join(".mcp.json") } + /// Returns the path where the MCP trust store is persisted across restarts. + pub fn mcp_trust_path(&self) -> PathBuf { + self.base_path.join(".mcp_trust.json") + } + pub fn agent_path(&self) -> PathBuf { self.base_path.join("agents") } diff --git a/crates/forge_domain/src/mcp.rs b/crates/forge_domain/src/mcp.rs index 53afe537..a065579f 100644 --- a/crates/forge_domain/src/mcp.rs +++ b/crates/forge_domain/src/mcp.rs @@ -8,6 +8,7 @@ use derive_more::{Deref, Display, From}; use derive_setters::Setters; use merge::Merge; use serde::{Deserialize, Serialize}; +use strum_macros::{Display as StrumDisplay, EnumIter, EnumString}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Scope { @@ -288,21 +289,81 @@ impl From> for McpConfig { } impl McpConfig { - /// Compute a deterministic u64 identifier for this config + /// Compute a deterministic u64 identifier for this config. /// - /// Uses Rust's built-in `Hash` trait (derived) to compute a stable hash - /// and converts it to a hex u64 for use as a cache key. - /// BTreeMap ensures consistent ordering regardless of insertion order. + /// Uses FNV-64 (a non-cryptographic but stable, seed-free hasher) so the + /// same config always produces the same key across process restarts. + /// This is required for persisted trust-store lookups: `DefaultHasher` + /// uses a random seed per-process and would produce a different value on + /// every restart, causing "Trust and remember" to be ignored. + /// `BTreeMap` ensures consistent field ordering regardless of insertion + /// order. pub fn cache_key(&self) -> u64 { - use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; - let mut hasher = DefaultHasher::new(); + let mut hasher = fnv_rs::Fnv64::default(); Hash::hash(self, &mut hasher); hasher.finish() } } +/// The two choices presented to the user when an untrusted project-local +/// `.mcp.json` is detected at startup. +#[derive(Debug, Clone, PartialEq, Eq, StrumDisplay, EnumIter, EnumString)] +pub enum McpTrustResponse { + /// Allow the servers and remember this decision across future sessions. + /// The config hash is persisted so the prompt is skipped on next startup + /// as long as the file has not changed. + #[strum(to_string = "Accept")] + Accept, + /// Reject all servers from this config file. + #[strum(to_string = "Reject")] + Reject, +} + +/// Persists accepted and rejected MCP config hashes across restarts. A path +/// maps to its content hash so that any modification to the file revokes the +/// stored decision and triggers a new prompt. +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct McpTrustStore { + #[serde(default)] + trusted: std::collections::HashMap, + #[serde(default)] + rejected: std::collections::HashMap, +} + +impl McpTrustStore { + /// Returns true if the given path+hash pair has been previously accepted. + pub fn is_trusted(&self, path: &std::path::Path, content_hash: u64) -> bool { + self.trusted + .get(&path.to_string_lossy().into_owned()) + .is_some_and(|&stored| stored == content_hash) + } + + /// Returns true if the given path+hash pair has been previously rejected. + pub fn is_rejected(&self, path: &std::path::Path, content_hash: u64) -> bool { + self.rejected + .get(&path.to_string_lossy().into_owned()) + .is_some_and(|&stored| stored == content_hash) + } + + /// Records an accepted trust decision for the given path and content hash. + /// Clears any prior rejection for the same path. + pub fn remember(&mut self, path: std::path::PathBuf, content_hash: u64) { + let key = path.to_string_lossy().into_owned(); + self.rejected.remove(&key); + self.trusted.insert(key, content_hash); + } + + /// Records a rejected trust decision for the given path and content hash. + /// Clears any prior acceptance for the same path. + pub fn reject(&mut self, path: std::path::PathBuf, content_hash: u64) { + let key = path.to_string_lossy().into_owned(); + self.trusted.remove(&key); + self.rejected.insert(key, content_hash); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/forge_main/src/ui.rs b/crates/forge_main/src/ui.rs index 41747c93..3c4f15b4 100644 --- a/crates/forge_main/src/ui.rs +++ b/crates/forge_main/src/ui.rs @@ -4029,6 +4029,9 @@ impl A + Send + Sync> UI .await?; // only call on_update if this is the first initialization on_update(self.api.clone(), self.config.updates.as_ref()).await; + // Apply the MCP trust gate. Servers are NOT connected here — + // connections remain lazy and happen on first tool use. + self.api.init_mcp().await?; } // Execute independent operations in parallel to improve performance diff --git a/crates/forge_services/src/mcp/manager.rs b/crates/forge_services/src/mcp/manager.rs index a56a5197..13507dfd 100644 --- a/crates/forge_services/src/mcp/manager.rs +++ b/crates/forge_services/src/mcp/manager.rs @@ -4,10 +4,10 @@ use std::sync::Arc; use anyhow::Context; use bytes::Bytes; -use forge_app::domain::{McpConfig, McpServerConfig, Scope, ServerName}; +use forge_app::domain::{McpConfig, McpServerConfig, McpTrustResponse, McpTrustStore, Scope, ServerName}; use forge_app::{ EnvironmentInfra, FileInfoInfra, FileReaderInfra, FileWriterInfra, KVStore, McpConfigManager, - McpServerInfra, + McpServerInfra, UserInfra, }; use merge::Merge; @@ -45,6 +45,7 @@ impl ForgeMcpManager where I: McpServerInfra + FileReaderInfra + FileInfoInfra + EnvironmentInfra + KVStore, { + /// Creates a new [`ForgeMcpManager`] wrapping the provided infrastructure. pub fn new(infra: Arc) -> Self { Self { infra } } @@ -63,6 +64,89 @@ where } } +impl ForgeMcpManager +where + I: McpServerInfra + + FileReaderInfra + + FileInfoInfra + + EnvironmentInfra + + FileWriterInfra + + KVStore + + UserInfra, +{ + /// Reads the persisted trust store from disk, returning an empty store if + /// the file is absent or unreadable. + async fn read_trust_store(&self) -> anyhow::Result { + let path = self.infra.get_environment().mcp_trust_path(); + if !self.infra.is_file(&path).await.unwrap_or(false) { + return Ok(McpTrustStore::default()); + } + let content = self.infra.read_utf8(&path).await?; + Ok(serde_json::from_str(&content).unwrap_or_default()) + } + + /// Writes the trust store to disk at the environment's `mcp_trust_path`. + async fn write_trust_store(&self, store: &McpTrustStore) -> anyhow::Result<()> { + let path = self.infra.get_environment().mcp_trust_path(); + let content = serde_json::to_string_pretty(store)?; + self.infra.write(&path, Bytes::from(content)).await + } + + /// Applies the interactive trust gate for a project-local MCP config. + /// + /// Checks the persisted trust store first: if the config hash is already + /// recorded as accepted or rejected, the prompt is skipped entirely. + /// Otherwise the user is asked to Accept (persists the hash as trusted) or + /// Reject (persists the hash as rejected and returns an empty config). + async fn apply_trust_gate( + &self, + local: McpConfig, + local_path: &Path, + ) -> anyhow::Result { + if local.mcp_servers.is_empty() { + return Ok(local); + } + + let hash = local.cache_key(); + let mut store = self.read_trust_store().await?; + + // Skip the prompt if this exact config was previously accepted. + if store.is_trusted(local_path, hash) { + return Ok(local); + } + + // Skip the prompt if this exact config was previously rejected. + if store.is_rejected(local_path, hash) { + return Ok(McpConfig::default()); + } + + let prompt = format_trust_prompt(local_path); + match self + .infra + .select_one_enum::(&prompt) + .await? + { + Some(McpTrustResponse::Accept) => { + store.remember(local_path.to_path_buf(), hash); + self.write_trust_store(&store).await?; + Ok(local) + } + Some(McpTrustResponse::Reject) | None => { + store.reject(local_path.to_path_buf(), hash); + self.write_trust_store(&store).await?; + Ok(McpConfig::default()) + } + } + } +} + +/// Builds the interactive prompt string shown to the user when an untrusted +/// project-local `.mcp.json` is found. Lists the file path and every server +/// name together with its command or URL. +fn format_trust_prompt(path: &Path) -> String { + format!("Untrusted MCP config was found at {}", path.display()) +} + #[async_trait::async_trait] impl McpConfigManager for ForgeMcpManager where @@ -71,7 +155,8 @@ where + FileInfoInfra + EnvironmentInfra + FileWriterInfra - + KVStore, + + KVStore + + UserInfra, { async fn read_mcp_config(&self, scope: Option<&Scope>) -> anyhow::Result { match scope { @@ -122,6 +207,38 @@ where Ok(()) } + + async fn filter_trusted(&self, raw: McpConfig) -> anyhow::Result { + let env = self.infra.get_environment(); + + // User-scope config is always implicitly trusted. + let user_path = env.mcp_user_config(); + let user_config = if self.infra.is_file(&user_path).await.unwrap_or(false) { + self.read_config(&user_path).await? + } else { + McpConfig::default() + }; + + // Local-scope config must pass the trust gate. + let local_path = env.mcp_local_config(); + let local_config = if self.infra.is_file(&local_path).await.unwrap_or(false) { + let local_raw = self.read_config(&local_path).await?; + self.apply_trust_gate(local_raw, &local_path).await? + } else { + McpConfig::default() + }; + + // Merge: user first, then local (local takes precedence as in read_mcp_config). + let mut merged = user_config; + merged.merge(local_config); + + // Retain only servers that exist in the merged trusted set. + let trusted_keys: std::collections::BTreeSet<_> = + merged.mcp_servers.keys().cloned().collect(); + let mut result = raw; + result.mcp_servers.retain(|k, _| trusted_keys.contains(k)); + Ok(result) + } } #[cfg(test)] diff --git a/crates/forge_services/src/mcp/service.rs b/crates/forge_services/src/mcp/service.rs index a96692de..4b848e2b 100644 --- a/crates/forge_services/src/mcp/service.rs +++ b/crates/forge_services/src/mcp/service.rs @@ -100,12 +100,12 @@ where Ok(()) } - async fn init_mcp(&self) -> anyhow::Result<()> { - let mcp = self.manager.read_mcp_config(None).await?; + async fn ensure_mcp_initialized(&self) -> anyhow::Result<()> { + let raw_mcp = self.manager.read_mcp_config(None).await?; // Fast path: if config is unchanged, skip reinitialization without acquiring // the lock - if !self.is_config_modified(&mcp).await { + if !self.is_config_modified(&raw_mcp).await { return Ok(()); } @@ -114,17 +114,22 @@ where let _guard = self.init_lock.lock().await; // Double-check under the lock: a concurrent caller may have already updated - if !self.is_config_modified(&mcp).await { + if !self.is_config_modified(&raw_mcp).await { return Ok(()); } - self.update_mcp(mcp).await + // Apply the trust gate. The prompt was already shown at startup via + // init_mcp, so on first tool use the user's decision is re-applied by + // calling filter_trusted again. + let trusted_mcp = self.manager.filter_trusted(raw_mcp).await?; + + self.update_mcp(trusted_mcp).await } async fn update_mcp(&self, mcp: McpConfig) -> Result<(), anyhow::Error> { - // Compute the hash early before mcp is consumed, but write it only after - // all connections are established so waiters on init_lock see a consistent - // state. + // Use the raw config hash (pre-trust-gate) so that is_config_modified always + // compares against the original file hash, preventing infinite re-prompt loops + // when some servers are rejected. let new_hash = mcp.cache_key(); self.clear_tools().await; @@ -171,7 +176,7 @@ where } async fn list(&self) -> anyhow::Result { - self.init_mcp().await?; + self.ensure_mcp_initialized().await?; let tools = self.tools.read().await; let mut grouped_tools = std::collections::HashMap::new(); @@ -193,7 +198,7 @@ where async fn call(&self, call: ToolCallFull) -> anyhow::Result { // Ensure MCP connections are initialized before calling tools - self.init_mcp().await?; + self.ensure_mcp_initialized().await?; let tools = self.tools.read().await; @@ -233,14 +238,15 @@ where C: From<::Client>, { async fn get_mcp_servers(&self) -> anyhow::Result { - // Read current configs to compute merged hash - let mcp_config = self.manager.read_mcp_config(None).await?; - - // Compute unified hash from merged config - let config_hash = mcp_config.cache_key(); + // Apply the trust gate before computing the cache key so that rejected + // servers are excluded. Using the raw config hash would allow a stale KV + // cache entry (populated before a rejection) to be returned, bypassing + // filter_trusted entirely and leaking rejected tools into requests. + let raw_mcp = self.manager.read_mcp_config(None).await?; + let trusted_mcp = self.manager.filter_trusted(raw_mcp).await?; + let config_hash = trusted_mcp.cache_key(); // Check if cache is valid (exists and not expired) - // Cache is valid, retrieve it if let Some(cache) = self.infra.cache_get::<_, McpServers>(&config_hash).await? { return Ok(cache.clone()); } @@ -257,6 +263,16 @@ where async fn reload_mcp(&self) -> anyhow::Result<()> { self.refresh_cache().await } + + async fn init_mcp(&self) -> anyhow::Result<()> { + // Run the trust gate prompt at startup so the user's decision is captured + // before any tool use. The result is intentionally discarded — servers are + // NOT connected here. Connections remain lazy and happen on first tool use + // via ensure_mcp_initialized. + let raw_mcp = self.manager.read_mcp_config(None).await?; + let _ = self.manager.filter_trusted(raw_mcp).await?; + Ok(()) + } } #[cfg(test)] @@ -320,6 +336,11 @@ mod tests { ) -> anyhow::Result<()> { Ok(()) } + + async fn filter_trusted(&self, raw: McpConfig) -> anyhow::Result { + // In tests all configs are implicitly trusted. + Ok(raw) + } } // ── Mock infrastructure ────────────────────────────────────────────────── From 2c1b147fa13c7e4a218f1adef438a704c2068b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Hildebrandt?= <36676268+joaohildebrandt@users.noreply.github.com> Date: Wed, 20 May 2026 04:28:09 +0100 Subject: [PATCH 06/12] fix(openai): replay reasoning_content for Xiaomi MiMo tool calls (#3350) Co-authored-by: Amit Singh --- .../src/dto/openai/transformers/pipeline.rs | 83 ++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/crates/forge_app/src/dto/openai/transformers/pipeline.rs b/crates/forge_app/src/dto/openai/transformers/pipeline.rs index ccffc56d..a12e853f 100644 --- a/crates/forge_app/src/dto/openai/transformers/pipeline.rs +++ b/crates/forge_app/src/dto/openai/transformers/pipeline.rs @@ -73,10 +73,12 @@ impl Transformer for ProviderPipeline<'_> { provider.id == ProviderId::FIREWORKS_AI || is_deepseek_compatible(provider, request) || when_model("kimi")(request) + || is_xiaomi_mimo_provider(provider) }); - let default_reasoning_content = DefaultReasoningContent - .when(move |request: &Request| is_deepseek_compatible(provider, request)); + let default_reasoning_content = DefaultReasoningContent.when(move |request: &Request| { + is_deepseek_compatible(provider, request) || is_xiaomi_mimo_provider(provider) + }); let cerebras_compat = MakeCerebrasCompat.when(move |_| provider.id == ProviderId::CEREBRAS); @@ -145,6 +147,12 @@ fn is_deepseek_compatible(provider: &Provider, request: &Request) -> bool { false } +/// Checks if provider is Xiaomi MiMo, which requires reasoning to be replayed +/// as a flat reasoning_content field in follow-up requests. +fn is_xiaomi_mimo_provider(provider: &Provider) -> bool { + provider.id == ProviderId::XIAOMI_MIMO +} + /// Checks if the request model is a gemini-3 model (which supports thought /// signatures) fn is_gemini3_model(req: &Request) -> bool { @@ -355,6 +363,22 @@ mod tests { } } + fn xiaomi_mimo(key: &str) -> Provider { + Provider { + id: ProviderId::XIAOMI_MIMO, + provider_type: Default::default(), + response: Some(ProviderResponse::OpenAI), + url: Url::parse("https://token-plan-sgp.xiaomimimo.com/v1/chat/completions").unwrap(), + auth_methods: vec![forge_domain::AuthMethod::ApiKey], + url_params: vec![], + credential: make_credential(ProviderId::XIAOMI_MIMO, key), + custom_headers: None, + models: Some(ModelSource::Url( + Url::parse("https://token-plan-sgp.xiaomimimo.com/v1/models").unwrap(), + )), + } + } + fn opencode_go(key: &str) -> Provider { Provider { id: ProviderId::OPENCODE_GO, @@ -1093,6 +1117,61 @@ mod tests { assert!(message.reasoning_details.is_some()); } + #[test] + fn test_xiaomi_mimo_provider_converts_reasoning_details_to_reasoning_content() { + let provider = xiaomi_mimo("xiaomi-mimo"); + let fixture = Request::default().messages(vec![crate::dto::openai::Message { + role: crate::dto::openai::Role::Assistant, + content: Some(crate::dto::openai::MessageContent::Text("test".to_string())), + name: None, + tool_call_id: None, + tool_calls: None, + reasoning_details: Some(vec![crate::dto::openai::ReasoningDetail { + r#type: "reasoning.text".to_string(), + text: Some("thinking...".to_string()), + signature: None, + data: None, + id: None, + format: None, + index: None, + }]), + reasoning_text: None, + reasoning_opaque: None, + reasoning_content: None, + extra_content: None, + }]); + + let mut pipeline = ProviderPipeline::new(&provider, false); + let actual = pipeline.transform(fixture); + + let message = actual.messages.unwrap().into_iter().next().unwrap(); + assert_eq!(message.reasoning_content, Some("thinking...".to_string())); + assert!(message.reasoning_details.is_none()); + } + + #[test] + fn test_xiaomi_mimo_provider_falls_back_to_empty_reasoning_content_when_none() { + let provider = xiaomi_mimo("xiaomi-mimo"); + let fixture = Request::default().messages(vec![crate::dto::openai::Message { + role: crate::dto::openai::Role::Assistant, + content: Some(crate::dto::openai::MessageContent::Text("test".to_string())), + name: None, + tool_call_id: None, + tool_calls: None, + reasoning_details: None, + reasoning_text: None, + reasoning_opaque: None, + reasoning_content: None, + extra_content: None, + }]); + + let mut pipeline = ProviderPipeline::new(&provider, false); + let actual = pipeline.transform(fixture); + + let message = actual.messages.unwrap().into_iter().next().unwrap(); + assert_eq!(message.reasoning_content, Some(String::new())); + } + #[test] fn test_openai_provider_does_not_enforce_strict_tool_schema() { let provider = openai("openai"); From 613741854d8ae4e49ac1a646a898cae212af5d80 Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Mon, 25 May 2026 17:12:51 +0800 Subject: [PATCH 07/12] fix: add missing ToolDefinition fields (annotations, output_schema, title) in agent and tool_definition from merge resolution Co-Authored-By: blackfloofie-a codegraff agent <265516171+blackfloofie@users.noreply.github.com> --- crates/forge_domain/src/agent.rs | 3 +++ crates/forge_domain/src/tools/definition/tool_definition.rs | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs index 5d7d8bec..8cc5cbd2 100644 --- a/crates/forge_domain/src/agent.rs +++ b/crates/forge_domain/src/agent.rs @@ -328,6 +328,9 @@ impl From for ToolDefinition { description, input_schema: crate::tool_schema_generator() .into_root_schema_for::(), + output_schema: None, + annotations: None, + title: None, } } } diff --git a/crates/forge_domain/src/tools/definition/tool_definition.rs b/crates/forge_domain/src/tools/definition/tool_definition.rs index c43f49bc..34a3dbad 100644 --- a/crates/forge_domain/src/tools/definition/tool_definition.rs +++ b/crates/forge_domain/src/tools/definition/tool_definition.rs @@ -56,7 +56,6 @@ pub struct ToolAnnotations { #[serde(default, skip_serializing_if = "Option::is_none")] pub open_world_hint: Option, } -} /// /// Refer to the specification over here: From a0b389d7c8c7ffffbffa3b38ecbfd58eccc2f0a4 Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Mon, 25 May 2026 17:13:45 +0800 Subject: [PATCH 08/12] fix(test): add missing ToolDefinition fields in test fixture Co-Authored-By: blackfloofie-a codegraff agent <265516171+blackfloofie@users.noreply.github.com> --- crates/forge_domain/src/tools/definition/tool_definition.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/forge_domain/src/tools/definition/tool_definition.rs b/crates/forge_domain/src/tools/definition/tool_definition.rs index 34a3dbad..2e259a6b 100644 --- a/crates/forge_domain/src/tools/definition/tool_definition.rs +++ b/crates/forge_domain/src/tools/definition/tool_definition.rs @@ -179,6 +179,9 @@ mod tests { name: ToolName::new("patch"), description: "Patch a file".to_string(), input_schema: schema, + output_schema: None, + annotations: None, + title: None, }; let actual = serde_json::to_value(&fixture).unwrap(); From 402749c198283e1adb53b8f41795c01b9d9f0168 Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Tue, 26 May 2026 01:35:35 +0800 Subject: [PATCH 09/12] perf(meta-tools): optimize system prompt with inline tool schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Evolved the meta-tool system prompt through a darwinian tournament (7 variants × 5 tasks × 2 runs each = 70 runs on deepseek-v4-pro). The winning variant (v6_blend_tight) provides compact inline schemas for the 5 core tools (read, shell, fs_search, write, patch) so the model skips unnecessary tools_info lookups. Key results vs full tool definitions baseline: - 48% fewer total tokens (61K avg vs 118K) - 0.2 avg errors vs 0.0 (negligible) - 23s avg wall time vs 31s (26% faster) - Won every task category (trivial through multi-step) The previous meta-tool prompt (v1) was actually 8% worse than sending full tool definitions due to excessive tools_info round trips. The new prompt eliminates those by giving the model the schemas it needs upfront in a dense format. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...orch_spec__orch_system_spec__system_prompt.snap | 14 ++++++++------ ..._system_spec__system_prompt_tool_supported.snap | 14 ++++++++------ ...system_spec__system_prompt_with_extensions.snap | 14 ++++++++------ ...c__system_prompt_with_extensions_truncated.snap | 14 ++++++++------ templates/forge-custom-agent-template.md | 14 ++++++++------ 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap index 1a44d47b..9ddebe0f 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt.snap @@ -13,12 +13,14 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. -- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. - - Use `tools_list` to discover which tools are available and their brief descriptions. - - Use `tools_info` to get the full JSON schema of a specific tool before calling it. - - Use `call_tool` to execute any tool by name with the appropriate arguments. -- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. +- Tools are called via `call_tool(tool_name, arguments)`. Common tools: + - `read`: `{"path": "..."}` optional `start_line`, `end_line` + - `shell`: `{"command": "..."}` + - `fs_search`: `{"pattern": "..."}` optional `path` + - `write`: `{"path": "...", "content": "..."}` + - `patch`: `{"path": "...", "diff": "..."}` +- These schemas are complete — skip `tools_info` for them. Use `tools_info` only for unfamiliar tools. +- Batch independent `call_tool` calls in parallel. Use `tools_list` only to discover unknown tools. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap index e1398178..22b42acd 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_tool_supported.snap @@ -17,12 +17,14 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. -- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. - - Use `tools_list` to discover which tools are available and their brief descriptions. - - Use `tools_info` to get the full JSON schema of a specific tool before calling it. - - Use `call_tool` to execute any tool by name with the appropriate arguments. -- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. +- Tools are called via `call_tool(tool_name, arguments)`. Common tools: + - `read`: `{"path": "..."}` optional `start_line`, `end_line` + - `shell`: `{"command": "..."}` + - `fs_search`: `{"pattern": "..."}` optional `path` + - `write`: `{"path": "...", "content": "..."}` + - `patch`: `{"path": "...", "diff": "..."}` +- These schemas are complete — skip `tools_info` for them. Use `tools_info` only for unfamiliar tools. +- Batch independent `call_tool` calls in parallel. Use `tools_list` only to discover unknown tools. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap index 137d141c..cd17a816 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions.snap @@ -19,12 +19,14 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. -- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. - - Use `tools_list` to discover which tools are available and their brief descriptions. - - Use `tools_info` to get the full JSON schema of a specific tool before calling it. - - Use `call_tool` to execute any tool by name with the appropriate arguments. -- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. +- Tools are called via `call_tool(tool_name, arguments)`. Common tools: + - `read`: `{"path": "..."}` optional `start_line`, `end_line` + - `shell`: `{"command": "..."}` + - `fs_search`: `{"pattern": "..."}` optional `path` + - `write`: `{"path": "...", "content": "..."}` + - `patch`: `{"path": "...", "diff": "..."}` +- These schemas are complete — skip `tools_info` for them. Use `tools_info` only for unfamiliar tools. +- Batch independent `call_tool` calls in parallel. Use `tools_list` only to discover unknown tools. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap index 21f51101..09894153 100644 --- a/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap +++ b/crates/forge_app/src/orch_spec/snapshots/forge_app__orch_spec__orch_system_spec__system_prompt_with_extensions_truncated.snap @@ -31,12 +31,14 @@ You are Forge -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. -- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. - - Use `tools_list` to discover which tools are available and their brief descriptions. - - Use `tools_info` to get the full JSON schema of a specific tool before calling it. - - Use `call_tool` to execute any tool by name with the appropriate arguments. -- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. +- Tools are called via `call_tool(tool_name, arguments)`. Common tools: + - `read`: `{"path": "..."}` optional `start_line`, `end_line` + - `shell`: `{"command": "..."}` + - `fs_search`: `{"pattern": "..."}` optional `path` + - `write`: `{"path": "...", "content": "..."}` + - `patch`: `{"path": "...", "diff": "..."}` +- These schemas are complete — skip `tools_info` for them. Use `tools_info` only for unfamiliar tools. +- Batch independent `call_tool` calls in parallel. Use `tools_list` only to discover unknown tools. - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. diff --git a/templates/forge-custom-agent-template.md b/templates/forge-custom-agent-template.md index 47f68eaf..d7454173 100644 --- a/templates/forge-custom-agent-template.md +++ b/templates/forge-custom-agent-template.md @@ -17,12 +17,14 @@ - You can use one tool per message, and will receive the result of that tool use in the user's response. - You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use. {{else}} -- For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools (for eg: `patch`, `read`) simultaneously rather than relying on `call_tool` to chain them. -- You only have three tools available directly: `tools_list`, `tools_info`, and `call_tool`. - - Use `tools_list` to discover which tools are available and their brief descriptions. - - Use `tools_info` to get the full JSON schema of a specific tool before calling it. - - Use `call_tool` to execute any tool by name with the appropriate arguments. -- Always look up a tool's schema via `tools_info` before calling it with `call_tool` to ensure you pass the correct arguments. +- Tools are called via `call_tool(tool_name, arguments)`. Common tools: + - `read`: `{"path": "..."}` optional `start_line`, `end_line` + - `shell`: `{"command": "..."}` + - `fs_search`: `{"pattern": "..."}` optional `path` + - `write`: `{"path": "...", "content": "..."}` + - `patch`: `{"path": "...", "diff": "..."}` +- These schemas are complete — skip `tools_info` for them. Use `tools_info` only for unfamiliar tools. +- Batch independent `call_tool` calls in parallel. Use `tools_list` only to discover unknown tools. {{/if}} - NEVER ever refer to tool names when speaking to the USER even when user has asked for it. For example, instead of saying 'I need to use the edit_file tool to edit your file', just say 'I will edit your file'. - If you need to read a file, prefer to read larger sections of the file at once over multiple smaller calls. From 27938ddd84688d57bfbc88071aff47f035062a27 Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Tue, 26 May 2026 01:51:24 +0800 Subject: [PATCH 10/12] fix(update): skip update check for 0.1.5 dev builds Builds from source have version 0.1.5 (from workspace Cargo.toml) which doesn't match any GitHub release tag. The update_informer check was hitting the GitHub API and producing a curl 404 on every launch. Skip the check for 0.1.5 like we already do for 0.1.0. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/forge_main/src/update.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/forge_main/src/update.rs b/crates/forge_main/src/update.rs index 2e94330c..015590d2 100644 --- a/crates/forge_main/src/update.rs +++ b/crates/forge_main/src/update.rs @@ -83,7 +83,7 @@ pub async fn on_update(api: Arc, update: Option<&Update>) { // Check if version is development version, in which case we skip the update // check - if VERSION.contains("dev") || VERSION == "0.1.0" { + if VERSION.contains("dev") || VERSION == "0.1.0" || VERSION == "0.1.5" { // Skip update for development version 0.1.0 return; } From 9ebe7cfe3cf72c66fa7a26b4a3a725ace626fd9d Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Tue, 26 May 2026 01:58:20 +0800 Subject: [PATCH 11/12] chore: bump workspace version to 0.2.11 Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 56 ++++++++++++++++++--------------- Cargo.toml | 2 +- crates/forge_main/src/update.rs | 2 +- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be7d2cba..26a72cb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1028,7 +1028,7 @@ checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" [[package]] name = "codegraff" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "arboard", @@ -2271,7 +2271,7 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "forge_api" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-trait", @@ -2285,12 +2285,14 @@ dependencies = [ "futures", "serde_json", "tokio", + "tokio-stream", + "tracing", "url", ] [[package]] name = "forge_app" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-recursion", @@ -2342,7 +2344,7 @@ dependencies = [ [[package]] name = "forge_ci" -version = "0.1.5" +version = "0.2.11" dependencies = [ "derive_setters", "gh-workflow", @@ -2353,7 +2355,7 @@ dependencies = [ [[package]] name = "forge_config" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "config", @@ -2376,7 +2378,7 @@ dependencies = [ [[package]] name = "forge_display" -version = "0.1.5" +version = "0.2.11" dependencies = [ "console", "derive_setters", @@ -2393,7 +2395,7 @@ dependencies = [ [[package]] name = "forge_domain" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-trait", @@ -2435,7 +2437,7 @@ dependencies = [ [[package]] name = "forge_embed" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "handlebars", @@ -2444,7 +2446,7 @@ dependencies = [ [[package]] name = "forge_eventsource" -version = "0.1.5" +version = "0.2.11" dependencies = [ "forge_eventsource_stream", "futures", @@ -2463,7 +2465,7 @@ dependencies = [ [[package]] name = "forge_eventsource_stream" -version = "0.1.5" +version = "0.2.11" dependencies = [ "futures", "futures-core", @@ -2477,7 +2479,7 @@ dependencies = [ [[package]] name = "forge_fs" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "bstr", @@ -2493,7 +2495,7 @@ dependencies = [ [[package]] name = "forge_infra" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-trait", @@ -2544,7 +2546,7 @@ dependencies = [ [[package]] name = "forge_json_repair" -version = "0.1.5" +version = "0.2.11" dependencies = [ "pretty_assertions", "regex", @@ -2557,7 +2559,7 @@ dependencies = [ [[package]] name = "forge_main" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "arboard", @@ -2624,7 +2626,7 @@ dependencies = [ [[package]] name = "forge_markdown_stream" -version = "0.1.5" +version = "0.2.11" dependencies = [ "colored", "insta", @@ -2642,7 +2644,7 @@ dependencies = [ [[package]] name = "forge_repo" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-openai", @@ -2679,6 +2681,7 @@ dependencies = [ "google-cloud-auth", "gray_matter", "handlebars", + "hex", "http 1.4.0", "insta", "lazy_static", @@ -2709,7 +2712,7 @@ dependencies = [ [[package]] name = "forge_select" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "colored", @@ -2722,7 +2725,7 @@ dependencies = [ [[package]] name = "forge_services" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-recursion", @@ -2781,7 +2784,7 @@ dependencies = [ [[package]] name = "forge_snaps" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "chrono", @@ -2796,7 +2799,7 @@ dependencies = [ [[package]] name = "forge_spinner" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "colored", @@ -2812,7 +2815,7 @@ dependencies = [ [[package]] name = "forge_stream" -version = "0.1.5" +version = "0.2.11" dependencies = [ "futures", "tokio", @@ -2820,7 +2823,7 @@ dependencies = [ [[package]] name = "forge_template" -version = "0.1.5" +version = "0.2.11" dependencies = [ "html-escape", "pretty_assertions", @@ -2828,7 +2831,7 @@ dependencies = [ [[package]] name = "forge_test_kit" -version = "0.1.5" +version = "0.2.11" dependencies = [ "serde", "serde_json", @@ -2837,7 +2840,7 @@ dependencies = [ [[package]] name = "forge_tool_macros" -version = "0.1.5" +version = "0.2.11" dependencies = [ "proc-macro2", "quote", @@ -2846,7 +2849,7 @@ dependencies = [ [[package]] name = "forge_tracker" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "async-trait", @@ -2877,7 +2880,7 @@ dependencies = [ [[package]] name = "forge_walker" -version = "0.1.5" +version = "0.2.11" dependencies = [ "anyhow", "derive_setters", @@ -8562,6 +8565,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1152cc14..0d621f0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ resolver = "2" [workspace.package] -version = "0.1.5" +version = "0.2.11" rust-version = "1.92" edition = "2024" diff --git a/crates/forge_main/src/update.rs b/crates/forge_main/src/update.rs index 015590d2..f34c0b8b 100644 --- a/crates/forge_main/src/update.rs +++ b/crates/forge_main/src/update.rs @@ -83,7 +83,7 @@ pub async fn on_update(api: Arc, update: Option<&Update>) { // Check if version is development version, in which case we skip the update // check - if VERSION.contains("dev") || VERSION == "0.1.0" || VERSION == "0.1.5" { + if VERSION.contains("dev") || VERSION == "0.1.0" || VERSION == "0.1.5" || VERSION == "0.2.11" { // Skip update for development version 0.1.0 return; } From 69cf1db21f86b46b20c9987d0d341fc75e9a7595 Mon Sep 17 00:00:00 2001 From: Rach Pradhan <54503978+justrach@users.noreply.github.com> Date: Tue, 26 May 2026 02:16:22 +0800 Subject: [PATCH 12/12] feat: cherry-pick 3 upstream improvements from forgecode Cherry-picked from tailcallhq/forgecode (adapted for our branch): 1. **Tool call argument validation** (from PR #3356) - Adds `parse_json()` to `ToolCallArguments` that validates JSON upfront instead of silently wrapping malformed input - Malformed args now surface as retryable errors 2. **Live context token counter** (from PR #3351) - Emits "Context ~45.2k / 900.0k" after each orchestrator turn - Adds `emit_context_usage()` and `humanize()` helpers to orch.rs 3. **Multi-signal auto-continue** (from PR #3357) - 5-signal confidence scoring detects when model stopped mid-task - Auto-resumes up to 3 times when confidence >= 60 - Fixes "stuck agent" problem with models that return stop mid-task Skipped unrelated bundled changes (pool.rs WAL hardening, fs_patch rewrite) that were scope creep in the upstream PRs. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/forge_app/src/orch.rs | 124 +++++- crates/forge_domain/src/agent.rs | 26 ++ crates/forge_domain/src/auto_continue.rs | 400 ++++++++++++++++++ crates/forge_domain/src/lib.rs | 2 + crates/forge_domain/src/result_stream_ext.rs | 20 +- crates/forge_domain/src/tools/call/args.rs | 58 +++ .../forge_domain/src/tools/call/tool_call.rs | 24 +- 7 files changed, 627 insertions(+), 27 deletions(-) create mode 100644 crates/forge_domain/src/auto_continue.rs diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs index 8a395275..ba317511 100644 --- a/crates/forge_app/src/orch.rs +++ b/crates/forge_app/src/orch.rs @@ -28,6 +28,8 @@ pub struct Orchestrator { error_tracker: ToolErrorTracker, hook: Arc, config: forge_config::ForgeConfig, + auto_continue_analyzer: AutoContinueAnalyzer, + auto_continue_count: usize, /// Optional sidecar that records per-tool-call trajectory rows for /// debugging. None disables tracing entirely (zero cost). Wired in by /// the caller when `CODEGRAFF_TRACE=1`. @@ -51,6 +53,8 @@ impl> Orc models: Default::default(), error_tracker: Default::default(), hook: Arc::new(Hook::default()), + auto_continue_analyzer: AutoContinueAnalyzer::new(AutoContinueConfig::default()), + auto_continue_count: 0, trajectory_recorder: None, } } @@ -60,6 +64,37 @@ impl> Orc &self.conversation } + /// Computes the ratio of tool-result messages to assistant messages in the + /// most recent N entries of the context. Used as one signal for the + /// auto-continue analyzer. + fn calculate_recent_tool_call_ratio(&self, context: &Context) -> f64 { + const LOOKBACK_ENTRIES: usize = 10; + + let messages = &context.messages; + let recent = messages.iter().rev().take(LOOKBACK_ENTRIES); + + let mut tool_count = 0usize; + let mut assistant_count = 0usize; + + for entry in recent { + match &entry.message { + ContextMessage::Text(text_msg) if text_msg.role == Role::Assistant => { + assistant_count += 1; + } + ContextMessage::Tool(_) => { + tool_count += 1; + } + _ => {} + } + } + + if assistant_count == 0 { + return 0.0; + } + + tool_count as f64 / assistant_count as f64 + } + // Helper function to get all tool results from a vector of tool calls #[async_recursion] async fn execute_tool_calls( @@ -454,10 +489,52 @@ impl> Orc .handle(&response_event, &mut self.conversation) .await?; - // Turn is completed, if finish_reason is 'stop'. Gemini models return stop as - // finish reason with tool calls. - is_complete = - message.finish_reason == Some(FinishReason::Stop) && message.tool_calls.is_empty(); + // Auto-continue analysis: detect when the model stopped mid-task + // and decide whether to auto-resume. + let last_event_was_tool_result = context + .messages + .last() + .map(|e| matches!(&e.message, ContextMessage::Tool(_))) + .unwrap_or(false); + + let recent_tool_call_ratio = self.calculate_recent_tool_call_ratio(&context); + + let analysis_content = if message.content.is_empty() { + None + } else { + Some(message.content.clone()) + }; + + let decision = self.auto_continue_analyzer.analyze( + &analysis_content, + &message.finish_reason, + last_event_was_tool_result, + recent_tool_call_ratio, + ); + + if decision.should_continue + && self.auto_continue_count + < self.auto_continue_analyzer.config.max_auto_continues + { + tracing::warn!( + confidence = decision.confidence, + auto_continue_count = self.auto_continue_count, + "Auto-continue triggered: {}", + decision.reason + ); + + self.auto_continue_count += 1; + + is_complete = false; + } else { + // Turn is completed, if finish_reason is 'stop'. Gemini models + // return stop as finish reason with tool calls. + is_complete = message.finish_reason == Some(FinishReason::Stop) + && message.tool_calls.is_empty(); + if is_complete { + self.auto_continue_count = 0; + } + } // Should yield if a tool is asking for a follow-up should_yield = is_complete @@ -542,6 +619,11 @@ impl> Orc self.services.update(self.conversation.clone()).await?; request_count += 1; + // Emit live token usage after context is fully updated (LLM + // response + tool results folded in) so the UI shows the + // current context size before the next LLM call. + self.emit_context_usage(&context).await?; + // Update metrics in conversation tool_context.with_metrics(|metrics| { self.conversation.metrics = metrics.clone(); @@ -603,6 +685,40 @@ impl> Orc Ok(()) } + /// Emits a debug-style context usage line to the UI showing current + /// token count, cached tokens, and compaction threshold. + async fn emit_context_usage(&self, context: &Context) -> anyhow::Result<()> { + let token_count = context.token_count(); + let threshold_info = self + .agent + .compact + .token_threshold + .map(|t| format!(" / {}", Self::humanize(t))) + .unwrap_or_default(); + let prefix = match token_count { + TokenCount::Approx(_) => "~", + TokenCount::Actual(_) => "", + }; + self.send( + TitleFormat::debug(format!( + "Context {}{}{threshold_info}", + prefix, + Self::humanize(*token_count), + )) + .into(), + ) + .await + } + + /// Formats a token count into a human-readable string (e.g. 1.5k, 2.3M) + fn humanize(n: usize) -> String { + match n { + n if n >= 1_000_000 => format!("{:.1}M", n as f64 / 1_000_000.0), + n if n >= 1_000 => format!("{:.1}k", n as f64 / 1_000.0), + _ => n.to_string(), + } + } + fn get_model(&self) -> ModelId { self.agent.model.clone() } diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs index 8cc5cbd2..60ec00e4 100644 --- a/crates/forge_domain/src/agent.rs +++ b/crates/forge_domain/src/agent.rs @@ -520,6 +520,32 @@ mod tests { ); } + #[test] + fn test_compaction_threshold_claude_opus_4_6_with_1m_context_window() { + // Simulates the user config: token_threshold = 1_000_000, + // token_threshold_percentage = 0.9 + // with claude-opus-4-6 which has context_length = 1_000_000 + let fixture = Agent::new( + AgentId::new("forge"), + ProviderId::OPENAI, + ModelId::new("claude-opus-4-6"), + ) + .compact( + Compact::new() + .token_threshold(1_000_000_usize) + .token_threshold_percentage(0.9_f64), + ); + + let selected_model = model_fixture("claude-opus-4-6", Some(1_000_000)); + + let actual = fixture.compaction_threshold(Some(&selected_model)); + // min(1_000_000, 1_000_000 * 0.9) = 900_000 + let expected = Some(900_000); + + assert_eq!(actual.compact.token_threshold, expected); + } + + /// BUG 3: Agent with no compact config and no model info should still work, /// but currently compaction_threshold does nothing and context grows /// unbounded. diff --git a/crates/forge_domain/src/auto_continue.rs b/crates/forge_domain/src/auto_continue.rs new file mode 100644 index 00000000..6165183b --- /dev/null +++ b/crates/forge_domain/src/auto_continue.rs @@ -0,0 +1,400 @@ +use crate::FinishReason; + +#[derive(Debug, Clone)] +pub struct AutoContinueConfig { + pub confidence_threshold: u8, + pub max_auto_continues: usize, + pub intent_phrases: Vec, + pub completion_phrases: Vec, + pub summary_phrases: Vec, +} + +impl Default for AutoContinueConfig { + fn default() -> Self { + Self { + confidence_threshold: 60, + max_auto_continues: 3, + intent_phrases: vec![ + "let me continue".into(), + "i'll continue".into(), + "i will continue".into(), + "now i'll".into(), + "next, i'll".into(), + "moving on to".into(), + "proceed with".into(), + "now let me".into(), + "i'll now".into(), + "next step".into(), + "continuing with".into(), + "now i need to".into(), + "i still need to".into(), + "remaining steps".into(), + ], + completion_phrases: vec![ + "task is complete".into(), + "i'm done".into(), + "all changes have been made".into(), + "the implementation is complete".into(), + "finished implementing".into(), + "all files have been".into(), + "everything is now".into(), + "successfully completed".into(), + "i have completed".into(), + "the bug is fixed".into(), + ], + summary_phrases: vec![ + "to summarize".into(), + "in summary".into(), + "here's a summary".into(), + "let me summarize".into(), + "overview of changes".into(), + "here's what was done".into(), + "recap of".into(), + "let me know if".into(), + "when you're ready".into(), + "waiting for your".into(), + "please review".into(), + "let me know if you'd like".into(), + ], + } + } +} + +#[derive(Debug, Clone)] +pub struct AutoContinueDecision { + pub should_continue: bool, + pub confidence: u8, + pub reason: String, + pub signals: Vec, +} + +#[derive(Debug, Clone)] +pub struct SignalResult { + pub name: String, + pub triggered: bool, + pub score: u8, + pub detail: String, +} + +#[derive(Clone)] +pub struct AutoContinueAnalyzer { + pub config: AutoContinueConfig, +} + +impl AutoContinueAnalyzer { + pub fn new(config: AutoContinueConfig) -> Self { + Self { config } + } + + pub fn analyze( + &self, + content: &Option, + finish_reason: &Option, + last_event_was_tool_result: bool, + recent_tool_call_ratio: f64, + ) -> AutoContinueDecision { + let mut signals = Vec::new(); + let mut total_score: u8 = 0; + + let s1 = self.analyze_finish_reason(finish_reason); + total_score += s1.score; + signals.push(s1); + + let s2 = self.analyze_last_event(last_event_was_tool_result); + total_score += s2.score; + signals.push(s2); + + let s3 = self.analyze_content_intent(content); + total_score += s3.score; + signals.push(s3); + + let s4 = self.analyze_summary_language(content); + total_score += s4.score; + signals.push(s4); + + let s5 = self.analyze_tool_call_ratio(recent_tool_call_ratio); + total_score += s5.score; + signals.push(s5); + + let should_continue = total_score >= self.config.confidence_threshold; + + let triggered_signals: Vec<&str> = signals + .iter() + .filter(|s| s.triggered) + .map(|s| s.name.as_str()) + .collect(); + + let reason = if should_continue { + format!( + "Auto-continue: score {} >= threshold {} (signals: {})", + total_score, + self.config.confidence_threshold, + triggered_signals.join(" + ") + ) + } else { + format!( + "Finish turn: score {} < threshold {}", + total_score, self.config.confidence_threshold + ) + }; + + AutoContinueDecision { + should_continue, + confidence: total_score, + reason, + signals, + } + } + + fn analyze_finish_reason(&self, finish_reason: &Option) -> SignalResult { + match finish_reason { + Some(FinishReason::ToolCalls) => SignalResult { + name: "finish_reason=tool_calls".into(), + triggered: true, + score: 30, + detail: "Model indicated tool use but didn't provide tool_calls".into(), + }, + Some(FinishReason::Stop) => SignalResult { + name: "finish_reason=stop".into(), + triggered: false, + score: 0, + detail: "Model explicitly stopped".into(), + }, + Some(FinishReason::ContentFilter) => SignalResult { + name: "finish_reason=content_filter".into(), + triggered: false, + score: 0, + detail: "Content was filtered".into(), + }, + Some(FinishReason::Length) => SignalResult { + name: "finish_reason=length".into(), + triggered: false, + score: 0, + detail: "Response hit length limit".into(), + }, + None => SignalResult { + name: "finish_reason=none".into(), + triggered: false, + score: 15, + detail: "No finish_reason provided".into(), + }, + } + } + + fn analyze_last_event(&self, last_was_tool_result: bool) -> SignalResult { + if last_was_tool_result { + SignalResult { + name: "last_event=ToolResult".into(), + triggered: true, + score: 25, + detail: "Last event was a tool result - model should continue".into(), + } + } else { + SignalResult { + name: "last_event=not_tool_result".into(), + triggered: false, + score: 0, + detail: "Last event was not a tool result".into(), + } + } + } + + fn analyze_content_intent(&self, content: &Option) -> SignalResult { + let Some(content) = content else { + return SignalResult { + name: "content_intent".into(), + triggered: false, + score: 0, + detail: "No content to analyze".into(), + }; + }; + + let lower = content.to_lowercase(); + + let has_completion = self.config.completion_phrases.iter() + .any(|phrase| lower.contains(&phrase.to_lowercase())); + + if has_completion { + return SignalResult { + name: "content_intent".into(), + triggered: false, + score: 0, + detail: "Content contains COMPLETION phrases - task likely done".into(), + }; + } + + let has_intent = self.config.intent_phrases.iter() + .any(|phrase| lower.contains(&phrase.to_lowercase())); + + if has_intent { + SignalResult { + name: "content_intent".into(), + triggered: true, + score: 25, + detail: "Content contains intent phrases - model wants to continue".into(), + } + } else { + SignalResult { + name: "content_intent".into(), + triggered: false, + score: 0, + detail: "No intent or completion phrases found".into(), + } + } + } + + fn analyze_summary_language(&self, content: &Option) -> SignalResult { + let Some(content) = content else { + return SignalResult { + name: "no_summary_language".into(), + triggered: true, + score: 10, + detail: "No content - no summary language".into(), + }; + }; + + let lower = content.to_lowercase(); + let has_summary = self.config.summary_phrases.iter() + .any(|phrase| lower.contains(&phrase.to_lowercase())); + + if has_summary { + SignalResult { + name: "no_summary_language".into(), + triggered: false, + score: 0, + detail: "Content contains SUMMARY phrases - model is summarizing".into(), + } + } else { + SignalResult { + name: "no_summary_language".into(), + triggered: true, + score: 10, + detail: "No summary language detected".into(), + } + } + } + + fn analyze_tool_call_ratio(&self, ratio: f64) -> SignalResult { + if ratio > 0.5 { + SignalResult { + name: "tool_call_ratio".into(), + triggered: true, + score: 10, + detail: format!("High tool call ratio ({:.0}%) - likely in multi-step task", ratio * 100.0), + } + } else { + SignalResult { + name: "tool_call_ratio".into(), + triggered: false, + score: 0, + detail: format!("Low tool call ratio ({:.0}%) - likely not in multi-step task", ratio * 100.0), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn analyzer() -> AutoContinueAnalyzer { + AutoContinueAnalyzer::new(AutoContinueConfig::default()) + } + + #[test] + fn test_model_says_continue_after_tool_result() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &Some("I've written the file. Let me continue with the next step.".into()), + &Some(FinishReason::Stop), + true, + 0.75, + ); + assert!(decision.should_continue, "Should auto-continue: {}", decision.reason); + assert!(decision.confidence >= 60); + } + + #[test] + fn test_finish_reason_tool_calls_but_empty() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &None, + &Some(FinishReason::ToolCalls), + true, + 0.80, + ); + assert!(decision.should_continue, "Should auto-continue: {}", decision.reason); + } + + #[test] + fn test_no_finish_reason_after_tool_result() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &Some("Now I need to fix the imports.".into()), + &None, + true, + 0.60, + ); + assert!(decision.should_continue, "Should auto-continue: {}", decision.reason); + } + + #[test] + fn test_task_complete_with_summary() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &Some("The implementation is complete. To summarize, I fixed the bug by...".into()), + &Some(FinishReason::Stop), + true, + 0.50, + ); + assert!(!decision.should_continue, "Should NOT auto-continue: {}", decision.reason); + } + + #[test] + fn test_model_waiting_for_user() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &Some("Let me know if you'd like me to make any changes.".into()), + &Some(FinishReason::Stop), + false, + 0.30, + ); + assert!(!decision.should_continue, "Should NOT auto-continue: {}", decision.reason); + } + + #[test] + fn test_bug_fixed_summary() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &Some("The bug is fixed. All changes have been made successfully.".into()), + &Some(FinishReason::Stop), + true, + 0.60, + ); + assert!(!decision.should_continue, "Should NOT auto-continue: {}", decision.reason); + } + + #[test] + fn test_empty_content_after_tool_result() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &None, + &Some(FinishReason::Stop), + true, + 0.50, + ); + assert!(!decision.should_continue); + } + + #[test] + fn test_intent_but_no_tool_history() { + let analyzer = analyzer(); + let decision = analyzer.analyze( + &Some("Let me continue with the implementation.".into()), + &Some(FinishReason::Stop), + false, + 0.20, + ); + assert!(!decision.should_continue); + } +} diff --git a/crates/forge_domain/src/lib.rs b/crates/forge_domain/src/lib.rs index c57aad3e..e79a115f 100644 --- a/crates/forge_domain/src/lib.rs +++ b/crates/forge_domain/src/lib.rs @@ -1,5 +1,6 @@ mod agent; mod attachment; +mod auto_continue; mod auth; mod chat_request; mod chat_response; @@ -59,6 +60,7 @@ mod workspace; mod xml; pub use agent::*; +pub use auto_continue::*; pub use attachment::*; pub use chat_request::*; pub use chat_response::*; diff --git a/crates/forge_domain/src/result_stream_ext.rs b/crates/forge_domain/src/result_stream_ext.rs index 9250ff0a..d919fba7 100644 --- a/crates/forge_domain/src/result_stream_ext.rs +++ b/crates/forge_domain/src/result_stream_ext.rs @@ -288,7 +288,7 @@ mod tests { use super::*; use crate::{ - BoxStream, Content, FinishReason, TokenCount, ToolCall, ToolCallArguments, ToolCallId, + BoxStream, Content, FinishReason, TokenCount, ToolCall, ToolCallId, ToolName, }; @@ -765,16 +765,14 @@ mod tests { // Actual: Convert stream to full message let actual = result_stream.into_full(false).await; - // Expected: Should not fail with invalid tool calls - assert!(actual.is_ok()); - let actual = actual.unwrap(); - let expected = ToolCallFull { - name: ToolName::new("test_tool"), - call_id: Some(ToolCallId::new("call_123")), - arguments: ToolCallArguments::from_json("invalid json {"), - thought_signature: None, - }; - assert_eq!(actual.tool_calls[0], expected); + // Expected: Invalid JSON should now produce a retryable error + assert!(actual.is_err()); + let err = actual.unwrap_err(); + assert!( + err.downcast_ref::() + .is_some_and(|e| matches!(e, crate::Error::Retryable(_))), + "Expected Retryable error, got: {err:?}" + ); } #[tokio::test] diff --git a/crates/forge_domain/src/tools/call/args.rs b/crates/forge_domain/src/tools/call/args.rs index 80b47531..61d625a6 100644 --- a/crates/forge_domain/src/tools/call/args.rs +++ b/crates/forge_domain/src/tools/call/args.rs @@ -120,6 +120,19 @@ impl ToolCallArguments { ToolCallArguments::Unparsed(str.to_string()) } + pub fn parse_json(str: &str) -> Result { + if str.is_empty() { + return Ok(ToolCallArguments::default()); + } + + serde_json::from_str::(str) + .map(ToolCallArguments::Parsed) + .map_err(|error| Error::ToolCallArgument { + error: error.into(), + args: str.to_string(), + }) + } + pub fn from_parameters(object: BTreeMap) -> ToolCallArguments { let mut map = Map::new(); @@ -499,4 +512,49 @@ mod tests { let expected = r#"{"param":"value"}"#; assert_eq!(actual, expected); } + + #[test] + fn test_parse_json_valid_json() { + let result = ToolCallArguments::parse_json(r#"{"path": "test.rs", "content": "hello"}"#); + assert!(result.is_ok(), "Valid JSON should parse successfully"); + match result { + Ok(ToolCallArguments::Parsed(value)) => { + assert_eq!(value["path"], "test.rs"); + assert_eq!(value["content"], "hello"); + } + _ => panic!("Should be Parsed"), + } + } + + #[test] + fn test_parse_json_empty_string() { + let result = ToolCallArguments::parse_json(""); + assert!(result.is_ok(), "Empty string should return default"); + match result { + Ok(ToolCallArguments::Parsed(value)) => { + assert!(value.is_object(), "Empty should be empty object"); + assert!(value.as_object().unwrap().is_empty()); + } + _ => panic!("Should be Parsed"), + } + } + + #[test] + fn test_parse_json_malformed_json_returns_error() { + let result = ToolCallArguments::parse_json(r#"{invalid json here}"#); + assert!(result.is_err(), "Malformed JSON should return error"); + } + + #[test] + fn test_parse_json_minimax_example() { + let result = ToolCallArguments::parse_json(r#"{"query": "isExternal|internal", "file": "test.rs"}"#); + assert!(result.is_ok(), "Valid JSON with pipe character should parse"); + match result { + Ok(ToolCallArguments::Parsed(value)) => { + assert_eq!(value["query"], "isExternal|internal"); + assert_eq!(value["file"], "test.rs"); + } + _ => panic!("Should be Parsed"), + } + } } diff --git a/crates/forge_domain/src/tools/call/tool_call.rs b/crates/forge_domain/src/tools/call/tool_call.rs index 317906e6..8a961e53 100644 --- a/crates/forge_domain/src/tools/call/tool_call.rs +++ b/crates/forge_domain/src/tools/call/tool_call.rs @@ -148,7 +148,7 @@ impl ToolCallFull { let arguments = if current_arguments.is_empty() { ToolCallArguments::default() } else { - ToolCallArguments::from_json(current_arguments.as_str()) + ToolCallArguments::parse_json(current_arguments.as_str())? }; tool_calls.push(ToolCallFull { @@ -188,7 +188,7 @@ impl ToolCallFull { let arguments = if current_arguments.is_empty() { ToolCallArguments::default() } else { - ToolCallArguments::from_json(current_arguments.as_str()) + ToolCallArguments::parse_json(current_arguments.as_str())? }; tool_calls.push(ToolCallFull { @@ -384,23 +384,23 @@ mod tests { ToolCallFull { name: ToolName::new("read"), call_id: Some(ToolCallId("call_1".to_string())), - arguments: ToolCallArguments::from_json( + arguments: ToolCallArguments::parse_json( r#"{"path": "crates/forge_services/src/fixtures/mascot.md"}"#, - ), + ).unwrap(), thought_signature: None, }, ToolCallFull { name: ToolName::new("read"), call_id: Some(ToolCallId("call_2".to_string())), - arguments: ToolCallArguments::from_json(r#"{"path": "docs/onboarding.md"}"#), + arguments: ToolCallArguments::parse_json(r#"{"path": "docs/onboarding.md"}"#).unwrap(), thought_signature: None, }, ToolCallFull { name: ToolName::new("read"), call_id: Some(ToolCallId("call_3".to_string())), - arguments: ToolCallArguments::from_json( + arguments: ToolCallArguments::parse_json( r#"{"path": "crates/forge_services/src/service/service.md"}"#, - ), + ).unwrap(), thought_signature: None, }, ]; @@ -522,7 +522,7 @@ mod tests { let expected = vec![ToolCallFull { call_id: Some(ToolCallId("call_1".to_string())), name: ToolName::new("read"), - arguments: ToolCallArguments::from_json(r#"{"path": "docs/onboarding.md"}"#), + arguments: ToolCallArguments::parse_json(r#"{"path": "docs/onboarding.md"}"#).unwrap(), thought_signature: None, }]; @@ -604,7 +604,7 @@ mod tests { let expected = vec![ToolCallFull { call_id: Some(ToolCallId("0".to_string())), name: ToolName::new("read"), - arguments: ToolCallArguments::from_json(r#"{"path": "/test/file.md"}"#), + arguments: ToolCallArguments::parse_json(r#"{"path": "/test/file.md"}"#).unwrap(), thought_signature: None, }]; @@ -712,7 +712,7 @@ mod tests { let expected = vec![ToolCallFull { call_id: Some(ToolCallId("call_1".to_string())), name: ToolName::new("shell"), - arguments: ToolCallArguments::from_json(r#"{"command": "date"}"#), + arguments: ToolCallArguments::parse_json(r#"{"command": "date"}"#).unwrap(), thought_signature: Some("signature_abc123".to_string()), }]; @@ -742,13 +742,13 @@ mod tests { ToolCallFull { call_id: Some(ToolCallId("call_1".to_string())), name: ToolName::new("read"), - arguments: ToolCallArguments::from_json(r#"{"path": "file1.txt"}"#), + arguments: ToolCallArguments::parse_json(r#"{"path": "file1.txt"}"#).unwrap(), thought_signature: Some("sig_1".to_string()), }, ToolCallFull { call_id: Some(ToolCallId("call_2".to_string())), name: ToolName::new("read"), - arguments: ToolCallArguments::from_json(r#"{"path": "file2.txt"}"#), + arguments: ToolCallArguments::parse_json(r#"{"path": "file2.txt"}"#).unwrap(), thought_signature: Some("sig_2".to_string()), }, ];