From 1d1c4e1142cf902f3ae7cb53be48c7268aef79af Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 11:22:40 -0400 Subject: [PATCH 1/5] feat: parse per-call sandbox metadata --- src/debug_repl.rs | 15 ++--- src/install.rs | 10 ++- src/main.rs | 55 +++++++++++++++- src/sandbox.rs | 161 ++++++++++++++++++++++++++++++--------------- src/sandbox_cli.rs | 157 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 318 insertions(+), 80 deletions(-) diff --git a/src/debug_repl.rs b/src/debug_repl.rs index e4f8d634..1c3339e0 100644 --- a/src/debug_repl.rs +++ b/src/debug_repl.rs @@ -12,7 +12,7 @@ use crate::sandbox_cli::SandboxCliPlan; use crate::server::response::{ ResponseState, TimeoutBundleReuse, text_stream_from_content, timeout_bundle_reuse_for_input, }; -use crate::worker_process::{WorkerError, WorkerManager}; +use crate::worker_process::{WorkerError, WorkerManager, WriteStdinOptions}; use crate::worker_protocol::{TextStream, WorkerContent, WorkerErrorCode, WorkerReply}; const DEFAULT_WRITE_STDIN_TIMEOUT: Duration = Duration::from_secs(60); @@ -49,6 +49,7 @@ pub(crate) fn run( } else { None }; + worker.bootstrap_local_inherited_sandbox_state()?; worker.warm_start()?; let reply = wait_for_initial_prompt(&mut worker, server_timeout)?; render_visible_reply( @@ -105,8 +106,7 @@ pub(crate) fn run( String::new(), DEFAULT_WRITE_STDIN_TIMEOUT, server_timeout, - None, - false, + WriteStdinOptions::default(), ); render_visible_reply( response.as_mut(), @@ -143,8 +143,7 @@ pub(crate) fn run( input, DEFAULT_WRITE_STDIN_TIMEOUT, server_timeout, - None, - false, + WriteStdinOptions::default(), ); render_visible_reply( response.as_mut(), @@ -176,8 +175,7 @@ fn wait_for_initial_prompt( String::new(), DEFAULT_WRITE_STDIN_TIMEOUT, server_timeout, - None, - false, + WriteStdinOptions::default(), )?; while !reply_has_prompt(&last_reply) && Instant::now() < deadline { thread::sleep(INITIAL_PROMPT_POLL_INTERVAL); @@ -185,8 +183,7 @@ fn wait_for_initial_prompt( 
String::new(), DEFAULT_WRITE_STDIN_TIMEOUT, server_timeout, - None, - false, + WriteStdinOptions::default(), )?; } Ok(last_reply) diff --git a/src/install.rs b/src/install.rs index 5dd14486..b666c6a6 100644 --- a/src/install.rs +++ b/src/install.rs @@ -11,7 +11,7 @@ use toml_edit::{Array, DocumentMut, Item, Table, value}; const CODEX_TOOL_TIMEOUT_SECS: i64 = 1_800; const CODEX_TOOL_TIMEOUT_COMMENT: &str = "\n# mcp-repl handles the primary timeout; this higher Codex timeout is only an outer guard.\n"; -const CODEX_SANDBOX_INHERIT_COMMENT: &str = "\n# --sandbox inherit: use sandbox policy updates sent by Codex for this session.\n# If no update is sent, mcp-repl exits with an error.\n"; +const CODEX_SANDBOX_INHERIT_COMMENT: &str = "\n# --sandbox inherit: use sandbox policy metadata sent by Codex on each tool call.\n# mcp-repl fails closed if the tool call omits or malforms that metadata.\n"; pub const DEFAULT_R_SERVER_NAME: &str = "r"; pub const DEFAULT_PYTHON_SERVER_NAME: &str = "python"; @@ -852,7 +852,9 @@ name="demo" let text = fs::read_to_string(config).expect("read config"); assert!( - text.contains("--sandbox inherit: use sandbox policy updates sent by Codex"), + text.contains( + "--sandbox inherit: use sandbox policy metadata sent by Codex on each tool call" + ), "expected inherit comment in codex config" ); } @@ -878,7 +880,9 @@ name="demo" let text = fs::read_to_string(config).expect("read config"); assert!( - !text.contains("--sandbox inherit: use sandbox policy updates sent by Codex"), + !text.contains( + "--sandbox inherit: use sandbox policy metadata sent by Codex on each tool call" + ), "inherit-only comment should be removed when inherit is no longer configured" ); } diff --git a/src/main.rs b/src/main.rs index 2971c50d..03da0225 100644 --- a/src/main.rs +++ b/src/main.rs @@ -461,7 +461,7 @@ mcp-repl install [--client ]... 
[--interpreter [,r|pytho --debug-dir: optional base directory for per-startup debug artifacts (env: MCP_REPL_DEBUG_DIR)\n\ --interpreter: choose REPL interpreter (default: r; env MCP_REPL_INTERPRETER)\n\ --oversized-output: choose oversized-output handling (pager: default legacy modal pager; files: spill oversized replies to files)\n\ ---sandbox: base sandbox mode (inherit requires client sandbox update)\n\ +--sandbox: base sandbox mode (inherit uses client tool-call metadata; --debug-repl bootstraps local defaults)\n\ --add-writable-root / --add-writeable-root: append absolute writable root in argument order\n\ --add-allowed-domain: append allowed domain pattern in argument order\n\ --config: apply advanced ordered sandbox/network override (Codex-compatible keys)\n\ @@ -486,7 +486,10 @@ If no --interpreter is specified, install uses the full interpreter grid for eac mod tests { use super::*; use crate::sandbox::{SandboxPolicy, SandboxState}; - use crate::sandbox_cli::{SandboxConfigOperation, resolve_effective_sandbox_state}; + use crate::sandbox_cli::{ + SandboxConfigOperation, resolve_effective_sandbox_state, + sandbox_plan_requests_inherited_state, + }; #[test] fn parse_backend_arg_accepts_interpreter_flag_forms() { @@ -689,6 +692,54 @@ mod tests { } } + #[test] + fn later_explicit_sandbox_mode_clears_inherit_requirement() { + let cli_override_plan = SandboxCliPlan { + operations: vec![ + SandboxCliOperation::SetMode(SandboxModeArg::Inherit), + SandboxCliOperation::SetMode(SandboxModeArg::WorkspaceWrite), + ], + }; + assert!( + !sandbox_plan_requests_inherited_state(&cli_override_plan), + "a later explicit --sandbox override should disable per-call inherit metadata" + ); + + let config_override_plan = SandboxCliPlan { + operations: vec![ + SandboxCliOperation::SetMode(SandboxModeArg::Inherit), + SandboxCliOperation::Config( + parse_sandbox_config_override("sandbox_mode=workspace-write") + .expect("config override"), + ), + ], + }; + assert!( + 
!sandbox_plan_requests_inherited_state(&config_override_plan), + "a later sandbox_mode override should disable per-call inherit metadata" + ); + } + + #[test] + fn later_explicit_sandbox_mode_still_validates_earlier_ordered_ops() { + let plan = SandboxCliPlan { + operations: vec![ + SandboxCliOperation::SetMode(SandboxModeArg::ReadOnly), + SandboxCliOperation::AddWritableRoot(std::env::temp_dir()), + SandboxCliOperation::SetMode(SandboxModeArg::WorkspaceWrite), + ], + }; + + let err = resolve_effective_sandbox_state(&plan, None) + .expect_err("earlier invalid ordered op should still fail"); + assert!( + err.contains( + "--add-writable-root can only be used while sandbox mode is workspace-write" + ), + "unexpected error: {err}" + ); + } + #[test] fn empty_plan_uses_inherited_state_when_available() { let plan = SandboxCliPlan::default(); diff --git a/src/sandbox.rs b/src/sandbox.rs index d5029b90..c1058a09 100644 --- a/src/sandbox.rs +++ b/src/sandbox.rs @@ -17,8 +17,7 @@ use url::Url; use serde::{Deserialize, Serialize}; use tempfile::Builder; -pub const SANDBOX_STATE_CAPABILITY: &str = "codex/sandbox-state"; -pub const SANDBOX_STATE_METHOD: &str = "codex/sandbox-state/update"; +pub const SANDBOX_STATE_META_CAPABILITY: &str = "codex/sandbox-state-meta"; pub const MANAGED_ALLOWED_DOMAINS_ENV_KEY: &str = "MCP_REPL_ALLOWED_DOMAINS"; pub const MANAGED_DENIED_DOMAINS_ENV_KEY: &str = "MCP_REPL_DENIED_DOMAINS"; #[cfg(target_os = "macos")] @@ -27,7 +26,6 @@ pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_ pub const R_SESSION_TMPDIR_ENV: &str = "MCP_REPL_R_SESSION_TMPDIR"; #[cfg(target_os = "macos")] pub const SANDBOX_LOG_DENIALS_ENV: &str = "MCP_REPL_SANDBOX_LOG_DENIALS"; -pub const INITIAL_SANDBOX_STATE_ENV: &str = "MCP_REPL_INITIAL_SANDBOX_STATE"; #[cfg(target_os = "linux")] pub const LINUX_BWRAP_ENABLED_ENV: &str = "MCP_REPL_USE_LINUX_BWRAP"; #[cfg(target_os = "linux")] @@ -412,17 +410,16 @@ pub fn log_sandbox_policy_update(policy: 
&SandboxPolicy) { })); } -pub fn log_sandbox_state_event(method: &str, params: Option<&serde_json::Value>) { +pub fn log_sandbox_state_meta(meta: &serde_json::Value) { crate::event_log::log( - "sandbox_state_event_received", + "sandbox_state_meta_received", serde_json::json!({ - "method": method, - "params": params, + "meta": meta, }), ); append_sandbox_state_log_line(&serde_json::json!({ - "method": method, - "params": params, + "kind": "tool-call-meta", + "meta": meta, })); } @@ -438,14 +435,66 @@ pub struct SandboxStateUpdate { pub use_legacy_landlock: Option, } -pub fn initial_sandbox_state_update() -> Option { - let raw = std::env::var(INITIAL_SANDBOX_STATE_ENV).ok()?; - match serde_json::from_str::(&raw) { - Ok(update) => Some(update), - Err(err) => { - eprintln!("Invalid initial sandbox state: {err}"); - None +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct CodexSandboxStateMeta { + pub sandbox_policy: SandboxPolicy, + #[serde(default)] + pub codex_linux_sandbox_exe: Option, + pub sandbox_cwd: PathBuf, + #[serde(default)] + pub use_legacy_landlock: bool, +} + +pub fn sandbox_state_update_from_codex_meta( + meta: &serde_json::Value, +) -> Result { + if let Some(field) = codex_unsupported_sandbox_policy_field(meta) { + return Err(format!( + "Codex sandbox metadata with {field} is not supported" + )); + } + let parsed = serde_json::from_value::(meta.clone()) + .map_err(|err| format!("failed to parse Codex sandbox state metadata: {err}"))?; + + if !parsed.sandbox_cwd.is_absolute() { + return Err(format!( + "Codex sandbox metadata requires an absolute sandboxCwd, got: {}", + parsed.sandbox_cwd.display() + )); + } + if let SandboxPolicy::WorkspaceWrite { writable_roots, .. 
} = &parsed.sandbox_policy + && let Some(root) = writable_roots.iter().find(|root| !root.is_absolute()) + { + return Err(format!( + "Codex sandbox metadata requires absolute sandboxPolicy.writable_roots entries, got: {}", + root.display() + )); + } + + Ok(SandboxStateUpdate { + sandbox_policy: parsed.sandbox_policy, + sandbox_cwd: Some(parsed.sandbox_cwd), + // Codex reports how its own Linux helper is configured, but mcp-repl's + // optional bwrap stage is a separate local best-effort knob. + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }) +} + +fn codex_unsupported_sandbox_policy_field(meta: &serde_json::Value) -> Option { + let policy = meta + .get("sandboxPolicy") + .and_then(serde_json::Value::as_object)?; + match policy.get("type").and_then(serde_json::Value::as_str) { + Some("workspace-write") if policy.contains_key("read_only_access") => { + Some("sandboxPolicy.read_only_access".to_string()) } + Some("read-only") => policy + .keys() + .find(|key| key.as_str() != "type") + .map(|key| format!("sandboxPolicy.{key}")), + _ => None, } } @@ -2307,6 +2356,7 @@ mod macos_denials { #[cfg(test)] mod tests { use super::*; + use serde_json::json; #[cfg(target_os = "macos")] use std::collections::HashMap; use std::path::Path; @@ -2700,52 +2750,57 @@ mod tests { ); } - #[cfg(target_os = "linux")] #[test] - fn sandbox_state_update_use_legacy_landlock_maps_to_internal_bwrap_mode() { - let mut state = SandboxState { - use_linux_sandbox_bwrap: true, - ..SandboxState::default() - }; - - let changed = state.apply_update(SandboxStateUpdate { - sandbox_policy: SandboxPolicy::WorkspaceWrite { - writable_roots: Vec::new(), - network_access: false, - exclude_tmpdir_env_var: false, - exclude_slash_tmp: false, + fn codex_sandbox_state_meta_does_not_force_internal_linux_bwrap() { + let sandbox_cwd = std::env::temp_dir().join("mcp-repl-codex-meta-cwd"); + let update = sandbox_state_update_from_codex_meta(&json!({ + "sandboxPolicy": { + "type": "danger-full-access" }, - 
sandbox_cwd: None, - use_linux_sandbox_bwrap: None, - use_legacy_landlock: Some(true), - }); - assert!( - changed, - "expected legacy-landlock update to modify sandbox state" - ); + "sandboxCwd": sandbox_cwd, + "useLegacyLandlock": false, + "codexLinuxSandboxExe": if cfg!(target_os = "linux") { + serde_json::Value::String("/tmp/codex-linux-sandbox".to_string()) + } else { + serde_json::Value::Null + }, + })) + .expect("codex sandbox metadata"); + assert!( - !state.use_linux_sandbox_bwrap, - "legacy Landlock updates should disable mcp-repl's internal bwrap stage" + update.use_linux_sandbox_bwrap.is_none(), + "Codex tool-call metadata should not force mcp-repl's internal best-effort bwrap mode" ); + } - let changed = state.apply_update(SandboxStateUpdate { - sandbox_policy: SandboxPolicy::WorkspaceWrite { - writable_roots: Vec::new(), - network_access: false, - exclude_tmpdir_env_var: false, - exclude_slash_tmp: false, + #[test] + fn codex_sandbox_state_meta_rejects_relative_workspace_write_roots() { + let sandbox_cwd = std::env::temp_dir().join("mcp-repl-codex-meta-cwd"); + let err = sandbox_state_update_from_codex_meta(&json!({ + "sandboxPolicy": { + "type": "workspace-write", + "writable_roots": ["relative-root"], + "network_access": false, + "exclude_tmpdir_env_var": false, + "exclude_slash_tmp": false }, - sandbox_cwd: None, - use_linux_sandbox_bwrap: None, - use_legacy_landlock: Some(false), - }); + "sandboxCwd": sandbox_cwd, + "useLegacyLandlock": false, + "codexLinuxSandboxExe": if cfg!(target_os = "linux") { + serde_json::Value::String("/tmp/codex-linux-sandbox".to_string()) + } else { + serde_json::Value::Null + }, + })) + .expect_err("relative writable roots should fail closed"); + assert!( - changed, - "expected non-legacy update to modify sandbox state" + err.contains("sandboxPolicy.writable_roots"), + "expected writable_roots validation error, got: {err}" ); assert!( - state.use_linux_sandbox_bwrap, - "non-legacy updates should re-enable mcp-repl's 
internal bwrap stage" + err.contains("relative-root"), + "expected failing relative root to be named in the error, got: {err}" ); } diff --git a/src/sandbox_cli.rs b/src/sandbox_cli.rs index 3ec7b713..f974c3d5 100644 --- a/src/sandbox_cli.rs +++ b/src/sandbox_cli.rs @@ -54,20 +54,45 @@ pub struct SandboxCliPlan { pub operations: Vec, } -pub fn sandbox_plan_requests_inherited_state(plan: &SandboxCliPlan) -> bool { - plan.operations.iter().any(|op| { - matches!( - op, - SandboxCliOperation::SetMode(SandboxModeArg::Inherit) - | SandboxCliOperation::Config(SandboxConfigOperation::SetMode( - SandboxModeArg::Inherit - )) - ) - }) +fn last_mode_operation(plan: &SandboxCliPlan) -> Option<(usize, SandboxModeArg)> { + plan.operations + .iter() + .enumerate() + .rev() + .find_map(|(index, op)| match op { + SandboxCliOperation::SetMode(mode) + | SandboxCliOperation::Config(SandboxConfigOperation::SetMode(mode)) => { + Some((index, *mode)) + } + _ => None, + }) } -pub fn is_missing_inherited_sandbox_state_error(message: &str) -> bool { - message == MISSING_INHERITED_SANDBOX_STATE_MESSAGE +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum SandboxValidationMode { + ReadOnly, + WorkspaceWrite, + DangerFullAccess, + DeferredInherit, + UnresolvedInherit, +} + +impl SandboxValidationMode { + fn from_policy(policy: &SandboxPolicy) -> Self { + match policy { + SandboxPolicy::ReadOnly => Self::ReadOnly, + SandboxPolicy::WorkspaceWrite { .. } => Self::WorkspaceWrite, + SandboxPolicy::DangerFullAccess | SandboxPolicy::ExternalSandbox { .. 
} => { + Self::DangerFullAccess + } + } + } +} + +pub fn sandbox_plan_requests_inherited_state(plan: &SandboxCliPlan) -> bool { + last_mode_operation(plan) + .map(|(_, mode)| mode) + .is_some_and(|mode| matches!(mode, SandboxModeArg::Inherit)) } pub fn parse_sandbox_config_override(raw: &str) -> Result { @@ -181,8 +206,14 @@ pub fn resolve_effective_sandbox_state_with_defaults( return Ok(inherited.cloned().unwrap_or_else(|| defaults.clone())); } + validate_sandbox_plan_operations(plan, inherited, defaults)?; + + let start_index = match last_mode_operation(plan) { + Some((index, mode)) if !matches!(mode, SandboxModeArg::Inherit) => index, + _ => 0, + }; let mut state = defaults.clone(); - for op in &plan.operations { + for op in &plan.operations[start_index..] { match op { SandboxCliOperation::SetMode(mode) => { apply_mode(&mut state, *mode, inherited, defaults)? @@ -225,6 +256,106 @@ pub fn resolve_effective_sandbox_state_with_defaults( Ok(state) } +fn validate_sandbox_plan_operations( + plan: &SandboxCliPlan, + inherited: Option<&SandboxState>, + defaults: &SandboxState, +) -> Result<(), String> { + let allow_deferred_workspace_write = + inherited.is_none() && sandbox_plan_requests_inherited_state(plan); + let mut mode = SandboxValidationMode::from_policy(&defaults.sandbox_policy); + for op in &plan.operations { + match op { + SandboxCliOperation::SetMode(next_mode) => { + mode = validation_mode_for_mode_arg( + *next_mode, + inherited, + allow_deferred_workspace_write, + ); + } + SandboxCliOperation::AddWritableRoot(_) => { + require_workspace_write_mode( + mode, + "--add-writable-root can only be used while sandbox mode is workspace-write", + )?; + } + SandboxCliOperation::AddAllowedDomain(domain) => { + if domain.trim().is_empty() { + return Err("--add-allowed-domain requires a non-empty value".to_string()); + } + } + SandboxCliOperation::Config(config_op) => match config_op { + SandboxConfigOperation::SetMode(next_mode) => { + mode = 
validation_mode_for_mode_arg( + *next_mode, + inherited, + allow_deferred_workspace_write, + ); + } + SandboxConfigOperation::SetWorkspaceNetworkAccess(_) => { + require_workspace_write_mode( + mode, + "sandbox_workspace_write.network_access requires workspace-write mode", + )? + } + SandboxConfigOperation::SetWorkspaceWritableRoots(_) => { + require_workspace_write_mode( + mode, + "sandbox_workspace_write.writable_roots requires workspace-write mode", + )? + } + SandboxConfigOperation::SetWorkspaceExcludeTmpdirEnvVar(_) => { + require_workspace_write_mode( + mode, + "sandbox_workspace_write.exclude_tmpdir_env_var requires workspace-write mode", + )? + } + SandboxConfigOperation::SetWorkspaceExcludeSlashTmp(_) => { + require_workspace_write_mode( + mode, + "sandbox_workspace_write.exclude_slash_tmp requires workspace-write mode", + )? + } + SandboxConfigOperation::SetAllowedDomains(_) + | SandboxConfigOperation::SetDeniedDomains(_) + | SandboxConfigOperation::SetAllowLocalBinding(_) + | SandboxConfigOperation::SetUseLinuxSandboxBwrap(_) => {} + }, + } + } + Ok(()) +} + +fn validation_mode_for_mode_arg( + mode: SandboxModeArg, + inherited: Option<&SandboxState>, + allow_deferred_workspace_write: bool, +) -> SandboxValidationMode { + match mode { + SandboxModeArg::Inherit => inherited + .map(|state| SandboxValidationMode::from_policy(&state.sandbox_policy)) + .unwrap_or(if allow_deferred_workspace_write { + SandboxValidationMode::DeferredInherit + } else { + SandboxValidationMode::UnresolvedInherit + }), + SandboxModeArg::ReadOnly => SandboxValidationMode::ReadOnly, + SandboxModeArg::WorkspaceWrite => SandboxValidationMode::WorkspaceWrite, + SandboxModeArg::DangerFullAccess => SandboxValidationMode::DangerFullAccess, + } +} + +fn require_workspace_write_mode(mode: SandboxValidationMode, message: &str) -> Result<(), String> { + if matches!( + mode, + SandboxValidationMode::WorkspaceWrite | SandboxValidationMode::DeferredInherit + ) { + Ok(()) + } else { + 
Err(message.to_string()) + } +} + fn apply_mode( state: &mut SandboxState, mode: SandboxModeArg, From c18a084ed10bba0ac1acefd58081150b4946931a Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 11:22:44 -0400 Subject: [PATCH 2/5] fix: apply inherit metadata per tool call --- src/server.rs | 523 +++++++++++------- src/server/response.rs | 15 +- src/worker_process.rs | 1180 +++++++++++++++++++++++++++++++++++----- 3 files changed, 1378 insertions(+), 340 deletions(-) diff --git a/src/server.rs b/src/server.rs index dd8191fb..4acf3077 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,10 +1,10 @@ use rmcp::handler::server::tool::ToolRouter; use rmcp::handler::server::wrapper::Parameters; use rmcp::model::{ - CallToolResult, CustomNotification, CustomRequest, CustomResult, ErrorCode, - ErrorData as McpError, JsonObject, ProtocolVersion, ServerCapabilities, ServerInfo, + CallToolResult, ErrorData as McpError, JsonObject, Meta, ProtocolVersion, ServerCapabilities, + ServerInfo, }; -use rmcp::{RoleServer, ServerHandler, tool, tool_handler, tool_router}; +use rmcp::{ServerHandler, tool, tool_handler, tool_router}; use schemars::JsonSchema; use serde::Deserialize; use serde_json::json; @@ -27,9 +27,17 @@ use self::timeouts::{ use crate::backend::Backend; use crate::oversized_output::OversizedOutputMode; -use crate::sandbox::{SANDBOX_STATE_CAPABILITY, SANDBOX_STATE_METHOD, SandboxStateUpdate}; -use crate::sandbox_cli::SandboxCliPlan; -use crate::worker_process::{WorkerError, WorkerManager}; +use crate::sandbox::{SANDBOX_STATE_META_CAPABILITY, SandboxStateUpdate}; +use crate::sandbox_cli::{ + MISSING_INHERITED_SANDBOX_STATE_MESSAGE, SandboxCliPlan, sandbox_plan_requests_inherited_state, +}; +use crate::worker_process::{ + WorkerError, WorkerManager, WriteStdinControlAction, WriteStdinOptions, + is_prechecked_follow_up_requires_meta, split_write_stdin_control_prefix, +}; +use crate::worker_protocol::{WorkerContent, WorkerReply}; + +const 
BUSY_FOLLOW_UP_RECHECK_WAIT: Duration = Duration::from_millis(25); #[cfg(test)] fn repl_tool_description_for_backend( @@ -54,6 +62,7 @@ fn repl_tool_description_for_backend( #[derive(Clone)] struct SharedServer { + accepts_sandbox_state_meta: bool, state: Arc>, } @@ -69,7 +78,9 @@ impl SharedServer { sandbox_plan: SandboxCliPlan, oversized_output: OversizedOutputMode, ) -> Result { + let accepts_sandbox_state_meta = sandbox_plan_requests_inherited_state(&sandbox_plan); Ok(Self { + accepts_sandbox_state_meta, state: Arc::new(Mutex::new(ServerState { worker: WorkerManager::new(backend, sandbox_plan, oversized_output)?, response: ResponseState::new()?, @@ -82,6 +93,10 @@ impl SharedServer { Arc::clone(&self.state) } + fn accepts_sandbox_state_meta(&self) -> bool { + self.accepts_sandbox_state_meta + } + /// Runs a closure with exclusive access to the combined worker/response state. /// This keeps reply finalization in the same critical section as the worker call it seals. async fn run_state(&self, f: F) -> Result @@ -98,27 +113,271 @@ impl SharedServer { .map_err(|err| McpError::internal_error(err.to_string(), None)) } + fn sandbox_state_update_for_tool_call( + &self, + meta: &Meta, + ) -> Result, WorkerError> { + Self::sandbox_state_update_for_tool_call_meta(self.accepts_sandbox_state_meta(), meta) + } + + fn sandbox_state_update_for_tool_call_meta( + accepts_sandbox_state_meta: bool, + meta: &Meta, + ) -> Result, WorkerError> { + if !accepts_sandbox_state_meta { + return Ok(None); + } + + let Some(raw_meta) = meta.get(SANDBOX_STATE_META_CAPABILITY) else { + return Err(WorkerError::Sandbox( + MISSING_INHERITED_SANDBOX_STATE_MESSAGE.to_string(), + )); + }; + crate::sandbox::log_sandbox_state_meta(raw_meta); + let update = crate::sandbox::sandbox_state_update_from_codex_meta(raw_meta) + .map_err(WorkerError::Sandbox)?; + Ok(Some(update)) + } + + fn optional_sandbox_state_update_for_tool_call_meta( + accepts_sandbox_state_meta: bool, + meta: &Meta, + ) -> Result, 
WorkerError> { + if !accepts_sandbox_state_meta { + return Ok(None); + } + + let Some(raw_meta) = meta.get(SANDBOX_STATE_META_CAPABILITY) else { + return Ok(None); + }; + + match crate::sandbox::sandbox_state_update_from_codex_meta(raw_meta) { + Ok(update) => Ok(Some(update)), + Err(_) => Ok(None), + } + } + + fn apply_tool_call_sandbox_state( + state: &mut ServerState, + update: Option, + ) -> Result<(), WorkerError> { + let Some(update) = update else { + return Ok(()); + }; + + state + .worker + .update_sandbox_state(update, SANDBOX_UPDATE_TIMEOUT)?; + Ok(()) + } + + fn stage_tool_call_sandbox_state_for_reset( + state: &mut ServerState, + update: Option, + ) -> Result<(), WorkerError> { + let Some(update) = update else { + return Ok(()); + }; + + state.worker.stage_sandbox_state_update(update) + } + /// Executes one `repl` call and immediately finalizes the visible reply on the server side. /// The response layer needs `pending_request` after the worker call to decide transcript reuse. 
async fn run_write_input( &self, input: String, timeout: Duration, + meta: Meta, ) -> Result { let worker_timeout = apply_tool_call_margin(timeout); let server_timeout = apply_safety_margin(timeout); + let accepts_sandbox_state_meta = self.accepts_sandbox_state_meta(); self.run_state(move |state| { let timeout_bundle_reuse = timeout_bundle_reuse_for_input(&input); let raw_input = input; let use_inline_pager_materialization = matches!(state.oversized_output, OversizedOutputMode::Pager); - let result = state.worker.write_stdin( - raw_input.clone(), - worker_timeout, - server_timeout, - None, - false, - ); + state.worker.refresh_timeout_marker(); + let mut control_input_on_meta_error = None; + let parse_tool_call_sandbox_state = || { + SharedServer::sandbox_state_update_for_tool_call_meta( + accepts_sandbox_state_meta, + &meta, + ) + }; + let parse_optional_tool_call_sandbox_state = || { + SharedServer::optional_sandbox_state_update_for_tool_call_meta( + accepts_sandbox_state_meta, + &meta, + ) + }; + let sandbox_state_result = if raw_input.is_empty() { + // Empty-input polls may drain an existing request. Pass current + // metadata through so a session-end reset can stage it before + // respawning, but only apply it immediately when answering the + // call itself requires a spawn. + match state.worker.empty_input_requires_spawn() { + Ok(true) => { + if let Err(err) = parse_tool_call_sandbox_state().and_then(|update| { + SharedServer::apply_tool_call_sandbox_state(state, update) + }) { + Err(err) + } else { + Ok(None) + } + } + Ok(false) => parse_optional_tool_call_sandbox_state(), + Err(err) => Err(err), + } + } else { + // A timed-out request still owns busy follow-ups, but a fresh + // non-empty call after that request has already settled must + // run under the current tool call's sandbox metadata. 
+ let mut deferred_sandbox_state_update = None; + if state.worker.pending_request() { + state + .worker + .refresh_timeout_marker_with_wait(BUSY_FOLLOW_UP_RECHECK_WAIT); + } + if state.worker.pending_request() { + if let Some((control, _remaining)) = + split_write_stdin_control_prefix(&raw_input) + { + control_input_on_meta_error = Some(( + match control { + WriteStdinControlAction::Interrupt => "\u{3}".to_string(), + WriteStdinControlAction::Restart => "\u{4}".to_string(), + }, + timeout_bundle_reuse_for_input(&raw_input), + matches!(control, WriteStdinControlAction::Interrupt), + )); + if let Err(err) = parse_tool_call_sandbox_state().map(|update| { + control_input_on_meta_error = None; + deferred_sandbox_state_update = update; + }) { + Err(err) + } else { + Ok(deferred_sandbox_state_update) + } + } else { + parse_optional_tool_call_sandbox_state() + } + } else { + match state + .worker + .nonexecuting_follow_up_uses_existing_state(&raw_input) + { + Ok(true) => { + // Local follow-ups like bare Ctrl-C or active pager + // commands can keep using existing state. Exact + // Ctrl-C is only in this path when it will not + // respawn a worker. 
+ Ok(None) + } + Ok(false) => { + if matches!( + split_write_stdin_control_prefix(&raw_input), + Some((WriteStdinControlAction::Restart, _)) + ) { + if let Err(err) = parse_tool_call_sandbox_state().map(|update| { + deferred_sandbox_state_update = update; + }) { + Err(err) + } else { + Ok(deferred_sandbox_state_update) + } + } else { + match parse_tool_call_sandbox_state().and_then(|update| { + SharedServer::apply_tool_call_sandbox_state(state, update) + }) { + Ok(()) => Ok(None), + Err(err) => Err(err), + } + } + } + Err(err) => Err(err), + } + } + }; + let deferred_sandbox_state_update = match sandbox_state_result { + Ok(update) => update, + Err(err) => { + if let Some((control_input, timeout_bundle_reuse, detach_control_reply)) = + control_input_on_meta_error.take() + { + let control_result = state.worker.write_stdin( + control_input, + worker_timeout, + server_timeout, + WriteStdinOptions { + pending_state_prechecked: true, + ..WriteStdinOptions::default() + }, + ); + let pending_request_after = state.worker.pending_request(); + let detached_prefix_item_count = if detach_control_reply { + control_result + .as_ref() + .map_or(0, prefixed_worker_reply_item_count) + } else { + 0 + }; + let mut result = finalize_visible_reply( + state, + control_result, + pending_request_after, + timeout_bundle_reuse, + detached_prefix_item_count, + use_inline_pager_materialization + && !pending_request_after + && !state.response.has_timeout_bundle_state(), + ); + result.is_error = Some(true); + result + .content + .push(rmcp::model::Content::text(format!("worker error: {err}"))); + strip_text_stream_meta(&mut result); + return result; + } + let mut result = state.response.finalize_local_error(err); + strip_text_stream_meta(&mut result); + return result; + } + }; + let mut write_options = WriteStdinOptions { + pending_state_prechecked: true, + deferred_sandbox_state_update, + ..WriteStdinOptions::default() + }; + let mut retried_after_meta_refresh = false; + let result = loop { + 
let result = state.worker.write_stdin( + raw_input.clone(), + worker_timeout, + server_timeout, + write_options.clone(), + ); + match result { + Err(err) + if !retried_after_meta_refresh + && is_prechecked_follow_up_requires_meta(&err) => + { + match parse_tool_call_sandbox_state().and_then(|update| { + SharedServer::apply_tool_call_sandbox_state(state, update) + }) { + Ok(()) => { + retried_after_meta_refresh = true; + write_options.pending_state_prechecked = false; + write_options.deferred_sandbox_state_update = None; + continue; + } + Err(err) => break Err(err), + } + } + other => break other, + } + }; let pending_request_after = state.worker.pending_request(); let detached_prefix_item_count = state.worker.detached_prefix_item_count(); let mut result = finalize_visible_reply( @@ -136,170 +395,18 @@ impl SharedServer { }) .await } - - async fn on_custom_request(&self, request: CustomRequest) -> Result { - crate::event_log::log( - "custom_request_received", - json!({ - "method": request.method.clone(), - "params": request.params.clone(), - }), - ); - crate::sandbox::log_sandbox_state_event(&request.method, request.params.as_ref()); - if request.method != SANDBOX_STATE_METHOD { - crate::event_log::log( - "custom_request_rejected", - json!({ - "method": request.method.clone(), - "reason": "method_not_found", - }), - ); - return Err(McpError::new( - ErrorCode::METHOD_NOT_FOUND, - request.method, - None, - )); - } - - let update = request - .params_as::() - .map_err(|err| McpError::invalid_params(err.to_string(), None))? 
- .ok_or_else(|| McpError::invalid_params("missing sandbox state params", None))?; - let update_for_log = serde_json::to_value(&update) - .unwrap_or_else(|err| json!({"serialize_error": err.to_string()})); - - let outcome = self - .run_state(move |state| { - let outcome = state - .worker - .update_sandbox_state(update, SANDBOX_UPDATE_TIMEOUT); - if matches!(outcome, Ok(true)) - && let Err(err) = state.response.clear_active_timeout_bundle() - { - return Err(err); - } - outcome - }) - .await?; - match outcome { - Ok(changed) => { - crate::event_log::log( - "sandbox_state_request_applied", - json!({ - "changed": changed, - "update": update_for_log, - }), - ); - } - Err(err) => { - crate::event_log::log( - "sandbox_state_request_failed", - json!({ - "error": err.to_string(), - "update": update_for_log, - }), - ); - return Err(McpError::internal_error(err.to_string(), None)); - } - } - - Ok(CustomResult::new(json!({}))) - } - - async fn on_custom_notification(&self, notification: CustomNotification) { - crate::event_log::log( - "custom_notification_received", - json!({ - "method": notification.method.clone(), - "params": notification.params.clone(), - }), - ); - crate::sandbox::log_sandbox_state_event(&notification.method, notification.params.as_ref()); - if notification.method != SANDBOX_STATE_METHOD { - return; - } - - let update = match notification.params_as::() { - Ok(Some(update)) => update, - Ok(None) => { - eprintln!("sandbox update missing params"); - crate::event_log::log( - "sandbox_state_notification_failed", - json!({ - "error": "missing sandbox state params", - }), - ); - return; - } - Err(err) => { - eprintln!("sandbox update parse error: {err}"); - crate::event_log::log( - "sandbox_state_notification_failed", - json!({ - "error": err.to_string(), - }), - ); - return; - } - }; - let update_for_log = serde_json::to_value(&update) - .unwrap_or_else(|err| json!({"serialize_error": err.to_string()})); - - match self - .run_state(move |state| { - let outcome = 
state - .worker - .update_sandbox_state(update, SANDBOX_UPDATE_TIMEOUT); - if matches!(outcome, Ok(true)) - && let Err(err) = state.response.clear_active_timeout_bundle() - { - return Err(err); - } - outcome - }) - .await - { - Ok(Ok(changed)) => { - crate::event_log::log( - "sandbox_state_notification_applied", - json!({ - "changed": changed, - "update": update_for_log.clone(), - }), - ); - } - Ok(Err(err)) => { - eprintln!("sandbox update failed: {err}"); - crate::event_log::log( - "sandbox_state_notification_failed", - json!({ - "error": err.to_string(), - "update": update_for_log.clone(), - }), - ); - } - Err(err) => { - eprintln!("sandbox update failed: {err}"); - crate::event_log::log( - "sandbox_state_notification_failed", - json!({ - "error": err.to_string(), - "update": update_for_log.clone(), - }), - ); - } - } - } } -fn server_info() -> ServerInfo { - ServerInfo::new( +fn server_info(advertise_sandbox_capabilities: bool) -> ServerInfo { + let capabilities = if advertise_sandbox_capabilities { ServerCapabilities::builder() .enable_tools() .enable_experimental_with(sandbox_capabilities()) - .build(), - ) - .with_protocol_version(ProtocolVersion::V_2025_06_18) + .build() + } else { + ServerCapabilities::builder().enable_tools().build() + }; + ServerInfo::new(capabilities).with_protocol_version(ProtocolVersion::V_2025_06_18) } #[derive(Clone, Copy)] @@ -327,6 +434,7 @@ where "tool": tool.as_ref(), "arguments": arguments, "task": task, + "meta": context.request_context.meta.clone(), }) }); let result = self.inner.call(context).await; @@ -384,7 +492,7 @@ macro_rules! define_backend_tool_server { } fn get_info(&self) -> ServerInfo { - server_info() + server_info(self.shared.accepts_sandbox_state_meta()) } fn logged_tool_router(&self) -> LoggedToolRouter<'_, Self> { @@ -400,10 +508,14 @@ macro_rules! 
define_backend_tool_server { open_world_hint = false ) )] - async fn repl(&self, params: Parameters) -> Result { + async fn repl( + &self, + meta: Meta, + params: Parameters, + ) -> Result { let ReplArgs { input, timeout_ms } = params.0; let timeout = resolve_timeout_ms(timeout_ms, "repl", true)?; - self.shared.run_write_input(input, timeout).await + self.shared.run_write_input(input, timeout, meta).await } #[doc = include_str!("../docs/tool-descriptions/repl_reset_tool.md")] @@ -417,13 +529,30 @@ macro_rules! define_backend_tool_server { )] async fn repl_reset( &self, + meta: Meta, _params: Parameters, ) -> Result { let timeout = parse_timeout(None, "repl_reset", false)?; let worker_timeout = apply_tool_call_margin(timeout); + let sandbox_state_update = self.shared.sandbox_state_update_for_tool_call(&meta); let result = self .shared .run_state(move |state| { + let sandbox_state_result = match &sandbox_state_update { + Ok(update) => SharedServer::stage_tool_call_sandbox_state_for_reset( + state, + update.clone(), + ), + Err(WorkerError::Sandbox(message)) => { + Err(WorkerError::Sandbox(message.clone())) + } + Err(err) => Err(WorkerError::Sandbox(err.to_string())), + }; + if let Err(err) = sandbox_state_result { + let mut result = state.response.finalize_local_error(err); + strip_text_stream_meta(&mut result); + return result; + } let result = state.worker.restart(worker_timeout); let pending_request_after = state.worker.pending_request(); let mut result = finalize_visible_reply( @@ -447,22 +576,6 @@ macro_rules! 
define_backend_tool_server { fn get_info(&self) -> ServerInfo { $server_ty::get_info(self) } - - async fn on_custom_request( - &self, - request: CustomRequest, - _context: rmcp::service::RequestContext, - ) -> Result { - self.shared.on_custom_request(request).await - } - - async fn on_custom_notification( - &self, - notification: CustomNotification, - _context: rmcp::service::NotificationContext, - ) { - self.shared.on_custom_notification(notification).await - } } }; } @@ -494,6 +607,32 @@ fn finalize_visible_reply( } } +fn prefixed_worker_reply_item_count(reply: &WorkerReply) -> usize { + let WorkerReply::Output { + contents, prompt, .. + } = reply; + let Some(prompt_text) = prompt.as_deref() else { + return contents.len(); + }; + if prompt_text.is_empty() { + return contents.len(); + } + let Some(idx) = contents + .iter() + .rposition(|content| matches!(content, WorkerContent::ContentText { .. })) + else { + return contents.len(); + }; + let WorkerContent::ContentText { text, .. } = &contents[idx] else { + return contents.len(); + }; + if matches!(text.strip_suffix(prompt_text), Some("")) { + contents.len().saturating_sub(1) + } else { + contents.len() + } +} + define_backend_tool_server!(RFilesToolServer, "../docs/tool-descriptions/repl_tool_r.md"); define_backend_tool_server!( RPagerToolServer, @@ -533,7 +672,7 @@ fn sandbox_capabilities() -> BTreeMap { let mut capability = JsonObject::new(); capability.insert("version".to_string(), json!("1.0.0")); let mut experimental = BTreeMap::new(); - experimental.insert(SANDBOX_STATE_CAPABILITY.to_string(), capability); + experimental.insert(SANDBOX_STATE_META_CAPABILITY.to_string(), capability); experimental } diff --git a/src/server/response.rs b/src/server/response.rs index 50a3f169..728494b2 100644 --- a/src/server/response.rs +++ b/src/server/response.rs @@ -278,11 +278,17 @@ impl ResponseState { "dropping closed timeout bundle after output-bundle error: {cleanup_err}" ); } - 
finalize_batch(vec![Content::text(format!("worker error: {err}"))], true) + finalize_error_batch(vec![Content::text(format!("worker error: {err}"))]) } } } + /// Returns a local pre-execution error without disturbing any active timeout-bundle state. + pub(crate) fn finalize_local_error(&mut self, err: WorkerError) -> CallToolResult { + eprintln!("worker write stdin error: {err}"); + finalize_error_batch(vec![Content::text(format!("worker error: {err}"))]) + } + /// Materializes a worker reply inline without applying files-mode bundle compaction. pub(crate) fn materialize_worker_result_inline( &mut self, @@ -301,7 +307,7 @@ impl ResponseState { } Err(err) => { eprintln!("worker write stdin error: {err}"); - finalize_batch(vec![Content::text(format!("worker error: {err}"))], true) + finalize_error_batch(vec![Content::text(format!("worker error: {err}"))]) } } } @@ -1831,6 +1837,11 @@ pub(crate) fn finalize_batch(mut contents: Vec, is_error: bool) -> Call CallToolResult::success(contents) } +fn finalize_error_batch(mut contents: Vec) -> CallToolResult { + ensure_nonempty_contents(&mut contents); + CallToolResult::error(contents) +} + pub(crate) fn strip_text_stream_meta(result: &mut CallToolResult) { for item in &mut result.content { let RawContent::Text(text) = &mut item.raw else { diff --git a/src/worker_process.rs b/src/worker_process.rs index 00bb61e3..1832d271 100644 --- a/src/worker_process.rs +++ b/src/worker_process.rs @@ -43,8 +43,7 @@ use crate::sandbox::{ }; use crate::sandbox_cli::{ MISSING_INHERITED_SANDBOX_STATE_MESSAGE, SandboxCliPlan, - is_missing_inherited_sandbox_state_error, resolve_effective_sandbox_state_with_defaults, - sandbox_plan_requests_inherited_state, + resolve_effective_sandbox_state_with_defaults, sandbox_plan_requests_inherited_state, }; use crate::worker_protocol::{ ContentOrigin, TextStream, WORKER_MODE_ARG, WorkerContent, WorkerErrorCode, WorkerReply, @@ -175,6 +174,8 @@ const WORKER_MEM_GUARDRAIL_IDLE_INTERVAL: Duration = 
Duration::from_secs(60); #[cfg(target_os = "linux")] const LINUX_BWRAP_FALLBACK_NOTICE: &str = "[repl] Linux bubblewrap sandbox unavailable; continuing without bwrap\n"; +const PRECHECKED_FOLLOW_UP_REQUIRES_META_MESSAGE: &str = + "worker follow-up needs current sandbox metadata after precheck"; #[derive(Debug)] pub enum WorkerError { @@ -185,6 +186,14 @@ pub enum WorkerError { Guardrail(String), } +pub(crate) fn is_prechecked_follow_up_requires_meta(err: &WorkerError) -> bool { + matches!(err, WorkerError::Protocol(message) if message == PRECHECKED_FOLLOW_UP_REQUIRES_META_MESSAGE) +} + +fn prechecked_follow_up_requires_meta_error() -> WorkerError { + WorkerError::Protocol(PRECHECKED_FOLLOW_UP_REQUIRES_META_MESSAGE.to_string()) +} + trait BackendDriver: Send { fn prepare_input_payload(&self, text: &str) -> Vec; fn on_input_start(&mut self, text: &str, ipc: &ServerIpcConnection); @@ -520,13 +529,36 @@ fn completion_info_from_ipc(ipc: &ServerIpcConnection, session_end_seen: bool) - } } +const DEFERRED_SANDBOX_UPDATE_TIMEOUT: Duration = Duration::from_secs(5); + #[derive(Clone, Copy)] -enum WriteStdinControlAction { +pub(crate) enum WriteStdinControlAction { Interrupt, Restart, } -fn split_write_stdin_control_prefix(input: &str) -> Option<(WriteStdinControlAction, &str)> { +#[derive(Debug, Clone, Default)] +pub(crate) struct WriteStdinOptions { + pub page_bytes_override: Option, + pub echo_input: bool, + pub pending_state_prechecked: bool, + pub deferred_sandbox_state_update: Option, +} + +impl WriteStdinOptions { + fn control_tail(&self, deferred_sandbox_state_update: Option) -> Self { + Self { + page_bytes_override: self.page_bytes_override, + echo_input: self.echo_input, + pending_state_prechecked: false, + deferred_sandbox_state_update, + } + } +} + +pub(crate) fn split_write_stdin_control_prefix( + input: &str, +) -> Option<(WriteStdinControlAction, &str)> { let first = input.chars().next()?; let action = match first { '\u{3}' => 
WriteStdinControlAction::Interrupt, @@ -572,7 +604,6 @@ pub struct WorkerManager { backend: Backend, process: Option, sandbox_plan: SandboxCliPlan, - awaiting_initial_sandbox_state_update: bool, inherited_sandbox_state: Option, sandbox_defaults: SandboxState, sandbox_state: SandboxState, @@ -597,6 +628,12 @@ pub struct WorkerManager { guardrail: GuardrailShared, } +struct PreparedSandboxStateUpdate { + update_for_log: serde_json::Value, + changed: bool, + missing_before: bool, +} + impl WorkerManager { pub fn new( backend: Backend, @@ -605,38 +642,25 @@ impl WorkerManager { ) -> Result { let exe_path = std::env::current_exe()?; let sandbox_defaults = crate::sandbox::sandbox_state_defaults_with_environment(); - let mut inherited_state = sandbox_defaults.clone(); - let mut inherited_update_received = false; - if let Some(update) = crate::sandbox::initial_sandbox_state_update() { - inherited_state.apply_update(update); - inherited_update_received = true; - } - let inherited = if inherited_update_received { - Some(&inherited_state) + let plan_requests_inherited_state = sandbox_plan_requests_inherited_state(&sandbox_plan); + let sandbox_state = if plan_requests_inherited_state { + sandbox_defaults.clone() } else { - None + resolve_effective_sandbox_state_with_defaults(&sandbox_plan, None, &sandbox_defaults) + .map_err(WorkerError::Sandbox)? }; - let (sandbox_state, awaiting_initial_sandbox_state_update) = - match resolve_effective_sandbox_state_with_defaults( - &sandbox_plan, - inherited, - &sandbox_defaults, - ) { - Ok(state) => (state, false), - Err(err) - if sandbox_plan_requests_inherited_state(&sandbox_plan) - && is_missing_inherited_sandbox_state_error(&err) => - { - // Allow MCP initialize to complete; first tool call will fail fast - // unless the client sends codex/sandbox-state/update. 
- (sandbox_defaults.clone(), true) - } - Err(err) => return Err(WorkerError::Sandbox(err)), - }; - crate::event_log::log_lazy("worker_manager_created", || { - worker_context_event_payload(backend, &sandbox_state) - }); - if !awaiting_initial_sandbox_state_update { + if plan_requests_inherited_state { + crate::event_log::log( + "worker_manager_created", + serde_json::json!({ + "backend": format!("{backend:?}"), + "awaiting_initial_tool_call_sandbox_state_meta": true, + }), + ); + } else { + crate::event_log::log_lazy("worker_manager_created", || { + worker_context_event_payload(backend, &sandbox_state) + }); crate::sandbox::log_initial_sandbox_policy(&sandbox_state.sandbox_policy); } let output_timeline = { @@ -650,8 +674,7 @@ impl WorkerManager { backend, process: None, sandbox_plan, - awaiting_initial_sandbox_state_update, - inherited_sandbox_state: inherited_update_received.then_some(inherited_state), + inherited_sandbox_state: None, sandbox_defaults, sandbox_state, #[cfg(target_os = "windows")] @@ -683,21 +706,153 @@ impl WorkerManager { } pub fn warm_start(&mut self) -> Result<(), WorkerError> { - if self.awaiting_initial_sandbox_state_update { + if self.missing_inherited_sandbox_state() { return Ok(()); } self.ensure_process() } + pub fn bootstrap_local_inherited_sandbox_state(&mut self) -> Result { + if !self.missing_inherited_sandbox_state() { + return Ok(false); + } + + let update = SandboxStateUpdate { + sandbox_policy: self.sandbox_defaults.sandbox_policy.clone(), + sandbox_cwd: Some(self.sandbox_defaults.sandbox_cwd.clone()), + use_linux_sandbox_bwrap: Some(self.sandbox_defaults.use_linux_sandbox_bwrap), + use_legacy_landlock: None, + }; + crate::event_log::log( + "worker_local_inherit_bootstrap", + serde_json::json!({ + "sandbox_policy": update.sandbox_policy.clone(), + "sandbox_cwd": update.sandbox_cwd.clone(), + "use_linux_sandbox_bwrap": update.use_linux_sandbox_bwrap, + }), + ); + self.stage_sandbox_state_update(update)?; + Ok(true) + } + + fn 
missing_inherited_sandbox_state(&self) -> bool { + sandbox_plan_requests_inherited_state(&self.sandbox_plan) + && self.inherited_sandbox_state.is_none() + } + /// Exposes whether a timed-out logical request still owns future empty-input polls. pub fn pending_request(&self) -> bool { self.pending_request } + pub fn refresh_timeout_marker_with_wait(&mut self, wait: Duration) { + self.resolve_timeout_marker_with_wait(wait); + } + + pub fn empty_input_requires_spawn(&mut self) -> Result { + if self.empty_input_uses_existing_state() { + return Ok(false); + } + let needs_spawn = match self.process.as_mut() { + Some(process) => !process.is_running()?, + None => true, + }; + Ok(needs_spawn) + } + + pub fn nonexecuting_follow_up_uses_existing_state( + &mut self, + text: &str, + ) -> Result { + if let Some((control, remaining)) = split_write_stdin_control_prefix(text) { + return match control { + WriteStdinControlAction::Interrupt => { + if remaining.is_empty() { + Ok(!self.control_only_interrupt_requires_spawn()?) + } else { + Ok(self.pager_follow_up_uses_existing_state(remaining) + && !self.control_only_interrupt_requires_spawn()?) 
+ } + } + WriteStdinControlAction::Restart => Ok(false), + }; + } + + Ok(self.pager_follow_up_uses_existing_state(text) || self.guardrail_busy_event_pending()) + } + + fn control_only_interrupt_requires_spawn(&mut self) -> Result { + match self.process.as_mut() { + Some(process) => Ok(!process.is_running()?), + None => Ok(true), + } + } + pub fn detached_prefix_item_count(&self) -> usize { self.last_detached_prefix_item_count } + fn stage_deferred_sandbox_state_update( + &mut self, + update: Option, + ) -> Result<(), WorkerError> { + let Some(update) = update else { + return Ok(()); + }; + self.stage_sandbox_state_update(update) + } + + fn stage_session_end_sandbox_state_update( + &mut self, + update: Option, + pending_state_prechecked: bool, + ) -> Result<(), WorkerError> { + if pending_state_prechecked + && update.is_none() + && sandbox_plan_requests_inherited_state(&self.sandbox_plan) + { + return Err(prechecked_follow_up_requires_meta_error()); + } + + self.stage_deferred_sandbox_state_update(update) + } + + fn apply_deferred_sandbox_state_update( + &mut self, + update: Option, + ) -> Result<(), WorkerError> { + let Some(update) = update else { + return Ok(()); + }; + self.update_sandbox_state(update, DEFERRED_SANDBOX_UPDATE_TIMEOUT)?; + Ok(()) + } + + fn empty_input_uses_existing_state(&self) -> bool { + match self.oversized_output { + OversizedOutputMode::Files => { + self.pending_request + || self.pending_output_tape.has_pending() + || self.settled_pending_completion.is_some() + || self.guardrail_busy_event_pending() + } + OversizedOutputMode::Pager => { + self.pending_request + || self.output.has_pending_output() + || self.settled_pending_completion.is_some() + || self.pager.is_active() + || self.guardrail_busy_event_pending() + } + } + } + + fn pager_follow_up_uses_existing_state(&self, text: &str) -> bool { + matches!(self.oversized_output, OversizedOutputMode::Pager) && self.pager.is_active() && { + let trimmed = text.trim(); + trimmed.is_empty() || 
trimmed.starts_with(':') + } + } + fn reset_preserving_detached_prefix_item_count(&mut self) -> Result<(), WorkerError> { let detached_prefix_item_count = self.last_detached_prefix_item_count; let result = self.reset(); @@ -720,20 +875,15 @@ impl WorkerManager { text: String, worker_timeout: Duration, server_timeout: Duration, - page_bytes_override: Option, - echo_input: bool, + options: WriteStdinOptions, ) -> Result { match self.oversized_output { OversizedOutputMode::Files => { - self.write_stdin_files(text, worker_timeout, server_timeout) + self.write_stdin_files(text, worker_timeout, server_timeout, options) + } + OversizedOutputMode::Pager => { + self.write_stdin_pager(text, worker_timeout, server_timeout, options) } - OversizedOutputMode::Pager => self.write_stdin_pager( - text, - worker_timeout, - server_timeout, - page_bytes_override, - echo_input, - ), } } @@ -743,20 +893,55 @@ impl WorkerManager { text: String, worker_timeout: Duration, server_timeout: Duration, + options: WriteStdinOptions, ) -> Result { + let pending_state_prechecked = options.pending_state_prechecked; + let deferred_sandbox_state_update = options.deferred_sandbox_state_update.clone(); self.last_detached_prefix_item_count = 0; if let Some((control, remaining)) = split_write_stdin_control_prefix(&text) { self.clear_guardrail_busy_event(); + let control_requires_spawn = matches!(control, WriteStdinControlAction::Interrupt) + && self.control_only_interrupt_requires_spawn()?; + if pending_state_prechecked + && control_requires_spawn + && deferred_sandbox_state_update.is_none() + { + return Err(prechecked_follow_up_requires_meta_error()); + } + let stage_before_control = + control_requires_spawn || matches!(control, WriteStdinControlAction::Restart); + let stage_interrupt_after_session_end = + matches!(control, WriteStdinControlAction::Interrupt) && !stage_before_control; + let mut tail_sandbox_state_update = if stage_before_control { + 
self.stage_deferred_sandbox_state_update(deferred_sandbox_state_update.clone())?; + None + } else { + deferred_sandbox_state_update + }; let control_reply = match control { - WriteStdinControlAction::Interrupt => self.interrupt(worker_timeout), - WriteStdinControlAction::Restart => self.restart(worker_timeout), + WriteStdinControlAction::Interrupt if stage_interrupt_after_session_end => { + self.interrupt_files_control_tail(worker_timeout) + } + WriteStdinControlAction::Interrupt => self.interrupt_files(worker_timeout), + WriteStdinControlAction::Restart => self.restart_files(worker_timeout), }?; + if stage_interrupt_after_session_end && self.session_end_seen { + self.stage_session_end_sandbox_state_update( + tail_sandbox_state_update.take(), + pending_state_prechecked, + )?; + self.maybe_reset_after_session_end(); + } if remaining.is_empty() { return Ok(control_reply); } let control_prefix_item_count = prefixed_worker_reply_item_count(&control_reply); - let remaining_reply = - self.write_stdin_files(remaining.to_string(), worker_timeout, server_timeout)?; + let remaining_reply = self.write_stdin_files( + remaining.to_string(), + worker_timeout, + server_timeout, + options.control_tail(tail_sandbox_state_update), + )?; self.last_detached_prefix_item_count += control_prefix_item_count; return Ok(prefix_worker_reply(control_reply, remaining_reply)); } @@ -780,22 +965,34 @@ impl WorkerManager { return Ok(reply); } - if let Err(err) = self.ensure_process() { - let input_context = self.prepare_input_context_files(); - let reply = self.build_reply_from_worker_error_files(&err, input_context); - let reply = self.finalize_reply(reply); - self.maybe_reset_after_session_end(); - return Ok(reply); - } + let empty_input = text.is_empty(); self.maybe_emit_guardrail_notice(); - self.resolve_timeout_marker(); - if text.is_empty() { + if !pending_state_prechecked { + self.resolve_timeout_marker(); + } + if empty_input { if self.pending_request || 
self.pending_output_tape.has_pending() || self.settled_pending_completion.is_some() { let reply = self.poll_pending_output_files(worker_timeout)?; let reply = self.finalize_reply(reply); + if self.session_end_seen { + self.stage_session_end_sandbox_state_update( + deferred_sandbox_state_update, + pending_state_prechecked, + )?; + } + self.maybe_reset_after_session_end(); + return Ok(reply); + } + if pending_state_prechecked && self.control_only_interrupt_requires_spawn()? { + return Err(prechecked_follow_up_requires_meta_error()); + } + if let Err(err) = self.ensure_process() { + let input_context = self.prepare_input_context_files(); + let reply = self.build_reply_from_worker_error_files(&err, input_context); + let reply = self.finalize_reply(reply); self.maybe_reset_after_session_end(); return Ok(reply); } @@ -804,10 +1001,10 @@ impl WorkerManager { self.maybe_reset_after_session_end(); return Ok(reply); } - if !text.is_empty() && self.pending_request { + if !pending_state_prechecked && self.pending_request { self.resolve_timeout_marker_with_wait(Duration::from_millis(25)); } - if !text.is_empty() && self.pending_request { + if self.pending_request { let mut reply = self.poll_pending_output_files(worker_timeout)?; let detached_prefix_item_count = match &reply.reply { WorkerReply::Output { contents, .. 
} => contents.len(), @@ -815,6 +1012,20 @@ impl WorkerManager { self.last_detached_prefix_item_count = detached_prefix_item_count; mark_busy_follow_up_reply(&mut reply.reply); let reply = self.finalize_reply(reply); + if self.session_end_seen { + self.stage_session_end_sandbox_state_update( + deferred_sandbox_state_update, + pending_state_prechecked, + )?; + } + self.maybe_reset_after_session_end(); + return Ok(reply); + } + self.apply_deferred_sandbox_state_update(deferred_sandbox_state_update)?; + if let Err(err) = self.ensure_process() { + let input_context = self.prepare_input_context_files(); + let reply = self.build_reply_from_worker_error_files(&err, input_context); + let reply = self.finalize_reply(reply); self.maybe_reset_after_session_end(); return Ok(reply); } @@ -841,16 +1052,47 @@ impl WorkerManager { text: String, worker_timeout: Duration, server_timeout: Duration, - page_bytes_override: Option, - echo_input: bool, + options: WriteStdinOptions, ) -> Result { + let page_bytes_override = options.page_bytes_override; + let echo_input = options.echo_input; + let pending_state_prechecked = options.pending_state_prechecked; + let deferred_sandbox_state_update = options.deferred_sandbox_state_update.clone(); self.last_detached_prefix_item_count = 0; if let Some((control, remaining)) = split_write_stdin_control_prefix(&text) { self.clear_guardrail_busy_event(); + let control_requires_spawn = matches!(control, WriteStdinControlAction::Interrupt) + && self.control_only_interrupt_requires_spawn()?; + if pending_state_prechecked + && control_requires_spawn + && deferred_sandbox_state_update.is_none() + { + return Err(prechecked_follow_up_requires_meta_error()); + } + let stage_before_control = + control_requires_spawn || matches!(control, WriteStdinControlAction::Restart); + let stage_interrupt_after_session_end = + matches!(control, WriteStdinControlAction::Interrupt) && !stage_before_control; + let mut tail_sandbox_state_update = if stage_before_control { + 
self.stage_deferred_sandbox_state_update(deferred_sandbox_state_update.clone())?; + None + } else { + deferred_sandbox_state_update + }; let control_reply = match control { - WriteStdinControlAction::Interrupt => self.interrupt(worker_timeout), - WriteStdinControlAction::Restart => self.restart(worker_timeout), + WriteStdinControlAction::Interrupt if stage_interrupt_after_session_end => { + self.interrupt_pager_control_tail(worker_timeout) + } + WriteStdinControlAction::Interrupt => self.interrupt_pager(worker_timeout), + WriteStdinControlAction::Restart => self.restart_pager(worker_timeout), }?; + if stage_interrupt_after_session_end && self.session_end_seen { + self.stage_session_end_sandbox_state_update( + tail_sandbox_state_update.take(), + pending_state_prechecked, + )?; + self.maybe_reset_after_session_end(); + } if remaining.is_empty() { return Ok(control_reply); } @@ -859,8 +1101,7 @@ impl WorkerManager { remaining.to_string(), worker_timeout, server_timeout, - page_bytes_override, - echo_input, + options.control_tail(tail_sandbox_state_update), )?; self.last_detached_prefix_item_count += control_prefix_item_count; return Ok(prefix_worker_reply(control_reply, remaining_reply)); @@ -905,13 +1146,21 @@ impl WorkerManager { if empty_input { self.output.start_capture(); self.maybe_emit_guardrail_notice(); - self.resolve_timeout_marker(); + if !pending_state_prechecked { + self.resolve_timeout_marker(); + } if self.pending_request || self.output.has_pending_output() || self.settled_pending_completion.is_some() { let reply = self.poll_pending_output_pager(worker_timeout, page_bytes)?; let reply = self.finalize_reply(reply); + if self.session_end_seen { + self.stage_session_end_sandbox_state_update( + deferred_sandbox_state_update, + pending_state_prechecked, + )?; + } self.maybe_reset_after_session_end(); return Ok(reply); } @@ -922,6 +1171,9 @@ impl WorkerManager { self.maybe_reset_after_session_end(); return Ok(reply); } + if pending_state_prechecked && 
self.control_only_interrupt_requires_spawn()? { + return Err(prechecked_follow_up_requires_meta_error()); + } } if let Err(err) = self.ensure_process() { @@ -934,7 +1186,9 @@ impl WorkerManager { if !empty_input { self.output.start_capture(); self.maybe_emit_guardrail_notice(); - self.resolve_timeout_marker(); + if !pending_state_prechecked { + self.resolve_timeout_marker(); + } } if empty_input { let reply = self.build_idle_poll_reply_pager(); @@ -942,7 +1196,7 @@ impl WorkerManager { self.maybe_reset_after_session_end(); return Ok(reply); } - if self.pending_request { + if !pending_state_prechecked && self.pending_request { self.resolve_timeout_marker_with_wait(Duration::from_millis(25)); } if self.pending_request { @@ -953,9 +1207,16 @@ impl WorkerManager { self.last_detached_prefix_item_count = detached_prefix_item_count; mark_busy_follow_up_reply(&mut reply.reply); let reply = self.finalize_reply(reply); + if self.session_end_seen { + self.stage_session_end_sandbox_state_update( + deferred_sandbox_state_update, + pending_state_prechecked, + )?; + } self.maybe_reset_after_session_end(); return Ok(reply); } + self.apply_deferred_sandbox_state_update(deferred_sandbox_state_update)?; let input_context = self.prepare_input_context_pager(&text, echo_input); @@ -2091,6 +2352,21 @@ impl WorkerManager { } fn interrupt_files(&mut self, timeout: Duration) -> Result { + self.interrupt_files_inner(timeout, true) + } + + fn interrupt_files_control_tail( + &mut self, + timeout: Duration, + ) -> Result { + self.interrupt_files_inner(timeout, false) + } + + fn interrupt_files_inner( + &mut self, + timeout: Duration, + reset_after_session_end: bool, + ) -> Result { crate::event_log::log( "worker_interrupt_begin", serde_json::json!({ @@ -2128,7 +2404,9 @@ impl WorkerManager { append_prompt_if_missing(contents, Some(prompt)); } let reply = self.finalize_reply(reply); - self.maybe_reset_after_session_end(); + if reset_after_session_end { + self.maybe_reset_after_session_end(); + } 
return Ok(reply); } @@ -2215,7 +2493,7 @@ impl WorkerManager { "timeout_ms": timeout.as_millis(), }), ); - if self.awaiting_initial_sandbox_state_update { + if self.missing_inherited_sandbox_state() { return Err(WorkerError::Sandbox( MISSING_INHERITED_SANDBOX_STATE_MESSAGE.to_string(), )); @@ -2232,6 +2510,21 @@ impl WorkerManager { } fn interrupt_pager(&mut self, timeout: Duration) -> Result { + self.interrupt_pager_inner(timeout, true) + } + + fn interrupt_pager_control_tail( + &mut self, + timeout: Duration, + ) -> Result { + self.interrupt_pager_inner(timeout, false) + } + + fn interrupt_pager_inner( + &mut self, + timeout: Duration, + reset_after_session_end: bool, + ) -> Result { crate::event_log::log( "worker_interrupt_begin", serde_json::json!({ @@ -2273,7 +2566,9 @@ impl WorkerManager { } } let reply = self.finalize_reply(reply); - self.maybe_reset_after_session_end(); + if reset_after_session_end { + self.maybe_reset_after_session_end(); + } return Ok(reply); } @@ -2372,7 +2667,7 @@ impl WorkerManager { "timeout_ms": timeout.as_millis(), }), ); - if self.awaiting_initial_sandbox_state_update { + if self.missing_inherited_sandbox_state() { return Err(WorkerError::Sandbox( MISSING_INHERITED_SANDBOX_STATE_MESSAGE.to_string(), )); @@ -2398,7 +2693,7 @@ impl WorkerManager { } fn ensure_process(&mut self) -> Result<(), WorkerError> { - if self.awaiting_initial_sandbox_state_update { + if self.missing_inherited_sandbox_state() { return Err(WorkerError::Sandbox( MISSING_INHERITED_SANDBOX_STATE_MESSAGE.to_string(), )); @@ -2430,7 +2725,7 @@ impl WorkerManager { if let Some(process) = self.process.take() { let _ = process.kill(); } - if self.awaiting_initial_sandbox_state_update { + if self.missing_inherited_sandbox_state() { return Err(WorkerError::Sandbox( MISSING_INHERITED_SANDBOX_STATE_MESSAGE.to_string(), )); @@ -2457,7 +2752,7 @@ impl WorkerManager { if let Some(process) = self.process.take() { let _ = process.kill(); } - if 
self.awaiting_initial_sandbox_state_update { + if self.missing_inherited_sandbox_state() { return Err(WorkerError::Sandbox( MISSING_INHERITED_SANDBOX_STATE_MESSAGE.to_string(), )); @@ -2474,23 +2769,19 @@ impl WorkerManager { Ok(()) } - // Updates the server-side sandbox configuration. The new policy becomes - // effective in the worker only after the current process is recycled and a - // replacement worker is spawned. Session temp handling is separate: the - // server-owned temp dir is reset before each spawn, and today that reset - // reuses the same configured path in place. - pub fn update_sandbox_state( + // Replaces the inherited sandbox snapshot for this tool call. The new + // policy becomes effective in the worker only after the current process is + // recycled and a replacement worker is spawned. Session temp handling is + // separate: the server-owned temp dir is reset before each spawn, and + // today that reset reuses the same configured path in place. + fn prepare_sandbox_state_update( &mut self, update: SandboxStateUpdate, - timeout: Duration, - ) -> Result { + ) -> Result { let update_for_log = serde_json::to_value(&update) .unwrap_or_else(|err| serde_json::json!({"serialize_error": err.to_string()})); crate::sandbox::log_sandbox_policy_update(&update.sandbox_policy); - let mut inherited_state = self - .inherited_sandbox_state - .clone() - .unwrap_or_else(|| self.sandbox_defaults.clone()); + let mut inherited_state = self.sandbox_defaults.clone(); inherited_state.apply_update(update); let resolved_state = resolve_effective_sandbox_state_with_defaults( &self.sandbox_plan, @@ -2498,8 +2789,7 @@ impl WorkerManager { &self.sandbox_defaults, ) .map_err(WorkerError::Sandbox)?; - let awaiting_before = self.awaiting_initial_sandbox_state_update; - self.awaiting_initial_sandbox_state_update = false; + let missing_before = self.missing_inherited_sandbox_state(); self.inherited_sandbox_state = Some(inherited_state); let changed = self.sandbox_state != 
resolved_state; self.sandbox_state = resolved_state; @@ -2510,16 +2800,47 @@ impl WorkerManager { // picks up the updated sandbox state. self.windows_sandbox_launch = None; } + Ok(PreparedSandboxStateUpdate { + update_for_log, + changed, + missing_before, + }) + } + + fn log_sandbox_state_update( + prepared: &PreparedSandboxStateUpdate, + timeout: Option, + respawned: bool, + ) { crate::event_log::log( "worker_sandbox_state_update", serde_json::json!({ - "changed": changed, - "timeout_ms": timeout.as_millis(), - "update": update_for_log, + "changed": prepared.changed, + "timeout_ms": timeout.map(|timeout| timeout.as_millis()), + "respawned": respawned, + "update": prepared.update_for_log, }), ); - if !changed { - if awaiting_before && self.process.is_none() { + } + + pub fn stage_sandbox_state_update( + &mut self, + update: SandboxStateUpdate, + ) -> Result<(), WorkerError> { + let prepared = self.prepare_sandbox_state_update(update)?; + Self::log_sandbox_state_update(&prepared, None, false); + Ok(()) + } + + pub fn update_sandbox_state( + &mut self, + update: SandboxStateUpdate, + timeout: Duration, + ) -> Result { + let prepared = self.prepare_sandbox_state_update(update)?; + let mut respawned = false; + if !prepared.changed { + if prepared.missing_before && self.process.is_none() { match self.oversized_output { OversizedOutputMode::Files => self.reset_output_state_files(true), OversizedOutputMode::Pager => self.reset_output_state_pager(true, false), @@ -2528,46 +2849,97 @@ impl WorkerManager { OversizedOutputMode::Files => self.spawn_process_files()?, OversizedOutputMode::Pager => self.spawn_process_with_pager(false)?, }); - return Ok(true); + respawned = true; } - return Ok(false); + Self::log_sandbox_state_update(&prepared, Some(timeout), respawned); + return Ok(respawned); } if let Some(process) = self.process.take() { let _ = process.shutdown_graceful(timeout); } match self.oversized_output { + OversizedOutputMode::Files if 
self.has_detached_output_to_preserve() => { + self.reset_output_state_files_preserving_detached_output() + } OversizedOutputMode::Files => self.reset_output_state_files(true), + OversizedOutputMode::Pager if self.has_detached_output_to_preserve() => { + self.reset_output_state_pager_preserving_detached_output(self.pager.is_active()) + } OversizedOutputMode::Pager => self.reset_output_state_pager(true, false), } self.process = Some(match self.oversized_output { OversizedOutputMode::Files => self.spawn_process_files()?, OversizedOutputMode::Pager => self.spawn_process_with_pager(false)?, }); - Ok(true) + respawned = true; + Self::log_sandbox_state_update(&prepared, Some(timeout), respawned); + Ok(respawned) + } + + fn has_detached_output_to_preserve(&self) -> bool { + match self.oversized_output { + OversizedOutputMode::Files => { + self.pending_output_tape.has_pending() || self.settled_pending_completion.is_some() + } + OversizedOutputMode::Pager => { + self.output.has_pending_output() || self.settled_pending_completion.is_some() + } + } } fn reset_output_state_files(&mut self, clear_pending_output: bool) { + self.reset_output_state_files_inner(clear_pending_output, false); + } + + fn reset_output_state_files_preserving_detached_output(&mut self) { + self.reset_output_state_files_inner(false, true); + } + + fn reset_output_state_files_inner( + &mut self, + clear_pending_output: bool, + preserve_detached_output: bool, + ) { if clear_pending_output { self.pending_output_tape.clear(); } self.pending_request = false; self.pending_request_started_at = None; - self.pending_request_input = None; + if !preserve_detached_output { + self.pending_request_input = None; + } self.session_end_seen = false; - self.settled_pending_completion = None; - self.last_detached_prefix_item_count = 0; + if !preserve_detached_output { + self.settled_pending_completion = None; + self.last_detached_prefix_item_count = 0; + } self.last_prompt = None; self.guardrail.busy.store(false, 
Ordering::Relaxed); } fn reset_output_state_pager(&mut self, clear_pending_output: bool, preserve_pager: bool) { + self.reset_output_state_pager_inner(clear_pending_output, preserve_pager, false); + } + + fn reset_output_state_pager_preserving_detached_output(&mut self, preserve_pager: bool) { + self.reset_output_state_pager_inner(false, preserve_pager, true); + } + + fn reset_output_state_pager_inner( + &mut self, + clear_pending_output: bool, + preserve_pager: bool, + preserve_detached_output: bool, + ) { if clear_pending_output { self.pending_output_tape.clear(); } - reset_output_ring(); - reset_last_reply_marker_offset(); - self.output = OutputBuffer::default(); + if !preserve_detached_output { + reset_output_ring(); + reset_last_reply_marker_offset(); + self.output = OutputBuffer::default(); + } if !preserve_pager { self.pager = Pager::default(); } @@ -2575,8 +2947,10 @@ impl WorkerManager { self.pending_request_started_at = None; self.pending_request_input = None; self.session_end_seen = false; - self.settled_pending_completion = None; - self.last_detached_prefix_item_count = 0; + if !preserve_detached_output { + self.settled_pending_completion = None; + self.last_detached_prefix_item_count = 0; + } self.pager_prompt = None; self.last_prompt = None; self.guardrail.busy.store(false, Ordering::Relaxed); @@ -2922,6 +3296,10 @@ impl WorkerManager { self.resolve_timeout_marker_with_wait(Duration::from_millis(0)); } + pub fn refresh_timeout_marker(&mut self) { + self.resolve_timeout_marker(); + } + fn resolve_timeout_marker_with_wait(&mut self, wait: Duration) { if !self.pending_request { return; @@ -5330,6 +5708,10 @@ mod tests { reset_last_reply_marker_offset, reset_output_ring, }; use crate::sandbox::SandboxPolicy; + #[cfg(target_os = "linux")] + use crate::sandbox::sandbox_state_update_from_codex_meta; + #[cfg(target_os = "linux")] + use serde_json::json; use std::sync::{Mutex, MutexGuard, OnceLock}; fn cwd_test_mutex() -> &'static Mutex<()> { @@ -5390,6 
+5772,14 @@ mod tests { .expect("spawn exiting test child") } + #[cfg(target_family = "windows")] + fn successful_test_child() -> Child { + Command::new("powershell.exe") + .args(["-NoProfile", "-Command", "exit 0"]) + .spawn() + .expect("spawn exiting test child") + } + #[cfg(target_family = "unix")] fn failing_test_status() -> std::process::ExitStatus { Command::new("sh") @@ -5935,8 +6325,7 @@ mod tests { "1+1".to_string(), Duration::from_millis(50), Duration::ZERO, - None, - false, + WriteStdinOptions::default(), ) .expect("reply"); @@ -6198,21 +6587,60 @@ mod tests { } #[test] - fn files_prepare_input_context_seals_split_utf8_at_request_boundary() { + fn files_reset_preserving_detached_output_keeps_pending_request_input_for_trim() { let mut manager = WorkerManager::new( Backend::Python, SandboxCliPlan::default(), crate::oversized_output::OversizedOutputMode::Files, ) .expect("worker manager"); - manager.pending_output_tape.append_stdout_bytes(&[0xC3]); - - let first = manager.prepare_input_context_files(); - assert_eq!( - contents_text(&first.prefix_contents), - "\\xC3", - "expected an accepted request to seal the detached utf-8 lead byte into the prefix" - ); + manager + .pending_output_tape + .append_stdout_bytes(b">>> import time; time.sleep(0.2)\nDETACHED_OK\n"); + manager.pending_request_input = Some("import time; time.sleep(0.2)\n".to_string()); + manager.settled_pending_completion = Some(CompletionInfo { + prompt: Some(">>> ".to_string()), + prompt_variants: Some(vec![">>> ".to_string()]), + echo_events: Vec::new(), + protocol_warnings: Vec::new(), + session_end_seen: false, + }); + + manager.reset_output_state_files_preserving_detached_output(); + + let context = manager.prepare_input_context_files(); + let text = contents_text(&context.prefix_contents); + + assert!( + text.contains("DETACHED_OK\n"), + "expected detached files-mode output to survive the preserved reset, got: {text:?}" + ); + assert!( + !text.contains("import time; time.sleep(0.2)"), + 
"did not expect the preserved reset to leak the original Python input echo, got: {text:?}" + ); + assert!( + manager.pending_request_input.is_none(), + "expected preserved pending input to be consumed once the detached prefix is prepared" + ); + } + + #[test] + fn files_prepare_input_context_seals_split_utf8_at_request_boundary() { + let mut manager = WorkerManager::new( + Backend::Python, + SandboxCliPlan::default(), + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + manager.pending_output_tape.append_stdout_bytes(&[0xC3]); + + let first = manager.prepare_input_context_files(); + assert_eq!( + contents_text(&first.prefix_contents), + "\\xC3", + "expected an accepted request to seal the detached utf-8 lead byte into the prefix" + ); manager .pending_output_tape @@ -6390,8 +6818,11 @@ mod tests { String::new(), Duration::from_millis(0), Duration::from_millis(0), - Some(16), - true, + WriteStdinOptions { + page_bytes_override: Some(16), + echo_input: true, + ..WriteStdinOptions::default() + }, ) .expect("empty poll reply"); @@ -6440,8 +6871,11 @@ mod tests { String::new(), Duration::from_millis(0), Duration::from_millis(0), - Some(16), - true, + WriteStdinOptions { + page_bytes_override: Some(16), + echo_input: true, + ..WriteStdinOptions::default() + }, ) .expect("empty pager reply"); let WorkerReply::Output { contents, .. } = reply; @@ -6495,8 +6929,11 @@ mod tests { String::new(), Duration::from_millis(0), Duration::from_millis(0), - Some(256), - true, + WriteStdinOptions { + page_bytes_override: Some(256), + echo_input: true, + ..WriteStdinOptions::default() + }, ) .expect("empty poll reply"); let WorkerReply::Output { contents, .. 
} = reply; @@ -6662,25 +7099,132 @@ mod tests { ); } + #[cfg(target_os = "linux")] #[test] - fn failed_sandbox_update_does_not_commit_inherited_state() { - let _guard = env_test_mutex().lock().expect("env mutex"); - let _guard = cwd_test_mutex().lock().expect("cwd mutex"); - let original_initial = std::env::var_os(crate::sandbox::INITIAL_SANDBOX_STATE_ENV); - let initial = serde_json::json!({ + fn linux_bwrap_startup_retry_stays_disabled_after_followup_codex_meta_update() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::Python, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + let mut inherited_state = manager.sandbox_defaults.clone(); + inherited_state.apply_update(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::WorkspaceWrite { + writable_roots: Vec::new(), + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }, + sandbox_cwd: Some(std::env::temp_dir()), + use_linux_sandbox_bwrap: Some(true), + use_legacy_landlock: None, + }); + manager.inherited_sandbox_state = Some(inherited_state.clone()); + manager.sandbox_state = resolve_effective_sandbox_state_with_defaults( + &manager.sandbox_plan, + Some(&inherited_state), + &manager.sandbox_defaults, + ) + .expect("resolved initial sandbox state"); + assert!( + manager.sandbox_state.use_linux_sandbox_bwrap, + "test setup should start with bwrap enabled" + ); + + let retry = manager.maybe_retry_spawn_without_linux_bwrap( + &WorkerError::Protocol("ipc disconnected while waiting for backend info".to_string()), + false, + ); + assert!(retry, "expected startup failure to disable bwrap"); + + let update = sandbox_state_update_from_codex_meta(&json!({ "sandboxPolicy": { "type": "workspace-write", "writable_roots": [], "network_access": false, "exclude_tmpdir_env_var": false, 
"exclude_slash_tmp": false - } - }) - .to_string(); - unsafe { - std::env::set_var(crate::sandbox::INITIAL_SANDBOX_STATE_ENV, initial); - } + }, + "sandboxCwd": std::env::temp_dir(), + "useLegacyLandlock": false, + "codexLinuxSandboxExe": "/tmp/codex-linux-sandbox" + })) + .expect("Codex sandbox metadata"); + manager + .update_sandbox_state(update, Duration::from_millis(1)) + .expect("follow-up sandbox state"); + + assert!( + !manager.sandbox_state.use_linux_sandbox_bwrap, + "follow-up Codex metadata should preserve the local no-bwrap fallback" + ); + } + + #[test] + fn inherit_workspace_write_refinements_wait_for_client_state() { + let writable_root = std::env::temp_dir(); + let plan = SandboxCliPlan { + operations: vec![ + crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + ), + crate::sandbox_cli::SandboxCliOperation::AddWritableRoot(writable_root.clone()), + crate::sandbox_cli::SandboxCliOperation::Config( + crate::sandbox_cli::SandboxConfigOperation::SetWorkspaceNetworkAccess(true), + ), + ], + }; + let mut manager = WorkerManager::new( + Backend::Python, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + + manager + .stage_sandbox_state_update(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::WorkspaceWrite { + writable_roots: Vec::new(), + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }, + sandbox_cwd: Some(writable_root.clone()), + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }) + .expect("workspace-write Codex metadata should satisfy deferred refinements"); + + let SandboxPolicy::WorkspaceWrite { + writable_roots, + network_access, + .. 
+ } = &manager.sandbox_state.sandbox_policy + else { + panic!( + "expected staged inherit refinements to resolve to workspace-write, got {:?}", + manager.sandbox_state.sandbox_policy + ); + }; + assert!( + *network_access, + "expected deferred workspace network setting to apply after client metadata" + ); + assert!( + writable_roots.iter().any(|path| path == &writable_root), + "expected deferred writable root to apply after client metadata" + ); + } + #[test] + fn failed_sandbox_update_does_not_commit_inherited_state() { + let _guard = cwd_test_mutex().lock().expect("cwd mutex"); let plan = crate::sandbox_cli::SandboxCliPlan { operations: vec![ crate::sandbox_cli::SandboxCliOperation::SetMode( @@ -6697,10 +7241,25 @@ mod tests { crate::oversized_output::OversizedOutputMode::Files, ) .expect("worker manager"); - let inherited_before = manager - .inherited_sandbox_state - .clone() - .expect("inherited state should be present"); + let mut inherited_before = manager.sandbox_defaults.clone(); + inherited_before.apply_update(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::WorkspaceWrite { + writable_roots: Vec::new(), + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }, + sandbox_cwd: None, + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }); + manager.inherited_sandbox_state = Some(inherited_before.clone()); + manager.sandbox_state = resolve_effective_sandbox_state_with_defaults( + &manager.sandbox_plan, + Some(&inherited_before), + &manager.sandbox_defaults, + ) + .expect("resolved initial inherited sandbox state"); let err = manager .update_sandbox_state( @@ -6722,15 +7281,344 @@ mod tests { Some(inherited_before), "failed updates must not mutate inherited sandbox baseline" ); + } - match original_initial { - Some(value) => unsafe { - std::env::set_var(crate::sandbox::INITIAL_SANDBOX_STATE_ENV, value); + #[test] + fn exact_interrupt_requires_current_sandbox_when_worker_would_respawn() { + let plan = 
SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::Python, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + + assert!( + !manager + .nonexecuting_follow_up_uses_existing_state("\u{3}") + .expect("interrupt follow-up classification"), + "a bare Ctrl-C should require current per-call sandbox metadata when it would respawn" + ); + } + + #[test] + fn interrupt_pager_tail_requires_current_sandbox_when_worker_would_respawn() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + crate::oversized_output::OversizedOutputMode::Pager, + ) + .expect("worker manager"); + let mut process = test_worker_process(successful_test_child()); + process + .child + .wait() + .expect("wait for the stub worker process to exit"); + manager.process = Some(process); + + manager.output.start_capture(); + manager.output_timeline.append_text( + b"line0001\nline0002\nline0003\nline0004\n", + false, + ContentOrigin::Worker, + ); + let end_offset = manager.output.end_offset().expect("output end offset"); + let SnapshotWithImages { buffer, .. 
} = + snapshot_page_with_images(&manager.output, end_offset, 16); + manager.pager.activate(buffer.expect("pager buffer"), false); + + assert!( + !manager + .nonexecuting_follow_up_uses_existing_state("\u{3}:q") + .expect("interrupt follow-up classification"), + "a pager ctrl-c tail should require current per-call sandbox metadata when it would respawn" + ); + } + + #[test] + fn empty_input_with_busy_guardrail_uses_existing_state() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + { + let mut slot = manager + .guardrail + .event + .lock() + .expect("guardrail event mutex poisoned"); + *slot = Some(GuardrailEvent { + message: "[repl] previous request aborted; retry your last input\n".to_string(), + was_busy: true, + }); + } + + assert!( + !manager + .empty_input_requires_spawn() + .expect("empty-input classification"), + "empty polls should keep pending busy-guardrail recovery local" + ); + } + + #[test] + fn nonempty_input_with_busy_guardrail_uses_existing_state() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + { + let mut slot = manager + .guardrail + .event + .lock() + .expect("guardrail event mutex poisoned"); + *slot = Some(GuardrailEvent { + message: "[repl] previous request aborted; retry your last input\n".to_string(), + was_busy: true, + }); + } + + assert!( + manager + .nonexecuting_follow_up_uses_existing_state("1+1") + .expect("follow-up classification"), + "busy-guardrail retries should keep pending recovery local" + ); + } + + #[test] + fn 
empty_input_with_idle_guardrail_requires_spawn() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + { + let mut slot = manager + .guardrail + .event + .lock() + .expect("guardrail event mutex poisoned"); + *slot = Some(GuardrailEvent { + message: "[repl] worker was idle; new session started\n".to_string(), + was_busy: false, + }); + } + + assert!( + manager + .empty_input_requires_spawn() + .expect("empty-input classification"), + "idle guardrail notices should still require current per-call sandbox metadata when a poll would respawn" + ); + } + + #[test] + fn prechecked_empty_input_requires_current_sandbox_when_worker_exited() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + manager + .stage_sandbox_state_update(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::ReadOnly, + sandbox_cwd: None, + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }) + .expect("initial inherited state"); + let mut process = test_worker_process(successful_test_child()); + process + .child + .wait() + .expect("wait for the stub worker process to exit"); + manager.process = Some(process); + + let result = manager.write_stdin( + String::new(), + Duration::from_secs(1), + Duration::from_secs(1), + WriteStdinOptions { + pending_state_prechecked: true, + ..WriteStdinOptions::default() }, - None => unsafe { - std::env::remove_var(crate::sandbox::INITIAL_SANDBOX_STATE_ENV); + ); + + assert!( + matches!(result, Err(ref err) if 
is_prechecked_follow_up_requires_meta(err)), + "expected prechecked empty input to require current sandbox metadata once the worker has exited, got: {result:?}" + ); + } + + #[test] + fn prechecked_bare_interrupt_requires_current_sandbox_when_worker_exited() { + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + manager + .stage_sandbox_state_update(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::ReadOnly, + sandbox_cwd: None, + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }) + .expect("initial inherited state"); + let mut process = test_worker_process(successful_test_child()); + process + .child + .wait() + .expect("wait for the stub worker process to exit"); + manager.process = Some(process); + + let result = manager.write_stdin( + "\u{3}".to_string(), + Duration::from_secs(1), + Duration::from_secs(1), + WriteStdinOptions { + pending_state_prechecked: true, + ..WriteStdinOptions::default() }, + ); + + assert!( + matches!(result, Err(ref err) if is_prechecked_follow_up_requires_meta(err)), + "expected prechecked bare ctrl-c to require current sandbox metadata once the worker has exited, got: {result:?}" + ); + } + + #[cfg(target_family = "unix")] + #[test] + fn interrupt_tail_uses_current_sandbox_for_the_respawn() { + let _guard = cwd_test_mutex().lock().expect("cwd mutex"); + let temp = tempfile::Builder::new() + .prefix(".tmp-interrupt-tail-current-sandbox-") + .tempdir_in(env!("CARGO_MANIFEST_DIR")) + .expect("tempdir"); + let sandbox_cwd = temp.path().to_path_buf(); + let plan = SandboxCliPlan { + operations: vec![crate::sandbox_cli::SandboxCliOperation::SetMode( + crate::sandbox_cli::SandboxModeArg::Inherit, + )], + }; + let mut manager = WorkerManager::new( + Backend::R, + plan, + 
crate::oversized_output::OversizedOutputMode::Files, + ) + .expect("worker manager"); + manager + .stage_sandbox_state_update(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::ReadOnly, + sandbox_cwd: Some(sandbox_cwd.clone()), + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }) + .expect("initial inherited read-only state"); + let mut process = test_worker_process(successful_test_child()); + process + .child + .wait() + .expect("wait for the stub worker process to exit"); + manager.process = Some(process); + manager.exe_path = PathBuf::from("definitely-missing-worker-exe"); + + let result = manager.write_stdin( + "\u{3}1+1".to_string(), + Duration::from_secs(10), + Duration::from_secs(10), + WriteStdinOptions { + deferred_sandbox_state_update: Some(SandboxStateUpdate { + sandbox_policy: SandboxPolicy::WorkspaceWrite { + writable_roots: Vec::new(), + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }, + sandbox_cwd: Some(sandbox_cwd.clone()), + use_linux_sandbox_bwrap: None, + use_legacy_landlock: None, + }), + ..WriteStdinOptions::default() + }, + ); + match result { + Ok(WorkerReply::Output { + contents, is_error, .. + }) => { + let text = contents_text(&contents); + assert!( + is_error, + "expected the failed interrupt-tail respawn attempt to surface as an error reply" + ); + assert!( + text.contains("worker error:"), + "expected the failed interrupt-tail respawn attempt to report a worker error, got: {text:?}" + ); + } + Err(WorkerError::Protocol(message)) => { + assert!( + message.contains("backend info") || message.contains("ipc disconnected"), + "expected the failed interrupt-tail respawn attempt to fail during worker startup, got: {message:?}" + ); + } + Err(err) => panic!("unexpected interrupt-tail respawn error: {err}"), } + assert!( + matches!( + manager.sandbox_state.sandbox_policy, + SandboxPolicy::WorkspaceWrite { .. 
} + ), + "expected deferred metadata to stage before interrupt attempts the respawn" + ); + assert_eq!( + manager.sandbox_state.sandbox_cwd, sandbox_cwd, + "expected deferred metadata to update the effective sandbox cwd before the respawn path" + ); } #[test] From 2987037819344605704d4062c27ad06241734082 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 11:22:50 -0400 Subject: [PATCH 3/5] test: cover inherit metadata regressions --- tests/common/mod.rs | 137 +- tests/debug_events_tool_calls.rs | 2 +- tests/debug_repl_prompt.rs | 14 +- tests/interrupt.rs | 14 +- tests/manage_session_behavior.rs | 4 +- tests/pager.rs | 27 +- tests/pager_flags.rs | 4 +- tests/pager_hits_seek.rs | 2 +- tests/pager_page_size.rs | 6 +- tests/pager_seek.rs | 2 +- tests/pager_skip.rs | 2 +- tests/pager_where.rs | 2 +- tests/plot_images.rs | 42 +- tests/python_backend.rs | 46 +- tests/python_plot_images.rs | 36 +- tests/r_console_encoding.rs | 2 +- tests/r_file_show.rs | 2 +- tests/r_help.rs | 2 +- tests/r_startup.rs | 2 +- tests/repl_surface.rs | 16 +- tests/reticulate_py_help.rs | 2 +- tests/sandbox.rs | 112 +- tests/sandbox_state_updates.rs | 2461 ++++++++++++++--- ...drain_plot_then_later_stdout_snapshot.snap | 58 - ...then_later_stdout_snapshot@transcript.snap | 15 - tests/write_stdin_batch.rs | 90 +- tests/write_stdin_behavior.rs | 36 +- tests/write_stdin_edge_cases.rs | 10 +- 28 files changed, 2463 insertions(+), 685 deletions(-) delete mode 100644 tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot.snap delete mode 100644 tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot@transcript.snap diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 8a06cd34..d09fecb8 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -15,7 +15,7 @@ use rmcp::ServiceExt; use rmcp::handler::client::ClientHandler; use rmcp::model::{ CallToolRequestParams, ClientNotification, 
ClientRequest, CustomNotification, CustomRequest, - RawContent, + Meta, RawContent, }; use rmcp::service::ServiceError; use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; @@ -740,9 +740,18 @@ impl McpTestSession { } pub async fn call_tool_raw( - &mut self, + &self, tool: impl Into, arguments: Value, + ) -> Result { + self.call_tool_raw_with_meta(tool, arguments, None).await + } + + pub async fn call_tool_raw_with_meta( + &self, + tool: impl Into, + arguments: Value, + meta: Option, ) -> Result { let tool = tool.into(); let request_tool = normalize_tool_name_for_request(&tool).to_string(); @@ -756,18 +765,36 @@ impl McpTestSession { ))); } }; - let request = match arguments { + let mut request = match arguments { Some(arguments) => CallToolRequestParams::new(request_tool).with_arguments(arguments), None => CallToolRequestParams::new(request_tool), }; + if let Some(meta) = meta { + let Value::Object(meta_map) = meta else { + return Err(ServiceError::McpError(rmcp::ErrorData::invalid_params( + "tool metadata must be a JSON object", + None, + ))); + }; + request.meta = Some(Meta(meta_map.into_iter().collect())); + } self.service.call_tool(request).await } pub async fn write_stdin_raw_with( - &mut self, + &self, + input: impl Into, + timeout: Option, + ) -> Result { + self.write_stdin_raw_with_meta(input, timeout, None).await + } + + pub async fn write_stdin_raw_with_meta( + &self, input: impl Into, timeout: Option, + meta: Option, ) -> Result { let mut input = input.into(); if !input.is_empty() && !input.ends_with('\n') { @@ -783,14 +810,24 @@ impl McpTestSession { json!((timeout * 1000.0).round() as i64), ); } - self.call_tool_raw(self.repl_tool_name(), Value::Object(args)) + self.call_tool_raw_with_meta(self.repl_tool_name(), Value::Object(args), meta) .await } pub async fn write_stdin_raw_unterminated_with( - &mut self, + &self, input: impl Into, timeout: Option, + ) -> Result { + self.write_stdin_raw_unterminated_with_meta(input, timeout, None) + .await 
+ } + + pub async fn write_stdin_raw_unterminated_with_meta( + &self, + input: impl Into, + timeout: Option, + meta: Option, ) -> Result { let input = input.into(); let timeout = normalized_test_timeout(timeout); @@ -802,12 +839,12 @@ impl McpTestSession { json!((timeout * 1000.0).round() as i64), ); } - self.call_tool_raw(self.repl_tool_name(), Value::Object(args)) + self.call_tool_raw_with_meta(self.repl_tool_name(), Value::Object(args), meta) .await } pub async fn send_custom_request( - &mut self, + &self, method: impl Into, params: Value, ) -> Result<(), ServiceError> { @@ -817,7 +854,7 @@ impl McpTestSession { } pub async fn send_custom_notification( - &mut self, + &self, method: impl Into, params: Value, ) -> Result<(), ServiceError> { @@ -919,7 +956,7 @@ impl McpSnapshot { )); out.push('\n'); out.push_str("response:\n"); - let response = normalize_snapshot_response(&step.response); + let response = normalize_snapshot_response(&step.response, &step.call); out.push_str(&pretty_json( &serde_json::to_value(&response) .unwrap_or_else(|_| json!({"error":"serialize outcome"})), @@ -943,7 +980,7 @@ impl McpSnapshot { out.push('\n'); } - let response = normalize_snapshot_response(&step.response); + let response = normalize_snapshot_response(&step.response, &step.call); let is_error = snapshot_response_is_error(&response); let (call_desc, input_lines) = format_snapshot_call(&step.call); @@ -968,22 +1005,88 @@ impl McpSnapshot { } } -fn normalize_snapshot_response(response: &SnapshotResponse) -> SnapshotResponse { +fn normalize_snapshot_response( + response: &SnapshotResponse, + call: &SnapshotCall, +) -> SnapshotResponse { match response { SnapshotResponse::ToolResult(result) => { + let mut content = result + .content + .iter() + .map(normalize_snapshot_content) + .collect::>(); + maybe_drop_settled_prompt_echo(&mut content, call); SnapshotResponse::ToolResult(SnapshotCallToolResult { is_error: result.is_error, - content: result - .content - .iter() - 
.map(normalize_snapshot_content) - .collect(), + content, }) } SnapshotResponse::ServiceError(err) => SnapshotResponse::ServiceError(err.clone()), } } +fn maybe_drop_settled_prompt_echo(content: &mut Vec, call: &SnapshotCall) { + if !is_repl_tool_name(&call.tool) { + return; + } + + let Some(Value::Object(args)) = &call.arguments else { + return; + }; + let Some(Value::String(input)) = args.get("input") else { + return; + }; + let mut input_lines = split_input_lines(input).into_iter(); + let Some(input_line) = input_lines.next() else { + return; + }; + if input_lines.next().is_some() { + return; + } + + let has_prompt_only = content.iter().any(|item| match item { + SnapshotContent::Text { text } => prompt_only_snapshot_text(text), + _ => false, + }); + let has_stderr = content.iter().any(|item| match item { + SnapshotContent::Text { text } => text.contains("stderr:"), + _ => false, + }); + if !(has_prompt_only && has_stderr) { + return; + } + + content.retain(|item| match item { + SnapshotContent::Text { text } => !prompt_echo_matches_input(text, &input_line), + _ => true, + }); +} + +fn prompt_only_snapshot_text(text: &str) -> bool { + text.lines().all(|line| { + if line.is_empty() { + return true; + } + strip_prompt_prefix(line) + .map(|rest| rest.trim().is_empty()) + .unwrap_or(false) + }) +} + +fn prompt_echo_matches_input(text: &str, input_line: &str) -> bool { + let mut lines = text.lines(); + let Some(line) = lines.next() else { + return false; + }; + if lines.next().is_some() { + return false; + } + strip_prompt_prefix(line) + .map(|rest| rest == input_line) + .unwrap_or(false) +} + fn normalize_snapshot_content(content: &SnapshotContent) -> SnapshotContent { match content { SnapshotContent::Text { text } => SnapshotContent::Text { diff --git a/tests/debug_events_tool_calls.rs b/tests/debug_events_tool_calls.rs index 95b823c0..573b1cdf 100644 --- a/tests/debug_events_tool_calls.rs +++ b/tests/debug_events_tool_calls.rs @@ -9,7 +9,7 @@ use serde_json::Value; 
async fn debug_events_include_tool_call_arguments_and_results() -> TestResult<()> { let temp = tempfile::tempdir()?; let debug_dir = temp.path().join("debug"); - let mut session = spawn_server_with_env_vars(vec![( + let session = spawn_server_with_env_vars(vec![( "MCP_REPL_DEBUG_DIR".to_string(), debug_dir.to_string_lossy().to_string(), )]) diff --git a/tests/debug_repl_prompt.rs b/tests/debug_repl_prompt.rs index a338ba64..3c00520e 100644 --- a/tests/debug_repl_prompt.rs +++ b/tests/debug_repl_prompt.rs @@ -103,14 +103,14 @@ fn wait_for_prompt_or_idle( (saw_prompt, saw_idle) } -#[test] -fn debug_repl_prints_initial_prompt() -> TestResult<()> { +fn assert_debug_repl_starts(extra_args: &[&str]) -> TestResult<()> { let _guard = debug_repl_test_mutex() .lock() .expect("debug repl prompt test mutex poisoned"); let exe = resolve_mcp_repl_path()?; let mut cmd = Command::new(exe); cmd.arg("--debug-repl"); + cmd.args(extra_args); #[cfg(target_os = "macos")] if !sandbox_exec_available() { cmd.arg("--sandbox").arg("danger-full-access"); @@ -185,6 +185,16 @@ fn debug_repl_prints_initial_prompt() -> TestResult<()> { Ok(()) } +#[test] +fn debug_repl_prints_initial_prompt() -> TestResult<()> { + assert_debug_repl_starts(&[]) +} + +#[test] +fn debug_repl_inherit_prints_initial_prompt() -> TestResult<()> { + assert_debug_repl_starts(&["--sandbox", "inherit"]) +} + #[test] fn debug_repl_files_mode_uses_output_bundle_dir_for_large_output() -> TestResult<()> { let _guard = debug_repl_test_mutex() diff --git a/tests/interrupt.rs b/tests/interrupt.rs index cdf2711c..51874055 100644 --- a/tests/interrupt.rs +++ b/tests/interrupt.rs @@ -86,7 +86,7 @@ fn backend_unavailable(text: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn interrupt_unblocks_long_running_request() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_interrupt_session().await?; + let session = spawn_interrupt_session().await?; let timeout_result = session 
.write_stdin_raw_with("Sys.sleep(30)", Some(0.5)) @@ -151,7 +151,7 @@ async fn interrupt_unblocks_long_running_request() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_ctrl_c_prefix_interrupts_then_runs_remaining_input() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_interrupt_session().await?; + let session = spawn_interrupt_session().await?; let timeout_result = session .write_stdin_raw_with("Sys.sleep(30)", Some(0.5)) @@ -191,7 +191,7 @@ async fn write_stdin_ctrl_c_prefix_interrupts_then_runs_remaining_input() -> Tes #[tokio::test(flavor = "multi_thread")] async fn pager_ctrl_c_prefix_preserves_interrupt_output() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_interrupt_session().await?; + let session = spawn_interrupt_session().await?; let long_sleep = r#"tryCatch({ Sys.sleep(30) }, interrupt = function(e) cat("interrupt received\n"))"#; @@ -231,7 +231,7 @@ async fn pager_ctrl_c_prefix_preserves_interrupt_output() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_ctrl_c_prefix_interrupts_then_runs_remaining_input_on_windows() -> TestResult<()> { - let mut session = spawn_interrupt_session().await?; + let session = spawn_interrupt_session().await?; let long_sleep = r#" cat("INTERRUPT_READY\n") @@ -323,7 +323,7 @@ tryCatch( #[tokio::test(flavor = "multi_thread")] async fn write_stdin_ctrl_d_prefix_restarts_then_runs_remaining_input() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_interrupt_session().await?; + let session = spawn_interrupt_session().await?; let _ = session.write_stdin_raw_with("x <- 1", Some(5.0)).await?; @@ -375,7 +375,7 @@ async fn write_stdin_ctrl_d_prefix_restarts_then_runs_remaining_input() -> TestR #[tokio::test(flavor = "multi_thread")] async fn pager_ctrl_d_prefix_preserves_restart_notice() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = 
spawn_interrupt_session().await?; + let session = spawn_interrupt_session().await?; let result = session .write_stdin_raw_with("\u{4}print('AFTER_RESET')", Some(10.0)) @@ -407,7 +407,7 @@ async fn pager_ctrl_d_prefix_preserves_restart_notice() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn ctrl_d_prefix_in_files_mode_separates_restart_notice_from_output() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session .write_stdin_raw_with("\u{4}cat('AFTER_RESET\\n')", Some(10.0)) diff --git a/tests/manage_session_behavior.rs b/tests/manage_session_behavior.rs index 7360d82b..fd5c54ba 100644 --- a/tests/manage_session_behavior.rs +++ b/tests/manage_session_behavior.rs @@ -49,7 +49,7 @@ async fn interrupt_without_active_request_returns_prompt() -> TestResult<()> { let _guard = test_mutex() .lock() .map_err(|_| "manage_session_behavior test mutex poisoned")?; - let mut session = spawn_manage_session().await?; + let session = spawn_manage_session().await?; let _ = session.write_stdin_raw_with("1+1", Some(5.0)).await?; let result = session.write_stdin_raw_with("\u{3}", Some(5.0)).await?; @@ -106,7 +106,7 @@ async fn restart_while_busy_resets_session() -> TestResult<()> { let _guard = test_mutex() .lock() .map_err(|_| "manage_session_behavior test mutex poisoned")?; - let mut session = spawn_manage_session().await?; + let session = spawn_manage_session().await?; let _ = session .write_stdin_raw_with("x <- 1; Sys.sleep(5)", Some(0.1)) diff --git a/tests/pager.rs b/tests/pager.rs index e071c732..877eddb1 100644 --- a/tests/pager.rs +++ b/tests/pager.rs @@ -90,7 +90,7 @@ fn assert_snapshot_or_skip(name: &str, snapshot: &McpSnapshot) -> TestResult<()> #[cfg(not(windows))] #[tokio::test(flavor = "multi_thread")] async fn pager_commands_are_handled_server_side() -> TestResult<()> { - let mut session = 
common::spawn_server_with_pager_page_chars(120).await?; + let session = common::spawn_server_with_pager_page_chars(120).await?; let initial = session .write_stdin_raw_with( @@ -151,7 +151,7 @@ async fn pager_commands_are_handled_server_side() -> TestResult<()> { #[cfg(not(windows))] #[tokio::test(flavor = "multi_thread")] async fn pager_matches_stays_inline_in_pager_mode() -> TestResult<()> { - let mut session = common::spawn_server_with_pager_page_chars(120).await?; + let session = common::spawn_server_with_pager_page_chars(120).await?; let initial = session .write_stdin_raw_with( @@ -403,8 +403,8 @@ async fn pager_smoke() -> TestResult<()> { } #[tokio::test(flavor = "multi_thread")] -async fn pager_empty_input_starts_worker_and_surfaces_startup_failure() -> TestResult<()> { - let mut session = common::spawn_server_with_args_env_and_pager_page_chars( +async fn pager_empty_input_does_not_start_worker_before_inherit_update() -> TestResult<()> { + let session = common::spawn_server_with_args_env_and_pager_page_chars( vec!["--sandbox".to_string(), "inherit".to_string()], Vec::new(), 80, @@ -416,12 +416,25 @@ async fn pager_empty_input_starts_worker_and_surfaces_startup_failure() -> TestR .await?; let text = result_text(&result); - session.cancel().await?; + if backend_unavailable(&text) { + eprintln!("pager backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } assert!( text.contains("--sandbox inherit requested but no client sandbox state was provided"), - "expected initial empty pager input to surface worker startup failure, got: {text:?}" + "expected initial empty pager input to surface sandbox-state failure, got: {text:?}" ); + assert!( + !text.contains("<>"), + "did not expect empty pager input to spawn the worker before a sandbox update, got: {text:?}" + ); + assert!( + !text.contains(">"), + "did not expect empty pager input to surface a worker prompt before a sandbox update, got: {text:?}" + ); + 
session.cancel().await?; Ok(()) } @@ -487,7 +500,7 @@ async fn pager_empty_input_advances_page() -> TestResult<()> { #[cfg(windows)] #[tokio::test(flavor = "multi_thread")] async fn empty_poll_while_busy_preserves_busy_pager_state() -> TestResult<()> { - let mut session = common::spawn_server_with_pager_page_chars(80).await?; + let session = common::spawn_server_with_pager_page_chars(80).await?; let initial = session .write_stdin_raw_with( diff --git a/tests/pager_flags.rs b/tests/pager_flags.rs index df940648..99454227 100644 --- a/tests/pager_flags.rs +++ b/tests/pager_flags.rs @@ -38,7 +38,7 @@ async fn matches_limit_parses_long_flag() -> TestResult<()> { let _guard = test_mutex() .lock() .map_err(|_| "pager_flags test mutex poisoned")?; - let mut session = common::spawn_server_with_pager_page_chars(60).await?; + let session = common::spawn_server_with_pager_page_chars(60).await?; let setup = session .write_stdin_raw_with( @@ -75,7 +75,7 @@ async fn hits_count_parses_long_flag() -> TestResult<()> { let _guard = test_mutex() .lock() .map_err(|_| "pager_flags test mutex poisoned")?; - let mut session = common::spawn_server_with_pager_page_chars(60).await?; + let session = common::spawn_server_with_pager_page_chars(60).await?; let setup = session .write_stdin_raw_with( diff --git a/tests/pager_hits_seek.rs b/tests/pager_hits_seek.rs index 6ecd282e..526e4e0f 100644 --- a/tests/pager_hits_seek.rs +++ b/tests/pager_hits_seek.rs @@ -30,7 +30,7 @@ fn backend_unavailable(text: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn hits_after_seek_does_not_repeat() -> TestResult<()> { - let mut session = common::spawn_server_with_pager_page_chars(120).await?; + let session = common::spawn_server_with_pager_page_chars(120).await?; let setup = session .write_stdin_raw_with( diff --git a/tests/pager_page_size.rs b/tests/pager_page_size.rs index 02546d0f..c998ea43 100644 --- a/tests/pager_page_size.rs +++ b/tests/pager_page_size.rs @@ -49,7 +49,7 @@ fn 
busy_response(text: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn respects_configured_small_page_size() -> TestResult<()> { let page_bytes = 80; - let mut session = common::spawn_server_with_pager_page_chars(page_bytes).await?; + let session = common::spawn_server_with_pager_page_chars(page_bytes).await?; let mut result = session .write_stdin_raw_with("for (i in 1:50) cat('abcd\\n')", Some(30.0)) @@ -116,7 +116,7 @@ async fn respects_configured_small_page_size() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn respects_configured_large_page_size() -> TestResult<()> { let page_bytes = 10_000; - let mut session = common::spawn_server_with_pager_page_chars(page_bytes).await?; + let session = common::spawn_server_with_pager_page_chars(page_bytes).await?; let mut result = session .write_stdin_raw_with("for (i in 1:10) cat('abcd\\n\')", Some(30.0)) @@ -166,7 +166,7 @@ async fn respects_configured_large_page_size() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn large_page_size_keeps_pager_mode_instead_of_spilling_to_output_bundle() -> TestResult<()> { let page_bytes = 10_000; - let mut session = common::spawn_server_with_pager_page_chars(page_bytes).await?; + let session = common::spawn_server_with_pager_page_chars(page_bytes).await?; let initial = session .write_stdin_raw_with( diff --git a/tests/pager_seek.rs b/tests/pager_seek.rs index fdc4bb5a..e8dae9d5 100644 --- a/tests/pager_seek.rs +++ b/tests/pager_seek.rs @@ -30,7 +30,7 @@ fn backend_unavailable(text: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn seek_parses_offset_and_percent() -> TestResult<()> { - let mut session = common::spawn_server_with_pager_page_chars(36).await?; + let session = common::spawn_server_with_pager_page_chars(36).await?; let setup = session .write_stdin_raw_with("for (i in 1:100) cat(sprintf(\"L%04d\\n\", i))", Some(30.0)) diff --git a/tests/pager_skip.rs b/tests/pager_skip.rs index 50b149ec..4e395dde 100644 --- 
a/tests/pager_skip.rs +++ b/tests/pager_skip.rs @@ -48,7 +48,7 @@ fn first_line_number(text: &str) -> Option { #[tokio::test(flavor = "multi_thread")] async fn skip_advances_without_printing_intermediate_pages() -> TestResult<()> { - let mut session = common::spawn_server_with_pager_page_chars(60).await?; + let session = common::spawn_server_with_pager_page_chars(60).await?; let result = session .write_stdin_raw_with("for (i in 1:60) cat(sprintf(\"L%04d\\n\", i))", Some(120.0)) diff --git a/tests/pager_where.rs b/tests/pager_where.rs index bbcd7353..7fc145a7 100644 --- a/tests/pager_where.rs +++ b/tests/pager_where.rs @@ -48,7 +48,7 @@ fn first_line_number(text: &str) -> Option { #[tokio::test(flavor = "multi_thread")] async fn where_does_not_advance_cursor() -> TestResult<()> { - let mut session = common::spawn_server_with_pager_page_chars(60).await?; + let session = common::spawn_server_with_pager_page_chars(60).await?; let initial = session .write_stdin_raw_with("for (i in 1:60) cat(sprintf(\"L%04d\\n\", i))", Some(30.0)) diff --git a/tests/plot_images.rs b/tests/plot_images.rs index 46af01b8..769539a1 100644 --- a/tests/plot_images.rs +++ b/tests/plot_images.rs @@ -526,7 +526,7 @@ fn is_prompt_line(line: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn plots_emit_images_and_updates() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let mut steps = Vec::new(); let plot_input = "plot(1:10)"; @@ -607,7 +607,7 @@ async fn plots_emit_images_and_updates() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn plots_emit_stable_images_for_repeats() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let mut steps = Vec::new(); let plot_input = "plot(1:10)"; @@ -681,7 +681,7 @@ async fn plots_emit_stable_images_for_repeats() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn 
multi_panel_plots_emit_single_image() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let mut steps = Vec::new(); let plot_input = "par(mfrow = c(2, 1)); plot(1:10); plot(10:1)"; @@ -733,7 +733,7 @@ async fn multi_panel_plots_emit_single_image() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn plots_emit_images_when_paged_output() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = "line <- paste(rep(\"x\", 200), collapse = \"\"); for (i in 1:50) cat(line, \"\\n\"); plot(1:10)"; let result = session.write_stdin_raw_with(input, Some(30.0)).await?; @@ -770,7 +770,7 @@ async fn plots_emit_images_when_paged_output() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn plots_respect_numeric_size_options() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = "options(console.plot.width = 4, console.plot.height = 3, console.plot.dpi = 100); plot(1:10)"; let result = session.write_stdin_raw_with(input, Some(30.0)).await?; @@ -804,7 +804,7 @@ async fn plots_respect_numeric_size_options() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn grid_plots_emit_images_and_updates() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let mut steps = Vec::new(); let plot_input = "grid::grid.newpage(); grid::grid.lines(x = c(0.1, 0.9), y = c(0.1, 0.9))"; @@ -885,7 +885,7 @@ async fn grid_plots_emit_images_and_updates() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn grid_plots_emit_stable_images_for_repeats() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let mut steps = Vec::new(); let plot_input = "grid::grid.newpage(); 
grid::grid.lines(x = c(0.1, 0.9), y = c(0.1, 0.9))"; @@ -959,7 +959,7 @@ async fn grid_plots_emit_stable_images_for_repeats() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn plot_updates_in_single_request_collapse() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = "plot(1:10); lines(2:9, 2:9); lines(2:9, 2:9)"; let result = session.write_stdin_raw_with(input, Some(30.0)).await?; @@ -988,7 +988,7 @@ async fn plot_updates_in_single_request_collapse() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn plot_emitted_after_large_output() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = r#" cat(paste(rep("x", 3000000), collapse = "")) @@ -1030,7 +1030,7 @@ plot(1:10) #[tokio::test(flavor = "multi_thread")] async fn mixed_plot_reply_with_four_images_and_under_grace_text_stays_inline() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = format!( r#" @@ -1082,7 +1082,7 @@ for (i in 1:4) {{ #[tokio::test(flavor = "multi_thread")] async fn mixed_plot_reply_with_two_images_and_over_grace_text_uses_output_bundle() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = format!( r#" @@ -1136,7 +1136,7 @@ for (i in 1:2) {{ #[tokio::test(flavor = "multi_thread")] async fn single_image_over_grace_text_does_not_duplicate_pre_image_preview() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = format!( r#" @@ -1201,7 +1201,7 @@ cat("\nPOST_END\n") #[tokio::test(flavor = "multi_thread")] async fn mixed_plot_replies_output_bundle_and_keep_first_and_last_images() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; 
+ let session = spawn_server_with_files().await?; let input = r#" for (i in 1:6) { @@ -1308,7 +1308,7 @@ for (i in 1:6) { #[tokio::test(flavor = "multi_thread")] async fn mixed_output_bundle_events_log_keeps_partial_line_ranges_stable() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = r#" cat("a") @@ -1376,7 +1376,7 @@ for (i in 1:5) { #[tokio::test(flavor = "multi_thread")] async fn timeout_image_output_bundle_backfills_earlier_worker_text() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = r#" cat("warn000\n") @@ -1450,7 +1450,7 @@ for (i in 1:6) { #[tokio::test(flavor = "multi_thread")] async fn timeout_output_bundle_text_only_poll_does_not_duplicate_prefix_text() -> TestResult<()> { - let mut session = spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = r#" cat("HEAD_ONLY\n") @@ -1541,7 +1541,7 @@ cat("TAIL_ONLY\n") #[tokio::test(flavor = "multi_thread")] async fn timeout_output_bundle_image_only_omission_still_discloses_bundle_path() -> TestResult<()> { let temp = tempdir()?; - let mut session = spawn_server_with_files_env_vars(vec![ + let session = spawn_server_with_files_env_vars(vec![ ("TMPDIR".to_string(), temp.path().display().to_string()), ( "MCP_REPL_OUTPUT_BUNDLE_MAX_BYTES".to_string(), @@ -1597,7 +1597,7 @@ Sys.sleep(1) #[tokio::test(flavor = "multi_thread")] async fn timeout_output_bundle_survives_missing_anchor_image() -> TestResult<()> { let temp = tempdir()?; - let mut session = spawn_server_with_files_env_vars(vec![( + let session = spawn_server_with_files_env_vars(vec![( "TMPDIR".to_string(), temp.path().display().to_string(), )]) @@ -1670,7 +1670,7 @@ Sys.sleep(1) #[tokio::test(flavor = "multi_thread")] async fn same_reply_plot_updates_bundle_preserves_image_history() -> TestResult<()> { - let mut session = 
spawn_server_with_files().await?; + let session = spawn_server_with_files().await?; let input = format!( r#" @@ -1756,8 +1756,8 @@ lines(3:8, 3:8) #[tokio::test(flavor = "multi_thread")] async fn same_reply_plot_updates_stay_inline_and_show_final_state() -> TestResult<()> { - let mut batch_session = spawn_server_with_files().await?; - let mut control_session = spawn_server_with_files().await?; + let batch_session = spawn_server_with_files().await?; + let control_session = spawn_server_with_files().await?; let steps = [ "plot(1:10)", diff --git a/tests/python_backend.rs b/tests/python_backend.rs index 2b348253..a9374baf 100644 --- a/tests/python_backend.rs +++ b/tests/python_backend.rs @@ -82,7 +82,7 @@ async fn start_python_session_with_env_vars( return Ok(None); } - let mut session = common::spawn_server_with_args_env( + let session = common::spawn_server_with_args_env( vec![ "--interpreter".to_string(), "python".to_string(), @@ -94,7 +94,7 @@ async fn start_python_session_with_env_vars( env_vars, ) .await?; - let probe = session.write_stdin_raw_with("", Some(2.0)).await?; + let probe = session.write_stdin_raw_with("pass", Some(2.0)).await?; let probe_text = result_text(&probe); if probe_text.contains("worker io error: Permission denied") || probe_text.contains("python backend requires a unix-style pty") @@ -187,7 +187,7 @@ print("ipc background ready") #[tokio::test(flavor = "multi_thread")] async fn python_smoke() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? 
else { return Ok(()); }; @@ -216,7 +216,7 @@ async fn python_smoke_without_register_at_fork() -> TestResult<()> { "import os\ntry:\n del os.register_at_fork\nexcept AttributeError:\n pass\n", )?; - let Some(mut session) = start_python_session_with_env_vars(vec![( + let Some(session) = start_python_session_with_env_vars(vec![( "PYTHONPATH".to_string(), temp.path().display().to_string(), )]) @@ -241,7 +241,7 @@ async fn python_smoke_without_register_at_fork() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_follow_up_after_resolved_timeout_trims_detached_echo_prefix_in_files_mode() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -338,7 +338,7 @@ os.fdopen = _wrapped_fdopen "#, )?; - let Some(mut session) = start_python_session_with_env_vars(vec![ + let Some(session) = start_python_session_with_env_vars(vec![ ("PYTHONPATH".to_string(), temp.path().display().to_string()), ( "MCP_REPL_FORK_CLOSE_MARKER".to_string(), @@ -426,7 +426,7 @@ async fn python_quit_does_not_wait_for_detached_stdio_holders() -> TestResult<() #[cfg(unix)] #[tokio::test(flavor = "multi_thread")] async fn python_respawn_does_not_wait_for_detached_stdio_holders() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -538,7 +538,7 @@ async fn python_quit_does_not_wait_for_background_ipc_holders() -> TestResult<() #[cfg(unix)] #[tokio::test(flavor = "multi_thread")] async fn python_respawn_does_not_wait_for_background_ipc_holders() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? 
else { return Ok(()); }; @@ -606,7 +606,7 @@ print("ipc respawn armed") #[tokio::test(flavor = "multi_thread")] async fn python_multiline_block() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -627,7 +627,7 @@ async fn python_multiline_block() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_multiline_block_does_not_echo_input_in_visible_reply() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -660,7 +660,7 @@ async fn python_multiline_block_does_not_echo_input_in_visible_reply() -> TestRe #[tokio::test(flavor = "multi_thread")] async fn python_input_roundtrip() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -708,7 +708,7 @@ async fn python_input_roundtrip() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_busy_discards_input() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -730,7 +730,7 @@ async fn python_busy_discards_input() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_stderr_merged_into_output() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -755,7 +755,7 @@ async fn python_stderr_merged_into_output() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_interrupt_unblocks_long_running_request() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? 
else { return Ok(()); }; @@ -801,7 +801,7 @@ async fn python_interrupt_unblocks_long_running_request() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_detached_idle_output_does_not_bundle_follow_up_reply() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -871,7 +871,7 @@ print("parent ready") #[tokio::test(flavor = "multi_thread")] async fn python_idle_exit_preserves_detached_tail_before_respawn() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -923,7 +923,7 @@ async fn python_idle_exit_preserves_detached_tail_before_respawn() -> TestResult #[tokio::test(flavor = "multi_thread")] async fn python_restart_does_not_leak_old_generation_output() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -984,7 +984,7 @@ async fn python_restart_does_not_leak_old_generation_output() -> TestResult<()> #[tokio::test(flavor = "multi_thread")] async fn python_detached_incomplete_utf8_tail_does_not_merge_into_next_request() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -1072,7 +1072,7 @@ print("parent ready") #[tokio::test(flavor = "multi_thread")] async fn python_interrupt_discards_buffered_tail_after_timeout() -> TestResult<()> { let _guard = lock_test_mutex(); - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? 
else { return Ok(()); }; @@ -1143,7 +1143,7 @@ async fn python_interrupt_discards_buffered_tail_after_timeout() -> TestResult<( #[tokio::test(flavor = "multi_thread")] async fn python_multistatement_payload_completes() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -1165,7 +1165,7 @@ async fn python_multistatement_payload_completes() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_exception_reported_in_output() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -1187,7 +1187,7 @@ async fn python_exception_reported_in_output() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_pdb_roundtrip() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? else { return Ok(()); }; @@ -1220,7 +1220,7 @@ async fn python_pdb_roundtrip() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn python_input_can_consume_buffered_lines() -> TestResult<()> { - let Some(mut session) = start_python_session().await? else { + let Some(session) = start_python_session().await? 
else { return Ok(()); }; diff --git a/tests/python_plot_images.rs b/tests/python_plot_images.rs index 62298d9e..899b00f6 100644 --- a/tests/python_plot_images.rs +++ b/tests/python_plot_images.rs @@ -400,7 +400,7 @@ async fn python_plots_emit_images_and_updates() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let mut steps = Vec::new(); let plot_input = format!( @@ -413,7 +413,7 @@ async fn python_plots_emit_images_and_updates() -> TestResult<()> { steps.push(step_snapshot(&plot_input, &plot_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let update_input = format!( "{}; plt.figure(1); plt.plot(list(range(4, 9)), list(range(4, 9))); plt.show()", python_plot_preamble() @@ -424,7 +424,7 @@ async fn python_plots_emit_images_and_updates() -> TestResult<()> { steps.push(step_snapshot(&update_input, &update_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let noop_input = "1+1"; let noop_result = session.write_stdin_raw_with(noop_input, Some(30.0)).await?; steps.push(step_snapshot(noop_input, &noop_result)); @@ -483,7 +483,7 @@ async fn python_plots_emit_stable_images_for_repeats() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let mut steps = Vec::new(); let plot_input = format!( @@ -496,14 +496,14 @@ async fn python_plots_emit_stable_images_for_repeats() -> TestResult<()> { steps.push(step_snapshot(&plot_input, &first_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + 
let session = common::spawn_python_server_with_files().await?; let second_result = session .write_stdin_raw_with(&plot_input, Some(30.0)) .await?; steps.push(step_snapshot(&plot_input, &second_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let noop_input = "1+1"; let noop_result = session.write_stdin_raw_with(noop_input, Some(30.0)).await?; steps.push(step_snapshot(noop_input, &noop_result)); @@ -559,7 +559,7 @@ async fn python_multi_panel_plots_emit_single_image() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let mut steps = Vec::new(); let plot_input = format!( @@ -608,7 +608,7 @@ async fn python_plots_emit_images_when_paged_output() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = spawn_python_server_with_pager_page_chars(200).await?; + let session = spawn_python_server_with_pager_page_chars(200).await?; let input = format!( "{}; line = 'x' * 200; exec(\"for _ in range(50):\\\\n print(line)\"); plt.figure(1); plt.clf(); plt.plot(list(range(1, 11))); plt.show()", @@ -647,7 +647,7 @@ async fn python_grid_plots_emit_images_and_updates() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let mut steps = Vec::new(); let plot_input = format!( @@ -660,7 +660,7 @@ async fn python_grid_plots_emit_images_and_updates() -> TestResult<()> { steps.push(step_snapshot(&plot_input, &plot_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let update_input = format!( "{}; plt.figure(2); 
plt.plot([0.1, 0.9], [0.9, 0.1]); plt.show()", python_plot_preamble() @@ -671,7 +671,7 @@ async fn python_grid_plots_emit_images_and_updates() -> TestResult<()> { steps.push(step_snapshot(&update_input, &update_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let noop_input = "1+1"; let noop_result = session.write_stdin_raw_with(noop_input, Some(30.0)).await?; steps.push(step_snapshot(noop_input, &noop_result)); @@ -730,7 +730,7 @@ async fn python_grid_plots_emit_stable_images_for_repeats() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let mut steps = Vec::new(); let plot_input = format!( @@ -743,14 +743,14 @@ async fn python_grid_plots_emit_stable_images_for_repeats() -> TestResult<()> { steps.push(step_snapshot(&plot_input, &first_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let second_result = session .write_stdin_raw_with(&plot_input, Some(30.0)) .await?; steps.push(step_snapshot(&plot_input, &second_result)); session.cancel().await?; - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let noop_input = "1+1"; let noop_result = session.write_stdin_raw_with(noop_input, Some(30.0)).await?; steps.push(step_snapshot(noop_input, &noop_result)); @@ -809,7 +809,7 @@ async fn python_plot_updates_in_single_request_collapse() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let input = format!( "{}; plt.figure(1); plt.clf(); 
plt.plot(list(range(1, 11))); plt.plot(list(range(2, 10)), list(range(2, 10))); plt.plot(list(range(2, 10)), list(range(2, 10))); plt.show()", @@ -840,7 +840,7 @@ async fn python_plot_emitted_after_large_output() -> TestResult<()> { if !python_plot_tests_enabled() { return Ok(()); } - let mut session = spawn_python_server_with_pager_page_chars(5_000_000).await?; + let session = spawn_python_server_with_pager_page_chars(5_000_000).await?; let input = format!( "{}; print('x' * 3000000); print('END'); plt.figure(1); plt.clf(); plt.plot(list(range(1, 11))); plt.show()", @@ -881,7 +881,7 @@ async fn python_mixed_plot_replies_output_bundle_and_keep_first_and_last_images( if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let input = format!( "{}; exec(\"for i in range(1, 7):\\n print(f'warn{{i:03d}}')\\n plt.figure(i)\\n plt.clf()\\n plt.plot(list(range(1, 11)))\\n plt.title(f'plot{{i:03d}}')\\n plt.show()\")", @@ -985,7 +985,7 @@ async fn python_same_reply_plot_updates_bundle_preserves_image_history() -> Test if !python_plot_tests_enabled() { return Ok(()); } - let mut session = common::spawn_python_server_with_files().await?; + let session = common::spawn_python_server_with_files().await?; let input = format!( "{}; exec(\"big = 'h' * {}\\nprint('HISTORY_START')\\nprint(big)\\nprint('HISTORY_END')\\nplt.figure(1)\\nplt.clf()\\nplt.plot(list(range(1, 11)))\\nplt.plot(list(range(2, 10)), list(range(2, 10)))\\nplt.plot(list(range(3, 9)), list(range(3, 9)))\\nplt.show()\")", diff --git a/tests/r_console_encoding.rs b/tests/r_console_encoding.rs index 4347614b..ea63595c 100644 --- a/tests/r_console_encoding.rs +++ b/tests/r_console_encoding.rs @@ -48,7 +48,7 @@ fn assert_no_console_encoding_artifacts(text: &str) { #[cfg(windows)] #[tokio::test(flavor = "multi_thread")] async fn write_stdin_windows_output_has_no_utf8_marker_artifacts() -> 
TestResult<()> { - let mut session = common::spawn_server_with_args(vec![ + let session = common::spawn_server_with_args(vec![ "--sandbox".to_string(), "danger-full-access".to_string(), ]) diff --git a/tests/r_file_show.rs b/tests/r_file_show.rs index 7cdf7165..8b68717f 100644 --- a/tests/r_file_show.rs +++ b/tests/r_file_show.rs @@ -30,7 +30,7 @@ fn backend_unavailable(text: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn file_show_returns_full_output_without_pager() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let timeout_secs = if cfg!(windows) { 60.0 } else { 30.0 }; let result = session diff --git a/tests/r_help.rs b/tests/r_help.rs index c9d47eae..6baab210 100644 --- a/tests/r_help.rs +++ b/tests/r_help.rs @@ -30,7 +30,7 @@ fn backend_unavailable(text: &str) -> bool { #[tokio::test(flavor = "multi_thread")] async fn text_help_is_llm_friendly() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session.write_stdin_raw_with("?mean", Some(30.0)).await?; session.cancel().await?; diff --git a/tests/r_startup.rs b/tests/r_startup.rs index 9b4d8a8d..52c811ba 100644 --- a/tests/r_startup.rs +++ b/tests/r_startup.rs @@ -66,7 +66,7 @@ async fn r_respects_rprofile_and_renviron_on_startup() -> TestResult<()> { } } - let mut session = common::spawn_server_with_env_vars(env_vars).await?; + let session = common::spawn_server_with_env_vars(env_vars).await?; let input = r#" cat("RPROFILE=", getOption("mcp_repl_rprofile_test"), "\n", sep = "") diff --git a/tests/repl_surface.rs b/tests/repl_surface.rs index 1e10447f..2f449a6e 100644 --- a/tests/repl_surface.rs +++ b/tests/repl_surface.rs @@ -78,7 +78,7 @@ fn events_log_path(text: &str) -> Option { #[tokio::test(flavor = "multi_thread")] async fn repl_tool_accepts_input_and_timeout_ms() -> TestResult<()> { - let mut 
session = common::spawn_server().await?; + let session = common::spawn_server().await?; let result = session .call_tool_raw( @@ -108,7 +108,7 @@ async fn repl_tool_accepts_input_and_timeout_ms() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn pager_keeps_plot_image_before_later_stdout() -> TestResult<()> { - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let result = session .write_stdin_raw_with("plot(1:10)\ncat('done\\n')", Some(30.0)) @@ -140,7 +140,7 @@ async fn pager_keeps_plot_image_before_later_stdout() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn files_keeps_plot_image_before_later_stdout() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session .write_stdin_raw_with("plot(1:10)\ncat('done\\n')", Some(30.0)) @@ -172,7 +172,7 @@ async fn files_keeps_plot_image_before_later_stdout() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn repl_reset_clears_state() -> TestResult<()> { - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let set_var = session .call_tool_raw( @@ -228,7 +228,7 @@ async fn repl_reset_clears_state() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn repl_tool_hides_ipc_fd_env_vars_from_r_user_code() -> TestResult<()> { - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let result = session .call_tool_raw( @@ -262,7 +262,7 @@ async fn repl_tool_hides_ipc_fd_env_vars_from_r_user_code() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn first_base_plot_emits_one_nontrivial_image() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session .call_tool_raw( @@ -304,7 +304,7 @@ async fn 
first_base_plot_emits_one_nontrivial_image() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn multiple_base_plots_in_one_reply_emit_each_image() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session .call_tool_raw( @@ -349,7 +349,7 @@ async fn multiple_base_plots_in_one_reply_emit_each_image() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn base_plots_above_inline_limit_use_bundle_and_keep_two_anchors() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session .call_tool_raw( diff --git a/tests/reticulate_py_help.rs b/tests/reticulate_py_help.rs index 48f1371f..e8944a6c 100644 --- a/tests/reticulate_py_help.rs +++ b/tests/reticulate_py_help.rs @@ -28,7 +28,7 @@ fn prompt_only_reticulate_output_is_skipped() { #[tokio::test(flavor = "multi_thread")] async fn reticulate_py_help_is_rendered() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let result = session .write_stdin_raw_with( diff --git a/tests/sandbox.rs b/tests/sandbox.rs index 400ba134..f529dd7c 100644 --- a/tests/sandbox.rs +++ b/tests/sandbox.rs @@ -603,7 +603,7 @@ async fn sandbox_read_only_blocks_workspace_writes() -> TestResult<()> { } let repo_root = std::env::current_dir()?; let target = unique_path(&repo_root, "read-only"); - let mut session = spawn_server_with_sandbox_state(sandbox_state_read_only()).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_read_only()).await?; let result = session .write_stdin_raw_with(write_test_code(&target), Some(10.0)) .await?; @@ -635,7 +635,7 @@ async fn sandbox_workspace_write_allows_workspace_writes() -> TestResult<()> { } let repo_root = std::env::current_dir()?; let target = unique_path(&repo_root, 
"workspace-write"); - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; let result = session .write_stdin_raw_with(write_test_code(&target), Some(10.0)) .await?; @@ -679,7 +679,7 @@ async fn sandbox_workspace_write_allows_r_package_cache_root_from_config() -> Te std::fs::create_dir_all(path)?; } - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write_with_roots( + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write_with_roots( false, vec![r_package_cache_root.clone()], )) @@ -742,8 +742,7 @@ async fn sandbox_read_only_blocks_r_package_cache_root_writes() -> TestResult<() "R_USER_CACHE_DIR".to_string(), xdg_cache_home.to_string_lossy().to_string(), )]; - let mut session = - spawn_server_with_sandbox_state_and_env(sandbox_state_read_only(), env).await?; + let session = spawn_server_with_sandbox_state_and_env(sandbox_state_read_only(), env).await?; let target = unique_path( &reticulate_uv_cache_root, @@ -788,7 +787,7 @@ async fn sandbox_full_access_allows_writes_outside_workspace() -> TestResult<()> return Ok(()); } let target = unique_path(&std::env::temp_dir(), "full-access"); - let mut session = spawn_server_with_sandbox_state(sandbox_state_full_access()).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_full_access()).await?; let result = session .write_stdin_raw_with(write_test_code(&target), Some(10.0)) .await?; @@ -817,7 +816,7 @@ async fn sandbox_read_only_blocks_network_access() -> TestResult<()> { let Some(addr) = start_loopback_server_if_available().await? 
else { return Ok(()); }; - let mut session = spawn_server_with_sandbox_state(sandbox_state_read_only()).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_read_only()).await?; let result = session .write_stdin_raw_with(network_test_code(addr), Some(10.0)) .await?; @@ -849,7 +848,7 @@ async fn sandbox_reticulate_keras_backend() -> TestResult<()> { if let Some(root) = reticulate_cache_dir() { writable_roots.push(root); } - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write_with_roots( + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write_with_roots( false, writable_roots, )) @@ -924,7 +923,7 @@ async fn sandbox_workspace_write_blocks_network_access() -> TestResult<()> { let Some(addr) = start_loopback_server_if_available().await? else { return Ok(()); }; - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; let result = session .write_stdin_raw_with(network_test_code(addr), Some(10.0)) .await?; @@ -999,7 +998,7 @@ async fn sandbox_ignores_preexisting_r_session_tmpdir() -> TestResult<()> { .unwrap_or_default() .as_nanos(); let sentinel = format!("/tmp/mcp-repl-preexisting-{nanos}"); - let mut session = common::spawn_server_with_args_env_and_pager_page_chars( + let session = common::spawn_server_with_args_env_and_pager_page_chars( Vec::new(), vec![("R_SESSION_TMPDIR".to_string(), sentinel.clone())], SANDBOX_PAGER_PAGE_CHARS, @@ -1051,7 +1050,7 @@ async fn sandbox_workspace_write_allows_network_access() -> TestResult<()> { let Some(addr) = start_loopback_server_if_available().await? 
else { return Ok(()); }; - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(true)).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(true)).await?; let result = session .write_stdin_raw_with(network_test_code(addr), Some(10.0)) .await?; @@ -1078,7 +1077,7 @@ async fn sandbox_full_access_allows_network_access() -> TestResult<()> { let Some(addr) = start_loopback_server_if_available().await? else { return Ok(()); }; - let mut session = spawn_server_with_sandbox_state(sandbox_state_full_access()).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_full_access()).await?; let result = session .write_stdin_raw_with(network_test_code(addr), Some(10.0)) .await?; @@ -1103,7 +1102,7 @@ async fn sandbox_allows_sysctl_used_by_quarto() -> TestResult<()> { return Ok(()); } - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; let code = r#" brand <- suppressWarnings(system("/usr/sbin/sysctl machdep.cpu.brand_string", intern = TRUE)) status_ngroups <- system("sysctl -n kern.ngroups >/dev/null") @@ -1143,7 +1142,7 @@ async fn sandbox_allows_parallel_detect_cores() -> TestResult<()> { return Ok(()); } - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; + let session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; let code = r#" suppressWarnings({ logical <- parallel::detectCores(logical = TRUE) @@ -1181,7 +1180,7 @@ async fn sandbox_denials_linux() -> TestResult<()> { let forbidden = Path::new(&home).join(format!("mcp-repl-denied-{nanos}.txt")); let forbidden = forbidden.to_string_lossy().to_string(); - let mut session = spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; + let session = 
spawn_server_with_sandbox_state(sandbox_state_workspace_write(false)).await?; let code = format!( r#" target <- {forbidden:?} @@ -1228,7 +1227,7 @@ async fn sandbox_denials_linux_bwrap() -> TestResult<()> { let forbidden = Path::new(&home).join(format!("mcp-repl-bwrap-denied-{nanos}.txt")); let forbidden = forbidden.to_string_lossy().to_string(); - let mut session = spawn_server_with_sandbox_state_and_env( + let session = spawn_server_with_sandbox_state_and_env( sandbox_state_workspace_write(false), vec![("MCP_REPL_USE_LINUX_BWRAP".to_string(), "1".to_string())], ) @@ -1280,7 +1279,7 @@ async fn sandbox_bwrap_protects_dot_git_codex_agents() -> TestResult<()> { std::fs::create_dir_all(writable_root.join(".codex"))?; std::fs::create_dir_all(writable_root.join(".agents"))?; - let mut session = spawn_server_with_sandbox_state_and_env( + let session = spawn_server_with_sandbox_state_and_env( sandbox_state_workspace_write_with_roots(false, vec![writable_root.clone()]), vec![("MCP_REPL_USE_LINUX_BWRAP".to_string(), "1".to_string())], ) @@ -1341,7 +1340,7 @@ async fn sandbox_workspace_write_blocks_network_access_bwrap() -> TestResult<()> let Some(addr) = start_loopback_server_if_available().await? 
else { return Ok(()); }; - let mut session = spawn_server_with_sandbox_state_and_env( + let session = spawn_server_with_sandbox_state_and_env( sandbox_state_workspace_write(false), vec![("MCP_REPL_USE_LINUX_BWRAP".to_string(), "1".to_string())], ) @@ -1374,7 +1373,7 @@ async fn sandbox_bwrap_no_proc_mode_starts_worker() -> TestResult<()> { eprintln!("bwrap unavailable; skipping"); return Ok(()); } - let mut session = spawn_server_with_sandbox_state_and_env( + let session = spawn_server_with_sandbox_state_and_env( sandbox_state_workspace_write(false), vec![ ("MCP_REPL_USE_LINUX_BWRAP".to_string(), "1".to_string()), @@ -1706,7 +1705,7 @@ async fn sandbox_denials_windows() -> TestResult<()> { // This test only needs an arbitrary writable workspace root; using a temp // workspace avoids recursive ACL refreshes over the repo's build output. - let (mut session, _workspace, _cwd) = + let (session, _workspace, _cwd) = spawn_server_with_sandbox_state_in_temp_cwd(sandbox_state_workspace_write(false)).await?; let code = format!( r#" @@ -1765,7 +1764,7 @@ async fn sandbox_workspace_write_restart_blocks_file_moved_outside_writable_root )); let source_r = r_string(&source.to_string_lossy()); let target_r = r_string(&target.to_string_lossy()); - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -1866,7 +1865,7 @@ async fn sandbox_workspace_write_restart_blocks_moved_file_inside_git_dir() -> T )); let source_r = r_string(&source.to_string_lossy()); let target_r = r_string(&target.to_string_lossy()); - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -1982,7 +1981,7 @@ async fn sandbox_workspace_write_concurrent_sessions_block_file_moved_into_git_d std::fs::create_dir_all(&nested_dir)?; scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = 
windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session_a = + let session_a = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -2024,7 +2023,7 @@ cat("SOURCE_READ=", paste(readLines(source, warn = FALSE), collapse = "|"), "\n" std::fs::rename(&source, &target)?; let target_acl_before = unresolved_windows_sid_acl_entries(&target)?; - let mut session_b = + let session_b = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; let ready_b = session_b @@ -2094,10 +2093,10 @@ async fn sandbox_workspace_write_concurrent_sessions_do_not_share_session_temp_c let workspace = temp_workspace_root()?; let repo_root = workspace.path().to_path_buf(); scrub_unresolved_windows_sid_aces(&repo_root)?; - let mut session_a = + let session_a = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; - let mut session_b = + let session_b = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -2194,11 +2193,10 @@ async fn sandbox_workspace_write_concurrent_sessions_do_not_share_in_workspace_s ("TMPDIR".to_string(), temp_root_value), ]; let state = sandbox_state_workspace_write(false); - let mut session_a = + let session_a = spawn_server_with_sandbox_state_and_env_in_cwd(state.clone(), env.clone(), &repo_root) .await?; - let mut session_b = - spawn_server_with_sandbox_state_and_env_in_cwd(state, env, &repo_root).await?; + let session_b = spawn_server_with_sandbox_state_and_env_in_cwd(state, env, &repo_root).await?; let create = session_a .write_stdin_raw_with( @@ -2332,7 +2330,7 @@ async fn sandbox_workspace_write_restart_unblocks_file_moved_out_of_git_dir() -> )); let protected_r = r_string(&protected.to_string_lossy()); let restored_r = r_string(&restored.to_string_lossy()); - let mut session = + let session = 
spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -2451,7 +2449,7 @@ async fn sandbox_workspace_write_restart_allows_host_created_file_under_workspac let host_created_r = r_string(&host_created.to_string_lossy()); std::fs::create_dir_all(&nested_dir)?; - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -2469,7 +2467,7 @@ async fn sandbox_workspace_write_restart_allows_host_created_file_under_workspac std::fs::write(&host_created, b"host before restart")?; - let mut restarted = + let restarted = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; let follow_up_code = format!( @@ -2530,7 +2528,7 @@ async fn sandbox_workspace_write_restart_allows_host_created_file_under_nested_w scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -2550,7 +2548,7 @@ async fn sandbox_workspace_write_restart_allows_host_created_file_under_nested_w std::fs::create_dir_all(&nested_dir)?; std::fs::write(&host_created, b"host before restart nested")?; - let mut restarted = + let restarted = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; let current_acl = unresolved_windows_sid_acl_entries(&host_created)?; @@ -2616,7 +2614,7 @@ async fn sandbox_workspace_write_nested_midrun_file_keeps_prepared_sid() -> Test std::fs::create_dir_all(&nested_dir)?; scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -2673,8 +2671,8 @@ async fn 
sandbox_workspace_write_concurrent_sessions_share_new_workspace_file() std::fs::create_dir_all(&cwd)?; let state = sandbox_state_workspace_write_with_roots(false, vec![writable_root.path().to_path_buf()]); - let mut session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &cwd).await?; - let mut session_b = spawn_server_with_sandbox_state_in_cwd(state, &cwd).await?; + let session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &cwd).await?; + let session_b = spawn_server_with_sandbox_state_in_cwd(state, &cwd).await?; let ready_a = session_a .write_stdin_raw_with("cat('SESSION_A_READY\\n')\n", Some(10.0)) @@ -2778,8 +2776,8 @@ async fn sandbox_workspace_write_concurrent_sessions_respawn_repairs_temp_rename std::fs::create_dir_all(&cwd)?; let state = sandbox_state_workspace_write_with_roots(false, vec![writable_root.path().to_path_buf()]); - let mut session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &cwd).await?; - let mut session_b = spawn_server_with_sandbox_state_in_cwd(state, &cwd).await?; + let session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &cwd).await?; + let session_b = spawn_server_with_sandbox_state_in_cwd(state, &cwd).await?; let ready_a = session_a .write_stdin_raw_with("cat('SESSION_A_READY\\n')\n", Some(10.0)) @@ -2921,8 +2919,8 @@ async fn sandbox_workspace_write_concurrent_sessions_share_direct_workspace_file std::fs::create_dir_all(&cwd)?; let state = sandbox_state_workspace_write_with_roots(false, vec![writable_root.path().to_path_buf()]); - let mut session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &cwd).await?; - let mut session_b = spawn_server_with_sandbox_state_in_cwd(state, &cwd).await?; + let session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &cwd).await?; + let session_b = spawn_server_with_sandbox_state_in_cwd(state, &cwd).await?; let ready_a = session_a .write_stdin_raw_with("cat('SESSION_A_READY\\n')\n", Some(10.0)) @@ -3019,7 +3017,7 @@ async fn 
sandbox_workspace_write_concurrent_sessions_share_host_created_nested_w let shared_r = r_string(&shared.to_string_lossy()); scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session_a = + let session_a = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -3038,7 +3036,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_created_nested_w std::fs::create_dir_all(&nested_dir)?; std::fs::write(&shared, b"from host nested")?; - let mut session_b = + let session_b = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; let ready_b = session_b @@ -3128,7 +3126,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_renamed_nested_w &repo_root, &[writable_root.path().to_path_buf()], )?; - let mut session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &repo_root).await?; + let session_a = spawn_server_with_sandbox_state_in_cwd(state.clone(), &repo_root).await?; let ready_a = session_a .write_stdin_raw_with("cat('SESSION_A_READY\\n')\n", Some(10.0)) @@ -3148,7 +3146,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_renamed_nested_w std::fs::write(&host_file, b"from moved host tree with extra root")?; std::fs::rename(&host_pkg, &nested_dir)?; - let mut session_b = spawn_server_with_sandbox_state_in_cwd(state, &repo_root).await?; + let session_b = spawn_server_with_sandbox_state_in_cwd(state, &repo_root).await?; let ready_b = session_b .write_stdin_raw_with("cat('SESSION_B_READY\\n')\n", Some(10.0)) .await?; @@ -3235,7 +3233,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_renamed_nested_w let shared_r = r_string(&shared.to_string_lossy()); scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session_a = + let 
session_a = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -3256,7 +3254,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_renamed_nested_w std::fs::write(&host_temp, b"from host temp")?; std::fs::rename(&host_temp, &shared)?; - let mut session_b = + let session_b = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; let ready_b = session_b @@ -3341,7 +3339,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_renamed_nested_w std::fs::create_dir_all(&src_dir)?; scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session_a = + let session_a = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -3361,7 +3359,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_host_renamed_nested_w std::fs::write(&host_file, b"from moved host tree")?; std::fs::rename(&host_pkg, &nested_dir)?; - let mut session_b = + let session_b = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; let ready_b = session_b @@ -3448,7 +3446,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_file_created_inside_h std::fs::create_dir_all(&src_dir)?; scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session_a = + let session_a = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -3468,7 +3466,7 @@ async fn sandbox_workspace_write_concurrent_sessions_share_file_created_inside_h std::fs::rename(&host_pkg, &nested_dir)?; std::fs::write(&shared, b"from host file in renamed tree")?; - let mut session_b = + let session_b = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; 
let ready_b = session_b @@ -3549,7 +3547,7 @@ async fn sandbox_workspace_write_direct_midrun_file_keeps_prepared_sid() -> Test scrub_unresolved_windows_sid_aces(writable_root.path())?; let state = sandbox_state_workspace_write_with_roots(false, vec![writable_root.path().to_path_buf()]); - let (mut session, _workspace, cwd) = spawn_server_with_sandbox_state_in_temp_cwd(state).await?; + let (session, _workspace, cwd) = spawn_server_with_sandbox_state_in_temp_cwd(state).await?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&cwd, &[writable_root.path().to_path_buf()])?; @@ -3604,7 +3602,7 @@ async fn sandbox_workspace_write_session_exit_removes_launch_acl_from_nested_wor let artifact_r = r_string(&artifact.to_string_lossy()); scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -3762,7 +3760,7 @@ async fn sandbox_workspace_write_restart_repairs_stable_acl_for_late_created_nes let artifact_r = r_string(&artifact.to_string_lossy()); scrub_unresolved_windows_sid_aces(&repo_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&repo_root, &[])?; - let mut session = + let session = spawn_server_with_sandbox_state_in_cwd(sandbox_state_workspace_write(false), &repo_root) .await?; @@ -3869,7 +3867,7 @@ async fn sandbox_workspace_write_first_launch_accepts_missing_writable_root_pare let artifact_r = r_string(&artifact.to_string_lossy()); scrub_unresolved_windows_sid_aces(writable_root_parent.path())?; std::fs::create_dir_all(&cwd)?; - let mut session = spawn_server_with_sandbox_state_in_cwd( + let session = spawn_server_with_sandbox_state_in_cwd( sandbox_state_workspace_write_with_roots(false, vec![declared_root]), &cwd, ) @@ -3934,7 +3932,7 @@ async fn 
sandbox_workspace_write_first_launch_accepts_temp_prefixed_writable_roo scrub_unresolved_windows_sid_aces(&writable_root)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&cwd, std::slice::from_ref(&writable_root))?; - let mut session = spawn_server_with_sandbox_state_and_env_in_cwd( + let session = spawn_server_with_sandbox_state_and_env_in_cwd( sandbox_state_workspace_write_with_roots(false, vec![writable_root.clone()]), env, &cwd, @@ -4018,7 +4016,7 @@ async fn sandbox_workspace_write_session_exit_removes_launch_acl_from_midrun_fil std::fs::create_dir_all(&cwd)?; let expected_stable_sid = windows_workspace_write_prepared_sid_for_cwd(&cwd, &[writable_root.path().to_path_buf()])?; - let mut session = spawn_server_with_sandbox_state_in_cwd( + let session = spawn_server_with_sandbox_state_in_cwd( sandbox_state_workspace_write_with_roots(false, vec![writable_root.path().to_path_buf()]), &cwd, ) diff --git a/tests/sandbox_state_updates.rs b/tests/sandbox_state_updates.rs index bfaf999e..30300ab9 100644 --- a/tests/sandbox_state_updates.rs +++ b/tests/sandbox_state_updates.rs @@ -4,21 +4,30 @@ mod common; use common::{McpTestSession, TestResult}; use rmcp::model::{CallToolResult, RawContent}; -use serde_json::json; +use serde_json::{Value, json}; use std::fs; -use std::path::PathBuf; +use std::path::Path; use std::sync::{Mutex, OnceLock}; -use std::time::{Duration, Instant}; -use tempfile::tempdir; -use tokio::time::sleep; +use std::time::{SystemTime, UNIX_EPOCH}; +use tempfile::{Builder, TempDir, tempdir}; -const SANDBOX_STATE_METHOD: &str = "codex/sandbox-state/update"; +const SANDBOX_STATE_META_CAPABILITY: &str = "codex/sandbox-state-meta"; +const MISSING_INHERITED_STATE_MESSAGE: &str = + "--sandbox inherit requested but no client sandbox state was provided"; +const INLINE_TEXT_BUDGET_CHARS: usize = 3500; +const INLINE_TEXT_HARD_SPILL_THRESHOLD_CHARS: usize = INLINE_TEXT_BUDGET_CHARS * 5 / 4; +const UNDER_HARD_SPILL_TEXT_LEN: usize = 
INLINE_TEXT_BUDGET_CHARS + 200; +const OVER_HARD_SPILL_TEXT_LEN: usize = INLINE_TEXT_HARD_SPILL_THRESHOLD_CHARS + 200; fn test_mutex() -> &'static Mutex<()> { static TEST_MUTEX: OnceLock> = OnceLock::new(); TEST_MUTEX.get_or_init(|| Mutex::new(())) } +fn test_guard() -> std::sync::MutexGuard<'static, ()> { + test_mutex().lock().unwrap_or_else(|err| err.into_inner()) +} + fn collect_text(result: &CallToolResult) -> String { let text = result .content @@ -38,469 +47,2161 @@ fn collect_text(result: &CallToolResult) -> String { .join("\n") } -fn result_text(result: &CallToolResult) -> String { - result - .content - .iter() - .filter_map(|content| match &content.raw { - RawContent::Text(text) => Some(text.text.as_str()), - _ => None, - }) - .collect::>() - .join("") +fn linux_sandbox_exe_value(use_legacy_landlock: bool) -> Value { + #[cfg(target_os = "linux")] + { + if use_legacy_landlock { + Value::Null + } else { + Value::String("/tmp/codex-linux-sandbox".to_string()) + } + } + #[cfg(not(target_os = "linux"))] + { + let _ = use_legacy_landlock; + Value::Null + } +} + +fn codex_sandbox_state_meta( + sandbox_policy: Value, + sandbox_cwd: &Path, + use_legacy_landlock: bool, +) -> Value { + json!({ + SANDBOX_STATE_META_CAPABILITY: { + "sandboxPolicy": sandbox_policy, + "sandboxCwd": sandbox_cwd, + "useLegacyLandlock": use_legacy_landlock, + "codexLinuxSandboxExe": linux_sandbox_exe_value(use_legacy_landlock), + } + }) +} + +fn workspace_write_meta(sandbox_cwd: &Path) -> Value { + codex_sandbox_state_meta( + json!({ + "type": "workspace-write", + "writable_roots": [], + "network_access": false, + "exclude_tmpdir_env_var": false, + "exclude_slash_tmp": false, + }), + sandbox_cwd, + /*use_legacy_landlock*/ false, + ) +} + +fn workspace_write_restricted_read_meta(sandbox_cwd: &Path) -> Value { + codex_sandbox_state_meta( + json!({ + "type": "workspace-write", + "writable_roots": [], + "network_access": false, + "exclude_tmpdir_env_var": false, + "exclude_slash_tmp": false, + 
"read_only_access": { + "mode": "read-only", + }, + }), + sandbox_cwd, + /*use_legacy_landlock*/ false, + ) +} + +fn read_only_meta(sandbox_cwd: &Path) -> Value { + codex_sandbox_state_meta(json!({"type": "read-only"}), sandbox_cwd, false) +} + +fn read_only_restricted_access_meta(sandbox_cwd: &Path) -> Value { + codex_sandbox_state_meta( + json!({ + "type": "read-only", + "access": { + "mode": "read-only", + }, + }), + sandbox_cwd, + false, + ) +} + +fn read_only_network_access_meta(sandbox_cwd: &Path) -> Value { + codex_sandbox_state_meta( + json!({ + "type": "read-only", + "network_access": true, + }), + sandbox_cwd, + false, + ) } -fn disclosed_path(text: &str, suffix: &str) -> Option { +fn full_access_meta(sandbox_cwd: &Path) -> Value { + codex_sandbox_state_meta(json!({"type": "danger-full-access"}), sandbox_cwd, false) +} + +fn encode_path(path: &Path) -> TestResult { + Ok(serde_json::to_string(&path.to_string_lossy().to_string())?) +} + +fn bundle_transcript_path(text: &str) -> Option { + disclosed_path(text, "transcript.txt") +} + +fn disclosed_path(text: &str, suffix: &str) -> Option { let end = text.find(suffix)?.saturating_add(suffix.len()); let start = text[..end] .rfind(|ch: char| ch.is_whitespace() || matches!(ch, '"' | '\'' | '[' | '(')) .map_or(0, |idx| idx.saturating_add(1)); - Some(PathBuf::from(&text[start..end])) + Some(std::path::PathBuf::from(&text[start..end])) } -fn bundle_transcript_path(text: &str) -> Option { - disclosed_path(text, "transcript.txt") +fn outside_workspace_target(label: &str) -> TestResult { + let base = std::env::var_os("HOME") + .or_else(|| std::env::var_os("USERPROFILE")) + .map(std::path::PathBuf::from) + .ok_or_else(|| "missing HOME/USERPROFILE for sandbox test target".to_string())?; + let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos(); + Ok(base.join(format!(".mcp-repl-{label}-{nanos}.txt"))) } -fn sandbox_update_params(network_access: bool) -> serde_json::Value { - json!({ - "sandboxPolicy": { - 
"type": "workspace-write", - "writable_roots": [], - "network_access": network_access, - "exclude_tmpdir_env_var": false, - "exclude_slash_tmp": false - } - }) +fn repo_scratch_dir(label: &str) -> TestResult { + Ok(Builder::new() + .prefix(&format!(".tmp-{label}-")) + .tempdir_in(env!("CARGO_MANIFEST_DIR"))?) +} + +fn write_file_code(path: &Path) -> TestResult { + let target = encode_path(path)?; + Ok(format!( + r#" +target <- {target} +tryCatch({{ + writeLines("allowed", target) + cat("WRITE_OK\n") +}}, error = function(e) {{ + message("WRITE_ERROR:", conditionMessage(e)) +}}) +"# + )) +} + +fn variable_probe_code() -> &'static str { + r#"cat(sprintf("X_EXISTS:%s\n", exists("x")))"# } fn backend_unavailable(text: &str) -> bool { - text.contains("Fatal error: cannot create 'R_TempDir'") - || text.contains("failed to start R session") - || text.contains("worker exited with signal") - || text.contains("worker exited with status") - || text.contains("worker io error: Broken pipe") - || text.contains("unable to initialize the JIT") - || text.contains("libR.so: cannot open shared object file") - || text.contains("options(\"defaultPackages\") was not found") - || text.contains( - "worker protocol error: ipc disconnected while waiting for request completion", - ) -} - -fn busy_response(text: &str) -> bool { - text.contains("< TestResult { - spawn_server_retry_with_env_vars(Vec::new()).await -} - -async fn spawn_server_retry_with_env_vars( - env_vars: Vec<(String, String)>, -) -> TestResult { - let mut last_error: Option> = None; - for _ in 0..3 { - match common::spawn_server_with_env_vars(env_vars.clone()).await { - Ok(session) => return Ok(session), - Err(err) => { - let message = err.to_string(); - if message.contains( - "failed to create session temp dir: The directory is not empty. 
(os error 145)", - ) { - last_error = Some(err); - tokio::time::sleep(Duration::from_millis(200)).await; - continue; - } - return Err(err); - } - } - } - Err(last_error.unwrap_or_else(|| { - Box::::from( - "failed to spawn server after temp-dir retries".to_string(), - ) - })) -} - -async fn spawn_server_with_files_retry_with_env_vars( - env_vars: Vec<(String, String)>, -) -> TestResult { - let mut last_error: Option> = None; - for _ in 0..3 { - match common::spawn_server_with_files_env_vars(env_vars.clone()).await { - Ok(session) => return Ok(session), - Err(err) => { - let message = err.to_string(); - if message.contains( - "failed to create session temp dir: The directory is not empty. (os error 145)", - ) { - last_error = Some(err); - tokio::time::sleep(Duration::from_millis(200)).await; - continue; - } - return Err(err); - } - } - } - Err(last_error.unwrap_or_else(|| { - Box::::from( - "failed to spawn files-mode server after temp-dir retries".to_string(), - ) - })) + common::backend_unavailable(text) } -enum SandboxUpdateKind { - Request, - Notification, +async fn spawn_inherit_server(cwd: &Path) -> TestResult { + common::spawn_server_with_args_env_and_cwd( + vec!["--sandbox".to_string(), "inherit".to_string()], + Vec::new(), + Some(cwd.to_path_buf()), + ) + .await } -async fn wait_for_timeout_bundle_transcript( - session: &mut McpTestSession, - input: &str, -) -> TestResult> { - let first = session.write_stdin_raw_with(input, Some(0.05)).await?; - let first_text = result_text(&first); - if backend_unavailable(&first_text) { - return Ok(None); - } +async fn spawn_inherit_server_with_env( + cwd: &Path, + env: Vec<(String, String)>, +) -> TestResult { + common::spawn_server_with_args_env_and_cwd( + vec!["--sandbox".to_string(), "inherit".to_string()], + env, + Some(cwd.to_path_buf()), + ) + .await +} - sleep(Duration::from_millis(260)).await; - let deadline = Instant::now() + Duration::from_secs(5); - while Instant::now() < deadline { - let spilled = session - 
.write_stdin_raw_unterminated_with("", Some(0.1)) - .await?; - let spilled_text = result_text(&spilled); - if let Some(path) = bundle_transcript_path(&spilled_text) { - return Ok(Some(path)); - } - if !busy_response(&spilled_text) { - return Err(format!( - "expected timeout bundle disclosure in spill poll, got: {spilled_text:?}" - ) - .into()); - } - sleep(Duration::from_millis(100)).await; - } +async fn spawn_inherit_then_workspace_write_server(cwd: &Path) -> TestResult { + common::spawn_server_with_args_env_and_cwd( + vec![ + "--sandbox".to_string(), + "inherit".to_string(), + "--sandbox".to_string(), + "workspace-write".to_string(), + ], + Vec::new(), + Some(cwd.to_path_buf()), + ) + .await +} - Err("timed out waiting for timeout bundle transcript".into()) +async fn spawn_inherit_files_server( + cwd: &Path, + env: Vec<(String, String)>, +) -> TestResult { + common::spawn_server_with_args_env_and_cwd( + vec![ + "--sandbox".to_string(), + "inherit".to_string(), + "--oversized-output".to_string(), + "files".to_string(), + ], + env, + Some(cwd.to_path_buf()), + ) + .await } -async fn poll_until_not_busy(session: &mut McpTestSession) -> TestResult { - let deadline = Instant::now() + Duration::from_secs(5); - while Instant::now() < deadline { - let result = session - .write_stdin_raw_unterminated_with("", Some(1.0)) - .await?; - let text = result_text(&result); - if !busy_response(&text) { - return Ok(result); - } - sleep(Duration::from_millis(100)).await; - } - Err("timed out waiting for non-busy empty poll".into()) +async fn spawn_inherit_pager_server(cwd: &Path, page_chars: u64) -> TestResult { + common::spawn_server_with_args_env_and_cwd_and_pager_page_chars( + vec!["--sandbox".to_string(), "inherit".to_string()], + Vec::new(), + Some(cwd.to_path_buf()), + page_chars, + ) + .await } -async fn assert_sandbox_update_clears_stale_timeout_bundle( - kind: SandboxUpdateKind, -) -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| 
"sandbox_state_updates test mutex poisoned")?; - if !common::sandbox_exec_available() { - eprintln!("sandbox-exec unavailable; skipping"); - return Ok(()); - } +fn timeout_then_tail_code() -> &'static str { + r#" +Sys.sleep(0.2) +cat("MID\n") +flush.console() +Sys.sleep(1.0) +cat("TAIL\n") +flush.console() +"# +} - let temp = tempdir()?; - // Transcript-path assertions are part of the files-mode surface. The - // default pager mode can satisfy the same timeout semantics without - // deterministically disclosing a transcript path before the restart. - let mut session = spawn_server_with_files_retry_with_env_vars(vec![( - "TMPDIR".to_string(), - temp.path().display().to_string(), - )]) - .await?; - let input = "big <- paste(rep('q', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(30); cat('tail\\n')"; - let Some(transcript_path) = wait_for_timeout_bundle_transcript(&mut session, input).await? 
- else { - eprintln!("sandbox_state_updates backend unavailable; skipping"); - session.cancel().await?; - return Ok(()); - }; - let transcript_before = fs::read_to_string(&transcript_path)?; +fn timeout_then_done_code() -> &'static str { + r#" +Sys.sleep(0.2) +cat("DONE\n") +flush.console() +"# +} - match kind { - SandboxUpdateKind::Request => { - session - .send_custom_request(SANDBOX_STATE_METHOD, sandbox_update_params(true)) - .await?; - } - SandboxUpdateKind::Notification => { - session - .send_custom_notification(SANDBOX_STATE_METHOD, sandbox_update_params(true)) - .await?; - sleep(Duration::from_millis(200)).await; - } - } +#[cfg(unix)] +fn timeout_then_exit_code() -> &'static str { + r#" +cat("BEFORE_EXIT\n") +flush.console() +Sys.sleep(0.2) +q("no", status = 0, runLast = FALSE) +"# +} - let poll = poll_until_not_busy(&mut session).await?; - let poll_text = result_text(&poll); - let transcript_after = fs::read_to_string(&transcript_path)?; +#[cfg(unix)] +fn timeout_then_tail_exit_code() -> &'static str { + r#" +Sys.sleep(0.2) +cat("MID\n") +flush.console() +Sys.sleep(1.0) +q("no", status = 0, runLast = FALSE) +"# +} - session.cancel().await?; +#[cfg(unix)] +fn interrupt_then_exit_code() -> &'static str { + r#" +tryCatch({ + Sys.sleep(30) +}, interrupt = function(e) { + cat("INTERRUPT_EXIT\n") + flush.console() + q("no", status = 0, runLast = FALSE) +}) +"# +} - assert!( - bundle_transcript_path(&poll_text).is_none(), - "did not expect empty poll after sandbox restart to reuse prior timeout bundle: {poll_text:?}" - ); - assert_eq!( - transcript_after, transcript_before, - "did not expect sandbox-triggered restart output to append to prior timeout bundle" - ); - Ok(()) +#[cfg(unix)] +fn interrupt_then_prompt_code() -> &'static str { + r#" +tryCatch({ + Sys.sleep(30) +}, interrupt = function(e) { + cat("INTERRUPT_PROMPT\n") + flush.console() +}) +"# } -#[cfg(any(target_os = "macos", target_os = "linux"))] -fn sandbox_full_access_params() -> serde_json::Value { 
- json!({ - "sandboxPolicy": { - "type": "danger-full-access" - } +fn timeout_then_large_completion_code() -> &'static str { + Box::leak( + format!( + "small <- paste(rep('s', {UNDER_HARD_SPILL_TEXT_LEN}), collapse = ''); \ + big <- paste(rep('t', {OVER_HARD_SPILL_TEXT_LEN}), collapse = ''); \ + cat('FIRST_START\\n'); \ + cat(small); \ + cat('\\nFIRST_END\\n'); \ + flush.console(); \ + Sys.sleep(0.5); \ + cat('SECOND_START\\n'); \ + cat(big); \ + cat('\\nSECOND_END\\n'); \ + flush.console()" + ) + .into_boxed_str(), + ) +} + +fn test_delay_ms(default_ms: u64, windows_ms: u64) -> std::time::Duration { + std::time::Duration::from_millis(if cfg!(windows) { + windows_ms + } else { + default_ms }) } -async fn assert_session_reset(session: &mut McpTestSession) -> TestResult { - let deadline = Instant::now() + Duration::from_secs(30); - let mut last_text = String::new(); - while Instant::now() < deadline { - let result = session - .write_stdin_raw_with("x <- 42; print(exists(\"x\"))", Some(10.0)) - .await?; - last_text = collect_text(&result); - if backend_unavailable(&last_text) { - return Ok(false); - } - if last_text.contains("TRUE") { - return Ok(true); - } - if busy_response(&last_text) { - tokio::time::sleep(Duration::from_millis(50)).await; - continue; - } - tokio::time::sleep(Duration::from_millis(50)).await; - } - eprintln!("sandbox_state_updates pre-update check did not stabilize: {last_text}"); - Ok(false) +fn latest_debug_events(debug_dir: &Path) -> TestResult> { + let mut sessions = fs::read_dir(debug_dir)? + .filter_map(|entry| entry.ok().map(|entry| entry.path())) + .filter(|path| path.is_dir()) + .collect::>(); + sessions.sort(); + let session_dir = sessions + .last() + .cloned() + .ok_or_else(|| "missing debug session directory".to_string())?; + let log_text = fs::read_to_string(session_dir.join("events.jsonl"))?; + Ok(log_text + .lines() + .map(serde_json::from_str::) + .collect::, _>>()?) 
} -async fn assert_variable_cleared(session: &mut McpTestSession) -> TestResult { - let deadline = Instant::now() + Duration::from_secs(30); - let mut last_text = String::new(); - while Instant::now() < deadline { - let result = session - .write_stdin_raw_with("print(exists(\"x\"))", Some(10.0)) - .await?; - last_text = collect_text(&result); - if backend_unavailable(&last_text) { - return Ok(false); - } - if last_text.contains("FALSE") { - return Ok(true); - } - if busy_response(&last_text) { - tokio::time::sleep(Duration::from_millis(50)).await; - continue; - } - tokio::time::sleep(Duration::from_millis(50)).await; - } - eprintln!("sandbox_state_updates post-update check did not stabilize: {last_text}"); - Ok(false) +#[cfg(unix)] +fn worker_spawn_policy_types(events: &[Value]) -> Vec { + events + .iter() + .filter(|entry| entry["event"] == "worker_spawn_begin") + .filter_map(|entry| entry["payload"]["sandbox_policy"]["type"].as_str()) + .map(str::to_string) + .collect() } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_state_update_request_restarts_worker() -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| "sandbox_state_updates test mutex poisoned")?; - if !common::sandbox_exec_available() { - eprintln!("sandbox-exec unavailable; skipping"); - return Ok(()); - } - let mut session = spawn_server_retry().await?; - if !assert_session_reset(&mut session).await? { - eprintln!("sandbox_state_updates request backend unavailable; skipping"); - session.cancel().await?; - return Ok(()); - } - session - .send_custom_request(SANDBOX_STATE_METHOD, sandbox_update_params(true)) - .await?; - if !assert_variable_cleared(&mut session).await? 
{ - eprintln!("sandbox_state_updates request backend unavailable; skipping"); - session.cancel().await?; - return Ok(()); - } +async fn sandbox_state_meta_capability_advertised_with_inherit() -> TestResult<()> { + let _guard = test_guard(); + let session = + common::spawn_server_with_args(vec!["--sandbox".to_string(), "inherit".to_string()]) + .await?; + let info = session.server_info().ok_or_else(|| { + Box::::from( + "missing server info from initialize".to_string(), + ) + })?; + let experimental = info.capabilities.experimental.as_ref().ok_or_else(|| { + Box::::from( + "missing experimental capabilities".to_string(), + ) + })?; + assert!( + experimental.contains_key(SANDBOX_STATE_META_CAPABILITY), + "expected sandbox state meta capability in experimental: {experimental:?}" + ); session.cancel().await?; Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_state_update_request_clears_hidden_timeout_bundle() -> TestResult<()> { - assert_sandbox_update_clears_stale_timeout_bundle(SandboxUpdateKind::Request).await +async fn sandbox_state_meta_capability_hidden_without_inherit() -> TestResult<()> { + let _guard = test_guard(); + let session = common::spawn_server().await?; + let info = session.server_info().ok_or_else(|| { + Box::::from( + "missing server info from initialize".to_string(), + ) + })?; + let advertised = info + .capabilities + .experimental + .as_ref() + .is_some_and(|experimental| experimental.contains_key(SANDBOX_STATE_META_CAPABILITY)); + assert!( + !advertised, + "did not expect sandbox state meta capability without `--sandbox inherit`: {info:?}" + ); + session.cancel().await?; + Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_state_update_notification_restarts_worker() -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| "sandbox_state_updates test mutex poisoned")?; - if !common::sandbox_exec_available() { - eprintln!("sandbox-exec unavailable; skipping"); - return Ok(()); - } - let mut session 
= spawn_server_retry().await?; - if !assert_session_reset(&mut session).await? { - eprintln!("sandbox_state_updates notification backend unavailable; skipping"); - session.cancel().await?; - return Ok(()); - } - session - .send_custom_notification(SANDBOX_STATE_METHOD, sandbox_update_params(true)) +async fn sandbox_state_meta_capability_hidden_after_later_workspace_write_override() +-> TestResult<()> { + let _guard = test_guard(); + let scratch = repo_scratch_dir("sandbox-inherit-override-workspace-write")?; + let session = spawn_inherit_then_workspace_write_server(scratch.path()).await?; + let info = session.server_info().ok_or_else(|| { + Box::::from( + "missing server info from initialize".to_string(), + ) + })?; + let advertised = info + .capabilities + .experimental + .as_ref() + .is_some_and(|experimental| experimental.contains_key(SANDBOX_STATE_META_CAPABILITY)); + assert!( + !advertised, + "did not expect sandbox state meta capability after later workspace-write override: {info:?}" + ); + + let target = scratch.path().join("override-write.txt"); + let result = session + .write_stdin_raw_with(write_file_code(&target)?, Some(10.0)) .await?; - if !assert_variable_cleared(&mut session).await? 
{ - eprintln!("sandbox_state_updates notification backend unavailable; skipping"); + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); session.cancel().await?; return Ok(()); } + assert!( + text.contains("WRITE_OK"), + "expected later workspace-write override to avoid inherit metadata requirements, got: {text}" + ); session.cancel().await?; Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_state_update_notification_clears_hidden_timeout_bundle() -> TestResult<()> { - assert_sandbox_update_clears_stale_timeout_bundle(SandboxUpdateKind::Notification).await -} - -#[cfg(any(target_os = "macos", target_os = "linux"))] -#[tokio::test(flavor = "multi_thread")] -async fn sandbox_state_update_applies_full_access_policy() -> TestResult<()> { - if !common::sandbox_exec_available() { - eprintln!("sandbox-exec unavailable; skipping"); - return Ok(()); - } - if std::env::var_os("CODEX_SANDBOX").is_some() { - return Ok(()); - } - let target = std::env::temp_dir().join("mcp-repl-sandbox-state-update.txt"); - let _ = std::fs::remove_file(&target); - let mut session = common::spawn_server().await?; - session - .send_custom_request(SANDBOX_STATE_METHOD, sandbox_full_access_params()) - .await?; - let target_literal = serde_json::to_string(&target.to_string_lossy().to_string()) - .map_err(|err| format!("failed to encode target path: {err}"))?; - let code = r#" -target <- __TARGET__ -tryCatch({ - writeLines("allowed", target) - cat("WRITE_OK\n") -}, error = function(e) { - message("WRITE_ERROR:", conditionMessage(e)) -}) -"# - .replace("__TARGET__", &target_literal); - let result = session.write_stdin_raw_with(code, Some(10.0)).await?; +async fn sandbox_inherit_without_state_meta_fails_on_first_tool_call() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = 
session.write_stdin_raw_with("1+1", Some(2.0)).await?; let text = collect_text(&result); if backend_unavailable(&text) { - eprintln!("sandbox_state_updates full_access backend unavailable; skipping"); - let _ = std::fs::remove_file(&target); + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); session.cancel().await?; return Ok(()); } assert!( - text.contains("WRITE_OK"), - "expected full access to allow write, got: {text}" + text.contains("--sandbox inherit requested but no client sandbox state was provided"), + "expected missing sandbox-state-meta error, got: {text}" + ); + assert_eq!( + result.is_error, + Some(true), + "expected missing sandbox-state-meta to be reported as an MCP tool error" ); assert!( - !text.contains("WRITE_ERROR:"), - "full access unexpectedly blocked write: {text}" + !text.contains("2"), + "did not expect successful evaluation, got: {text}" ); - let _ = std::fs::remove_file(&target); session.cancel().await?; Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_state_capability_advertised() -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| "sandbox_state_updates test mutex poisoned")?; - let session = spawn_server_retry().await?; - let info = session.server_info().ok_or_else(|| { - let message = "missing server info from initialize".to_string(); - Box::::from(message) - })?; - let experimental = info.capabilities.experimental.as_ref().ok_or_else(|| { - let message = "missing experimental capabilities".to_string(); - Box::::from(message) - })?; +async fn sandbox_inherit_with_malformed_state_meta_fails_on_first_tool_call() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let meta = Some(json!({ + SANDBOX_STATE_META_CAPABILITY: "invalid", + })); + let result = session + .write_stdin_raw_with_meta("1+1", Some(2.0), meta) + .await?; + let text = collect_text(&result); + if 
backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("failed to parse Codex sandbox state metadata"), + "expected malformed sandbox-state-meta error, got: {text}" + ); + assert_eq!( + result.is_error, + Some(true), + "expected malformed sandbox-state-meta to be reported as an MCP tool error" + ); assert!( - experimental.contains_key("codex/sandbox-state"), - "expected sandbox state capability in experimental: {experimental:?}" + !text.contains("2"), + "did not expect successful evaluation, got: {text}" ); session.cancel().await?; Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_inherit_allows_initialize_before_state_update() -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| "sandbox_state_updates test mutex poisoned")?; - let session = - common::spawn_server_with_args(vec!["--sandbox".to_string(), "inherit".to_string()]) - .await?; +async fn sandbox_inherit_empty_repl_uses_state_meta_when_spawn_needed() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .write_stdin_raw_with_meta("", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("<>"), + "expected empty inherit repl call with metadata to return idle status, got: {text}" + ); + assert!( + !text.contains("--sandbox inherit requested but no client sandbox state was provided"), + "did not expect empty inherit repl call with metadata to fail closed, got: {text}" + ); session.cancel().await?; Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn 
sandbox_inherit_without_state_update_errors_on_first_tool_call() -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| "sandbox_state_updates test mutex poisoned")?; - let mut session = - common::spawn_server_with_args(vec!["--sandbox".to_string(), "inherit".to_string()]) - .await?; - let result = session.write_stdin_raw_with("1+1", Some(2.0)).await?; - let text = collect_text(&result); +async fn sandbox_inherit_empty_repl_after_reset_uses_staged_state_meta() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let reset = session + .call_tool_raw_with_meta( + "repl_reset", + json!({}), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let reset_text = collect_text(&reset); assert!( - text.contains("--sandbox inherit requested but no client sandbox state was provided"), - "expected missing sandbox-state error, got: {text}" + reset_text.contains("new session started"), + "expected repl_reset with sandbox metadata to succeed, got: {reset_text}" ); + + let result = session + .write_stdin_raw_with_meta("", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let text = collect_text(&result); session.cancel().await?; + + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + return Ok(()); + } + assert!( + text.contains("<>"), + "expected empty inherit repl call after reset to return idle status, got: {text}" + ); + assert_ne!( + result.is_error, + Some(true), + "did not expect empty inherit repl call after reset to fail closed" + ); Ok(()) } #[tokio::test(flavor = "multi_thread")] -async fn sandbox_inherit_without_state_update_errors_on_repl_reset() -> TestResult<()> { - let _guard = test_mutex() - .lock() - .map_err(|_| "sandbox_state_updates test mutex poisoned")?; - let mut session = - common::spawn_server_with_args(vec!["--sandbox".to_string(), "inherit".to_string()]) - .await?; 
- let result = session.call_tool_raw("repl_reset", json!({})).await?; - let text = collect_text(&result); +async fn sandbox_inherit_empty_poll_with_existing_worker_ignores_bad_state_meta() -> TestResult<()> +{ + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let initial = session + .write_stdin_raw_with_meta("1+1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let initial_text = collect_text(&initial); + if backend_unavailable(&initial_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let poll = session + .write_stdin_raw_with_meta( + "", + Some(2.0), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + let poll_text = collect_text(&poll); + session.cancel().await?; + + assert_ne!( + poll.is_error, + Some(true), + "did not expect empty poll with existing worker to fail on malformed metadata" + ); assert!( - text.contains("--sandbox inherit requested but no client sandbox state was provided"), - "expected missing sandbox-state error, got: {text}" + !poll_text.contains("failed to parse Codex sandbox state metadata"), + "expected empty poll with existing worker to ignore malformed metadata, got: {poll_text}" + ); + assert!( + poll_text.contains("<>") || poll_text.contains(">"), + "expected empty poll with existing worker to return local status, got: {poll_text}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_interrupt_follow_up_ignores_local_meta_errors() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let initial = session + .write_stdin_raw_with_meta("1+1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let initial_text = common::result_text(&initial); + if backend_unavailable(&initial_text) { + 
eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let interrupt = session + .write_stdin_raw_with_meta( + "\u{3}", + Some(2.0), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + let interrupt_text = common::result_text(&interrupt); + assert!( + !interrupt_text.contains("failed to parse Codex sandbox state metadata"), + "expected local interrupt follow-up to ignore malformed metadata, got: {interrupt_text}" + ); + assert!( + !interrupt_text.contains(MISSING_INHERITED_STATE_MESSAGE), + "expected local interrupt follow-up to ignore missing inherited metadata checks, got: {interrupt_text}" + ); + assert!( + interrupt_text.contains(">") + || interrupt_text.contains("<>"), + "expected interrupt follow-up to return local recovery output, got: {interrupt_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_metadata_error_preserves_hidden_timeout_bundle() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_large_completion_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let first_text = common::result_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + bundle_transcript_path(&first_text).is_none(), + "did not expect the first under-threshold timeout reply to disclose a bundle path, got: {first_text:?}" + ); + + tokio::time::sleep(test_delay_ms(600, 900)).await; + + let metadata_error = session + .write_stdin_raw_with_meta( + "1+1", + Some(2.0), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + let metadata_error_text = 
common::result_text(&metadata_error); + assert!( + metadata_error_text.contains("failed to parse Codex sandbox state metadata"), + "expected malformed metadata error, got: {metadata_error_text}" + ); + + let mut final_text = String::new(); + for _ in 0..10 { + let final_poll = session.write_stdin_raw_with("", Some(2.0)).await?; + final_text = common::result_text(&final_poll); + if !final_text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_pager_server(temp.path(), 120).await?; + let initial = session + .write_stdin_raw_with_meta( + "line <- paste(rep(\"foo\", 80), collapse = \" \"); for (i in 1:300) cat(sprintf(\"line%04d %s\\n\", i, line))", + Some(30.0), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let initial_text = common::result_text(&initial); + if backend_unavailable(&initial_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + initial_text.contains("--More--"), + "expected pager to activate before local pager command test, got: {initial_text:?}" + ); + + let quit = session.write_stdin_raw_with(":q", Some(30.0)).await?; + let quit_text = common::result_text(&quit); + assert!( + !quit_text.contains(MISSING_INHERITED_STATE_MESSAGE), + "expected active pager :q to ignore missing inherited metadata, got: {quit_text}" + ); + assert!( + !quit_text.contains("failed to parse Codex sandbox state metadata"), + "expected active pager :q to skip sandbox metadata parsing, got: {quit_text}" + ); + assert!( + !quit_text.contains("unexpected ':'"), + "expected :q to be handled by pager after inherit warm-up, got: {quit_text}" + ); + assert!( + quit_text.contains(">"), + "expected prompt after pager quit, got: {quit_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_pending_interrupt_tail_with_bad_meta_still_interrupts() -> 
TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + let input = format!( + "small <- paste(rep('s', {UNDER_HARD_SPILL_TEXT_LEN}), collapse = ''); detached <- paste(rep('d', {OVER_HARD_SPILL_TEXT_LEN}), collapse = ''); cat('SMALL_START\\n'); cat(small); cat('\\nSMALL_END\\n'); flush.console(); tryCatch({{ Sys.sleep(30) }}, interrupt = function(e) {{ cat('DETACHED_START\\n'); cat(detached); cat('\\nDETACHED_END\\n'); flush.console() }})" + ); + let first = session + .write_stdin_raw_with_meta(input, Some(0.05), Some(workspace_write_meta(temp.path()))) + .await?; + let first_text = common::result_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + bundle_transcript_path(&first_text).is_none(), + "did not expect timeout bundle disclosure before the interrupt-side metadata error, got: {first_text:?}" + ); + tokio::time::sleep(test_delay_ms(260, 700)).await; + + let interrupt_error = session + .write_stdin_raw_with_meta( + "\u{3}cat('AFTER_INTERRUPT\\n')", + Some(10.0), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + assert_eq!( + interrupt_error.is_error, + Some(true), + "expected malformed metadata follow-up to be reported as an MCP tool error" + ); + let interrupt_error_text = common::result_text(&interrupt_error); + assert!( + interrupt_error_text.contains("failed to parse Codex sandbox state metadata"), + "expected malformed metadata error after local interrupt, got: {interrupt_error_text}" + ); + let transcript_path = bundle_transcript_path(&interrupt_error_text).unwrap_or_else(|| { + panic!( + "expected the interrupt-side metadata error reply to disclose the detached timeout transcript, got: {interrupt_error_text:?}" + ) + }); + let transcript = fs::read_to_string(&transcript_path)?; + 
assert!( + transcript.contains("SMALL_START") && transcript.contains("SMALL_END"), + "expected the earlier timed-out output to remain on the transcript path, got: {transcript:?}" + ); + assert!( + transcript.contains("DETACHED_START") && transcript.contains("DETACHED_END"), + "expected the interrupt-side detached output to remain on the transcript path, got: {transcript:?}" + ); + + let mut recovery_text = String::new(); + for _ in 0..20 { + let recovery = session + .write_stdin_raw_with_meta("1+1", Some(0.5), Some(workspace_write_meta(temp.path()))) + .await?; + recovery_text = common::result_text(&recovery); + if !recovery_text.contains("[repl] input discarded while worker busy") + && !recovery_text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_tail_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let first_text = common::result_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + tokio::time::sleep(std::time::Duration::from_millis(260)).await; + + let restart_error = session + .write_stdin_raw_with_meta( + "\u{4}cat('AFTER_RESTART\\n')", + Some(0.1), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + assert_eq!( + restart_error.is_error, + Some(true), + "expected malformed metadata restart follow-up to be reported as an MCP tool error" + ); + let restart_error_text = common::result_text(&restart_error); + assert!( + restart_error_text.contains("failed to parse Codex sandbox state metadata"), + "expected malformed metadata error after local restart, got: {restart_error_text}" + ); + assert!( + restart_error_text.contains("new session started"), + "expected the restart-side metadata error 
reply to include the restart notice, got: {restart_error_text}" + ); + + let recovery = session + .write_stdin_raw_with_meta("1+1", Some(1.0), Some(workspace_write_meta(temp.path()))) + .await?; + let recovery_text = common::result_text(&recovery); + session.cancel().await?; + + assert!( + recovery_text.contains("[1] 2"), + "expected the next valid call to run in the restarted session, got: {recovery_text}" + ); + assert!( + !recovery_text.contains("MID") && !recovery_text.contains("TAIL"), + "did not expect pre-restart timeout output to leak into the restarted session, got: {recovery_text}" + ); + assert!( + bundle_transcript_path(&recovery_text).is_none(), + "did not expect the restarted session to keep a stale timeout bundle attached, got: {recovery_text:?}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_pending_follow_up_ignores_new_state_meta() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_tail_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + tokio::time::sleep(std::time::Duration::from_millis(260)).await; + + let second = session + .write_stdin_raw_with_meta("1+1", Some(0.1), Some(full_access_meta(temp.path()))) + .await?; + let second_text = collect_text(&second); + assert!( + second_text.contains("[repl] input discarded while worker busy"), + "expected busy follow-up to preserve the pending request, got: {second_text}" + ); + assert!( + !second_text.contains("[1] 2"), + "did not expect changed metadata to start a fresh request, got: {second_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[cfg(unix)] 
+#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_busy_follow_up_stages_current_meta_before_session_end_reset() +-> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_files_server( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + + let timeout = session + .write_stdin_raw_with_meta( + timeout_then_tail_exit_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timeout_text = collect_text(&timeout); + if backend_unavailable(&timeout_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + timeout_text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_tail_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + tokio::time::sleep(std::time::Duration::from_millis(260)).await; + + let poll = session + .write_stdin_raw_with_meta("", Some(2.0), Some(full_access_meta(temp.path()))) + .await?; + let poll_text = collect_text(&poll); + assert!( + poll_text.contains("TAIL"), + "expected empty poll to continue draining the original request, got: {poll_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_applies_new_state_meta_after_timed_out_request_settles() -> TestResult<()> +{ + let _guard = test_guard(); + let scratch = repo_scratch_dir("sandbox-timeout-settle-fresh-call")?; + let 
target = scratch.path().join("fresh-call-write.txt"); + let session = spawn_inherit_files_server(scratch.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_done_code(), + Some(0.05), + Some(read_only_meta(scratch.path())), + ) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + tokio::time::sleep(std::time::Duration::from_millis(260)).await; + + let second = session + .write_stdin_raw_with_meta( + write_file_code(&target)?, + Some(10.0), + Some(workspace_write_meta(scratch.path())), + ) + .await?; + let second_text = collect_text(&second); + assert!( + second_text.contains("WRITE_OK"), + "expected fresh follow-up call to apply current sandbox metadata, got: {second_text}" + ); + assert!( + !second_text.contains("WRITE_ERROR:"), + "did not expect stale settled timeout state to keep the old sandbox, got: {second_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_metadata_change_keeps_settled_timeout_output() -> TestResult<()> { + let _guard = test_guard(); + let scratch = repo_scratch_dir("sandbox-timeout-tail-across-state-change")?; + let session = spawn_inherit_files_server(scratch.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_tail_code(), + Some(0.05), + Some(read_only_meta(scratch.path())), + ) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + !first_text.contains("TAIL"), + "expected the late completion chunk to remain detached from the timeout reply, got: {first_text}" + ); + tokio::time::sleep(test_delay_ms(1400, 1800)).await; + + let 
second = session + .write_stdin_raw_with_meta( + "1+1", + Some(10.0), + Some(workspace_write_meta(scratch.path())), + ) + .await?; + let second_text = collect_text(&second); + assert!( + second_text.contains("TAIL"), + "expected settled timeout output to survive sandbox respawn, got: {second_text}" + ); + assert!( + second_text.contains("[1] 2"), + "expected the fresh call to still execute after the preserved timeout tail, got: {second_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_metadata_change_keeps_timeout_bundle_output() -> TestResult<()> { + let _guard = test_guard(); + let scratch = repo_scratch_dir("sandbox-timeout-bundle-across-state-change")?; + let session = spawn_inherit_files_server(scratch.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta( + timeout_then_large_completion_code(), + Some(0.05), + Some(read_only_meta(scratch.path())), + ) + .await?; + let first_text = common::result_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + bundle_transcript_path(&first_text).is_none(), + "did not expect the initial timeout reply to disclose a transcript path, got: {first_text:?}" + ); + + tokio::time::sleep(test_delay_ms(900, 1200)).await; + + let second = session + .write_stdin_raw_with_meta( + "1+1", + Some(10.0), + Some(workspace_write_meta(scratch.path())), + ) + .await?; + let second_text = common::result_text(&second); + let transcript_path = bundle_transcript_path(&second_text).unwrap_or_else(|| { + panic!( + "expected the metadata-changing follow-up to preserve and disclose the timeout transcript, got: {second_text:?}" + ) + }); + let transcript = fs::read_to_string(&transcript_path)?; + + session.cancel().await?; + + assert!( + transcript.contains("FIRST_START") && transcript.contains("FIRST_END"), + 
"expected the preserved timeout transcript to include the first timed-out chunk, got: {transcript:?}" + ); + assert!( + transcript.contains("SECOND_START") && transcript.contains("SECOND_END"), + "expected the preserved timeout transcript to include the settled completion chunk, got: {transcript:?}" + ); + assert!( + second_text.contains("[1] 2") || transcript.contains("[1] 2"), + "expected the fresh follow-up result to execute after preserving the timeout transcript, got reply {second_text:?} and transcript {transcript:?}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_workspace_write_meta_allows_write_in_cwd() -> TestResult<()> { + let _guard = test_guard(); + let scratch = repo_scratch_dir("sandbox-workspace-write")?; + let target = scratch.path().join("allowed.txt"); + let session = spawn_inherit_server(scratch.path()).await?; + let result = session + .write_stdin_raw_with_meta( + write_file_code(&target)?, + Some(10.0), + Some(workspace_write_meta(scratch.path())), + ) + .await?; + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("WRITE_OK"), + "expected write in cwd to succeed, got: {text}" + ); + assert!( + !text.contains("WRITE_ERROR:"), + "workspace-write unexpectedly blocked write in cwd: {text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_rejects_restricted_read_workspace_write_meta() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .write_stdin_raw_with_meta( + "1+1", + Some(2.0), + Some(workspace_write_restricted_read_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + session.cancel().await?; + + if backend_unavailable(&text) { + 
eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + return Ok(()); + } + assert_eq!( + result.is_error, + Some(true), + "expected restricted read metadata to be reported as an MCP tool error" + ); + assert!( + text.contains("read_only_access"), + "expected restricted read metadata rejection, got: {text}" + ); + assert!( + !text.contains("[1] 2"), + "did not expect input to run after unsupported restricted read metadata, got: {text}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_rejects_restricted_read_only_meta() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .write_stdin_raw_with_meta( + "1+1", + Some(2.0), + Some(read_only_restricted_access_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + session.cancel().await?; + + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + return Ok(()); + } + assert_eq!( + result.is_error, + Some(true), + "expected restricted read-only metadata to be reported as an MCP tool error" + ); + assert!( + text.contains("access"), + "expected restricted read-only metadata rejection, got: {text}" + ); + assert!( + !text.contains("[1] 2"), + "did not expect input to run after unsupported restricted read-only metadata, got: {text}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_rejects_read_only_network_access_meta() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .write_stdin_raw_with_meta( + "1+1", + Some(2.0), + Some(read_only_network_access_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + session.cancel().await?; + + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend 
unavailable in this environment; skipping"); + return Ok(()); + } + assert_eq!( + result.is_error, + Some(true), + "expected read-only network metadata to be reported as an MCP tool error" + ); + assert!( + text.contains("network_access"), + "expected read-only network metadata rejection, got: {text}" + ); + assert!( + !text.contains("[1] 2"), + "did not expect input to run after unsupported read-only network metadata, got: {text}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_full_access_meta_allows_write_outside_cwd() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let target = outside_workspace_target("full-access")?; + let _ = std::fs::remove_file(&target); + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .write_stdin_raw_with_meta( + write_file_code(&target)?, + Some(10.0), + Some(full_access_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("WRITE_OK"), + "expected full access to allow write outside cwd, got: {text}" + ); + assert!( + !text.contains("WRITE_ERROR:"), + "full access unexpectedly blocked outside write: {text}" + ); + let _ = std::fs::remove_file(&target); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_pending_ctrl_c_tail_applies_new_meta_before_running_tail_files() +-> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let target = outside_workspace_target("ctrl-c-tail-files")?; + let _ = std::fs::remove_file(&target); + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + let first = session + .write_stdin_raw_with_meta("1+1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let first_text = 
collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let timed_out = session + .write_stdin_raw_with_meta( + timeout_then_tail_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timed_out_text = collect_text(&timed_out); + if backend_unavailable(&timed_out_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + tokio::time::sleep(test_delay_ms(260, 700)).await; + + let mut text = collect_text( + &session + .write_stdin_raw_with_meta( + format!("\u{3}{}", write_file_code(&target)?), + Some(10.0), + Some(full_access_meta(temp.path())), + ) + .await?, + ); + for _ in 0..20 { + if !text.contains("[repl] input discarded while worker busy") + && !text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let target = outside_workspace_target("ctrl-c-tail-pager")?; + let _ = std::fs::remove_file(&target); + let session = spawn_inherit_pager_server(temp.path(), 120).await?; + let first = session + .write_stdin_raw_with_meta("1+1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let timed_out = session + .write_stdin_raw_with_meta( + timeout_then_tail_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timed_out_text = collect_text(&timed_out); + if backend_unavailable(&timed_out_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + tokio::time::sleep(test_delay_ms(260, 700)).await; + + let mut text = collect_text( + 
&session + .write_stdin_raw_with_meta( + format!("\u{3}{}", write_file_code(&target)?), + Some(10.0), + Some(full_access_meta(temp.path())), + ) + .await?, + ); + for _ in 0..20 { + if !text.contains("[repl] input discarded while worker busy") + && !text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let target = outside_workspace_target("ignored-meta")?; + let _ = std::fs::remove_file(&target); + let session = common::spawn_server_with_args_env_and_cwd( + vec!["--sandbox".to_string(), "workspace-write".to_string()], + Vec::new(), + Some(temp.path().to_path_buf()), + ) + .await?; + let result = session + .write_stdin_raw_with_meta( + write_file_code(&target)?, + Some(10.0), + Some(full_access_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("WRITE_ERROR:"), + "expected explicit workspace-write mode to ignore full-access metadata, got: {text}" + ); + assert!( + !text.contains("WRITE_OK"), + "did not expect explicit workspace-write mode to allow outside write, got: {text}" + ); + let _ = std::fs::remove_file(&target); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_restarts_worker_when_state_meta_changes() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let first = session + .write_stdin_raw_with_meta( + r#"x <- 42; cat("SET_OK\n")"#, + Some(10.0), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + first_text.contains("SET_OK"), + 
"expected setup write, got: {first_text}" + ); + + let second = session + .write_stdin_raw_with_meta( + variable_probe_code(), + Some(10.0), + Some(full_access_meta(temp.path())), + ) + .await?; + let second_text = collect_text(&second); + assert!( + second_text.contains("X_EXISTS:FALSE"), + "expected sandbox state change to restart the worker session, got: {second_text}" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_without_state_meta_fails_on_repl_reset() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session.call_tool_raw("repl_reset", json!({})).await?; + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("--sandbox inherit requested but no client sandbox state was provided"), + "expected missing sandbox-state-meta error, got: {text}" + ); + assert_eq!( + result.is_error, + Some(true), + "expected missing sandbox-state-meta reset to be reported as an MCP tool error" + ); + session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_repl_reset_uses_state_meta() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .call_tool_raw_with_meta( + "repl_reset", + json!({}), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + text.contains("new session started"), + "expected repl_reset with sandbox metadata to succeed, got: {text}" + ); + 
session.cancel().await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_repl_reset_does_not_spawn_worker_just_to_stage_state() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_server_with_env( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + let result = session + .call_tool_raw_with_meta( + "repl_reset", + json!({}), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + assert!( + text.contains("new session started"), + "expected repl_reset with sandbox metadata to succeed, got: {text}" + ); + session.cancel().await?; + + let events = latest_debug_events(&debug_dir)?; + let saw_restart = events + .iter() + .any(|entry| entry["event"] == "worker_restart_begin"); + assert!(saw_restart, "expected repl_reset to emit a restart event"); + let saw_spawn = events + .iter() + .any(|entry| entry["event"] == "worker_spawn_begin"); + assert!( + !saw_spawn, + "did not expect repl_reset to spawn a worker just to stage sandbox metadata" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_ctrl_d_does_not_spawn_worker_just_to_stage_state() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_server_with_env( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + let result = session + .write_stdin_raw_with_meta("\u{4}", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let text = collect_text(&result); + assert!( + text.contains("new session started"), + "expected bare Ctrl-D with sandbox metadata to restart the session, got: {text}" + ); + session.cancel().await?; + + let events = latest_debug_events(&debug_dir)?; + let 
saw_restart = events + .iter() + .any(|entry| entry["event"] == "worker_restart_begin"); + assert!(saw_restart, "expected bare Ctrl-D to emit a restart event"); + let saw_spawn = events + .iter() + .any(|entry| entry["event"] == "worker_spawn_begin"); + assert!( + !saw_spawn, + "did not expect bare Ctrl-D to spawn a worker just to stage sandbox metadata" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_first_ctrl_d_tail_stages_current_meta_before_restart() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let result = session + .write_stdin_raw_with_meta( + "\u{4}1+1", + Some(2.0), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let text = collect_text(&result); + session.cancel().await?; + + if backend_unavailable(&text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + return Ok(()); + } + assert!( + text.contains("new session started"), + "expected Ctrl-D tail to restart before running tail input, got: {text}" + ); + assert!( + text.contains("[1] 2"), + "expected Ctrl-D tail to run with current sandbox metadata, got: {text}" + ); + assert!( + !text.contains(MISSING_INHERITED_STATE_MESSAGE), + "did not expect valid current metadata to fail closed, got: {text}" + ); + Ok(()) +} + +#[cfg(unix)] +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_pending_ctrl_c_tail_stages_current_meta_before_session_end_reset() +-> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_files_server( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + + let timeout = session + .write_stdin_raw_with_meta( + interrupt_then_exit_code(), + Some(0.2), + Some(read_only_meta(temp.path())), + ) + .await?; + let timeout_text = 
collect_text(&timeout); + if backend_unavailable(&timeout_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + timeout_text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_files_server( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + + let timeout = session + .write_stdin_raw_with_meta( + interrupt_then_exit_code(), + Some(0.2), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timeout_text = collect_text(&timeout); + if backend_unavailable(&timeout_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + timeout_text.contains("< TestResult<()> +{ + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_files_server( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + + let timeout = session + .write_stdin_raw_with_meta( + interrupt_then_prompt_code(), + Some(0.2), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timeout_text = collect_text(&timeout); + if backend_unavailable(&timeout_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + timeout_text.contains("< TestResult<()> +{ + let _guard = test_guard(); + let temp = tempdir()?; + let debug_dir = temp.path().join("debug"); + let session = spawn_inherit_files_server( + temp.path(), + vec![( + "MCP_REPL_DEBUG_DIR".to_string(), + debug_dir.to_string_lossy().to_string(), + )], + ) + .await?; + + let timeout = session + .write_stdin_raw_with_meta( + 
timeout_then_exit_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timeout_text = collect_text(&timeout); + if backend_unavailable(&timeout_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + timeout_text.contains("< TestResult<()> +{ + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_files_server(temp.path(), Vec::new()).await?; + + let timeout = session + .write_stdin_raw_with_meta( + timeout_then_exit_code(), + Some(0.05), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let timeout_text = collect_text(&timeout); + if backend_unavailable(&timeout_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + assert!( + timeout_text.contains("< TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let first = session + .write_stdin_raw_with_meta("x <- 1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let restart_error = session.write_stdin_raw_with("\u{4}", Some(2.0)).await?; + assert_eq!( + restart_error.is_error, + Some(true), + "expected missing metadata Ctrl-D to be reported as an MCP tool error" + ); + let restart_error_text = collect_text(&restart_error); + assert!( + restart_error_text.contains(MISSING_INHERITED_STATE_MESSAGE), + "expected missing metadata error after bare Ctrl-D, got: {restart_error_text}" + ); + assert!( + !restart_error_text.contains("new session started"), + "did not expect missing metadata Ctrl-D to reset under stale state, got: {restart_error_text}" + ); + + 
let probe = session + .write_stdin_raw_with_meta( + variable_probe_code(), + Some(2.0), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let probe_text = collect_text(&probe); + session.cancel().await?; + + assert!( + probe_text.contains("X_EXISTS:TRUE"), + "expected missing metadata Ctrl-D to preserve the existing session, got: {probe_text}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_idle_ctrl_d_with_bad_meta_does_not_restart() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let first = session + .write_stdin_raw_with_meta("x <- 1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let restart_error = session + .write_stdin_raw_with_meta( + "\u{4}", + Some(2.0), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + assert_eq!( + restart_error.is_error, + Some(true), + "expected malformed metadata Ctrl-D to be reported as an MCP tool error" + ); + let restart_error_text = collect_text(&restart_error); + assert!( + restart_error_text.contains("failed to parse Codex sandbox state metadata"), + "expected malformed metadata error after bare Ctrl-D, got: {restart_error_text}" + ); + assert!( + !restart_error_text.contains("new session started"), + "did not expect malformed metadata Ctrl-D to reset under stale state, got: {restart_error_text}" + ); + + let probe = session + .write_stdin_raw_with_meta( + variable_probe_code(), + Some(2.0), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let probe_text = collect_text(&probe); + session.cancel().await?; + + assert!( + probe_text.contains("X_EXISTS:TRUE"), + "expected malformed metadata Ctrl-D to preserve the existing 
session, got: {probe_text}" + ); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn sandbox_inherit_idle_ctrl_d_tail_with_bad_meta_does_not_run_tail() -> TestResult<()> { + let _guard = test_guard(); + let temp = tempdir()?; + let session = spawn_inherit_server(temp.path()).await?; + let first = session + .write_stdin_raw_with_meta("1+1", Some(2.0), Some(workspace_write_meta(temp.path()))) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("sandbox_state_updates backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let restart_error = session + .write_stdin_raw_with_meta( + "\u{4}x <- 2", + Some(2.0), + Some(json!({ SANDBOX_STATE_META_CAPABILITY: "invalid" })), + ) + .await?; + assert_eq!( + restart_error.is_error, + Some(true), + "expected malformed metadata Ctrl-D tail to be reported as an MCP tool error" + ); + let restart_error_text = collect_text(&restart_error); + assert!( + restart_error_text.contains("failed to parse Codex sandbox state metadata"), + "expected malformed metadata error after bare Ctrl-D tail, got: {restart_error_text}" + ); + assert!( + !restart_error_text.contains("new session started"), + "did not expect malformed metadata Ctrl-D tail to reset under stale state, got: {restart_error_text}" + ); + + let probe = session + .write_stdin_raw_with_meta( + variable_probe_code(), + Some(2.0), + Some(workspace_write_meta(temp.path())), + ) + .await?; + let probe_text = collect_text(&probe); + session.cancel().await?; + + assert!( + probe_text.contains("X_EXISTS:FALSE"), + "expected malformed metadata Ctrl-D tail to avoid running fresh tail input, got: {probe_text}" ); - session.cancel().await?; Ok(()) } diff --git a/tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot.snap b/tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot.snap deleted file mode 
100644 index b789f259..00000000 --- a/tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot.snap +++ /dev/null @@ -1,58 +0,0 @@ ---- -source: tests/write_stdin_batch.rs -expression: rendered ---- -== session: files_multidrain_plot_then_later_stdout == --- step 1 -- -call: -{ - "tool": "r_repl", - "arguments": { - "input": "plot(1:10)\nSys.sleep(2)\ncat('done\\n')\n", - "timeout_ms": 200 - } -} -response: -{ - "type": "tool_result", - "is_error": false, - "content": [ - { - "type": "image", - "mime_type": "image/png", - "data_len": 0 - }, - { - "type": "text", - "text": "> plot(1:10)\n> Sys.sleep(2)" - }, - { - "type": "text", - "text": "<>" - } - ] -} --- step 2 -- -call: -{ - "tool": "r_repl", - "arguments": { - "input": "", - "timeout_ms": 10000 - } -} -response: -{ - "type": "tool_result", - "is_error": false, - "content": [ - { - "type": "text", - "text": "done" - }, - { - "type": "text", - "text": "> " - } - ] -} diff --git a/tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot@transcript.snap b/tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot@transcript.snap deleted file mode 100644 index 1c1bd2b9..00000000 --- a/tests/snapshots/write_stdin_batch__write_stdin_files_multidrain_plot_then_later_stdout_snapshot@transcript.snap +++ /dev/null @@ -1,15 +0,0 @@ ---- -source: tests/write_stdin_batch.rs -expression: transcript ---- -== session: files_multidrain_plot_then_later_stdout == -1) r_repl timeout_ms=200 ->>> plot(1:10) ->>> Sys.sleep(2) ->>> cat('done\n') -<<< [image/png len=0] -<<< <> - -2) r_repl timeout_ms=10000 ->>> -<<< done diff --git a/tests/write_stdin_batch.rs b/tests/write_stdin_batch.rs index e82e24de..eba62db7 100644 --- a/tests/write_stdin_batch.rs +++ b/tests/write_stdin_batch.rs @@ -45,6 +45,15 @@ fn collect_text(result: &rmcp::model::CallToolResult) -> String { .join("") } +#[cfg(not(windows))] +fn 
count_images(result: &rmcp::model::CallToolResult) -> usize { + result + .content + .iter() + .filter(|item| matches!(item.raw, rmcp::model::RawContent::Image(_))) + .count() +} + fn backend_unavailable(text: &str) -> bool { text.contains("Fatal error: cannot create 'R_TempDir'") || text.contains("failed to start R session") @@ -141,45 +150,62 @@ async fn write_stdin_timeout_then_busy_then_recovers() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_files_multidrain_plot_then_later_stdout_snapshot() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut snapshot = McpSnapshot::new(); + let session = common::spawn_server_with_files().await?; - snapshot - .files_session( - "files_multidrain_plot_then_later_stdout", - mcp_session!(|session| { - session - .call_tool( - session.repl_tool_name(), - json!({ - "input": "plot(1:10)\nSys.sleep(2)\ncat('done\\n')\n", - "timeout_ms": 200 - }), - ) - .await; - session - .call_tool( - session.repl_tool_name(), - json!({ - "input": "", - "timeout_ms": 10000 - }), - ) - .await; - Ok(()) + let first = session + .call_tool_raw( + session.repl_tool_name(), + json!({ + "input": "plot(1:10)\nSys.sleep(2)\ncat('done\\n')\n", + "timeout_ms": 200 + }), + ) + .await?; + let first_text = collect_text(&first); + if backend_unavailable(&first_text) { + eprintln!("write_stdin_batch backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + let second = session + .call_tool_raw( + session.repl_tool_name(), + json!({ + "input": "", + "timeout_ms": 10000 }), ) .await?; + let second_text = collect_text(&second); + session.cancel().await?; + + let combined_text = format!("{first_text}\n{second_text}"); + let total_images = count_images(&first) + count_images(&second); - assert_snapshot_or_skip( - "write_stdin_files_multidrain_plot_then_later_stdout_snapshot", - &snapshot, - ) + assert!( + first_text.contains("< TestResult<()> { - let mut session = 
common::spawn_server().await?; + let session = common::spawn_server().await?; let first = session .write_stdin_raw_with( @@ -274,7 +300,7 @@ async fn write_stdin_pager_hits() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_recovers_after_error() -> TestResult<()> { - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let _ = session .write_stdin_raw_with("stop('boom')", Some(10.0)) .await?; @@ -302,7 +328,7 @@ async fn write_stdin_recovers_after_error() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_drops_huge_echo_only_inputs() -> TestResult<()> { - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let input = (1..=2_000) .map(|idx| format!("x{idx} <- {idx}\n")) @@ -334,7 +360,7 @@ async fn write_stdin_drops_huge_echo_only_inputs() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_trims_huge_leading_echo_prefix_and_preserves_later_echo() -> TestResult<()> { - let mut session = common::spawn_server_with_files().await?; + let session = common::spawn_server_with_files().await?; let mut input = String::new(); for idx in 1..=1_000 { diff --git a/tests/write_stdin_behavior.rs b/tests/write_stdin_behavior.rs index 65c05114..da190b22 100644 --- a/tests/write_stdin_behavior.rs +++ b/tests/write_stdin_behavior.rs @@ -241,7 +241,7 @@ async fn spawn_pager_behavior_session(page_chars: u64) -> TestResult TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let _ = session .write_stdin_raw_with("Sys.sleep(2)", Some(0.1)) @@ -268,7 +268,7 @@ async fn write_stdin_discards_when_busy() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_trims_continuation_echo_prefix() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; 
+ let session = spawn_behavior_session().await?; let result = session.write_stdin_raw_with("1+\n1", Some(30.0)).await?; let text = result_text(&result); @@ -298,7 +298,7 @@ async fn write_stdin_trims_continuation_echo_prefix() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_trims_full_noninterleaved_multiexpression_echo_prefix() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let result = session .write_stdin_raw_with("x <- 1\nx + 1", Some(30.0)) @@ -331,7 +331,7 @@ async fn write_stdin_trims_full_noninterleaved_multiexpression_echo_prefix() -> #[tokio::test(flavor = "multi_thread")] async fn write_stdin_drops_echo_only_multiexpression_reply() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let result = session .write_stdin_raw_with("x <- 1\ny <- 2", Some(30.0)) @@ -356,7 +356,7 @@ async fn write_stdin_drops_echo_only_multiexpression_reply() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_preserves_later_echo_when_output_is_interleaved() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let result = session .write_stdin_raw_with("cat('A\\n')\n1+1", Some(30.0)) @@ -450,7 +450,7 @@ async fn write_stdin_trims_matched_readline_transcripts() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_does_not_treat_colon_input_as_pager_command_by_default() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let result = session.write_stdin_raw_with(":q", Some(10.0)).await?; let text = result_text(&result); @@ -473,7 +473,7 @@ async fn 
write_stdin_does_not_treat_colon_input_as_pager_command_by_default() -> #[tokio::test(flavor = "multi_thread")] async fn write_stdin_mixed_stdout_stderr() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let result = session .write_stdin_raw_with( @@ -501,7 +501,7 @@ async fn write_stdin_mixed_stdout_stderr() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_normalizes_error_prompt() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let result = session .write_stdin_raw_with("cat('> Error: boom\\n'); message('boom')", Some(30.0)) @@ -673,7 +673,7 @@ async fn text_only_oversized_reply_uses_output_bundle_dir() -> TestResult<()> { async fn timeout_output_bundle_backfills_earlier_worker_text_and_excludes_timeout_marker() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = "big <- paste(rep('x', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(0.1); cat('end\\n')"; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; @@ -728,7 +728,7 @@ async fn timeout_output_bundle_backfills_earlier_worker_text_and_excludes_timeou async fn timeout_output_bundle_is_disclosed_only_after_poll_crosses_hard_spill_threshold() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; // Keep the oversized output comfortably behind the initial 50 ms timeout. 
// The worker timeout path polls in 50 ms slices, so a narrower gap can make @@ -994,7 +994,7 @@ async fn pager_busy_follow_up_reuses_hidden_timeout_bundle_when_it_first_spills( #[tokio::test(flavor = "multi_thread")] async fn timeout_spill_file_path_stays_stable_across_later_small_poll() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = "big <- paste(rep('y', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(0.35); cat('tail\\n')"; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; @@ -1052,7 +1052,7 @@ async fn timeout_spill_file_path_stays_stable_across_later_small_poll() -> TestR async fn timeout_bundle_file_creation_failure_preserves_inline_content() -> TestResult<()> { let _guard = lock_test_mutex(); let temp = tempdir()?; - let mut session = + let session = spawn_behavior_session_with_env_vars(output_bundle_temp_env_vars(temp.path())).await?; let input = "big <- paste(rep('z', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(0.1); cat('end\\n')"; @@ -1099,7 +1099,7 @@ async fn timeout_bundle_file_creation_failure_preserves_inline_content() -> Test async fn hidden_timeout_bundle_is_removed_after_request_finishes_inline() -> TestResult<()> { let _guard = lock_test_mutex(); let temp = tempdir()?; - let mut session = + let session = spawn_behavior_session_with_env_vars(output_bundle_temp_env_vars(temp.path())).await?; let first = session @@ -1154,7 +1154,7 @@ async fn hidden_timeout_bundle_is_removed_after_request_finishes_inline() -> Tes #[tokio::test(flavor = "multi_thread")] async fn timeout_bundle_stops_before_ctrl_d_restart_output() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = 
spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = "big <- paste(rep('q', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(30); cat('tail\\n')"; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; @@ -1277,7 +1277,7 @@ async fn ctrl_c_follow_up_keeps_detached_tail_out_of_fresh_reply_bundle() -> Tes #[tokio::test(flavor = "multi_thread")] async fn disclosed_timeout_bundle_keeps_appending_after_busy_follow_up() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = format!( "big <- paste(rep('d', {OVER_HARD_SPILL_TEXT_LEN}), collapse = ''); cat('BIG_START\\n'); cat(big); cat('\\nBIG_END\\n'); flush.console(); Sys.sleep(1.0); cat('TAIL\\n')" @@ -1342,7 +1342,7 @@ async fn disclosed_timeout_bundle_keeps_appending_after_busy_follow_up() -> Test #[tokio::test(flavor = "multi_thread")] async fn disclosed_timeout_bundle_keeps_appending_after_idle_busy_follow_up() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = format!( "big <- paste(rep('i', {OVER_HARD_SPILL_TEXT_LEN}), collapse = ''); cat('BIG_START\\n'); cat(big); cat('\\nBIG_END\\n'); flush.console(); Sys.sleep(1.5); cat('TAIL\\n')" @@ -1414,7 +1414,7 @@ async fn disclosed_timeout_bundle_keeps_appending_after_idle_busy_follow_up() -> #[tokio::test(flavor = "multi_thread")] async fn files_empty_poll_after_resolved_timeout_restores_prompt() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let first = session .write_stdin_raw_with("Sys.sleep(0.2); 1+1", Some(0.05)) @@ -1491,7 +1491,7 @@ async fn 
pager_follow_up_after_resolved_timeout_trims_detached_echo_prefix() -> #[tokio::test(flavor = "multi_thread")] async fn timeout_bundle_stops_before_fresh_follow_up_output() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = "big <- paste(rep('n', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(0.2); cat('tail\\n')"; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; diff --git a/tests/write_stdin_edge_cases.rs b/tests/write_stdin_edge_cases.rs index f56aaf8d..b552f395 100644 --- a/tests/write_stdin_edge_cases.rs +++ b/tests/write_stdin_edge_cases.rs @@ -65,7 +65,7 @@ fn assert_invalid_timeout(err: ServiceError) { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_timeout_zero_is_non_blocking() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let timeout_result = session .write_stdin_raw_unterminated_with("1+1", Some(0.0)) @@ -110,7 +110,7 @@ async fn write_stdin_timeout_zero_is_non_blocking() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_accepts_crlf_input() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let input = "cat('A\\n')\r\ncat('B\\n')"; let result = session.write_stdin_raw_with(input, Some(10.0)).await?; @@ -151,7 +151,7 @@ async fn write_stdin_accepts_crlf_input() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_without_trailing_newline_runs() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let result = session 
.write_stdin_raw_unterminated_with("1+1", Some(10.0)) @@ -173,7 +173,7 @@ async fn write_stdin_without_trailing_newline_runs() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn write_stdin_empty_returns_prompt() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let result = session .write_stdin_raw_unterminated_with("", Some(1.0)) @@ -201,7 +201,7 @@ async fn write_stdin_empty_returns_prompt() -> TestResult<()> { #[tokio::test(flavor = "multi_thread")] async fn empty_poll_after_completed_request_returns_idle_status_and_prompt() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = common::spawn_server().await?; + let session = common::spawn_server().await?; let result = session.write_stdin_raw_with("1+1", Some(10.0)).await?; let text = result_text(&result); From 21b6290a25d8ef15132011317183f797c127f594 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 11:22:55 -0400 Subject: [PATCH 4/5] test: cover Codex CLI metadata wiring --- tests/codex_approvals_tui.rs | 842 +++++++++++++++++- ...x__codex_exec_wire_sandbox_state_meta.snap | 100 +++ ...s__codex_exec_wire_sandbox_state_meta.snap | 100 +++ 3 files changed, 1001 insertions(+), 41 deletions(-) create mode 100644 tests/snapshots/codex_approvals_tui__linux__codex_exec_wire_sandbox_state_meta.snap create mode 100644 tests/snapshots/codex_approvals_tui__macos__codex_exec_wire_sandbox_state_meta.snap diff --git a/tests/codex_approvals_tui.rs b/tests/codex_approvals_tui.rs index 32bfcbba..925cd11d 100644 --- a/tests/codex_approvals_tui.rs +++ b/tests/codex_approvals_tui.rs @@ -2,29 +2,50 @@ mod common; use common::TestResult; -#[cfg(any(target_os = "macos", target_os = "linux"))] +#[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] mod unix_impl { - use super::{TestResult, common}; + use super::TestResult; + #[cfg(any(target_os = "macos", 
target_os = "linux"))] + use super::common; + #[cfg(any(target_os = "macos", target_os = "linux"))] use portable_pty::{CommandBuilder, PtySize, native_pty_system}; use serde_json::Value; use std::collections::BTreeMap; - use std::io::{ErrorKind, Read, Write}; + #[cfg(any(target_os = "macos", target_os = "linux"))] + use std::io::ErrorKind; + use std::io::Read; + #[cfg(any(target_os = "macos", target_os = "linux"))] + use std::io::Write; use std::net::SocketAddr; + #[cfg(any(target_os = "macos", target_os = "linux"))] use std::os::unix::process::CommandExt; use std::path::{Path, PathBuf}; use std::process::Stdio; + use std::sync::Arc; + #[cfg(any(target_os = "macos", target_os = "linux"))] + use std::sync::Mutex; + use std::sync::OnceLock; + #[cfg(any(target_os = "macos", target_os = "linux"))] use std::sync::mpsc::{Receiver, RecvTimeoutError}; - use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader}; use tokio::net::{TcpListener, TcpStream}; + use toml_edit::DocumentMut; + #[cfg(any(target_os = "macos", target_os = "linux"))] use vt100::Parser; const WORKSPACE_WRITE_MARKER: &str = "SANDBOX_TEST_1"; const FULL_ACCESS_MARKER: &str = "SANDBOX_TEST_2"; const WARMUP_MARKER: &str = "WARMUP_TEST"; + const INSTALL_SCRIPTED_TOOL_CALL_MARKER: &str = "INSTALL_SCRIPTED_TOOL_CALL"; + #[cfg(any(target_os = "macos", target_os = "linux"))] const FULL_ACCESS_TEST_ENV: &str = "MCP_REPL_ENABLE_FULL_ACCESS_TUI_TEST"; + fn codex_exec_test_mutex() -> &'static tokio::sync::Mutex<()> { + static TEST_MUTEX: OnceLock> = OnceLock::new(); + TEST_MUTEX.get_or_init(|| tokio::sync::Mutex::new(())) + } + struct IsolatedCodexEnv { _temp_dir: tempfile::TempDir, workspace: PathBuf, @@ -42,10 +63,151 @@ mod unix_impl { run_codex_exec_initial_sandbox_state_for_mode(ExecSnapshotMode::Json).await } + #[cfg(any(target_os = "macos", target_os = "linux"))] pub(super) async fn run_codex_exec_initial_sandbox_state_plain() -> 
TestResult { run_codex_exec_initial_sandbox_state_for_mode(ExecSnapshotMode::Plain).await } + #[cfg(any(target_os = "macos", target_os = "linux"))] + pub(super) async fn run_codex_exec_wire_sandbox_state_meta() -> TestResult { + if !codex_available() { + eprintln!("codex not found on PATH; skipping"); + return Ok(String::new()); + } + if !loopback_bind_available().await { + eprintln!("loopback TCP bind unavailable; skipping"); + return Ok(String::new()); + } + let Some(python_program) = common::python_program() else { + eprintln!("python not found on PATH; skipping"); + return Ok(String::new()); + }; + let _guard = codex_exec_test_mutex().lock().await; + + let tool_args = tool_args_for_code(&sandbox_run_code()); + let mock_server = + MockResponsesServer::start(tool_name(), tool_args.clone(), Some(tool_args)).await?; + let mcp_repl = resolve_mcp_repl_path()?; + let trace_script = resolve_trace_script_path()?; + let env = create_isolated_codex_env_with_trace( + &mcp_repl, + &trace_script, + python_program, + &mock_server.base_url(), + )?; + let sandbox_mode = codex_exec_sandbox_mode(); + + let cmd = codex_exec_command( + &env, + &mock_server.base_url(), + &format!("{WORKSPACE_WRITE_MARKER}: run the sandbox write test"), + sandbox_mode, + None, + ); + + let output = run_command_with_timeout(cmd, Duration::from_secs(60))?; + let stdout = String::from_utf8(output.stdout) + .map_err(|err| format!("codex exec stdout was not valid UTF-8: {err}"))?; + let stderr = String::from_utf8(output.stderr) + .map_err(|err| format!("codex exec stderr was not valid UTF-8: {err}"))?; + let outputs = mock_server.function_call_outputs().await; + if codex_exec_environment_unavailable(&stdout, &stderr, &outputs) { + eprintln!("codex exec sandbox/backend unavailable in this environment; skipping"); + return Ok(String::new()); + } + if !output.status.success() { + let request_paths = mock_server.request_paths().await; + let last_request = mock_server.last_request().await; + return 
Err(format!( + "codex exec failed with status {status}\nrequest_paths: {request_paths:?}\nlast_request: {last_request:?}\nstdout:\n{stdout}\nstderr:\n{stderr}", + status = output.status + ) + .into()); + } + + wait_for_log_contains( + &env.debug_dir, + codex_exec_expected_sandbox_log(), + Duration::from_secs(10), + )?; + let saw_write_ok = outputs.iter().any(|out| out.contains("WRITE_OK")); + if !saw_write_ok { + let request_paths = mock_server.request_paths().await; + let last_request = mock_server.last_request().await; + return Err(format!( + "expected workspace-write call to succeed\nrequest_paths: {request_paths:?}\nlast_request: {last_request:?}\nstdout:\n{stdout}\nstderr:\n{stderr}\noutputs: {outputs:?}" + ) + .into()); + } + + render_wire_snapshot(&env.debug_dir, &env.workspace, &env.codex_home) + } + + pub(super) async fn run_install_then_codex_exec_uses_generated_config() -> TestResult<()> { + if !codex_available() { + eprintln!("codex not found on PATH; skipping"); + return Ok(()); + } + if !loopback_bind_available().await { + eprintln!("loopback TCP bind unavailable; skipping"); + return Ok(()); + } + let _guard = codex_exec_test_mutex().lock().await; + + let tool_args = tool_args_for_code("1+1"); + let mock_server = + MockResponsesServer::start_with_first_user_turn_tool_call(tool_name(), tool_args) + .await?; + let mcp_repl = resolve_mcp_repl_path()?; + let env = create_isolated_codex_env_for_install(&mock_server.base_url())?; + let sandbox_mode = codex_exec_sandbox_mode(); + + run_mcp_repl_install_for_codex_r(&mcp_repl, &env.codex_home)?; + assert_codex_install_wrote_r_inherit_config(&env.codex_home, &mcp_repl)?; + + let cmd = codex_exec_command( + &env, + &mock_server.base_url(), + INSTALL_SCRIPTED_TOOL_CALL_MARKER, + sandbox_mode, + Some("--json"), + ); + + let output = run_command_with_timeout(cmd, Duration::from_secs(60))?; + let stdout = String::from_utf8(output.stdout) + .map_err(|err| format!("codex exec stdout was not valid UTF-8: {err}"))?; + 
let stderr = String::from_utf8(output.stderr) + .map_err(|err| format!("codex exec stderr was not valid UTF-8: {err}"))?; + let outputs = mock_server.function_call_outputs().await; + if codex_exec_environment_unavailable(&stdout, &stderr, &outputs) { + eprintln!("codex exec sandbox/backend unavailable in this environment; skipping"); + return Ok(()); + } + if !output.status.success() { + let request_paths = mock_server.request_paths().await; + let last_request = mock_server.last_request().await; + return Err(format!( + "codex exec with installed config failed with status {status}\nrequest_paths: {request_paths:?}\nlast_request: {last_request:?}\nstdout:\n{stdout}\nstderr:\n{stderr}", + status = output.status + ) + .into()); + } + + assert_exec_output_contains_tool_call(&stdout, "r", "repl", "1+1\n")?; + + let saw_result = outputs.iter().any(|out| out.contains("[1] 2")); + if !saw_result { + let request_paths = mock_server.request_paths().await; + let last_request = mock_server.last_request().await; + return Err(format!( + "expected installed codex config to run r repl and return 2\nrequest_paths: {request_paths:?}\nlast_request: {last_request:?}\nstdout:\n{stdout}\nstderr:\n{stderr}\noutputs: {outputs:?}" + ) + .into()); + } + + Ok(()) + } + async fn run_codex_exec_initial_sandbox_state_for_mode( mode: ExecSnapshotMode, ) -> TestResult { @@ -57,34 +219,26 @@ mod unix_impl { eprintln!("loopback TCP bind unavailable; skipping"); return Ok(String::new()); } + let _guard = codex_exec_test_mutex().lock().await; let tool_args = tool_args_for_code(&sandbox_run_code()); let mock_server = MockResponsesServer::start(tool_name(), tool_args.clone(), Some(tool_args)).await?; let mcp_repl = resolve_mcp_repl_path()?; let env = create_isolated_codex_env(&mcp_repl, &mock_server.base_url())?; + let sandbox_mode = codex_exec_sandbox_mode(); - let prompt = format!("{WORKSPACE_WRITE_MARKER}: run the sandbox write test"); - let mode_flag = match mode { - ExecSnapshotMode::Json => 
"--json ", - ExecSnapshotMode::Plain => "", - }; - let shell_script = format!( - "codex exec {mode_flag}--sandbox workspace-write --skip-git-repo-check --cd {} {}", - sh_single_quote(&env.workspace.display().to_string()), - sh_single_quote(&prompt), + let cmd = codex_exec_command( + &env, + &mock_server.base_url(), + &format!("{WORKSPACE_WRITE_MARKER}: run the sandbox write test"), + sandbox_mode, + match mode { + ExecSnapshotMode::Json => Some("--json"), + ExecSnapshotMode::Plain => None, + }, ); - let mut cmd = std::process::Command::new("sh"); - cmd.env("CODEX_HOME", env.codex_home.display().to_string()); - cmd.env("CODEX_OSS_BASE_URL", mock_server.base_url()); - cmd.env("MCP_REPL_DEBUG_DIR", env.debug_dir.display().to_string()); - cmd.env("TERM", "xterm-256color"); - cmd.env("LANG", "C"); - cmd.arg("-c"); - cmd.arg(shell_script); - cmd.current_dir(&env.workspace); - let output = run_command_with_timeout(cmd, Duration::from_secs(60))?; let stdout = String::from_utf8(output.stdout) .map_err(|err| format!("codex exec stdout was not valid UTF-8: {err}"))?; @@ -105,7 +259,11 @@ mod unix_impl { .into()); } - wait_for_log_contains(&env.debug_dir, "workspace-write", Duration::from_secs(10))?; + wait_for_log_contains( + &env.debug_dir, + codex_exec_expected_sandbox_log(), + Duration::from_secs(10), + )?; let saw_write_ok = outputs.iter().any(|out| out.contains("WRITE_OK")); if !saw_write_ok { let request_paths = mock_server.request_paths().await; @@ -119,6 +277,7 @@ mod unix_impl { render_exec_snapshot(mode, &stdout, &stderr, &env.workspace, &env.codex_home) } + #[cfg(any(target_os = "macos", target_os = "linux"))] pub(super) async fn run_codex_tui_full_access_sandbox_update() -> TestResult<()> { if !full_access_test_enabled() { eprintln!( @@ -177,11 +336,7 @@ mod unix_impl { "danger-full-access", Duration::from_secs(20), )?; - wait_for_log_contains( - &env.debug_dir, - "codex/sandbox-state/update", - Duration::from_secs(20), - )?; + 
wait_for_log_contains(&env.debug_dir, "tool-call-meta", Duration::from_secs(20))?; driver.send_line(&format!( "{FULL_ACCESS_MARKER}: probe write after full access" @@ -202,6 +357,7 @@ mod unix_impl { Ok(()) } + #[cfg(any(target_os = "macos", target_os = "linux"))] pub(super) async fn run_mock_rejects_malformed_responses_payload() -> TestResult<()> { let tool_args = tool_args_for_code(&sandbox_run_code()); let mock_server = @@ -238,6 +394,7 @@ mod unix_impl { Ok(()) } + #[cfg(any(target_os = "macos", target_os = "linux"))] fn full_access_test_enabled() -> bool { std::env::var_os(FULL_ACCESS_TEST_ENV).is_some() } @@ -296,9 +453,8 @@ mod unix_impl { .is_ok() } - fn create_isolated_codex_env( - mcp_repl: &Path, - openai_base_url: &str, + fn create_isolated_codex_env_with_config( + build_config: impl FnOnce(&Path) -> String, ) -> TestResult { let temp_dir = tempfile::tempdir()?; let workspace = temp_dir.path().join("workspace"); @@ -320,7 +476,7 @@ mod unix_impl { let debug_dir = temp_dir.path().join("debug"); std::fs::create_dir_all(&debug_dir)?; - let config = codex_config(mcp_repl, &workspace, openai_base_url); + let config = build_config(&workspace); std::fs::write(codex_home.join("config.toml"), config)?; Ok(IsolatedCodexEnv { @@ -331,12 +487,90 @@ mod unix_impl { }) } + fn create_isolated_codex_env( + mcp_repl: &Path, + openai_base_url: &str, + ) -> TestResult { + create_isolated_codex_env_with_config(|workspace| { + codex_config(mcp_repl, workspace, openai_base_url) + }) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn create_isolated_codex_env_with_trace( + mcp_repl: &Path, + trace_script: &Path, + python_program: &str, + openai_base_url: &str, + ) -> TestResult { + create_isolated_codex_env_with_config(|workspace| { + codex_traced_config( + mcp_repl, + trace_script, + python_program, + workspace, + openai_base_url, + ) + }) + } + + fn create_isolated_codex_env_for_install( + openai_base_url: &str, + ) -> TestResult { + 
create_isolated_codex_env_with_config(|workspace| { + codex_install_base_config(workspace, openai_base_url) + }) + } + + fn codex_exec_sandbox_mode() -> &'static str { + #[cfg(target_os = "windows")] + { + "danger-full-access" + } + #[cfg(any(target_os = "macos", target_os = "linux"))] + { + "workspace-write" + } + } + + fn codex_exec_expected_sandbox_log() -> &'static str { + codex_exec_sandbox_mode() + } + + fn codex_exec_command( + env: &IsolatedCodexEnv, + base_url: &str, + prompt: &str, + sandbox_mode: &str, + mode_flag: Option<&str>, + ) -> std::process::Command { + let mut cmd = std::process::Command::new("codex"); + cmd.env("CODEX_HOME", env.codex_home.display().to_string()); + cmd.env("CODEX_OSS_BASE_URL", base_url); + cmd.env("MCP_REPL_DEBUG_DIR", env.debug_dir.display().to_string()); + cmd.env("TERM", "xterm-256color"); + cmd.env("LANG", "C"); + cmd.arg("exec"); + if let Some(flag) = mode_flag { + cmd.arg(flag); + } + cmd.arg("--sandbox"); + cmd.arg(sandbox_mode); + cmd.arg("--skip-git-repo-check"); + cmd.arg("--cd"); + cmd.arg(&env.workspace); + cmd.arg(prompt); + cmd.current_dir(&env.workspace); + cmd + } + fn run_command_with_timeout( mut cmd: std::process::Command, timeout: Duration, ) -> TestResult { cmd.stdout(Stdio::piped()); cmd.stderr(Stdio::piped()); + #[cfg(any(target_os = "macos", target_os = "linux"))] cmd.process_group(0); let mut child = cmd.spawn()?; let mut stdout_reader = child @@ -367,10 +601,13 @@ mod unix_impl { break status; } if Instant::now() >= deadline { - let pid = child.id() as i32; - // Ensure the timeout path tears down the whole subtree (shell + codex + children). - unsafe { - libc::killpg(pid, libc::SIGKILL); + #[cfg(any(target_os = "macos", target_os = "linux"))] + { + let pid = child.id() as i32; + // Ensure the timeout path tears down the whole subtree (codex + children). 
+ unsafe { + libc::killpg(pid, libc::SIGKILL); + } } let _ = child.kill(); let _ = child.wait(); @@ -573,6 +810,7 @@ mod unix_impl { out } + #[cfg(any(target_os = "macos", target_os = "linux"))] fn sh_single_quote(value: &str) -> String { format!("'{}'", value.replace('\'', "'\"'\"'")) } @@ -651,6 +889,55 @@ mod unix_impl { Err("unable to locate mcp-repl test binary".into()) } + fn run_mcp_repl_install_for_codex_r(mcp_repl: &Path, codex_home: &Path) -> TestResult<()> { + let output = std::process::Command::new(mcp_repl) + .arg("install") + .arg("--client") + .arg("codex") + .arg("--interpreter") + .arg("r") + .env("CODEX_HOME", codex_home) + .output()?; + if output.status.success() { + return Ok(()); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + Err(format!( + "mcp-repl install --client codex --interpreter r failed with status {status}\nstdout:\n{stdout}\nstderr:\n{stderr}", + status = output.status + ) + .into()) + } + + fn assert_codex_install_wrote_r_inherit_config( + codex_home: &Path, + mcp_repl: &Path, + ) -> TestResult<()> { + let config_path = codex_home.join("config.toml"); + let text = std::fs::read_to_string(&config_path)?; + let doc = text.parse::()?; + let expected_command = mcp_repl.to_string_lossy().to_string(); + assert_eq!( + doc["mcp_servers"]["r"]["command"].as_str(), + Some(expected_command.as_str()), + "expected install to register the current mcp-repl executable" + ); + let r_args = doc["mcp_servers"]["r"]["args"] + .as_array() + .ok_or_else(|| "expected mcp_servers.r.args array".to_string())?; + let has_sandbox_inherit = r_args + .iter() + .zip(r_args.iter().skip(1)) + .any(|(a, b)| a.as_str() == Some("--sandbox") && b.as_str() == Some("inherit")); + assert!( + has_sandbox_inherit, + "expected installed Codex config to include `--sandbox inherit`" + ); + Ok(()) + } + fn tool_name() -> String { "mcp__r__repl".to_string() } @@ -713,7 +1000,88 @@ responses_websockets = 
false [mcp_servers.r] command = "{mcp_repl}" +args = ["--sandbox", "inherit"] env_vars = ["MCP_REPL_DEBUG_DIR"] +[projects."{repo_root}"] +trust_level = "trusted" +"#, + ) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn codex_traced_config( + mcp_repl: &Path, + trace_script: &Path, + python_program: &str, + repo_root: &Path, + openai_base_url: &str, + ) -> String { + let python_program = toml_escape(python_program); + let trace_script = toml_escape(&trace_script.display().to_string()); + let mcp_repl = toml_escape(&mcp_repl.display().to_string()); + let repo_root = toml_escape(&repo_root.display().to_string()); + let openai_base_url = toml_escape(openai_base_url); + format!( + r#"model_provider = "mock-openai" +disable_paste_burst = true +project_doc_max_bytes = 0 + +[model_providers.mock-openai] +name = "Mock OpenAI" +base_url = "{openai_base_url}" +wire_api = "responses" +requires_openai_auth = false +supports_websockets = false + +[notice] +hide_full_access_warning = true + +[tui] +alternate_screen = "never" +animations = false + +[features] +steer = true +remote_models = true +responses_websockets = false + +[mcp_servers.r] +command = "{python_program}" +args = ["{trace_script}", "{mcp_repl}", "--sandbox", "inherit"] +env_vars = ["MCP_REPL_DEBUG_DIR"] +[projects."{repo_root}"] +trust_level = "trusted" +"#, + ) + } + + fn codex_install_base_config(repo_root: &Path, openai_base_url: &str) -> String { + let repo_root = toml_escape(&repo_root.display().to_string()); + let openai_base_url = toml_escape(openai_base_url); + format!( + r#"model_provider = "mock-openai" +disable_paste_burst = true +project_doc_max_bytes = 0 + +[model_providers.mock-openai] +name = "Mock OpenAI" +base_url = "{openai_base_url}" +wire_api = "responses" +requires_openai_auth = false +supports_websockets = false + +[notice] +hide_full_access_warning = true + +[tui] +alternate_screen = "never" +animations = false + +[features] +steer = true +remote_models = true 
+responses_websockets = false + [projects."{repo_root}"] trust_level = "trusted" "#, @@ -729,6 +1097,7 @@ trust_level = "trusted" .to_string() } + #[cfg(any(target_os = "macos", target_os = "linux"))] fn outside_workspace_probe_code() -> TestResult { let nanos = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH)? @@ -754,6 +1123,19 @@ tryCatch({ .to_string() } + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn resolve_trace_script_path() -> TestResult { + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("scripts") + .join("mcp-stdio-trace.py"); + if path.is_file() { + Ok(path) + } else { + Err(format!("missing trace proxy script at {}", path.display()).into()) + } + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] fn normalize_screen(screen: &str) -> String { fn is_prompt_line(line: &str) -> bool { line.as_bytes().starts_with(&[0xE2, 0x80, 0xBA]) @@ -943,6 +1325,7 @@ tryCatch({ normalized.join("\n").trim_end().to_string() } + #[cfg(any(target_os = "macos", target_os = "linux"))] fn scrub_seconds(line: &str) -> String { let mut out = String::with_capacity(line.len()); let mut chars = line.chars().peekable(); @@ -1056,6 +1439,249 @@ tryCatch({ } } + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn latest_wire_log_path(debug_dir: &Path) -> Option { + let mut sessions = std::fs::read_dir(debug_dir) + .ok()? 
+ .filter_map(|entry| entry.ok().map(|entry| entry.path())) + .filter(|path| path.is_dir()) + .collect::>(); + sessions.sort(); + sessions.last().map(|session| session.join("wire.jsonl")) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn render_wire_snapshot( + debug_dir: &Path, + workspace: &Path, + codex_home: &Path, + ) -> TestResult { + let wire_path = latest_wire_log_path(debug_dir) + .ok_or_else(|| "missing wire.jsonl trace output".to_string())?; + let contents = std::fs::read_to_string(&wire_path)?; + let frames = extract_wire_messages(&contents)?; + + let initialize_request = frames + .iter() + .find(|(stream, message)| *stream == "stdin" && message["method"] == "initialize") + .map(|(_, message)| message.clone()) + .ok_or_else(|| format!("missing initialize request in {}", wire_path.display()))?; + let initialize_id = initialize_request.get("id").cloned(); + let initialize_response = matching_response(&frames, initialize_id.as_ref(), |message| { + message + .get("result") + .and_then(|result| result.get("capabilities")) + .is_some() + }) + .ok_or_else(|| format!("missing initialize response in {}", wire_path.display()))?; + + let tools_call_request = frames + .iter() + .find(|(stream, message)| *stream == "stdin" && message["method"] == "tools/call") + .map(|(_, message)| message.clone()) + .ok_or_else(|| format!("missing tools/call request in {}", wire_path.display()))?; + let tools_call_id = tools_call_request.get("id").cloned(); + let tools_call_response = matching_response(&frames, tools_call_id.as_ref(), |message| { + message + .get("result") + .and_then(|result| result.get("content")) + .is_some() + }) + .ok_or_else(|| format!("missing tools/call response in {}", wire_path.display()))?; + + let mut snapshot = serde_json::json!({ + "client_to_server": { + "initialize": simplify_wire_request(&initialize_request), + "tools_call": simplify_wire_request(&tools_call_request), + }, + "server_to_client": { + "initialize": 
simplify_wire_response(&initialize_response), + "tools_call": simplify_wire_response(&tools_call_response), + } + }); + normalize_wire_snapshot_value(&mut snapshot, workspace, codex_home); + Ok(serde_json::to_string_pretty(&snapshot)?) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn extract_wire_messages(contents: &str) -> TestResult> { + let mut frames = Vec::new(); + for line in contents.lines().filter(|line| !line.trim().is_empty()) { + let record: Value = serde_json::from_str(line)?; + if record["event"] != "stream_chunk" { + continue; + } + let Some(stream) = record + .get("payload") + .and_then(|payload| payload.get("stream")) + .and_then(Value::as_str) + else { + continue; + }; + if stream != "stdin" && stream != "stdout" { + continue; + } + let Some(text_as_json) = record + .get("payload") + .and_then(|payload| payload.get("text_as_json")) + else { + continue; + }; + match text_as_json { + Value::Array(items) => { + for item in items { + if item.is_object() { + frames.push((stream.to_string(), item.clone())); + } + } + } + Value::Object(_) => frames.push((stream.to_string(), text_as_json.clone())), + _ => {} + } + } + Ok(frames) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn matching_response( + frames: &[(String, Value)], + expected_id: Option<&Value>, + fallback: impl Fn(&Value) -> bool, + ) -> Option { + if let Some(expected_id) = expected_id + && let Some(message) = frames.iter().find_map(|(stream, message)| { + (*stream == "stdout" && message.get("id") == Some(expected_id)) + .then_some(message.clone()) + }) + { + return Some(message); + } + frames.iter().find_map(|(stream, message)| { + (*stream == "stdout" && fallback(message)).then_some(message.clone()) + }) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn simplify_wire_request(message: &Value) -> Value { + serde_json::json!({ + "jsonrpc": message.get("jsonrpc").cloned().unwrap_or(Value::Null), + "method": 
message.get("method").cloned().unwrap_or(Value::Null), + "params": message.get("params").cloned().unwrap_or(Value::Null), + }) + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] + fn simplify_wire_response(message: &Value) -> Value { + serde_json::json!({ + "jsonrpc": message.get("jsonrpc").cloned().unwrap_or(Value::Null), + "result": message.get("result").cloned().unwrap_or(Value::Null), + }) + } + + fn normalize_wire_snapshot_value(value: &mut Value, workspace: &Path, codex_home: &Path) { + fn path_matches(path: &[String], suffix: &[&str]) -> bool { + path.len() >= suffix.len() + && path[path.len() - suffix.len()..] + .iter() + .map(String::as_str) + .eq(suffix.iter().copied()) + } + + fn normalize_wire_string(text: &str, workspace: &Path, codex_home: &Path) -> String { + let workspace_display = workspace.display().to_string(); + let workspace_private = format!("/private{workspace_display}"); + let codex_home_display = codex_home.display().to_string(); + let codex_home_private = format!("/private{codex_home_display}"); + let mut normalized = text.to_string(); + for (needle, replacement) in [ + (&workspace_private, ""), + (&workspace_display, ""), + (&codex_home_private, ""), + (&codex_home_display, ""), + ] { + normalized = normalized.replace(needle, replacement); + } + normalize_temp_paths(&normalize_codex_home_path(&normalized)) + } + + fn normalize_inner( + value: &mut Value, + path: &mut Vec, + workspace: &Path, + codex_home: &Path, + ) { + match value { + Value::Object(map) => { + let original = std::mem::take(map); + for (key, mut child) in original { + let normalized_key = normalize_wire_string(&key, workspace, codex_home); + path.push(normalized_key.clone()); + normalize_inner(&mut child, path, workspace, codex_home); + path.pop(); + map.insert(normalized_key, child); + } + } + Value::Array(items) => { + for item in items { + path.push("[]".to_string()); + normalize_inner(item, path, workspace, codex_home); + path.pop(); + } + } + 
Value::String(text) => { + if path_matches(path, &["clientInfo", "version"]) + || path_matches(path, &["serverInfo", "version"]) + { + *text = "".to_string(); + return; + } + if path.last().is_some_and(|key| key == "session_id") { + *text = "".to_string(); + return; + } + if path.last().is_some_and(|key| key == "turn_id") { + *text = "".to_string(); + return; + } + if path.last().is_some_and(|key| key == "sandbox") { + *text = "".to_string(); + return; + } + if path.last().is_some_and(|key| key == "codexLinuxSandboxExe") { + *text = "".to_string(); + return; + } + *text = normalize_wire_string(text, workspace, codex_home); + } + Value::Null => {} + _ => {} + } + } + + let mut path = Vec::new(); + normalize_inner(value, &mut path, workspace, codex_home); + } + + #[test] + fn normalize_wire_snapshot_preserves_null_codex_linux_sandbox_exe() { + let workspace = std::env::temp_dir().join("mcp-repl-wire-workspace"); + let codex_home = std::env::temp_dir().join("mcp-repl-wire-codex-home"); + let mut value = serde_json::json!({ + "codexLinuxSandboxExe": null + }); + + normalize_wire_snapshot_value(&mut value, &workspace, &codex_home); + + assert_eq!( + value, + serde_json::json!({ + "codexLinuxSandboxExe": null + }), + "wire snapshots should preserve a null Codex Linux helper path" + ); + } + + #[cfg(any(target_os = "macos", target_os = "linux"))] fn detect_cursor_request( chunk: &[u8], carry: &mut Vec, @@ -1079,6 +1705,7 @@ tryCatch({ carry.extend_from_slice(&data[keep..]); } + #[cfg(any(target_os = "macos", target_os = "linux"))] struct CodexPtyDriver { child: Box, writer: Arc>>, @@ -1088,6 +1715,7 @@ tryCatch({ _slave: Box, } + #[cfg(any(target_os = "macos", target_os = "linux"))] impl CodexPtyDriver { fn spawn( codex_home: &Path, @@ -1299,6 +1927,7 @@ tryCatch({ tool_name: String, workspace_write_tool_args: String, full_access_tool_args: Option, + first_user_turn_tool_args: Option, requests: Vec, request_paths: Vec, next_call_ordinal: usize, @@ -1311,6 +1940,34 @@ 
tryCatch({ tool_name: String, workspace_write_tool_args: String, full_access_tool_args: Option, + ) -> TestResult { + Self::start_with_options( + tool_name, + workspace_write_tool_args, + full_access_tool_args, + None, + ) + .await + } + + async fn start_with_first_user_turn_tool_call( + tool_name: String, + first_user_turn_tool_args: String, + ) -> TestResult { + Self::start_with_options( + tool_name, + String::new(), + None, + Some(first_user_turn_tool_args), + ) + .await + } + + async fn start_with_options( + tool_name: String, + workspace_write_tool_args: String, + full_access_tool_args: Option, + first_user_turn_tool_args: Option, ) -> TestResult { let listener = TcpListener::bind("127.0.0.1:0").await?; let addr = listener.local_addr()?; @@ -1318,6 +1975,7 @@ tryCatch({ tool_name, workspace_write_tool_args, full_access_tool_args, + first_user_turn_tool_args, requests: Vec::new(), request_paths: Vec::new(), next_call_ordinal: 1, @@ -1534,6 +2192,12 @@ tryCatch({ ); } + if has_user_message(body) + && let Some(tool_args) = state.first_user_turn_tool_args.take() + { + return queue_tool_call(body, state, tool_args); + } + if has_user_marker(body, WORKSPACE_WRITE_MARKER) { return queue_tool_call(body, state, state.workspace_write_tool_args.clone()); } @@ -1667,6 +2331,16 @@ tryCatch({ .unwrap_or(false) } + fn has_user_message(body: &Value) -> bool { + let Some(items) = body.get("input").and_then(Value::as_array) else { + return false; + }; + items.iter().rev().any(|item| { + item.get("type").and_then(Value::as_str) == Some("message") + && item.get("role").and_then(Value::as_str) == Some("user") + }) + } + fn has_function_call_output(body: &Value, call_id: &str) -> bool { let Some(items) = body.get("input").and_then(Value::as_array) else { return false; @@ -1699,6 +2373,50 @@ tryCatch({ outputs } + fn assert_exec_output_contains_tool_call( + stdout: &str, + server: &str, + tool: &str, + input: &str, + ) -> TestResult<()> { + let mut saw_json = false; + for line in 
stdout.lines() { + let trimmed = line.trim(); + if !trimmed.starts_with('{') { + continue; + } + saw_json = true; + let Ok(event) = serde_json::from_str::(trimmed) else { + continue; + }; + let Some(item) = event.get("item") else { + continue; + }; + if item.get("type").and_then(Value::as_str) != Some("mcp_tool_call") { + continue; + } + let Some(arguments) = item.get("arguments") else { + continue; + }; + if item.get("server").and_then(Value::as_str) == Some(server) + && item.get("tool").and_then(Value::as_str) == Some(tool) + && arguments.get("input").and_then(Value::as_str) == Some(input) + { + return Ok(()); + } + } + + let json_note = if saw_json { + "saw json events, but not the expected mcp tool call" + } else { + "stdout did not contain json events" + }; + Err(format!( + "expected codex exec output to include {server}.{tool} with input {input:?}; {json_note}\nstdout:\n{stdout}" + ) + .into()) + } + #[test] fn resolve_tool_call_spec_prefers_namespace_shape_when_present() { let request = serde_json::json!({ @@ -1756,6 +2474,21 @@ mod linux { Ok(()) } + #[tokio::test(flavor = "multi_thread")] + async fn codex_exec_wire_sandbox_state_meta() -> TestResult<()> { + let snapshot = super::unix_impl::run_codex_exec_wire_sandbox_state_meta().await?; + if snapshot.is_empty() { + return Ok(()); + } + insta::assert_snapshot!("codex_exec_wire_sandbox_state_meta", snapshot); + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn install_then_codex_exec_uses_generated_config() -> TestResult<()> { + super::unix_impl::run_install_then_codex_exec_uses_generated_config().await + } + #[tokio::test(flavor = "multi_thread")] async fn codex_tui_full_access_sandbox_update() -> TestResult<()> { super::unix_impl::run_codex_tui_full_access_sandbox_update().await @@ -1791,6 +2524,21 @@ mod macos { Ok(()) } + #[tokio::test(flavor = "multi_thread")] + async fn codex_exec_wire_sandbox_state_meta() -> TestResult<()> { + let snapshot = 
super::unix_impl::run_codex_exec_wire_sandbox_state_meta().await?; + if snapshot.is_empty() { + return Ok(()); + } + insta::assert_snapshot!("codex_exec_wire_sandbox_state_meta", snapshot); + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn install_then_codex_exec_uses_generated_config() -> TestResult<()> { + super::unix_impl::run_install_then_codex_exec_uses_generated_config().await + } + #[tokio::test(flavor = "multi_thread")] async fn codex_tui_full_access_sandbox_update() -> TestResult<()> { super::unix_impl::run_codex_tui_full_access_sandbox_update().await @@ -1803,8 +2551,20 @@ mod macos { } #[cfg(target_os = "windows")] -#[test] -fn codex_exec_initial_sandbox_state_windows_stub() -> TestResult<()> { - eprintln!("codex exec sandbox state test is not implemented on Windows; skipping"); - Ok(()) +mod windows { + use super::TestResult; + + #[tokio::test(flavor = "multi_thread")] + async fn codex_exec_initial_sandbox_state() -> TestResult<()> { + let snapshot = super::unix_impl::run_codex_exec_initial_sandbox_state().await?; + if snapshot.is_empty() { + return Ok(()); + } + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn install_then_codex_exec_uses_generated_config() -> TestResult<()> { + super::unix_impl::run_install_then_codex_exec_uses_generated_config().await + } } diff --git a/tests/snapshots/codex_approvals_tui__linux__codex_exec_wire_sandbox_state_meta.snap b/tests/snapshots/codex_approvals_tui__linux__codex_exec_wire_sandbox_state_meta.snap new file mode 100644 index 00000000..43eae756 --- /dev/null +++ b/tests/snapshots/codex_approvals_tui__linux__codex_exec_wire_sandbox_state_meta.snap @@ -0,0 +1,100 @@ +--- +source: tests/codex_approvals_tui.rs +assertion_line: 2490 +expression: snapshot +--- +{ + "client_to_server": { + "initialize": { + "jsonrpc": "2.0", + "method": "initialize", + "params": { + "protocolVersion": "2025-06-18", + "capabilities": { + "elicitation": { + "form": {} + } + }, + "clientInfo": { + "name": 
"codex-mcp-client", + "title": "Codex", + "version": "" + } + } + }, + "tools_call": { + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "_meta": { + "progressToken": 1, + "x-codex-turn-metadata": { + "session_id": "", + "thread_source": "user", + "turn_id": "", + "workspaces": { + "": { + "has_changes": false + } + }, + "sandbox": "" + }, + "codex/sandbox-state-meta": { + "sandboxPolicy": { + "type": "workspace-write", + "writable_roots": [ + "/memories" + ], + "network_access": false, + "exclude_tmpdir_env_var": false, + "exclude_slash_tmp": false + }, + "codexLinuxSandboxExe": "", + "sandboxCwd": "", + "useLegacyLandlock": false + } + }, + "name": "repl", + "arguments": { + "input": "target <- tempfile(\"mcp-repl-codex\")\ntryCatch({\n writeLines(\"ok\", target)\n cat(\"WRITE_OK\\n\")\n unlink(target)\n}, error = function(e) {\n message(\"WRITE_ERROR:\", conditionMessage(e))\n})\n" + } + } + } + }, + "server_to_client": { + "initialize": { + "jsonrpc": "2.0", + "result": { + "protocolVersion": "2025-06-18", + "capabilities": { + "experimental": { + "codex/sandbox-state-meta": { + "version": "1.0.0" + } + }, + "tools": {} + }, + "serverInfo": { + "name": "rmcp", + "version": "" + } + } + }, + "tools_call": { + "jsonrpc": "2.0", + "result": { + "content": [ + { + "type": "text", + "text": "WRITE_OK\n" + }, + { + "type": "text", + "text": "> " + } + ], + "isError": false + } + } + } +} diff --git a/tests/snapshots/codex_approvals_tui__macos__codex_exec_wire_sandbox_state_meta.snap b/tests/snapshots/codex_approvals_tui__macos__codex_exec_wire_sandbox_state_meta.snap new file mode 100644 index 00000000..b1927e4e --- /dev/null +++ b/tests/snapshots/codex_approvals_tui__macos__codex_exec_wire_sandbox_state_meta.snap @@ -0,0 +1,100 @@ +--- +source: tests/codex_approvals_tui.rs +assertion_line: 2490 +expression: snapshot +--- +{ + "client_to_server": { + "initialize": { + "jsonrpc": "2.0", + "method": "initialize", + "params": { + "protocolVersion": 
"2025-06-18", + "capabilities": { + "elicitation": { + "form": {} + } + }, + "clientInfo": { + "name": "codex-mcp-client", + "title": "Codex", + "version": "" + } + } + }, + "tools_call": { + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "_meta": { + "progressToken": 1, + "x-codex-turn-metadata": { + "session_id": "", + "thread_source": "user", + "turn_id": "", + "workspaces": { + "": { + "has_changes": false + } + }, + "sandbox": "" + }, + "codex/sandbox-state-meta": { + "sandboxPolicy": { + "type": "workspace-write", + "writable_roots": [ + "/memories" + ], + "network_access": false, + "exclude_tmpdir_env_var": false, + "exclude_slash_tmp": false + }, + "codexLinuxSandboxExe": null, + "sandboxCwd": "", + "useLegacyLandlock": false + } + }, + "name": "repl", + "arguments": { + "input": "target <- tempfile(\"mcp-repl-codex\")\ntryCatch({\n writeLines(\"ok\", target)\n cat(\"WRITE_OK\\n\")\n unlink(target)\n}, error = function(e) {\n message(\"WRITE_ERROR:\", conditionMessage(e))\n})\n" + } + } + } + }, + "server_to_client": { + "initialize": { + "jsonrpc": "2.0", + "result": { + "protocolVersion": "2025-06-18", + "capabilities": { + "experimental": { + "codex/sandbox-state-meta": { + "version": "1.0.0" + } + }, + "tools": {} + }, + "serverInfo": { + "name": "rmcp", + "version": "" + } + } + }, + "tools_call": { + "jsonrpc": "2.0", + "result": { + "content": [ + { + "type": "text", + "text": "WRITE_OK\n" + }, + { + "type": "text", + "text": "> " + } + ], + "isError": false + } + } + } +} From 33eaaac5e18c25f800beb1fa855ce2065bdfe1ea Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 11:22:59 -0400 Subject: [PATCH 5/5] docs: document per-call inherit metadata --- README.md | 12 +-- docs/architecture.md | 2 +- docs/debugging.md | 11 ++- docs/index.md | 3 +- .../codex-sandbox-state-meta-migration.md | 99 +++++++++++++++++++ docs/sandbox.md | 30 ++++-- docs/testing.md | 2 +- 7 files changed, 140 insertions(+), 19 deletions(-) create 
mode 100644 docs/plans/completed/codex-sandbox-state-meta-migration.md diff --git a/README.md b/README.md index 2d6b1ff2..0dae74bd 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ mcp-repl install --client codex --interpreter r Bare `mcp-repl` defaults to `--oversized-output pager`. `install --client codex` writes `--sandbox inherit --oversized-output files` by default. That -sentinel means `mcp-repl` should inherit sandbox policy updates from Codex for the session while +sentinel means `mcp-repl` should inherit sandbox policy metadata from Codex on each tool call while keeping installed Codex configs on the file-backed oversized-output path. Example `R` REPL Codex config (paths vary by OS/user): @@ -187,8 +187,8 @@ Example `R` REPL Codex config (paths vary by OS/user): command = "/Users/alice/.cargo/bin/mcp-repl" # mcp-repl handles the primary timeout; this higher Codex timeout is only an outer guard. tool_timeout_sec = 1800 -# --sandbox inherit: use sandbox policy updates sent by Codex for this session. -# If no update is sent, mcp-repl exits with an error. +# --sandbox inherit: use sandbox policy metadata sent by Codex on each tool call. +# mcp-repl fails closed if the tool call omits or malforms that metadata. args = [ "--sandbox", "inherit", "--oversized-output", "files", @@ -203,8 +203,8 @@ Example `Python` REPL Codex config: command = "/Users/alice/.cargo/bin/mcp-repl" # mcp-repl handles the primary timeout; this higher Codex timeout is only an outer guard. tool_timeout_sec = 1800 -# --sandbox inherit: use sandbox policy updates sent by Codex for this session. -# If no update is sent, mcp-repl exits with an error. +# --sandbox inherit: use sandbox policy metadata sent by Codex on each tool call. +# mcp-repl fails closed if the tool call omits or malforms that metadata. 
args = [ "--sandbox", "inherit", "--oversized-output", "files", @@ -213,7 +213,7 @@ args = [ ``` For Claude, `install --client claude` writes to `~/.claude.json` with explicit sandbox mode and -`--oversized-output files` because Claude does not propagate sandbox state updates to MCP servers: +`--oversized-output files` because Claude does not propagate Codex-style sandbox metadata to MCP servers: ```json // ~/.claude.json diff --git a/docs/architecture.md b/docs/architecture.md index 61595869..c51a1997 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -25,7 +25,7 @@ The repository is organized around a few concrete subsystems rather than deep pa ### Sandbox and process isolation -- `src/sandbox.rs`, `src/sandbox_cli.rs`, and `src/windows_sandbox.rs` implement OS-level sandboxing, writable-root policy, and client-driven sandbox updates. +- `src/sandbox.rs`, `src/sandbox_cli.rs`, and `src/windows_sandbox.rs` implement OS-level sandboxing, writable-root policy, and Codex per-tool-call sandbox metadata handling. - The sideband and sandbox contracts are documented in `docs/sandbox.md` and `docs/worker_sideband_protocol.md`. ### Output, images, and debug surfaces diff --git a/docs/debugging.md b/docs/debugging.md index 8d51268a..78e388f9 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -17,10 +17,10 @@ Enable per-startup JSONL logs with either: Each startup creates a fresh session directory under that root. 
`mcp-repl` writes: -- `events.jsonl` with startup metadata, tool calls, and sandbox custom request events +- `events.jsonl` with startup metadata, tool calls, and parsed sandbox metadata events - `startup.log` for server-side startup trace lines - `worker-startup.log` for worker-side startup trace lines -- `sandbox-state.jsonl` for the initial effective sandbox policy plus later sandbox policy/update payloads +- `sandbox-state.jsonl` for the initial effective sandbox policy plus later tool-call sandbox metadata and effective policy updates Example: @@ -43,7 +43,7 @@ MCP_REPL_DEBUG_DIR=/tmp/mcp-repl-debug mcp-repl --interpreter python ## MCP and sandbox tracing -These switches are useful when the client is sending custom sandbox updates or when the sandbox policy is the thing you are debugging. +These switches are useful when the client is sending Codex sandbox metadata or when the sandbox policy is the thing you are debugging. - `MCP_REPL_DEBUG_DIR=/path/to/debug-root` writes `sandbox-state.jsonl` inside the session directory - `MCP_REPL_KEEP_SESSION_TMPDIR=1` keeps the worker session temp directory after exit so you can inspect it @@ -59,6 +59,11 @@ MCP_REPL_DEBUG_DIR=/tmp/mcp-repl-debug mcp-repl --sandbox inherit `--debug-repl` runs `mcp-repl` as a local interactive driver for the worker instead of as an MCP server. This is the fastest way to reproduce REPL behavior without involving a client. +If you start it with `--sandbox inherit`, the debug REPL bootstraps one local +inherited sandbox snapshot from the current default sandbox state before the +first worker spawn. That keeps the inherit code path debuggable even though +there is no per-tool-call MCP metadata in local debug mode. + Start it with: ```sh diff --git a/docs/index.md b/docs/index.md index b2ebc11d..47073a22 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,9 +10,10 @@ checked-in execution plans without relying on stale notes. 
- `docs/output_timeline.md`: server-side model for merging text pipes and sideband events into visible reply order. - `docs/testing.md`: public validation surface and snapshot workflow. - `docs/debugging.md`: debug logs, `--debug-repl`, and wire tracing. -- `docs/sandbox.md`: sandbox modes, writable roots, and client-driven sandbox updates. +- `docs/sandbox.md`: sandbox modes, writable roots, and Codex per-tool-call sandbox metadata. - `docs/worker_sideband_protocol.md`: server/worker IPC contract. - `docs/plans/AGENTS.md`: when to write a checked-in execution plan and where it lives. +- `docs/plans/completed/codex-sandbox-state-meta-migration.md`: completed plan for migrating Codex `--sandbox inherit` from async updates to per-tool-call sandbox metadata. ## Normative Docs diff --git a/docs/plans/completed/codex-sandbox-state-meta-migration.md b/docs/plans/completed/codex-sandbox-state-meta-migration.md new file mode 100644 index 00000000..83c8ebb9 --- /dev/null +++ b/docs/plans/completed/codex-sandbox-state-meta-migration.md @@ -0,0 +1,99 @@ +# Codex Sandbox State Meta Migration + +## Summary + +- Migrate `mcp-repl`'s Codex `--sandbox inherit` integration from the obsolete async sandbox update protocol to Codex's current per-tool-call `_meta["codex/sandbox-state-meta"]` contract. +- Keep `--sandbox inherit` fail-closed: if current Codex does not provide usable sandbox metadata on a tool call, `mcp-repl` must reject the call instead of falling back to a local default. +- Keep explicit sandbox modes such as `--sandbox read-only` and `--sandbox workspace-write` authoritative; Codex metadata must not override them. +- Do not carry backward compatibility for older Codex releases that still depended on the old update channel. + +## Status + +- State: completed +- Last updated: 2026-04-18 +- Current phase: completed + +## Design Intent + +- When `mcp-repl` is configured with `--sandbox inherit`, Codex is the source of truth for the sandbox state. 
+- `mcp-repl` must not guess, substitute, or silently fall back to its own default sandbox when it is expecting Codex to provide one. +- Security is the main constraint: if Codex intended `read-only`, `mcp-repl` must not run with broader permissions just because sandbox information was late, missing, or malformed. +- MCP startup should stay fast. `mcp-repl` should not block initialization waiting for sandbox information that belongs to a later tool call. +- Waiting, if any, belongs only at the point where a tool call needs sandbox information in order to run safely. +- The repo should track the current public Codex contract and exercise the real Codex binary in integration coverage so protocol drift is surfaced quickly. + +## Motivation + +- The old design assumed Codex would push sandbox state out of band, asynchronously, at session startup. +- That assumption is brittle because it separates sandbox selection from the tool call that actually needs the sandbox. +- When that assumption fails, the failure mode is not just a functional bug. It is a security issue, because the effective sandbox can become broader than what Codex intended. +- The correct shape is request-scoped: if `mcp-repl` is inheriting sandbox policy from Codex, it should only run once it has the sandbox information that applies to that call. +- The simplest safe path is to target current Codex directly instead of layering compatibility logic around an obsolete protocol. + +## Current Direction + +- Treat the current release of Codex as the only target contract for this slice. +- Replace the server-side sandbox update listener with per-tool-call parsing of `_meta["codex/sandbox-state-meta"]`. +- Advertise only the current Codex experimental capability needed for that metadata path. +- Rebuild the public tests around the new contract before changing runtime code. 
+ +## Long-Term Direction + +- The long-term contract should be simple: `mcp-repl` determines the inherited sandbox directly from the Codex tool call that is about to execute. +- Startup should stay fast. `mcp-repl` should not block MCP initialization waiting for sandbox state that belongs to a later tool call. +- Any server state retained between calls should be minimal bookkeeping, not a second sandbox synchronization protocol. + +## Phase Status + +- Phase 0: completed + - Audited the current `inherit` path, documented the protocol shift, and locked the bounded design. +- Phase 1: completed + - Added failing public regressions for metadata-driven sandbox inheritance and fail-closed behavior. +- Phase 2: completed + - Implemented the runtime migration and removed obsolete update-handling code. +- Phase 3: completed + - Refreshed real-Codex integration coverage, docs, and final verification. + +## Locked Decisions + +- Do not implement compatibility shims for older Codex sandbox update behavior. +- Do not infer `read-only` versus `workspace-write` from coarse metadata such as `x-codex-turn-metadata.sandbox`; that signal is not precise enough. +- Do not fall back from missing Codex sandbox metadata to `mcp-repl`'s local default policy. +- Do not let Codex metadata override explicit non-`inherit` CLI sandbox modes. +- Prefer a single happy path: current Codex should supply `codex/sandbox-state-meta` on each tool call, and `mcp-repl` should consume that directly. + +## Outcome + +- `mcp-repl` now advertises `codex/sandbox-state-meta` only when `--sandbox inherit` is configured. +- Later explicit sandbox mode overrides such as `--sandbox inherit --sandbox workspace-write` or `sandbox_mode=workspace-write` cancel inherit-mode metadata requirements, matching the documented later-wins CLI semantics. +- Ordered sandbox plans still validate earlier operations before later mode resets; later-wins resolution does not silently discard earlier invalid CLI/config ops. 
+- `--debug-repl --sandbox inherit` remains locally usable by bootstrapping one inherited snapshot from the current default sandbox state before the first worker spawn. +- `repl_reset` derives inherited sandbox state from the current tool call's `_meta["codex/sandbox-state-meta"]`. +- Non-empty `repl` calls derive inherited sandbox state from the current tool call's `_meta["codex/sandbox-state-meta"]` before executing fresh code. +- Empty-input `repl` polls ignore per-call sandbox metadata when they can be answered from existing state, but they still apply the current tool call's metadata before spawning a worker to answer an idle call on a fresh session. +- When a prior timed-out request has already settled, `mcp-repl` resolves the stale timeout marker before deciding whether a new non-empty `repl` call is still just a busy follow-up. +- Missing or malformed metadata fails closed with the existing inherit error path. +- Explicit non-`inherit` sandbox modes ignore Codex metadata. +- The old async sandbox update listener and startup settle logic were removed from the active runtime contract. + +## Verification + +- `cargo check` +- `cargo build` +- `cargo clippy --all-targets --all-features -- -D warnings` +- `cargo test` +- `cargo +nightly fmt --all` + +## Notes + +- Current Codex source and live traces both showed the old async update protocol was obsolete for the current release line. +- The migration stayed intentionally single-path: no compatibility layer for older Codex builds. +- Follow-up review fixes tightened the runtime sequencing so sandbox metadata is applied only for fresh execution or worker spawn, not for empty-input polls that are only draining prior output or using an already-running idle session. + +## Decision Log + +- 2026-04-17: Scoped the work to current-release Codex only. Older async sandbox update behavior is out of scope. +- 2026-04-17: Locked `--sandbox inherit` to remain fail-closed. 
Missing or malformed Codex sandbox metadata must reject the tool call. +- 2026-04-17: Chose per-tool-call `_meta["codex/sandbox-state-meta"]` as the source of truth after inspecting current Codex source and live traces. +- 2026-04-17: Completed the repo migration and verification against the real current Codex integration tests. +- 2026-04-18: Clarified the shipped contract for `repl`: empty-input polls ignore per-call sandbox metadata only when they can be answered from existing state, while fresh non-empty calls resolve stale timeout markers and then apply the current call's sandbox metadata before executing new code. diff --git a/docs/sandbox.md b/docs/sandbox.md index 19ee3fbc..86af22e2 100644 --- a/docs/sandbox.md +++ b/docs/sandbox.md @@ -10,9 +10,25 @@ When no CLI sandbox mode is provided, the default is: - `workspace-write` - `network_access: false` -When `--sandbox inherit` is used, the client must send a sandbox update -(`codex/sandbox-state/update`) before the first worker start/tool call. -If no update is provided, the first tool call fails fast. +When `--sandbox inherit` is used for MCP server operation, the client must +attach per-tool-call sandbox metadata in `_meta["codex/sandbox-state-meta"]`. +That metadata is the source of truth for the tool call that is about to run. If +it is missing or malformed, `mcp-repl` fails closed with `--sandbox inherit +requested but no client sandbox state was provided`. + +`--debug-repl` is the one local-only exception. Because there is no client +metadata channel in that mode, `mcp-repl --debug-repl --sandbox inherit` +bootstraps one local inherited snapshot from the current default sandbox state +before the first worker spawn. + +For `repl`, empty-input polls ignore per-call sandbox metadata when they can be +answered from existing state, such as draining a timed-out request or returning an +idle prompt from an already-running worker. 
If an empty-input call must spawn a +worker to answer the call, `mcp-repl` applies the current tool call's sandbox +metadata before that spawn. Non-empty `repl` calls resolve any stale timeout +marker first, then apply the current call's sandbox metadata before executing +fresh code. If a timed-out request is still genuinely in flight, follow-up calls +continue servicing that request instead of switching sandboxes mid-flight. The worker also gets a per-session temp directory, exported as: @@ -28,8 +44,8 @@ The worker also gets a per-session temp directory, exported as: `mcp-repl --add-allowed-domain ` - Advanced overrides: `mcp-repl --config key=value` with Codex-shaped keys -- MCP sandbox update method: - `codex/sandbox-state/update` (capability `codex/sandbox-state`) +- MCP sandbox metadata capability: + `codex/sandbox-state-meta` (advertised only when the effective CLI sandbox mode still resolves to `inherit` after later overrides) Operations are applied strictly in CLI argument order. Later operations win. `--sandbox ...` resets the base policy at the point where it appears. @@ -72,8 +88,8 @@ Sandboxing is enforced by a Linux sandbox helper that applies seccomp + Landlock - default Linux worker setup disables network unless explicitly enabled. - `mcp-repl` always uses its own internal Linux sandbox launcher; client-provided helper executable paths are ignored. -- inherited `useLegacyLandlock` updates are translated onto `mcp-repl`'s - internal `bwrap` on/off choice. +- Codex sandbox metadata does not control `mcp-repl`'s optional internal + `bwrap` stage. That remains a local best-effort setting. Optional `bwrap` stage: diff --git a/docs/testing.md b/docs/testing.md index 64d406fd..bb7069d1 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -9,7 +9,7 @@ This file is the entrypoint for deciding how to verify a change. - `tests/repl_surface.rs` and `tests/python_backend.rs`: IPC ownership coverage. 
Only the main worker may own sideband fds; user-spawned children must not. `tests/python_backend.rs` also covers detached-idle oversized-output behavior through the public `repl` API. - `tests/server_smoke.rs`: end-to-end MCP session smoke coverage. - `tests/write_stdin_behavior.rs`: timeout polling, oversized text replies, and transcript-file behavior through the public `repl` API. -- `tests/sandbox.rs` and `tests/sandbox_state_updates.rs`: sandbox policy behavior and client-driven updates. +- `tests/sandbox.rs` and `tests/sandbox_state_updates.rs`: sandbox policy behavior and Codex per-tool-call sandbox metadata. - `tests/plot_images.rs` and `tests/python_plot_images.rs`: plot/image behavior through the public tool surface. - `tests/codex_approvals_tui.rs` and `tests/claude_integration.rs`: client integration coverage. - `tests/docs_contracts.rs`: docs map and snapshot-facing documentation contracts.