From b14139ec606dcac8ce5d8744090db2d0d177d650 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Fri, 17 Apr 2026 15:21:53 -0400 Subject: [PATCH 1/8] Make plot image timeout test deterministic --- .../active/one-way-output-bundle-previews.md | 83 +++++++++++++++++++ tests/plot_images.rs | 27 +++++- 2 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 docs/plans/active/one-way-output-bundle-previews.md diff --git a/docs/plans/active/one-way-output-bundle-previews.md b/docs/plans/active/one-way-output-bundle-previews.md new file mode 100644 index 00000000..e2bc55d1 --- /dev/null +++ b/docs/plans/active/one-way-output-bundle-previews.md @@ -0,0 +1,83 @@ +# One-Way Output Bundle Previews + +## Summary + +- Move files-mode output-bundle previews onto a one-way memory-to-disk model. +- Keep the public bundle layout unchanged: `transcript.txt`, `events.log`, `images/`, and `images/history/` remain the server-owned on-disk history surface. +- Stop building later replies by rereading previously written bundle files from disk. +- Keep only the bounded preview state needed for the next reply in memory while spilling the full history to files. + +## Status + +- State: active +- Last updated: 2026-04-17 +- Current phase: planning + +## Current Direction + +- Treat disk as append-only bundle history, not as the source of truth for visible reply previews. +- Refactor `ActiveOutputBundle` so it caches the bounded preview state needed for later polls: + - first preview image + - last preview image + - bounded head text + - bounded tail text + - flags and counters needed to render the existing disclosure notice +- Start with image preview state first, because the current reply path still rereads old image files from the bundle directory when rebuilding later responses. +- Follow with text-preview caching so later small polls do not depend on reconstructing preview state from spilled files. + +## Long-Term Direction + +- The end-state contract is one-way: once content is spilled to the bundle directory, `mcp-repl` does not need to read it back to answer later polls. +- Disk remains the durable history surface for clients and debugging; memory owns the bounded preview shown inline in normal replies. +- Missing or externally modified bundle files should not matter to visible reply construction after the server has already retained the needed preview state in memory. +- Directory recreation for continued appends may still be reasonable when parents disappear, but visible reply generation should not depend on successful rereads of old bundle files. + +## Phase Status + +- Phase 0: active + - Define the one-way contract and the bounded in-memory preview state. +- Phase 1: pending + - Add cached image-preview state to `ActiveOutputBundle` and remove old-image rereads from later reply rendering. +- Phase 2: pending + - Add cached text head/tail preview state and stop reconstructing later previews from spilled file state. +- Phase 3: pending + - Replace tests that depend on disk rereads with tests for the one-way contract. +- Phase 4: pending + - Run full validation and update plan status for any remaining follow-on work. + +## Locked Decisions + +- Do not change the public files-mode bundle layout or path-disclosure contract as part of this refactor. +- Do not add tamper-detection logic for previously written bundle files. +- Do not rely on rereading previously written image files to build later visible replies. +- Keep preview state bounded in memory; do not retain the full spilled history in RAM. +- Preserve the current polling model and the existing distinction between inline preview content and the disclosed bundle path. + +## Open Questions + +- What exact text preview state should be cached in memory? + - Likely one bounded head window plus one bounded tail window, but the final shape should preserve current preview wording and stream ordering. +- Should the preview cache live only on `ActiveOutputBundle`, or should `StagedTimeoutOutput` also keep a lightweight preview summary before a bundle is materialized? +- When an output-bundle directory or subdirectory disappears mid-session, what minimal recreation behavior is worth supporting for continued appends without expanding this slice into generic filesystem recovery logic? +- Can image preview caching reuse the existing `ReplyImage` payloads directly, or should it store a more compact internal representation? + +## Next Safe Slice + +- Add explicit preview-cache fields to `ActiveOutputBundle`. +- Update `append_image()` to maintain first/last preview images in memory when writing new image files. +- Change `compact_output_bundle_items()` so it renders later image previews from the in-memory cache instead of `load_output_bundle_*` disk reads. +- Add focused unit coverage in `src/server/response.rs` that proves later reply rendering does not depend on old bundle image files remaining on disk. +- After the image path is one-way, plan the text-preview cache slice separately inside this same document before changing text compaction behavior. + +## Stop Conditions + +- Stop if the slice requires retaining unbounded image or text history in memory. +- Stop if preserving the current visible preview contract requires a broader redesign of reply materialization than this plan assumes. +- Stop if the work starts changing the public bundle file format, tool descriptions, or polling contract. +- Stop if directory-recreation behavior grows into speculative recovery logic for states the public API does not need to support. + +## Decision Log + +- 2026-04-17: Decided that files-mode output bundles should be one-way from memory to disk. Disk is the server-owned history surface; memory owns the bounded preview needed for later replies. +- 2026-04-17: Scoped the first implementation slice to image preview caching, because the current image bundle reply path still rereads previously written files from disk. +- 2026-04-17: Kept the public bundle layout unchanged for this initiative so the refactor can land without redefining the client-facing files-mode contract. diff --git a/tests/plot_images.rs b/tests/plot_images.rs index 769539a1..acff7177 100644 --- a/tests/plot_images.rs +++ b/tests/plot_images.rs @@ -238,6 +238,13 @@ fn parse_text_event_rows(events: &str) -> Vec { .collect() } +fn parse_image_event_paths(events: &str) -> Vec { + events + .lines() + .filter_map(|line| line.strip_prefix("I ").map(PathBuf::from)) + .collect() +} + fn advance_visible_lines( text: &str, visible_lines: usize, @@ -1609,7 +1616,7 @@ for (i in 1:6) { plot(1:10, main = sprintf("plot%03d", i)) } flush.console() -Sys.sleep(1) +Sys.sleep(2) "#; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; if any_backend_unavailable(&[&first]) { @@ -1618,7 +1625,7 @@ Sys.sleep(1) return Ok(()); } - sleep(Duration::from_millis(600)).await; + sleep(Duration::from_millis(400)).await; let bundled = session.write_stdin_raw_with("", Some(0.05)).await?; let bundled_text = result_text(&bundled); if bundled_text.contains("< Date: Fri, 17 Apr 2026 15:34:43 -0400 Subject: [PATCH 2/8] refactor: cache output bundle image previews in memory --- .../active/one-way-output-bundle-previews.md | 17 ++-- src/server/response.rs | 87 ++++--------------- tests/plot_images.rs | 23 ++++- 3 files changed, 44 insertions(+), 83 deletions(-) diff --git a/docs/plans/active/one-way-output-bundle-previews.md b/docs/plans/active/one-way-output-bundle-previews.md index e2bc55d1..ff19f49c 100644 --- a/docs/plans/active/one-way-output-bundle-previews.md +++ b/docs/plans/active/one-way-output-bundle-previews.md @@ -11,7 +11,7 @@ - State: active - Last updated: 2026-04-17 -- Current phase: planning +- Current phase: implementation ## Current Direction @@ -34,9 +34,9 @@ ## Phase Status -- Phase 0: active +- Phase 0: completed - Define the one-way contract and the bounded in-memory preview state. -- Phase 1: pending +- Phase 1: completed - Add cached image-preview state to `ActiveOutputBundle` and remove old-image rereads from later reply rendering. - Phase 2: pending - Add cached text head/tail preview state and stop reconstructing later previews from spilled file state. @@ -63,11 +63,10 @@ ## Next Safe Slice -- Add explicit preview-cache fields to `ActiveOutputBundle`. -- Update `append_image()` to maintain first/last preview images in memory when writing new image files. -- Change `compact_output_bundle_items()` so it renders later image previews from the in-memory cache instead of `load_output_bundle_*` disk reads. -- Add focused unit coverage in `src/server/response.rs` that proves later reply rendering does not depend on old bundle image files remaining on disk. -- After the image path is one-way, plan the text-preview cache slice separately inside this same document before changing text compaction behavior. +- Decide whether the text-preview slice is needed at all. +- If text preview caching is needed, start that slice only after confirming that the remaining text path still violates the one-way contract in a user-visible way. +- Decide whether text preview caching is needed purely for architectural consistency or whether the current text spill path is already sufficiently one-way. +- If text caching is needed, define the exact bounded head/tail representation before changing text compaction behavior. ## Stop Conditions @@ -81,3 +80,5 @@ - 2026-04-17: Decided that files-mode output bundles should be one-way from memory to disk. Disk is the server-owned history surface; memory owns the bounded preview needed for later replies. - 2026-04-17: Scoped the first implementation slice to image preview caching, because the current image bundle reply path still rereads previously written files from disk. - 2026-04-17: Kept the public bundle layout unchanged for this initiative so the refactor can land without redefining the client-facing files-mode contract. +- 2026-04-17: Began the image-preview implementation by caching the first-history and latest image previews on `ActiveOutputBundle` and moving the public regression toward “later replies do not depend on old bundle image files remaining on disk”. +- 2026-04-17: Completed the image-preview slice. Later image-bundle replies now render from cached preview images in memory instead of rereading old image files from the bundle directory. diff --git a/src/server/response.rs b/src/server/response.rs index 77fa12aa..d3a844a3 100644 --- a/src/server/response.rs +++ b/src/server/response.rs @@ -61,6 +61,8 @@ struct ActiveOutputBundle { current_image_id: Option, current_image_history_number: usize, history_image_count: usize, + first_history_image: Option, + last_image: Option, transcript_bytes: usize, transcript_lines: usize, transcript_has_partial_line: bool, @@ -1013,6 +1015,8 @@ impl OutputStore { current_image_id: None, current_image_history_number: 0, history_image_count: 0, + first_history_image: None, + last_image: None, transcript_bytes: 0, transcript_lines: 0, transcript_has_partial_line: false, @@ -1572,6 +1576,10 @@ impl ActiveOutputBundle { self.current_image_id = Some(image.id.clone()); self.current_image_history_number = history_number; self.history_image_count = self.history_image_count.saturating_add(1); + if image_number == 1 && history_number == 1 { + self.first_history_image = Some(image.clone()); + } + self.last_image = Some(image.clone()); Ok(Some(ReplyItem::Image(image.clone()))) } @@ -1671,15 +1679,12 @@ impl ActiveOutputBundle { } } - fn image_path(&self, index: usize) -> PathBuf { - let stem = format!("{index:03}"); - for extension in ["png", "jpg", "jpeg", "gif", "webp", "svg"] { - let path = self.paths.images_dir.join(format!("{stem}.{extension}")); - if path.exists() { - return path; - } + fn inline_preview_images(&self) -> (Option<&ReplyImage>, Option<&ReplyImage>) { + match self.next_image_number { + 0 => (None, None), + 1 => (self.last_image.as_ref(), None), + _ => (self.first_history_image.as_ref(), self.last_image.as_ref()), } - self.paths.images_dir.join(format!("{stem}.png")) } fn existing_image_alias_path(&self, index: usize) -> Option { @@ -2023,14 +2028,7 @@ fn compact_output_bundle_items(items: &[ReplyItem], bundle: &ActiveOutputBundle) .rposition(|item| matches!(item, ReplyItem::Image(_))); let single_image = first_image_idx.is_some() && last_image_idx == first_image_idx; let mut out = Vec::new(); - let (first_anchor, last_anchor) = match bundle.next_image_number { - 0 => (None, None), - 1 => (load_output_bundle_image_content(bundle, 1), None), - _ => ( - load_output_bundle_history_image_content(bundle, 1, 1), - load_output_bundle_image_content(bundle, bundle.next_image_number), - ), - }; + let (first_anchor, last_anchor) = bundle.inline_preview_images(); let displayed_anchor_count = usize::from(first_anchor.is_some()) + usize::from(last_anchor.is_some()); @@ -2043,7 +2041,7 @@ fn compact_output_bundle_items(items: &[ReplyItem], bundle: &ActiveOutputBundle) out.push(Content::text(head_text.clone())); } if let Some(image) = first_anchor { - out.push(image); + out.push(image_to_content(image)); } out.push(Content::text(build_output_bundle_notice( bundle, @@ -2075,7 +2073,7 @@ fn compact_output_bundle_items(items: &[ReplyItem], bundle: &ActiveOutputBundle) out.push(Content::text(pre_last_text)); } if let Some(image) = last_anchor { - out.push(image); + out.push(image_to_content(image)); } let post_last_text = collect_prefix_text_after(items, last_image_idx, POST_LAST_TEXT_BUDGET); if !post_last_text.is_empty() { @@ -2550,59 +2548,6 @@ fn image_extension(mime_type: &str) -> &str { } } -fn mime_type_from_path(path: &Path) -> String { - match path - .extension() - .and_then(|ext| ext.to_str()) - .unwrap_or_default() - .to_ascii_lowercase() - .as_str() - { - "png" => "image/png".to_string(), - "jpg" | "jpeg" => "image/jpeg".to_string(), - "gif" => "image/gif".to_string(), - "webp" => "image/webp".to_string(), - "svg" => "image/svg+xml".to_string(), - _ => "image/png".to_string(), - } -} - -fn load_output_bundle_image_content(bundle: &ActiveOutputBundle, index: usize) -> Option { - let path = bundle.image_path(index); - load_output_bundle_image_content_at_path(&path) -} - -fn load_output_bundle_history_image_content( - bundle: &ActiveOutputBundle, - image_index: usize, - history_index: usize, -) -> Option { - let stem = format!("images/history/{image_index:03}/{history_index:03}"); - for extension in ["png", "jpg", "jpeg", "gif", "webp", "svg"] { - let path = bundle.paths.dir.join(format!("{stem}.{extension}")); - if path.exists() { - return load_output_bundle_image_content_at_path(&path); - } - } - load_output_bundle_image_content(bundle, image_index) -} - -fn load_output_bundle_image_content_at_path(path: &Path) -> Option { - let bytes = match fs::read(path) { - Ok(bytes) => bytes, - Err(err) => { - eprintln!( - "skipping unreadable output bundle image {}: {err}", - path.display() - ); - return None; - } - }; - let mime_type = mime_type_from_path(path); - let data = STANDARD.encode(bytes); - Some(content_image(data, mime_type)) -} - fn build_preview(text: &str, path: Option<&Path>, omitted_tail: bool) -> String { if omitted_tail && text.chars().count() <= INLINE_TEXT_BUDGET { return build_short_preview(text, path); diff --git a/tests/plot_images.rs b/tests/plot_images.rs index acff7177..515b9ec0 100644 --- a/tests/plot_images.rs +++ b/tests/plot_images.rs @@ -1602,7 +1602,8 @@ Sys.sleep(1) } #[tokio::test(flavor = "multi_thread")] -async fn timeout_output_bundle_survives_missing_anchor_image() -> TestResult<()> { +async fn timeout_output_bundle_keeps_inline_previews_after_bundle_files_disappear() -> TestResult<()> +{ let temp = tempdir()?; let session = spawn_server_with_files_env_vars(vec![( "TMPDIR".to_string(), @@ -1616,6 +1617,11 @@ for (i in 1:6) { plot(1:10, main = sprintf("plot%03d", i)) } flush.console() +Sys.sleep(1) +for (i in 7:8) { + plot(1:10, main = sprintf("plot%03d", i)) +} +flush.console() Sys.sleep(2) "#; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; @@ -1653,20 +1659,29 @@ Sys.sleep(2) panic!("expected extension for first image path, got: {first_image_history:?}") }); let first_image_alias = bundle_dir.join(format!("images/001.{first_image_extension}")); + let last_image_alias = bundle_dir.join(format!("images/006.{first_image_extension}")); fs::remove_file(&first_image_history)?; fs::remove_file(&first_image_alias)?; + fs::remove_file(&last_image_alias)?; + sleep(Duration::from_millis(900)).await; let damaged = session.write_stdin_raw_with("", Some(0.05)).await?; let damaged_text = result_text(&damaged); + let damaged_images = extract_images(&damaged); assert_ne!( damaged.is_error, Some(true), - "missing anchor image poll reported an error: {}", + "bundle-file deletion poll reported an error: {}", damaged_text ); assert!( damaged_text.contains("events.log"), - "expected damaged anchor poll to keep disclosing the output bundle, got: {damaged_text:?}" + "expected bundle-file deletion poll to keep disclosing the output bundle, got: {damaged_text:?}" + ); + assert_eq!( + damaged_images.len(), + 2, + "expected bundle-file deletion poll to keep inline preview images from memory, got {damaged_images:?}" ); let mut settled_text = damaged_text; @@ -1683,7 +1698,7 @@ Sys.sleep(2) assert!( follow_up_text.contains("[1] 2"), - "expected session to stay alive after anchor image deletion, got: {follow_up_text:?}" + "expected session to stay alive after bundle-file deletion, got: {follow_up_text:?}" ); Ok(()) From fbe2248e2895812adb8a9f070dc6e520cd7644cb Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Fri, 17 Apr 2026 15:41:00 -0400 Subject: [PATCH 3/8] test: cover one-way text bundle behavior --- .../one-way-output-bundle-previews.md | 28 +++----- tests/write_stdin_behavior.rs | 72 +++++++++++++++++++ 2 files changed, 83 insertions(+), 17 deletions(-) rename docs/plans/{active => completed}/one-way-output-bundle-previews.md (71%) diff --git a/docs/plans/active/one-way-output-bundle-previews.md b/docs/plans/completed/one-way-output-bundle-previews.md similarity index 71% rename from docs/plans/active/one-way-output-bundle-previews.md rename to docs/plans/completed/one-way-output-bundle-previews.md index ff19f49c..2f565a92 100644 --- a/docs/plans/active/one-way-output-bundle-previews.md +++ b/docs/plans/completed/one-way-output-bundle-previews.md @@ -9,9 +9,9 @@ ## Status -- State: active +- State: completed - Last updated: 2026-04-17 -- Current phase: implementation +- Current phase: completed ## Current Direction @@ -38,12 +38,12 @@ - Define the one-way contract and the bounded in-memory preview state. - Phase 1: completed - Add cached image-preview state to `ActiveOutputBundle` and remove old-image rereads from later reply rendering. -- Phase 2: pending - - Add cached text head/tail preview state and stop reconstructing later previews from spilled file state. -- Phase 3: pending - - Replace tests that depend on disk rereads with tests for the one-way contract. -- Phase 4: pending - - Run full validation and update plan status for any remaining follow-on work. +- Phase 2: completed + - Confirm that later text replies already render from in-memory retained reply items and do not reread `transcript.txt`. +- Phase 3: completed + - Replace reread-oriented regressions with one-way contract coverage for deleted bundle image and transcript files. +- Phase 4: completed + - Run full validation and close the initiative. ## Locked Decisions @@ -55,18 +55,11 @@ ## Open Questions -- What exact text preview state should be cached in memory? - - Likely one bounded head window plus one bounded tail window, but the final shape should preserve current preview wording and stream ordering. -- Should the preview cache live only on `ActiveOutputBundle`, or should `StagedTimeoutOutput` also keep a lightweight preview summary before a bundle is materialized? -- When an output-bundle directory or subdirectory disappears mid-session, what minimal recreation behavior is worth supporting for continued appends without expanding this slice into generic filesystem recovery logic? -- Can image preview caching reuse the existing `ReplyImage` payloads directly, or should it store a more compact internal representation? +- None for this slice. ## Next Safe Slice -- Decide whether the text-preview slice is needed at all. -- If text preview caching is needed, start that slice only after confirming that the remaining text path still violates the one-way contract in a user-visible way. -- Decide whether text preview caching is needed purely for architectural consistency or whether the current text spill path is already sufficiently one-way. -- If text caching is needed, define the exact bounded head/tail representation before changing text compaction behavior. +- None. The planned work is complete. ## Stop Conditions @@ -82,3 +75,4 @@ - 2026-04-17: Kept the public bundle layout unchanged for this initiative so the refactor can land without redefining the client-facing files-mode contract. - 2026-04-17: Began the image-preview implementation by caching the first-history and latest image previews on `ActiveOutputBundle` and moving the public regression toward “later replies do not depend on old bundle image files remaining on disk”. - 2026-04-17: Completed the image-preview slice. Later image-bundle replies now render from cached preview images in memory instead of rereading old image files from the bundle directory. +- 2026-04-17: Confirmed the text spill path already satisfied the one-way contract for visible replies. Later text replies render from in-memory retained reply items, and a new public regression now covers transcript deletion and recreation without replaying previously spilled text. diff --git a/tests/write_stdin_behavior.rs b/tests/write_stdin_behavior.rs index da190b22..0ac042ea 100644 --- a/tests/write_stdin_behavior.rs +++ b/tests/write_stdin_behavior.rs @@ -1048,6 +1048,78 @@ async fn timeout_spill_file_path_stays_stable_across_later_small_poll() -> TestR Ok(()) } +#[tokio::test(flavor = "multi_thread")] +async fn timeout_spill_recreates_deleted_transcript_without_replaying_old_text() -> TestResult<()> { + let _guard = lock_test_mutex(); + let mut session = spawn_behavior_session().await?; + + let input = "big <- paste(rep('y', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(0.35); cat('tail\\n')"; + let first = session.write_stdin_raw_with(input, Some(0.05)).await?; + let first_text = result_text(&first); + if backend_unavailable(&first_text) { + eprintln!("write_stdin_behavior backend unavailable in this environment; skipping"); + session.cancel().await?; + return Ok(()); + } + + sleep(Duration::from_millis(260)).await; + let spilled = session.write_stdin_raw_with("", Some(0.1)).await?; + let spilled_text = result_text(&spilled); + let transcript_path = match bundle_transcript_path(&spilled_text) { + Some(path) => path, + None if spilled_text.contains("< { + eprintln!("write_stdin_behavior spill poll remained busy; skipping"); + session.cancel().await?; + return Ok(()); + } + None => { + panic!("expected transcript path in first oversized poll reply, got: {spilled_text:?}") + } + }; + + fs::remove_file(&transcript_path)?; + + sleep(Duration::from_millis(450)).await; + let final_poll = session.write_stdin_raw_with("", Some(2.0)).await?; + let final_text = result_text(&final_poll); + if final_text.contains("<>"), + "expected later small poll to either return inline tail text or settle idle after recreating the spill file, got: {final_text:?}" + ); + assert!( + follow_up_text.contains("[1] 2"), + "expected session to stay alive after transcript deletion, got: {follow_up_text:?}" + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread")] async fn timeout_bundle_file_creation_failure_preserves_inline_content() -> TestResult<()> { let _guard = lock_test_mutex(); From 0b24eb034ac6403a24ba93029dfe80631ea22245 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 17:53:18 -0400 Subject: [PATCH 4/8] test: remove stale mutable binding --- tests/write_stdin_behavior.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/write_stdin_behavior.rs b/tests/write_stdin_behavior.rs index 0ac042ea..76f06e0c 100644 --- a/tests/write_stdin_behavior.rs +++ b/tests/write_stdin_behavior.rs @@ -1051,7 +1051,7 @@ async fn timeout_spill_file_path_stays_stable_across_later_small_poll() -> TestR #[tokio::test(flavor = "multi_thread")] async fn timeout_spill_recreates_deleted_transcript_without_replaying_old_text() -> TestResult<()> { let _guard = lock_test_mutex(); - let mut session = spawn_behavior_session().await?; + let session = spawn_behavior_session().await?; let input = "big <- paste(rep('y', 120), collapse = ''); cat('start\\n'); flush.console(); Sys.sleep(0.2); for (i in 1:80) cat(sprintf('mid%03d %s\\n', i, big)); flush.console(); Sys.sleep(0.35); cat('tail\\n')"; let first = session.write_stdin_raw_with(input, Some(0.05)).await?; From b7c966fcf6d3a26bbe16b5a2f97cbde2d1953b35 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Wed, 22 Apr 2026 21:45:00 -0400 Subject: [PATCH 5/8] Fix CI test stability on Linux --- tests/codex_approvals_tui.rs | 3 +++ tests/plot_images.rs | 33 ++++++++++++++++++++++----------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/tests/codex_approvals_tui.rs b/tests/codex_approvals_tui.rs index 925cd11d..1f6e16a3 100644 --- a/tests/codex_approvals_tui.rs +++ b/tests/codex_approvals_tui.rs @@ -1615,6 +1615,9 @@ tryCatch({ let original = std::mem::take(map); for (key, mut child) in original { let normalized_key = normalize_wire_string(&key, workspace, codex_home); + if normalized_key == "threadId" { + continue; + } path.push(normalized_key.clone()); normalize_inner(&mut child, path, workspace, codex_home); path.pop(); diff --git a/tests/plot_images.rs b/tests/plot_images.rs index 515b9ec0..c78dfff4 100644 --- a/tests/plot_images.rs +++ b/tests/plot_images.rs @@ -13,7 +13,7 @@ use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; use std::sync::OnceLock; use tempfile::tempdir; -use tokio::time::{Duration, sleep}; +use tokio::time::{Duration, Instant, sleep}; #[derive(Debug)] struct ImageData { @@ -1664,16 +1664,27 @@ Sys.sleep(2) fs::remove_file(&first_image_alias)?; fs::remove_file(&last_image_alias)?; - sleep(Duration::from_millis(900)).await; - let damaged = session.write_stdin_raw_with("", Some(0.05)).await?; - let damaged_text = result_text(&damaged); - let damaged_images = extract_images(&damaged); - assert_ne!( - damaged.is_error, - Some(true), - "bundle-file deletion poll reported an error: {}", - damaged_text - ); + let deadline = Instant::now() + Duration::from_secs(8); + let (damaged_text, damaged_images) = loop { + sleep(Duration::from_millis(100)).await; + let damaged = session.write_stdin_raw_with("", Some(0.2)).await?; + let damaged_text = result_text(&damaged); + let damaged_images = extract_images(&damaged); + assert_ne!( + damaged.is_error, + Some(true), + "bundle-file deletion poll reported an error: {}", + damaged_text + ); + if damaged_images.len() == 2 { + break (damaged_text, damaged_images); + } + if !damaged_text.contains("<= deadline { + panic!( + "expected bundle-file deletion poll to keep inline preview images from memory, got text: {damaged_text:?}, images: {damaged_images:?}" + ); + } + }; assert!( damaged_text.contains("events.log"), "expected bundle-file deletion poll to keep disclosing the output bundle, got: {damaged_text:?}" From ca467cd4434f5d4793a1331d5634431ce875e4ae Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Thu, 23 Apr 2026 08:34:32 -0400 Subject: [PATCH 6/8] test: fix macos worker process CI flakes --- src/worker_process.rs | 49 +++++++++---------------------------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/src/worker_process.rs b/src/worker_process.rs index 0bd419ec..bd6806cc 100644 --- a/src/worker_process.rs +++ b/src/worker_process.rs @@ -6045,14 +6045,13 @@ mod tests { use super::*; use crate::output_capture::{ OUTPUT_RING_CAPACITY_BYTES, OutputEventKind, OutputRing, OutputTextSpan, - ensure_output_ring, reset_last_reply_marker_offset, reset_output_ring, }; use crate::sandbox::SandboxPolicy; #[cfg(target_os = "linux")] use crate::sandbox::sandbox_state_update_from_codex_meta; #[cfg(target_os = "linux")] use serde_json::json; - use std::sync::{Mutex, MutexGuard, OnceLock}; + use std::sync::{Mutex, OnceLock}; fn cwd_test_mutex() -> &'static Mutex<()> { static TEST_MUTEX: OnceLock> = OnceLock::new(); @@ -6064,12 +6063,6 @@ mod tests { TEST_MUTEX.get_or_init(|| Mutex::new(())) } - fn output_ring_test_guard() -> MutexGuard<'static, ()> { - crate::output_capture::output_ring_test_mutex() - .lock() - .unwrap_or_else(|err| err.into_inner()) - } - fn echo_event(prompt: &str, line: &str) -> IpcEchoEvent { IpcEchoEvent { prompt: prompt.to_string(), @@ -7069,11 +7062,6 @@ mod tests { #[test] fn pager_respawned_pending_request_trims_echo_without_echo_events() { - let _guard = output_ring_test_guard(); - let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); - reset_output_ring(); - reset_last_reply_marker_offset(); - let mut manager = WorkerManager::new( Backend::Python, SandboxCliPlan::default(), @@ -7302,10 +7290,9 @@ mod tests { manager.pending_server_notice.is_none(), "expected the restart notice to be emitted instead of lingering" ); - assert!( - manager.process.is_none(), - "did not expect the unit test to retain a spawned worker" - ); + if let Some(process) = manager.process.take() { + let _ = process.kill(); + } } #[test] @@ -7385,11 +7372,6 @@ mod tests { #[test] fn pager_empty_input_polls_pending_output_before_pager_commands() { - let _guard = output_ring_test_guard(); - let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); - reset_output_ring(); - reset_last_reply_marker_offset(); - let mut manager = WorkerManager::new( Backend::R, SandboxCliPlan::default(), @@ -7431,31 +7413,25 @@ mod tests { #[test] fn pager_empty_input_advances_page_after_worker_exit() { - let _guard = output_ring_test_guard(); - let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); - reset_output_ring(); - reset_last_reply_marker_offset(); - let mut manager = WorkerManager::new( Backend::R, SandboxCliPlan::default(), crate::oversized_output::OversizedOutputMode::Pager, ) .expect("worker manager"); - manager.process = Some(test_worker_process(sleeping_test_child())); + let mut process = test_worker_process(successful_test_child()); + process.exit_status = Some(process.child.wait().expect("wait test child")); + manager.process = Some(process); manager.exe_path = PathBuf::from("definitely-missing-worker-exe"); let output = (1..=24).map(|n| format!("L{n:04}\n")).collect::(); manager .pager .activate(static_pager_buffer_from_worker_text(&output), false); - - { - let process = manager.process.as_mut().expect("worker process"); - process.child.kill().expect("kill test child"); - process.exit_status = Some(process.child.wait().expect("wait test child")); + manager.output.start_capture(); + if let Some(end_offset) = manager.output.end_offset() { + manager.output.advance_offset_to(end_offset); } - let reply = manager .write_stdin_pager( String::new(), @@ -7532,11 +7508,6 @@ mod tests { #[test] fn pager_empty_input_preserves_idle_guardrail_notice() { - let _guard = output_ring_test_guard(); - let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); - reset_output_ring(); - reset_last_reply_marker_offset(); - let mut manager = WorkerManager::new( Backend::R, SandboxCliPlan::default(), From 52aeca858d998c590c857cb82a29b5269d148521 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Thu, 23 Apr 2026 10:28:42 -0400 Subject: [PATCH 7/8] test: address review findings for pager and plot regressions --- src/worker_process.rs | 32 ++++++++++++++++++++++++++++++- tests/plot_images.rs | 44 +++++++++++++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/worker_process.rs b/src/worker_process.rs index bd6806cc..8416fec5 100644 --- a/src/worker_process.rs +++ b/src/worker_process.rs @@ -6045,13 +6045,14 @@ mod tests { use super::*; use crate::output_capture::{ OUTPUT_RING_CAPACITY_BYTES, OutputEventKind, OutputRing, OutputTextSpan, + ensure_output_ring, reset_last_reply_marker_offset, reset_output_ring, }; use crate::sandbox::SandboxPolicy; #[cfg(target_os = "linux")] use crate::sandbox::sandbox_state_update_from_codex_meta; #[cfg(target_os = "linux")] use serde_json::json; - use std::sync::{Mutex, OnceLock}; + use std::sync::{Mutex, MutexGuard, OnceLock}; fn cwd_test_mutex() -> &'static Mutex<()> { static TEST_MUTEX: OnceLock> = OnceLock::new(); @@ -6063,6 +6064,12 @@ mod tests { TEST_MUTEX.get_or_init(|| Mutex::new(())) } + fn output_ring_test_guard() -> MutexGuard<'static, ()> { + crate::output_capture::output_ring_test_mutex() + .lock() + .unwrap_or_else(|err| err.into_inner()) + } + fn echo_event(prompt: &str, line: &str) -> IpcEchoEvent { IpcEchoEvent { prompt: prompt.to_string(), @@ -7062,6 +7069,11 @@ mod tests { #[test] fn pager_respawned_pending_request_trims_echo_without_echo_events() { + let _guard = output_ring_test_guard(); + let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); + reset_output_ring(); + reset_last_reply_marker_offset(); + let mut manager = WorkerManager::new( Backend::Python, SandboxCliPlan::default(), @@ -7372,6 +7384,11 @@ mod tests { #[test] fn pager_empty_input_polls_pending_output_before_pager_commands() { + let _guard = output_ring_test_guard(); + let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); + reset_output_ring(); + reset_last_reply_marker_offset(); + let mut manager = WorkerManager::new( Backend::R, SandboxCliPlan::default(), @@ -7386,6 +7403,9 @@ mod tests { ); manager.output.start_capture(); + if let Some(end_offset) = manager.output.end_offset() { + manager.output.advance_offset_to(end_offset); + } manager .output_timeline .append_text(b"detached\n", false, ContentOrigin::Worker); @@ -7413,6 +7433,11 @@ mod tests { #[test] fn pager_empty_input_advances_page_after_worker_exit() { + let _guard = output_ring_test_guard(); + let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); + reset_output_ring(); + reset_last_reply_marker_offset(); + let mut manager = WorkerManager::new( Backend::R, SandboxCliPlan::default(), @@ -7508,6 +7533,11 @@ mod tests { #[test] fn pager_empty_input_preserves_idle_guardrail_notice() { + let _guard = output_ring_test_guard(); + let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES); + reset_output_ring(); + reset_last_reply_marker_offset(); + let mut manager = WorkerManager::new( Backend::R, SandboxCliPlan::default(), diff --git a/tests/plot_images.rs b/tests/plot_images.rs index c78dfff4..e60c6301 100644 --- a/tests/plot_images.rs +++ b/tests/plot_images.rs @@ -245,6 +245,19 @@ fn parse_image_event_paths(events: &str) -> Vec { .collect() } +fn alias_path_for_history_image(bundle_dir: &Path, history_path: &Path) -> PathBuf { + let image_number = history_path + .parent() + .and_then(|path| path.file_name()) + .and_then(|name| name.to_str()) + .unwrap_or_else(|| panic!("expected image number in history path, got: {history_path:?}")); + let extension = history_path + .extension() + .and_then(|extension| extension.to_str()) + .unwrap_or_else(|| panic!("expected extension in history path, got: {history_path:?}")); + bundle_dir.join(format!("images/{image_number}.{extension}")) +} + fn advance_visible_lines( text: &str, visible_lines: usize, @@ -1605,10 +1618,13 @@ Sys.sleep(1) async fn timeout_output_bundle_keeps_inline_previews_after_bundle_files_disappear() -> TestResult<()> { let temp = tempdir()?; - let session = spawn_server_with_files_env_vars(vec![( - "TMPDIR".to_string(), - temp.path().display().to_string(), - )]) + let session = spawn_server_with_files_env_vars(vec![ + ("TMPDIR".to_string(), temp.path().display().to_string()), + ( + "MCP_REPL_OUTPUT_BUNDLE_MAX_BYTES".to_string(), + "200000".to_string(), + ), + ]) .await?; let input = r#" @@ -1618,8 +1634,9 @@ for (i in 1:6) { } flush.console() Sys.sleep(1) -for (i in 7:8) { - plot(1:10, main = sprintf("plot%03d", i)) +big <- paste(rep("x", 200), collapse = "") +for (i in 1:2000) { + cat(sprintf("line%04d %s\n", i, big)) } flush.console() Sys.sleep(2) @@ -1652,16 +1669,15 @@ Sys.sleep(2) .first() .map(|path| bundle_dir.join(path)) .unwrap_or_else(|| panic!("expected first image entry in events.log, got: {events:?}")); - let first_image_extension = first_image_history - .extension() - .and_then(|extension| extension.to_str()) - .unwrap_or_else(|| { - panic!("expected extension for first image path, got: {first_image_history:?}") - }); - let first_image_alias = bundle_dir.join(format!("images/001.{first_image_extension}")); - let last_image_alias = bundle_dir.join(format!("images/006.{first_image_extension}")); + let last_image_history = image_paths + .last() + .map(|path| bundle_dir.join(path)) + .unwrap_or_else(|| panic!("expected last image entry in events.log, got: {events:?}")); + let first_image_alias = alias_path_for_history_image(bundle_dir, &first_image_history); + let last_image_alias = alias_path_for_history_image(bundle_dir, &last_image_history); fs::remove_file(&first_image_history)?; fs::remove_file(&first_image_alias)?; + fs::remove_file(&last_image_history)?; fs::remove_file(&last_image_alias)?; let deadline = Instant::now() + Duration::from_secs(8); From 14cce38015eb9ce4bbe73177751bf71442d4fe20 Mon Sep 17 00:00:00 2001 From: Tomasz Kalinowski Date: Thu, 23 Apr 2026 10:54:19 -0400 Subject: [PATCH 8/8] test: retry plot follow-up after busy timeout poll --- tests/plot_images.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/plot_images.rs b/tests/plot_images.rs index e60c6301..1fda8167 100644 --- a/tests/plot_images.rs +++ b/tests/plot_images.rs @@ -3,7 +3,10 @@ mod common; use base64::Engine as _; -use common::{TestResult, spawn_server_with_files, spawn_server_with_files_env_vars}; +use common::{ + TestResult, spawn_server_with_files, spawn_server_with_files_env_vars, + wait_until_ready_with_input_retry, +}; use regex_lite::Regex; use rmcp::model::{CallToolResult, RawContent}; use serde::Serialize; @@ -1618,7 +1621,7 @@ Sys.sleep(1) async fn timeout_output_bundle_keeps_inline_previews_after_bundle_files_disappear() -> TestResult<()> { let temp = tempdir()?; - let session = spawn_server_with_files_env_vars(vec![ + let mut session = spawn_server_with_files_env_vars(vec![ ("TMPDIR".to_string(), temp.path().display().to_string()), ( "MCP_REPL_OUTPUT_BUNDLE_MAX_BYTES".to_string(), @@ -1718,7 +1721,16 @@ Sys.sleep(2) settled_text = result_text(&next); } - let follow_up = session.write_stdin_raw_with("1+1", Some(5.0)).await?; + let follow_up = session.write_stdin_raw_with("1+1", Some(0.5)).await?; + let follow_up = wait_until_ready_with_input_retry( + &mut session, + "1+1", + follow_up, + 0.5, + Duration::from_millis(100), + Duration::from_secs(5), + ) + .await?; let follow_up_text = result_text(&follow_up); session.cancel().await?;