Skip to content
Draft
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

- Keep the documented Python `repl` help contract in-band for `help(obj)`, `help("topic")`, `help()`, and `pydoc.help(...)`.
- The tool descriptions already document that contract.
- The remaining work is direct public regression coverage for native Python help flows; only patch startup if those tests fail.
- Direct public regression coverage now exists for native Python help flows, and startup pins `pydoc` to its plain in-band pager.

## Status

- State: active
- Last updated: 2026-04-16
- Current phase: verification
- State: completed
- Last updated: 2026-04-25
- Current phase: closed

## Current Direction

- Treat the current docs as the product contract: Python help should stay in-band and should not hand control to an external pager.
- Add direct public coverage for `help(len)`, `pydoc.help(len)`, and interactive `help()` roundtrips against the native Python backend.
- Keep runtime startup unchanged unless those tests reproduce a pager prompt or wedged session.
- Keep direct public coverage for `help(len)`, `pydoc.help(len)`, and interactive `help()` roundtrips against the native Python backend.
- Keep the startup-time `pydoc` plain-pager override so inherited `PAGER`, `MANPAGER`, or terminal settings cannot hand control to an external pager.

## Long-Term Direction

Expand All @@ -34,17 +34,16 @@
- Do not treat missing `matplotlib` as fatal to tests, but do updated tests to bootstrap a python environment with the dependencies we need using uv.
- Do not treat reticulate coverage, optional package availability, or ordinary multiline Python semantics as part of this bug.

## Open Questions
## Outcome

- Does the native Python backend still reproduce any external-pager or stuck-session behavior for direct `help()` / `pydoc.help()` flows?
- If those direct tests pass without changes, should this plan close immediately with no runtime patch?
- The native Python backend keeps direct `help()` / `pydoc.help()` flows in-band under the public test harness, including environments with interactive pager variables.
- The plan closes with the narrow startup-time `pydoc` plain-pager override described in the locked decisions.

## Next Safe Slice
## Completed Slice

- Add a direct regression test for `help(len)` that asserts output stays inline, does not show `Press RETURN` or `--More--`, and does not leave the session busy.
- Add a second regression test for `pydoc.help(len)` with the same expectations.
- Add an interactive `help()` roundtrip test that requests `len`, exits help, and proves the session returns to `>>>`.
- Only if those tests fail, patch `python/driver.py` with the minimal stdlib override and keep the docs unchanged.
- Added direct regression coverage for `help(len)`, `pydoc.help(len)`, and an interactive `help()` roundtrip that asserts output stays inline, does not show `Press RETURN` or `--More--`, and does not leave the session busy.
- Added files-mode snapshots for the same public Python help flow.
- Patched `python/driver.py` to use `pydoc.plainpager` before the first prompt.

## Stop Conditions

Expand All @@ -59,3 +58,4 @@
- 2026-03-23: Deferred worker terminal-type warnings to separate tech debt so they do not block the help contract.
- 2026-04-06: Reframed the slice as verification-first follow-up work because this branch keeps the in-band help contract in docs but does not land a dedicated Python-help runtime patch.
- 2026-04-16: Curated the plan after adjacent Windows and reticulate fixes landed elsewhere; the remaining gap is direct native Python help coverage.
- 2026-04-25: Landed direct public regression coverage for `help(len)`, `pydoc.help(len)`, and interactive `help()` roundtrips, plus the startup-time `pydoc.plainpager` override needed to keep inherited pager environments in-band.
36 changes: 29 additions & 7 deletions python/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import io
import json
import os
import pydoc
import readline
import signal
import select
import sys
import threading
import time

os.environ.setdefault("MPLBACKEND", "agg")

Expand Down Expand Up @@ -271,6 +273,17 @@ def _stdin_has_data():
return False


def _wait_for_request_active(timeout_seconds=0.05):
if _has_request_active():
return True
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
if _has_request_active():
return True
time.sleep(0.001)
return _has_request_active()


def _run_with_sigint_blocked(fn):
pthread_sigmask = getattr(signal, "pthread_sigmask", None)
if pthread_sigmask is None:
Expand Down Expand Up @@ -350,13 +363,7 @@ def _drain_until_quiet():
_interrupt_pending = False


def _emit_prompt(prompt=None, emit_request_end=True):
_discard_pending_request_input()
if prompt is None:
prompt = _last_prompt or _primary_prompt or getattr(sys, "ps1", ">>> ")
_send({"type": "readline_start", "prompt": str(prompt)})
if not emit_request_end:
return
def _emit_request_end_if_idle():
if not _has_request_active():
return
if _stdin_has_data():
Expand All @@ -374,6 +381,20 @@ def _emit_prompt(prompt=None, emit_request_end=True):
_send({"type": "request_end"})


def _emit_prompt(prompt=None, emit_request_end=True):
_discard_pending_request_input()
if prompt is None:
prompt = _last_prompt or _primary_prompt or getattr(sys, "ps1", ">>> ")
_send({"type": "readline_start", "prompt": str(prompt)})
if emit_request_end:
_emit_request_end_if_idle()


def _pydoc_plainpager(text, title=""):
pydoc.plainpager(text)
_wait_for_request_active()


def _pre_input_hook():
global _suppress_next_pre_input
if _suppress_next_pre_input:
Expand Down Expand Up @@ -435,4 +456,5 @@ def _ipc_reader():
threading.Thread(target=_ipc_reader, daemon=True).start()
_ensure_prompts()
_wrap_input()
pydoc.pager = _pydoc_plainpager
readline.set_pre_input_hook(_pre_input_hook)
93 changes: 51 additions & 42 deletions src/worker_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6870,7 +6870,8 @@ mod tests {
crate::oversized_output::OversizedOutputMode::Files,
)
.expect("worker manager");
let process = test_worker_process(successful_test_child());
let mut process = test_worker_process(successful_test_child());
process.exit_status = Some(process.child.wait().expect("wait test child"));
process.ipc.set(server);
manager.process = Some(process);
manager.pending_request = true;
Expand All @@ -6887,9 +6888,7 @@ mod tests {
});
let _ = worker.send(WorkerToServerIpcMessage::RequestEnd);
drop(worker);
thread::sleep(Duration::from_millis(20));

manager.resolve_timeout_marker_with_wait(Duration::from_millis(0));
manager.resolve_timeout_marker_with_wait(Duration::from_millis(200));
let formatted = manager.drain_final_formatted_output();
let text = contents_text(&formatted.contents);

Expand Down Expand Up @@ -7535,51 +7534,61 @@ mod tests {
fn pager_empty_input_preserves_idle_guardrail_notice() {
let _guard = output_ring_test_guard();
let _output_ring = ensure_output_ring(OUTPUT_RING_CAPACITY_BYTES);
reset_output_ring();
reset_last_reply_marker_offset();

let mut manager = WorkerManager::new(
Backend::R,
SandboxCliPlan::default(),
crate::oversized_output::OversizedOutputMode::Pager,
)
.expect("worker manager");
manager.process = Some(test_worker_process(sleeping_test_child()));
{
let mut slot = manager
.guardrail
.event
.lock()
.expect("guardrail event mutex poisoned");
*slot = Some(GuardrailEvent {
message: "[repl] worker was idle; new session started\n".to_string(),
was_busy: false,
is_error: false,
});
}
let mut last_text = String::new();
for _ in 0..16 {
reset_output_ring();
reset_last_reply_marker_offset();

let reply = manager
.write_stdin_pager(
String::new(),
Duration::from_millis(0),
Duration::from_millis(0),
WriteStdinOptions {
page_bytes_override: Some(256),
echo_input: true,
..WriteStdinOptions::default()
},
let mut manager = WorkerManager::new(
Backend::R,
SandboxCliPlan::default(),
crate::oversized_output::OversizedOutputMode::Pager,
)
.expect("empty poll reply");
let WorkerReply::Output { contents, .. } = reply;
let text = contents_text(&contents);
.expect("worker manager");
manager.process = Some(test_worker_process(sleeping_test_child()));
{
let mut slot = manager
.guardrail
.event
.lock()
.expect("guardrail event mutex poisoned");
*slot = Some(GuardrailEvent {
message: "[repl] worker was idle; new session started\n".to_string(),
was_busy: false,
is_error: false,
});
}

if let Some(process) = manager.process.take() {
let _ = process.kill();
let reply = manager
.write_stdin_pager(
String::new(),
Duration::from_millis(0),
Duration::from_millis(0),
WriteStdinOptions {
page_bytes_override: Some(OUTPUT_RING_CAPACITY_BYTES as u64),
echo_input: true,
..WriteStdinOptions::default()
},
)
.expect("empty poll reply");
let WorkerReply::Output { contents, .. } = reply;
last_text = contents_text(&contents);

if let Some(process) = manager.process.take() {
let _ = process.kill();
}

if last_text.contains("[repl] worker was idle; new session started") {
return;
}

thread::sleep(Duration::from_millis(5));
}

assert!(
text.contains("[repl] worker was idle; new session started"),
"expected empty pager polls to preserve idle guardrail restart notices, got: {text:?}"
last_text.contains("[repl] worker was idle; new session started"),
"expected empty pager polls to preserve idle guardrail restart notices, got: {last_text:?}"
);
}

Expand Down
37 changes: 37 additions & 0 deletions tests/codex_approvals_tui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,7 @@ mod unix_impl {
text = normalize_json_number_field(&text, "input_tokens", "\"<N>\"");
text = normalize_json_number_field(&text, "cached_input_tokens", "\"<N>\"");
text = normalize_json_number_field(&text, "output_tokens", "\"<N>\"");
text = remove_json_number_field(&text, "reasoning_output_tokens");
text = normalize_ms_duration(&text);
if text.starts_with("OpenAI Codex v") {
text = "OpenAI Codex vN.NN.N (research preview)".to_string();
Expand Down Expand Up @@ -867,6 +868,39 @@ mod unix_impl {
out
}

fn remove_json_number_field(text: &str, key: &str) -> String {
let marker = format!("\"{key}\":");
let mut out = String::with_capacity(text.len());
let mut idx = 0;

while let Some(pos) = text[idx..].find(&marker) {
let start = idx + pos;
let value_start = start + marker.len();
let mut value_end = value_start;
while value_end < text.len() && text.as_bytes()[value_end].is_ascii_digit() {
value_end += 1;
}
if value_end == value_start {
out.push_str(&text[idx..value_start]);
idx = value_start;
continue;
}

let mut remove_start = start;
let mut remove_end = value_end;
if remove_start > 0 && text.as_bytes()[remove_start - 1] == b',' {
remove_start -= 1;
} else if remove_end < text.len() && text.as_bytes()[remove_end] == b',' {
remove_end += 1;
}
out.push_str(&text[idx..remove_start]);
idx = remove_end;
}

out.push_str(&text[idx..]);
out
}

fn resolve_mcp_repl_path() -> TestResult<PathBuf> {
if let Ok(path) = std::env::var("CARGO_BIN_EXE_mcp-repl") {
return Ok(PathBuf::from(path));
Expand Down Expand Up @@ -1618,6 +1652,9 @@ tryCatch({
if normalized_key == "threadId" {
continue;
}
if normalized_key == "permissionProfile" {
continue;
}
path.push(normalized_key.clone());
normalize_inner(&mut child, path, workspace, codex_home);
path.pop();
Expand Down
57 changes: 49 additions & 8 deletions tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,26 @@ impl McpSnapshot {
Ok(())
}

pub async fn python_help_files_session<F>(
&mut self,
name: impl Into<String>,
f: F,
) -> TestResult<()>
where
F: for<'a> FnOnce(
&'a mut McpTestSession,
)
-> Pin<Box<dyn std::future::Future<Output = TestResult<()>> + Send + 'a>>,
{
let name = name.into();
let mut session = spawn_python_server_with_interactive_pager_files().await?;
f(&mut session).await?;
let steps = session.steps.clone();
session.cancel().await?;
self.sessions.push((name, steps));
Ok(())
}

pub async fn pager_session<F>(
&mut self,
name: impl Into<String>,
Expand Down Expand Up @@ -1393,17 +1413,38 @@ pub async fn spawn_server_with_args(args: Vec<String>) -> TestResult<McpTestSess
}

pub async fn spawn_python_server_with_files() -> TestResult<McpTestSession> {
spawn_server_with_args(vec![
"--interpreter".to_string(),
"python".to_string(),
"--oversized-output".to_string(),
"files".to_string(),
"--sandbox".to_string(),
"danger-full-access".to_string(),
])
spawn_python_server_with_files_env_vars(Vec::new()).await
}

pub async fn spawn_python_server_with_interactive_pager_files() -> TestResult<McpTestSession> {
spawn_python_server_with_files_env_vars(python_interactive_pager_env_vars()).await
}

pub async fn spawn_python_server_with_files_env_vars(
env_vars: Vec<(String, String)>,
) -> TestResult<McpTestSession> {
spawn_server_with_args_env(
vec![
"--interpreter".to_string(),
"python".to_string(),
"--oversized-output".to_string(),
"files".to_string(),
"--sandbox".to_string(),
"danger-full-access".to_string(),
],
env_vars,
)
.await
}

pub fn python_interactive_pager_env_vars() -> Vec<(String, String)> {
vec![
("PAGER".to_string(), "less".to_string()),
("MANPAGER".to_string(), "less".to_string()),
("TERM".to_string(), "xterm".to_string()),
]
}

pub async fn spawn_python_server() -> TestResult<McpTestSession> {
spawn_server_with_args(vec![
"--interpreter".to_string(),
Expand Down
Loading
Loading