From 7d4f825f0bc5cbaba4a17d6e1c55506d5183f519 Mon Sep 17 00:00:00 2001 From: Wayland Yang Date: Wed, 24 Jun 2026 15:05:37 +0800 Subject: [PATCH] fix: bounded WP-uffd accept (#261) + harden tag/id path joins (#259, #260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Triage of three automated-audit reports from @asmit25805. Verified each against the code — one real bug, two low-severity hardening (the "High security" ratings were overstated, but the fixes are cheap and correct). #261 (real, robustness): request_wp_uffd spawns an accept() thread with the UnixListener MOVED into it, then issues a PUT (10s timeout) that makes FC dial back. If FC accepts the PUT but never connects — or the PUT itself fails — the blocking accept() never returns, so accept_thread.join() wedges forever. The old "the listener drops at end of scope" comment was wrong: the listener is owned by the thread, not the outer scope. Extracted a deadline-bounded accept_with_deadline() (non-blocking poll, deadline = API timeout + 2s) so a non-connecting FC surfaces as a clean error instead of a hang. Linux-only regression test. #259 (defense in depth): pack_chain joined a chain's parent_tag onto snap_root without validation. The value comes from a local snapshot.json the packer owns (the untrusted direction — unpack — already validates via is_safe_pack_tag), but a corrupted/hand-edited edge like `../../etc` would escape snap_root. Reject non-tag names with the same rule unpack uses. #260 (defense in depth): delete_sandbox spliced a sandbox id into the request path unvalidated. Ids from --all/--tag come from the daemon's own list response (trusted); an explicit CLI id is user-controlled, and `../snapshots` would traverse the URL to another endpoint. No privilege boundary is crossed (the CLI talks only to the operator's own daemon), but reject `[^A-Za-z0-9_-]` up front for a clear error instead of a confusing 404. New sdk-side unit tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/forkd-cli/src/hub.rs | 11 ++++++ crates/forkd-cli/src/sandbox.rs | 45 ++++++++++++++++++++++ crates/forkd-vmm/src/lib.rs | 67 +++++++++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 4 deletions(-) diff --git a/crates/forkd-cli/src/hub.rs b/crates/forkd-cli/src/hub.rs index 84f84e5..da6fbb0 100644 --- a/crates/forkd-cli/src/hub.rs +++ b/crates/forkd-cli/src/hub.rs @@ -325,6 +325,17 @@ pub fn pack_chain( if !seen.insert(parent_tag.clone()) { bail!("chain for `{head_tag}` reaches `{parent_tag}` twice — cycle"); } + // Defense in depth (#259): parent_tag comes from a local + // snapshot.json the packer owns, but a corrupted/hand-edited + // chain edge like `../../etc` would otherwise escape snap_root on + // the join below. Reject anything that isn't a plain tag — the + // same rule the unpack side enforces on untrusted packs. + if !is_safe_pack_tag(&parent_tag) { + bail!( + "chain for `{head_tag}` references parent `{parent_tag}` with an unsafe tag \ + name (path separators / `..` not allowed) — refusing to resolve" + ); + } let parent_dir = snap_root.join(&parent_tag); if !parent_dir.join("vmstate").exists() { bail!( diff --git a/crates/forkd-cli/src/sandbox.rs b/crates/forkd-cli/src/sandbox.rs index 9d21abf..26365c2 100644 --- a/crates/forkd-cli/src/sandbox.rs +++ b/crates/forkd-cli/src/sandbox.rs @@ -150,7 +150,24 @@ fn list_sandboxes(daemon_url: &str, token: Option<&str>) -> Result-`). When it comes from an +/// explicit CLI arg rather than a list response it's user-controlled, so +/// reject anything that isn't `[A-Za-z0-9_-]` before splicing it into the +/// request path — otherwise an id like `../snapshots` would traverse the +/// URL to a different endpoint (#260). Defense in depth: the CLI only ever +/// talks to the operator's own daemon, but a clear up-front error beats a +/// confusing 404 from a mangled path. +fn is_valid_sandbox_id(id: &str) -> bool { + !id.is_empty() + && id + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') +} + fn delete_sandbox(daemon_url: &str, token: Option<&str>, id: &str) -> Result<()> { + if !is_valid_sandbox_id(id) { + anyhow::bail!("invalid sandbox id '{id}' (expected alphanumeric, '-' or '_')"); + } let agent = ureq::AgentBuilder::new() .timeout(Duration::from_secs(30)) .build(); @@ -172,3 +189,31 @@ fn map_err(e: ureq::Error) -> anyhow::Error { e => anyhow::anyhow!("transport: {e}"), } } + +#[cfg(test)] +mod tests { + use super::{delete_sandbox, is_valid_sandbox_id}; + + #[test] + fn accepts_real_daemon_ids() { + assert!(is_valid_sandbox_id("sb-6a1134f3-0001")); + assert!(is_valid_sandbox_id("abc123")); + assert!(is_valid_sandbox_id("a_b-C9")); + } + + // #260: traversal / junk ids must be rejected. + #[test] + fn rejects_traversal_and_junk() { + for bad in ["", "../snapshots", "a/b", "..", "sb 1", "sb/../x", "id\n"] { + assert!(!is_valid_sandbox_id(bad), "should reject {bad:?}"); + } + } + + #[test] + fn delete_sandbox_rejects_bad_id_before_any_request() { + // A bad id must fail validation, not attempt a connection — so a + // nonsense daemon_url never matters here. + let err = delete_sandbox("http://0.0.0.0:1", None, "../etc").unwrap_err(); + assert!(err.to_string().contains("invalid sandbox id"), "got: {err}"); + } +} diff --git a/crates/forkd-vmm/src/lib.rs b/crates/forkd-vmm/src/lib.rs index 0ebc2c5..fbe4a6f 100644 --- a/crates/forkd-vmm/src/lib.rs +++ b/crates/forkd-vmm/src/lib.rs @@ -248,6 +248,35 @@ pub struct Vm { pub memfd: Option, } +/// Accept one connection on an already-non-blocking `UnixListener`, +/// giving up at `deadline`. Prevents a peer that never connects from +/// wedging a blocking `accept()` (and therefore the caller's `join()`) +/// forever — see `request_wp_uffd` (#261). The returned stream is put +/// back into blocking mode for ordinary reads. +#[cfg(target_os = "linux")] +fn accept_with_deadline( + listener: &std::os::unix::net::UnixListener, + deadline: Instant, +) -> Result { + loop { + match listener.accept() { + Ok((stream, _)) => { + stream + .set_nonblocking(false) + .context("restore blocking on accepted stream")?; + return Ok(stream); + } + Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => { + if Instant::now() >= deadline { + anyhow::bail!("peer did not connect to the socket before the deadline"); + } + std::thread::sleep(Duration::from_millis(5)); + } + Err(e) => return Err(e).context("accept connection"), + } + } +} + impl Vm { pub fn pid(&self) -> u32 { self.pid @@ -321,9 +350,21 @@ impl Vm { // BEFORE the HTTP response comes back, so the thread will // generally have the Handshake ready by the time the PUT // returns. + // + // The listener is MOVED into this thread, so it is NOT dropped on + // the error paths below — if FC accepts the PUT but never dials + // back (or the PUT itself fails), a plain blocking `accept()` + // would wedge `join()` forever. Bound it with a deadline that + // outlives the PUT timeout so a non-connecting FC surfaces as a + // clean error instead of a permanent hang (#261). + listener + .set_nonblocking(true) + .context("set WP-uffd UDS non-blocking")?; let accept_thread = std::thread::spawn(move || -> Result { - let (stream, _) = listener.accept().context("accept FC connection")?; + let deadline = Instant::now() + Duration::from_secs(DEFAULT_API_TIMEOUT_SECS + 2); + let stream = accept_with_deadline(&listener, deadline) + .context("waiting for Firecracker to connect to the WP-uffd socket")?; forkd_uffd::handshake::recv_handshake(&stream) .context("receive WP-uffd handshake from FC") }); @@ -341,9 +382,10 @@ impl Vm { DEFAULT_API_TIMEOUT_SECS, ); - // Always join the thread (even on PUT failure) to avoid a - // dangling accept blocking forever — UnixListener::accept will - // unblock when the listener drops at end of scope here. + // Join the accept thread (even on PUT failure). It self-terminates + // on its own deadline, so this can't hang even if FC never + // connected — the previous "listener drops at end of scope" + // reasoning was wrong (the listener is owned by the thread). let handshake_result = accept_thread .join() .map_err(|e| anyhow::anyhow!("WP-uffd accept thread panicked: {e:?}"))?; @@ -1745,6 +1787,23 @@ fn lseek_data_or_hole(f: &std::fs::File, offset: i64, want_data: bool) -> std::i mod tests { use super::*; + // #261: a deadline-bounded accept must give up instead of hanging + // forever when no peer ever connects. + #[test] + #[cfg(target_os = "linux")] + fn accept_with_deadline_times_out_when_nobody_connects() { + use std::os::unix::net::UnixListener; + let sock = std::env::temp_dir().join(format!("forkd-accept-{}.sock", std::process::id())); + let _ = std::fs::remove_file(&sock); + let listener = UnixListener::bind(&sock).unwrap(); + listener.set_nonblocking(true).unwrap(); + let start = Instant::now(); + let r = accept_with_deadline(&listener, start + Duration::from_millis(60)); + assert!(r.is_err(), "should time out, got {r:?}"); + assert!(start.elapsed() < Duration::from_secs(2), "must not hang"); + let _ = std::fs::remove_file(&sock); + } + #[test] fn boot_config_quickstart_has_sane_defaults() { let cfg = BootConfig::quickstart("/tmp/k".into(), "/tmp/r".into(), "/tmp/w".into());