From 11e31ae2e354f7717cf78df84f5f77c79024ce12 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Wed, 1 Apr 2026 22:55:57 -0700 Subject: [PATCH 01/24] Trampoline format: redzone reservation, R11 restart, RIP-relative re-encoding New trampoline format changes for the syscall rewriter: Rewriter (litebox_syscall_rewriter): - Add redzone reservation (LEA RSP,[RSP-0x80]) before syscall callback entry on x86-64, allowing the callback to use the 128-byte red zone - Add R11 restart address (LEA R11,[RIP+disp32]) pointing back to the call-site JMP, enabling SA_RESTART signal re-execution - Re-encode RIP-relative memory operands in pre-syscall instructions when they are copied to the trampoline, using iced_x86::Encoder at the trampoline IP so displacements remain correct - Guard post-syscall instructions with RIP-relative operands by delegating to hook_syscall_before_and_after instead of raw-copying - Append header-only marker (trampoline_size=0) when no syscall instructions are found, so the loader can distinguish checked binaries from unpatched ones - Add 5 inline unit tests for Bun detection and RIP-relative encoding Loader (litebox_common_linux): - Handle trampoline_size==0 as a valid no-op (checked, no syscalls) - Add UnpatchedBinary error variant for binaries missing the magic - Add has_trampoline() accessor Platform/shim (litebox_platform_linux_userland): - Add saved_r11 TLS slot and save R11 on syscall callback entry - Add syscall_callback_redzone entry point that undoes red zone reservation before saving registers - Return syscall_callback_redzone from get_syscall_entry_point() Shim loader (litebox_shim_linux): - Treat UnpatchedBinary as non-fatal in parse_trampoline calls, allowing unpatched binaries to load without a trampoline --- litebox_common_linux/src/loader.rs | 22 +- litebox_platform_linux_userland/src/lib.rs | 34 ++- litebox_shim_linux/src/loader/elf.rs | 9 +- litebox_syscall_rewriter/src/lib.rs | 266 ++++++++++++++++-- 
.../snapshots/snapshot_tests__hello-diff.snap | 216 +++++++------- 5 files changed, 412 insertions(+), 135 deletions(-) diff --git a/litebox_common_linux/src/loader.rs b/litebox_common_linux/src/loader.rs index 3ae61266e..8d061b93d 100644 --- a/litebox_common_linux/src/loader.rs +++ b/litebox_common_linux/src/loader.rs @@ -128,6 +128,8 @@ pub enum ElfParseError { BadTrampoline, #[error("Invalid trampoline version")] BadTrampolineVersion, + #[error("Binary not patched for syscall rewriting")] + UnpatchedBinary, #[error("Unsupported ELF type")] UnsupportedType, #[error("Bad interpreter")] @@ -141,6 +143,7 @@ impl> From> for Errno { | ElfParseError::BadFormat | ElfParseError::BadTrampoline | ElfParseError::BadTrampolineVersion + | ElfParseError::UnpatchedBinary | ElfParseError::BadInterp | ElfParseError::UnsupportedType => Errno::ENOEXEC, ElfParseError::Io(err) => err.into(), @@ -218,6 +221,11 @@ impl ElfParsedFile { }) } + /// Returns `true` if a trampoline was parsed and will be mapped by `load()`. + pub fn has_trampoline(&self) -> bool { + self.trampoline.is_some() + } + /// Parse the LiteBox trampoline data, if any. /// /// The trampoline header is located at the end of the file (last 32/20 bytes). @@ -251,7 +259,8 @@ impl ElfParsedFile { // File must be large enough to contain the header if file_size < header_size as u64 { - return Ok(()); + // Too small for a trampoline header — binary is unpatched. + return Err(ElfParseError::UnpatchedBinary); } // Read the header from the end of the file @@ -267,8 +276,9 @@ impl ElfParsedFile { if &header_buf[0..7] == b"LITEBOX" { return Err(ElfParseError::BadTrampolineVersion); } - // No trampoline found, which is OK (not all binaries are rewritten) - return Ok(()); + // No trampoline found. When using the syscall rewriter backend + // (syscall_entry_point != 0), all binaries must be patched. 
+ return Err(ElfParseError::UnpatchedBinary); } let (file_offset, vaddr, trampoline_size) = if cfg!(target_pointer_width = "64") { @@ -293,9 +303,11 @@ impl ElfParsedFile { ) }; - // Validate trampoline size + // trampoline_size == 0 means the rewriter checked this binary and found + // no syscall instructions. The magic header acts as a "checked" marker so + // the runtime skips eager code-segment patching. No trampoline to map. if trampoline_size == 0 { - return Err(ElfParseError::BadTrampoline); + return Ok(()); } // Verify the file offset is page-aligned (as required by the rewriter) diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index c3e60a83a..7babef6ca 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -537,6 +537,8 @@ core::arch::global_asm!( " .section .tbss .align 8 +saved_r11: + .quad 0 scratch: .quad 0 host_sp: @@ -651,6 +653,10 @@ syscall_callback: // expectations of `interrupt_signal_handler`. mov BYTE PTR gs:in_guest@tpoff, 0 + // Save guest R11 (syscall call-site address from rewriter trampoline) + // before it is clobbered by the fsbase/gsbase save sequence below. + mov gs:saved_r11@tpoff, r11 + // Restore host fs base. rdfsbase r11 mov gs:guest_fsbase@tpoff, r11 @@ -660,6 +666,25 @@ syscall_callback: // Switch to the top of the guest context. mov r11, rsp mov rsp, fs:guest_context_top@tpoff + jmp .Lsyscall_save_regs + + .globl syscall_callback_redzone +syscall_callback_redzone: + // Same as syscall_callback, but the trampoline has already reserved + // 128 bytes below RSP to protect the SysV red zone. + mov BYTE PTR gs:in_guest@tpoff, 0 + mov gs:saved_r11@tpoff, r11 + rdfsbase r11 + mov gs:guest_fsbase@tpoff, r11 + rdgsbase r11 + wrfsbase r11 + + // The trampoline lowered RSP by 128 bytes with LEA, so recover the + // architectural guest stack pointer before saving pt_regs. 
+ lea r11, [rsp + 128] + mov rsp, fs:guest_context_top@tpoff + +.Lsyscall_save_regs: // TODO: save float and vector registers (xsave or fxsave) // Save caller-saved registers @@ -678,7 +703,7 @@ syscall_callback: push r8 // pt_regs->r8 push r9 // pt_regs->r9 push r10 // pt_regs->r10 - push [rsp + 88] // pt_regs->r11 = rflags + push QWORD PTR gs:saved_r11@tpoff // pt_regs->r11 (syscall call-site from rewriter) push rbx // pt_regs->bx push rbp // pt_regs->bp push r12 // pt_regs->r12 @@ -1967,6 +1992,7 @@ impl litebox::platform::StdioProvider for LinuxUserland { unsafe extern "C" { // Defined in asm blocks above fn syscall_callback() -> isize; + fn syscall_callback_redzone() -> isize; fn exception_callback(); fn interrupt_callback(); fn switch_to_guest_start(); @@ -2047,7 +2073,7 @@ impl ThreadContext<'_> { impl litebox::platform::SystemInfoProvider for LinuxUserland { fn get_syscall_entry_point(&self) -> usize { - syscall_callback as *const () as usize + syscall_callback_redzone as *const () as usize } fn get_vdso_address(&self) -> Option { @@ -2714,7 +2740,9 @@ unsafe fn interrupt_signal_handler( // FUTURE: handle trampoline code, too. This is somewhat less important // because it's probably fine for the shim to observe a guest context that // is inside the trampoline. - if ip == syscall_callback as *const () as usize { + if ip == syscall_callback as *const () as usize + || ip == syscall_callback_redzone as *const () as usize + { // No need to clear `in_guest` or set interrupt; the syscall handler will // clear `in_guest` and call into the shim. 
return; diff --git a/litebox_shim_linux/src/loader/elf.rs b/litebox_shim_linux/src/loader/elf.rs index 0d62030a8..63a9a5d1e 100644 --- a/litebox_shim_linux/src/loader/elf.rs +++ b/litebox_shim_linux/src/loader/elf.rs @@ -10,12 +10,12 @@ use litebox::{ platform::{RawConstPointer as _, SystemInfoProvider as _}, utils::{ReinterpretSignedExt, TruncateExt}, }; -use litebox_common_linux::{MapFlags, errno::Errno, loader::ElfParsedFile}; +use litebox_common_linux::{errno::Errno, loader::ElfParsedFile, MapFlags}; use thiserror::Error; use crate::{ - MutPtr, loader::auxv::{AuxKey, AuxVec}, + MutPtr, }; use super::stack::UserStack; @@ -172,7 +172,10 @@ impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { let file = ElfFile::new(task, path).map_err(ElfLoaderError::OpenError)?; let mut parsed = litebox_common_linux::loader::ElfParsedFile::parse(&mut &file) .map_err(ElfLoaderError::ParseError)?; - parsed.parse_trampoline(&mut &file, task.global.platform.get_syscall_entry_point())?; + match parsed.parse_trampoline(&mut &file, task.global.platform.get_syscall_entry_point()) { + Ok(()) | Err(litebox_common_linux::loader::ElfParseError::UnpatchedBinary) => {} + Err(err) => return Err(ElfLoaderError::ParseError(err)), + } Ok(Self { file, parsed }) } } diff --git a/litebox_syscall_rewriter/src/lib.rs b/litebox_syscall_rewriter/src/lib.rs index f6de6503e..780d6409f 100644 --- a/litebox_syscall_rewriter/src/lib.rs +++ b/litebox_syscall_rewriter/src/lib.rs @@ -201,6 +201,7 @@ pub fn hook_syscalls_in_elf( } // Patch syscalls in-place in buf + let mut syscall_insns_found = false; for s in &text_sections { let section_data = section_slice_mut(buf, s)?; match hook_syscalls_in_section( @@ -214,11 +215,42 @@ pub fn hook_syscalls_in_elf( &mut trampoline_data, skipped_addrs, ) { - Ok(()) | Err(Error::NoSyscallInstructionsFound) => {} + Ok(()) => { + syscall_insns_found = true; + } + Err(Error::NoSyscallInstructionsFound) => {} Err(e) => return Err(e), } } + if !syscall_insns_found { + // No syscall 
instructions found. Append a header-only marker so the + // loader can distinguish "checked by rewriter, nothing to patch" from + // "never processed." The trampoline_size=0 sentinel tells the loader + // to skip trampoline mapping entirely. + // Use the original input (not `buf`) to avoid emitting the phdr + // alignment fixup that was only needed for the `object` crate parser. + let mut out = input_binary.to_vec(); + if arch == Arch::X86_64 { + let header = TrampolineHeader64 { + magic: *TRAMPOLINE_MAGIC, + file_offset: 0, + vaddr: 0, + trampoline_size: 0, + }; + out.extend_from_slice(header.as_bytes()); + } else { + let header = TrampolineHeader32 { + magic: *TRAMPOLINE_MAGIC, + file_offset: 0, + vaddr: 0, + trampoline_size: 0, + }; + out.extend_from_slice(header.as_bytes()); + } + return Ok(out); + } + // Patch fork → vfork: overwrite the first bytes of __libc_fork with a // JMP to __libc_vfork. This prevents glibc's fork wrapper from running // post-fork handlers that corrupt shared state under vfork semantics. @@ -228,11 +260,6 @@ pub fn hook_syscalls_in_elf( if off + 5 <= buf.len() { buf[off] = 0xE9; // JMP rel32 buf[off + 1..off + 5].copy_from_slice(&rel32.to_le_bytes()); - } else { - return Err(Error::ParseError(format!( - "fork→vfork patch offset {off:#x} + 5 exceeds buffer length {}", - buf.len() - ))); } } @@ -453,18 +480,90 @@ fn hook_syscalls_in_section( let replace_start = replace_start.unwrap(); let replace_len = usize::try_from(replace_end - replace_start).unwrap(); + let copied_presyscall_insts_have_ip_rel_mem = arch == Arch::X86_64 + && instruction_slice_has_ip_rel_memory_operand( + instructions + .iter() + .take(i) + .skip_while(|prev_inst| prev_inst.ip() < replace_start), + ); + let target_addr = trampoline_base_addr + trampoline_data.len() as u64; - // Copy the original instructions to the trampoline + // Copy the pre-syscall instructions to the trampoline. 
+ // When any instruction has a RIP-relative memory operand, we + // re-encode them so the displacement targets the same absolute + // address from the new trampoline location. if replace_start < inst.ip() { - trampoline_data.extend_from_slice( - &section_data[usize::try_from(replace_start - section_base_addr).unwrap() - ..usize::try_from(inst.ip() - section_base_addr).unwrap()], - ); + if copied_presyscall_insts_have_ip_rel_mem { + let mut reencoded = Vec::new(); + let mut ok = true; + let mut encoder = iced_x86::Encoder::new(64); + for pre_inst in instructions + .iter() + .take(i) + .skip_while(|p| p.ip() < replace_start) + { + let tramp_ip = target_addr + reencoded.len() as u64; + if encoder.encode(pre_inst, tramp_ip).is_err() { + ok = false; + break; + } + let bytes = encoder.take_buffer(); + if bytes.len() != pre_inst.len() { + ok = false; + break; + } + reencoded.extend_from_slice(&bytes); + } + if !ok { + match hook_syscall_and_after( + arch, + control_transfer_targets, + section_base_addr, + section_data, + trampoline_base_addr, + syscall_entry_addr, + trampoline_data, + &instructions, + i, + ) { + Ok(()) => {} + Err(Error::InsufficientBytesBeforeOrAfter(_)) => { + replace_with_ud2(section_data, section_base_addr, inst); + skipped_addrs.push(inst.ip()); + } + Err(e) => return Err(e), + } + continue; + } + trampoline_data.extend_from_slice(&reencoded); + } else { + trampoline_data.extend_from_slice( + &section_data[usize::try_from(replace_start - section_base_addr).unwrap() + ..usize::try_from(inst.ip() - section_base_addr).unwrap()], + ); + } } let return_addr = inst.next_ip(); if arch == Arch::X86_64 { + // Reserve the SysV red zone before entering the shim so async + // guest signal delivery / interrupt handling cannot clobber + // stack locals parked below the architectural RSP. 
+ // LEA RSP, [RSP - 0x80] = 48 8D 64 24 80 + trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x64, 0x24, 0x80]); + + // Put the address of the original JMP (call-site) into R11 so + // that SA_RESTART can rewind ctx.rip to re-enter the trampoline. + // The real `syscall` instruction clobbers R11 with RFLAGS, so + // this register is free from the guest's perspective. + // LEA R11, [RIP + disp32] = 4C 8D 1D + let r11_disp = i64::try_from(replace_start).unwrap() + - i64::try_from(trampoline_base_addr + trampoline_data.len() as u64 + 7).unwrap(); + trampoline_data.extend_from_slice(&[0x4C, 0x8D, 0x1D]); // LEA R11, [RIP + disp32] + trampoline_data.extend_from_slice(&(i32::try_from(r11_disp).unwrap().to_le_bytes())); + // Put jump back location into rcx. let jmp_back_offset = i64::try_from(return_addr).unwrap() - i64::try_from(trampoline_base_addr + trampoline_data.len() as u64 + 7).unwrap(); @@ -569,8 +668,8 @@ fn fixup_phdr_alignment(buf: &mut [u8]) { let new_start = old_start + padding; let new_end = new_start + usize::try_from(phdr_size).expect("phdr_size must fit in usize"); - if old_end > buf.len() || new_end > buf.len() { - return; // corrupt phdr table or not enough room + if new_end > buf.len() { + return; // not enough room } // Move the phdr table forward (use copy_within since src and dst overlap). @@ -657,10 +756,12 @@ fn find_fork_vfork_patch( Some((fork_file_offset, rel32)) } -/// Check if the input binary has the Bun footer marker at the end. +/// Check if the input binary has the Bun footer marker near the end. fn has_bun_footer_marker(input_binary: &[u8]) -> bool { - input_binary.len() >= BUN_FOOTER_MARKER.len() - && input_binary[input_binary.len() - BUN_FOOTER_MARKER.len()..] == *BUN_FOOTER_MARKER + let window_len = input_binary.len().min(256); + input_binary[input_binary.len().saturating_sub(window_len)..] 
+ .windows(BUN_FOOTER_MARKER.len()) + .any(|window| window == BUN_FOOTER_MARKER) } /// Replace an unpatchable syscall instruction with `UD2` (`0F 0B`) so that @@ -946,10 +1047,44 @@ fn hook_syscall_and_after( } let replace_end = replace_end.unwrap(); + let copied_postsyscall_insts_have_ip_rel_mem = arch == Arch::X86_64 + && instruction_slice_has_ip_rel_memory_operand( + instructions + .iter() + .skip(inst_index + 1) + .take_while(|next_inst| next_inst.ip() < replace_end), + ); + if copied_postsyscall_insts_have_ip_rel_mem { + return hook_syscall_before_and_after( + arch, + control_transfer_targets, + section_base_addr, + section_data, + trampoline_base_addr, + syscall_entry_addr, + trampoline_data, + instructions, + inst_index, + ); + } let target_addr = trampoline_base_addr + trampoline_data.len() as u64; if arch == Arch::X86_64 { + // Reserve the SysV red zone before entering the shim so async guest + // signal delivery / interrupt handling cannot clobber stack locals + // parked below the architectural RSP. + // LEA RSP, [RSP - 0x80] = 48 8D 64 24 80 + trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x64, 0x24, 0x80]); + + // Put the address of the original JMP (call-site) into R11 so + // that SA_RESTART can rewind ctx.rip to re-enter the trampoline. 
+ // LEA R11, [RIP + disp32] = 4C 8D 1D + let r11_disp = i64::try_from(replace_start).unwrap() + - i64::try_from(trampoline_base_addr + trampoline_data.len() as u64 + 7).unwrap(); + trampoline_data.extend_from_slice(&[0x4C, 0x8D, 0x1D]); // LEA R11, [RIP + disp32] + trampoline_data.extend_from_slice(&(i32::try_from(r11_disp).unwrap().to_le_bytes())); + // Put jump back location into rcx, via lea rcx, [next instruction] trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x0D]); // LEA RCX, [RIP + disp32] trampoline_data.extend_from_slice(&6u32.to_le_bytes()); @@ -1013,6 +1148,14 @@ fn hook_syscall_and_after( Ok(()) } +fn instruction_slice_has_ip_rel_memory_operand<'a>( + instructions: impl IntoIterator<Item = &'a iced_x86::Instruction>, +) -> bool { + instructions + .into_iter() + .any(iced_x86::Instruction::is_ip_rel_memory_operand) +} + #[allow(clippy::too_many_arguments)] fn hook_syscall_before_and_after( arch: Arch, @@ -1136,3 +1279,94 @@ fn hook_syscall_before_and_after( Ok(()) } + +#[cfg(test)] +mod tests { + use super::{BUN_FOOTER_MARKER, has_bun_footer_marker, patch_code_segment}; + + #[test] + fn detects_bun_footer_marker_near_end() { + let mut bytes = vec![0u8; 512]; + let offset = bytes.len() - BUN_FOOTER_MARKER.len() - 8; + bytes[offset..offset + BUN_FOOTER_MARKER.len()].copy_from_slice(BUN_FOOTER_MARKER); + assert!(has_bun_footer_marker(&bytes)); + } + + #[test] + fn ignores_missing_bun_footer_marker() { + let bytes = vec![0u8; 512]; + assert!(!has_bun_footer_marker(&bytes)); + } + + #[test] + fn patch_code_segment_relocates_rip_relative_presyscall_to_trampoline() { + let mut code = vec![ + 0x48, 0x8D, 0x35, 0x10, 0x00, 0x00, 0x00, // lea rsi, [rip + 0x10] @ 0x1000 + 0x0F, 0x05, // syscall @ 0x1007 + 0x31, 0xC0, // xor eax, eax + 0xBA, 0x01, 0x00, 0x00, 0x00, // mov edx, 1 + ]; + + let trampoline = patch_code_segment(&mut code, 0x1000, 0x8000, 0x9000, &mut Vec::new()) + .expect("patch_code_segment should succeed"); + + assert!(!trampoline.is_empty()); + // The lea + syscall region (9 bytes 
starting at 0x1000) should now be a + // JMP to the trampoline followed by NOPs. + assert_eq!(code[0], 0xE9, "replace region should start with JMP rel32"); + // The trampoline should contain the re-encoded lea with an adjusted + // RIP-relative displacement targeting the same absolute address. + // Original: lea targets 0x1007 + 0x10 = 0x1017. + // Re-encoded at 0x8000: displacement = 0x1017 - (0x8000 + 7) = -0x6FF0 = 0xFFFF9010 + #[allow(clippy::cast_possible_truncation)] + let expected_disp: i32 = 0x1017_i64.wrapping_sub(0x8000 + 7) as i32; + assert_eq!( + &trampoline[3..7], + &expected_disp.to_le_bytes(), + "re-encoded lea displacement should target the original address" + ); + } + + #[test] + fn patch_code_segment_handles_rip_relative_on_both_sides_of_syscall() { + let mut code = vec![ + 0x48, 0x8D, 0x35, 0x10, 0x00, 0x00, 0x00, // lea rsi, [rip + 0x10] @ 0x1000 + 0x0F, 0x05, // syscall @ 0x1007 + 0x48, 0x8D, 0x3D, 0x10, 0x00, 0x00, 0x00, // lea rdi, [rip + 0x10] + ]; + + let mut skipped = Vec::new(); + let stubs = patch_code_segment(&mut code, 0x1000, 0x8000, 0x9000, &mut skipped) + .expect("patch_code_segment should succeed"); + // The pre-syscall lea is re-encoded in the trampoline; the + // post-syscall lea stays in place (not overwritten). 
+ assert!(!stubs.is_empty(), "should be patched via re-encoding"); + assert_eq!(code[0], 0xE9, "replace region should start with JMP"); + assert!(skipped.is_empty(), "nothing should be skipped"); + } + + #[test] + fn patch_code_segment_patches_all_syscalls_including_rip_relative() { + let mut code = vec![ + // First syscall: patchable (3 nops before = 5 bytes total with syscall) + 0x90, 0x90, 0x90, // nop; nop; nop + 0x0F, 0x05, // syscall @ offset 3 + 0xC3, // ret + // Second syscall: RIP-relative before, now patchable via re-encoding + 0x48, 0x8D, 0x35, 0x10, 0x00, 0x00, 0x00, // lea rsi, [rip+0x10] + 0x0F, 0x05, // syscall @ offset 13 + 0x48, 0x8D, 0x3D, 0x10, 0x00, 0x00, 0x00, // lea rdi, [rip+0x10] + ]; + + let mut skipped = Vec::new(); + let stubs = patch_code_segment(&mut code, 0x1000, 0x8000, 0x9000, &mut skipped).unwrap(); + + assert!(!stubs.is_empty(), "both syscalls should be patched"); + assert_eq!(code[0], 0xE9, "first syscall site should be a JMP"); + assert_eq!( + code[6], 0xE9, + "second syscall site (lea start) should be a JMP" + ); + assert!(skipped.is_empty(), "nothing should be skipped"); + } +} diff --git a/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap b/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap index 9f933eb4d..aebaab30d 100644 --- a/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap +++ b/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap @@ -24,7 +24,7 @@ expression: diff - 401e78: 31 ff xor %edi,%edi - 401e7a: 89 d0 mov %edx,%eax - 401e7c: 0f 05 syscall -+ 401e78: ++ 401e78: + 401e7d: 90 nop 401e7e: eb f8 jmp 401e78 <__libc_start_call_main+0x88> 401e80: 31 c0 xor %eax,%eax @@ -35,7 +35,7 @@ expression: diff 403ee0: bf 01 50 00 00 mov $0x5001,%edi - 403ee5: b8 9e 00 00 00 mov $0x9e,%eax - 403eea: 0f 05 syscall -+ 403ee5: ++ 403ee5: + 403eea: 90 nop + 403eeb: 90 nop 403eec: 44 89 ef mov %r13d,%edi @@ -47,7 +47,7 @@ expression: diff 4043ce: 48 
89 36 mov %rsi,(%rsi) - 4043d1: 48 89 76 10 mov %rsi,0x10(%rsi) - 4043d5: 0f 05 syscall -+ 4043d1: ++ 4043d1: + 4043d6: 90 nop 4043d7: 85 c0 test %eax,%eax 4043d9: 74 24 je 4043ff <__libc_setup_tls+0x1df> @@ -56,7 +56,7 @@ expression: diff 4043e5: b8 01 00 00 00 mov $0x1,%eax - 4043ea: 48 8d 35 c7 d1 07 00 lea 0x7d1c7(%rip),%rsi # 4815b8 - 4043f1: 0f 05 syscall -+ 4043ea: ++ 4043ea: + 4043ef: 90 nop + 4043f0: 90 nop + 4043f1: 90 nop @@ -64,7 +64,7 @@ expression: diff 4043f3: bf 7f 00 00 00 mov $0x7f,%edi - 4043f8: b8 e7 00 00 00 mov $0xe7,%eax - 4043fd: 0f 05 syscall -+ 4043f8: ++ 4043f8: + 4043fd: 90 nop + 4043fe: 90 nop 4043ff: e8 dc ba 01 00 call 41fee0 <__tls_init_tp> @@ -76,7 +76,7 @@ expression: diff 4044ba: b8 01 00 00 00 mov $0x1,%eax - 4044bf: 48 8d 35 f2 d0 07 00 lea 0x7d0f2(%rip),%rsi # 4815b8 - 4044c6: 0f 05 syscall -+ 4044bf: ++ 4044bf: + 4044c4: 90 nop + 4044c5: 90 nop + 4044c6: 90 nop @@ -84,7 +84,7 @@ expression: diff 4044c8: bf 7f 00 00 00 mov $0x7f,%edi - 4044cd: b8 e7 00 00 00 mov $0xe7,%eax - 4044d2: 0f 05 syscall -+ 4044cd: ++ 4044cd: + 4044d2: 90 nop + 4044d3: 90 nop 4044d4: e9 70 fe ff ff jmp 404349 <__libc_setup_tls+0x129> @@ -96,7 +96,7 @@ expression: diff 40a3e7: bf 02 00 00 00 mov $0x2,%edi - 40a3ec: 44 89 c8 mov %r9d,%eax - 40a3ef: 0f 05 syscall -+ 40a3ec: ++ 40a3ec: 40a3f1: 48 83 f8 fc cmp $0xfffffffffffffffc,%rax 40a3f5: 74 e9 je 40a3e0 <__libc_message_impl+0x150> 40a3f7: 45 31 c9 xor %r9d,%r9d @@ -106,7 +106,7 @@ expression: diff 40a5cf: be 80 00 00 00 mov $0x80,%esi - 40a5d4: b8 ca 00 00 00 mov $0xca,%eax - 40a5d9: 0f 05 syscall -+ 40a5d4: ++ 40a5d4: + 40a5d9: 90 nop + 40a5da: 90 nop 40a5db: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -118,7 +118,7 @@ expression: diff 40a635: b8 ca 00 00 00 mov $0xca,%eax - 40a63a: 40 80 f6 80 xor $0x80,%sil - 40a63e: 0f 05 syscall -+ 40a63a: ++ 40a63a: + 40a63f: 90 nop 40a640: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 40a646: 76 d6 jbe 40a61e <__lll_lock_wait+0xe> @@ -129,7 +129,7 @@ 
expression: diff 40a67c: be 81 00 00 00 mov $0x81,%esi - 40a681: b8 ca 00 00 00 mov $0xca,%eax - 40a686: 0f 05 syscall -+ 40a681: ++ 40a681: + 40a686: 90 nop + 40a687: 90 nop 40a688: c3 ret @@ -141,7 +141,7 @@ expression: diff 40a69b: ba 01 00 00 00 mov $0x1,%edx - 40a6a0: b8 ca 00 00 00 mov $0xca,%eax - 40a6a5: 0f 05 syscall -+ 40a6a0: ++ 40a6a0: + 40a6a5: 90 nop + 40a6a6: 90 nop 40a6a7: c3 ret @@ -153,7 +153,7 @@ expression: diff 40bbdb: c6 05 3e 4c 0a 00 01 movb $0x1,0xa4c3e(%rip) # 4b0820 <__malloc_initialized> - 40bbe2: b8 3e 01 00 00 mov $0x13e,%eax - 40bbe7: 0f 05 syscall -+ 40bbe2: ++ 40bbe2: + 40bbe7: 90 nop + 40bbe8: 90 nop 40bbe9: 48 8d 5d d0 lea -0x30(%rbp),%rbx @@ -165,7 +165,7 @@ expression: diff 4181de: 66 90 xchg %ax,%ax - 4181e0: b8 e4 00 00 00 mov $0xe4,%eax - 4181e5: 0f 05 syscall -+ 4181e0: ++ 4181e0: + 4181e5: 90 nop + 4181e6: 90 nop 4181e7: 85 c0 test %eax,%eax @@ -177,7 +177,7 @@ expression: diff 418249: 89 d0 mov %edx,%eax - 41824b: 0f 05 syscall - 41824d: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax -+ 41824b: ++ 41824b: + 418250: 90 nop + 418251: 90 nop + 418252: 90 nop @@ -190,7 +190,7 @@ expression: diff 418260: f3 0f 1e fa endbr64 - 418264: b8 05 00 00 00 mov $0x5,%eax - 418269: 0f 05 syscall -+ 418264: ++ 418264: + 418269: 90 nop + 41826a: 90 nop 41826b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -202,7 +202,7 @@ expression: diff 418290: f3 0f 1e fa endbr64 - 418294: b8 03 00 00 00 mov $0x3,%eax - 418299: 0f 05 syscall -+ 418294: ++ 418294: + 418299: 90 nop + 41829a: 90 nop 41829b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -214,7 +214,7 @@ expression: diff 4182f9: 74 25 je 418320 <__fcntl64_nocancel+0x60> - 4182fb: b8 48 00 00 00 mov $0x48,%eax - 418300: 0f 05 syscall -+ 4182fb: ++ 4182fb: + 418300: 90 nop + 418301: 90 nop 418302: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -226,7 +226,7 @@ expression: diff 418324: be 10 00 00 00 mov $0x10,%esi - 418329: b8 48 00 00 00 mov $0x48,%eax - 41832e: 0f 05 syscall -+ 
418329: ++ 418329: + 41832e: 90 nop + 41832f: 90 nop 418330: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -238,7 +238,7 @@ expression: diff 41837e: 74 20 je 4183a0 <__fcntl64_nocancel_adjusted+0x40> - 418380: b8 48 00 00 00 mov $0x48,%eax - 418385: 0f 05 syscall -+ 418380: ++ 418380: + 418385: 90 nop + 418386: 90 nop 418387: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -250,7 +250,7 @@ expression: diff 4183a4: be 10 00 00 00 mov $0x10,%esi - 4183a9: b8 48 00 00 00 mov $0x48,%eax - 4183ae: 0f 05 syscall -+ 4183a9: ++ 4183a9: + 4183ae: 90 nop + 4183af: 90 nop 4183b0: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -262,7 +262,7 @@ expression: diff 41841a: 48 89 fe mov %rdi,%rsi - 41841d: bf 9c ff ff ff mov $0xffffff9c,%edi - 418422: 0f 05 syscall -+ 41841d: ++ 41841d: + 418422: 90 nop + 418423: 90 nop 418424: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -275,7 +275,7 @@ expression: diff - 418480: f3 0f 1e fa endbr64 - 418484: 31 c0 xor %eax,%eax - 418486: 0f 05 syscall -+ 418480: ++ 418480: + 418485: 90 nop + 418486: 90 nop + 418487: 90 nop @@ -288,7 +288,7 @@ expression: diff 4184b0: f3 0f 1e fa endbr64 - 4184b4: b8 0c 00 00 00 mov $0xc,%eax - 4184b9: 0f 05 syscall -+ 4184b4: ++ 4184b4: + 4184b9: 90 nop + 4184ba: 90 nop 4184bb: 48 89 05 96 83 09 00 mov %rax,0x98396(%rip) # 4b0858 <__curbrk> @@ -300,7 +300,7 @@ expression: diff 41871a: 48 8d 95 f0 ef ff ff lea -0x1010(%rbp),%rdx - 418721: b8 cc 00 00 00 mov $0xcc,%eax - 418726: 0f 05 syscall -+ 418721: ++ 418721: + 418726: 90 nop + 418727: 90 nop 418728: 85 c0 test %eax,%eax @@ -312,7 +312,7 @@ expression: diff 418b40: f3 0f 1e fa endbr64 - 418b44: b8 1c 00 00 00 mov $0x1c,%eax - 418b49: 0f 05 syscall -+ 418b44: ++ 418b44: + 418b49: 90 nop + 418b4a: 90 nop 418b4b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -324,7 +324,7 @@ expression: diff 418b92: 48 89 df mov %rbx,%rdi - 418b95: b8 09 00 00 00 mov $0x9,%eax - 418b9a: 0f 05 syscall -+ 418b95: ++ 418b95: + 418b9a: 90 nop + 418b9b: 90 nop 418b9c: 48 3d 00 f0 ff ff 
cmp $0xfffffffffffff000,%rax @@ -336,7 +336,7 @@ expression: diff 418bed: b8 09 00 00 00 mov $0x9,%eax - 418bf2: 41 83 ca 40 or $0x40,%r10d - 418bf6: 0f 05 syscall -+ 418bf2: ++ 418bf2: + 418bf7: 90 nop 418bf8: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 418bfe: 76 a4 jbe 418ba4 <__mmap64+0x34> @@ -347,7 +347,7 @@ expression: diff 418c30: f3 0f 1e fa endbr64 - 418c34: b8 0a 00 00 00 mov $0xa,%eax - 418c39: 0f 05 syscall -+ 418c34: ++ 418c34: + 418c39: 90 nop + 418c3a: 90 nop 418c3b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -359,7 +359,7 @@ expression: diff 418c60: f3 0f 1e fa endbr64 - 418c64: b8 0b 00 00 00 mov $0xb,%eax - 418c69: 0f 05 syscall -+ 418c64: ++ 418c64: + 418c69: 90 nop + 418c6a: 90 nop 418c6b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -371,7 +371,7 @@ expression: diff 418d47: 45 31 c0 xor %r8d,%r8d - 418d4a: b8 19 00 00 00 mov $0x19,%eax - 418d4f: 0f 05 syscall -+ 418d4a: ++ 418d4a: + 418d4f: 90 nop + 418d50: 90 nop 418d51: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -383,7 +383,7 @@ expression: diff 418e23: bf 41 4d 56 53 mov $0x53564d41,%edi - 418e28: b8 9d 00 00 00 mov $0x9d,%eax - 418e2d: 0f 05 syscall -+ 418e28: ++ 418e28: + 418e2d: 90 nop + 418e2e: 90 nop 418e2f: 83 f8 ea cmp $0xffffffea,%eax @@ -395,7 +395,7 @@ expression: diff 418e50: f3 0f 1e fa endbr64 - 418e54: b8 63 00 00 00 mov $0x63,%eax - 418e59: 0f 05 syscall -+ 418e54: ++ 418e54: + 418e59: 90 nop + 418e5a: 90 nop 418e5b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -407,7 +407,7 @@ expression: diff 41e494: 48 8d 9d e0 ef ff ff lea -0x1020(%rbp),%rbx - 41e49b: 48 89 da mov %rbx,%rdx - 41e49e: 0f 05 syscall -+ 41e49b: ++ 41e49b: 41e4a0: 85 c0 test %eax,%eax 41e4a2: 7e 5c jle 41e500 <_dl_get_origin+0xa0> 41e4a4: 0f b6 95 e0 ef ff ff movzbl -0x1020(%rbp),%edx @@ -417,7 +417,7 @@ expression: diff 41e6db: 48 8d b5 d0 f6 ff ff lea -0x930(%rbp),%rsi - 41e6e2: b8 14 00 00 00 mov $0x14,%eax - 41e6e7: 0f 05 syscall -+ 41e6e2: ++ 41e6e2: + 41e6e7: 90 nop + 41e6e8: 
90 nop 41e6e9: 48 81 c4 38 09 00 00 add $0x938,%rsp @@ -429,7 +429,7 @@ expression: diff 41ff24: 48 8d bb d0 02 00 00 lea 0x2d0(%rbx),%rdi - 41ff2b: b8 da 00 00 00 mov $0xda,%eax - 41ff30: 0f 05 syscall -+ 41ff2b: ++ 41ff2b: + 41ff30: 90 nop + 41ff31: 90 nop 41ff32: 89 83 d0 02 00 00 mov %eax,0x2d0(%rbx) @@ -441,7 +441,7 @@ expression: diff 41ff81: 66 0f 6c c0 punpcklqdq %xmm0,%xmm0 - 41ff85: 0f 11 83 d8 02 00 00 movups %xmm0,0x2d8(%rbx) - 41ff8c: 0f 05 syscall -+ 41ff85: ++ 41ff85: + 41ff8a: 90 nop + 41ff8b: 90 nop + 41ff8c: 90 nop @@ -455,7 +455,7 @@ expression: diff 41fff4: 48 89 df mov %rbx,%rdi - 41fff7: b8 4e 01 00 00 mov $0x14e,%eax - 41fffc: 0f 05 syscall -+ 41fff7: ++ 41fff7: + 41fffc: 90 nop + 41fffd: 90 nop 41fffe: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -467,7 +467,7 @@ expression: diff 421344: bf 02 50 00 00 mov $0x5002,%edi - 421349: b8 9e 00 00 00 mov $0x9e,%eax - 42134e: 0f 05 syscall -+ 421349: ++ 421349: + 42134e: 90 nop + 42134f: 90 nop 421350: 89 c7 mov %eax,%edi @@ -479,7 +479,7 @@ expression: diff 4213a5: 48 89 e5 mov %rsp,%rbp - 4213a8: 48 8d 75 f8 lea -0x8(%rbp),%rsi - 4213ac: 0f 05 syscall -+ 4213a8: ++ 4213a8: + 4213ad: 90 nop 4213ae: 48 85 c0 test %rax,%rax 4213b1: 74 15 je 4213c8 <_dl_cet_setup_features+0x38> @@ -491,7 +491,7 @@ expression: diff - 4213f7: bf 03 50 00 00 mov $0x5003,%edi - 4213fc: 89 d0 mov %edx,%eax - 4213fe: 0f 05 syscall -+ 4213f7: ++ 4213f7: + 4213fc: 90 nop + 4213fd: 90 nop + 4213fe: 90 nop @@ -506,13 +506,13 @@ expression: diff - 421455: 31 ff xor %edi,%edi - 421457: 89 f0 mov %esi,%eax - 421459: 0f 05 syscall -+ 421455: ++ 421455: + 42145a: 90 nop 42145b: 48 89 c2 mov %rax,%rdx - 42145e: 48 8d 3c 18 lea (%rax,%rbx,1),%rdi - 421462: 89 f0 mov %esi,%eax - 421464: 0f 05 syscall -+ 42145e: ++ 42145e: + 421463: 90 nop + 421464: 90 nop + 421465: 90 nop @@ -525,7 +525,7 @@ expression: diff 421481: 48 89 de mov %rbx,%rsi - 421484: b8 09 00 00 00 mov $0x9,%eax - 421489: 0f 05 syscall -+ 421484: ++ 421484: + 421489: 90 nop + 
42148a: 90 nop 42148b: 31 d2 xor %edx,%edx @@ -537,7 +537,7 @@ expression: diff 444c16: 48 8d 35 b3 0a 04 00 lea 0x40ab3(%rip),%rsi # 4856d0 - 444c1d: b8 0e 00 00 00 mov $0xe,%eax - 444c22: 0f 05 syscall -+ 444c1d: ++ 444c1d: + 444c22: 90 nop + 444c23: 90 nop 444c24: 31 c0 xor %eax,%eax @@ -549,7 +549,7 @@ expression: diff 444c63: bf 02 00 00 00 mov $0x2,%edi - 444c68: b8 0e 00 00 00 mov $0xe,%eax - 444c6d: 0f 05 syscall -+ 444c68: ++ 444c68: + 444c6d: 90 nop + 444c6e: 90 nop 444c6f: 48 8b 45 d8 mov -0x28(%rbp),%rax @@ -561,7 +561,7 @@ expression: diff 444ca8: 89 de mov %ebx,%esi - 444caa: b8 ea 00 00 00 mov $0xea,%eax - 444caf: 0f 05 syscall -+ 444caa: ++ 444caa: + 444caf: 90 nop + 444cb0: 90 nop 444cb1: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -572,7 +572,7 @@ expression: diff 444cbe: 66 90 xchg %ax,%ax - 444cc0: b8 ba 00 00 00 mov $0xba,%eax - 444cc5: 0f 05 syscall -+ 444cc0: ++ 444cc0: + 444cc5: 90 nop + 444cc6: 90 nop 444cc7: 89 c3 mov %eax,%ebx @@ -582,7 +582,7 @@ expression: diff 444cd3: 89 c7 mov %eax,%edi - 444cd5: b8 ea 00 00 00 mov $0xea,%eax - 444cda: 0f 05 syscall -+ 444cd5: ++ 444cd5: + 444cda: 90 nop + 444cdb: 90 nop 444cdc: 89 c3 mov %eax,%ebx @@ -594,7 +594,7 @@ expression: diff 444d78: 4c 89 fa mov %r15,%rdx - 444d7b: 48 8d 35 4e 09 04 00 lea 0x4094e(%rip),%rsi # 4856d0 - 444d82: 0f 05 syscall -+ 444d7b: ++ 444d7b: + 444d80: 90 nop + 444d81: 90 nop + 444d82: 90 nop @@ -608,7 +608,7 @@ expression: diff 444dc4: bf 02 00 00 00 mov $0x2,%edi - 444dc9: b8 0e 00 00 00 mov $0xe,%eax - 444dce: 0f 05 syscall -+ 444dc9: ++ 444dc9: + 444dce: 90 nop + 444dcf: 90 nop 444dd0: 48 8b 45 c8 mov -0x38(%rbp),%rax @@ -620,7 +620,7 @@ expression: diff 444e08: 89 de mov %ebx,%esi - 444e0a: b8 ea 00 00 00 mov $0xea,%eax - 444e0f: 0f 05 syscall -+ 444e0a: ++ 444e0a: + 444e0f: 90 nop + 444e10: 90 nop 444e11: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -630,7 +630,7 @@ expression: diff 444e1e: eb 8a jmp 444daa <__pthread_kill+0x8a> - 444e20: b8 ba 00 00 00 mov $0xba,%eax - 
444e25: 0f 05 syscall -+ 444e20: ++ 444e20: + 444e25: 90 nop + 444e26: 90 nop 444e27: 89 c3 mov %eax,%ebx @@ -640,7 +640,7 @@ expression: diff 444e33: 89 c7 mov %eax,%edi - 444e35: b8 ea 00 00 00 mov $0xea,%eax - 444e3a: 0f 05 syscall -+ 444e35: ++ 444e35: + 444e3a: 90 nop + 444e3b: 90 nop 444e3c: 41 89 c6 mov %eax,%r14d @@ -652,7 +652,7 @@ expression: diff 445107: f7 d6 not %esi - 445109: 81 e6 80 00 00 00 and $0x80,%esi - 44510f: 0f 05 syscall -+ 445109: ++ 445109: + 44510e: 90 nop + 44510f: 90 nop + 445110: 90 nop @@ -665,7 +665,7 @@ expression: diff 4452e4: 48 89 df mov %rbx,%rdi - 4452e7: b8 ca 00 00 00 mov $0xca,%eax - 4452ec: 0f 05 syscall -+ 4452e7: ++ 4452e7: + 4452ec: 90 nop + 4452ed: 90 nop 4452ee: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -677,7 +677,7 @@ expression: diff 4454ff: be 07 00 00 00 mov $0x7,%esi - 445504: b8 ca 00 00 00 mov $0xca,%eax - 445509: 0f 05 syscall -+ 445504: ++ 445504: + 445509: 90 nop + 44550a: 90 nop 44550b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -689,7 +689,7 @@ expression: diff 445aa9: 81 e6 80 00 00 00 and $0x80,%esi - 445aaf: 40 80 f6 81 xor $0x81,%sil - 445ab3: 0f 05 syscall -+ 445aaf: ++ 445aaf: + 445ab4: 90 nop 445ab5: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 445abb: 0f 87 0e 02 00 00 ja 445ccf <__pthread_mutex_unlock_full+0x3bf> @@ -700,7 +700,7 @@ expression: diff 445cfd: 4c 89 c7 mov %r8,%rdi - 445d00: b8 ca 00 00 00 mov $0xca,%eax - 445d05: 0f 05 syscall -+ 445d00: ++ 445d00: + 445d05: 90 nop + 445d06: 90 nop 445d07: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -712,7 +712,7 @@ expression: diff 445d29: 4c 89 c7 mov %r8,%rdi - 445d2c: b8 ca 00 00 00 mov $0xca,%eax - 445d31: 0f 05 syscall -+ 445d2c: ++ 445d2c: + 445d31: 90 nop + 445d32: 90 nop 445d33: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -724,7 +724,7 @@ expression: diff 44600f: 48 89 df mov %rbx,%rdi - 446012: b8 ca 00 00 00 mov $0xca,%eax - 446017: 0f 05 syscall -+ 446012: ++ 446012: + 446017: 90 nop + 446018: 90 nop 446019: 
48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -736,7 +736,7 @@ expression: diff 4460b0: 48 89 df mov %rbx,%rdi - 4460b3: b8 ca 00 00 00 mov $0xca,%eax - 4460b8: 0f 05 syscall -+ 4460b3: ++ 4460b3: + 4460b8: 90 nop + 4460b9: 90 nop 4460ba: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -748,7 +748,7 @@ expression: diff 446142: be 81 00 00 00 mov $0x81,%esi - 446147: b8 ca 00 00 00 mov $0xca,%eax - 44614c: 0f 05 syscall -+ 446147: ++ 446147: + 44614c: 90 nop + 44614d: 90 nop 44614e: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -760,7 +760,7 @@ expression: diff 4462e3: c1 e6 07 shl $0x7,%esi - 4462e6: 40 80 f6 81 xor $0x81,%sil - 4462ea: 0f 05 syscall -+ 4462e6: ++ 4462e6: + 4462eb: 90 nop 4462ec: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 4462f2: 0f 86 2e ff ff ff jbe 446226 <___pthread_rwlock_rdlock+0x46> @@ -771,7 +771,7 @@ expression: diff 446437: 40 80 f6 81 xor $0x81,%sil - 44643b: b8 ca 00 00 00 mov $0xca,%eax - 446440: 0f 05 syscall -+ 44643b: ++ 44643b: + 446440: 90 nop + 446441: 90 nop 446442: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -783,7 +783,7 @@ expression: diff 44648a: b8 ca 00 00 00 mov $0xca,%eax - 44648f: 40 80 f6 81 xor $0x81,%sil - 446493: 0f 05 syscall -+ 44648f: ++ 44648f: + 446494: 90 nop 446495: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 44649b: 76 83 jbe 446420 <___pthread_rwlock_unlock+0x50> @@ -794,7 +794,7 @@ expression: diff 446511: 40 80 f6 81 xor $0x81,%sil - 446515: b8 ca 00 00 00 mov $0xca,%eax - 44651a: 0f 05 syscall -+ 446515: ++ 446515: + 44651a: 90 nop + 44651b: 90 nop 44651c: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -806,7 +806,7 @@ expression: diff 446577: b8 ca 00 00 00 mov $0xca,%eax - 44657c: 40 80 f6 81 xor $0x81,%sil - 446580: 0f 05 syscall -+ 44657c: ++ 44657c: + 446581: 90 nop 446582: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 446588: 0f 86 6c ff ff ff jbe 4464fa <___pthread_rwlock_unlock+0x12a> @@ -817,7 +817,7 @@ expression: diff 446855: 40 80 f6 81 xor $0x81,%sil - 446859: b8 
ca 00 00 00 mov $0xca,%eax - 44685e: 0f 05 syscall -+ 446859: ++ 446859: + 44685e: 90 nop + 44685f: 90 nop 446860: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -829,7 +829,7 @@ expression: diff 446880: 40 80 f6 81 xor $0x81,%sil - 446884: b8 ca 00 00 00 mov $0xca,%eax - 446889: 0f 05 syscall -+ 446884: ++ 446884: + 446889: 90 nop + 44688a: 90 nop 44688b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -841,7 +841,7 @@ expression: diff 446924: 40 80 f6 81 xor $0x81,%sil - 446928: b8 ca 00 00 00 mov $0xca,%eax - 44692d: 0f 05 syscall -+ 446928: ++ 446928: + 44692d: 90 nop + 44692e: 90 nop 44692f: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -853,7 +853,7 @@ expression: diff 446a0b: 41 ba 08 00 00 00 mov $0x8,%r10d - 446a11: b8 0e 00 00 00 mov $0xe,%eax - 446a16: 0f 05 syscall -+ 446a11: ++ 446a11: + 446a16: 90 nop + 446a17: 90 nop 446a18: 89 c2 mov %eax,%edx @@ -865,7 +865,7 @@ expression: diff 45ba2c: 48 0f 47 d0 cmova %rax,%rdx - 45ba30: b8 d9 00 00 00 mov $0xd9,%eax - 45ba35: 0f 05 syscall -+ 45ba30: ++ 45ba30: + 45ba35: 90 nop + 45ba36: 90 nop 45ba37: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -877,7 +877,7 @@ expression: diff 45bb50: f3 0f 1e fa endbr64 - 45bb54: b8 27 00 00 00 mov $0x27,%eax - 45bb59: 0f 05 syscall -+ 45bb54: ++ 45bb54: + 45bb59: 90 nop + 45bb5a: 90 nop 45bb5b: c3 ret @@ -889,7 +889,7 @@ expression: diff 45bba0: f3 0f 1e fa endbr64 - 45bba4: b8 8f 00 00 00 mov $0x8f,%eax - 45bba9: 0f 05 syscall -+ 45bba4: ++ 45bba4: + 45bba9: 90 nop + 45bbaa: 90 nop 45bbab: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -901,7 +901,7 @@ expression: diff 45bbd0: f3 0f 1e fa endbr64 - 45bbd4: b8 91 00 00 00 mov $0x91,%eax - 45bbd9: 0f 05 syscall -+ 45bbd4: ++ 45bbd4: + 45bbd9: 90 nop + 45bbda: 90 nop 45bbdb: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -913,7 +913,7 @@ expression: diff 45bc00: f3 0f 1e fa endbr64 - 45bc04: b8 92 00 00 00 mov $0x92,%eax - 45bc09: 0f 05 syscall -+ 45bc04: ++ 45bc04: + 45bc09: 90 nop + 45bc0a: 90 nop 
45bc0b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -925,7 +925,7 @@ expression: diff 45bc30: f3 0f 1e fa endbr64 - 45bc34: b8 93 00 00 00 mov $0x93,%eax - 45bc39: 0f 05 syscall -+ 45bc34: ++ 45bc34: + 45bc39: 90 nop + 45bc3a: 90 nop 45bc3b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -937,7 +937,7 @@ expression: diff 45bc60: f3 0f 1e fa endbr64 - 45bc64: b8 90 00 00 00 mov $0x90,%eax - 45bc69: 0f 05 syscall -+ 45bc64: ++ 45bc64: + 45bc69: 90 nop + 45bc6a: 90 nop 45bc6b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -949,7 +949,7 @@ expression: diff 45bd0d: 48 8b bd 08 ff ff ff mov -0xf8(%rbp),%rdi - 45bd14: b8 4f 00 00 00 mov $0x4f,%eax - 45bd19: 0f 05 syscall -+ 45bd14: ++ 45bd14: + 45bd19: 90 nop + 45bd1a: 90 nop 45bd1b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -961,7 +961,7 @@ expression: diff 45c510: f3 0f 1e fa endbr64 - 45c514: b8 08 00 00 00 mov $0x8,%eax - 45c519: 0f 05 syscall -+ 45c514: ++ 45c514: + 45c519: 90 nop + 45c51a: 90 nop 45c51b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -973,7 +973,7 @@ expression: diff 45c5a9: bf 9c ff ff ff mov $0xffffff9c,%edi - 45c5ae: b8 01 01 00 00 mov $0x101,%eax - 45c5b3: 0f 05 syscall -+ 45c5ae: ++ 45c5ae: + 45c5b3: 90 nop + 45c5b4: 90 nop 45c5b5: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -985,7 +985,7 @@ expression: diff 45c619: bf 9c ff ff ff mov $0xffffff9c,%edi - 45c61e: b8 01 01 00 00 mov $0x101,%eax - 45c623: 0f 05 syscall -+ 45c61e: ++ 45c61e: + 45c623: 90 nop + 45c624: 90 nop 45c625: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -997,7 +997,7 @@ expression: diff 45c6b9: 74 51 je 45c70c <__libc_openat64+0x8c> - 45c6bb: b8 01 01 00 00 mov $0x101,%eax - 45c6c0: 0f 05 syscall -+ 45c6bb: ++ 45c6bb: + 45c6c0: 90 nop + 45c6c1: 90 nop 45c6c2: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1009,7 +1009,7 @@ expression: diff 45c72d: 8b 7d a8 mov -0x58(%rbp),%edi - 45c730: b8 01 01 00 00 mov $0x101,%eax - 45c735: 0f 05 syscall -+ 45c730: ++ 45c730: + 45c735: 90 nop + 
45c736: 90 nop 45c737: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1021,7 +1021,7 @@ expression: diff 45c79d: 31 c0 xor %eax,%eax - 45c79f: 0f 05 syscall - 45c7a1: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax -+ 45c79f: ++ 45c79f: + 45c7a4: 90 nop + 45c7a5: 90 nop + 45c7a6: 90 nop @@ -1035,7 +1035,7 @@ expression: diff - 45c7d3: 8b 7d f8 mov -0x8(%rbp),%edi - 45c7d6: 31 c0 xor %eax,%eax - 45c7d8: 0f 05 syscall -+ 45c7d3: ++ 45c7d3: + 45c7d8: 90 nop + 45c7d9: 90 nop 45c7da: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1047,7 +1047,7 @@ expression: diff 45c85b: 74 13 je 45c870 <__libc_write+0x20> - 45c85d: b8 01 00 00 00 mov $0x1,%eax - 45c862: 0f 05 syscall -+ 45c85d: ++ 45c85d: + 45c862: 90 nop + 45c863: 90 nop 45c864: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1059,7 +1059,7 @@ expression: diff 45c893: 8b 7d f8 mov -0x8(%rbp),%edi - 45c896: b8 01 00 00 00 mov $0x1,%eax - 45c89b: 0f 05 syscall -+ 45c896: ++ 45c896: + 45c89b: 90 nop + 45c89c: 90 nop 45c89d: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1071,7 +1071,7 @@ expression: diff 45c920: 74 26 je 45c948 <__openat64_nocancel+0x58> - 45c922: b8 01 01 00 00 mov $0x101,%eax - 45c927: 0f 05 syscall -+ 45c922: ++ 45c922: + 45c927: 90 nop + 45c928: 90 nop 45c929: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1083,7 +1083,7 @@ expression: diff 45c984: 49 89 ca mov %rcx,%r10 - 45c987: b8 11 00 00 00 mov $0x11,%eax - 45c98c: 0f 05 syscall -+ 45c987: ++ 45c987: + 45c98c: 90 nop + 45c98d: 90 nop 45c98e: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1095,7 +1095,7 @@ expression: diff 45c9c0: f3 0f 1e fa endbr64 - 45c9c4: b8 01 00 00 00 mov $0x1,%eax - 45c9c9: 0f 05 syscall -+ 45c9c4: ++ 45c9c4: + 45c9c9: 90 nop + 45c9ca: 90 nop 45c9cb: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1107,7 +1107,7 @@ expression: diff 45ca13: 48 8d 55 d0 lea -0x30(%rbp),%rdx - 45ca17: b8 10 00 00 00 mov $0x10,%eax - 45ca1c: 0f 05 syscall -+ 45ca17: ++ 45ca17: + 45ca1c: 90 nop + 45ca1d: 90 nop 
45ca1e: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1120,7 +1120,7 @@ expression: diff - 45cabb: b8 2e 01 00 00 mov $0x12e,%eax - 45cac0: 31 ff xor %edi,%edi - 45cac2: 0f 05 syscall -+ 45cabb: ++ 45cabb: + 45cac0: 90 nop + 45cac1: 90 nop + 45cac2: 90 nop @@ -1134,7 +1134,7 @@ expression: diff 45ffa0: 48 8d 78 1c lea 0x1c(%rax),%rdi - 45ffa4: b8 ca 00 00 00 mov $0xca,%eax - 45ffa9: 0f 05 syscall -+ 45ffa4: ++ 45ffa4: + 45ffa9: 90 nop + 45ffaa: 90 nop 45ffab: 48 8d 3d 6e ab 04 00 lea 0x4ab6e(%rip),%rdi # 4aab20 <_dl_load_lock> @@ -1146,7 +1146,7 @@ expression: diff 46306a: be 80 00 00 00 mov $0x80,%esi - 46306f: 44 89 c8 mov %r9d,%eax - 463072: 0f 05 syscall -+ 46306f: ++ 46306f: 463074: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 46307a: 76 dc jbe 463058 <__thread_gscope_wait+0x88> 46307c: 83 f8 f5 cmp $0xfffffff5,%eax @@ -1156,7 +1156,7 @@ expression: diff 46310a: be 80 00 00 00 mov $0x80,%esi - 46310f: 44 89 c8 mov %r9d,%eax - 463112: 0f 05 syscall -+ 46310f: ++ 46310f: 463114: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 46311a: 76 dc jbe 4630f8 <__thread_gscope_wait+0x128> 46311c: 83 f8 f5 cmp $0xfffffff5,%eax @@ -1166,7 +1166,7 @@ expression: diff 00000000004669d0 <__restore_rt>: - 4669d0: 48 c7 c0 0f 00 00 00 mov $0xf,%rax - 4669d7: 0f 05 syscall -+ 4669d0: ++ 4669d0: + 4669d5: 90 nop + 4669d6: 90 nop + 4669d7: 90 nop @@ -1180,7 +1180,7 @@ expression: diff 466aad: 41 ba 08 00 00 00 mov $0x8,%r10d - 466ab3: b8 0d 00 00 00 mov $0xd,%eax - 466ab8: 0f 05 syscall -+ 466ab3: ++ 466ab3: + 466ab8: 90 nop + 466ab9: 90 nop 466aba: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1192,7 +1192,7 @@ expression: diff 46cb16: be 80 00 00 00 mov $0x80,%esi - 46cb1b: 44 89 c0 mov %r8d,%eax - 46cb1e: 0f 05 syscall -+ 46cb1b: ++ 46cb1b: 46cb20: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 46cb26: 77 0d ja 46cb35 <__pthread_disable_asynccancel+0x65> 46cb28: 8b 0f mov (%rdi),%ecx @@ -1202,7 +1202,7 @@ expression: diff 46ccdf: 44 31 c6 xor %r8d,%esi - 46cce2: 45 31 
c0 xor %r8d,%r8d - 46cce5: 0f 05 syscall -+ 46cce2: ++ 46cce2: 46cce7: 85 c0 test %eax,%eax 46cce9: 7f 27 jg 46cd12 <__futex_abstimed_wait64+0x62> 46cceb: 83 f8 ea cmp $0xffffffea,%eax @@ -1212,7 +1212,7 @@ expression: diff 46cd89: 44 89 e2 mov %r12d,%edx - 46cd8c: b8 ca 00 00 00 mov $0xca,%eax - 46cd91: 0f 05 syscall -+ 46cd8c: ++ 46cd8c: + 46cd91: 90 nop + 46cd92: 90 nop 46cd93: 48 89 c3 mov %rax,%rbx @@ -1224,7 +1224,7 @@ expression: diff 46ce17: 44 89 e2 mov %r12d,%edx - 46ce1a: b8 ca 00 00 00 mov $0xca,%eax - 46ce1f: 0f 05 syscall -+ 46ce1a: ++ 46ce1a: + 46ce1f: 90 nop + 46ce20: 90 nop 46ce21: 44 89 ef mov %r13d,%edi @@ -1236,7 +1236,7 @@ expression: diff 46ce6c: 31 d2 xor %edx,%edx - 46ce6e: b8 ca 00 00 00 mov $0xca,%eax - 46ce73: 0f 05 syscall -+ 46ce6e: ++ 46ce6e: + 46ce73: 90 nop + 46ce74: 90 nop 46ce75: 83 f8 da cmp $0xffffffda,%eax @@ -1248,7 +1248,7 @@ expression: diff 46f344: 41 89 ca mov %ecx,%r10d - 46f347: b8 06 01 00 00 mov $0x106,%eax - 46f34c: 0f 05 syscall -+ 46f347: ++ 46f347: + 46f34c: 90 nop + 46f34d: 90 nop 46f34e: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -1260,7 +1260,7 @@ expression: diff 472975: 48 8d 78 1c lea 0x1c(%rax),%rdi - 472979: b8 ca 00 00 00 mov $0xca,%eax - 47297e: 0f 05 syscall -+ 472979: ++ 472979: + 47297e: 90 nop + 47297f: 90 nop 472980: eb 8c jmp 47290e <_dl_fixup+0x10e> @@ -1272,7 +1272,7 @@ expression: diff 476c10: 48 8d 78 1c lea 0x1c(%rax),%rdi - 476c14: b8 ca 00 00 00 mov $0xca,%eax - 476c19: 0f 05 syscall -+ 476c14: ++ 476c14: + 476c19: 90 nop + 476c1a: 90 nop 476c1b: 48 83 7d 98 00 cmpq $0x0,-0x68(%rbp) @@ -1284,7 +1284,7 @@ expression: diff 476e4a: 48 8d 78 1c lea 0x1c(%rax),%rdi - 476e4e: b8 ca 00 00 00 mov $0xca,%eax - 476e53: 0f 05 syscall -+ 476e4e: ++ 476e4e: + 476e53: 90 nop + 476e54: 90 nop 476e55: 48 83 7d 98 00 cmpq $0x0,-0x68(%rbp) From 8b5a4baceefa9b69900d4a3c2ebfc3134c1c9039 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 06:26:31 -0700 Subject: [PATCH 02/24] Fix CI: 32-bit build, Windows 
redzone callback, rustfmt - Gate syscall_callback_redzone behind #[cfg(target_arch = "x86_64")] on Linux since the asm symbol only exists in the x86_64 asm block, fixing the i686 linker error. - Add syscall_callback_redzone entry point to the Windows platform so the new trampoline format (with redzone reservation) works correctly on the Windows emulator. Uses mov+add to SCRATCH to avoid clobbering rax. - Fix rustfmt import ordering in litebox_shim_linux/src/loader/elf.rs. --- litebox_platform_linux_userland/src/lib.rs | 18 ++++++++++++++---- litebox_platform_windows_userland/src/lib.rs | 19 ++++++++++++++++++- litebox_shim_linux/src/loader/elf.rs | 4 ++-- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 7babef6ca..4c06e3c8a 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -1992,6 +1992,7 @@ impl litebox::platform::StdioProvider for LinuxUserland { unsafe extern "C" { // Defined in asm blocks above fn syscall_callback() -> isize; + #[cfg(target_arch = "x86_64")] fn syscall_callback_redzone() -> isize; fn exception_callback(); fn interrupt_callback(); @@ -2073,7 +2074,14 @@ impl ThreadContext<'_> { impl litebox::platform::SystemInfoProvider for LinuxUserland { fn get_syscall_entry_point(&self) -> usize { - syscall_callback_redzone as *const () as usize + #[cfg(target_arch = "x86_64")] + { + syscall_callback_redzone as *const () as usize + } + #[cfg(target_arch = "x86")] + { + syscall_callback as *const () as usize + } } fn get_vdso_address(&self) -> Option { @@ -2740,9 +2748,11 @@ unsafe fn interrupt_signal_handler( // FUTURE: handle trampoline code, too. This is somewhat less important // because it's probably fine for the shim to observe a guest context that // is inside the trampoline. 
- if ip == syscall_callback as *const () as usize - || ip == syscall_callback_redzone as *const () as usize - { + let is_at_syscall_callback = ip == syscall_callback as *const () as usize; + #[cfg(target_arch = "x86_64")] + let is_at_syscall_callback = + is_at_syscall_callback || ip == syscall_callback_redzone as *const () as usize; + if is_at_syscall_callback { // No need to clear `in_guest` or set interrupt; the syscall handler will // clear `in_guest` and call into the shim. return; diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 9d827e057..ed694f3ee 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -562,6 +562,22 @@ syscall_callback: mov BYTE PTR [r11 + {IS_IN_GUEST}], 0 // Set rsp to the top of the guest context. mov QWORD PTR [r11 + {SCRATCH}], rsp + jmp .Lsyscall_callback_common + + .globl syscall_callback_redzone +syscall_callback_redzone: + // Same as syscall_callback, but the trampoline has already reserved + // 128 bytes below RSP to protect the SysV red zone. Recover the + // architectural guest stack pointer. + mov r11d, DWORD PTR [rip + {TLS_INDEX}] + mov r11, QWORD PTR gs:[r11 * 8 + TEB_TLS_SLOTS_OFFSET] + mov BYTE PTR [r11 + {IS_IN_GUEST}], 0 + // Save RSP + 128 to SCRATCH without clobbering any guest registers. + // Use SCRATCH as a temporary: store rsp, then add 128 in-place. 
+ mov QWORD PTR [r11 + {SCRATCH}], rsp + add QWORD PTR [r11 + {SCRATCH}], 128 + +.Lsyscall_callback_common: mov rsp, QWORD PTR [r11 + {GUEST_CONTEXT_TOP}] // TODO: save float and vector registers (xsave or fxsave) @@ -1948,6 +1964,7 @@ impl litebox::mm::allocator::MemoryProvider for WindowsUserland { unsafe extern "C" { // Defined in asm blocks above fn syscall_callback() -> isize; + fn syscall_callback_redzone() -> isize; fn exception_callback() -> isize; fn interrupt_callback(); fn switch_to_guest_start(); @@ -2037,7 +2054,7 @@ impl ThreadContext<'_> { impl litebox::platform::SystemInfoProvider for WindowsUserland { fn get_syscall_entry_point(&self) -> usize { - syscall_callback as *const () as usize + syscall_callback_redzone as *const () as usize } fn get_vdso_address(&self) -> Option { diff --git a/litebox_shim_linux/src/loader/elf.rs b/litebox_shim_linux/src/loader/elf.rs index 63a9a5d1e..59e972b86 100644 --- a/litebox_shim_linux/src/loader/elf.rs +++ b/litebox_shim_linux/src/loader/elf.rs @@ -10,12 +10,12 @@ use litebox::{ platform::{RawConstPointer as _, SystemInfoProvider as _}, utils::{ReinterpretSignedExt, TruncateExt}, }; -use litebox_common_linux::{errno::Errno, loader::ElfParsedFile, MapFlags}; +use litebox_common_linux::{MapFlags, errno::Errno, loader::ElfParsedFile}; use thiserror::Error; use crate::{ - loader::auxv::{AuxKey, AuxVec}, MutPtr, + loader::auxv::{AuxKey, AuxVec}, }; use super::stack::UserStack; From aa7dd83b86dbe523f1ea3aab953b7117fb0e20cd Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 07:43:41 -0700 Subject: [PATCH 03/24] Runtime ELF patching and rtld_audit removal Add runtime syscall patching in the shim's mmap hook: when an ELF segment with PROT_EXEC is mapped, patch syscall instructions in-place and set up a trampoline region. The loader also patches the main binary at load time when it lacks a trampoline. 
Remove rtld_audit entirely: gut build.rs, remove the audit .so injection from the runner, and remove the REQUIRE_RTLD_AUDIT global. Supporting changes: - Add ReadAt impl for &[u8] in litebox_common_linux - Hook finalize_elf_patch into sys_close to mprotect trampolines RX - Add elf_patch_cache on GlobalState and suppress_elf_runtime_patch on Task - Update ratchet test (runner has zero globals now) --- Cargo.lock | 1 + dev_tests/src/ratchet.rs | 1 - litebox_common_linux/src/loader.rs | 18 + litebox_runner_linux_userland/build.rs | 44 +- litebox_runner_linux_userland/src/lib.rs | 52 +- litebox_shim_linux/Cargo.toml | 1 + litebox_shim_linux/src/lib.rs | 9 + litebox_shim_linux/src/loader/elf.rs | 185 ++++++- litebox_shim_linux/src/syscalls/file.rs | 4 + litebox_shim_linux/src/syscalls/mm.rs | 538 ++++++++++++++++++++- litebox_shim_linux/src/syscalls/process.rs | 1 + 11 files changed, 747 insertions(+), 107 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1466b96ea..4b4a073c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1685,6 +1685,7 @@ dependencies = [ "litebox", "litebox_common_linux", "litebox_platform_multiplex", + "litebox_syscall_rewriter", "once_cell", "ringbuf", "seq-macro", diff --git a/dev_tests/src/ratchet.rs b/dev_tests/src/ratchet.rs index 276452d4d..8e6d35034 100644 --- a/dev_tests/src/ratchet.rs +++ b/dev_tests/src/ratchet.rs @@ -40,7 +40,6 @@ fn ratchet_globals() -> Result<()> { ("litebox_platform_lvbs/", 23), ("litebox_platform_multiplex/", 1), ("litebox_platform_windows_userland/", 8), - ("litebox_runner_linux_userland/", 1), ("litebox_runner_lvbs/", 5), ("litebox_runner_snp/", 1), ("litebox_shim_linux/", 1), diff --git a/litebox_common_linux/src/loader.rs b/litebox_common_linux/src/loader.rs index 8d061b93d..6b1fcc88b 100644 --- a/litebox_common_linux/src/loader.rs +++ b/litebox_common_linux/src/loader.rs @@ -579,6 +579,24 @@ pub trait ReadAt { fn size(&mut self) -> Result; } +impl ReadAt for &[u8] { + type Error = Errno; + + fn read_at(&mut 
self, offset: u64, buf: &mut [u8]) -> Result<(), Self::Error> { + let offset: usize = offset.truncate(); + let end = offset.checked_add(buf.len()).ok_or(Errno::ENODATA)?; + if end > self.len() { + return Err(Errno::ENODATA); + } + buf.copy_from_slice(&self[offset..end]); + Ok(()) + } + + fn size(&mut self) -> Result { + Ok(self.len() as u64) + } +} + pub trait MapMemory { type Error; diff --git a/litebox_runner_linux_userland/build.rs b/litebox_runner_linux_userland/build.rs index 3360e452a..f189226e4 100644 --- a/litebox_runner_linux_userland/build.rs +++ b/litebox_runner_linux_userland/build.rs @@ -1,48 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -use std::path::PathBuf; - -const RTLD_AUDIT_DIR: &str = "../litebox_rtld_audit"; - fn main() { - let mut make_cmd = std::process::Command::new("make"); - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - if target_arch != "x86_64" { - // XXX: Currently 32-bit x86 is unsupported (unimplemented), skip building - return; - } - make_cmd - .current_dir(RTLD_AUDIT_DIR) - .env("OUT_DIR", &out_dir) - .env("ARCH", target_arch); - if std::env::var("PROFILE").unwrap_or_default() == "debug" { - make_cmd.env("DEBUG", "1"); - } else { - // Explicitly remove DEBUG to prevent inheriting it from the - // parent environment, which would cause the C library to be - // built with debug prints enabled. - make_cmd.env_remove("DEBUG"); - } - // Force rebuild in case CFLAGS changed (e.g., debug -> release) but - // the source did not. 
- let _ = std::fs::remove_file(out_dir.join("litebox_rtld_audit.so")); - let output = make_cmd - .output() - .expect("Failed to execute make for rtld_audit"); - assert!( - output.status.success(), - "failed to build rtld_audit.so via make:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr), - ); - assert!( - out_dir.join("litebox_rtld_audit.so").exists(), - "Build failed to create necessary file" - ); - - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/rtld_audit.c"); - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/Makefile"); - println!("cargo:rerun-if-changed=build.rs"); + // rtld_audit has been removed; nothing to build. } diff --git a/litebox_runner_linux_userland/src/lib.rs b/litebox_runner_linux_userland/src/lib.rs index 28521c370..b2ee00a76 100644 --- a/litebox_runner_linux_userland/src/lib.rs +++ b/litebox_runner_linux_userland/src/lib.rs @@ -89,9 +89,6 @@ pub enum InterceptionBackend { Rewriter, } -static REQUIRE_RTLD_AUDIT: core::sync::atomic::AtomicBool = - core::sync::atomic::AtomicBool::new(false); - struct MmappedFile { data: &'static [u8], abs_path: PathBuf, @@ -130,14 +127,14 @@ pub fn run(cli_args: CliArgs) -> Result<()> { ) } - // --program-from-tar loads pre-rewritten binaries that depend on litebox_rtld_audit.so, - // which is only injected by the rewriter backend. + // --program-from-tar loads pre-rewritten binaries that require the rewriter + // backend's runtime trampoline setup. 
if cli_args.program_from_tar && !matches!(cli_args.interception_backend, InterceptionBackend::Rewriter) { anyhow::bail!( "--program-from-tar requires --interception-backend=rewriter \ - (the packaged binary is pre-rewritten and needs the audit library)" + (the packaged binary is pre-rewritten and needs the rewriter runtime)" ); } @@ -307,34 +304,10 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } }); - // When using the rewriter backend, automatically include litebox_rtld_audit.so - // in the filesystem so tests and users don't need to include it in tar files + // When using the rewriter backend, the shim's mmap hook handles + // syscall patching at runtime — no audit library needed. match cli_args.interception_backend { - InterceptionBackend::Rewriter => { - #[cfg(not(target_arch = "x86_64"))] - eprintln!("WARN: litebox_rtld_audit not currently supported on non-x86_64 arch"); - #[cfg(target_arch = "x86_64")] - in_mem.with_root_privileges(|fs| { - let rwxr_xr_x = Mode::RWXU | Mode::RGRP | Mode::XGRP | Mode::ROTH | Mode::XOTH; - let _ = fs.mkdir("/lib", rwxr_xr_x); - let fd = fs - .open( - "/lib/litebox_rtld_audit.so", - litebox::fs::OFlags::WRONLY | litebox::fs::OFlags::CREAT, - rwxr_xr_x, - ) - .expect("Failed to create /lib/litebox_rtld_audit.so"); - fs.initialize_primarily_read_heavy_file( - &fd, - include_bytes!(concat!(env!("OUT_DIR"), "/litebox_rtld_audit.so")).into(), - ); - fs.close(&fd) - .expect("Failed to close /lib/litebox_rtld_audit.so"); - }); - } - InterceptionBackend::Seccomp => { - // No need to include rtld_audit.so for seccomp backend - } + InterceptionBackend::Rewriter | InterceptionBackend::Seccomp => {} } let tar_ro = litebox::fs::tar_ro::FileSystem::new(litebox, tar_data.into()); @@ -397,7 +370,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { match cli_args.interception_backend { InterceptionBackend::Seccomp => platform.enable_seccomp_based_syscall_interception(), InterceptionBackend::Rewriter => { - REQUIRE_RTLD_AUDIT.store(true, 
core::sync::atomic::Ordering::SeqCst); + // Runtime patching is handled by the shim's mmap hook — nothing to do here. } } @@ -479,13 +452,6 @@ fn pin_thread_to_cpu(cpu: usize) { } } -fn fixup_env(envp: &mut Vec) { - // Enable the audit library to load trampoline code for rewritten binaries. - if REQUIRE_RTLD_AUDIT.load(core::sync::atomic::Ordering::SeqCst) { - let p = c"LD_AUDIT=/lib/litebox_rtld_audit.so"; - let has_ld_audit = envp.iter().any(|var| var.as_c_str() == p); - if !has_ld_audit { - envp.push(p.into()); - } - } +fn fixup_env(_envp: &mut Vec) { + // No-op: rtld_audit has been removed; runtime patching is handled by the shim. } diff --git a/litebox_shim_linux/Cargo.toml b/litebox_shim_linux/Cargo.toml index 94d889a7f..ff0b4ea4e 100644 --- a/litebox_shim_linux/Cargo.toml +++ b/litebox_shim_linux/Cargo.toml @@ -16,6 +16,7 @@ syscalls = { version = "0.6", default-features = false } seq-macro = "0.3" ringbuf = { version = "0.4.8", default-features = false, features = ["alloc"] } zerocopy = { version = "0.8", default-features = false, features = ["derive"] } +litebox_syscall_rewriter = { version = "0.1.0", path = "../litebox_syscall_rewriter", default-features = false } [features] default = ["platform_linux_userland"] diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index 2834f7b72..38f96b535 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -200,6 +200,7 @@ impl LinuxShimBuilder { next_thread_id: 2.into(), // start from 2, as 1 is used by the main thread litebox: self.litebox, unix_addr_table: litebox::sync::RwLock::new(syscalls::unix::UnixAddrTable::new()), + elf_patch_cache: litebox::sync::Mutex::new(alloc::collections::BTreeMap::new()), }); LinuxShim(global) } @@ -257,6 +258,7 @@ impl LinuxShim { fs: Arc::new(syscalls::file::FsState::new()).into(), files: files.into(), signals: syscalls::signal::SignalState::new_process(), + suppress_elf_runtime_patch: Cell::new(false), }, }; 
entrypoints.task.load_program( @@ -1059,6 +1061,8 @@ struct GlobalState { next_thread_id: core::sync::atomic::AtomicI32, /// UNIX domain socket address table unix_addr_table: litebox::sync::RwLock>, + /// Per-process collection of ELF patching state for runtime syscall rewriting. + elf_patch_cache: litebox::sync::Mutex, } struct Task { @@ -1082,6 +1086,9 @@ struct Task { files: RefCell>>, /// Signal state signals: syscalls::signal::SignalState, + /// Suppresses runtime ELF patching in `do_mmap_file` while the ELF loader + /// is actively loading a binary (prevents double-mapping the trampoline). + suppress_elf_runtime_patch: Cell, } impl Drop for Task { @@ -1121,6 +1128,7 @@ mod test_utils { fs: Arc::new(syscalls::file::FsState::new()).into(), files: files.into(), signals: syscalls::signal::SignalState::new_process(), + suppress_elf_runtime_patch: Cell::new(false), global: self, } } @@ -1145,6 +1153,7 @@ mod test_utils { fs: self.fs.clone(), files: self.files.clone(), signals: self.signals.clone_for_new_task(), + suppress_elf_runtime_patch: Cell::new(false), }; Some(task) } diff --git a/litebox_shim_linux/src/loader/elf.rs b/litebox_shim_linux/src/loader/elf.rs index 59e972b86..6cbebaf98 100644 --- a/litebox_shim_linux/src/loader/elf.rs +++ b/litebox_shim_linux/src/loader/elf.rs @@ -7,10 +7,14 @@ use alloc::{ffi::CString, vec::Vec}; use litebox::{ fs::{Mode, OFlags}, mm::linux::{CreatePagesFlags, MappingError, PAGE_SIZE}, - platform::{RawConstPointer as _, SystemInfoProvider as _}, + platform::{RawConstPointer as _, RawMutPointer as _, SystemInfoProvider as _}, utils::{ReinterpretSignedExt, TruncateExt}, }; -use litebox_common_linux::{MapFlags, errno::Errno, loader::ElfParsedFile}; +use litebox_common_linux::{ + MapFlags, + errno::Errno, + loader::{ElfParsedFile, ReadAt as _}, +}; use thiserror::Error; use crate::{ @@ -148,6 +152,79 @@ impl litebox_common_linux::loader::MapMemory for ElfFile<'_, FS> { } } +/// A 
[`MapMemory`](litebox_common_linux::loader::MapMemory) wrapper that reads +/// file-backed data from an in-memory buffer instead of from a file descriptor. +/// Used when the loader has patched the ELF binary on the fly (e.g. syscall +/// rewriting of the dynamic linker). +/// +/// `reserve`, `map_zero`, and `protect` are delegated to the underlying +/// [`ElfFile`]; `map_file` is replaced by `map_zero` + a memory copy from the +/// patched buffer. +struct PatchedMapper<'a, 'b, FS: ShimFS> { + inner: &'b mut ElfFile<'a, FS>, + data: &'b [u8], +} + +impl litebox_common_linux::loader::MapMemory for PatchedMapper<'_, '_, FS> { + type Error = Errno; + + fn reserve(&mut self, len: usize, align: usize) -> Result { + self.inner.reserve(len, align) + } + + fn map_file( + &mut self, + address: usize, + len: usize, + offset: u64, + prot: &litebox_common_linux::loader::Protection, + ) -> Result<(), Self::Error> { + // Allocate anonymous RW pages, copy from the in-memory buffer, then + // apply the requested protection. + self.inner.map_zero( + address, + len, + &litebox_common_linux::loader::Protection { + read: true, + write: true, + execute: false, + }, + )?; + + let offset: usize = offset.truncate(); + if offset < self.data.len() { + let end = core::cmp::min(offset + len, self.data.len()); + let src = &self.data[offset..end]; + let dest = MutPtr::::from_usize(address); + dest.copy_from_slice(0, src).ok_or(Errno::EFAULT)?; + } + + // Set final permissions if different from the writable mapping above. 
+ if !prot.write || prot.execute { + self.inner.protect(address, len, prot)?; + } + Ok(()) + } + + fn map_zero( + &mut self, + address: usize, + len: usize, + prot: &litebox_common_linux::loader::Protection, + ) -> Result<(), Self::Error> { + self.inner.map_zero(address, len, prot) + } + + fn protect( + &mut self, + address: usize, + len: usize, + prot: &litebox_common_linux::loader::Protection, + ) -> Result<(), Self::Error> { + self.inner.protect(address, len, prot) + } +} + /// Struct to hold the information needed to start the program /// (entry point and user stack top). pub struct ElfLoadInfo { @@ -165,6 +242,9 @@ pub(crate) struct ElfLoader<'a, FS: ShimFS> { struct FileAndParsed<'a, FS: ShimFS> { file: ElfFile<'a, FS>, parsed: ElfParsedFile, + /// When the rewriter backend is active and the binary was not pre-patched, + /// the loader patches it on the fly and loads from this in-memory copy. + patched_data: Option>, } impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { @@ -172,11 +252,91 @@ impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { let file = ElfFile::new(task, path).map_err(ElfLoaderError::OpenError)?; let mut parsed = litebox_common_linux::loader::ElfParsedFile::parse(&mut &file) .map_err(ElfLoaderError::ParseError)?; - match parsed.parse_trampoline(&mut &file, task.global.platform.get_syscall_entry_point()) { - Ok(()) | Err(litebox_common_linux::loader::ElfParseError::UnpatchedBinary) => {} - Err(err) => return Err(ElfLoaderError::ParseError(err)), - } - Ok(Self { file, parsed }) + + let syscall_entry_point = task.global.platform.get_syscall_entry_point(); + let trampoline_result = parsed.parse_trampoline(&mut &file, syscall_entry_point); + + // If the rewriter backend is active (syscall_entry_point != 0) and the + // binary lacks a trampoline, patch it on the fly so that both the main + // program and the dynamic linker are covered. 
+ let patched_data = if syscall_entry_point != 0 && trampoline_result.is_err() { + let size: usize = (&mut &file) + .size() + .map_err(ElfLoaderError::OpenError)? + .truncate(); + let mut buf = alloc::vec![0u8; size]; + (&mut &file) + .read_at(0, &mut buf) + .map_err(ElfLoaderError::OpenError)?; + + let mut skipped_addrs = alloc::vec::Vec::new(); + match litebox_syscall_rewriter::hook_syscalls_in_elf(&buf, None, &mut skipped_addrs) { + Ok(patched) => { + if !skipped_addrs.is_empty() { + litebox::log_println!( + task.global.platform, + "warning: {} unpatchable syscall instruction(s) (addresses: {:?})", + skipped_addrs.len(), + skipped_addrs, + ); + } + // Re-parse the patched binary and extract its trampoline. + parsed = + litebox_common_linux::loader::ElfParsedFile::parse(&mut patched.as_slice()) + .map_err(ElfLoaderError::ParseError)?; + parsed + .parse_trampoline(&mut patched.as_slice(), syscall_entry_point) + .map_err(ElfLoaderError::ParseError)?; + Some(patched) + } + Err(_) => { + // Patching failed (e.g. ET_REL, no .text). Proceed without + // a trampoline — the binary may simply have no syscalls. + None + } + } + } else { + None + }; + + Ok(Self { + file, + parsed, + patched_data, + }) + } + + /// Load the ELF into guest memory, choosing the right mapper depending on + /// whether the binary was patched in memory. + fn load_mapped( + &mut self, + platform: &(impl litebox::platform::RawPointerProvider + litebox::platform::SystemInfoProvider), + ) -> Result { + // Suppress runtime ELF patching (maybe_patch_exec_segment) when the + // loader will map the trampoline itself via load_trampoline(). Without + // this, both paths would map the same region — the second MAP_FIXED + // destroys the first mapping. + // + // Only suppress when using the ElfFile mapper (which routes through + // do_mmap_file → maybe_patch_exec_segment) AND the loader actually + // has a trampoline to map. When patched_data is None and there's no + // trampoline (e.g. 
the rewriter declined the binary), the runtime + // fallback must remain enabled. + let has_loader_trampoline = self.patched_data.is_some() || self.parsed.has_trampoline(); + let suppress = has_loader_trampoline && self.patched_data.is_none(); + self.file.task.suppress_elf_runtime_patch.set(suppress); + let result = if let Some(ref data) = self.patched_data { + let mut mapper = PatchedMapper { + inner: &mut self.file, + data, + }; + self.parsed.load(&mut mapper, &mut &*platform) + } else { + self.parsed.load(&mut self.file, &mut &*platform) + }; + self.file.task.suppress_elf_runtime_patch.set(false); + + Ok(result?) } } @@ -207,18 +367,11 @@ impl<'a, FS: ShimFS> ElfLoader<'a, FS> { let global = &self.main.file.task.global; // Load the main ELF file first so that it gets privileged addresses. - let info = self - .main - .parsed - .load(&mut self.main.file, &mut &*global.platform)?; + let info = self.main.load_mapped(global.platform)?; // Load the interpreter ELF file, if any. let interp = if let Some(interp) = &mut self.interp { - Some( - interp - .parsed - .load(&mut interp.file, &mut &*global.platform)?, - ) + Some(interp.load_mapped(global.platform)?) } else { None }; diff --git a/litebox_shim_linux/src/syscalls/file.rs b/litebox_shim_linux/src/syscalls/file.rs index 03bf151ad..d1f219579 100644 --- a/litebox_shim_linux/src/syscalls/file.rs +++ b/litebox_shim_linux/src/syscalls/file.rs @@ -536,6 +536,10 @@ impl Task { /// Handle syscall `close` pub(crate) fn sys_close(&self, fd: i32) -> Result<(), Errno> { + // Finalize any in-progress ELF patching for this fd (mprotect + // trampoline RW→RX) before closing the descriptor. 
+ self.finalize_elf_patch(fd); + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; diff --git a/litebox_shim_linux/src/syscalls/mm.rs b/litebox_shim_linux/src/syscalls/mm.rs index ce6c3513c..453039cba 100644 --- a/litebox_shim_linux/src/syscalls/mm.rs +++ b/litebox_shim_linux/src/syscalls/mm.rs @@ -4,10 +4,11 @@ //! Implementation of memory management related syscalls, eg., `mmap`, `munmap`, etc. //! Most of these syscalls which are not backed by files are implemented in [`litebox_common_linux::mm`]. +use alloc::collections::BTreeMap; use litebox::{ mm::linux::{MappingError, PAGE_SIZE, PageRange}, platform::{ - PageManagementProvider, RawConstPointer, RawMutPointer, + PageManagementProvider, RawConstPointer, RawMutPointer, SystemInfoProvider, page_mgmt::{FixedAddressBehavior, MemoryRegionPermissions}, }, }; @@ -17,6 +18,39 @@ use crate::MutPtr; use crate::ShimFS; use crate::Task; +/// Per-fd state for the shim's runtime ELF syscall rewriter. +/// +/// Tracks base address and trampoline write cursor for each ELF file that +/// has executable segments mapped via `do_mmap_file()`. +pub(crate) struct ElfPatchState { + /// Base virtual address of the ELF (recorded from first mmap at offset 0). + pub _base_addr: usize, + /// Whether this file is already pre-patched (trampoline magic found at file tail). + pub pre_patched: bool, + /// For pre-patched binaries: file offset and size of the trampoline data. + pub trampoline_file_offset: u64, + pub trampoline_file_size: usize, + /// For pre-patched binaries: virtual address offset of the trampoline in the ELF. + pub _trampoline_vaddr: usize, + /// Start address of the trampoline region (runtime). + pub trampoline_addr: usize, + /// Current write position within the trampoline (byte offset from `trampoline_addr`). + pub trampoline_cursor: usize, + /// Whether the trampoline region has been allocated. 
+ pub trampoline_mapped: bool, + /// Total number of trampoline bytes currently mapped. + pub trampoline_mapped_len: usize, + /// Whether any runtime-generated stubs were successfully linked from code + /// in this fd to the trampoline. + pub runtime_patches_committed: bool, + /// File path of the ELF (from the fd path table, if available). + #[allow(dead_code)] + pub file_path: Option, +} + +/// Per-process collection of ELF patching state, keyed by fd number. +pub(crate) type ElfPatchCache = BTreeMap; + #[inline] fn align_up(addr: usize, align: usize) -> usize { debug_assert!(align.is_power_of_two()); @@ -76,12 +110,42 @@ impl Task { fd: i32, offset: usize, ) -> Result, MappingError> { - if let Some(cow_result) = + let is_exec = prot.contains(ProtFlags::PROT_EXEC); + + // Perform the normal mmap first (CoW or memcpy fallback). + let result = if let Some(cow_result) = self.try_cow_mmap_file(suggested_addr, len, &prot, &flags, fd, offset) { - return cow_result; + cow_result? + } else { + self.do_mmap_file_memcpy(suggested_addr, len, prot, flags, fd, offset)? + }; + + // Runtime syscall rewriting: patch PROT_EXEC segments in-place. + // Suppressed during ELF loader's load() sequence because the loader + // maps the trampoline itself via load_trampoline(). Running both + // paths would double-map the trampoline, with the second MAP_FIXED + // destroying the first mapping. + if !self.suppress_elf_runtime_patch.get() { + if is_exec { + let syscall_entry = self.global.platform.get_syscall_entry_point(); + if syscall_entry != 0 + && !self.maybe_patch_exec_segment(result, len, fd, offset, syscall_entry) + { + // Trampoline setup failed for a pre-patched binary whose + // .text already contains JMPs to the trampoline address. + // Continuing would guarantee a SIGSEGV on the first + // rewritten syscall, so fail the mmap instead. 
+ let _ = self.sys_munmap(result, len); + return Err(MappingError::OutOfMemory); + } + } else if offset == 0 { + // First mmap at offset 0: record the base address for later patching. + self.init_elf_patch_state(fd, result.as_usize()); + } } - self.do_mmap_file_memcpy(suggested_addr, len, prot, flags, fd, offset) + + Ok(result) } /// Attempt to create a CoW mapping for a file with static backing data. @@ -352,6 +416,472 @@ impl Task { ) -> Result<(), Errno> { litebox_common_linux::mm::sys_madvise(&self.global.pm, addr, len, advice) } + + // ── Runtime ELF syscall patching ───────────────────────────────────── + + /// Initialize ELF patch state for an fd on its first mmap at offset 0. + /// + /// Reads the ELF header to determine the trampoline address (page-aligned + /// end of the highest PT_LOAD segment) and checks the file tail for the + /// trampoline magic to determine if it's pre-patched. + #[allow(clippy::cast_possible_truncation)] + fn init_elf_patch_state(&self, fd: i32, base_addr: usize) { + // Quick check: skip if already initialized. + if self.global.elf_patch_cache.lock().contains_key(&fd) { + return; + } + + // Read the ELF header (first 64 bytes covers both 32-bit and 64-bit). + let mut ehdr_buf = [0u8; 64]; + if self.sys_read(fd, &mut ehdr_buf, Some(0)).is_err() { + return; // Not readable, skip + } + + // Verify ELF magic + if &ehdr_buf[0..4] != b"\x7fELF" { + return; // Not an ELF file + } + + // Parse as 64-bit ELF (runtime patching is x86-64 only). 
+ let e_phoff = u64::from_le_bytes(ehdr_buf[32..40].try_into().unwrap()) as usize; + let e_phentsize = u16::from_le_bytes(ehdr_buf[54..56].try_into().unwrap()) as usize; + let e_phnum = u16::from_le_bytes(ehdr_buf[56..58].try_into().unwrap()) as usize; + let e_type = u16::from_le_bytes(ehdr_buf[16..18].try_into().unwrap()); + + // Read program headers to find max PT_LOAD end + let phdrs_size = e_phentsize * e_phnum; + if phdrs_size == 0 || phdrs_size > 0x10000 { + return; // Sanity check + } + let mut phdrs_buf = alloc::vec![0u8; phdrs_size]; + if self.sys_read(fd, &mut phdrs_buf, Some(e_phoff)).is_err() { + return; + } + + // Find highest PT_LOAD end (p_vaddr + p_memsz) + let mut max_load_end: u64 = 0; + for i in 0..e_phnum { + let ph = &phdrs_buf[i * e_phentsize..][..e_phentsize]; + let p_type = u32::from_le_bytes(ph[0..4].try_into().unwrap()); + if p_type != 1 { + // PT_LOAD = 1 + continue; + } + let p_vaddr = u64::from_le_bytes(ph[16..24].try_into().unwrap()); + let p_memsz = u64::from_le_bytes(ph[40..48].try_into().unwrap()); + let end = p_vaddr + p_memsz; + if end > max_load_end { + max_load_end = end; + } + } + + if max_load_end == 0 { + return; // No PT_LOAD segments + } + + // For ET_DYN (PIE/shared libs), p_vaddr is relative to base_addr. + // For ET_EXEC, p_vaddr is absolute and base_addr is 0. + let trampoline_vaddr = if e_type == 3 { + // ET_DYN + base_addr + (max_load_end as usize).next_multiple_of(PAGE_SIZE) + } else { + // ET_EXEC + (max_load_end as usize).next_multiple_of(PAGE_SIZE) + }; + + // Check if file is pre-patched by reading the last 32 bytes for magic + let (pre_patched, tramp_file_offset, tramp_vaddr, tramp_file_size) = + self.check_trampoline_magic(fd); + + // For pre-patched binaries, use the vaddr from the header instead. + let trampoline_vaddr = if pre_patched { + if e_type == 3 { + base_addr + tramp_vaddr as usize + } else { + tramp_vaddr as usize + } + } else { + trampoline_vaddr + }; + + // Insert under lock (re-check for races). 
+ let mut cache = self.global.elf_patch_cache.lock(); + cache.entry(fd).or_insert(ElfPatchState { + _base_addr: base_addr, + pre_patched, + trampoline_file_offset: tramp_file_offset, + trampoline_file_size: tramp_file_size as usize, + _trampoline_vaddr: tramp_vaddr as usize, + trampoline_addr: trampoline_vaddr, + trampoline_cursor: 0, + trampoline_mapped: false, + trampoline_mapped_len: 0, + runtime_patches_committed: false, + file_path: None, + }); + } + + /// Check if a file has the LITEBOX trampoline magic at its tail. + /// Returns (is_pre_patched, file_offset, vaddr, trampoline_size). + fn check_trampoline_magic(&self, fd: i32) -> (bool, u64, u64, u64) { + let Ok(stat) = self.sys_fstat(fd) else { + return (false, 0, 0, 0); + }; + let file_size = stat.st_size; + if file_size < 32 { + return (false, 0, 0, 0); + } + let mut tail = [0u8; 32]; + if self.sys_read(fd, &mut tail, Some(file_size - 32)).is_err() { + return (false, 0, 0, 0); + } + if &tail[0..8] != litebox_syscall_rewriter::TRAMPOLINE_MAGIC { + return (false, 0, 0, 0); + } + // Parse header: magic(8) | file_offset(8) | vaddr(8) | size(8) + let file_offset = u64::from_le_bytes(tail[8..16].try_into().unwrap()); + let vaddr = u64::from_le_bytes(tail[16..24].try_into().unwrap()); + let trampoline_size = u64::from_le_bytes(tail[24..32].try_into().unwrap()); + (true, file_offset, vaddr, trampoline_size) + } + + /// Patch an executable segment in-place after it has been mapped. + /// + /// For pre-patched binaries: maps the trampoline from the file and writes + /// the syscall entry point. + /// For unpatched binaries: calls `patch_code_segment()` to rewrite syscall + /// instructions and places the generated stubs in the trampoline region. + /// + /// Returns `true` on success or non-fatal skip. Returns `false` when a + /// pre-patched binary's trampoline could not be set up — the caller must + /// fail the mapping because the code already contains JMPs to the + /// trampoline address. 
+ #[allow(clippy::cast_possible_truncation)] + fn maybe_patch_exec_segment( + &self, + mapped_addr: MutPtr, + len: usize, + fd: i32, + offset: usize, + syscall_entry: usize, + ) -> bool { + // Initialize patch state if this is the first mmap for this fd. + if offset == 0 { + self.init_elf_patch_state(fd, mapped_addr.as_usize()); + } + + let mut cache = self.global.elf_patch_cache.lock(); + let Some(state) = cache.get_mut(&fd) else { + return true; // No patch state — not an ELF we're tracking + }; + + if state.pre_patched { + // Pre-patched binary: map the trampoline data from the file. + if !state.trampoline_mapped && state.trampoline_file_size > 0 { + let tramp_addr = state.trampoline_addr; + let tramp_len = align_up(state.trampoline_file_size, PAGE_SIZE); + + // Allocate RW region at the trampoline address. + let alloc_result = self + .do_mmap_anonymous( + Some(tramp_addr), + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED, + ) + .or_else(|_| { + self.do_mmap_anonymous( + Some(tramp_addr), + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE, + ) + }); + let Ok(alloc_ptr) = alloc_result else { + return false; + }; + let actual_addr = alloc_ptr.as_usize(); + if actual_addr != tramp_addr { + let _ = self.sys_munmap(MutPtr::::from_usize(actual_addr), tramp_len); + return false; + } + + // Read trampoline data from the file. + let mut tramp_data = alloc::vec![0u8; state.trampoline_file_size]; + let file_off = state.trampoline_file_offset as usize; + let tramp_ptr = MutPtr::::from_usize(tramp_addr); + match self.sys_read(fd, &mut tramp_data, Some(file_off)) { + Ok(n) if n == tramp_data.len() => {} + _ => { + let _ = self.sys_munmap(tramp_ptr, tramp_len); + return false; + } + } + + // Write syscall entry point to the first 8 bytes. 
+ if tramp_data.len() >= 8 { + tramp_data[..8].copy_from_slice(&syscall_entry.to_le_bytes()); + } + + // Write to the mapped region. + if tramp_ptr.copy_from_slice(0, &tramp_data).is_none() { + let _ = self.sys_munmap(tramp_ptr, tramp_len); + return false; + } + + // Protect as RX immediately. + if self + .sys_mprotect( + tramp_ptr, + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ) + .is_err() + { + let _ = self.sys_munmap(tramp_ptr, tramp_len); + return false; + } + + state.trampoline_mapped = true; + state.trampoline_mapped_len = tramp_len; + } + return true; + } + + // ── Runtime patching path (unpatched binaries) ─────────────── + + // Allocate the trampoline region if not yet done. + let addr_usize = mapped_addr.as_usize(); + if !state.trampoline_mapped { + let tramp_addr = state.trampoline_addr; + + // Try MAP_FIXED first — works when ensure_space_after reserved + // PROT_NONE space (shared libraries). Falls back to a hint-based + // allocation for the ElfLoader path where no headroom is reserved. + let actual_addr = self + .do_mmap_anonymous( + Some(tramp_addr), + PAGE_SIZE, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED, + ) + .or_else(|_| { + self.do_mmap_anonymous( + Some(tramp_addr), + PAGE_SIZE, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE, + ) + }); + let actual_addr = match actual_addr { + Ok(ptr) => ptr.as_usize(), + Err(_) => return true, + }; + + // Verify the trampoline is within JMP rel32 range (+-2GB) of the code. + let distance = actual_addr.abs_diff(addr_usize); + if distance > 0x7FFF_0000 { + let _ = self.sys_munmap(MutPtr::::from_usize(actual_addr), PAGE_SIZE); + return true; + } + + state.trampoline_addr = actual_addr; + + // Write the 8-byte syscall entry point at the start. 
+ let entry_ptr = MutPtr::::from_usize(actual_addr); + if entry_ptr + .copy_from_slice(0, &syscall_entry.to_le_bytes()) + .is_none() + { + let _ = self.sys_munmap(MutPtr::::from_usize(actual_addr), PAGE_SIZE); + return true; + } + state.trampoline_cursor = 8; // stubs start after the 8-byte entry + state.trampoline_mapped = true; + state.trampoline_mapped_len = PAGE_SIZE; + } + + let restore_trampoline_rx = |task: &Self, state: &ElfPatchState| { + if state.trampoline_mapped_len > 0 { + let _ = task.sys_mprotect( + MutPtr::::from_usize(state.trampoline_addr), + state.trampoline_mapped_len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + } + }; + + // Make the trampoline RW for writing stubs. + if state.trampoline_mapped_len > 0 + && self + .sys_mprotect( + MutPtr::::from_usize(state.trampoline_addr), + state.trampoline_mapped_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + ) + .is_err() + { + return true; + } + + // Make the code segment writable for in-place patching. + if self + .sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + ) + .is_err() + { + return true; + } + + // Read the mapped code into a buffer, patch it, write back. 
+ let Some(code_owned) = mapped_addr.to_owned_slice(len) else { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + }; + let mut code_buf = code_owned.into_vec(); + let original_code = code_buf.clone(); + + let code_vaddr = addr_usize as u64; + let trampoline_write_vaddr = (state.trampoline_addr + state.trampoline_cursor) as u64; + let syscall_entry_addr = state.trampoline_addr as u64; + + let mut skipped_addrs = alloc::vec::Vec::new(); + let patch_result = litebox_syscall_rewriter::patch_code_segment( + &mut code_buf, + code_vaddr, + trampoline_write_vaddr, + syscall_entry_addr, + &mut skipped_addrs, + ); + if !skipped_addrs.is_empty() { + litebox::log_println!( + self.global.platform, + "warning: {} syscall instruction(s) could not be patched (addresses: {:?})", + skipped_addrs.len(), + skipped_addrs, + ); + } + match patch_result { + Ok(stubs) if !stubs.is_empty() => { + let Some(new_cursor) = state.trampoline_cursor.checked_add(stubs.len()) else { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + }; + let tramp_pages_needed = align_up(new_cursor, PAGE_SIZE); + if tramp_pages_needed > state.trampoline_mapped_len { + let extra_start = state.trampoline_addr + state.trampoline_mapped_len; + let extra_len = tramp_pages_needed - state.trampoline_mapped_len; + if self + .do_mmap_anonymous( + Some(extra_start), + extra_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED, + ) + .is_err() + { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + state.trampoline_mapped_len = tramp_pages_needed; + } + + // Write stubs before patching the code so rewritten jumps + // never target an 
uninitialized trampoline. + let tramp_write_ptr = + MutPtr::::from_usize(state.trampoline_addr + state.trampoline_cursor); + if tramp_write_ptr.copy_from_slice(0, &stubs).is_none() { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + + // Write patched code back to the mapped region. + if mapped_addr.copy_from_slice(0, &code_buf).is_none() { + let _ = mapped_addr.copy_from_slice(0, &original_code); + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + state.trampoline_cursor = new_cursor; + state.runtime_patches_committed = true; + } + Ok(_) => { + // No syscalls found — no patching needed. + } + Err(_) => { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + } + + // Restore the code segment to RX. + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + true + } + + /// Finalize the ELF patching state for `fd`. + /// + /// If the fd has a trampoline region that was allocated (RW), mprotect it + /// to RX so the trampoline stubs become executable and non-writable. + /// The cache entry is removed regardless. 
+ pub(crate) fn finalize_elf_patch(&self, fd: i32) { + let state = self.global.elf_patch_cache.lock().remove(&fd); + if let Some(state) = state + && state.trampoline_mapped + && !state.pre_patched + { + let tramp_len = state.trampoline_mapped_len; + if tramp_len > 0 { + if !state.runtime_patches_committed { + let _ = + self.sys_munmap(MutPtr::::from_usize(state.trampoline_addr), tramp_len); + return; + } + let _ = self.sys_mprotect( + MutPtr::::from_usize(state.trampoline_addr), + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + } + } + } } #[cfg(test)] diff --git a/litebox_shim_linux/src/syscalls/process.rs b/litebox_shim_linux/src/syscalls/process.rs index 70f878cde..419afb09c 100644 --- a/litebox_shim_linux/src/syscalls/process.rs +++ b/litebox_shim_linux/src/syscalls/process.rs @@ -770,6 +770,7 @@ impl Task { fs: fs.into(), files: self.files.clone(), // TODO: !CLONE_FILES support signals: self.signals.clone_for_new_task(), + suppress_elf_runtime_patch: core::cell::Cell::new(false), }, }), ) From 6620fc75e1fb2afe5012dc7df748e0538d3fdd42 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 07:53:07 -0700 Subject: [PATCH 04/24] Fix Windows CI: suppress dead_code warning for syscall_callback extern --- litebox_platform_windows_userland/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index ed694f3ee..d6d22b702 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -1963,6 +1963,7 @@ impl litebox::mm::allocator::MemoryProvider for WindowsUserland { unsafe extern "C" { // Defined in asm blocks above + #[allow(dead_code)] // Referenced from inline asm, not directly from Rust fn syscall_callback() -> isize; fn syscall_callback_redzone() -> isize; fn exception_callback() -> isize; From e50aa0a34497d9a23acd21a470e4e185353a79d0 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 
2026 08:03:08 -0700 Subject: [PATCH 05/24] Fix Windows CI: remove rtld_audit.so from Windows test (incompatible with new trampoline format) --- litebox_runner_linux_on_windows_userland/src/lib.rs | 10 +++------- .../tests/loader.rs | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/litebox_runner_linux_on_windows_userland/src/lib.rs b/litebox_runner_linux_on_windows_userland/src/lib.rs index c5afcbc71..063c7197e 100644 --- a/litebox_runner_linux_on_windows_userland/src/lib.rs +++ b/litebox_runner_linux_on_windows_userland/src/lib.rs @@ -130,11 +130,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } fn fixup_env(envp: &mut Vec) { - // Always inject LD_AUDIT so the dynamic linker loads the audit library - // that sets up trampolines for rewritten binaries. - let p = c"LD_AUDIT=/lib/litebox_rtld_audit.so"; - let has_ld_audit = envp.iter().any(|var| var.as_c_str() == p); - if !has_ld_audit { - envp.push(p.into()); - } + let _ = envp; + // No environment fixups needed — the shim's mmap hook handles + // syscall patching at runtime without LD_AUDIT. 
} diff --git a/litebox_runner_linux_on_windows_userland/tests/loader.rs b/litebox_runner_linux_on_windows_userland/tests/loader.rs index e6f470e34..b83fdb056 100644 --- a/litebox_runner_linux_on_windows_userland/tests/loader.rs +++ b/litebox_runner_linux_on_windows_userland/tests/loader.rs @@ -361,7 +361,7 @@ fn test_testcase_dynamic_with_rewriter() { ("libc.so.6", "/lib/x86_64-linux-gnu"), ("ld-linux-x86-64.so.2", "/lib64"), ]; - let libs_without_rewrite = [("litebox_rtld_audit.so", "/lib")]; + let libs_without_rewrite: [(&str, &str); 0] = []; // Run run_dynamic_linked_prog_with_rewriter( From 9729fed11b98b44ff12499c299bb207e73432bcc Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 12:07:06 -0700 Subject: [PATCH 06/24] Windows: preserve guest R11 across syscall callback via TEB.ArbitraryUserPointer The new trampoline format loads a restart address into R11 (for SA_RESTART) before jumping to the callback. On Windows, the TLS index lookup clobbers R11, so we temporarily stash R11 in the per-thread TEB.ArbitraryUserPointer slot (gs:[0x28]) for the ~20 instructions of inline asm between callback entry and pt_regs save. Also removes the dead syscall_callback entry point (only syscall_callback_redzone is used since get_syscall_entry_point always returns the redzone variant). --- litebox_platform_windows_userland/src/lib.rs | 38 +++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index d6d22b702..19a5256bd 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -549,31 +549,26 @@ unsafe extern "C-unwind" fn run_thread_arch(thread_ctx: &mut ThreadContext, tls_ jmp .Ldone // This entry point is called from the guest when it issues a syscall - // instruction. + // instruction. The rewriter trampoline has already: + // 1. 
Reserved 128 bytes below RSP to protect the SysV red zone + // 2. Loaded the call-site restart address into R11 (for SA_RESTART) + // 3. Loaded the return address into RCX // - // At entry, the register context is the guest context with the - // return address in rcx. r11 is an available scratch register (it would - // contain rflags if the syscall instruction had actually been issued). - .globl syscall_callback -syscall_callback: - // Get the TLS state from the TLS slot and clear the in-guest flag. - mov r11d, DWORD PTR [rip + {TLS_INDEX}] - mov r11, QWORD PTR gs:[r11 * 8 + TEB_TLS_SLOTS_OFFSET] - mov BYTE PTR [r11 + {IS_IN_GUEST}], 0 - // Set rsp to the top of the guest context. - mov QWORD PTR [r11 + {SCRATCH}], rsp - jmp .Lsyscall_callback_common - + // All other registers hold guest state. .globl syscall_callback_redzone syscall_callback_redzone: - // Same as syscall_callback, but the trampoline has already reserved - // 128 bytes below RSP to protect the SysV red zone. Recover the - // architectural guest stack pointer. + // Save guest R11 (restart address from rewriter trampoline) into + // TEB.ArbitraryUserPointer (gs:[0x28]) before the TLS index lookup + // clobbers R11. This slot is per-thread and the window is very + // narrow: only ~20 instructions of inline asm with no API calls, + // no Rust code, and no DLL activity, so the ntdll loader (which + // also uses this slot for debugger communication) cannot interfere. + mov gs:[0x28], r11 + // Get the TLS state from the TLS slot and clear the in-guest flag. mov r11d, DWORD PTR [rip + {TLS_INDEX}] mov r11, QWORD PTR gs:[r11 * 8 + TEB_TLS_SLOTS_OFFSET] mov BYTE PTR [r11 + {IS_IN_GUEST}], 0 - // Save RSP + 128 to SCRATCH without clobbering any guest registers. - // Use SCRATCH as a temporary: store rsp, then add 128 in-place. + // Recover the architectural guest stack pointer (RSP + 128) into SCRATCH. 
mov QWORD PTR [r11 + {SCRATCH}], rsp add QWORD PTR [r11 + {SCRATCH}], 128 @@ -597,7 +592,8 @@ syscall_callback_redzone: push r8 // pt_regs->r8 push r9 // pt_regs->r9 push r10 // pt_regs->r10 - push [rsp + 88] // pt_regs->r11 = rflags + mov r10, gs:[0x28] // recover guest R11 saved at entry + push r10 // pt_regs->r11 = guest R11 (restart addr from rewriter) push rbx // pt_regs->bx push rbp // pt_regs->bp push r12 @@ -1963,8 +1959,6 @@ impl litebox::mm::allocator::MemoryProvider for WindowsUserland { unsafe extern "C" { // Defined in asm blocks above - #[allow(dead_code)] // Referenced from inline asm, not directly from Rust - fn syscall_callback() -> isize; fn syscall_callback_redzone() -> isize; fn exception_callback() -> isize; fn interrupt_callback(); From cc10a9a3f39c97d56156fc3a7b06b3dbbdc77ad2 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 14:58:36 -0700 Subject: [PATCH 07/24] Remove rtld_audit, fix RFLAGS on Windows, simplify callback dispatch, discriminate rewriter errors - Remove litebox_rtld_audit/ directory entirely (Makefile, rtld_audit.c, .gitignore) - Replace litebox_packager/build.rs with no-op (was building rtld_audit.so) - Remove rtld_audit tar entry from litebox_packager/src/lib.rs - Remove fixup_env and set_load_filter from both Linux and LoW runners - Fix RFLAGS clobber on Windows: use lea+mov instead of mov+add - Simplify is_at_syscall_callback: x86 checks syscall_callback, x86_64 checks syscall_callback_redzone - Discriminate trampoline parse errors: only UnpatchedBinary triggers runtime patching - Discriminate rewriter errors: expected non-fatal vs unexpected with logging - Restore fork-vfork patch error path from PR 1c - Simplify suppress_elf_runtime_patch logic - Clean up rtld_audit references in comments across codebase --- dev_bench/unixbench/prepare_unixbench.py | 4 +- dev_tests/src/boilerplate.rs | 1 - litebox_packager/build.rs | 39 +- litebox_packager/src/lib.rs | 17 - litebox_platform_linux_userland/src/lib.rs | 4 +- 
litebox_platform_windows_userland/src/lib.rs | 6 +- litebox_rtld_audit/.gitignore | 1 - litebox_rtld_audit/Makefile | 26 -- litebox_rtld_audit/rtld_audit.c | 384 ------------------ .../src/lib.rs | 19 +- .../tests/loader.rs | 32 +- litebox_runner_linux_userland/build.rs | 6 - litebox_runner_linux_userland/src/lib.rs | 7 +- litebox_runner_linux_userland/tests/run.rs | 2 +- litebox_shim_linux/src/loader/elf.rs | 62 ++- litebox_syscall_rewriter/src/lib.rs | 19 +- 16 files changed, 80 insertions(+), 549 deletions(-) delete mode 100644 litebox_rtld_audit/.gitignore delete mode 100644 litebox_rtld_audit/Makefile delete mode 100644 litebox_rtld_audit/rtld_audit.c delete mode 100644 litebox_runner_linux_userland/build.rs diff --git a/dev_bench/unixbench/prepare_unixbench.py b/dev_bench/unixbench/prepare_unixbench.py index 0d472d505..4eee4e6e1 100644 --- a/dev_bench/unixbench/prepare_unixbench.py +++ b/dev_bench/unixbench/prepare_unixbench.py @@ -61,8 +61,8 @@ def prepare_benchmark( """ Prepare a single benchmark using litebox_packager. - The packager discovers dependencies, rewrites all ELFs, and creates a tar - (including litebox_rtld_audit.so). The rewritten main binary is extracted + The packager discovers dependencies, rewrites all ELFs, and creates a tar. + The rewritten main binary is extracted from the tar and placed alongside it. Returns True on success. 
diff --git a/dev_tests/src/boilerplate.rs b/dev_tests/src/boilerplate.rs index a32cf70b6..c29e14ebf 100644 --- a/dev_tests/src/boilerplate.rs +++ b/dev_tests/src/boilerplate.rs @@ -133,7 +133,6 @@ const SKIP_FILES: &[&str] = &[ "LICENSE", "litebox/src/sync/mutex.rs", "litebox/src/sync/rwlock.rs", - "litebox_rtld_audit/Makefile", "litebox_runner_linux_on_windows_userland/tests/test-bins/hello_exec_nolibc", "litebox_runner_linux_on_windows_userland/tests/test-bins/hello_thread", "litebox_runner_linux_on_windows_userland/tests/test-bins/hello_thread_static", diff --git a/litebox_packager/build.rs b/litebox_packager/build.rs index 77956be92..f189226e4 100644 --- a/litebox_packager/build.rs +++ b/litebox_packager/build.rs @@ -1,43 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -use std::path::PathBuf; - -const RTLD_AUDIT_DIR: &str = "../litebox_rtld_audit"; - fn main() { - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - if target_arch != "x86_64" { - return; - } - - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - let mut make_cmd = std::process::Command::new("make"); - make_cmd - .current_dir(RTLD_AUDIT_DIR) - .env("OUT_DIR", &out_dir) - .env("ARCH", &target_arch); - // Always build without DEBUG for the packager -- packaged binaries are - // release artifacts. - make_cmd.env_remove("DEBUG"); - // Force rebuild in case a stale artifact exists from a different config. 
- let _ = std::fs::remove_file(out_dir.join("litebox_rtld_audit.so")); - - let output = make_cmd - .output() - .expect("Failed to execute make for rtld_audit"); - assert!( - output.status.success(), - "failed to build rtld_audit.so via make:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr), - ); - assert!( - out_dir.join("litebox_rtld_audit.so").exists(), - "Build failed to create litebox_rtld_audit.so" - ); - - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/rtld_audit.c"); - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/Makefile"); - println!("cargo:rerun-if-changed=build.rs"); + // rtld_audit has been removed; nothing to build. } diff --git a/litebox_packager/src/lib.rs b/litebox_packager/src/lib.rs index 0b5490a6a..95cf3bf20 100644 --- a/litebox_packager/src/lib.rs +++ b/litebox_packager/src/lib.rs @@ -358,23 +358,6 @@ fn finalize_tar( }); } - // Include the rtld audit library so the rewriter backend can load it. - #[cfg(target_arch = "x86_64")] - { - const RTLD_AUDIT_TAR_PATH: &str = "lib/litebox_rtld_audit.so"; - if !added_tar_paths.insert(RTLD_AUDIT_TAR_PATH.to_string()) { - bail!( - "tar already contains {RTLD_AUDIT_TAR_PATH} -- \ - remove the conflicting entry or use --no-rewrite" - ); - } - tar_entries.push(TarEntry { - tar_path: RTLD_AUDIT_TAR_PATH.to_string(), - data: include_bytes!(concat!(env!("OUT_DIR"), "/litebox_rtld_audit.so")).to_vec(), - mode: 0o755, - }); - } - // Build tar. eprintln!("Creating {}...", args.output.display()); build_tar(&tar_entries, &args.output)?; diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 4c06e3c8a..777bfeada 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -2748,10 +2748,10 @@ unsafe fn interrupt_signal_handler( // FUTURE: handle trampoline code, too. 
This is somewhat less important // because it's probably fine for the shim to observe a guest context that // is inside the trampoline. + #[cfg(target_arch = "x86")] let is_at_syscall_callback = ip == syscall_callback as *const () as usize; #[cfg(target_arch = "x86_64")] - let is_at_syscall_callback = - is_at_syscall_callback || ip == syscall_callback_redzone as *const () as usize; + let is_at_syscall_callback = ip == syscall_callback_redzone as *const () as usize; if is_at_syscall_callback { // No need to clear `in_guest` or set interrupt; the syscall handler will // clear `in_guest` and call into the shim. diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 19a5256bd..b43f2f790 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -568,9 +568,11 @@ syscall_callback_redzone: mov r11d, DWORD PTR [rip + {TLS_INDEX}] mov r11, QWORD PTR gs:[r11 * 8 + TEB_TLS_SLOTS_OFFSET] mov BYTE PTR [r11 + {IS_IN_GUEST}], 0 - // Recover the architectural guest stack pointer (RSP + 128) into SCRATCH. + // Recover the architectural guest stack pointer (undo the 128-byte + // red zone reservation) and store it in SCRATCH. LEA is used instead + // of ADD to avoid clobbering RFLAGS before pushfq. + lea rsp, [rsp + 128] mov QWORD PTR [r11 + {SCRATCH}], rsp - add QWORD PTR [r11 + {SCRATCH}], 128 .Lsyscall_callback_common: mov rsp, QWORD PTR [r11 + {GUEST_CONTEXT_TOP}] diff --git a/litebox_rtld_audit/.gitignore b/litebox_rtld_audit/.gitignore deleted file mode 100644 index 140f8cf80..000000000 --- a/litebox_rtld_audit/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.so diff --git a/litebox_rtld_audit/Makefile b/litebox_rtld_audit/Makefile deleted file mode 100644 index b3a3ad3a3..000000000 --- a/litebox_rtld_audit/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -SRC = rtld_audit.c -OUT_DIR ?= . 
-OUTPUT = $(OUT_DIR)/litebox_rtld_audit.so -CC ?= cc -CFLAGS ?= -Wall -Werror -fPIC -shared -nostdlib -ARCH ?= $(shell uname -m) -ifeq ($(ARCH),x86_64) - CFLAGS += -m64 -else - $(error Unsupported target architecture: $(ARCH)) -endif -ifdef DEBUG - CFLAGS += -DDEBUG -endif -all: $(OUTPUT) - -$(OUTPUT): $(SRC) - $(CC) $(CFLAGS) -o $@ $< - -clean: - rm -f $(OUTPUT) - -.PHONY: all clean diff --git a/litebox_rtld_audit/rtld_audit.c b/litebox_rtld_audit/rtld_audit.c deleted file mode 100644 index 51713f941..000000000 --- a/litebox_rtld_audit/rtld_audit.c +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#define _GNU_SOURCE -#include -#include -#include - -// The magic number used to identify the LiteBox trampoline. -// This must match `TRAMPOLINE_MAGIC` in `litebox_syscall_rewriter` and `litebox_common_linux`. -// Value 0x30584f424554494c is "LITEBOX0" in little-endian (bytes: 'L','I','T','E','B','O','X','0') -#define TRAMPOLINE_MAGIC ((uint64_t)0x30584f424554494c) - -#if !defined(__x86_64__) -# error "rtld_audit.c: build target must be x86_64" -#endif - -// Linux syscall numbers (x86_64) -#define SYS_openat 257 -#define SYS_read 0 -#define SYS_write 1 -#define SYS_close 3 -#define SYS_fstat 5 -#define SYS_mmap 9 -#define SYS_mprotect 10 -#define SYS_munmap 11 -#define SYS_exit_group 231 -#define AT_FDCWD -100 - -// Maximum valid userspace address (48-bit address space) -#define MAX_USERSPACE_ADDR 0x7FFFFFFFFFFFUL - -// Trampoline header layout for x86_64: magic(8) + file_offset(8) + vaddr(8) + size(8) = 32 bytes -struct __attribute__((packed)) TrampolineHeader { - uint64_t magic; - uint64_t file_offset; - uint64_t vaddr; - uint64_t trampoline_size; -}; - -// Linux flags -#define MAP_PRIVATE 0x02 -#define MAP_FIXED 0x10 -#define PROT_READ 0x1 -#define PROT_WRITE 0x2 -#define PROT_EXEC 0x4 - -typedef long (*syscall_stub_t)(void); -static syscall_stub_t syscall_entry = 0; -static char interp[256] = {0}; // 
Buffer for interpreter path - -#ifdef DEBUG -#define syscall_print(str, len) \ - do_syscall(SYS_write, 1, (long)(str), len, 0, 0, 0) -#else -#define syscall_print(str, len) -#endif - -static long do_syscall(long num, long a1, long a2, long a3, long a4, long a5, - long a6) { - if (!syscall_entry) - return -1; - - register long rax __asm__("rax") = num; - register long rdi __asm__("rdi") = a1; - register long rsi __asm__("rsi") = a2; - register long rdx __asm__("rdx") = a3; - register long r10 __asm__("r10") = a4; - register long r8 __asm__("r8") = a5; - register long r9 __asm__("r9") = a6; - - __asm__ volatile("leaq 1f(%%rip), %%rcx\n" - "jmp *%[entry]\n" - "1:\n" - : "+r"(rax) - : [entry] "r"(syscall_entry), "r"(rdi), "r"(rsi), "r"(rdx), - "r"(r10), "r"(r8), "r"(r9) - : "rcx", "r11", "memory"); - return rax; -} - -/* Re-implement some utility functions and re-define the structures to avoid - * dependency on libc. */ - -// Define the FileStat structure -struct FileStat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; - unsigned int __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - long st_blocks; /* Number 512-byte blocks allocated. 
*/ - - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - long __unused[3]; -}; - -int memcmp(const void *s1, const void *s2, size_t n) { - const unsigned char *p1 = s1; - const unsigned char *p2 = s2; - while (n--) { - if (*p1 != *p2) { - return *p1 - *p2; - } - p1++; - p2++; - } - return 0; -} - -int strcmp(const char *s1, const char *s2) { - while (*s1 && (*s1 == *s2)) { - s1++; - s2++; - } - return *(unsigned char *)s1 - *(unsigned char *)s2; -} - -char *strncpy(char *dest, const char *src, size_t n) { - char *d = dest; - const char *s = src; - while (n-- && *s) { - *d++ = *s++; - } - while (n--) { - *d++ = '\0'; - } - return dest; -} - -static uint64_t read_u64(const void *p) { - uint64_t v; - __builtin_memcpy(&v, p, 8); - return v; -} - -static size_t align_up(size_t val, size_t align) { - size_t result = (val + align - 1) & ~(align - 1); - // Check for overflow (result < val means we wrapped) - if (result < val) return (size_t)-1; - return result; -} - -unsigned int la_version(unsigned int version __attribute__((unused))) { - return LAV_CURRENT; -} - -/// print value in hex -void print_hex(uint64_t data) { -#ifdef DEBUG - for (int i = 15; i >= 0; i--) { - unsigned char byte = (data >> (i * 4)) & 0xF; - if (byte < 10) { - syscall_print((&"0123456789"[byte]), 1); - } else { - syscall_print((&"abcdef"[byte - 10]), 1); - } - } - syscall_print("\n", 1); -#endif -} - -/// @brief Parse object to find the syscall entry point and the interpreter -/// path. -/// -/// The trampoline is already mapped by the litebox loader at (base + vaddr). -/// The entry point is at offset 0 of the mapped trampoline. The litebox loader -/// already validated the magic when parsing the file header. 
-int parse_object(const struct link_map *map) { - unsigned long max_addr = 0; - Elf64_Ehdr *eh = (Elf64_Ehdr *)map->l_addr; - if (memcmp(eh->e_ident, - "\x7f" - "ELF", - 4) != 0) { - syscall_print("[audit] not an ELF file\n", 24); - return 1; - } - Elf64_Phdr *phdrs = (Elf64_Phdr *)((char *)map->l_addr + eh->e_phoff); - for (int i = 0; i < eh->e_phnum; i++) { - if (phdrs[i].p_type == PT_LOAD) { - unsigned long vaddr_end = (phdrs[i].p_vaddr + phdrs[i].p_memsz); - if (vaddr_end > max_addr) { - max_addr = vaddr_end; - } - } else if (phdrs[i].p_type == PT_INTERP) { - strncpy(interp, (char *)map->l_addr + phdrs[i].p_vaddr, - sizeof(interp) - 1); - interp[sizeof(interp) - 1] = '\0'; // Ensure null termination - } - } - max_addr = align_up(max_addr, 0x1000); - void *trampoline_addr = (void *)map->l_addr + max_addr; - // The trampoline code has the syscall entry point at offset 0. - syscall_entry = (syscall_stub_t)read_u64(trampoline_addr); - if (syscall_entry == 0) { - syscall_print("[audit] syscall entry is null\n", 30); - return 1; - } - print_hex((uint64_t)syscall_entry); - return 0; -} - -unsigned int la_objopen(struct link_map *map, - Lmid_t lmid __attribute__((unused)), - uintptr_t *cookie __attribute__((unused))) { - syscall_print("[audit] la_objopen called\n", 26); - const char *path = map->l_name; - - if (!path || path[0] == '\0') { - // main binary should be called first. - if (map->l_addr != 0) { - // `map->l_addr` is zero for the main binary if it is not position - // independent. 
- if (parse_object(map) != 0) { - syscall_print("[audit] failed to parse main binary\n", 36); - return 0; - } - syscall_print("[audit] main binary is patched by libOS\n", 40); - syscall_print("[audit] interp=", 15); - syscall_print(interp, sizeof(interp) - 1); - syscall_print("\n", 1); - } - return 0; // main binary is patched by libOS - } - - if (syscall_entry == 0) { - // failed to get the syscall entry point from the main binary - // fall back to get it from ld-*.so, which should be called next. - if (parse_object(map) != 0) { - syscall_print("[audit] failed to parse ld\n", 27); - return 0; - } - syscall_print("[audit] ld is patched by libOS: \n", 33); - syscall_print(path, 32); - syscall_print("\n", 1); - return 0; // ld.so is patched by libOS - } - - if (interp[0] != '\0' && strcmp(path, interp) == 0) { - // successfully get the entry point and interpreter from the main binary - syscall_print("[audit] ld-*.so is patched by libOS\n", 36); - return 0; // ld.so is patched by libOS - } - - // Other shared libraries - syscall_print("[audit] la_objopen: path=", 25); - syscall_print(path, 32); - syscall_print("\n", 1); - - if (!syscall_entry) { - return 0; - } - - int fd = do_syscall(SYS_openat, AT_FDCWD, (long)path, 0, 0, 0, 0); - if (fd < 0) { - syscall_print("[audit] failed to open file\n", 28); - return 0; - } - - struct FileStat st; - if (do_syscall(SYS_fstat, fd, (long)&st, 0, 0, 0, 0) < 0) { - syscall_print("[audit] fstat failed\n", 21); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - long file_size = st.st_size; - - // File must be large enough to contain at least a trampoline header - if (file_size < (long)sizeof(struct TrampolineHeader)) { - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // The trampoline header is at the end of the file (last 32 bytes for x86_64). - // File layout: [ELF][padding][trampoline code][header] - // Read the last page that contains the header. 
- long header_offset = file_size - sizeof(struct TrampolineHeader); - long header_page_offset = header_offset & ~0xFFFUL; - - // Map the page containing the header - void *header_page = (void *)do_syscall(SYS_mmap, 0, 0x1000, PROT_READ, MAP_PRIVATE, fd, header_page_offset); - if ((uintptr_t)header_page >= (uintptr_t)-4096) { - syscall_print("[audit] mmap header page failed\n", 32); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Read header from the mapped page - long header_in_page_offset = header_offset - header_page_offset; - const struct TrampolineHeader *header = (const struct TrampolineHeader *)((const char *)header_page + header_in_page_offset); - - // Check magic - if (header->magic != TRAMPOLINE_MAGIC) { - // If the prefix matches but the version differs, fail explicitly. - if (memcmp(header, "LITEBOX", 7) == 0) { - syscall_print("[audit] invalid trampoline version\n", 36); - do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - // No trampoline found - do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Copy fields before unmapping - uint64_t tramp_file_offset = header->file_offset; - uint64_t tramp_vaddr = header->vaddr; - uint64_t tramp_size_raw = header->trampoline_size; - - do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0); - syscall_print("[audit] found trampoline header at end of file\n", 47); - - // Validate trampoline size - if (tramp_size_raw == 0) { - syscall_print("[audit] trampoline code size invalid\n", 37); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Verify file offset is page-aligned - if ((tramp_file_offset & 0xFFF) != 0) { - syscall_print("[audit] trampoline code not page-aligned\n", 41); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // The trampoline code should immediately precede the header. 
- if (tramp_file_offset + tramp_size_raw != (uint64_t)header_offset) { - syscall_print("[audit] trampoline extends beyond header\n", 41); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Validate tramp_vaddr is within reasonable userspace bounds and page-aligned - if (tramp_vaddr > MAX_USERSPACE_ADDR || (tramp_vaddr & 0xFFF) != 0) { - syscall_print("[audit] trampoline vaddr out of bounds\n", 39); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - uint64_t tramp_addr = map->l_addr + tramp_vaddr; - uint64_t tramp_size = align_up(tramp_size_raw, 0x1000); - - // Check for overflow in align_up or address calculation - if (tramp_size == (size_t)-1 || tramp_addr < map->l_addr) { - syscall_print("[audit] trampoline size/addr overflow\n", 38); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Use MAP_FIXED to place the trampoline at the exact required address. - // The loader ensures this range is not used by other mappings. - void *mapped = - (void *)do_syscall(SYS_mmap, tramp_addr, tramp_size, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, tramp_file_offset); - if ((uintptr_t)mapped >= (uintptr_t)-4096) { - syscall_print("[audit] mmap failed for trampoline\n", 35); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Write the syscall entry point at the start of the trampoline code - __builtin_memcpy((char *)mapped, (const void *)&syscall_entry, 8); - do_syscall(SYS_mprotect, (long)mapped, tramp_size, PROT_READ | PROT_EXEC, 0, - 0, 0); - syscall_print("[audit] trampoline patched and protected\n", 41); - - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; -} diff --git a/litebox_runner_linux_on_windows_userland/src/lib.rs b/litebox_runner_linux_on_windows_userland/src/lib.rs index 063c7197e..826d42923 100644 --- a/litebox_runner_linux_on_windows_userland/src/lib.rs +++ b/litebox_runner_linux_on_windows_userland/src/lib.rs @@ -14,16 +14,16 @@ use std::path::PathBuf; /// Run Linux programs with LiteBox 
on unmodified Windows. /// -/// The program binary and all its dependencies (including `litebox_rtld_audit.so`) -/// must be provided inside a tar archive via `--initial-files`. The program path -/// refers to a path inside the tar archive. +/// The program binary and all its dependencies must be provided inside a tar +/// archive via `--initial-files`. The program path refers to a path inside the +/// tar archive. #[derive(Parser, Debug)] pub struct CliArgs { /// The program and arguments passed to it (e.g., `/bin/ls --color`). /// /// The program path refers to a path inside the tar archive provided via /// `--initial-files`. All binaries must be pre-rewritten with the syscall - /// rewriter and the tar must include `litebox_rtld_audit.so`. + /// rewriter. #[arg(required = true, trailing_var_arg = true, value_hint = clap::ValueHint::CommandWithArguments)] pub program_and_arguments: Vec, /// Environment variables passed to the program (`K=V` pairs; can be invoked multiple times) @@ -35,7 +35,7 @@ pub struct CliArgs { /// Allow using unstable options #[arg(short = 'Z', long = "unstable")] pub unstable: bool, - /// Tar archive containing the program, its shared libraries, and litebox_rtld_audit.so. + /// Tar archive containing the program and its shared libraries. /// /// All ELF binaries should be pre-rewritten with the syscall rewriter /// (e.g., via `litebox-packager`). @@ -60,7 +60,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { let platform = Platform::new(); litebox_platform_multiplex::set_platform(platform); - let mut shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); + let shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); let litebox = shim_builder.litebox(); // The program path is a Unix-style path inside the tar archive. 
@@ -83,7 +83,6 @@ pub fn run(cli_args: CliArgs) -> Result<()> { }; let initial_file_system = std::sync::Arc::new(initial_file_system); - shim_builder.set_load_filter(fixup_env); let shim = shim_builder.build(); let argv = cli_args .program_and_arguments @@ -128,9 +127,3 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } std::process::exit(program.process.wait()) } - -fn fixup_env(envp: &mut Vec) { - let _ = envp; - // No environment fixups needed — the shim's mmap hook handles - // syscall patching at runtime without LD_AUDIT. -} diff --git a/litebox_runner_linux_on_windows_userland/tests/loader.rs b/litebox_runner_linux_on_windows_userland/tests/loader.rs index b83fdb056..1a0849aef 100644 --- a/litebox_runner_linux_on_windows_userland/tests/loader.rs +++ b/litebox_runner_linux_on_windows_userland/tests/loader.rs @@ -4,9 +4,8 @@ //! Tests for the Windows userland runner. //! //! **NOTE:** These tests depend on pre-built Linux ELF binaries in `tests/test-bins/`, -//! including `litebox_rtld_audit.so`, shared libraries (`libc.so.6`, `ld-linux-x86-64.so.2`), -//! and test executables. These binaries must be rebuilt on Linux and re-committed whenever -//! the corresponding source code changes (e.g., `litebox_rtld_audit/rtld_audit.c`). +//! including shared libraries (`libc.so.6`, `ld-linux-x86-64.so.2`) +//! and test executables. 
#![cfg(all(target_os = "windows", target_arch = "x86_64"))] @@ -198,7 +197,6 @@ fn test_static_linked_prog_with_rewriter() { fn run_dynamic_linked_prog_with_rewriter( libs_to_rewrite: &[(&str, &str)], - libs_without_rewrite: &[(&str, &str)], exec_name: &str, cmd_args: &[&str], install_files: fn(std::path::PathBuf), @@ -276,22 +274,6 @@ fn run_dynamic_linked_prog_with_rewriter( ); } - // Copy libraries that are not needed to be rewritten (`litebox_rtld_audit.so`) - // to the tar directory - for (file, prefix) in libs_without_rewrite { - let src = test_dir.join(file); - let dst_dir = tar_src_path.join(prefix.trim_start_matches('/')); - let dst = dst_dir.join(file); - std::fs::create_dir_all(&dst_dir).unwrap(); - let _ = std::fs::remove_file(&dst); - println!( - "Copying {} to {}", - src.to_str().unwrap(), - dst.to_str().unwrap() - ); - std::fs::copy(&src, &dst).unwrap(); - } - // Install the required files (e.g., scripts) to tar directory's /out install_files(tar_src_path.join("out")); @@ -361,14 +343,6 @@ fn test_testcase_dynamic_with_rewriter() { ("libc.so.6", "/lib/x86_64-linux-gnu"), ("ld-linux-x86-64.so.2", "/lib64"), ]; - let libs_without_rewrite: [(&str, &str); 0] = []; - // Run - run_dynamic_linked_prog_with_rewriter( - &libs_to_rewrite, - &libs_without_rewrite, - exec_name, - &[], - |_| {}, - ); + run_dynamic_linked_prog_with_rewriter(&libs_to_rewrite, exec_name, &[], |_| {}); } diff --git a/litebox_runner_linux_userland/build.rs b/litebox_runner_linux_userland/build.rs deleted file mode 100644 index f189226e4..000000000 --- a/litebox_runner_linux_userland/build.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -fn main() { - // rtld_audit has been removed; nothing to build. 
-} diff --git a/litebox_runner_linux_userland/src/lib.rs b/litebox_runner_linux_userland/src/lib.rs index b2ee00a76..edc670bae 100644 --- a/litebox_runner_linux_userland/src/lib.rs +++ b/litebox_runner_linux_userland/src/lib.rs @@ -225,7 +225,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } litebox_platform_multiplex::set_platform(platform); - let mut shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); + let shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); let litebox = shim_builder.litebox(); let initial_file_system = { let mut in_mem = litebox::fs::in_mem::FileSystem::new(litebox); @@ -331,7 +331,6 @@ pub fn run(cli_args: CliArgs) -> Result<()> { let initial_file_system = std::sync::Arc::new(initial_file_system); - shim_builder.set_load_filter(fixup_env); let shim = shim_builder.build(); let shutdown = std::sync::Arc::new(core::sync::atomic::AtomicBool::new(false)); @@ -451,7 +450,3 @@ fn pin_thread_to_cpu(cpu: usize) { } } } - -fn fixup_env(_envp: &mut Vec) { - // No-op: rtld_audit has been removed; runtime patching is handled by the shim. 
-} diff --git a/litebox_runner_linux_userland/tests/run.rs b/litebox_runner_linux_userland/tests/run.rs index 219d27da1..e53303165 100644 --- a/litebox_runner_linux_userland/tests/run.rs +++ b/litebox_runner_linux_userland/tests/run.rs @@ -206,7 +206,7 @@ fn find_c_test_files(dir: &str) -> Vec { files } -// our rtld_audit does not support x86 yet +// Syscall rewriting does not support x86 yet #[cfg(target_arch = "x86_64")] #[test] fn test_dynamic_lib_with_rewriter() { diff --git a/litebox_shim_linux/src/loader/elf.rs b/litebox_shim_linux/src/loader/elf.rs index 6cbebaf98..8935ebbe3 100644 --- a/litebox_shim_linux/src/loader/elf.rs +++ b/litebox_shim_linux/src/loader/elf.rs @@ -11,15 +11,15 @@ use litebox::{ utils::{ReinterpretSignedExt, TruncateExt}, }; use litebox_common_linux::{ - MapFlags, errno::Errno, loader::{ElfParsedFile, ReadAt as _}, + MapFlags, }; use thiserror::Error; use crate::{ - MutPtr, loader::auxv::{AuxKey, AuxVec}, + MutPtr, }; use super::stack::UserStack; @@ -259,7 +259,15 @@ impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { // If the rewriter backend is active (syscall_entry_point != 0) and the // binary lacks a trampoline, patch it on the fly so that both the main // program and the dynamic linker are covered. - let patched_data = if syscall_entry_point != 0 && trampoline_result.is_err() { + // + // Only attempt runtime patching for UnpatchedBinary — other errors + // (BadTrampolineVersion, BadTrampoline, Io) indicate a corrupt or + // incompatible pre-patched binary that should not be re-patched. + let patched_data = if syscall_entry_point != 0 + && matches!( + trampoline_result, + Err(litebox_common_linux::loader::ElfParseError::UnpatchedBinary) + ) { let size: usize = (&mut &file) .size() .map_err(ElfLoaderError::OpenError)? @@ -289,12 +297,39 @@ impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { .map_err(ElfLoaderError::ParseError)?; Some(patched) } - Err(_) => { - // Patching failed (e.g. ET_REL, no .text). 
Proceed without - // a trampoline — the binary may simply have no syscalls. + Err( + litebox_syscall_rewriter::Error::UnsupportedBunExecutable + | litebox_syscall_rewriter::Error::UnsupportedObjectFile + | litebox_syscall_rewriter::Error::NoTextSectionFound + | litebox_syscall_rewriter::Error::NoSyscallInstructionsFound + | litebox_syscall_rewriter::Error::AlreadyHooked, + ) => { + // These are expected non-fatal cases: + // - BUN: can't be statically patched but the runtime mmap + // hook will patch code segments as they are mapped. + // - Object files / no .text / no syscalls / already hooked: + // nothing to patch. + None + } + Err(e) => { + // Unexpected rewriter failure (parse error, disassembly + // failure, etc.). Proceed without a trampoline — the + // runtime mmap hook may still patch individual segments. + litebox::log_println!( + task.global.platform, + "warning: syscall rewriter failed: {}; \ + falling back to runtime patching", + e + ); None } } + } else if syscall_entry_point != 0 { + // Rewriter is active but trampoline_result is an error other than + // UnpatchedBinary (e.g. BadTrampolineVersion, BadTrampoline, Io). + // Propagate the error rather than silently proceeding. + trampoline_result.map_err(ElfLoaderError::ParseError)?; + None } else { None }; @@ -317,14 +352,13 @@ impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { // this, both paths would map the same region — the second MAP_FIXED // destroys the first mapping. // - // Only suppress when using the ElfFile mapper (which routes through - // do_mmap_file → maybe_patch_exec_segment) AND the loader actually - // has a trampoline to map. When patched_data is None and there's no - // trampoline (e.g. the rewriter declined the binary), the runtime - // fallback must remain enabled. 
- let has_loader_trampoline = self.patched_data.is_some() || self.parsed.has_trampoline(); - let suppress = has_loader_trampoline && self.patched_data.is_none(); - self.file.task.suppress_elf_runtime_patch.set(suppress); + // When patched_data is Some the PatchedMapper path doesn't go through + // do_mmap_file so the flag is a no-op, but setting it is harmless and + // keeps the logic simple. + self.file + .task + .suppress_elf_runtime_patch + .set(self.patched_data.is_some() || self.parsed.has_trampoline()); let result = if let Some(ref data) = self.patched_data { let mut mapper = PatchedMapper { inner: &mut self.file, diff --git a/litebox_syscall_rewriter/src/lib.rs b/litebox_syscall_rewriter/src/lib.rs index 780d6409f..ca22348bc 100644 --- a/litebox_syscall_rewriter/src/lib.rs +++ b/litebox_syscall_rewriter/src/lib.rs @@ -260,6 +260,11 @@ pub fn hook_syscalls_in_elf( if off + 5 <= buf.len() { buf[off] = 0xE9; // JMP rel32 buf[off + 1..off + 5].copy_from_slice(&rel32.to_le_bytes()); + } else { + return Err(Error::ParseError(format!( + "fork→vfork patch offset {off:#x} + 5 exceeds buffer length {}", + buf.len() + ))); } } @@ -587,8 +592,8 @@ fn hook_syscalls_in_section( trampoline_data.extend_from_slice(&[0xE8, 0x0, 0x0, 0x0, 0x0]); // CALL next instruction trampoline_data.push(0x58); // POP EAX (effectively store IP in EAX) trampoline_data.extend_from_slice(&[0xFF, 0x90]); // CALL [EAX + offset] - // EAX = trampoline_base_addr + (trampoline_data.len() - 3) - // We want: EAX + offset = syscall_entry_addr + // EAX = trampoline_base_addr + (trampoline_data.len() - 3) + // We want: EAX + offset = syscall_entry_addr #[allow(clippy::cast_possible_wrap)] let disp32 = i64::try_from(syscall_entry_addr).unwrap() - i64::try_from(trampoline_base_addr).unwrap() @@ -1104,8 +1109,8 @@ fn hook_syscall_and_after( trampoline_data.extend_from_slice(&[0xE8, 0x0, 0x0, 0x0, 0x0]); // CALL next instruction trampoline_data.push(0x58); // POP EAX (effectively store IP in EAX) 
trampoline_data.extend_from_slice(&[0xFF, 0x90]); // CALL [EAX + offset] - // EAX = trampoline_base_addr + (trampoline_data.len() - 3) - // We want: EAX + offset = syscall_entry_addr + // EAX = trampoline_base_addr + (trampoline_data.len() - 3) + // We want: EAX + offset = syscall_entry_addr #[allow(clippy::cast_possible_wrap)] let disp32 = i64::try_from(syscall_entry_addr).unwrap() - i64::try_from(trampoline_base_addr).unwrap() @@ -1238,8 +1243,8 @@ fn hook_syscall_before_and_after( trampoline_data.extend_from_slice(&[0xE8, 0x0, 0x0, 0x0, 0x0]); // CALL next instruction trampoline_data.push(0x58); // POP EAX (effectively store IP in EAX) trampoline_data.extend_from_slice(&[0xFF, 0x90]); // CALL [EAX + offset] - // EAX = trampoline_base_addr + (trampoline_data.len() - 3) - // We want: EAX + offset = syscall_entry_addr + // EAX = trampoline_base_addr + (trampoline_data.len() - 3) + // We want: EAX + offset = syscall_entry_addr #[allow(clippy::cast_possible_wrap)] let disp32 = i64::try_from(syscall_entry_addr).unwrap() - i64::try_from(trampoline_base_addr).unwrap() @@ -1282,7 +1287,7 @@ fn hook_syscall_before_and_after( #[cfg(test)] mod tests { - use super::{BUN_FOOTER_MARKER, has_bun_footer_marker, patch_code_segment}; + use super::{has_bun_footer_marker, patch_code_segment, BUN_FOOTER_MARKER}; #[test] fn detects_bun_footer_marker_near_end() { From bfa1587ea0ec28f456acb2565162e4ce59e0b424 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 20:07:04 -0700 Subject: [PATCH 08/24] Clean up rewriter: remove unit tests, revert bun footer to suffix check, add x86_64 comment, add post-syscall RIP-relative comment, fix formatting --- litebox_shim_linux/src/loader/elf.rs | 4 +- litebox_shim_linux/src/syscalls/mm.rs | 2 + litebox_syscall_rewriter/src/lib.rs | 115 ++++---------------------- 3 files changed, 18 insertions(+), 103 deletions(-) diff --git a/litebox_shim_linux/src/loader/elf.rs b/litebox_shim_linux/src/loader/elf.rs index 8935ebbe3..b9a345483 100644 
--- a/litebox_shim_linux/src/loader/elf.rs +++ b/litebox_shim_linux/src/loader/elf.rs @@ -11,15 +11,15 @@ use litebox::{ utils::{ReinterpretSignedExt, TruncateExt}, }; use litebox_common_linux::{ + MapFlags, errno::Errno, loader::{ElfParsedFile, ReadAt as _}, - MapFlags, }; use thiserror::Error; use crate::{ - loader::auxv::{AuxKey, AuxVec}, MutPtr, + loader::auxv::{AuxKey, AuxVec}, }; use super::stack::UserStack; diff --git a/litebox_shim_linux/src/syscalls/mm.rs b/litebox_shim_linux/src/syscalls/mm.rs index 453039cba..d20583b3f 100644 --- a/litebox_shim_linux/src/syscalls/mm.rs +++ b/litebox_shim_linux/src/syscalls/mm.rs @@ -424,6 +424,8 @@ impl Task { /// Reads the ELF header to determine the trampoline address (page-aligned /// end of the highest PT_LOAD segment) and checks the file tail for the /// trampoline magic to determine if it's pre-patched. + /// + /// x86_64 only: assumes 64-bit ELF layout and program header offsets. #[allow(clippy::cast_possible_truncation)] fn init_elf_patch_state(&self, fd: i32, base_addr: usize) { // Quick check: skip if already initialized. 
diff --git a/litebox_syscall_rewriter/src/lib.rs b/litebox_syscall_rewriter/src/lib.rs index ca22348bc..dcf703377 100644 --- a/litebox_syscall_rewriter/src/lib.rs +++ b/litebox_syscall_rewriter/src/lib.rs @@ -592,8 +592,8 @@ fn hook_syscalls_in_section( trampoline_data.extend_from_slice(&[0xE8, 0x0, 0x0, 0x0, 0x0]); // CALL next instruction trampoline_data.push(0x58); // POP EAX (effectively store IP in EAX) trampoline_data.extend_from_slice(&[0xFF, 0x90]); // CALL [EAX + offset] - // EAX = trampoline_base_addr + (trampoline_data.len() - 3) - // We want: EAX + offset = syscall_entry_addr + // EAX = trampoline_base_addr + (trampoline_data.len() - 3) + // We want: EAX + offset = syscall_entry_addr #[allow(clippy::cast_possible_wrap)] let disp32 = i64::try_from(syscall_entry_addr).unwrap() - i64::try_from(trampoline_base_addr).unwrap() @@ -763,10 +763,8 @@ fn find_fork_vfork_patch( /// Check if the input binary has the Bun footer marker near the end. fn has_bun_footer_marker(input_binary: &[u8]) -> bool { - let window_len = input_binary.len().min(256); - input_binary[input_binary.len().saturating_sub(window_len)..] - .windows(BUN_FOOTER_MARKER.len()) - .any(|window| window == BUN_FOOTER_MARKER) + input_binary.len() >= BUN_FOOTER_MARKER.len() + && input_binary[input_binary.len() - BUN_FOOTER_MARKER.len()..] == *BUN_FOOTER_MARKER } /// Replace an unpatchable syscall instruction with `UD2` (`0F 0B`) so that @@ -1052,6 +1050,12 @@ fn hook_syscall_and_after( } let replace_end = replace_end.unwrap(); + // This function copies post-syscall instructions to the trampoline as raw + // bytes (no re-encoding). That only works for position-independent + // instructions. If any post-syscall instruction has a RIP-relative memory + // operand, the raw bytes would reference the wrong address from the + // trampoline's location, so fall back to hook_syscall_before_and_after + // which re-encodes both sides with corrected displacements. 
let copied_postsyscall_insts_have_ip_rel_mem = arch == Arch::X86_64 && instruction_slice_has_ip_rel_memory_operand( instructions @@ -1109,8 +1113,8 @@ fn hook_syscall_and_after( trampoline_data.extend_from_slice(&[0xE8, 0x0, 0x0, 0x0, 0x0]); // CALL next instruction trampoline_data.push(0x58); // POP EAX (effectively store IP in EAX) trampoline_data.extend_from_slice(&[0xFF, 0x90]); // CALL [EAX + offset] - // EAX = trampoline_base_addr + (trampoline_data.len() - 3) - // We want: EAX + offset = syscall_entry_addr + // EAX = trampoline_base_addr + (trampoline_data.len() - 3) + // We want: EAX + offset = syscall_entry_addr #[allow(clippy::cast_possible_wrap)] let disp32 = i64::try_from(syscall_entry_addr).unwrap() - i64::try_from(trampoline_base_addr).unwrap() @@ -1243,8 +1247,8 @@ fn hook_syscall_before_and_after( trampoline_data.extend_from_slice(&[0xE8, 0x0, 0x0, 0x0, 0x0]); // CALL next instruction trampoline_data.push(0x58); // POP EAX (effectively store IP in EAX) trampoline_data.extend_from_slice(&[0xFF, 0x90]); // CALL [EAX + offset] - // EAX = trampoline_base_addr + (trampoline_data.len() - 3) - // We want: EAX + offset = syscall_entry_addr + // EAX = trampoline_base_addr + (trampoline_data.len() - 3) + // We want: EAX + offset = syscall_entry_addr #[allow(clippy::cast_possible_wrap)] let disp32 = i64::try_from(syscall_entry_addr).unwrap() - i64::try_from(trampoline_base_addr).unwrap() @@ -1284,94 +1288,3 @@ fn hook_syscall_before_and_after( Ok(()) } - -#[cfg(test)] -mod tests { - use super::{has_bun_footer_marker, patch_code_segment, BUN_FOOTER_MARKER}; - - #[test] - fn detects_bun_footer_marker_near_end() { - let mut bytes = vec![0u8; 512]; - let offset = bytes.len() - BUN_FOOTER_MARKER.len() - 8; - bytes[offset..offset + BUN_FOOTER_MARKER.len()].copy_from_slice(BUN_FOOTER_MARKER); - assert!(has_bun_footer_marker(&bytes)); - } - - #[test] - fn ignores_missing_bun_footer_marker() { - let bytes = vec![0u8; 512]; - assert!(!has_bun_footer_marker(&bytes)); - } 
- - #[test] - fn patch_code_segment_relocates_rip_relative_presyscall_to_trampoline() { - let mut code = vec![ - 0x48, 0x8D, 0x35, 0x10, 0x00, 0x00, 0x00, // lea rsi, [rip + 0x10] @ 0x1000 - 0x0F, 0x05, // syscall @ 0x1007 - 0x31, 0xC0, // xor eax, eax - 0xBA, 0x01, 0x00, 0x00, 0x00, // mov edx, 1 - ]; - - let trampoline = patch_code_segment(&mut code, 0x1000, 0x8000, 0x9000, &mut Vec::new()) - .expect("patch_code_segment should succeed"); - - assert!(!trampoline.is_empty()); - // The lea + syscall region (9 bytes starting at 0x1000) should now be a - // JMP to the trampoline followed by NOPs. - assert_eq!(code[0], 0xE9, "replace region should start with JMP rel32"); - // The trampoline should contain the re-encoded lea with an adjusted - // RIP-relative displacement targeting the same absolute address. - // Original: lea targets 0x1007 + 0x10 = 0x1017. - // Re-encoded at 0x8000: displacement = 0x1017 - (0x8000 + 7) = -0x6FF0 = 0xFFFF9010 - #[allow(clippy::cast_possible_truncation)] - let expected_disp: i32 = 0x1017_i64.wrapping_sub(0x8000 + 7) as i32; - assert_eq!( - &trampoline[3..7], - &expected_disp.to_le_bytes(), - "re-encoded lea displacement should target the original address" - ); - } - - #[test] - fn patch_code_segment_handles_rip_relative_on_both_sides_of_syscall() { - let mut code = vec![ - 0x48, 0x8D, 0x35, 0x10, 0x00, 0x00, 0x00, // lea rsi, [rip + 0x10] @ 0x1000 - 0x0F, 0x05, // syscall @ 0x1007 - 0x48, 0x8D, 0x3D, 0x10, 0x00, 0x00, 0x00, // lea rdi, [rip + 0x10] - ]; - - let mut skipped = Vec::new(); - let stubs = patch_code_segment(&mut code, 0x1000, 0x8000, 0x9000, &mut skipped) - .expect("patch_code_segment should succeed"); - // The pre-syscall lea is re-encoded in the trampoline; the - // post-syscall lea stays in place (not overwritten). 
- assert!(!stubs.is_empty(), "should be patched via re-encoding"); - assert_eq!(code[0], 0xE9, "replace region should start with JMP"); - assert!(skipped.is_empty(), "nothing should be skipped"); - } - - #[test] - fn patch_code_segment_patches_all_syscalls_including_rip_relative() { - let mut code = vec![ - // First syscall: patchable (3 nops before = 5 bytes total with syscall) - 0x90, 0x90, 0x90, // nop; nop; nop - 0x0F, 0x05, // syscall @ offset 3 - 0xC3, // ret - // Second syscall: RIP-relative before, now patchable via re-encoding - 0x48, 0x8D, 0x35, 0x10, 0x00, 0x00, 0x00, // lea rsi, [rip+0x10] - 0x0F, 0x05, // syscall @ offset 13 - 0x48, 0x8D, 0x3D, 0x10, 0x00, 0x00, 0x00, // lea rdi, [rip+0x10] - ]; - - let mut skipped = Vec::new(); - let stubs = patch_code_segment(&mut code, 0x1000, 0x8000, 0x9000, &mut skipped).unwrap(); - - assert!(!stubs.is_empty(), "both syscalls should be patched"); - assert_eq!(code[0], 0xE9, "first syscall site should be a JMP"); - assert_eq!( - code[6], 0xE9, - "second syscall site (lea start) should be a JMP" - ); - assert!(skipped.is_empty(), "nothing should be skipped"); - } -} From 8bd36f4167b4c03b5d58b1f9c85dfbe3a454e9cc Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 20:54:54 -0700 Subject: [PATCH 09/24] Fix signal handler to check both callback entry points, add short-read guards, remove unused ElfPatchState fields --- litebox_platform_linux_userland/src/lib.rs | 3 ++- litebox_shim_linux/src/syscalls/mm.rs | 25 ++++++++-------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 777bfeada..871c4ccdd 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -2751,7 +2751,8 @@ unsafe fn interrupt_signal_handler( #[cfg(target_arch = "x86")] let is_at_syscall_callback = ip == syscall_callback as *const () as usize; #[cfg(target_arch = "x86_64")] - let 
is_at_syscall_callback = ip == syscall_callback_redzone as *const () as usize; + let is_at_syscall_callback = ip == syscall_callback_redzone as *const () as usize + || ip == syscall_callback as *const () as usize; if is_at_syscall_callback { // No need to clear `in_guest` or set interrupt; the syscall handler will // clear `in_guest` and call into the shim. diff --git a/litebox_shim_linux/src/syscalls/mm.rs b/litebox_shim_linux/src/syscalls/mm.rs index d20583b3f..ab6069b24 100644 --- a/litebox_shim_linux/src/syscalls/mm.rs +++ b/litebox_shim_linux/src/syscalls/mm.rs @@ -23,15 +23,11 @@ use crate::Task; /// Tracks base address and trampoline write cursor for each ELF file that /// has executable segments mapped via `do_mmap_file()`. pub(crate) struct ElfPatchState { - /// Base virtual address of the ELF (recorded from first mmap at offset 0). - pub _base_addr: usize, /// Whether this file is already pre-patched (trampoline magic found at file tail). pub pre_patched: bool, /// For pre-patched binaries: file offset and size of the trampoline data. pub trampoline_file_offset: u64, pub trampoline_file_size: usize, - /// For pre-patched binaries: virtual address offset of the trampoline in the ELF. - pub _trampoline_vaddr: usize, /// Start address of the trampoline region (runtime). pub trampoline_addr: usize, /// Current write position within the trampoline (byte offset from `trampoline_addr`). @@ -43,9 +39,6 @@ pub(crate) struct ElfPatchState { /// Whether any runtime-generated stubs were successfully linked from code /// in this fd to the trampoline. pub runtime_patches_committed: bool, - /// File path of the ELF (from the fd path table, if available). - #[allow(dead_code)] - pub file_path: Option, } /// Per-process collection of ELF patching state, keyed by fd number. @@ -435,8 +428,9 @@ impl Task { // Read the ELF header (first 64 bytes covers both 32-bit and 64-bit). 
let mut ehdr_buf = [0u8; 64]; - if self.sys_read(fd, &mut ehdr_buf, Some(0)).is_err() { - return; // Not readable, skip + match self.sys_read(fd, &mut ehdr_buf, Some(0)) { + Ok(n) if n == ehdr_buf.len() => {} + _ => return, // Not readable or short read, skip } // Verify ELF magic @@ -456,8 +450,9 @@ impl Task { return; // Sanity check } let mut phdrs_buf = alloc::vec![0u8; phdrs_size]; - if self.sys_read(fd, &mut phdrs_buf, Some(e_phoff)).is_err() { - return; + match self.sys_read(fd, &mut phdrs_buf, Some(e_phoff)) { + Ok(n) if n == phdrs_buf.len() => {} + _ => return, } // Find highest PT_LOAD end (p_vaddr + p_memsz) @@ -509,17 +504,14 @@ impl Task { // Insert under lock (re-check for races). let mut cache = self.global.elf_patch_cache.lock(); cache.entry(fd).or_insert(ElfPatchState { - _base_addr: base_addr, pre_patched, trampoline_file_offset: tramp_file_offset, trampoline_file_size: tramp_file_size as usize, - _trampoline_vaddr: tramp_vaddr as usize, trampoline_addr: trampoline_vaddr, trampoline_cursor: 0, trampoline_mapped: false, trampoline_mapped_len: 0, runtime_patches_committed: false, - file_path: None, }); } @@ -534,8 +526,9 @@ impl Task { return (false, 0, 0, 0); } let mut tail = [0u8; 32]; - if self.sys_read(fd, &mut tail, Some(file_size - 32)).is_err() { - return (false, 0, 0, 0); + match self.sys_read(fd, &mut tail, Some(file_size - 32)) { + Ok(n) if n == tail.len() => {} + _ => return (false, 0, 0, 0), } if &tail[0..8] != litebox_syscall_rewriter::TRAMPOLINE_MAGIC { return (false, 0, 0, 0); From 35d845dfd53140cc93f07b9d0d1fbc9e348339e3 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 21:19:26 -0700 Subject: [PATCH 10/24] Fix integration tests: replace OUT_DIR with CARGO_TARGET_TMPDIR Deleting litebox_runner_linux_userland/build.rs (rtld_audit removal) also removed Cargo's OUT_DIR env var from integration tests. 
Replace the three call sites with env!("CARGO_TARGET_TMPDIR"), which reads at compile time an environment variable that Cargo sets for integration tests and benchmarks (available since Rust 1.54) and requires no build.rs. --- litebox_runner_linux_userland/tests/common/mod.rs | 2 +- litebox_runner_linux_userland/tests/loader.rs | 2 +- litebox_runner_linux_userland/tests/run.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/litebox_runner_linux_userland/tests/common/mod.rs b/litebox_runner_linux_userland/tests/common/mod.rs index e9b6a9810..3f761f64a 100644 --- a/litebox_runner_linux_userland/tests/common/mod.rs +++ b/litebox_runner_linux_userland/tests/common/mod.rs @@ -80,7 +80,7 @@ fn find_rewriter_source_files() -> Vec { /// Compile C code into an executable with caching pub fn compile(src_path: &str, unique_name: &str, exec_or_lib: bool, nolibc: bool) -> PathBuf { - let dir_path = std::env::var("OUT_DIR").unwrap(); + let dir_path = env!("CARGO_TARGET_TMPDIR").to_string(); let path = std::path::Path::new(dir_path.as_str()).join(unique_name); let output = path.to_str().unwrap(); diff --git a/litebox_runner_linux_userland/tests/loader.rs b/litebox_runner_linux_userland/tests/loader.rs index 9850ba843..2ff79f97c 100644 --- a/litebox_runner_linux_userland/tests/loader.rs +++ b/litebox_runner_linux_userland/tests/loader.rs @@ -234,7 +234,7 @@ void _start() { #[test] fn test_syscall_rewriter() { - let dir_path = std::env::var("OUT_DIR").unwrap(); + let dir_path = env!("CARGO_TARGET_TMPDIR").to_string(); let src_path = std::path::Path::new(dir_path.as_str()).join("hello_exec_nolibc.c"); std::fs::write(src_path.clone(), HELLO_WORLD_NOLIBC).unwrap(); let path = std::path::Path::new(dir_path.as_str()).join("hello_exec_nolibc"); diff --git a/litebox_runner_linux_userland/tests/run.rs b/litebox_runner_linux_userland/tests/run.rs index e53303165..3da964a4f 100644 --- a/litebox_runner_linux_userland/tests/run.rs +++ b/litebox_runner_linux_userland/tests/run.rs @@ -32,7 +32,7 @@ impl Runner { Backend::Rewriter => "rewriter",
Backend::Seccomp => "seccomp", }; - let dir_path = PathBuf::from(std::env::var_os("OUT_DIR").unwrap()); + let dir_path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")); let path = match backend { Backend::Seccomp => target.to_path_buf(), Backend::Rewriter => { From e8561c80387ca46505dd2a5ce2a1b32a8a38a465 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 22:23:19 -0700 Subject: [PATCH 11/24] Cross-platform packager: remove Linux-only gates, add OCI symlink resolution, rewrite-include flag Make litebox_packager compile and work on non-Linux hosts (primarily Windows) by: - Remove #![cfg(target_os = "linux")] crate-level gate and the dual-main pattern; gate only the host-mode code path behind cfg(target_os) - Add file_mode() helper with unix/non-unix variants to replace MetadataExt::mode() calls - Extract run_host_mode() behind #[cfg(target_os = "linux")] - Track OCI layer symlinks in-memory instead of creating OS symlinks (Windows requires special privileges for symlinks); materialize them after all layers are extracted via resolve_symlink_in_rootfs() - Add is_unix_absolute(), strip_unix_root(), normalize_path() helpers for cross-platform path handling - Force linux/amd64 platform when pulling OCI images - Normalize path separators to Unix-style in tar entries - Add --rewrite-include CLI flag for dlopen'd libraries - Change Bun executable detection from warning to hard error - Switch tar headers from GNU to UStar format --- litebox_packager/src/lib.rs | 165 +++++++++++++++--- litebox_packager/src/main.rs | 10 -- litebox_packager/src/oci.rs | 326 +++++++++++++++++++++++++++++++---- 3 files changed, 432 insertions(+), 69 deletions(-) diff --git a/litebox_packager/src/lib.rs b/litebox_packager/src/lib.rs index 95cf3bf20..f75e6fd54 100644 --- a/litebox_packager/src/lib.rs +++ b/litebox_packager/src/lib.rs @@ -1,21 +1,37 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. 
-// Restrict this crate to only work on Linux, as it relies on `ldd` for -// dependency discovery and other Linux-specific functionality. -#![cfg(target_os = "linux")] - #[cfg(target_arch = "x86_64")] pub mod oci; use anyhow::{Context, bail}; use clap::Parser; use rayon::prelude::*; -use std::collections::{BTreeMap, BTreeSet}; -use std::os::unix::fs::MetadataExt as _; +#[cfg(target_os = "linux")] +use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use tar::{Builder, Header}; +/// Return Unix permission mode bits for a file. +/// +/// On Unix this returns the real mode from metadata. On other platforms it +/// returns 0o755 for files with a read-only attribute cleared, 0o644 otherwise. +#[cfg(unix)] +fn file_mode(metadata: &std::fs::Metadata) -> u32 { + use std::os::unix::fs::MetadataExt as _; + metadata.mode() +} + +#[cfg(not(unix))] +fn file_mode(metadata: &std::fs::Metadata) -> u32 { + if metadata.permissions().readonly() { + 0o644 + } else { + 0o755 + } +} + /// Package Linux ELF programs for execution under LiteBox. /// /// Discovers shared library dependencies, rewrites all ELF files using the @@ -54,6 +70,14 @@ pub struct CliArgs { #[arg(long = "include", value_name = "HOST_PATH:TAR_PATH")] pub include: Vec, + /// Include extra ELF files in the tar **with** syscall rewriting. + /// Use this for shared libraries that are loaded at runtime via `dlopen` + /// (e.g., NSS modules like `libnss_dns.so.2`) and therefore not discovered + /// by the automatic dependency scan. + /// Format: HOST_PATH:TAR_PATH (same as `--include`). + #[arg(long = "rewrite-include", value_name = "HOST_PATH:TAR_PATH")] + pub rewrite_include: Vec, + /// Skip rewriting specific files (by their absolute path on the host). 
#[arg(long = "no-rewrite", value_name = "PATH")] pub no_rewrite: Vec, @@ -99,7 +123,24 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { } } - // --- Phase 1: Validate inputs --- + // Host mode (local ELF files + ldd dependency discovery) is Linux-only. + #[cfg(target_os = "linux")] + { + run_host_mode(args) + } + + #[cfg(not(target_os = "linux"))] + { + bail!( + "Host mode (local ELF files) is only supported on Linux. \ + Use --oci-image to pull a container image instead." + ); + } +} + +/// Host mode: package local ELF files with ldd-based dependency discovery. +#[cfg(target_os = "linux")] +fn run_host_mode(args: CliArgs) -> anyhow::Result<()> { let input_files: Vec = args .input_files .iter() @@ -116,6 +157,24 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { }) .collect::>>()?; + let includes: Vec = args + .include + .iter() + .map(|s| parse_include(s)) + .collect::>>()?; + + let rewrite_includes: Vec = args + .rewrite_include + .iter() + .map(|s| parse_include(s)) + .collect::>>()?; + + for inc in includes.iter().chain(&rewrite_includes) { + if !inc.host_path.exists() { + bail!("included file does not exist: {}", inc.host_path.display()); + } + } + let no_rewrite: BTreeSet = args .no_rewrite .iter() @@ -151,12 +210,13 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { let par_results: Vec>> = file_map_vec .into_par_iter() - .map(|(real_path, tar_paths)| { + .map(|(real_path, tar_paths): (&PathBuf, &Vec)| { let data = std::fs::read(real_path) .with_context(|| format!("failed to read {}", real_path.display()))?; - let mode = std::fs::metadata(real_path) - .with_context(|| format!("failed to stat {}", real_path.display()))? 
- .mode(); + let mode = file_mode( + &std::fs::metadata(real_path) + .with_context(|| format!("failed to stat {}", real_path.display()))?, + ); let rewritten = if no_rewrite.contains(real_path) { if verbose { @@ -208,7 +268,7 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { // --- Phase 2: Scan rootfs for files --- eprintln!("Scanning rootfs..."); - let file_map = oci::scan_rootfs(&extracted.rootfs_path, args.verbose)?; + let file_map = oci::scan_rootfs(&extracted.rootfs_path, &extracted.symlinks, args.verbose)?; let no_rewrite: BTreeSet = args .no_rewrite @@ -309,11 +369,11 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { } // --------------------------------------------------------------------------- -// Shared finalization: includes, rtld audit injection, tar build, size report +// Shared finalization: includes, tar build, size report // --------------------------------------------------------------------------- -/// Append `--include` files, inject the rtld audit library, build the output -/// tar, and print a size summary. +/// Append `--include` and `--rewrite-include` files, build the output tar, +/// and print a size summary. /// /// Both host mode and OCI mode call this after producing their rewritten /// `TarEntry` list. @@ -342,7 +402,7 @@ fn finalize_tar( let data = std::fs::read(&inc.host_path) .with_context(|| format!("failed to read included file {}", inc.host_path.display()))?; let mode = std::fs::metadata(&inc.host_path) - .map(|m| m.mode()) + .map(|m| file_mode(&m)) .unwrap_or(0o644); if args.verbose { eprintln!( @@ -358,6 +418,40 @@ fn finalize_tar( }); } + // Include extra ELF files **with** rewriting (for dlopen'd libraries). 
+ let rewrite_includes: Vec = args + .rewrite_include + .iter() + .map(|s| parse_include(s)) + .collect::>>()?; + + for inc in &rewrite_includes { + if !added_tar_paths.insert(inc.tar_path.clone()) { + bail!( + "duplicate tar path from --rewrite-include: '{}' (already present)", + inc.tar_path + ); + } + let data = std::fs::read(&inc.host_path) + .with_context(|| format!("failed to read {}", inc.host_path.display()))?; + let mode = std::fs::metadata(&inc.host_path) + .map(|m| file_mode(&m)) + .unwrap_or(0o755); + let rewritten = rewrite_elf(&data, &inc.host_path, args.verbose)?; + if args.verbose { + eprintln!( + " rewrite-including {} as {}", + inc.host_path.display(), + inc.tar_path + ); + } + tar_entries.push(TarEntry { + tar_path: inc.tar_path.clone(), + data: rewritten, + mode, + }); + } + // Build tar. eprintln!("Creating {}...", args.output.display()); build_tar(&tar_entries, &args.output)?; @@ -378,20 +472,23 @@ fn finalize_tar( } // --------------------------------------------------------------------------- -// Dependency discovery (via ldd) +// Dependency discovery (via ldd) — Linux only // --------------------------------------------------------------------------- +#[cfg(target_os = "linux")] struct ResolvedDep { ldd_path: PathBuf, real_path: PathBuf, } +#[cfg(target_os = "linux")] struct DepDiscoveryResult { resolved: Vec, missing: Vec, } /// Run `ldd` on the given ELF and return resolved dependencies. 
+#[cfg(target_os = "linux")] fn find_dependencies(elf_path: &Path, verbose: bool) -> anyhow::Result { let output = std::process::Command::new("ldd") .arg(elf_path) @@ -483,6 +580,7 @@ fn find_dependencies(elf_path: &Path, verbose: bool) -> anyhow::Result anyhow::Result bail!( + "{} is a Bun-packaged executable and cannot be packaged as-is: \ + tar-loaded programs must already contain LiteBox syscall trampolines", + path.display() + ), Err(litebox_syscall_rewriter::Error::NoTextSectionFound) => { if verbose { eprintln!( @@ -602,15 +705,6 @@ fn rewrite_elf(data: &[u8], path: &Path, verbose: bool) -> anyhow::Result { - if verbose { - eprintln!( - " warning: {} is a Bun-packaged executable, using as-is", - path.display() - ); - } - Ok(data.to_vec()) - } Err(e) => Err(e).with_context(|| format!("failed to rewrite {}", path.display())), } } @@ -631,7 +725,7 @@ fn build_tar(entries: &[TarEntry], output: &Path) -> anyhow::Result<()> { let mut builder = Builder::new(file); for entry in entries { - let mut header = Header::new_gnu(); + let mut header = Header::new_ustar(); header.set_size(entry.data.len() as u64); // Mask to permission bits only (rwxrwxrwx). The full st_mode from // MetadataExt::mode() includes file type bits (e.g., 0o100755) which @@ -649,3 +743,20 @@ fn build_tar(entries: &[TarEntry], output: &Path) -> anyhow::Result<()> { builder.finish().context("failed to finalize tar archive")?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::rewrite_elf; + use std::path::Path; + + #[test] + fn rewrite_elf_rejects_bun_packaged_executables() { + let mut bun_binary = b"\x7fELF".to_vec(); + bun_binary.extend_from_slice(b"\n---- Bun! 
----\n"); + + let error = rewrite_elf(&bun_binary, Path::new("/tmp/claude"), false) + .expect_err("bun-packaged executable should not be packaged as-is"); + + assert!(error.to_string().contains("Bun-packaged executable")); + } +} diff --git a/litebox_packager/src/main.rs b/litebox_packager/src/main.rs index 2acb1167d..01987d6e8 100644 --- a/litebox_packager/src/main.rs +++ b/litebox_packager/src/main.rs @@ -1,18 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -// Restrict this crate to only work on Linux, as it relies on `ldd` for -// dependency discovery and other Linux-specific functionality. - -#[cfg(target_os = "linux")] fn main() -> anyhow::Result<()> { use clap::Parser as _; use litebox_packager::CliArgs; litebox_packager::run(CliArgs::parse()) } - -#[cfg(not(target_os = "linux"))] -fn main() { - eprintln!("This program is only supported on Linux"); - std::process::exit(1); -} diff --git a/litebox_packager/src/oci.rs b/litebox_packager/src/oci.rs index adb951833..e14aecfb3 100644 --- a/litebox_packager/src/oci.rs +++ b/litebox_packager/src/oci.rs @@ -7,9 +7,8 @@ //! extracts its filesystem layers into a temporary rootfs directory, then //! walks the rootfs to discover all ELF files for syscall rewriting. -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::io::Read; -use std::os::unix::fs::PermissionsExt as _; use std::path::{Path, PathBuf}; use anyhow::Context; @@ -38,6 +37,9 @@ pub struct ExtractedImage { pub config: ImageConfig, /// Raw OCI image config JSON blob (the full config descriptor data). pub config_json: Vec, + /// Symlinks collected during layer extraction (used for cross-platform + /// resolution since OS symlinks may not work on all platforms). + pub symlinks: Vec, } /// Result of scanning an extracted rootfs for files to package. 
@@ -96,6 +98,17 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result anyhow::Result = Vec::new(); for (i, layer) in image_data.layers.iter().enumerate() { if verbose { eprintln!( @@ -143,10 +157,17 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result anyhow::Result String { /// Extract a single OCI layer (tar or tar+gzip) into the rootfs directory. /// /// Handles OCI whiteout files (`.wh.*` prefixed entries) which indicate -/// files deleted in upper layers. -fn extract_layer(data: &[u8], media_type: &str, rootfs: &Path) -> anyhow::Result<()> { +/// files deleted in upper layers. Symlinks are collected into `symlinks` for +/// cross-platform resolution after all layers are extracted. +fn extract_layer( + data: &[u8], + media_type: &str, + rootfs: &Path, + symlinks: &mut Vec, +) -> anyhow::Result<()> { // Determine if the layer is gzipped let is_gzip = media_type.contains("gzip") || is_gzip_data(data); if is_gzip { let decoder = flate2::read::GzDecoder::new(data); - extract_tar(decoder, rootfs) + extract_tar(decoder, rootfs, symlinks) } else { - extract_tar(data, rootfs) + extract_tar(data, rootfs, symlinks) } } @@ -306,11 +334,26 @@ struct DeferredHardLink { link_source: PathBuf, } +/// Tracked symlink from a container image layer. +pub struct DeferredSymlink { + /// Relative path inside the rootfs (e.g., `usr/lib64/ld-linux-x86-64.so.2`). + rel_path: PathBuf, + /// Symlink target as stored in the tar (Unix-style, may be relative or absolute). + link_target: PathBuf, +} + /// Extract a tar archive into the rootfs, handling OCI whiteout files. /// -/// Hard links whose targets appear later in the archive are collected during -/// the first pass and resolved after all regular entries have been extracted. -fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { +/// Symlinks are NOT created as OS symlinks. 
Instead they are tracked in +/// `symlinks` so the caller can resolve them cross-platform after all layers +/// are extracted. Hard links whose targets appear later in the archive are +/// collected during the first pass and resolved after all regular entries +/// have been extracted. +fn extract_tar( + reader: R, + rootfs: &Path, + symlinks: &mut Vec, +) -> anyhow::Result<()> { let mut archive = tar::Archive::new(reader); archive.set_preserve_permissions(true); archive.set_unpack_xattrs(true); @@ -364,11 +407,12 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { std::fs::create_dir_all(parent)?; } + let entry_type = entry.header().entry_type(); + // Handle hard links: copy the link target instead of creating an OS // hard link. The tar crate's unpack() tries std::fs::hard_link which // can fail if the target hasn't been extracted yet (ordering issue), // and the litebox filesystem doesn't support hard links anyway. - let entry_type = entry.header().entry_type(); if entry_type == tar::EntryType::Link { let link_name = entry .link_name()? @@ -393,7 +437,26 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { continue; } - // Normal file/directory/symlink: use the standard unpack + // Track symlinks in memory instead of creating OS symlinks. + // OS symlinks on Windows require special privileges and don't handle + // Unix-style relative paths reliably, so we resolve them ourselves + // after all layers are extracted. + if entry_type == tar::EntryType::Symlink { + let link_target = entry + .link_name()? + .context("symlink entry has no link name")? + .into_owned(); + // A later layer may override this symlink, so remove any stale + // entry with the same rel_path. 
+ symlinks.retain(|s| s.rel_path != path); + symlinks.push(DeferredSymlink { + rel_path: path.clone(), + link_target, + }); + continue; + } + + // Normal file/directory: use the standard unpack entry .unpack(&target) .with_context(|| format!("failed to unpack entry: {path_str}"))?; @@ -426,20 +489,220 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { Ok(()) } +/// Resolve a symlink target within the rootfs using the symlink map. +/// +/// Handles both absolute targets (e.g., `/lib/x86_64-linux-gnu/ld.so`) and +/// relative targets (e.g., `../lib/x86_64-linux-gnu/ld.so`). Follows symlink +/// chains up to `max_depth` hops. +fn resolve_symlink_in_rootfs( + rel_path: &Path, + rootfs: &Path, + symlink_map: &HashMap, + max_depth: u32, +) -> Option { + if max_depth == 0 { + return None; + } + + // Check if this rel_path is itself a symlink + if let Some(link_target) = symlink_map.get(rel_path) { + // Resolve the target to a new rel_path + let resolved_rel = if is_unix_absolute(link_target) { + strip_unix_root(link_target) + } else { + // Relative target: resolve from parent of the symlink + let parent = rel_path.parent().unwrap_or(Path::new("")); + normalize_path(&parent.join(link_target)) + }; + // Recurse to follow chains + return resolve_symlink_in_rootfs(&resolved_rel, rootfs, symlink_map, max_depth - 1); + } + + // Not a symlink — check if any ancestor is a symlink (e.g., `lib64/foo` where + // `lib64` → `usr/lib64`). 
+ let components: Vec<_> = rel_path.components().collect(); + for i in 1..components.len() { + let prefix: PathBuf = components[..i].iter().collect(); + if let Some(link_target) = symlink_map.get(&prefix) { + let resolved_prefix = if is_unix_absolute(link_target) { + strip_unix_root(link_target) + } else { + let parent = prefix.parent().unwrap_or(Path::new("")); + normalize_path(&parent.join(link_target)) + }; + let suffix: PathBuf = components[i..].iter().collect(); + let new_rel = resolved_prefix.join(suffix); + return resolve_symlink_in_rootfs(&new_rel, rootfs, symlink_map, max_depth - 1); + } + } + + let host_path = rootfs.join(rel_path); + if host_path.exists() { + Some(host_path) + } else { + None + } +} + +/// Check if a path starts with `/` (Unix-style absolute). +/// +/// On Windows, `Path::is_absolute()` requires a drive letter, so Unix-style +/// paths like `/lib/foo` are not detected as absolute. This helper checks +/// the raw string instead. +fn is_unix_absolute(path: &Path) -> bool { + path.as_os_str() + .to_str() + .is_some_and(|s| s.starts_with('/')) + || path.is_absolute() +} + +/// Strip the leading `/` from a Unix-style absolute path to make it +/// rootfs-relative. Returns the path unchanged if it doesn't start with `/`. +fn strip_unix_root(path: &Path) -> PathBuf { + if let Some(stripped) = path.as_os_str().to_str().and_then(|s| s.strip_prefix('/')) { + return PathBuf::from(stripped); + } + path.strip_prefix("/").unwrap_or(path).to_path_buf() +} + +/// Normalize a path by resolving `.` and `..` components without touching the +/// filesystem (no symlink resolution, no existence checks). Strips any root +/// component so the result is always a relative path. 
+fn normalize_path(path: &Path) -> PathBuf { + let mut result = Vec::new(); + for component in path.components() { + match component { + std::path::Component::ParentDir => { + result.pop(); + } + std::path::Component::CurDir | std::path::Component::RootDir => {} + c => result.push(c), + } + } + result.iter().collect() +} + +/// Materialize all deferred symlinks by copying or creating directories. +/// +/// This is called after all OCI layers have been extracted, so every real file +/// should be on disk. Symlinks are resolved through the in-memory map (handling +/// chains like `lib64` → `usr/lib64` → real dir) and then: +/// - File symlinks: the target file is copied to the symlink location. +/// - Directory symlinks: an empty directory is created (its contents will be +/// expanded by `scan_rootfs`'s dir-symlink logic). +fn materialize_symlinks( + symlinks: &[DeferredSymlink], + rootfs: &Path, + verbose: bool, +) -> anyhow::Result<()> { + // Build a map for O(1) lookup during resolution. + let symlink_map: HashMap = symlinks + .iter() + .map(|s| (s.rel_path.clone(), s.link_target.clone())) + .collect(); + + for sym in symlinks { + let host_path = rootfs.join(&sym.rel_path); + if host_path.exists() { + // A later layer may have replaced the symlink with a real file. + continue; + } + + if let Some(resolved) = resolve_symlink_in_rootfs( + &sym.rel_path, + rootfs, + &symlink_map, + 32, // max chain depth + ) { + if let Some(parent) = host_path.parent() { + std::fs::create_dir_all(parent)?; + } + + if resolved.is_dir() { + // Directory symlink: create directory placeholder. + // scan_rootfs will discover this is a "dir symlink" and expand + // it through the symlink_map. 
+ std::fs::create_dir_all(&host_path)?; + if verbose { + eprintln!( + " [symlink→dir] {} -> {}", + sym.rel_path.display(), + sym.link_target.display() + ); + } + } else if resolved.is_file() { + std::fs::copy(&resolved, &host_path).with_context(|| { + format!( + "failed to materialize symlink {} -> {}", + sym.rel_path.display(), + resolved.display() + ) + })?; + if verbose { + eprintln!( + " [symlink→file] {} -> {}", + sym.rel_path.display(), + sym.link_target.display() + ); + } + } + } else if verbose { + eprintln!( + " [symlink-broken] {} -> {} (unresolvable)", + sym.rel_path.display(), + sym.link_target.display() + ); + } + } + + Ok(()) +} + /// Scan an extracted rootfs directory and build a file map for packaging. /// /// Walks the rootfs directory tree and collects all regular files with their -/// paths and permission bits. Symlinks are resolved within the rootfs context -/// and flattened into regular file copies (the litebox tar RO filesystem does -/// not support symlinks). +/// paths and permission bits. After `materialize_symlinks` has been called, +/// file symlinks are already materialized as regular file copies on disk. /// -/// **Directory symlinks** (e.g., `/lib64` → `/usr/lib64`) are expanded: all -/// files under the target directory are duplicated under the symlink's path -/// prefix so that paths like `/lib64/ld-linux-x86-64.so.2` exist in the tar. -pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result { +/// `deferred_symlinks` provides the original symlink map from extraction so +/// that **directory symlinks** (e.g., `lib64` → `usr/lib64`) can be expanded: +/// all files under the target directory are duplicated under the symlink's +/// path prefix so that paths like `lib64/ld-linux-x86-64.so.2` exist in the tar. +pub fn scan_rootfs( + rootfs: &Path, + deferred_symlinks: &[DeferredSymlink], + verbose: bool, +) -> anyhow::Result { let mut files = BTreeMap::new(); - // Collect directory symlinks to expand after the initial walk. 
+ + // Build the symlink map for resolution. + let symlink_map: HashMap = deferred_symlinks + .iter() + .map(|s| (s.rel_path.clone(), s.link_target.clone())) + .collect(); + + // Identify directory symlinks and their resolved targets on disk. let mut dir_symlinks: Vec<(PathBuf, PathBuf)> = Vec::new(); + for sym in deferred_symlinks { + let host_path = rootfs.join(&sym.rel_path); + if host_path.is_dir() { + // This dir symlink was materialized as an empty directory. + // Resolve the target to find the real directory to expand from. + if let Some(resolved) = + resolve_symlink_in_rootfs(&sym.rel_path, rootfs, &symlink_map, 32) + .filter(|r| r.is_dir()) + { + if verbose { + eprintln!( + " [dir-symlink] {} -> {}", + sym.rel_path.display(), + sym.link_target.display() + ); + } + dir_symlinks.push((host_path, resolved)); + } + } + } for entry in walkdir::WalkDir::new(rootfs) .follow_links(false) @@ -454,10 +717,12 @@ pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result anyhow::Result {}", resolved.display()); } @@ -552,6 +812,8 @@ pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result anyhow::Result Option Date: Thu, 2 Apr 2026 22:28:16 -0700 Subject: [PATCH 12/24] Fix OCI whiteout symlink pruning and degenerate symlink target resolution Two bugs found during review: 1. Opaque whiteouts (.wh..wh..opq) and regular whiteouts (.wh.) removed files from disk but did not prune corresponding entries from the in-memory symlinks vec. This caused materialize_symlinks() to resurrect deleted symlinks that a later layer intended to remove. 2. resolve_symlink_in_rootfs() could return Some(rootfs) when a degenerate symlink target with excess .. segments normalized to an empty path via normalize_path(). rootfs.join("") == rootfs, which exists as a directory, causing the entire rootfs to be treated as a resolution target. Guard against empty rel_path at function entry. 
--- litebox_packager/src/oci.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/litebox_packager/src/oci.rs b/litebox_packager/src/oci.rs index e14aecfb3..25aa9a9fb 100644 --- a/litebox_packager/src/oci.rs +++ b/litebox_packager/src/oci.rs @@ -383,17 +383,25 @@ fn extract_tar( } } } + // Also prune in-memory symlinks under this directory so + // they are not resurrected by materialize_symlinks. + symlinks.retain(|s| !s.rel_path.starts_with(parent)); } continue; } if let Some(target_name) = file_name.strip_prefix(".wh.") { // Regular whiteout: delete the specific file/directory if let Some(parent) = path.parent() { - let target = rootfs.join(parent).join(target_name); + let whiteout_rel = parent.join(target_name); + let target = rootfs.join(&whiteout_rel); if target.is_dir() { let _ = std::fs::remove_dir_all(&target); + // Prune symlinks under the removed directory. + symlinks.retain(|s| !s.rel_path.starts_with(&whiteout_rel)); } else { let _ = std::fs::remove_file(&target); + // Prune the exact symlink entry if present. + symlinks.retain(|s| s.rel_path != whiteout_rel); } } continue; @@ -504,6 +512,12 @@ fn resolve_symlink_in_rootfs( return None; } + // Empty rel_path would resolve to the rootfs directory itself — treat + // as unresolvable to avoid accidentally matching the entire rootfs. 
+ if rel_path.as_os_str().is_empty() { + return None; + } + // Check if this rel_path is itself a symlink if let Some(link_target) = symlink_map.get(rel_path) { // Resolve the target to a new rel_path From 1eec9f3ceb38cffacd603b1235f5d74117f9620b Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Thu, 2 Apr 2026 22:41:21 -0700 Subject: [PATCH 13/24] Remove no-op build.rs from litebox_packager (rtld_audit fully removed) --- litebox_packager/build.rs | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 litebox_packager/build.rs diff --git a/litebox_packager/build.rs b/litebox_packager/build.rs deleted file mode 100644 index f189226e4..000000000 --- a/litebox_packager/build.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -fn main() { - // rtld_audit has been removed; nothing to build. -} From 41d4da7c595921995c3e9687acd3d0fa4bd6a372 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Fri, 3 Apr 2026 21:00:59 +0000 Subject: [PATCH 14/24] Preserve tar header permissions for cross-platform OCI extraction - Store Unix permission modes from tar headers in a HashMap during extraction, so permission bits are accurate on non-Unix hosts (Windows) instead of relying on the file_mode() heuristic which returns wrong answers (0o755 for most files). - Build symlink_map once in pull_and_extract and pass through to materialize_symlinks and scan_rootfs (was duplicated in both). - Add lookup_mode() helper that prefers tar header permissions, falls back to file_mode(), defaults to 0o644. - Add existence check for --rewrite-include in finalize_tar (was missing). - Remove redundant --include/--rewrite-include parsing from run_host_mode. - Replace Bun test with rewrite_elf_skips_non_elf_files test. - Add 22 unit tests for normalize_path, is_unix_absolute, strip_unix_root, resolve_symlink_in_rootfs, and lookup_mode. - Add litebox_packager to build_and_test_windows CI job. 
--- .github/workflows/ci.yml | 6 +- litebox_packager/src/lib.rs | 44 ++-- litebox_packager/src/oci.rs | 405 +++++++++++++++++++++++++++++++----- 3 files changed, 377 insertions(+), 78 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2255b608..7d328968c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -188,16 +188,20 @@ jobs: tool: nextest@${{ env.NEXTEST_VERSION }} - uses: Swatinem/rust-cache@v2 - run: cargo clippy --locked --verbose --all-targets --all-features -p litebox_runner_linux_on_windows_userland + - run: cargo clippy --locked --verbose --all-targets --all-features -p litebox_packager - run: cargo build --locked --verbose -p litebox_runner_linux_on_windows_userland + - run: cargo build --locked --verbose -p litebox_packager - run: cargo nextest run --locked --profile ci -p litebox_runner_linux_on_windows_userland + - run: cargo nextest run --locked --profile ci -p litebox_packager - run: cargo nextest run --locked --profile ci -p litebox_shim_linux --no-default-features --features platform_windows_userland - run: | cargo test --locked --verbose --doc -p litebox_runner_linux_on_windows_userland + cargo test --locked --verbose --doc -p litebox_packager # We need to run `cargo test --doc` separately because doc tests # aren't included in nextest at the moment. 
See relevant discussion at # https://github.com/nextest-rs/nextest/issues/16 - name: Build documentation (fail on warnings) - run: cargo doc --locked --verbose --no-deps --all-features --document-private-items -p litebox_runner_linux_on_windows_userland + run: cargo doc --locked --verbose --no-deps --all-features --document-private-items -p litebox_runner_linux_on_windows_userland -p litebox_packager build_and_test_snp: name: Build and Test SNP diff --git a/litebox_packager/src/lib.rs b/litebox_packager/src/lib.rs index f75e6fd54..08080122b 100644 --- a/litebox_packager/src/lib.rs +++ b/litebox_packager/src/lib.rs @@ -157,24 +157,6 @@ fn run_host_mode(args: CliArgs) -> anyhow::Result<()> { }) .collect::>>()?; - let includes: Vec = args - .include - .iter() - .map(|s| parse_include(s)) - .collect::>>()?; - - let rewrite_includes: Vec = args - .rewrite_include - .iter() - .map(|s| parse_include(s)) - .collect::>>()?; - - for inc in includes.iter().chain(&rewrite_includes) { - if !inc.host_path.exists() { - bail!("included file does not exist: {}", inc.host_path.display()); - } - } - let no_rewrite: BTreeSet = args .no_rewrite .iter() @@ -268,7 +250,12 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { // --- Phase 2: Scan rootfs for files --- eprintln!("Scanning rootfs..."); - let file_map = oci::scan_rootfs(&extracted.rootfs_path, &extracted.symlinks, args.verbose)?; + let file_map = oci::scan_rootfs( + &extracted.rootfs_path, + &extracted.symlink_map, + &extracted.permissions, + args.verbose, + )?; let no_rewrite: BTreeSet = args .no_rewrite @@ -426,6 +413,12 @@ fn finalize_tar( .collect::>>()?; for inc in &rewrite_includes { + if !inc.host_path.exists() { + bail!( + "rewrite-included file does not exist: {}", + inc.host_path.display() + ); + } if !added_tar_paths.insert(inc.tar_path.clone()) { bail!( "duplicate tar path from --rewrite-include: '{}' (already present)", @@ -750,13 +743,10 @@ mod tests { use std::path::Path; #[test] - fn 
rewrite_elf_rejects_bun_packaged_executables() { - let mut bun_binary = b"\x7fELF".to_vec(); - bun_binary.extend_from_slice(b"\n---- Bun! ----\n"); - - let error = rewrite_elf(&bun_binary, Path::new("/tmp/claude"), false) - .expect_err("bun-packaged executable should not be packaged as-is"); - - assert!(error.to_string().contains("Bun-packaged executable")); + fn rewrite_elf_skips_non_elf_files() { + // Non-ELF data should be returned unmodified. + let data = b"#!/bin/sh\necho hello\n"; + let result = rewrite_elf(data, Path::new("/tmp/script.sh"), false).unwrap(); + assert_eq!(result, data); } } diff --git a/litebox_packager/src/oci.rs b/litebox_packager/src/oci.rs index 25aa9a9fb..28d65ff4e 100644 --- a/litebox_packager/src/oci.rs +++ b/litebox_packager/src/oci.rs @@ -37,9 +37,13 @@ pub struct ExtractedImage { pub config: ImageConfig, /// Raw OCI image config JSON blob (the full config descriptor data). pub config_json: Vec, - /// Symlinks collected during layer extraction (used for cross-platform - /// resolution since OS symlinks may not work on all platforms). - pub symlinks: Vec, + /// Symlink map from layer extraction: maps relative paths inside the + /// rootfs to their (Unix-style) link targets for cross-platform resolution. + pub symlink_map: HashMap, + /// Unix permission modes captured from tar headers during extraction. + /// Keyed by relative path inside the rootfs. Used instead of querying + /// filesystem metadata, which loses Unix mode bits on non-Unix hosts. + pub permissions: HashMap, } /// Result of scanning an extracted rootfs for files to package. 
@@ -148,6 +152,7 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result = Vec::new(); + let mut permissions: HashMap = HashMap::new(); for (i, layer) in image_data.layers.iter().enumerate() { if verbose { eprintln!( @@ -157,16 +162,28 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result = symlinks + .iter() + .map(|s| (s.rel_path.clone(), s.link_target.clone())) + .collect(); + // Materialize symlinks cross-platform: resolve chains through the in-memory // map and copy target files (or create directories) instead of OS symlinks. if verbose { eprintln!(" Resolving {} symlinks...", symlinks.len()); } - materialize_symlinks(&symlinks, &rootfs_path, verbose)?; + materialize_symlinks(&symlink_map, &rootfs_path, &mut permissions, verbose)?; if verbose { eprintln!(" Rootfs extracted to {}", rootfs_path.display()); @@ -209,7 +226,8 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result String { /// /// Handles OCI whiteout files (`.wh.*` prefixed entries) which indicate /// files deleted in upper layers. Symlinks are collected into `symlinks` for -/// cross-platform resolution after all layers are extracted. +/// cross-platform resolution after all layers are extracted. Permission modes +/// from tar headers are recorded in `permissions` for cross-platform use. fn extract_layer( data: &[u8], media_type: &str, rootfs: &Path, symlinks: &mut Vec, + permissions: &mut HashMap, ) -> anyhow::Result<()> { // Determine if the layer is gzipped let is_gzip = media_type.contains("gzip") || is_gzip_data(data); if is_gzip { let decoder = flate2::read::GzDecoder::new(data); - extract_tar(decoder, rootfs, symlinks) + extract_tar(decoder, rootfs, symlinks, permissions) } else { - extract_tar(data, rootfs, symlinks) + extract_tar(data, rootfs, symlinks, permissions) } } @@ -332,10 +352,12 @@ struct DeferredHardLink { target: PathBuf, /// Source path inside the rootfs (the file the hard link points to). 
link_source: PathBuf, + /// Original link name from the tar header (used for permission lookup). + link_name: PathBuf, } /// Tracked symlink from a container image layer. -pub struct DeferredSymlink { +struct DeferredSymlink { /// Relative path inside the rootfs (e.g., `usr/lib64/ld-linux-x86-64.so.2`). rel_path: PathBuf, /// Symlink target as stored in the tar (Unix-style, may be relative or absolute). @@ -348,11 +370,13 @@ pub struct DeferredSymlink { /// `symlinks` so the caller can resolve them cross-platform after all layers /// are extracted. Hard links whose targets appear later in the archive are /// collected during the first pass and resolved after all regular entries -/// have been extracted. +/// have been extracted. Permission modes from tar headers are recorded in +/// `permissions` keyed by relative path. fn extract_tar( reader: R, rootfs: &Path, symlinks: &mut Vec, + permissions: &mut HashMap, ) -> anyhow::Result<()> { let mut archive = tar::Archive::new(reader); archive.set_preserve_permissions(true); @@ -386,6 +410,8 @@ fn extract_tar( // Also prune in-memory symlinks under this directory so // they are not resurrected by materialize_symlinks. symlinks.retain(|s| !s.rel_path.starts_with(parent)); + // Prune permissions for files under the cleared directory. + permissions.retain(|p, _| !p.starts_with(parent)); } continue; } @@ -398,10 +424,14 @@ fn extract_tar( let _ = std::fs::remove_dir_all(&target); // Prune symlinks under the removed directory. symlinks.retain(|s| !s.rel_path.starts_with(&whiteout_rel)); + // Prune permissions under the removed directory. + permissions.retain(|p, _| !p.starts_with(&whiteout_rel)); } else { let _ = std::fs::remove_file(&target); // Prune the exact symlink entry if present. symlinks.retain(|s| s.rel_path != whiteout_rel); + // Prune the exact permissions entry. 
+ permissions.remove(&whiteout_rel); } } continue; @@ -435,11 +465,17 @@ fn extract_tar( target.display() ) })?; + // Copy permission mode from the link source. + let link_rel = normalize_path(&link_name); + if let Some(&mode) = permissions.get(&link_rel) { + permissions.insert(path.clone(), mode); + } } else { // Target hasn't been extracted yet — defer to second pass. deferred_links.push(DeferredHardLink { target, link_source, + link_name: link_name.clone(), }); } continue; @@ -468,6 +504,11 @@ fn extract_tar( entry .unpack(&target) .with_context(|| format!("failed to unpack entry: {path_str}"))?; + + // Record the permission mode from the tar header for cross-platform use. + if let Ok(mode) = entry.header().mode() { + permissions.insert(path.clone(), mode); + } } // Second pass: resolve deferred hard links now that all entries are extracted. @@ -483,6 +524,12 @@ fn extract_tar( link.target.display() ) })?; + // Copy permission mode from the link source. + let link_rel = normalize_path(&link.link_name); + if let Some(&mode) = permissions.get(&link_rel) { + let target_rel = link.target.strip_prefix(rootfs).unwrap_or(&link.target); + permissions.insert(target_rel.to_path_buf(), mode); + } } else { // Target still doesn't exist after the full layer extraction — // this is unusual but not fatal; warn and skip. @@ -602,30 +649,26 @@ fn normalize_path(path: &Path) -> PathBuf { /// should be on disk. Symlinks are resolved through the in-memory map (handling /// chains like `lib64` → `usr/lib64` → real dir) and then: /// - File symlinks: the target file is copied to the symlink location. +/// The resolved target's permission mode is also recorded for the symlink path. /// - Directory symlinks: an empty directory is created (its contents will be /// expanded by `scan_rootfs`'s dir-symlink logic). 
fn materialize_symlinks( - symlinks: &[DeferredSymlink], + symlink_map: &HashMap, rootfs: &Path, + permissions: &mut HashMap, verbose: bool, ) -> anyhow::Result<()> { - // Build a map for O(1) lookup during resolution. - let symlink_map: HashMap = symlinks - .iter() - .map(|s| (s.rel_path.clone(), s.link_target.clone())) - .collect(); - - for sym in symlinks { - let host_path = rootfs.join(&sym.rel_path); + for (rel_path, link_target) in symlink_map { + let host_path = rootfs.join(rel_path); if host_path.exists() { // A later layer may have replaced the symlink with a real file. continue; } if let Some(resolved) = resolve_symlink_in_rootfs( - &sym.rel_path, + rel_path, rootfs, - &symlink_map, + symlink_map, 32, // max chain depth ) { if let Some(parent) = host_path.parent() { @@ -640,31 +683,39 @@ fn materialize_symlinks( if verbose { eprintln!( " [symlink→dir] {} -> {}", - sym.rel_path.display(), - sym.link_target.display() + rel_path.display(), + link_target.display() ); } } else if resolved.is_file() { std::fs::copy(&resolved, &host_path).with_context(|| { format!( "failed to materialize symlink {} -> {}", - sym.rel_path.display(), + rel_path.display(), resolved.display() ) })?; + // Record the resolved target's permission mode for this symlink path. + let resolved_rel = resolved + .strip_prefix(rootfs) + .unwrap_or(&resolved) + .to_path_buf(); + if let Some(&mode) = permissions.get(&resolved_rel) { + permissions.insert(rel_path.clone(), mode); + } if verbose { eprintln!( " [symlink→file] {} -> {}", - sym.rel_path.display(), - sym.link_target.display() + rel_path.display(), + link_target.display() ); } } } else if verbose { eprintln!( " [symlink-broken] {} -> {} (unresolvable)", - sym.rel_path.display(), - sym.link_target.display() + rel_path.display(), + link_target.display() ); } } @@ -672,45 +723,59 @@ fn materialize_symlinks( Ok(()) } +/// Look up the Unix permission mode for a file. 
+/// +/// Prefers the tar-header–derived `permissions` map (keyed by rootfs-relative +/// path) which is accurate on all platforms. Falls back to `file_mode()` on +/// the host path (accurate on Unix, heuristic on Windows), and finally +/// defaults to 0o644 if neither source is available. +fn lookup_mode(rel_path: &Path, host_path: &Path, permissions: &HashMap) -> u32 { + if let Some(&mode) = permissions.get(rel_path) { + return mode & 0o7777; + } + if let Ok(metadata) = std::fs::metadata(host_path) { + return super::file_mode(&metadata) & 0o7777; + } + 0o644 +} + /// Scan an extracted rootfs directory and build a file map for packaging. /// /// Walks the rootfs directory tree and collects all regular files with their /// paths and permission bits. After `materialize_symlinks` has been called, /// file symlinks are already materialized as regular file copies on disk. /// -/// `deferred_symlinks` provides the original symlink map from extraction so +/// `symlink_map` provides the original symlink mapping from extraction so /// that **directory symlinks** (e.g., `lib64` → `usr/lib64`) can be expanded: /// all files under the target directory are duplicated under the symlink's /// path prefix so that paths like `lib64/ld-linux-x86-64.so.2` exist in the tar. +/// +/// `permissions` provides Unix permission modes captured from tar headers +/// during extraction, so permission bits are accurate on non-Unix hosts. +#[allow(clippy::implicit_hasher)] pub fn scan_rootfs( rootfs: &Path, - deferred_symlinks: &[DeferredSymlink], + symlink_map: &HashMap, + permissions: &HashMap, verbose: bool, ) -> anyhow::Result { let mut files = BTreeMap::new(); - // Build the symlink map for resolution. - let symlink_map: HashMap = deferred_symlinks - .iter() - .map(|s| (s.rel_path.clone(), s.link_target.clone())) - .collect(); - // Identify directory symlinks and their resolved targets on disk. 
let mut dir_symlinks: Vec<(PathBuf, PathBuf)> = Vec::new(); - for sym in deferred_symlinks { - let host_path = rootfs.join(&sym.rel_path); + for (rel_path, link_target) in symlink_map { + let host_path = rootfs.join(rel_path); if host_path.is_dir() { // This dir symlink was materialized as an empty directory. // Resolve the target to find the real directory to expand from. if let Some(resolved) = - resolve_symlink_in_rootfs(&sym.rel_path, rootfs, &symlink_map, 32) - .filter(|r| r.is_dir()) + resolve_symlink_in_rootfs(rel_path, rootfs, symlink_map, 32).filter(|r| r.is_dir()) { if verbose { eprintln!( " [dir-symlink] {} -> {}", - sym.rel_path.display(), - sym.link_target.display() + rel_path.display(), + link_target.display() ); } dir_symlinks.push((host_path, resolved)); @@ -735,8 +800,7 @@ pub fn scan_rootfs( let tar_path = tar_path.replace('\\', "/"); if entry.file_type().is_file() { - let metadata = entry.metadata()?; - let mode = super::file_mode(&metadata) & 0o7777; + let mode = lookup_mode(rel_path, entry.path(), permissions); let is_executable = mode & 0o111 != 0; if verbose && is_executable { @@ -756,8 +820,8 @@ pub fn scan_rootfs( // On platforms that still have OS symlinks (Linux), resolve them. 
if let Some(resolved) = resolve_in_rootfs(entry.path(), rootfs, 16) { if resolved.is_file() { - let metadata = std::fs::metadata(&resolved)?; - let mode = super::file_mode(&metadata) & 0o7777; + let resolved_rel = resolved.strip_prefix(rootfs).unwrap_or(&resolved); + let mode = lookup_mode(resolved_rel, &resolved, permissions); let is_executable = mode & 0o111 != 0; files.insert( @@ -838,8 +902,8 @@ pub fn scan_rootfs( continue; } - let metadata = std::fs::metadata(&read_path)?; - let mode = super::file_mode(&metadata) & 0o7777; + let read_rel = read_path.strip_prefix(rootfs).unwrap_or(&read_path); + let mode = lookup_mode(read_rel, &read_path, permissions); let is_executable = mode & 0o111 != 0; if verbose { @@ -918,4 +982,245 @@ mod tests { let result = resolve_in_rootfs(Path::new("/tmp"), Path::new("/tmp"), 0); assert!(result.is_none()); } + + // --- normalize_path --- + + #[test] + fn normalize_path_resolves_parent_components() { + let p = normalize_path(Path::new("usr/lib/../bin/sh")); + assert_eq!(p, PathBuf::from("usr/bin/sh")); + } + + #[test] + fn normalize_path_strips_current_dir() { + let p = normalize_path(Path::new("./usr/./bin/sh")); + assert_eq!(p, PathBuf::from("usr/bin/sh")); + } + + #[test] + fn normalize_path_strips_root() { + let p = normalize_path(Path::new("/usr/bin/sh")); + assert_eq!(p, PathBuf::from("usr/bin/sh")); + } + + #[test] + fn normalize_path_double_parent_at_start_clamps() { + // Going above root should just empty the stack. 
+ let p = normalize_path(Path::new("../../foo")); + assert_eq!(p, PathBuf::from("foo")); + } + + #[test] + fn normalize_path_empty_input() { + let p = normalize_path(Path::new("")); + assert_eq!(p, PathBuf::from("")); + } + + // --- is_unix_absolute --- + + #[test] + fn is_unix_absolute_detects_slash_prefix() { + assert!(is_unix_absolute(Path::new("/usr/bin"))); + assert!(is_unix_absolute(Path::new("/"))); + } + + #[test] + fn is_unix_absolute_rejects_relative() { + assert!(!is_unix_absolute(Path::new("usr/bin"))); + assert!(!is_unix_absolute(Path::new("../lib"))); + assert!(!is_unix_absolute(Path::new(""))); + } + + // --- strip_unix_root --- + + #[test] + fn strip_unix_root_removes_leading_slash() { + assert_eq!( + strip_unix_root(Path::new("/usr/bin")), + PathBuf::from("usr/bin") + ); + } + + #[test] + fn strip_unix_root_noop_for_relative() { + assert_eq!( + strip_unix_root(Path::new("usr/bin")), + PathBuf::from("usr/bin") + ); + } + + #[test] + fn strip_unix_root_on_bare_slash() { + // "/" should become empty after stripping. + let p = strip_unix_root(Path::new("/")); + assert!(p.as_os_str().is_empty() || p == Path::new("")); + } + + // --- resolve_symlink_in_rootfs --- + + #[test] + fn resolve_symlink_direct_hit() { + // lib64 -> usr/lib64, and rootfs/usr/lib64/libc.so exists on disk. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/lib64")).unwrap(); + std::fs::write(rootfs.join("usr/lib64/libc.so"), b"fake").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("lib64"), PathBuf::from("usr/lib64")); + + // Resolving "lib64" itself should follow to rootfs/usr/lib64 (dir). + let resolved = resolve_symlink_in_rootfs(Path::new("lib64"), rootfs, &symlink_map, 32); + assert!(resolved.is_some()); + assert_eq!(resolved.unwrap(), rootfs.join("usr/lib64")); + } + + #[test] + fn resolve_symlink_chain() { + // a -> b, b -> c, rootfs/c exists. 
+ let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("c"), b"data").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("a"), PathBuf::from("b")); + symlink_map.insert(PathBuf::from("b"), PathBuf::from("c")); + + let resolved = resolve_symlink_in_rootfs(Path::new("a"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("c"))); + } + + #[test] + fn resolve_symlink_max_depth_prevents_infinite_loop() { + // a -> b, b -> a (cycle). + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("a"), PathBuf::from("b")); + symlink_map.insert(PathBuf::from("b"), PathBuf::from("a")); + + let tmp = tempfile::tempdir().unwrap(); + let resolved = resolve_symlink_in_rootfs(Path::new("a"), tmp.path(), &symlink_map, 32); + assert!(resolved.is_none()); + } + + #[test] + fn resolve_symlink_absolute_target() { + // link -> /usr/bin/sh, rootfs/usr/bin/sh exists. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/bin")).unwrap(); + std::fs::write(rootfs.join("usr/bin/sh"), b"elf").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("bin/sh"), PathBuf::from("/usr/bin/sh")); + + let resolved = resolve_symlink_in_rootfs(Path::new("bin/sh"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("usr/bin/sh"))); + } + + #[test] + fn resolve_symlink_relative_target() { + // usr/lib64/libfoo.so -> ../lib/libfoo.so, rootfs/usr/lib/libfoo.so exists. 
+ let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/lib")).unwrap(); + std::fs::write(rootfs.join("usr/lib/libfoo.so"), b"elf").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert( + PathBuf::from("usr/lib64/libfoo.so"), + PathBuf::from("../lib/libfoo.so"), + ); + + let resolved = + resolve_symlink_in_rootfs(Path::new("usr/lib64/libfoo.so"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("usr/lib/libfoo.so"))); + } + + #[test] + fn resolve_symlink_ancestor_is_symlink() { + // lib64 -> usr/lib64, resolve "lib64/foo.so" where rootfs/usr/lib64/foo.so exists. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/lib64")).unwrap(); + std::fs::write(rootfs.join("usr/lib64/foo.so"), b"elf").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("lib64"), PathBuf::from("usr/lib64")); + + let resolved = + resolve_symlink_in_rootfs(Path::new("lib64/foo.so"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("usr/lib64/foo.so"))); + } + + #[test] + fn resolve_symlink_empty_path_returns_none() { + let tmp = tempfile::tempdir().unwrap(); + let symlink_map = HashMap::new(); + let resolved = resolve_symlink_in_rootfs(Path::new(""), tmp.path(), &symlink_map, 32); + assert!(resolved.is_none()); + } + + #[test] + fn resolve_symlink_not_a_symlink_returns_host_path() { + // Regular file, not in symlink_map — should return host_path directly. 
+ let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("hello.txt"), b"hi").unwrap(); + + let symlink_map = HashMap::new(); + let resolved = resolve_symlink_in_rootfs(Path::new("hello.txt"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("hello.txt"))); + } + + #[test] + fn resolve_symlink_nonexistent_returns_none() { + let tmp = tempfile::tempdir().unwrap(); + let symlink_map = HashMap::new(); + let resolved = + resolve_symlink_in_rootfs(Path::new("does/not/exist"), tmp.path(), &symlink_map, 32); + assert!(resolved.is_none()); + } + + // --- lookup_mode --- + + #[test] + fn lookup_mode_prefers_permissions_map() { + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("file.sh"), b"#!/bin/sh").unwrap(); + + let mut permissions = HashMap::new(); + permissions.insert(PathBuf::from("file.sh"), 0o100755u32); + + // The permissions map value (masked) should win over filesystem metadata. + let mode = lookup_mode(Path::new("file.sh"), &rootfs.join("file.sh"), &permissions); + assert_eq!(mode, 0o755); + } + + #[test] + fn lookup_mode_falls_back_to_filesystem() { + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("file.txt"), b"data").unwrap(); + + let permissions = HashMap::new(); // empty + let mode = lookup_mode( + Path::new("file.txt"), + &rootfs.join("file.txt"), + &permissions, + ); + // On Unix the file should have some mode; just check it's non-zero. 
+ assert!(mode > 0); + } + + #[test] + fn lookup_mode_defaults_to_644_when_nothing_available() { + let permissions = HashMap::new(); + let mode = lookup_mode( + Path::new("nonexistent"), + Path::new("/no/such/file"), + &permissions, + ); + assert_eq!(mode, 0o644); + } } From d880f73b0eb850353e8996eafa8af332cff15750 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 20:34:00 -0700 Subject: [PATCH 15/24] Prune dead observers on registration Add prune_dead_observers() to Subject, called during register_observer() to eagerly clean up stale weak references. Previously dead observers were only cleaned up during notify_observers(), which could leave orphaned entries indefinitely if notifications were infrequent. Includes a test verifying that registering a new observer prunes stale entries from a previously dropped observer. --- litebox/src/event/observer.rs | 86 +++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 9 deletions(-) diff --git a/litebox/src/event/observer.rs b/litebox/src/event/observer.rs index 4e42a304e..42cdbd409 100644 --- a/litebox/src/event/observer.rs +++ b/litebox/src/event/observer.rs @@ -93,9 +93,28 @@ impl, Platform: RawSyncPrimitivesProvider> Subject, F>) { + observers.retain(|observer, _| { + if observer.upgrade().is_some() { + true + } else { + self.nums.fetch_sub(1, Ordering::Relaxed); + false + } + }); + } + /// Register an observer with the given filter. 
pub fn register_observer(&self, observer: Weak>, filter: F) { let mut observers = self.observers.lock(); + self.prune_dead_observers(&mut observers); if observers .insert(ObserverKey::new(observer), filter) .is_none() @@ -119,16 +138,65 @@ impl, Platform: RawSyncPrimitivesProvider> Subject for TestObserver { + fn on_events(&self, _events: &Events) { + self.notifications.fetch_add(1, Ordering::Relaxed); + } + } + + #[test] + fn register_observer_prunes_dead_entries() { + let subject = Subject::::new(); + + let stale = Arc::new(TestObserver { + notifications: AtomicUsize::new(0), + }); + subject.register_observer(Arc::downgrade(&stale) as _, Events::IN); + assert_eq!(subject.nums.load(Ordering::Relaxed), 1); + assert_eq!(subject.observers.lock().len(), 1); + drop(stale); + + let fresh = Arc::new(TestObserver { + notifications: AtomicUsize::new(0), }); + subject.register_observer(Arc::downgrade(&fresh) as _, Events::OUT); + { + let observers = subject.observers.lock(); + let registered = observers + .keys() + .next() + .and_then(super::ObserverKey::upgrade) + .expect("dead observer should be pruned during registration"); + let fresh_observer: Arc> = fresh.clone(); + assert!(Arc::ptr_eq(&registered, &fresh_observer)); + assert_eq!(subject.nums.load(Ordering::Relaxed), 1); + assert_eq!(observers.len(), 1); + } + subject.notify_observers(Events::OUT); + + assert_eq!(fresh.notifications.load(Ordering::Relaxed), 1); } } From 6c425be3325afae3a3b25aca9fb69e5f61838349 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 20:39:04 -0700 Subject: [PATCH 16/24] Add Clone support to AnyMap via type-erased clone functions Clone is not object-safe, so Box is not valid Rust. Instead, store a type-erased clone function pointer alongside each value that knows the concrete type and can clone through the trait object. This requires all types stored in AnyMap to implement Clone.
Update the Clone bound on insert() and set_entry_metadata()/set_fd_metadata() in fd/mod.rs, and add #[derive(Clone)] to shim types that are stored as fd metadata: StdioStatusFlags, PipeStatusFlags, SocketOptions, SocketOFlags, SocketProxy. --- litebox/src/fd/mod.rs | 4 +- litebox/src/fd/tests.rs | 2 +- litebox/src/utilities/anymap.rs | 96 ++++++++++++++++++++++++-- litebox_shim_linux/src/lib.rs | 2 + litebox_shim_linux/src/syscalls/net.rs | 4 +- 5 files changed, 97 insertions(+), 11 deletions(-) diff --git a/litebox/src/fd/mod.rs b/litebox/src/fd/mod.rs index aeef757dd..d93da35b1 100644 --- a/litebox/src/fd/mod.rs +++ b/litebox/src/fd/mod.rs @@ -477,7 +477,7 @@ impl Descriptors { ) -> Option where Subsystem: FdEnabledSubsystem, - T: core::any::Any + Send + Sync, + T: core::any::Any + Clone + Send + Sync, { self.entries[fd.x.as_usize()?] .as_ref() @@ -506,7 +506,7 @@ impl Descriptors { ) -> Option where Subsystem: FdEnabledSubsystem, - T: core::any::Any + Send + Sync, + T: core::any::Any + Clone + Send + Sync, { self.entries[fd.x.as_usize()?] .as_mut() diff --git a/litebox/src/fd/tests.rs b/litebox/src/fd/tests.rs index 04a482b44..ac92d252d 100644 --- a/litebox/src/fd/tests.rs +++ b/litebox/src/fd/tests.rs @@ -6,10 +6,10 @@ use alloc::string::ToString as _; use alloc::vec; use alloc::vec::Vec; -use crate::LiteBox; use crate::fd::FdEnabledSubsystemEntry; use crate::fd::{ErrRawIntFd, FdEnabledSubsystem, TypedFd}; use crate::platform::mock::MockPlatform; +use crate::LiteBox; struct MockSubsystem; impl FdEnabledSubsystem for MockSubsystem { diff --git a/litebox/src/utilities/anymap.rs b/litebox/src/utilities/anymap.rs index 68af3f8a6..96253e8fb 100644 --- a/litebox/src/utilities/anymap.rs +++ b/litebox/src/utilities/anymap.rs @@ -18,14 +18,30 @@ use alloc::boxed::Box; use core::any::{Any, TypeId}; use hashbrown::HashMap; +/// Type-erased clone function stored alongside each value. 
+/// +/// We cannot use `Box<dyn Any + Clone>` because `Clone` is not +/// object-safe (its `clone` method returns `Self`). Instead we store a +/// function pointer that knows the concrete type and can clone through the +/// trait object. +type CloneFn = fn(&(dyn Any + Send + Sync)) -> Box; + /// A safe store of exactly one value of any type `T`. pub(crate) struct AnyMap { // Invariant: the value at a particular typeid is guaranteed to be the correct type boxed up. - storage: HashMap>, + storage: HashMap, CloneFn)>, } const GUARANTEED: &str = "guaranteed correct type by invariant"; +/// Create a clone function for a specific concrete type. +fn make_clone_fn() -> CloneFn { + |val: &(dyn Any + Send + Sync)| -> Box { + let concrete = val.downcast_ref::().expect(GUARANTEED); + Box::new(concrete.clone()) + } +} + impl AnyMap { /// Create a new empty `AnyMap` pub(crate) fn new() -> Self { @@ -35,20 +51,26 @@ impl AnyMap { } /// Insert `v`, replacing and returning the old value if one existed already. - pub(crate) fn insert(&mut self, v: T) -> Option { - let old = self.storage.insert(TypeId::of::(), Box::new(v))?; - Some(*old.downcast().expect(GUARANTEED)) + /// + /// The `Clone` bound is required to capture a type-erased clone function + /// at insertion time. The other accessors (`get`, `get_mut`, `remove`) do + /// not require `Clone`. + pub(crate) fn insert(&mut self, v: T) -> Option { + let old = self + .storage + .insert(TypeId::of::(), (Box::new(v), make_clone_fn::()))?; + Some(*old.0.downcast().expect(GUARANTEED)) } /// Get a reference to a value of type `T` if it exists. pub(crate) fn get(&self) -> Option<&T> { - let v = self.storage.get(&TypeId::of::())?; + let v = &self.storage.get(&TypeId::of::())?.0; Some(v.downcast_ref().expect(GUARANTEED)) } /// Get a mutable reference to a value of type `T` if it exists.
pub(crate) fn get_mut(&mut self) -> Option<&mut T> { - let v = self.storage.get_mut(&TypeId::of::())?; + let v = &mut self.storage.get_mut(&TypeId::of::())?.0; Some(v.downcast_mut().expect(GUARANTEED)) } @@ -58,7 +80,67 @@ impl AnyMap { )] /// Remove and return the value of type `T` if it exists. pub(crate) fn remove(&mut self) -> Option { - let v = self.storage.remove(&TypeId::of::())?; + let v = self.storage.remove(&TypeId::of::())?.0; Some(*v.downcast().expect(GUARANTEED)) } } + +impl Clone for AnyMap { + fn clone(&self) -> Self { + Self { + storage: self + .storage + .iter() + .map(|(&type_id, (val, clone_fn))| (type_id, (clone_fn(val.as_ref()), *clone_fn))) + .collect(), + } + } +} + +#[cfg(test)] +mod tests { + use alloc::string::String; + + use super::AnyMap; + + #[test] + fn insert_and_get() { + let mut map = AnyMap::new(); + assert!(map.insert(42u32).is_none()); + assert_eq!(map.get::(), Some(&42)); + } + + #[test] + fn clone_produces_independent_copy() { + let mut original = AnyMap::new(); + original.insert(10u32); + original.insert(String::from("hello")); + + let mut cloned = original.clone(); + + // Cloned values match. + assert_eq!(cloned.get::(), Some(&10)); + assert_eq!(cloned.get::().map(String::as_str), Some("hello")); + + // Mutating the clone does not affect the original. 
+ *cloned.get_mut::().unwrap() = 99; + assert_eq!(original.get::(), Some(&10)); + assert_eq!(cloned.get::(), Some(&99)); + } + + #[test] + fn cloned_map_can_be_cloned_again() { + let mut map = AnyMap::new(); + map.insert(7u64); + let clone1 = map.clone(); + let clone2 = clone1.clone(); + assert_eq!(clone2.get::(), Some(&7)); + } + + #[test] + fn clone_empty_map() { + let map = AnyMap::new(); + let cloned = map.clone(); + assert_eq!(cloned.get::(), None); + } +} diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index 38f96b535..59c10023f 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -345,9 +345,11 @@ fn default_fs( } // Special override so that `GETFL` can return stdio-specific flags +#[derive(Clone)] pub(crate) struct StdioStatusFlags(litebox::fs::OFlags); /// Status flags for pipes +#[derive(Clone)] pub(crate) struct PipeStatusFlags(pub litebox::fs::OFlags); impl syscalls::file::FilesState { diff --git a/litebox_shim_linux/src/syscalls/net.rs b/litebox_shim_linux/src/syscalls/net.rs index d1b04e894..9192971c1 100644 --- a/litebox_shim_linux/src/syscalls/net.rs +++ b/litebox_shim_linux/src/syscalls/net.rs @@ -155,7 +155,7 @@ impl SocketAddress { } } -#[derive(Default)] +#[derive(Default, Clone)] pub(super) struct SocketOptions { pub(super) reuse_address: bool, pub(super) keep_alive: bool, @@ -171,7 +171,9 @@ pub(super) struct SocketOptions { pub(super) linger_timeout: Option, } +#[derive(Clone)] pub(crate) struct SocketOFlags(pub OFlags); +#[derive(Clone)] pub(crate) struct SocketProxy(pub Arc>); pub(super) enum SocketOptionValue { From 8adb37c1e1e278f72649128511deff57f2df7f90 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 20:42:53 -0700 Subject: [PATCH 17/24] Add needs_host_poll() and should_block_read() to IOPollable Add two new default methods to the IOPollable trait: - needs_host_poll(): indicates the pollable cannot deliver async observer notifications (e.g. 
host-backed stdin), so callers should poll periodically. - should_block_read(): indicates whether reads should block when no data is available. Returns false for fds that use epoll/poll and expect EAGAIN. Both default to the backward-compatible behavior (no host poll needed, reads block). Forward the methods through the Arc blanket impl. --- litebox/src/event/mod.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/litebox/src/event/mod.rs b/litebox/src/event/mod.rs index 24d5b6832..98ec7e680 100644 --- a/litebox/src/event/mod.rs +++ b/litebox/src/event/mod.rs @@ -48,6 +48,29 @@ pub trait IOPollable { /// calls are what notify observers. This particular function itself however _may_ be used to /// essentially get "the current status" of events for the system. fn check_io_events(&self) -> Events; + + /// Returns `true` if this pollable cannot deliver asynchronous observer + /// notifications (e.g. host-backed stdin where the host has no callback + /// mechanism). Callers should use periodic polling instead of blocking + /// indefinitely on observer wakeups. + /// + /// Defaults to `false` (async notifications work). This is safe for all + /// existing implementors; callers that use this method arrive in subsequent + /// stacked PRs. + fn needs_host_poll(&self) -> bool { + false + } + + /// Returns `true` if reads on this pollable should block when no data is + /// available. Returns `false` for pollables whose callers perform + /// asynchronous readiness checks and expect a "would block" indication + /// immediately (e.g. PTY master side). + /// + /// Defaults to `true` (blocking reads). This is safe for all existing + /// implementors; callers arrive in subsequent stacked PRs. 
+ fn should_block_read(&self) -> bool { + true + } } impl IOPollable for alloc::sync::Arc { @@ -61,4 +84,10 @@ impl IOPollable for alloc::sync::Arc { fn check_io_events(&self) -> Events { self.as_ref().check_io_events() } + fn needs_host_poll(&self) -> bool { + self.as_ref().needs_host_poll() + } + fn should_block_read(&self) -> bool { + self.as_ref().should_block_read() + } } From 5dedca6a6ce2cad0c3410823da20870da887ba89 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 20:46:58 -0700 Subject: [PATCH 18/24] Add noreserve parameter to allocate_pages and fix copy_pages chunk length Two changes to page_mgmt: 1. Add a 'noreserve' bool parameter to PageManagementProvider::allocate_pages(). When true, platforms that support it (Linux userland via MAP_NORESERVE) avoid reserving swap/commit upfront, enabling sparse memory reservations. Kernel, LVBS, and Windows platforms accept but ignore the parameter. All callers currently pass false (no behavior change). 2. Fix a bug in the default remap_pages() copy loop: the to_owned_slice() call was using old_range.len() (the entire range length) instead of the per-chunk length, causing each iteration to read beyond its chunk boundary. 
--- litebox/src/mm/linux.rs | 5 +- litebox/src/mm/tests.rs | 71 +++++++++---------- litebox/src/platform/page_mgmt.rs | 9 ++- .../src/arch/x86/mm/paging.rs | 12 ++-- .../src/host/snp/ghcb.rs | 8 ++- .../src/host/snp/snp_impl.rs | 4 +- litebox_platform_linux_kernel/src/lib.rs | 5 +- litebox_platform_linux_kernel/src/mm/tests.rs | 69 ++++++++---------- litebox_platform_linux_userland/src/lib.rs | 6 ++ litebox_platform_lvbs/src/lib.rs | 1 + litebox_platform_windows_userland/src/lib.rs | 4 ++ 11 files changed, 100 insertions(+), 94 deletions(-) diff --git a/litebox/src/mm/linux.rs b/litebox/src/mm/linux.rs index f33094971..cca098941 100644 --- a/litebox/src/mm/linux.rs +++ b/litebox/src/mm/linux.rs @@ -11,11 +11,11 @@ use alloc::vec::Vec; use rangemap::RangeMap; use thiserror::Error; -use crate::platform::PageManagementProvider; -use crate::platform::RawConstPointer; use crate::platform::page_mgmt::AllocationError; use crate::platform::page_mgmt::FixedAddressBehavior; use crate::platform::page_mgmt::MemoryRegionPermissions; +use crate::platform::PageManagementProvider; +use crate::platform::RawConstPointer; /// Page size in bytes pub const PAGE_SIZE: usize = 4096; @@ -509,6 +509,7 @@ impl + 'static, const ALIGN: usize> Vmem MemoryRegionPermissions::from_bits(permissions).unwrap(), vma.flags.contains(VmFlags::VM_GROWSDOWN), populate_pages_immediately, + false, platform_fixed_address_behavior, ) .map_err(|err| match err { diff --git a/litebox/src/mm/tests.rs b/litebox/src/mm/tests.rs index 3142971ef..1de0e5b85 100644 --- a/litebox/src/mm/tests.rs +++ b/litebox/src/mm/tests.rs @@ -9,15 +9,15 @@ use alloc::vec::Vec; use crate::{ mm::linux::{CreatePagesFlags, NonZeroAddress}, platform::{ - PageManagementProvider, RawConstPointer, page_mgmt::MemoryRegionPermissions, trivial_providers::{TransparentConstPtr, TransparentMutPtr}, + PageManagementProvider, RawConstPointer, }, }; use zerocopy::{FromBytes, IntoBytes}; use super::linux::{ - NonZeroPageSize, PAGE_SIZE, PageRange, 
VmArea, VmFlags, Vmem, VmemProtectError, VmemResizeError, + NonZeroPageSize, PageRange, VmArea, VmFlags, Vmem, VmemProtectError, VmemResizeError, PAGE_SIZE, }; /// A dummy implementation of [`VmemBackend`] that does nothing. @@ -43,6 +43,7 @@ impl crate::platform::PageManagementProvider for DummyVmemBackend { initial_permissions: crate::platform::page_mgmt::MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: crate::platform::page_mgmt::FixedAddressBehavior, ) -> Result, crate::platform::page_mgmt::AllocationError> { Ok(TransparentMutPtr::from_usize(suggested_range.start)) @@ -154,15 +155,13 @@ fn test_vmm_mapping() { Err(VmemProtectError::InvalidRange(_)) )); - assert!( - unsafe { - vmm.resize_mapping( - PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), - NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.resize_mapping( + PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), + NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), + ) + } + .is_ok()); // Grow and merge, [(0x1_0000, 0x1_c000)] assert_eq!( collect_mappings(&vmm), @@ -180,15 +179,13 @@ fn test_vmm_mapping() { Err(VmemProtectError::NoAccess { .. 
}) )); - assert!( - unsafe { - vmm.protect_mapping( - PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 4 * PAGE_SIZE).unwrap(), - MemoryRegionPermissions::READ | MemoryRegionPermissions::WRITE, - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.protect_mapping( + PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 4 * PAGE_SIZE).unwrap(), + MemoryRegionPermissions::READ | MemoryRegionPermissions::WRITE, + ) + } + .is_ok()); // Change permission, [(0x1_0000, 0x1_2000), (0x1_2000, 0x1_4000), (0x1_4000, 0x1_c000)] assert_eq!( collect_mappings(&vmm), @@ -205,16 +202,14 @@ fn test_vmm_mapping() { unsafe { vmm.resize_mapping(r, NonZeroPageSize::new(PAGE_SIZE * 4).unwrap()) }, Err(VmemResizeError::RangeOccupied(_)) )); - assert!( - unsafe { - vmm.move_mappings( - r, - Some(NonZeroAddress::new(start_addr + 12 * PAGE_SIZE).unwrap()), - NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), - ) - } - .is_ok_and(|v| v.as_usize() == start_addr + 12 * PAGE_SIZE) - ); + assert!(unsafe { + vmm.move_mappings( + r, + Some(NonZeroAddress::new(start_addr + 12 * PAGE_SIZE).unwrap()), + NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), + ) + } + .is_ok_and(|v| v.as_usize() == start_addr + 12 * PAGE_SIZE)); assert_eq!( collect_mappings(&vmm), vec![ @@ -274,15 +269,13 @@ fn test_vmm_mapping() { ); // shrink mapping - assert!( - unsafe { - vmm.resize_mapping( - PageRange::new(start_addr + 4 * PAGE_SIZE, start_addr + 8 * PAGE_SIZE).unwrap(), - NonZeroPageSize::new(2 * PAGE_SIZE).unwrap(), - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.resize_mapping( + PageRange::new(start_addr + 4 * PAGE_SIZE, start_addr + 8 * PAGE_SIZE).unwrap(), + NonZeroPageSize::new(2 * PAGE_SIZE).unwrap(), + ) + } + .is_ok()); assert_eq!( collect_mappings(&vmm), vec![ diff --git a/litebox/src/platform/page_mgmt.rs b/litebox/src/platform/page_mgmt.rs index c4fca057a..da9b03fd9 100644 --- a/litebox/src/platform/page_mgmt.rs +++ b/litebox/src/platform/page_mgmt.rs @@ -49,6 +49,8 @@ pub trait PageManagementProvider: 
RawPointerProvider { /// a page fault. /// - `populate_pages_immediately`: If `true`, the pages are populated immediately; otherwise, /// they are populated lazily. + /// - `noreserve`: If `true`, request a sparse reservation that avoids reserving swap/commit + /// upfront when the platform supports it. /// - `fixed_address_behavior`: Specifies the required semantics of `suggested_range`. /// /// # Returns @@ -64,6 +66,7 @@ pub trait PageManagementProvider: RawPointerProvider { initial_permissions: MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, AllocationError>; @@ -108,6 +111,7 @@ pub trait PageManagementProvider: RawPointerProvider { temp_permissions, false, true, + false, FixedAddressBehavior::NoReplace, ) .map_err(|e| match e { @@ -135,12 +139,13 @@ pub trait PageManagementProvider: RawPointerProvider { let total_len = old_range.len(); let mut offset = 0; while offset < total_len { + let chunk_len = (total_len - offset).min(ALIGN); let old_ptr = ::RawConstPointer::from_usize(old_range.start + offset); new_ptr .write_slice_at_offset( isize::try_from(offset).unwrap(), - &old_ptr.to_owned_slice(old_range.len()).unwrap(), + &old_ptr.to_owned_slice(chunk_len).unwrap(), ) .unwrap(); offset += ALIGN; @@ -148,7 +153,7 @@ pub trait PageManagementProvider: RawPointerProvider { if temp_permissions != permissions { (unsafe { self.update_permissions(new_range.clone(), permissions) }) - .expect("failed to restore perrmissions on new range"); + .expect("failed to restore permissions on new range"); } (unsafe { self.deallocate_pages(old_range) }).expect("failed to deallocate old range"); diff --git a/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs b/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs index 210b51f14..1d7433447 100644 --- a/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs +++ b/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs 
@@ -2,28 +2,28 @@ // Licensed under the MIT license. use litebox::mm::linux::{PageFaultError, PageRange, VmFlags, VmemPageFaultHandler}; -use litebox::platform::{RawConstPointer as _, page_mgmt}; +use litebox::platform::{page_mgmt, RawConstPointer as _}; use x86_64::{ - PhysAddr, VirtAddr, structures::{ idt::PageFaultErrorCode, paging::{ - FrameAllocator, FrameDeallocator, MappedPageTable, Mapper, Page, PageSize, PageTable, - PageTableFlags, PhysFrame, Size4KiB, Translate, mapper::{ FlagUpdateError, MapToError, PageTableFrameMapping, TranslateResult, UnmapError as X64UnmapError, }, + FrameAllocator, FrameDeallocator, MappedPageTable, Mapper, Page, PageSize, PageTable, + PageTableFlags, PhysFrame, Size4KiB, Translate, }, }, + PhysAddr, VirtAddr, }; use crate::{ - UserMutPtr, mm::{ - MemoryProvider, pgtable::{PageTableAllocator, PageTableImpl}, + MemoryProvider, }, + UserMutPtr, }; #[cfg(not(test))] diff --git a/litebox_platform_linux_kernel/src/host/snp/ghcb.rs b/litebox_platform_linux_kernel/src/host/snp/ghcb.rs index 6ac0b7fe3..541301eaa 100644 --- a/litebox_platform_linux_kernel/src/host/snp/ghcb.rs +++ b/litebox_platform_linux_kernel/src/host/snp/ghcb.rs @@ -4,8 +4,8 @@ use litebox::utils::TruncateExt as _; use crate::arch::{ - PhysAddr, VirtAddr, instructions::{rdmsr, vc_vmgexit, wrmsr}, + PhysAddr, VirtAddr, }; // GHCB MSR @@ -63,7 +63,11 @@ fn ghcb_msr_call(request: u64) -> u64 { } fn num_to_char(n: u8) -> u8 { - if n < 10 { n + b'0' } else { n - 10 + b'a' } + if n < 10 { + n + b'0' + } else { + n - 10 + b'a' + } } pub fn num_to_buf(buf: &mut [u8; 40], mut n: u64, base: u64) -> usize { diff --git a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs index 19673641a..2ea23e200 100644 --- a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs +++ b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs @@ -2,7 +2,7 @@ // Licensed under the MIT license. //! 
An implementation of [`HostInterface`] for SNP VMM -use ::alloc::boxed::Box; +use alloc::boxed::Box; use core::{ arch::asm, cell::{Cell, OnceCell}, @@ -38,8 +38,8 @@ type ArgsArray = [u64; MAX_ARGS_SIZE]; #[cfg(not(test))] mod alloc { - use crate::HostInterface; use crate::mm::MemoryProvider; + use crate::HostInterface; use litebox::utils::TruncateExt as _; const HEAP_ORDER: usize = super::bindings::SNP_VMPL_ALLOC_MAX_ORDER as usize + 12 + 1; diff --git a/litebox_platform_linux_kernel/src/lib.rs b/litebox_platform_linux_kernel/src/lib.rs index 12f6bc2d1..999b79d9b 100644 --- a/litebox_platform_linux_kernel/src/lib.rs +++ b/litebox_platform_linux_kernel/src/lib.rs @@ -10,15 +10,15 @@ use core::sync::atomic::AtomicU64; use core::{arch::asm, sync::atomic::AtomicU32}; use litebox::mm::linux::PageRange; -use litebox::platform::RawPointerProvider; use litebox::platform::page_mgmt::FixedAddressBehavior; +use litebox::platform::RawPointerProvider; use litebox::platform::{ DebugLogProvider, IPInterfaceProvider, ImmediatelyWokenUp, PageManagementProvider, Provider, Punchthrough, PunchthroughProvider, PunchthroughToken, RawMutexProvider, TimeProvider, UnblockedOrTimedOut, }; -use litebox_common_linux::PunchthroughSyscall; use litebox_common_linux::errno::Errno; +use litebox_common_linux::PunchthroughSyscall; extern crate alloc; @@ -424,6 +424,7 @@ impl PageManagementProvider for initial_permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, litebox::platform::page_mgmt::AllocationError> { let range = PageRange::new(suggested_range.start, suggested_range.end) diff --git a/litebox_platform_linux_kernel/src/mm/tests.rs b/litebox_platform_linux_kernel/src/mm/tests.rs index 3987b530b..740474298 100644 --- a/litebox_platform_linux_kernel/src/mm/tests.rs +++ b/litebox_platform_linux_kernel/src/mm/tests.rs @@ -7,29 +7,28 @@ use alloc::vec; 
use alloc::vec::Vec; use arrayvec::ArrayVec; use litebox::{ - LiteBox, mm::{ - PageManager, allocator::SafeZoneAllocator, linux::{ - CreatePagesFlags, NonZeroAddress, NonZeroPageSize, PAGE_SIZE, PageFaultError, - PageRange, VmFlags, + CreatePagesFlags, NonZeroAddress, NonZeroPageSize, PageFaultError, PageRange, VmFlags, + PAGE_SIZE, }, + PageManager, }, platform::RawConstPointer, + LiteBox, }; use spin::mutex::SpinMutex; use crate::{ - HostInterface, UserMutPtr, arch::{ + mm::paging::{vmflags_to_pteflags, X64PageTable}, MappedFrame, Page, PageFaultErrorCode, PageTableFlags, PhysAddr, Size4KiB, TranslateResult, VirtAddr, - mm::paging::{X64PageTable, vmflags_to_pteflags}, }, host::mock::{MockHostInterface, MockKernel}, - mm::{MemoryProvider, pgtable::PageTableAllocator}, - mock_log_println, + mm::{pgtable::PageTableAllocator, MemoryProvider}, + mock_log_println, HostInterface, UserMutPtr, }; use super::pgtable::PageTableImpl; @@ -161,14 +160,12 @@ fn test_page_table() { let new_vmflags = VmFlags::empty(); let new_pteflags = vmflags_to_pteflags(new_vmflags) | PageTableFlags::PRESENT; unsafe { - assert!( - pgtable - .mprotect_pages( - PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 6 * PAGE_SIZE).unwrap(), - new_vmflags - ) - .is_ok() - ); + assert!(pgtable + .mprotect_pages( + PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 6 * PAGE_SIZE).unwrap(), + new_vmflags + ) + .is_ok()); } for page in PageRange::::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap() { check_flags(&pgtable, page, pteflags); @@ -182,14 +179,12 @@ fn test_page_table() { // remap pages let new_addr: usize = 0x20_1000; unsafe { - assert!( - pgtable - .remap_pages( - PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), - PageRange::new(new_addr, new_addr + 2 * PAGE_SIZE).unwrap() - ) - .is_ok() - ); + assert!(pgtable + .remap_pages( + PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), + PageRange::new(new_addr, new_addr + 2 * PAGE_SIZE).unwrap() + ) + 
.is_ok()); } for page in PageRange::::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap() { assert!(matches!( @@ -246,15 +241,13 @@ fn test_vmm_page_fault() { )); // Access non-present page w/ mapping - assert!( - unsafe { - vmm.handle_page_fault( - start_addr + 2 * PAGE_SIZE, - PageFaultErrorCode::USER_MODE.bits(), - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.handle_page_fault( + start_addr + 2 * PAGE_SIZE, + PageFaultErrorCode::USER_MODE.bits(), + ) + } + .is_ok()); // insert stack mapping let stack_addr: usize = 0x1000_0000; @@ -272,12 +265,10 @@ fn test_vmm_page_fault() { } // [0x1_0000, 0x1_4000), [0x1000_0000, 0x1000_4000) // Test stack growth - assert!( - unsafe { - vmm.handle_page_fault(stack_addr - PAGE_SIZE, PageFaultErrorCode::USER_MODE.bits()) - } - .is_ok() - ); + assert!(unsafe { + vmm.handle_page_fault(stack_addr - PAGE_SIZE, PageFaultErrorCode::USER_MODE.bits()) + } + .is_ok()); assert_eq!( vmm.mappings() .iter() diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 871c4ccdd..986818829 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -1728,6 +1728,7 @@ impl litebox::platform::PageManagementProvider for Li initial_permissions: MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, litebox::platform::page_mgmt::AllocationError> { let flags = MapFlags::MAP_PRIVATE @@ -1746,6 +1747,11 @@ impl litebox::platform::PageManagementProvider for Li MapFlags::MAP_POPULATE } else { MapFlags::empty() + } + | if noreserve { + MapFlags::MAP_NORESERVE + } else { + MapFlags::empty() }; let r = unsafe { syscalls::syscall6( diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 2487086d7..4a615ab99 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1228,6 +1228,7 @@ impl 
PageManagementProvider for initial_permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, litebox::platform::page_mgmt::AllocationError> { let range = PageRange::new(suggested_range.start, suggested_range.end) diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index b43f2f790..cba82e382 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -1661,6 +1661,7 @@ impl litebox::platform::PageManagementProvider for Wi initial_permissions: MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, AllocationError> { debug_assert!(ALIGN.is_multiple_of(self.sys_info.read().unwrap().dwPageSize as usize)); @@ -2180,6 +2181,7 @@ mod tests { MemoryRegionPermissions::WRITE, false, true, + false, FixedAddressBehavior::Hint, ) .unwrap() @@ -2206,6 +2208,7 @@ mod tests { MemoryRegionPermissions::WRITE, false, true, + false, FixedAddressBehavior::Hint, ) .unwrap() @@ -2238,6 +2241,7 @@ mod tests { MemoryRegionPermissions::WRITE, false, true, + false, FixedAddressBehavior::Hint, ) .unwrap() From 7c4d62ea879655ff347014673266b17f71b46ae9 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 20:50:55 -0700 Subject: [PATCH 19/24] Misc cleanup: pub(crate) mock fields, log_println allow, current_processor_number, stdin guard Small independent cleanups: - Make MockInstant::time and MockSystemTime::time pub(crate) so tests in other modules can construct instances with specific values. - Add #[allow(unused_imports)] to log_println! macro use-statements to suppress warnings when the macro is invoked in contexts where the import is redundant. 
- Add SystemInfoProvider::current_processor_number() default method returning 0, for platforms that don't expose processor topology. - Add empty-buffer early return in MockPlatform::read_from_stdin() to avoid unnecessarily consuming queued input on zero-length reads. --- litebox/src/platform/mock.rs | 7 +++++-- litebox/src/platform/mod.rs | 12 ++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/litebox/src/platform/mock.rs b/litebox/src/platform/mock.rs index 3a3297aa6..332d41440 100644 --- a/litebox/src/platform/mock.rs +++ b/litebox/src/platform/mock.rs @@ -210,7 +210,7 @@ impl IPInterfaceProvider for MockPlatform { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct MockInstant { - time: u64, + pub(crate) time: u64, } impl Instant for MockInstant { @@ -230,7 +230,7 @@ impl Instant for MockInstant { } pub(crate) struct MockSystemTime { - time: u64, + pub(crate) time: u64, } impl SystemTime for MockSystemTime { @@ -290,6 +290,9 @@ impl RawPointerProvider for MockPlatform { impl StdioProvider for MockPlatform { fn read_from_stdin(&self, buf: &mut [u8]) -> Result { + if buf.is_empty() { + return Ok(0); + } let Some(front) = self.stdin_queue.write().unwrap().pop_front() else { return Err(StdioReadError::Closed); }; diff --git a/litebox/src/platform/mod.rs b/litebox/src/platform/mod.rs index 983f20c84..4fa88066e 100644 --- a/litebox/src/platform/mod.rs +++ b/litebox/src/platform/mod.rs @@ -23,11 +23,13 @@ pub use page_mgmt::PageManagementProvider; #[macro_export] macro_rules! 
log_println { ($platform:expr, $s:expr) => {{ + #[allow(unused_imports)] use $crate::platform::DebugLogProvider as _; $platform.debug_log_print($s); }}; ($platform:expr, $($tt:tt)*) => {{ use core::fmt::Write as _; + #[allow(unused_imports)] use $crate::platform::DebugLogProvider as _; let mut t: arrayvec::ArrayString<8192> = arrayvec::ArrayString::new(); writeln!(t, $($tt)*).unwrap(); @@ -666,6 +668,16 @@ pub trait SystemInfoProvider { /// Return `Some(address)` if the VDSO is available on the platform, or `None` /// if the platform does not support or provide a VDSO. fn get_vdso_address(&self) -> Option; + + /// Returns the current processor number, used to emulate `getcpu`-family + /// syscalls and related VDSO interfaces. + /// + /// Platforms that do not expose a stable processor identifier, or that + /// virtualize CPU topology, may return `0`. Callers arrive in subsequent + /// stacked PRs. + fn current_processor_number(&self) -> u32 { + 0 + } } /// A provider for thread-local storage. From eedd3b902de2b442abe4ca4a9c72fdd466940793 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 20:58:07 -0700 Subject: [PATCH 20/24] Add RawMessageProvider trait and expand SendError/ReceiveError variants Add SendError::Io(i32) and ReceiveError::{ProtocolError, Eof} variants. Introduce RawMessageProvider trait for direct guest-broker byte-stream messaging (bypassing IP stack), with default stubs. Add Provider supertrait bound. Implement (empty defaults) for all platform crates. Update net/phy.rs to match on new error variants. 
--- litebox/src/net/phy.rs | 6 ++- litebox/src/platform/mock.rs | 2 + litebox/src/platform/mod.rs | 44 ++++++++++++++++++-- litebox_platform_linux_kernel/src/lib.rs | 2 + litebox_platform_linux_userland/src/lib.rs | 2 + litebox_platform_lvbs/src/lib.rs | 2 + litebox_platform_windows_userland/src/lib.rs | 2 + 7 files changed, 56 insertions(+), 4 deletions(-) diff --git a/litebox/src/net/phy.rs b/litebox/src/net/phy.rs index ff4d85c6e..e8ebc4beb 100644 --- a/litebox/src/net/phy.rs +++ b/litebox/src/net/phy.rs @@ -51,7 +51,11 @@ impl smoltcp::phy::Device for Device None, + Err( + platform::ReceiveError::WouldBlock + | platform::ReceiveError::ProtocolError + | platform::ReceiveError::Eof, + ) => None, } } diff --git a/litebox/src/platform/mock.rs b/litebox/src/platform/mock.rs index 332d41440..f8b20137a 100644 --- a/litebox/src/platform/mock.rs +++ b/litebox/src/platform/mock.rs @@ -59,6 +59,8 @@ impl MockPlatform { impl Provider for MockPlatform {} +impl RawMessageProvider for MockPlatform {} + pub(crate) struct MockRawMutex { inner: AtomicU32, internal_state: std::sync::RwLock, diff --git a/litebox/src/platform/mod.rs b/litebox/src/platform/mod.rs index 4fa88066e..4e7a6d202 100644 --- a/litebox/src/platform/mod.rs +++ b/litebox/src/platform/mod.rs @@ -45,6 +45,7 @@ macro_rules! log_println { pub trait Provider: RawMutexProvider + IPInterfaceProvider + + RawMessageProvider + TimeProvider + PunchthroughProvider + DebugLogProvider @@ -384,17 +385,54 @@ pub trait IPInterfaceProvider { fn receive_ip_packet(&self, packet: &mut [u8]) -> Result; } -/// A non-exhaustive list of errors that can be thrown by [`IPInterfaceProvider::send_ip_packet`]. +/// Errors from send operations on [`IPInterfaceProvider`] and [`RawMessageProvider`]. #[derive(Error, Debug)] #[non_exhaustive] -pub enum SendError {} +pub enum SendError { + /// The underlying device returned an I/O error. The packet was not sent. 
+ #[error("I/O error on send: errno {0}")] + Io(i32), + /// The channel is not available on this platform. + #[error("send channel unavailable")] + Unavailable, +} -/// A non-exhaustive list of errors that can be thrown by [`IPInterfaceProvider::receive_ip_packet`]. +/// Errors from receive operations on [`IPInterfaceProvider`] and [`RawMessageProvider`]. #[derive(Error, Debug)] #[non_exhaustive] pub enum ReceiveError { #[error("Receive operation would block")] WouldBlock, + #[error("IPC protocol error: oversized frame")] + ProtocolError, + #[error("Channel closed (EOF)")] + Eof, +} + +/// A raw byte-stream channel for direct message passing between the guest and +/// the host (bypassing the IP network stack). +/// +/// When available, this provides a fast path for protocols like 9P that would +/// otherwise pay the overhead of traversing two smoltcp stacks. +/// +/// The default implementation returns [`ReceiveError::WouldBlock`] / +/// [`SendError::Unavailable`], indicating the channel is not available. +/// Platforms that support direct messaging override these methods. +pub trait RawMessageProvider { + /// Send bytes to the host over the raw channel. + /// + /// Returns `Ok(n)` with the number of bytes sent, or an error. + fn send_raw_message(&self, _data: &[u8]) -> Result { + Err(SendError::Unavailable) + } + + /// Receive bytes from the host over the raw channel. + /// + /// Returns `Ok(n)` with the number of bytes read into `buf`, or + /// [`ReceiveError::WouldBlock`] if no data is available yet. + fn recv_raw_message(&self, _buf: &mut [u8]) -> Result { + Err(ReceiveError::WouldBlock) + } } /// An interface to understanding time. 
diff --git a/litebox_platform_linux_kernel/src/lib.rs b/litebox_platform_linux_kernel/src/lib.rs index 999b79d9b..42ca7bf81 100644 --- a/litebox_platform_linux_kernel/src/lib.rs +++ b/litebox_platform_linux_kernel/src/lib.rs @@ -85,6 +85,8 @@ impl<'a, Host: HostInterface> PunchthroughToken for LinuxPunchthroughToken<'a, H impl Provider for LinuxKernel {} +impl litebox::platform::RawMessageProvider for LinuxKernel {} + // TODO: implement pointer validation to ensure the pointers are in user space. type UserConstPtr = litebox::platform::common_providers::userspace_pointers::UserConstPtr< litebox::platform::common_providers::userspace_pointers::NoValidation, diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 986818829..6610391b1 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -445,6 +445,8 @@ impl LinuxUserland { impl litebox::platform::Provider for LinuxUserland {} +impl litebox::platform::RawMessageProvider for LinuxUserland {} + impl litebox::platform::SignalProvider for LinuxUserland { type Signal = litebox_common_linux::signal::Signal; diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 4a615ab99..cda50d63e 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1352,6 +1352,8 @@ impl litebox::platform::SystemInfoProvider for LinuxKernel< } } +impl litebox::platform::RawMessageProvider for LinuxKernel {} + #[cfg(feature = "optee_syscall")] /// Checks whether the given physical addresses are contiguous with respect to ALIGN. 
fn is_contiguous(addrs: &[PhysPageAddr]) -> bool { diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index cba82e382..b68f7c265 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -331,6 +331,8 @@ impl WindowsUserland { } } +impl litebox::platform::RawMessageProvider for WindowsUserland {} + impl litebox::platform::Provider for WindowsUserland {} impl litebox::platform::SignalProvider for WindowsUserland { From 88a6274ec827ec23ff1d6e46178c83e600fe5cec Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 21:04:43 -0700 Subject: [PATCH 21/24] Expand StdioProvider with terminal types and nonblocking stdin Add StdioReadError::WouldBlock, StdioIoctlError, TerminalAttributes, WindowSize, SetTermiosWhen, and HostTtyDeviceInfo types. Add read_from_stdin_nonblocking, get/set_terminal_attributes, get/set_window_size, get_terminal_input_bytes, poll_stdin_readable, cancel_stdin, and host_stdin_tty_device_info methods to StdioProvider with sensible defaults. Add mock implementations for get_terminal_input_bytes and poll_stdin_readable. Add test for nonblocking stdin reads. 
--- litebox/src/fs/devices.rs | 13 ++- litebox/src/platform/mock.rs | 43 +++++++ litebox/src/platform/mod.rs | 216 +++++++++++++++++++++++++++++++++++ 3 files changed, 266 insertions(+), 6 deletions(-) diff --git a/litebox/src/fs/devices.rs b/litebox/src/fs/devices.rs index 90e715230..a8851d18e 100644 --- a/litebox/src/fs/devices.rs +++ b/litebox/src/fs/devices.rs @@ -8,16 +8,16 @@ use alloc::string::String; use crate::{ - LiteBox, fs::{ - FileStatus, FileType, Mode, NodeInfo, OFlags, SeekWhence, UserInfo, errors::{ ChmodError, ChownError, CloseError, FileStatusError, MkdirError, OpenError, PathError, ReadDirError, ReadError, RmdirError, SeekError, TruncateError, UnlinkError, WriteError, }, + FileStatus, FileType, Mode, NodeInfo, OFlags, SeekWhence, UserInfo, }, path::Arg, platform::{StdioOutStream, StdioReadError, StdioWriteError}, + LiteBox, }; /// Block size for stdio devices @@ -145,10 +145,10 @@ impl super::FileSystem for FileSystem + Platform: crate::sync::RawSyncPrimitivesProvider + + crate::platform::StdioProvider + + crate::platform::CrngProvider, + > super::FileSystem for FileSystem { fn open( &self, @@ -254,6 +254,7 @@ impl< .read_from_stdin(buf) .map_err(|e| match e { StdioReadError::Closed => unimplemented!(), + StdioReadError::WouldBlock => unimplemented!(), }) } diff --git a/litebox/src/platform/mock.rs b/litebox/src/platform/mock.rs index f8b20137a..994acf86c 100644 --- a/litebox/src/platform/mock.rs +++ b/litebox/src/platform/mock.rs @@ -323,6 +323,26 @@ impl StdioProvider for MockPlatform { fn is_a_tty(&self, _stream: StdioStream) -> bool { false } + + fn get_terminal_input_bytes(&self, stream: StdioStream) -> Result { + match stream { + StdioStream::Stdin => { + let len = self + .stdin_queue + .read() + .unwrap() + .iter() + .map(std::vec::Vec::len) + .sum::(); + Ok(u32::try_from(len).unwrap_or(u32::MAX)) + } + StdioStream::Stdout | StdioStream::Stderr => Err(StdioIoctlError::NotATerminal), + } + } + + fn poll_stdin_readable(&self) -> bool { + 
self.stdin_queue.read().unwrap().front().is_some() + } } impl CrngProvider for MockPlatform { @@ -338,6 +358,29 @@ impl CrngProvider for MockPlatform { } } +#[cfg(test)] +mod tests { + use super::{MockPlatform, StdioProvider}; + + #[test] + fn nonblocking_stdin_reads_queued_input() { + let platform = MockPlatform::new(); + platform + .stdin_queue + .write() + .unwrap() + .push_back(b"ready".to_vec()); + + let mut buf = [0u8; 8]; + let read = platform + .read_from_stdin_nonblocking(&mut buf) + .expect("queued stdin should not block"); + + assert_eq!(read, 5); + assert_eq!(&buf[..read], b"ready"); + } +} + std::thread_local! { static MOCK_TLS: core::cell::Cell<*mut()> = const { core::cell::Cell::new(core::ptr::null_mut()) }; } diff --git a/litebox/src/platform/mod.rs b/litebox/src/platform/mod.rs index 4e7a6d202..0b2885e1e 100644 --- a/litebox/src/platform/mod.rs +++ b/litebox/src/platform/mod.rs @@ -651,6 +651,8 @@ where pub enum StdioReadError { #[error("input stream has been closed")] Closed, + #[error("input would block")] + WouldBlock, } /// A non-exhaustive list of errors that can be thrown by [`StdioProvider::write_to`]. @@ -681,16 +683,230 @@ pub enum StdioStream { Stderr = 2, } +/// Errors from terminal operations on [`StdioProvider`]. +#[derive(Error, Debug)] +#[non_exhaustive] +pub enum StdioIoctlError { + /// The stream is not a terminal. + #[error("not a terminal")] + NotATerminal, + /// The operation failed with an OS error code (errno on Linux, mapped + /// equivalent on other platforms). + #[error("ioctl failed: {0}")] + OsError(i32), +} + +/// Platform-agnostic terminal attributes, mirroring the fields of Linux +/// `struct termios`. +/// +/// The shim layer translates these fields to and from the guest ABI. +/// Platform implementations fill this struct using their native APIs (e.g., +/// direct ioctl forwarding on Linux, `GetConsoleMode`/`SetConsoleMode` on +/// Windows). 
+#[derive(Debug, Clone)] +pub struct TerminalAttributes { + /// Input mode flags. + pub c_iflag: u32, + /// Output mode flags. + pub c_oflag: u32, + /// Control mode flags. + pub c_cflag: u32, + /// Local mode flags. + pub c_lflag: u32, + /// Line discipline (typically `0` for `N_TTY`). + pub c_line: u8, + /// Control characters. + pub c_cc: [u8; 19], +} + +// Terminal attribute flag constants. +const TERMATTR_ECHO: u32 = 0x0008; +const TERMATTR_ICRNL: u32 = 0x0100; +const TERMATTR_OPOST: u32 = 0x0001; +const TERMATTR_ONLCR: u32 = 0x0004; + +impl TerminalAttributes { + /// Default terminal attributes matching a freshly opened Linux PTY. + /// + /// These are realistic values that satisfy terminal detection in programs + /// such as Node.js Ink. **All-zero termios causes such programs to reject + /// the terminal silently.** + pub fn new_default() -> Self { + Self { + c_iflag: 0x6d02, // BRKINT | ICRNL | IXON | IXANY | IMAXBEL | IUTF8 + c_oflag: 0x0005, // OPOST | ONLCR + c_cflag: 0x04bf, // CS8 | CREAD | HUPCL | B38400 + c_lflag: 0x8a3b, // ECHO | ECHOE | ECHOK | ISIG | ICANON | IEXTEN | ECHOCTL | ECHOKE + c_line: 0, // N_TTY + c_cc: [ + 0x03, 0x1c, 0x7f, 0x15, 0x04, 0x00, 0x01, 0x00, 0x11, 0x13, 0x1a, 0xff, 0x12, 0x0f, + 0x17, 0x16, 0xff, 0x00, 0x00, + ], + } + } + + /// Returns `true` if the `ECHO` local flag is set. + pub fn echo_enabled(&self) -> bool { + self.c_lflag & TERMATTR_ECHO != 0 + } + + /// Returns `true` if the `ICRNL` input flag is set. + pub fn icrnl_enabled(&self) -> bool { + self.c_iflag & TERMATTR_ICRNL != 0 + } + + /// Returns `true` if output post-processing with newline translation + /// (`OPOST | ONLCR`) is enabled. + pub fn onlcr_enabled(&self) -> bool { + (self.c_oflag & TERMATTR_OPOST != 0) && (self.c_oflag & TERMATTR_ONLCR != 0) + } +} + +/// Platform-agnostic terminal window size. +#[derive(Debug, Clone, Copy)] +pub struct WindowSize { + /// Number of rows (height in characters). + pub rows: u16, + /// Number of columns (width in characters).
+ pub cols: u16, + /// Horizontal size in pixels (informational, often zero). + pub xpixel: u16, + /// Vertical size in pixels (informational, often zero). + pub ypixel: u16, +} + +/// When to apply terminal attribute changes, corresponding to POSIX +/// `tcsetattr()` actions. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SetTermiosWhen { + /// Apply immediately. + Now, + /// Drain output first, then apply. + AfterDrain, + /// Drain output first, flush pending input, then apply. + AfterDrainFlushInput, +} + /// A provider of standard input/output functionality. pub trait StdioProvider { /// Read from standard input. Returns number of bytes read. fn read_from_stdin(&self, buf: &mut [u8]) -> Result; + /// Read from standard input without blocking. + /// + /// Platforms with exact nonblocking stdin support should override this + /// instead of emulating it with a separate readiness probe. + fn read_from_stdin_nonblocking(&self, buf: &mut [u8]) -> Result { + if buf.is_empty() { + return Ok(0); + } + if !self.poll_stdin_readable() { + return Err(StdioReadError::WouldBlock); + } + self.read_from_stdin(buf) + } + /// Write to stdout/stderr. Returns number of bytes written. fn write_to(&self, stream: StdioOutStream, buf: &[u8]) -> Result; /// Check if a stream is connected to a TTY. fn is_a_tty(&self, stream: StdioStream) -> bool; + + /// Get the terminal attributes for a stdio stream. + /// + /// Platform implementations query the host terminal and populate a + /// [`TerminalAttributes`] struct. The default returns + /// [`StdioIoctlError::NotATerminal`]. + fn get_terminal_attributes( + &self, + _stream: StdioStream, + ) -> Result { + Err(StdioIoctlError::NotATerminal) + } + + /// Set the terminal attributes for a stdio stream. + /// + /// Platform implementations translate the requested attributes into native + /// terminal API calls. The default returns + /// [`StdioIoctlError::NotATerminal`]. 
+ fn set_terminal_attributes( + &self, + _stream: StdioStream, + _attrs: &TerminalAttributes, + _when: SetTermiosWhen, + ) -> Result<(), StdioIoctlError> { + Err(StdioIoctlError::NotATerminal) + } + + /// Get the terminal window size for a stdio stream. + /// + /// The default returns [`StdioIoctlError::NotATerminal`]. + fn get_window_size(&self, _stream: StdioStream) -> Result { + Err(StdioIoctlError::NotATerminal) + } + + /// Get the number of input bytes currently readable from a terminal stream. + /// + /// Platforms that do not support terminal input-queue queries may return + /// [`StdioIoctlError::NotATerminal`]. + fn get_terminal_input_bytes(&self, _stream: StdioStream) -> Result { + Err(StdioIoctlError::NotATerminal) + } + + /// Set the terminal window size for a stdio stream. + /// + /// On some platforms this stores the size so that subsequent + /// `get_window_size` calls return the stored value (the actual console + /// is not resized). The default returns + /// [`StdioIoctlError::NotATerminal`]. + fn set_window_size( + &self, + _stream: StdioStream, + _size: &WindowSize, + ) -> Result<(), StdioIoctlError> { + Err(StdioIoctlError::NotATerminal) + } + + /// Check if stdin has data available for reading without blocking. + /// + /// Returns `true` if a `read()` on stdin would return data immediately. + /// Used by epoll/poll to report stdin readability. The default returns + /// `false`. + fn poll_stdin_readable(&self) -> bool { + false + } + + /// Cancel any pending `read_from_stdin()` call, causing it to return + /// [`StdioReadError::Closed`]. Used during process exit to unblock + /// threads waiting on stdin. The default is a no-op. + fn cancel_stdin(&self) {} + + /// Returns the host terminal device identity for stdin, if it is + /// connected to a real terminal. 
+ /// + /// Used to report correct device info in guest-visible stat and readlink + /// operations, so that runtimes can discover and reopen the controlling + /// terminal by its actual device path. + /// + /// Returns `None` when stdin is not a terminal (pipes, files) or on + /// platforms that do not expose terminal device paths. + fn host_stdin_tty_device_info(&self) -> Option { + None + } +} + +/// Host terminal device identity, returned by +/// [`StdioProvider::host_stdin_tty_device_info`]. +#[derive(Debug, Clone)] +pub struct HostTtyDeviceInfo { + /// Device path on the host (e.g., a PTY path on Linux). + pub path: alloc::string::String, + /// Device number encoding (major/minor) from the host. + pub rdev: u64, + /// Device ID of the filesystem containing the device node. + pub dev: u64, + /// Inode number of the device node on the host. + pub ino: u64, } /// A provider for system information. From 95859329b5cc2845ff37a5fbb0fff0fd48033cc0 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 21:10:26 -0700 Subject: [PATCH 22/24] Add AddressSpaceProvider trait for multi-process address space management Introduce address_space.rs with AddressSpaceProvider trait, ForkedAddressSpace enum, and AddressSpaceError. The trait defines create/destroy/fork/activate/with_address_space operations with NotSupported defaults for platforms that lack multi-process support. Add Provider supertrait bound. Implement (with defaults) for all platform crates and mock. 
--- litebox/src/platform/address_space.rs | 137 +++++++++++++++++++ litebox/src/platform/mock.rs | 6 + litebox/src/platform/mod.rs | 3 + litebox_platform_linux_kernel/src/lib.rs | 4 + litebox_platform_linux_userland/src/lib.rs | 4 + litebox_platform_lvbs/src/lib.rs | 6 + litebox_platform_windows_userland/src/lib.rs | 4 + 7 files changed, 164 insertions(+) create mode 100644 litebox/src/platform/address_space.rs diff --git a/litebox/src/platform/address_space.rs b/litebox/src/platform/address_space.rs new file mode 100644 index 000000000..dc401e90d --- /dev/null +++ b/litebox/src/platform/address_space.rs @@ -0,0 +1,137 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//! Address-space management types and traits for multi-process support. +//! +//! The [`AddressSpaceProvider`] trait is an **optional** South interface that +//! platforms implement to manage per-process address spaces. Platforms may use +//! separate page tables, VA-range partitioning, or other techniques to isolate +//! address spaces. + +use core::ops::Range; +use thiserror::Error; + +/// The result of forking an address space. +/// +/// The variant tells the caller what kind of copy was created so it can adjust +/// its behavior (e.g., whether to copy page contents or share them). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ForkedAddressSpace { + /// Independent copy-on-write copy with the full address range. The child + /// has its own backing structures; CoW faults are resolved by the + /// platform. + Independent(Id), + /// A new VA-range partition is assigned to the child. Parent memory is + /// shared; the shim is responsible for copying pages as needed. + SharedWithParent(Id), +} + +/// Errors that can occur during address-space operations. +#[derive(Error, Debug)] +#[non_exhaustive] +pub enum AddressSpaceError { + /// No free address-space slots or VA ranges available. 
+ #[error("no address space slots available")] + NoSpace, + /// The given address-space ID is not valid (already destroyed, never + /// created, etc.). + #[error("invalid address space id")] + InvalidId, + /// The platform does not support this operation. + #[error("operation not supported by this platform")] + NotSupported, +} + +/// A provider for managing per-process address spaces. +/// +/// This is an **optional** trait — platforms that do not yet support +/// multi-process may leave all methods at the default (which returns +/// [`AddressSpaceError::NotSupported`]). +/// +/// # Associated Type +/// +/// `AddressSpaceId` is an opaque, lightweight handle that identifies one +/// address space. It must be `Copy + Eq + Send + Sync` so it can be stored +/// inside process contexts and passed across threads. +pub trait AddressSpaceProvider { + /// Opaque identifier for an address space. + type AddressSpaceId: Copy + Eq + Send + Sync + core::fmt::Debug; + + /// Create a new, empty address space. + /// + /// The platform allocates whatever backing structures are needed for the + /// new address space. + fn create_address_space(&self) -> Result { + Err(AddressSpaceError::NotSupported) + } + + /// Destroy an address space, releasing all associated resources. + /// + /// After this call, `id` is invalid and must not be reused. + fn destroy_address_space(&self, id: Self::AddressSpaceId) -> Result<(), AddressSpaceError> { + let _ = id; + Err(AddressSpaceError::NotSupported) + } + + /// Fork an address space from `parent`. + /// + /// Returns a [`ForkedAddressSpace`] indicating what kind of fork was + /// performed: + /// + /// * [`Independent`](ForkedAddressSpace::Independent) — full CoW copy. + /// * [`SharedWithParent`](ForkedAddressSpace::SharedWithParent) — new VA + /// partition, parent pages shared. 
+ fn fork_address_space( + &self, + parent: Self::AddressSpaceId, + ) -> Result, AddressSpaceError> { + let _ = parent; + Err(AddressSpaceError::NotSupported) + } + + /// Make `id` the active address space for the current CPU / thread. + fn activate_address_space(&self, id: Self::AddressSpaceId) -> Result<(), AddressSpaceError> { + let _ = id; + Err(AddressSpaceError::NotSupported) + } + + /// Execute `f` with the given address space active, then restore the + /// previously active address space. + /// + /// Implementations **must** restore the prior address space even if `f` + /// panics (use a guard / RAII pattern). + /// + /// The default returns [`AddressSpaceError::NotSupported`]. Platforms that + /// implement [`activate_address_space`](Self::activate_address_space) should + /// also override this method with a proper save/restore sequence. + fn with_address_space( + &self, + id: Self::AddressSpaceId, + f: impl FnOnce() -> R, + ) -> Result { + let _ = (id, f); + Err(AddressSpaceError::NotSupported) + } + + /// Whether the platform requires eager copy-on-write snapshots during + /// fork instead of lazy page-fault-driven CoW. + /// + /// When `true`, the shim eagerly copies all writable guest pages before + /// spawning the forked child and restores them after the child execs or + /// exits. When `false` (the default), the shim marks writable pages + /// read-only and lazily snapshots individual pages on first write fault. + /// + /// Platforms where the exception/fault handler shares the guest address + /// space must set this to `true` because a CoW fault inside the handler + /// itself would be fatal. + const EAGER_COW_ON_FORK: bool = false; + + /// Return the VA range available to the given address space. 
+ fn address_space_range( + &self, + id: Self::AddressSpaceId, + ) -> Result, AddressSpaceError> { + let _ = id; + Err(AddressSpaceError::NotSupported) + } +} diff --git a/litebox/src/platform/mock.rs b/litebox/src/platform/mock.rs index 994acf86c..cf1d60f7f 100644 --- a/litebox/src/platform/mock.rs +++ b/litebox/src/platform/mock.rs @@ -61,6 +61,12 @@ impl Provider for MockPlatform {} impl RawMessageProvider for MockPlatform {} +impl AddressSpaceProvider for MockPlatform { + // All methods default to `Err(NotSupported)`, which is correct for the + // mock platform (single-process only). + type AddressSpaceId = u32; +} + pub(crate) struct MockRawMutex { inner: AtomicU32, internal_state: std::sync::RwLock, diff --git a/litebox/src/platform/mod.rs b/litebox/src/platform/mod.rs index 0b2885e1e..593ad1686 100644 --- a/litebox/src/platform/mod.rs +++ b/litebox/src/platform/mod.rs @@ -7,6 +7,7 @@ //! trait is merely a collection of subtraits that could be composed independently from various //! other crates that implement them upon various types. +pub mod address_space; pub mod common_providers; pub mod page_mgmt; pub mod trivial_providers; @@ -18,6 +19,7 @@ use either::Either; use thiserror::Error; use zerocopy::{FromBytes, IntoBytes}; +pub use address_space::*; pub use page_mgmt::PageManagementProvider; #[macro_export] @@ -50,6 +52,7 @@ pub trait Provider: + PunchthroughProvider + DebugLogProvider + RawPointerProvider + + AddressSpaceProvider { } diff --git a/litebox_platform_linux_kernel/src/lib.rs b/litebox_platform_linux_kernel/src/lib.rs index 42ca7bf81..e54a6f56a 100644 --- a/litebox_platform_linux_kernel/src/lib.rs +++ b/litebox_platform_linux_kernel/src/lib.rs @@ -87,6 +87,10 @@ impl Provider for LinuxKernel {} impl litebox::platform::RawMessageProvider for LinuxKernel {} +impl litebox::platform::AddressSpaceProvider for LinuxKernel { + type AddressSpaceId = u32; +} + // TODO: implement pointer validation to ensure the pointers are in user space. 
type UserConstPtr = litebox::platform::common_providers::userspace_pointers::UserConstPtr< litebox::platform::common_providers::userspace_pointers::NoValidation, diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index 6610391b1..1e34d53ad 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ -447,6 +447,10 @@ impl litebox::platform::Provider for LinuxUserland {} impl litebox::platform::RawMessageProvider for LinuxUserland {} +impl litebox::platform::AddressSpaceProvider for LinuxUserland { + type AddressSpaceId = u32; +} + impl litebox::platform::SignalProvider for LinuxUserland { type Signal = litebox_common_linux::signal::Signal; diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index cda50d63e..d636aa3ca 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1354,6 +1354,12 @@ impl litebox::platform::SystemInfoProvider for LinuxKernel< impl litebox::platform::RawMessageProvider for LinuxKernel {} +impl litebox::platform::AddressSpaceProvider for LinuxKernel { + // All methods default to `Err(NotSupported)` — real implementation comes + // when LVBS multi-process (separate page tables) is added. + type AddressSpaceId = u32; +} + #[cfg(feature = "optee_syscall")] /// Checks whether the given physical addresses are contiguous with respect to ALIGN. 
fn is_contiguous(addrs: &[PhysPageAddr]) -> bool { diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index b68f7c265..59a9bf6ca 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -333,6 +333,10 @@ impl WindowsUserland { impl litebox::platform::RawMessageProvider for WindowsUserland {} +impl litebox::platform::AddressSpaceProvider for WindowsUserland { + type AddressSpaceId = u32; +} + impl litebox::platform::Provider for WindowsUserland {} impl litebox::platform::SignalProvider for WindowsUserland { From 7794e90447ff49f03d85fef61d9090a68bc0501c Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 21:16:12 -0700 Subject: [PATCH 23/24] Add address_space_id parameter to futex wait/wake for multi-process support Add address_space_id: u64 discriminator to FutexEntry, bucket hashing, wait(), and wake() to prevent false aliasing between processes with overlapping virtual address ranges. On userland (non-overlapping VA partitions) callers pass 0. Update all shim callers and tests. --- litebox/src/sync/futex.rs | 52 ++++++++++++++++------ litebox_shim_linux/src/syscalls/process.rs | 4 +- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/litebox/src/sync/futex.rs b/litebox/src/sync/futex.rs index 7785b116f..eb89b0f1b 100644 --- a/litebox/src/sync/futex.rs +++ b/litebox/src/sync/futex.rs @@ -25,8 +25,10 @@ use thiserror::Error; /// A manager of all available futexes. /// -/// Note: currently, this only supports "private" futexes, since it assumes only a single process. -/// In the future, this may be expanded to support multi-process futexes. +/// Supports both private and shared futexes. Callers provide an +/// `address_space_id` discriminator to distinguish futexes at the same virtual +/// address in different address spaces. Entries are only matched (for wake) when +/// both the address and address-space ID agree. 
pub struct FutexManager { /// Chaining hash table to map from futex address to waiter lists. table: alloc::boxed::Box<[LoanList>; HASH_TABLE_ENTRIES]>, @@ -41,6 +43,9 @@ const HASH_TABLE_ENTRIES: usize = 256; struct FutexEntry { addr: usize, + /// Opaque discriminator distinguishing address spaces. Entries with + /// different discriminators never match, even at the same virtual address. + address_space_id: u64, waker: Waker, bitset: u32, done: AtomicBool, @@ -62,9 +67,16 @@ impl } } - /// Returns the hash table bucket for the given futex address. - fn bucket(&self, addr: usize) -> &LoanList> { - let hash: usize = self.hash_builder.hash_one(addr).truncate(); + /// Returns the hash table bucket for the given futex key. + fn bucket( + &self, + addr: usize, + address_space_id: u64, + ) -> &LoanList> { + let hash: usize = self + .hash_builder + .hash_one((addr, address_space_id)) + .truncate(); &self.table[hash % HASH_TABLE_ENTRIES] } @@ -80,12 +92,16 @@ impl /// If `bitset` is `Some`, then the waiter is only woken if the wake call's /// `bitset` has a non-zero intersection with the waiter's mask. Specifying /// `None` is equivalent to setting all bits in the mask. + /// + /// `address_space_id` is an opaque discriminator that distinguishes futexes + /// at the same virtual address in different address spaces. pub fn wait( &self, cx: &WaitContext<'_, Platform>, futex_addr: Platform::RawMutPointer, expected_value: u32, bitset: Option, + address_space_id: u64, ) -> Result<(), FutexError> { let bitset = bitset.unwrap_or(ALL_BITS).get(); let addr = futex_addr.as_usize(); @@ -93,9 +109,10 @@ impl return Err(FutexError::NotAligned); } - let bucket = self.bucket(addr); + let bucket = self.bucket(addr, address_space_id); let mut entry = pin!(LoanListEntry::new(FutexEntry { addr, + address_space_id, waker: cx.waker().clone(), bitset, done: AtomicBool::new(false), @@ -131,12 +148,16 @@ impl /// (subject to the `num_to_wake` limit). 
If `bitset` is `None`, then all /// waiters are eligible to be woken. /// + /// `address_space_id` must match the value passed to the corresponding + /// [`wait`](Self::wait) call. + /// /// Returns the number of waiters that were woken up. pub fn wake( &self, futex_addr: Platform::RawMutPointer, num_to_wake_up: NonZeroU32, bitset: Option, + address_space_id: u64, ) -> Result { let addr = futex_addr.as_usize(); if !addr.is_multiple_of(align_of::()) { @@ -144,10 +165,13 @@ impl } let bitset = bitset.unwrap_or(ALL_BITS).get(); let mut woken = 0; - let bucket = self.bucket(addr); + let bucket = self.bucket(addr, address_space_id); // Extract matching entries from the bucket until we've woken enough. let entries = bucket.extract_if(|entry| { - if entry.addr != addr || entry.bitset & bitset == 0 { + if entry.addr != addr + || entry.address_space_id != address_space_id + || entry.bitset & bitset == 0 + { return core::ops::ControlFlow::Continue(false); } woken += 1; @@ -185,9 +209,9 @@ mod tests { extern crate std; use super::*; - use crate::LiteBox; use crate::event::wait::WaitState; use crate::platform::mock::MockPlatform; + use crate::LiteBox; use alloc::sync::Arc; use core::num::NonZeroU32; use core::sync::atomic::{AtomicU32, Ordering}; @@ -218,7 +242,7 @@ mod tests { barrier_clone.wait(); // Sync with main thread // Wait for value 0 - futex_manager_clone.wait(&WaitState::new(platform).context(), futex_addr, 0, None) + futex_manager_clone.wait(&WaitState::new(platform).context(), futex_addr, 0, None, 0) }); barrier.wait(); // Wait for waiter to be ready @@ -231,7 +255,7 @@ mod tests { futex_word.as_ptr() as usize, ); let woken = futex_manager - .wake(futex_addr, NonZeroU32::new(1).unwrap(), None) + .wake(futex_addr, NonZeroU32::new(1).unwrap(), None, 0) .unwrap(); // Wait for waiter thread to complete @@ -270,6 +294,7 @@ mod tests { futex_addr, 0, None, + 0, ) }); @@ -283,7 +308,7 @@ mod tests { futex_word.as_ptr() as usize, ); let woken = futex_manager - 
.wake(futex_addr, NonZeroU32::new(1).unwrap(), None) + .wake(futex_addr, NonZeroU32::new(1).unwrap(), None, 0) .unwrap(); // Wait for waiter thread to complete @@ -324,6 +349,7 @@ mod tests { futex_addr, 0, None, + 0, ) }); waiters.push(waiter); @@ -339,7 +365,7 @@ mod tests { futex_word.as_ptr() as usize, ); let woken = futex_manager - .wake(futex_addr, NonZeroU32::new(u32::MAX).unwrap(), None) + .wake(futex_addr, NonZeroU32::new(u32::MAX).unwrap(), None, 0) .unwrap(); // Wait for all waiter threads to complete diff --git a/litebox_shim_linux/src/syscalls/process.rs b/litebox_shim_linux/src/syscalls/process.rs index 419afb09c..a6605a409 100644 --- a/litebox_shim_linux/src/syscalls/process.rs +++ b/litebox_shim_linux/src/syscalls/process.rs @@ -1292,7 +1292,7 @@ impl Task { let Some(count) = core::num::NonZeroU32::new(count) else { return Ok(0); }; - self.global.futex_manager.wake(addr, count, None)? as usize + self.global.futex_manager.wake(addr, count, None, 0)? as usize } FutexArgs::Wait { addr, @@ -1307,6 +1307,7 @@ impl Task { addr, val, None, + 0, )?; 0 } @@ -1334,6 +1335,7 @@ impl Task { addr, val, core::num::NonZeroU32::new(bitmask), + 0, )?; 0 } From 870206f030b8553dc8323a57db899b6c007e4929 Mon Sep 17 00:00:00 2001 From: Weidong Cui Date: Mon, 6 Apr 2026 21:19:41 -0700 Subject: [PATCH 24/24] Add trace_fs feature gate for conditional DebugLogProvider bound Add trace_fs feature to litebox Cargo.toml. When enabled (without lock_tracing), RawSyncPrimitivesProvider additionally requires DebugLogProvider, allowing filesystem tracing code to log through the platform's debug output. 
--- litebox/Cargo.toml | 1 + litebox/src/sync/mod.rs | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/litebox/Cargo.toml b/litebox/Cargo.toml index 9a8b2e401..8d3e59eed 100644 --- a/litebox/Cargo.toml +++ b/litebox/Cargo.toml @@ -31,6 +31,7 @@ windows-sys = { version = "0.60.2", features = [ [features] lock_tracing = ["dep:arrayvec", "spin/mutex"] +trace_fs = [] panic_on_unclosed_fd_drop = [] enforce_singleton_litebox_instance = [] diff --git a/litebox/src/sync/mod.rs b/litebox/src/sync/mod.rs index 0778d6d15..54ba89945 100644 --- a/litebox/src/sync/mod.rs +++ b/litebox/src/sync/mod.rs @@ -29,15 +29,33 @@ pub use rwlock::{ MappedRwLockReadGuard, MappedRwLockWriteGuard, RwLock, RwLockReadGuard, RwLockWriteGuard, }; -#[cfg(not(feature = "lock_tracing"))] +#[cfg(not(any(feature = "lock_tracing", feature = "trace_fs")))] /// A convenience name for specific requirements from the platform pub trait RawSyncPrimitivesProvider: platform::RawMutexProvider + Sync + 'static {} -#[cfg(not(feature = "lock_tracing"))] +#[cfg(not(any(feature = "lock_tracing", feature = "trace_fs")))] impl RawSyncPrimitivesProvider for Platform where Platform: platform::RawMutexProvider + Sync + 'static { } +// When `trace_fs` is enabled, filesystem tracing code logs through +// `DebugLogProvider`. Since the platform type is threaded through +// `RawSyncPrimitivesProvider` in fs-related contexts, the bound is added here +// so it is available wherever the platform is used. `lock_tracing` already +// includes `DebugLogProvider`, so this branch only applies when `trace_fs` is +// enabled without `lock_tracing`. 
+#[cfg(all(feature = "trace_fs", not(feature = "lock_tracing")))] +/// A convenience name for specific requirements from the platform +pub trait RawSyncPrimitivesProvider: + platform::RawMutexProvider + platform::DebugLogProvider + Sync + 'static +{ +} +#[cfg(all(feature = "trace_fs", not(feature = "lock_tracing")))] +impl RawSyncPrimitivesProvider for Platform where + Platform: platform::RawMutexProvider + platform::DebugLogProvider + Sync + 'static +{ +} + #[cfg(feature = "lock_tracing")] /// A convenience name for specific requirements from the platform pub trait RawSyncPrimitivesProvider: