diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2255b608..7d328968c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -188,16 +188,20 @@ jobs: tool: nextest@${{ env.NEXTEST_VERSION }} - uses: Swatinem/rust-cache@v2 - run: cargo clippy --locked --verbose --all-targets --all-features -p litebox_runner_linux_on_windows_userland + - run: cargo clippy --locked --verbose --all-targets --all-features -p litebox_packager - run: cargo build --locked --verbose -p litebox_runner_linux_on_windows_userland + - run: cargo build --locked --verbose -p litebox_packager - run: cargo nextest run --locked --profile ci -p litebox_runner_linux_on_windows_userland + - run: cargo nextest run --locked --profile ci -p litebox_packager - run: cargo nextest run --locked --profile ci -p litebox_shim_linux --no-default-features --features platform_windows_userland - run: | cargo test --locked --verbose --doc -p litebox_runner_linux_on_windows_userland + cargo test --locked --verbose --doc -p litebox_packager # We need to run `cargo test --doc` separately because doc tests # aren't included in nextest at the moment. 
See relevant discussion at # https://github.com/nextest-rs/nextest/issues/16 - name: Build documentation (fail on warnings) - run: cargo doc --locked --verbose --no-deps --all-features --document-private-items -p litebox_runner_linux_on_windows_userland + run: cargo doc --locked --verbose --no-deps --all-features --document-private-items -p litebox_runner_linux_on_windows_userland -p litebox_packager build_and_test_snp: name: Build and Test SNP diff --git a/Cargo.lock b/Cargo.lock index 1466b96ea..4b4a073c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1685,6 +1685,7 @@ dependencies = [ "litebox", "litebox_common_linux", "litebox_platform_multiplex", + "litebox_syscall_rewriter", "once_cell", "ringbuf", "seq-macro", diff --git a/dev_bench/unixbench/prepare_unixbench.py b/dev_bench/unixbench/prepare_unixbench.py index 0d472d505..4eee4e6e1 100644 --- a/dev_bench/unixbench/prepare_unixbench.py +++ b/dev_bench/unixbench/prepare_unixbench.py @@ -61,8 +61,8 @@ def prepare_benchmark( """ Prepare a single benchmark using litebox_packager. - The packager discovers dependencies, rewrites all ELFs, and creates a tar - (including litebox_rtld_audit.so). The rewritten main binary is extracted + The packager discovers dependencies, rewrites all ELFs, and creates a tar. + The rewritten main binary is extracted from the tar and placed alongside it. Returns True on success. 
diff --git a/dev_tests/src/boilerplate.rs b/dev_tests/src/boilerplate.rs index a32cf70b6..c29e14ebf 100644 --- a/dev_tests/src/boilerplate.rs +++ b/dev_tests/src/boilerplate.rs @@ -133,7 +133,6 @@ const SKIP_FILES: &[&str] = &[ "LICENSE", "litebox/src/sync/mutex.rs", "litebox/src/sync/rwlock.rs", - "litebox_rtld_audit/Makefile", "litebox_runner_linux_on_windows_userland/tests/test-bins/hello_exec_nolibc", "litebox_runner_linux_on_windows_userland/tests/test-bins/hello_thread", "litebox_runner_linux_on_windows_userland/tests/test-bins/hello_thread_static", diff --git a/dev_tests/src/ratchet.rs b/dev_tests/src/ratchet.rs index 276452d4d..8e6d35034 100644 --- a/dev_tests/src/ratchet.rs +++ b/dev_tests/src/ratchet.rs @@ -40,7 +40,6 @@ fn ratchet_globals() -> Result<()> { ("litebox_platform_lvbs/", 23), ("litebox_platform_multiplex/", 1), ("litebox_platform_windows_userland/", 8), - ("litebox_runner_linux_userland/", 1), ("litebox_runner_lvbs/", 5), ("litebox_runner_snp/", 1), ("litebox_shim_linux/", 1), diff --git a/litebox/Cargo.toml b/litebox/Cargo.toml index 9a8b2e401..8d3e59eed 100644 --- a/litebox/Cargo.toml +++ b/litebox/Cargo.toml @@ -31,6 +31,7 @@ windows-sys = { version = "0.60.2", features = [ [features] lock_tracing = ["dep:arrayvec", "spin/mutex"] +trace_fs = [] panic_on_unclosed_fd_drop = [] enforce_singleton_litebox_instance = [] diff --git a/litebox/src/event/mod.rs b/litebox/src/event/mod.rs index 24d5b6832..98ec7e680 100644 --- a/litebox/src/event/mod.rs +++ b/litebox/src/event/mod.rs @@ -48,6 +48,29 @@ pub trait IOPollable { /// calls are what notify observers. This particular function itself however _may_ be used to /// essentially get "the current status" of events for the system. fn check_io_events(&self) -> Events; + + /// Returns `true` if this pollable cannot deliver asynchronous observer + /// notifications (e.g. host-backed stdin where the host has no callback + /// mechanism). 
Callers should use periodic polling instead of blocking + /// indefinitely on observer wakeups. + /// + /// Defaults to `false` (async notifications work). This is safe for all + /// existing implementors; callers that use this method arrive in subsequent + /// stacked PRs. + fn needs_host_poll(&self) -> bool { + false + } + + /// Returns `true` if reads on this pollable should block when no data is + /// available. Returns `false` for pollables whose callers perform + /// asynchronous readiness checks and expect a "would block" indication + /// immediately (e.g. PTY master side). + /// + /// Defaults to `true` (blocking reads). This is safe for all existing + /// implementors; callers arrive in subsequent stacked PRs. + fn should_block_read(&self) -> bool { + true + } } impl IOPollable for alloc::sync::Arc { @@ -61,4 +84,10 @@ impl IOPollable for alloc::sync::Arc { fn check_io_events(&self) -> Events { self.as_ref().check_io_events() } + fn needs_host_poll(&self) -> bool { + self.as_ref().needs_host_poll() + } + fn should_block_read(&self) -> bool { + self.as_ref().should_block_read() + } } diff --git a/litebox/src/event/observer.rs b/litebox/src/event/observer.rs index 4e42a304e..42cdbd409 100644 --- a/litebox/src/event/observer.rs +++ b/litebox/src/event/observer.rs @@ -93,9 +93,28 @@ impl, Platform: RawSyncPrimitivesProvider> Subject, F>) { + observers.retain(|observer, _| { + if observer.upgrade().is_some() { + true + } else { + self.nums.fetch_sub(1, Ordering::Relaxed); + false + } + }); + } + /// Register an observer with the given filter. 
pub fn register_observer(&self, observer: Weak>, filter: F) { let mut observers = self.observers.lock(); + self.prune_dead_observers(&mut observers); if observers .insert(ObserverKey::new(observer), filter) .is_none() @@ -119,16 +138,65 @@ impl, Platform: RawSyncPrimitivesProvider> Subject for TestObserver { + fn on_events(&self, _events: &Events) { + self.notifications.fetch_add(1, Ordering::Relaxed); + } + } + + #[test] + fn register_observer_prunes_dead_entries() { + let subject = Subject::::new(); + + let stale = Arc::new(TestObserver { + notifications: AtomicUsize::new(0), + }); + subject.register_observer(Arc::downgrade(&stale) as _, Events::IN); + assert_eq!(subject.nums.load(Ordering::Relaxed), 1); + assert_eq!(subject.observers.lock().len(), 1); + drop(stale); + + let fresh = Arc::new(TestObserver { + notifications: AtomicUsize::new(0), }); + subject.register_observer(Arc::downgrade(&fresh) as _, Events::OUT); + { + let observers = subject.observers.lock(); + let registered = observers + .keys() + .next() + .and_then(super::ObserverKey::upgrade) + .expect("dead observer should be pruned during registration"); + let fresh_observer: Arc> = fresh.clone(); + assert!(Arc::ptr_eq(&registered, &fresh_observer)); + assert_eq!(subject.nums.load(Ordering::Relaxed), 1); + assert_eq!(observers.len(), 1); + } + subject.notify_observers(Events::OUT); + + assert_eq!(fresh.notifications.load(Ordering::Relaxed), 1); } } diff --git a/litebox/src/fd/mod.rs b/litebox/src/fd/mod.rs index aeef757dd..d93da35b1 100644 --- a/litebox/src/fd/mod.rs +++ b/litebox/src/fd/mod.rs @@ -477,7 +477,7 @@ impl Descriptors { ) -> Option where Subsystem: FdEnabledSubsystem, - T: core::any::Any + Send + Sync, + T: core::any::Any + Clone + Send + Sync, { self.entries[fd.x.as_usize()?] @@ -506,7 +506,7 @@ impl Descriptors { ) -> Option where Subsystem: FdEnabledSubsystem, - T: core::any::Any + Send + Sync, + T: core::any::Any + Clone + Send + Sync, { self.entries[fd.x.as_usize()?] 
.as_mut() diff --git a/litebox/src/fd/tests.rs b/litebox/src/fd/tests.rs index 04a482b44..ac92d252d 100644 --- a/litebox/src/fd/tests.rs +++ b/litebox/src/fd/tests.rs @@ -6,10 +6,10 @@ use alloc::string::ToString as _; use alloc::vec; use alloc::vec::Vec; -use crate::LiteBox; use crate::fd::FdEnabledSubsystemEntry; use crate::fd::{ErrRawIntFd, FdEnabledSubsystem, TypedFd}; use crate::platform::mock::MockPlatform; +use crate::LiteBox; struct MockSubsystem; impl FdEnabledSubsystem for MockSubsystem { diff --git a/litebox/src/fs/devices.rs b/litebox/src/fs/devices.rs index 90e715230..a8851d18e 100644 --- a/litebox/src/fs/devices.rs +++ b/litebox/src/fs/devices.rs @@ -8,16 +8,16 @@ use alloc::string::String; use crate::{ - LiteBox, fs::{ - FileStatus, FileType, Mode, NodeInfo, OFlags, SeekWhence, UserInfo, errors::{ ChmodError, ChownError, CloseError, FileStatusError, MkdirError, OpenError, PathError, ReadDirError, ReadError, RmdirError, SeekError, TruncateError, UnlinkError, WriteError, }, + FileStatus, FileType, Mode, NodeInfo, OFlags, SeekWhence, UserInfo, }, path::Arg, platform::{StdioOutStream, StdioReadError, StdioWriteError}, + LiteBox, }; /// Block size for stdio devices @@ -145,10 +145,10 @@ impl super::FileSystem for FileSystem + Platform: crate::sync::RawSyncPrimitivesProvider + + crate::platform::StdioProvider + + crate::platform::CrngProvider, + > super::FileSystem for FileSystem { fn open( &self, @@ -254,6 +254,7 @@ impl< .read_from_stdin(buf) .map_err(|e| match e { StdioReadError::Closed => unimplemented!(), + StdioReadError::WouldBlock => unimplemented!(), }) } diff --git a/litebox/src/mm/linux.rs b/litebox/src/mm/linux.rs index f33094971..cca098941 100644 --- a/litebox/src/mm/linux.rs +++ b/litebox/src/mm/linux.rs @@ -11,11 +11,11 @@ use alloc::vec::Vec; use rangemap::RangeMap; use thiserror::Error; -use crate::platform::PageManagementProvider; -use crate::platform::RawConstPointer; use crate::platform::page_mgmt::AllocationError; use 
crate::platform::page_mgmt::FixedAddressBehavior; use crate::platform::page_mgmt::MemoryRegionPermissions; +use crate::platform::PageManagementProvider; +use crate::platform::RawConstPointer; /// Page size in bytes pub const PAGE_SIZE: usize = 4096; @@ -509,6 +509,7 @@ impl + 'static, const ALIGN: usize> Vmem MemoryRegionPermissions::from_bits(permissions).unwrap(), vma.flags.contains(VmFlags::VM_GROWSDOWN), populate_pages_immediately, + false, platform_fixed_address_behavior, ) .map_err(|err| match err { diff --git a/litebox/src/mm/tests.rs b/litebox/src/mm/tests.rs index 3142971ef..1de0e5b85 100644 --- a/litebox/src/mm/tests.rs +++ b/litebox/src/mm/tests.rs @@ -9,15 +9,15 @@ use alloc::vec::Vec; use crate::{ mm::linux::{CreatePagesFlags, NonZeroAddress}, platform::{ - PageManagementProvider, RawConstPointer, page_mgmt::MemoryRegionPermissions, trivial_providers::{TransparentConstPtr, TransparentMutPtr}, + PageManagementProvider, RawConstPointer, }, }; use zerocopy::{FromBytes, IntoBytes}; use super::linux::{ - NonZeroPageSize, PAGE_SIZE, PageRange, VmArea, VmFlags, Vmem, VmemProtectError, VmemResizeError, + NonZeroPageSize, PageRange, VmArea, VmFlags, Vmem, VmemProtectError, VmemResizeError, PAGE_SIZE, }; /// A dummy implementation of [`VmemBackend`] that does nothing. 
@@ -43,6 +43,7 @@ impl crate::platform::PageManagementProvider for DummyVmemBackend { initial_permissions: crate::platform::page_mgmt::MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: crate::platform::page_mgmt::FixedAddressBehavior, ) -> Result, crate::platform::page_mgmt::AllocationError> { Ok(TransparentMutPtr::from_usize(suggested_range.start)) @@ -154,15 +155,13 @@ fn test_vmm_mapping() { Err(VmemProtectError::InvalidRange(_)) )); - assert!( - unsafe { - vmm.resize_mapping( - PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), - NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.resize_mapping( + PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), + NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), + ) + } + .is_ok()); // Grow and merge, [(0x1_0000, 0x1_c000)] assert_eq!( collect_mappings(&vmm), @@ -180,15 +179,13 @@ fn test_vmm_mapping() { Err(VmemProtectError::NoAccess { .. 
}) )); - assert!( - unsafe { - vmm.protect_mapping( - PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 4 * PAGE_SIZE).unwrap(), - MemoryRegionPermissions::READ | MemoryRegionPermissions::WRITE, - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.protect_mapping( + PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 4 * PAGE_SIZE).unwrap(), + MemoryRegionPermissions::READ | MemoryRegionPermissions::WRITE, + ) + } + .is_ok()); // Change permission, [(0x1_0000, 0x1_2000), (0x1_2000, 0x1_4000), (0x1_4000, 0x1_c000)] assert_eq!( collect_mappings(&vmm), @@ -205,16 +202,14 @@ fn test_vmm_mapping() { unsafe { vmm.resize_mapping(r, NonZeroPageSize::new(PAGE_SIZE * 4).unwrap()) }, Err(VmemResizeError::RangeOccupied(_)) )); - assert!( - unsafe { - vmm.move_mappings( - r, - Some(NonZeroAddress::new(start_addr + 12 * PAGE_SIZE).unwrap()), - NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), - ) - } - .is_ok_and(|v| v.as_usize() == start_addr + 12 * PAGE_SIZE) - ); + assert!(unsafe { + vmm.move_mappings( + r, + Some(NonZeroAddress::new(start_addr + 12 * PAGE_SIZE).unwrap()), + NonZeroPageSize::new(PAGE_SIZE * 4).unwrap(), + ) + } + .is_ok_and(|v| v.as_usize() == start_addr + 12 * PAGE_SIZE)); assert_eq!( collect_mappings(&vmm), vec![ @@ -274,15 +269,13 @@ fn test_vmm_mapping() { ); // shrink mapping - assert!( - unsafe { - vmm.resize_mapping( - PageRange::new(start_addr + 4 * PAGE_SIZE, start_addr + 8 * PAGE_SIZE).unwrap(), - NonZeroPageSize::new(2 * PAGE_SIZE).unwrap(), - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.resize_mapping( + PageRange::new(start_addr + 4 * PAGE_SIZE, start_addr + 8 * PAGE_SIZE).unwrap(), + NonZeroPageSize::new(2 * PAGE_SIZE).unwrap(), + ) + } + .is_ok()); assert_eq!( collect_mappings(&vmm), vec![ diff --git a/litebox/src/net/phy.rs b/litebox/src/net/phy.rs index ff4d85c6e..e8ebc4beb 100644 --- a/litebox/src/net/phy.rs +++ b/litebox/src/net/phy.rs @@ -51,7 +51,11 @@ impl smoltcp::phy::Device for Device None, + Err( + 
platform::ReceiveError::WouldBlock + | platform::ReceiveError::ProtocolError + | platform::ReceiveError::Eof, + ) => None, } } diff --git a/litebox/src/platform/address_space.rs b/litebox/src/platform/address_space.rs new file mode 100644 index 000000000..dc401e90d --- /dev/null +++ b/litebox/src/platform/address_space.rs @@ -0,0 +1,137 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//! Address-space management types and traits for multi-process support. +//! +//! The [`AddressSpaceProvider`] trait is an **optional** South interface that +//! platforms implement to manage per-process address spaces. Platforms may use +//! separate page tables, VA-range partitioning, or other techniques to isolate +//! address spaces. + +use core::ops::Range; +use thiserror::Error; + +/// The result of forking an address space. +/// +/// The variant tells the caller what kind of copy was created so it can adjust +/// its behavior (e.g., whether to copy page contents or share them). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ForkedAddressSpace { + /// Independent copy-on-write copy with the full address range. The child + /// has its own backing structures; CoW faults are resolved by the + /// platform. + Independent(Id), + /// A new VA-range partition is assigned to the child. Parent memory is + /// shared; the shim is responsible for copying pages as needed. + SharedWithParent(Id), +} + +/// Errors that can occur during address-space operations. +#[derive(Error, Debug)] +#[non_exhaustive] +pub enum AddressSpaceError { + /// No free address-space slots or VA ranges available. + #[error("no address space slots available")] + NoSpace, + /// The given address-space ID is not valid (already destroyed, never + /// created, etc.). + #[error("invalid address space id")] + InvalidId, + /// The platform does not support this operation. 
+ #[error("operation not supported by this platform")] + NotSupported, +} + +/// A provider for managing per-process address spaces. +/// +/// This is an **optional** trait — platforms that do not yet support +/// multi-process may leave all methods at the default (which returns +/// [`AddressSpaceError::NotSupported`]). +/// +/// # Associated Type +/// +/// `AddressSpaceId` is an opaque, lightweight handle that identifies one +/// address space. It must be `Copy + Eq + Send + Sync` so it can be stored +/// inside process contexts and passed across threads. +pub trait AddressSpaceProvider { + /// Opaque identifier for an address space. + type AddressSpaceId: Copy + Eq + Send + Sync + core::fmt::Debug; + + /// Create a new, empty address space. + /// + /// The platform allocates whatever backing structures are needed for the + /// new address space. + fn create_address_space(&self) -> Result { + Err(AddressSpaceError::NotSupported) + } + + /// Destroy an address space, releasing all associated resources. + /// + /// After this call, `id` is invalid and must not be reused. + fn destroy_address_space(&self, id: Self::AddressSpaceId) -> Result<(), AddressSpaceError> { + let _ = id; + Err(AddressSpaceError::NotSupported) + } + + /// Fork an address space from `parent`. + /// + /// Returns a [`ForkedAddressSpace`] indicating what kind of fork was + /// performed: + /// + /// * [`Independent`](ForkedAddressSpace::Independent) — full CoW copy. + /// * [`SharedWithParent`](ForkedAddressSpace::SharedWithParent) — new VA + /// partition, parent pages shared. + fn fork_address_space( + &self, + parent: Self::AddressSpaceId, + ) -> Result, AddressSpaceError> { + let _ = parent; + Err(AddressSpaceError::NotSupported) + } + + /// Make `id` the active address space for the current CPU / thread. 
+ fn activate_address_space(&self, id: Self::AddressSpaceId) -> Result<(), AddressSpaceError> { + let _ = id; + Err(AddressSpaceError::NotSupported) + } + + /// Execute `f` with the given address space active, then restore the + /// previously active address space. + /// + /// Implementations **must** restore the prior address space even if `f` + /// panics (use a guard / RAII pattern). + /// + /// The default returns [`AddressSpaceError::NotSupported`]. Platforms that + /// implement [`activate_address_space`](Self::activate_address_space) should + /// also override this method with a proper save/restore sequence. + fn with_address_space( + &self, + id: Self::AddressSpaceId, + f: impl FnOnce() -> R, + ) -> Result { + let _ = (id, f); + Err(AddressSpaceError::NotSupported) + } + + /// Whether the platform requires eager copy-on-write snapshots during + /// fork instead of lazy page-fault-driven CoW. + /// + /// When `true`, the shim eagerly copies all writable guest pages before + /// spawning the forked child and restores them after the child execs or + /// exits. When `false` (the default), the shim marks writable pages + /// read-only and lazily snapshots individual pages on first write fault. + /// + /// Platforms where the exception/fault handler shares the guest address + /// space must set this to `true` because a CoW fault inside the handler + /// itself would be fatal. + const EAGER_COW_ON_FORK: bool = false; + + /// Return the VA range available to the given address space. 
+ fn address_space_range( + &self, + id: Self::AddressSpaceId, + ) -> Result, AddressSpaceError> { + let _ = id; + Err(AddressSpaceError::NotSupported) + } +} diff --git a/litebox/src/platform/mock.rs b/litebox/src/platform/mock.rs index 3a3297aa6..cf1d60f7f 100644 --- a/litebox/src/platform/mock.rs +++ b/litebox/src/platform/mock.rs @@ -59,6 +59,14 @@ impl MockPlatform { impl Provider for MockPlatform {} +impl RawMessageProvider for MockPlatform {} + +impl AddressSpaceProvider for MockPlatform { + // All methods default to `Err(NotSupported)`, which is correct for the + // mock platform (single-process only). + type AddressSpaceId = u32; +} + pub(crate) struct MockRawMutex { inner: AtomicU32, internal_state: std::sync::RwLock, @@ -210,7 +218,7 @@ impl IPInterfaceProvider for MockPlatform { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct MockInstant { - time: u64, + pub(crate) time: u64, } impl Instant for MockInstant { @@ -230,7 +238,7 @@ impl Instant for MockInstant { } pub(crate) struct MockSystemTime { - time: u64, + pub(crate) time: u64, } impl SystemTime for MockSystemTime { @@ -290,6 +298,9 @@ impl RawPointerProvider for MockPlatform { impl StdioProvider for MockPlatform { fn read_from_stdin(&self, buf: &mut [u8]) -> Result { + if buf.is_empty() { + return Ok(0); + } let Some(front) = self.stdin_queue.write().unwrap().pop_front() else { return Err(StdioReadError::Closed); }; @@ -318,6 +329,26 @@ impl StdioProvider for MockPlatform { fn is_a_tty(&self, _stream: StdioStream) -> bool { false } + + fn get_terminal_input_bytes(&self, stream: StdioStream) -> Result { + match stream { + StdioStream::Stdin => { + let len = self + .stdin_queue + .read() + .unwrap() + .iter() + .map(std::vec::Vec::len) + .sum::(); + Ok(u32::try_from(len).unwrap_or(u32::MAX)) + } + StdioStream::Stdout | StdioStream::Stderr => Err(StdioIoctlError::NotATerminal), + } + } + + fn poll_stdin_readable(&self) -> bool { + 
self.stdin_queue.read().unwrap().front().is_some() + } } impl CrngProvider for MockPlatform { @@ -333,6 +364,29 @@ impl CrngProvider for MockPlatform { } } +#[cfg(test)] +mod tests { + use super::{MockPlatform, StdioProvider}; + + #[test] + fn nonblocking_stdin_reads_queued_input() { + let platform = MockPlatform::new(); + platform + .stdin_queue + .write() + .unwrap() + .push_back(b"ready".to_vec()); + + let mut buf = [0u8; 8]; + let read = platform + .read_from_stdin_nonblocking(&mut buf) + .expect("queued stdin should not block"); + + assert_eq!(read, 5); + assert_eq!(&buf[..read], b"ready"); + } +} + std::thread_local! { static MOCK_TLS: core::cell::Cell<*mut()> = const { core::cell::Cell::new(core::ptr::null_mut()) }; } diff --git a/litebox/src/platform/mod.rs b/litebox/src/platform/mod.rs index 983f20c84..593ad1686 100644 --- a/litebox/src/platform/mod.rs +++ b/litebox/src/platform/mod.rs @@ -7,6 +7,7 @@ //! trait is merely a collection of subtraits that could be composed independently from various //! other crates that implement them upon various types. +pub mod address_space; pub mod common_providers; pub mod page_mgmt; pub mod trivial_providers; @@ -18,16 +19,19 @@ use either::Either; use thiserror::Error; use zerocopy::{FromBytes, IntoBytes}; +pub use address_space::*; pub use page_mgmt::PageManagementProvider; #[macro_export] macro_rules! log_println { ($platform:expr, $s:expr) => {{ + #[allow(unused_imports)] use $crate::platform::DebugLogProvider as _; $platform.debug_log_print($s); }}; ($platform:expr, $($tt:tt)*) => {{ use core::fmt::Write as _; + #[allow(unused_imports)] use $crate::platform::DebugLogProvider as _; let mut t: arrayvec::ArrayString<8192> = arrayvec::ArrayString::new(); writeln!(t, $($tt)*).unwrap(); @@ -43,10 +47,12 @@ macro_rules! 
log_println { pub trait Provider: RawMutexProvider + IPInterfaceProvider + + RawMessageProvider + TimeProvider + PunchthroughProvider + DebugLogProvider + RawPointerProvider + + AddressSpaceProvider { } @@ -382,17 +388,54 @@ pub trait IPInterfaceProvider { fn receive_ip_packet(&self, packet: &mut [u8]) -> Result; } -/// A non-exhaustive list of errors that can be thrown by [`IPInterfaceProvider::send_ip_packet`]. +/// Errors from send operations on [`IPInterfaceProvider`] and [`RawMessageProvider`]. #[derive(Error, Debug)] #[non_exhaustive] -pub enum SendError {} +pub enum SendError { + /// The underlying device returned an I/O error. The packet was not sent. + #[error("I/O error on send: errno {0}")] + Io(i32), + /// The channel is not available on this platform. + #[error("send channel unavailable")] + Unavailable, +} -/// A non-exhaustive list of errors that can be thrown by [`IPInterfaceProvider::receive_ip_packet`]. +/// Errors from receive operations on [`IPInterfaceProvider`] and [`RawMessageProvider`]. #[derive(Error, Debug)] #[non_exhaustive] pub enum ReceiveError { #[error("Receive operation would block")] WouldBlock, + #[error("IPC protocol error: oversized frame")] + ProtocolError, + #[error("Channel closed (EOF)")] + Eof, +} + +/// A raw byte-stream channel for direct message passing between the guest and +/// the host (bypassing the IP network stack). +/// +/// When available, this provides a fast path for protocols like 9P that would +/// otherwise pay the overhead of traversing two smoltcp stacks. +/// +/// The default implementation returns [`ReceiveError::WouldBlock`] / +/// [`SendError::Unavailable`], indicating the channel is not available. +/// Platforms that support direct messaging override these methods. +pub trait RawMessageProvider { + /// Send bytes to the host over the raw channel. + /// + /// Returns `Ok(n)` with the number of bytes sent, or an error. 
+ fn send_raw_message(&self, _data: &[u8]) -> Result { + Err(SendError::Unavailable) + } + + /// Receive bytes from the host over the raw channel. + /// + /// Returns `Ok(n)` with the number of bytes read into `buf`, or + /// [`ReceiveError::WouldBlock`] if no data is available yet. + fn recv_raw_message(&self, _buf: &mut [u8]) -> Result { + Err(ReceiveError::WouldBlock) + } } /// An interface to understanding time. @@ -611,6 +654,8 @@ where pub enum StdioReadError { #[error("input stream has been closed")] Closed, + #[error("input would block")] + WouldBlock, } /// A non-exhaustive list of errors that can be thrown by [`StdioProvider::write_to`]. @@ -641,16 +686,230 @@ pub enum StdioStream { Stderr = 2, } +/// Errors from terminal operations on [`StdioProvider`]. +#[derive(Error, Debug)] +#[non_exhaustive] +pub enum StdioIoctlError { + /// The stream is not a terminal. + #[error("not a terminal")] + NotATerminal, + /// The operation failed with an OS error code (errno on Linux, mapped + /// equivalent on other platforms). + #[error("ioctl failed: {0}")] + OsError(i32), +} + +/// Platform-agnostic terminal attributes, mirroring the fields of Linux +/// `struct termios`. +/// +/// The shim layer translates these fields to and from the guest ABI. +/// Platform implementations fill this struct using their native APIs (e.g., +/// direct ioctl forwarding on Linux, `GetConsoleMode`/`SetConsoleMode` on +/// Windows). +#[derive(Debug, Clone)] +pub struct TerminalAttributes { + /// Input mode flags. + pub c_iflag: u32, + /// Output mode flags. + pub c_oflag: u32, + /// Control mode flags. + pub c_cflag: u32, + /// Local mode flags. + pub c_lflag: u32, + /// Line discipline (typically `0` for `N_TTY`). + pub c_line: u8, + /// Control characters. + pub c_cc: [u8; 19], +} + +// Terminal attribute flag constants. 
+const TERMATTR_ECHO: u32 = 0x0008; +const TERMATTR_ICRNL: u32 = 0x0100; +const TERMATTR_OPOST: u32 = 0x0001; +const TERMATTR_ONLCR: u32 = 0x0004; + +impl TerminalAttributes { + /// Default terminal attributes matching a freshly opened Linux PTY. + /// + /// These are realistic values that satisfy terminal detection in programs + /// such as Node.js Ink. **All-zero termios causes such programs to reject + /// the terminal silently.** + pub fn new_default() -> Self { + Self { + c_iflag: 0x6d02, // BRKINT | ICRNL | IXON | IXANY | IMAXBEL | IUTF8 + c_oflag: 0x0005, // OPOST | ONLCR + c_cflag: 0x04bf, // CS8 | CREAD | HUPCL | B38400 + c_lflag: 0x8a3b, // ECHO | ECHOE | ECHOK | ISIG | ICANON | IEXTEN | ECHOCTL | ECHOKE + c_line: 0, // N_TTY + c_cc: [ + 0x03, 0x1c, 0x7f, 0x15, 0x04, 0x00, 0x01, 0x00, 0x11, 0x13, 0x1a, 0xff, 0x12, 0x0f, + 0x17, 0x16, 0xff, 0x00, 0x00, + ], + } + } + + /// Returns `true` if the `ECHO` local flag is set. + pub fn echo_enabled(&self) -> bool { + self.c_lflag & TERMATTR_ECHO != 0 + } + + /// Returns `true` if the `ICRNL` input flag is set. + pub fn icrnl_enabled(&self) -> bool { + self.c_iflag & TERMATTR_ICRNL != 0 + } + + /// Returns `true` if output post-processing with newline translation + /// (`OPOST | ONLCR`) is enabled. + pub fn onlcr_enabled(&self) -> bool { + (self.c_oflag & TERMATTR_OPOST != 0) && (self.c_oflag & TERMATTR_ONLCR != 0) + } +} + +/// Platform-agnostic terminal window size. +#[derive(Debug, Clone, Copy)] +pub struct WindowSize { + /// Number of rows (height in characters). + pub rows: u16, + /// Number of columns (width in characters). + pub cols: u16, + /// Horizontal size in pixels (informational, often zero). + pub xpixel: u16, + /// Vertical size in pixels (informational, often zero). + pub ypixel: u16, +} + +/// When to apply terminal attribute changes, corresponding to POSIX +/// `tcsetattr()` actions. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SetTermiosWhen { + /// Apply immediately. 
+ Now, + /// Drain output first, then apply. + AfterDrain, + /// Drain output first, flush pending input, then apply. + AfterDrainFlushInput, +} + /// A provider of standard input/output functionality. pub trait StdioProvider { /// Read from standard input. Returns number of bytes read. fn read_from_stdin(&self, buf: &mut [u8]) -> Result; + /// Read from standard input without blocking. + /// + /// Platforms with exact nonblocking stdin support should override this + /// instead of emulating it with a separate readiness probe. + fn read_from_stdin_nonblocking(&self, buf: &mut [u8]) -> Result { + if buf.is_empty() { + return Ok(0); + } + if !self.poll_stdin_readable() { + return Err(StdioReadError::WouldBlock); + } + self.read_from_stdin(buf) + } + /// Write to stdout/stderr. Returns number of bytes written. fn write_to(&self, stream: StdioOutStream, buf: &[u8]) -> Result; /// Check if a stream is connected to a TTY. fn is_a_tty(&self, stream: StdioStream) -> bool; + + /// Get the terminal attributes for a stdio stream. + /// + /// Platform implementations query the host terminal and populate a + /// [`TerminalAttributes`] struct. The default returns + /// [`StdioIoctlError::NotATerminal`]. + fn get_terminal_attributes( + &self, + _stream: StdioStream, + ) -> Result { + Err(StdioIoctlError::NotATerminal) + } + + /// Set the terminal attributes for a stdio stream. + /// + /// Platform implementations translate the requested attributes into native + /// terminal API calls. The default returns + /// [`StdioIoctlError::NotATerminal`]. + fn set_terminal_attributes( + &self, + _stream: StdioStream, + _attrs: &TerminalAttributes, + _when: SetTermiosWhen, + ) -> Result<(), StdioIoctlError> { + Err(StdioIoctlError::NotATerminal) + } + + /// Get the terminal window size for a stdio stream. + /// + /// The default returns [`StdioIoctlError::NotATerminal`]. 
+ fn get_window_size(&self, _stream: StdioStream) -> Result { + Err(StdioIoctlError::NotATerminal) + } + + /// Get the number of input bytes currently readable from a terminal stream. + /// + /// Platforms that do not support terminal input-queue queries may return + /// [`StdioIoctlError::NotATerminal`]. + fn get_terminal_input_bytes(&self, _stream: StdioStream) -> Result { + Err(StdioIoctlError::NotATerminal) + } + + /// Set the terminal window size for a stdio stream. + /// + /// On some platforms this stores the size so that subsequent + /// `get_window_size` calls return the stored value (the actual console + /// is not resized). The default returns + /// [`StdioIoctlError::NotATerminal`]. + fn set_window_size( + &self, + _stream: StdioStream, + _size: &WindowSize, + ) -> Result<(), StdioIoctlError> { + Err(StdioIoctlError::NotATerminal) + } + + /// Check if stdin has data available for reading without blocking. + /// + /// Returns `true` if a `read()` on stdin would return data immediately. + /// Used by epoll/poll to report stdin readability. The default returns + /// `false`. + fn poll_stdin_readable(&self) -> bool { + false + } + + /// Cancel any pending `read_from_stdin()` call, causing it to return + /// [`StdioReadError::Closed`]. Used during process exit to unblock + /// threads waiting on stdin. The default is a no-op. + fn cancel_stdin(&self) {} + + /// Returns the host terminal device identity for stdin, if it is + /// connected to a real terminal. + /// + /// Used to report correct device info in guest-visible stat and readlink + /// operations, so that runtimes can discover and reopen the controlling + /// terminal by its actual device path. + /// + /// Returns `None` when stdin is not a terminal (pipes, files) or on + /// platforms that do not expose terminal device paths. + fn host_stdin_tty_device_info(&self) -> Option { + None + } +} + +/// Host terminal device identity, returned by +/// [`StdioProvider::host_stdin_tty_device_info`]. 
+#[derive(Debug, Clone)] +pub struct HostTtyDeviceInfo { + /// Device path on the host (e.g., a PTY path on Linux). + pub path: alloc::string::String, + /// Device number encoding (major/minor) from the host. + pub rdev: u64, + /// Device ID of the filesystem containing the device node. + pub dev: u64, + /// Inode number of the device node on the host. + pub ino: u64, } /// A provider for system information. @@ -666,6 +925,16 @@ pub trait SystemInfoProvider { /// Return `Some(address)` if the VDSO is available on the platform, or `None` /// if the platform does not support or provide a VDSO. fn get_vdso_address(&self) -> Option; + + /// Returns the current processor number, used to emulate `getcpu`-family + /// syscalls and related VDSO interfaces. + /// + /// Platforms that do not expose a stable processor identifier, or that + /// virtualize CPU topology, may return `0`. Callers arrive in subsequent + /// stacked PRs. + fn current_processor_number(&self) -> u32 { + 0 + } } /// A provider for thread-local storage. diff --git a/litebox/src/platform/page_mgmt.rs b/litebox/src/platform/page_mgmt.rs index c4fca057a..da9b03fd9 100644 --- a/litebox/src/platform/page_mgmt.rs +++ b/litebox/src/platform/page_mgmt.rs @@ -49,6 +49,8 @@ pub trait PageManagementProvider: RawPointerProvider { /// a page fault. /// - `populate_pages_immediately`: If `true`, the pages are populated immediately; otherwise, /// they are populated lazily. + /// - `noreserve`: If `true`, request a sparse reservation that avoids reserving swap/commit + /// upfront when the platform supports it. /// - `fixed_address_behavior`: Specifies the required semantics of `suggested_range`. 
/// /// # Returns @@ -64,6 +66,7 @@ pub trait PageManagementProvider: RawPointerProvider { initial_permissions: MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, AllocationError>; @@ -108,6 +111,7 @@ pub trait PageManagementProvider: RawPointerProvider { temp_permissions, false, true, + false, FixedAddressBehavior::NoReplace, ) .map_err(|e| match e { @@ -135,12 +139,13 @@ pub trait PageManagementProvider: RawPointerProvider { let total_len = old_range.len(); let mut offset = 0; while offset < total_len { + let chunk_len = (total_len - offset).min(ALIGN); let old_ptr = ::RawConstPointer::from_usize(old_range.start + offset); new_ptr .write_slice_at_offset( isize::try_from(offset).unwrap(), - &old_ptr.to_owned_slice(old_range.len()).unwrap(), + &old_ptr.to_owned_slice(chunk_len).unwrap(), ) .unwrap(); offset += ALIGN; @@ -148,7 +153,7 @@ pub trait PageManagementProvider: RawPointerProvider { if temp_permissions != permissions { (unsafe { self.update_permissions(new_range.clone(), permissions) }) - .expect("failed to restore perrmissions on new range"); + .expect("failed to restore permissions on new range"); } (unsafe { self.deallocate_pages(old_range) }).expect("failed to deallocate old range"); diff --git a/litebox/src/sync/futex.rs b/litebox/src/sync/futex.rs index 7785b116f..eb89b0f1b 100644 --- a/litebox/src/sync/futex.rs +++ b/litebox/src/sync/futex.rs @@ -25,8 +25,10 @@ use thiserror::Error; /// A manager of all available futexes. /// -/// Note: currently, this only supports "private" futexes, since it assumes only a single process. -/// In the future, this may be expanded to support multi-process futexes. +/// Supports both private and shared futexes. Callers provide an +/// `address_space_id` discriminator to distinguish futexes at the same virtual +/// address in different address spaces. 
Entries are only matched (for wake) when +/// both the address and address-space ID agree. pub struct FutexManager { /// Chaining hash table to map from futex address to waiter lists. table: alloc::boxed::Box<[LoanList>; HASH_TABLE_ENTRIES]>, @@ -41,6 +43,9 @@ const HASH_TABLE_ENTRIES: usize = 256; struct FutexEntry { addr: usize, + /// Opaque discriminator distinguishing address spaces. Entries with + /// different discriminators never match, even at the same virtual address. + address_space_id: u64, waker: Waker, bitset: u32, done: AtomicBool, @@ -62,9 +67,16 @@ impl } } - /// Returns the hash table bucket for the given futex address. - fn bucket(&self, addr: usize) -> &LoanList> { - let hash: usize = self.hash_builder.hash_one(addr).truncate(); + /// Returns the hash table bucket for the given futex key. + fn bucket( + &self, + addr: usize, + address_space_id: u64, + ) -> &LoanList> { + let hash: usize = self + .hash_builder + .hash_one((addr, address_space_id)) + .truncate(); &self.table[hash % HASH_TABLE_ENTRIES] } @@ -80,12 +92,16 @@ impl /// If `bitset` is `Some`, then the waiter is only woken if the wake call's /// `bitset` has a non-zero intersection with the waiter's mask. Specifying /// `None` is equivalent to setting all bits in the mask. + /// + /// `address_space_id` is an opaque discriminator that distinguishes futexes + /// at the same virtual address in different address spaces. 
pub fn wait( &self, cx: &WaitContext<'_, Platform>, futex_addr: Platform::RawMutPointer, expected_value: u32, bitset: Option, + address_space_id: u64, ) -> Result<(), FutexError> { let bitset = bitset.unwrap_or(ALL_BITS).get(); let addr = futex_addr.as_usize(); @@ -93,9 +109,10 @@ impl return Err(FutexError::NotAligned); } - let bucket = self.bucket(addr); + let bucket = self.bucket(addr, address_space_id); let mut entry = pin!(LoanListEntry::new(FutexEntry { addr, + address_space_id, waker: cx.waker().clone(), bitset, done: AtomicBool::new(false), @@ -131,12 +148,16 @@ impl /// (subject to the `num_to_wake` limit). If `bitset` is `None`, then all /// waiters are eligible to be woken. /// + /// `address_space_id` must match the value passed to the corresponding + /// [`wait`](Self::wait) call. + /// /// Returns the number of waiters that were woken up. pub fn wake( &self, futex_addr: Platform::RawMutPointer, num_to_wake_up: NonZeroU32, bitset: Option, + address_space_id: u64, ) -> Result { let addr = futex_addr.as_usize(); if !addr.is_multiple_of(align_of::()) { @@ -144,10 +165,13 @@ impl } let bitset = bitset.unwrap_or(ALL_BITS).get(); let mut woken = 0; - let bucket = self.bucket(addr); + let bucket = self.bucket(addr, address_space_id); // Extract matching entries from the bucket until we've woken enough. 
let entries = bucket.extract_if(|entry| { - if entry.addr != addr || entry.bitset & bitset == 0 { + if entry.addr != addr + || entry.address_space_id != address_space_id + || entry.bitset & bitset == 0 + { return core::ops::ControlFlow::Continue(false); } woken += 1; @@ -185,9 +209,9 @@ mod tests { extern crate std; use super::*; - use crate::LiteBox; use crate::event::wait::WaitState; use crate::platform::mock::MockPlatform; + use crate::LiteBox; use alloc::sync::Arc; use core::num::NonZeroU32; use core::sync::atomic::{AtomicU32, Ordering}; @@ -218,7 +242,7 @@ mod tests { barrier_clone.wait(); // Sync with main thread // Wait for value 0 - futex_manager_clone.wait(&WaitState::new(platform).context(), futex_addr, 0, None) + futex_manager_clone.wait(&WaitState::new(platform).context(), futex_addr, 0, None, 0) }); barrier.wait(); // Wait for waiter to be ready @@ -231,7 +255,7 @@ mod tests { futex_word.as_ptr() as usize, ); let woken = futex_manager - .wake(futex_addr, NonZeroU32::new(1).unwrap(), None) + .wake(futex_addr, NonZeroU32::new(1).unwrap(), None, 0) .unwrap(); // Wait for waiter thread to complete @@ -270,6 +294,7 @@ mod tests { futex_addr, 0, None, + 0, ) }); @@ -283,7 +308,7 @@ mod tests { futex_word.as_ptr() as usize, ); let woken = futex_manager - .wake(futex_addr, NonZeroU32::new(1).unwrap(), None) + .wake(futex_addr, NonZeroU32::new(1).unwrap(), None, 0) .unwrap(); // Wait for waiter thread to complete @@ -324,6 +349,7 @@ mod tests { futex_addr, 0, None, + 0, ) }); waiters.push(waiter); @@ -339,7 +365,7 @@ mod tests { futex_word.as_ptr() as usize, ); let woken = futex_manager - .wake(futex_addr, NonZeroU32::new(u32::MAX).unwrap(), None) + .wake(futex_addr, NonZeroU32::new(u32::MAX).unwrap(), None, 0) .unwrap(); // Wait for all waiter threads to complete diff --git a/litebox/src/sync/mod.rs b/litebox/src/sync/mod.rs index 0778d6d15..54ba89945 100644 --- a/litebox/src/sync/mod.rs +++ b/litebox/src/sync/mod.rs @@ -29,15 +29,33 @@ pub use rwlock::{ 
MappedRwLockReadGuard, MappedRwLockWriteGuard, RwLock, RwLockReadGuard, RwLockWriteGuard, }; -#[cfg(not(feature = "lock_tracing"))] +#[cfg(not(any(feature = "lock_tracing", feature = "trace_fs")))] /// A convenience name for specific requirements from the platform pub trait RawSyncPrimitivesProvider: platform::RawMutexProvider + Sync + 'static {} -#[cfg(not(feature = "lock_tracing"))] +#[cfg(not(any(feature = "lock_tracing", feature = "trace_fs")))] impl RawSyncPrimitivesProvider for Platform where Platform: platform::RawMutexProvider + Sync + 'static { } +// When `trace_fs` is enabled, filesystem tracing code logs through +// `DebugLogProvider`. Since the platform type is threaded through +// `RawSyncPrimitivesProvider` in fs-related contexts, the bound is added here +// so it is available wherever the platform is used. `lock_tracing` already +// includes `DebugLogProvider`, so this branch only applies when `trace_fs` is +// enabled without `lock_tracing`. +#[cfg(all(feature = "trace_fs", not(feature = "lock_tracing")))] +/// A convenience name for specific requirements from the platform +pub trait RawSyncPrimitivesProvider: + platform::RawMutexProvider + platform::DebugLogProvider + Sync + 'static +{ +} +#[cfg(all(feature = "trace_fs", not(feature = "lock_tracing")))] +impl RawSyncPrimitivesProvider for Platform where + Platform: platform::RawMutexProvider + platform::DebugLogProvider + Sync + 'static +{ +} + #[cfg(feature = "lock_tracing")] /// A convenience name for specific requirements from the platform pub trait RawSyncPrimitivesProvider: diff --git a/litebox/src/utilities/anymap.rs b/litebox/src/utilities/anymap.rs index 68af3f8a6..96253e8fb 100644 --- a/litebox/src/utilities/anymap.rs +++ b/litebox/src/utilities/anymap.rs @@ -18,14 +18,30 @@ use alloc::boxed::Box; use core::any::{Any, TypeId}; use hashbrown::HashMap; +/// Type-erased clone function stored alongside each value. 
+/// +/// We cannot use `Box` because `Clone` is not +/// object-safe (its `clone` method returns `Self`). Instead we store a +/// function pointer that knows the concrete type and can clone through the +/// trait object. +type CloneFn = fn(&(dyn Any + Send + Sync)) -> Box; + /// A safe store of exactly one value of any type `T`. pub(crate) struct AnyMap { // Invariant: the value at a particular typeid is guaranteed to be the correct type boxed up. - storage: HashMap>, + storage: HashMap, CloneFn)>, } const GUARANTEED: &str = "guaranteed correct type by invariant"; +/// Create a clone function for a specific concrete type. +fn make_clone_fn() -> CloneFn { + |val: &(dyn Any + Send + Sync)| -> Box { + let concrete = val.downcast_ref::().expect(GUARANTEED); + Box::new(concrete.clone()) + } +} + impl AnyMap { /// Create a new empty `AnyMap` pub(crate) fn new() -> Self { @@ -35,20 +51,26 @@ impl AnyMap { } /// Insert `v`, replacing and returning the old value if one existed already. - pub(crate) fn insert(&mut self, v: T) -> Option { - let old = self.storage.insert(TypeId::of::(), Box::new(v))?; - Some(*old.downcast().expect(GUARANTEED)) + /// + /// The `Clone` bound is required to capture a type-erased clone function + /// at insertion time. The other accessors (`get`, `get_mut`, `remove`) do + /// not require `Clone`. + pub(crate) fn insert(&mut self, v: T) -> Option { + let old = self + .storage + .insert(TypeId::of::(), (Box::new(v), make_clone_fn::()))?; + Some(*old.0.downcast().expect(GUARANTEED)) } /// Get a reference to a value of type `T` if it exists. pub(crate) fn get(&self) -> Option<&T> { - let v = self.storage.get(&TypeId::of::())?; + let v = &self.storage.get(&TypeId::of::())?.0; Some(v.downcast_ref().expect(GUARANTEED)) } /// Get a mutable reference to a value of type `T` if it exists.
pub(crate) fn get_mut(&mut self) -> Option<&mut T> { - let v = self.storage.get_mut(&TypeId::of::())?; + let v = &mut self.storage.get_mut(&TypeId::of::())?.0; Some(v.downcast_mut().expect(GUARANTEED)) } @@ -58,7 +80,67 @@ impl AnyMap { )] /// Remove and return the value of type `T` if it exists. pub(crate) fn remove(&mut self) -> Option { - let v = self.storage.remove(&TypeId::of::())?; + let v = self.storage.remove(&TypeId::of::())?.0; Some(*v.downcast().expect(GUARANTEED)) } } + +impl Clone for AnyMap { + fn clone(&self) -> Self { + Self { + storage: self + .storage + .iter() + .map(|(&type_id, (val, clone_fn))| (type_id, (clone_fn(val.as_ref()), *clone_fn))) + .collect(), + } + } +} + +#[cfg(test)] +mod tests { + use alloc::string::String; + + use super::AnyMap; + + #[test] + fn insert_and_get() { + let mut map = AnyMap::new(); + assert!(map.insert(42u32).is_none()); + assert_eq!(map.get::(), Some(&42)); + } + + #[test] + fn clone_produces_independent_copy() { + let mut original = AnyMap::new(); + original.insert(10u32); + original.insert(String::from("hello")); + + let mut cloned = original.clone(); + + // Cloned values match. + assert_eq!(cloned.get::(), Some(&10)); + assert_eq!(cloned.get::().map(String::as_str), Some("hello")); + + // Mutating the clone does not affect the original. 
+ *cloned.get_mut::().unwrap() = 99; + assert_eq!(original.get::(), Some(&10)); + assert_eq!(cloned.get::(), Some(&99)); + } + + #[test] + fn cloned_map_can_be_cloned_again() { + let mut map = AnyMap::new(); + map.insert(7u64); + let clone1 = map.clone(); + let clone2 = clone1.clone(); + assert_eq!(clone2.get::(), Some(&7)); + } + + #[test] + fn clone_empty_map() { + let map = AnyMap::new(); + let cloned = map.clone(); + assert_eq!(cloned.get::(), None); + } +} diff --git a/litebox_common_linux/src/loader.rs b/litebox_common_linux/src/loader.rs index 3ae61266e..6b1fcc88b 100644 --- a/litebox_common_linux/src/loader.rs +++ b/litebox_common_linux/src/loader.rs @@ -128,6 +128,8 @@ pub enum ElfParseError { BadTrampoline, #[error("Invalid trampoline version")] BadTrampolineVersion, + #[error("Binary not patched for syscall rewriting")] + UnpatchedBinary, #[error("Unsupported ELF type")] UnsupportedType, #[error("Bad interpreter")] @@ -141,6 +143,7 @@ impl> From> for Errno { | ElfParseError::BadFormat | ElfParseError::BadTrampoline | ElfParseError::BadTrampolineVersion + | ElfParseError::UnpatchedBinary | ElfParseError::BadInterp | ElfParseError::UnsupportedType => Errno::ENOEXEC, ElfParseError::Io(err) => err.into(), @@ -218,6 +221,11 @@ impl ElfParsedFile { }) } + /// Returns `true` if a trampoline was parsed and will be mapped by `load()`. + pub fn has_trampoline(&self) -> bool { + self.trampoline.is_some() + } + /// Parse the LiteBox trampoline data, if any. /// /// The trampoline header is located at the end of the file (last 32/20 bytes). @@ -251,7 +259,8 @@ impl ElfParsedFile { // File must be large enough to contain the header if file_size < header_size as u64 { - return Ok(()); + // Too small for a trampoline header — binary is unpatched. 
+ return Err(ElfParseError::UnpatchedBinary); } // Read the header from the end of the file @@ -267,8 +276,9 @@ impl ElfParsedFile { if &header_buf[0..7] == b"LITEBOX" { return Err(ElfParseError::BadTrampolineVersion); } - // No trampoline found, which is OK (not all binaries are rewritten) - return Ok(()); + // No trampoline found. When using the syscall rewriter backend + // (syscall_entry_point != 0), all binaries must be patched. + return Err(ElfParseError::UnpatchedBinary); } let (file_offset, vaddr, trampoline_size) = if cfg!(target_pointer_width = "64") { @@ -293,9 +303,11 @@ impl ElfParsedFile { ) }; - // Validate trampoline size + // trampoline_size == 0 means the rewriter checked this binary and found + // no syscall instructions. The magic header acts as a "checked" marker so + // the runtime skips eager code-segment patching. No trampoline to map. if trampoline_size == 0 { - return Err(ElfParseError::BadTrampoline); + return Ok(()); } // Verify the file offset is page-aligned (as required by the rewriter) @@ -567,6 +579,24 @@ pub trait ReadAt { fn size(&mut self) -> Result; } +impl ReadAt for &[u8] { + type Error = Errno; + + fn read_at(&mut self, offset: u64, buf: &mut [u8]) -> Result<(), Self::Error> { + let offset: usize = offset.truncate(); + let end = offset.checked_add(buf.len()).ok_or(Errno::ENODATA)?; + if end > self.len() { + return Err(Errno::ENODATA); + } + buf.copy_from_slice(&self[offset..end]); + Ok(()) + } + + fn size(&mut self) -> Result { + Ok(self.len() as u64) + } +} + pub trait MapMemory { type Error; diff --git a/litebox_packager/build.rs b/litebox_packager/build.rs deleted file mode 100644 index 77956be92..000000000 --- a/litebox_packager/build.rs +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -use std::path::PathBuf; - -const RTLD_AUDIT_DIR: &str = "../litebox_rtld_audit"; - -fn main() { - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - if target_arch != "x86_64" { - return; - } - - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - let mut make_cmd = std::process::Command::new("make"); - make_cmd - .current_dir(RTLD_AUDIT_DIR) - .env("OUT_DIR", &out_dir) - .env("ARCH", &target_arch); - // Always build without DEBUG for the packager -- packaged binaries are - // release artifacts. - make_cmd.env_remove("DEBUG"); - // Force rebuild in case a stale artifact exists from a different config. - let _ = std::fs::remove_file(out_dir.join("litebox_rtld_audit.so")); - - let output = make_cmd - .output() - .expect("Failed to execute make for rtld_audit"); - assert!( - output.status.success(), - "failed to build rtld_audit.so via make:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr), - ); - assert!( - out_dir.join("litebox_rtld_audit.so").exists(), - "Build failed to create litebox_rtld_audit.so" - ); - - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/rtld_audit.c"); - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/Makefile"); - println!("cargo:rerun-if-changed=build.rs"); -} diff --git a/litebox_packager/src/lib.rs b/litebox_packager/src/lib.rs index 0b5490a6a..08080122b 100644 --- a/litebox_packager/src/lib.rs +++ b/litebox_packager/src/lib.rs @@ -1,21 +1,37 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -// Restrict this crate to only work on Linux, as it relies on `ldd` for -// dependency discovery and other Linux-specific functionality. 
-#![cfg(target_os = "linux")] - #[cfg(target_arch = "x86_64")] pub mod oci; use anyhow::{Context, bail}; use clap::Parser; use rayon::prelude::*; -use std::collections::{BTreeMap, BTreeSet}; -use std::os::unix::fs::MetadataExt as _; +#[cfg(target_os = "linux")] +use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use tar::{Builder, Header}; +/// Return Unix permission mode bits for a file. +/// +/// On Unix this returns the real mode from metadata. On other platforms it +/// returns 0o755 for files with a read-only attribute cleared, 0o644 otherwise. +#[cfg(unix)] +fn file_mode(metadata: &std::fs::Metadata) -> u32 { + use std::os::unix::fs::MetadataExt as _; + metadata.mode() +} + +#[cfg(not(unix))] +fn file_mode(metadata: &std::fs::Metadata) -> u32 { + if metadata.permissions().readonly() { + 0o644 + } else { + 0o755 + } +} + /// Package Linux ELF programs for execution under LiteBox. /// /// Discovers shared library dependencies, rewrites all ELF files using the @@ -54,6 +70,14 @@ pub struct CliArgs { #[arg(long = "include", value_name = "HOST_PATH:TAR_PATH")] pub include: Vec, + /// Include extra ELF files in the tar **with** syscall rewriting. + /// Use this for shared libraries that are loaded at runtime via `dlopen` + /// (e.g., NSS modules like `libnss_dns.so.2`) and therefore not discovered + /// by the automatic dependency scan. + /// Format: HOST_PATH:TAR_PATH (same as `--include`). + #[arg(long = "rewrite-include", value_name = "HOST_PATH:TAR_PATH")] + pub rewrite_include: Vec, + /// Skip rewriting specific files (by their absolute path on the host). #[arg(long = "no-rewrite", value_name = "PATH")] pub no_rewrite: Vec, @@ -99,7 +123,24 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { } } - // --- Phase 1: Validate inputs --- + // Host mode (local ELF files + ldd dependency discovery) is Linux-only. 
+ #[cfg(target_os = "linux")] + { + run_host_mode(args) + } + + #[cfg(not(target_os = "linux"))] + { + bail!( + "Host mode (local ELF files) is only supported on Linux. \ + Use --oci-image to pull a container image instead." + ); + } +} + +/// Host mode: package local ELF files with ldd-based dependency discovery. +#[cfg(target_os = "linux")] +fn run_host_mode(args: CliArgs) -> anyhow::Result<()> { let input_files: Vec = args .input_files .iter() @@ -151,12 +192,13 @@ pub fn run(args: CliArgs) -> anyhow::Result<()> { let par_results: Vec>> = file_map_vec .into_par_iter() - .map(|(real_path, tar_paths)| { + .map(|(real_path, tar_paths): (&PathBuf, &Vec)| { let data = std::fs::read(real_path) .with_context(|| format!("failed to read {}", real_path.display()))?; - let mode = std::fs::metadata(real_path) - .with_context(|| format!("failed to stat {}", real_path.display()))? - .mode(); + let mode = file_mode( + &std::fs::metadata(real_path) + .with_context(|| format!("failed to stat {}", real_path.display()))?, + ); let rewritten = if no_rewrite.contains(real_path) { if verbose { @@ -208,7 +250,12 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { // --- Phase 2: Scan rootfs for files --- eprintln!("Scanning rootfs..."); - let file_map = oci::scan_rootfs(&extracted.rootfs_path, args.verbose)?; + let file_map = oci::scan_rootfs( + &extracted.rootfs_path, + &extracted.symlink_map, + &extracted.permissions, + args.verbose, + )?; let no_rewrite: BTreeSet = args .no_rewrite @@ -309,11 +356,11 @@ fn run_oci(image_ref: &str, args: &CliArgs) -> anyhow::Result<()> { } // --------------------------------------------------------------------------- -// Shared finalization: includes, rtld audit injection, tar build, size report +// Shared finalization: includes, tar build, size report // --------------------------------------------------------------------------- -/// Append `--include` files, inject the rtld audit library, build the output -/// tar, and print a 
size summary. +/// Append `--include` and `--rewrite-include` files, build the output tar, +/// and print a size summary. /// /// Both host mode and OCI mode call this after producing their rewritten /// `TarEntry` list. @@ -342,7 +389,7 @@ fn finalize_tar( let data = std::fs::read(&inc.host_path) .with_context(|| format!("failed to read included file {}", inc.host_path.display()))?; let mode = std::fs::metadata(&inc.host_path) - .map(|m| m.mode()) + .map(|m| file_mode(&m)) .unwrap_or(0o644); if args.verbose { eprintln!( @@ -358,20 +405,43 @@ fn finalize_tar( }); } - // Include the rtld audit library so the rewriter backend can load it. - #[cfg(target_arch = "x86_64")] - { - const RTLD_AUDIT_TAR_PATH: &str = "lib/litebox_rtld_audit.so"; - if !added_tar_paths.insert(RTLD_AUDIT_TAR_PATH.to_string()) { + // Include extra ELF files **with** rewriting (for dlopen'd libraries). + let rewrite_includes: Vec = args + .rewrite_include + .iter() + .map(|s| parse_include(s)) + .collect::>>()?; + + for inc in &rewrite_includes { + if !inc.host_path.exists() { bail!( - "tar already contains {RTLD_AUDIT_TAR_PATH} -- \ - remove the conflicting entry or use --no-rewrite" + "rewrite-included file does not exist: {}", + inc.host_path.display() + ); + } + if !added_tar_paths.insert(inc.tar_path.clone()) { + bail!( + "duplicate tar path from --rewrite-include: '{}' (already present)", + inc.tar_path + ); + } + let data = std::fs::read(&inc.host_path) + .with_context(|| format!("failed to read {}", inc.host_path.display()))?; + let mode = std::fs::metadata(&inc.host_path) + .map(|m| file_mode(&m)) + .unwrap_or(0o755); + let rewritten = rewrite_elf(&data, &inc.host_path, args.verbose)?; + if args.verbose { + eprintln!( + " rewrite-including {} as {}", + inc.host_path.display(), + inc.tar_path ); } tar_entries.push(TarEntry { - tar_path: RTLD_AUDIT_TAR_PATH.to_string(), - data: include_bytes!(concat!(env!("OUT_DIR"), "/litebox_rtld_audit.so")).to_vec(), - mode: 0o755, + tar_path: 
inc.tar_path.clone(), + data: rewritten, + mode, }); } @@ -395,20 +465,23 @@ fn finalize_tar( } // --------------------------------------------------------------------------- -// Dependency discovery (via ldd) +// Dependency discovery (via ldd) — Linux only // --------------------------------------------------------------------------- +#[cfg(target_os = "linux")] struct ResolvedDep { ldd_path: PathBuf, real_path: PathBuf, } +#[cfg(target_os = "linux")] struct DepDiscoveryResult { resolved: Vec, missing: Vec, } /// Run `ldd` on the given ELF and return resolved dependencies. +#[cfg(target_os = "linux")] fn find_dependencies(elf_path: &Path, verbose: bool) -> anyhow::Result { let output = std::process::Command::new("ldd") .arg(elf_path) @@ -500,6 +573,7 @@ fn find_dependencies(elf_path: &Path, verbose: bool) -> anyhow::Result anyhow::Result bail!( + "{} is a Bun-packaged executable and cannot be packaged as-is: \ + tar-loaded programs must already contain LiteBox syscall trampolines", + path.display() + ), Err(litebox_syscall_rewriter::Error::NoTextSectionFound) => { if verbose { eprintln!( @@ -619,15 +698,6 @@ fn rewrite_elf(data: &[u8], path: &Path, verbose: bool) -> anyhow::Result { - if verbose { - eprintln!( - " warning: {} is a Bun-packaged executable, using as-is", - path.display() - ); - } - Ok(data.to_vec()) - } Err(e) => Err(e).with_context(|| format!("failed to rewrite {}", path.display())), } } @@ -648,7 +718,7 @@ fn build_tar(entries: &[TarEntry], output: &Path) -> anyhow::Result<()> { let mut builder = Builder::new(file); for entry in entries { - let mut header = Header::new_gnu(); + let mut header = Header::new_ustar(); header.set_size(entry.data.len() as u64); // Mask to permission bits only (rwxrwxrwx). 
The full st_mode from // MetadataExt::mode() includes file type bits (e.g., 0o100755) which @@ -666,3 +736,17 @@ fn build_tar(entries: &[TarEntry], output: &Path) -> anyhow::Result<()> { builder.finish().context("failed to finalize tar archive")?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::rewrite_elf; + use std::path::Path; + + #[test] + fn rewrite_elf_skips_non_elf_files() { + // Non-ELF data should be returned unmodified. + let data = b"#!/bin/sh\necho hello\n"; + let result = rewrite_elf(data, Path::new("/tmp/script.sh"), false).unwrap(); + assert_eq!(result, data); + } +} diff --git a/litebox_packager/src/main.rs b/litebox_packager/src/main.rs index 2acb1167d..01987d6e8 100644 --- a/litebox_packager/src/main.rs +++ b/litebox_packager/src/main.rs @@ -1,18 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -// Restrict this crate to only work on Linux, as it relies on `ldd` for -// dependency discovery and other Linux-specific functionality. - -#[cfg(target_os = "linux")] fn main() -> anyhow::Result<()> { use clap::Parser as _; use litebox_packager::CliArgs; litebox_packager::run(CliArgs::parse()) } - -#[cfg(not(target_os = "linux"))] -fn main() { - eprintln!("This program is only supported on Linux"); - std::process::exit(1); -} diff --git a/litebox_packager/src/oci.rs b/litebox_packager/src/oci.rs index adb951833..28d65ff4e 100644 --- a/litebox_packager/src/oci.rs +++ b/litebox_packager/src/oci.rs @@ -7,9 +7,8 @@ //! extracts its filesystem layers into a temporary rootfs directory, then //! walks the rootfs to discover all ELF files for syscall rewriting. -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::io::Read; -use std::os::unix::fs::PermissionsExt as _; use std::path::{Path, PathBuf}; use anyhow::Context; @@ -38,6 +37,13 @@ pub struct ExtractedImage { pub config: ImageConfig, /// Raw OCI image config JSON blob (the full config descriptor data). 
pub config_json: Vec, + /// Symlink map from layer extraction: maps relative paths inside the + /// rootfs to their (Unix-style) link targets for cross-platform resolution. + pub symlink_map: HashMap, + /// Unix permission modes captured from tar headers during extraction. + /// Keyed by relative path inside the rootfs. Used instead of querying + /// filesystem metadata, which loses Unix mode bits on non-Unix hosts. + pub permissions: HashMap, } /// Result of scanning an extracted rootfs for files to package. @@ -96,6 +102,17 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result anyhow::Result = Vec::new(); + let mut permissions: HashMap = HashMap::new(); for (i, layer) in image_data.layers.iter().enumerate() { if verbose { eprintln!( @@ -143,10 +162,29 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result = symlinks + .iter() + .map(|s| (s.rel_path.clone(), s.link_target.clone())) + .collect(); + + // Materialize symlinks cross-platform: resolve chains through the in-memory + // map and copy target files (or create directories) instead of OS symlinks. + if verbose { + eprintln!(" Resolving {} symlinks...", symlinks.len()); + } + materialize_symlinks(&symlink_map, &rootfs_path, &mut permissions, verbose)?; + if verbose { eprintln!(" Rootfs extracted to {}", rootfs_path.display()); } @@ -188,6 +226,8 @@ pub fn pull_and_extract(image_ref: &str, verbose: bool) -> anyhow::Result String { /// Extract a single OCI layer (tar or tar+gzip) into the rootfs directory. /// /// Handles OCI whiteout files (`.wh.*` prefixed entries) which indicate -/// files deleted in upper layers. -fn extract_layer(data: &[u8], media_type: &str, rootfs: &Path) -> anyhow::Result<()> { +/// files deleted in upper layers. Symlinks are collected into `symlinks` for +/// cross-platform resolution after all layers are extracted. Permission modes +/// from tar headers are recorded in `permissions` for cross-platform use. 
+fn extract_layer( + data: &[u8], + media_type: &str, + rootfs: &Path, + symlinks: &mut Vec, + permissions: &mut HashMap, +) -> anyhow::Result<()> { // Determine if the layer is gzipped let is_gzip = media_type.contains("gzip") || is_gzip_data(data); if is_gzip { let decoder = flate2::read::GzDecoder::new(data); - extract_tar(decoder, rootfs) + extract_tar(decoder, rootfs, symlinks, permissions) } else { - extract_tar(data, rootfs) + extract_tar(data, rootfs, symlinks, permissions) } } @@ -304,13 +352,32 @@ struct DeferredHardLink { target: PathBuf, /// Source path inside the rootfs (the file the hard link points to). link_source: PathBuf, + /// Original link name from the tar header (used for permission lookup). + link_name: PathBuf, +} + +/// Tracked symlink from a container image layer. +struct DeferredSymlink { + /// Relative path inside the rootfs (e.g., `usr/lib64/ld-linux-x86-64.so.2`). + rel_path: PathBuf, + /// Symlink target as stored in the tar (Unix-style, may be relative or absolute). + link_target: PathBuf, } /// Extract a tar archive into the rootfs, handling OCI whiteout files. /// -/// Hard links whose targets appear later in the archive are collected during -/// the first pass and resolved after all regular entries have been extracted. -fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { +/// Symlinks are NOT created as OS symlinks. Instead they are tracked in +/// `symlinks` so the caller can resolve them cross-platform after all layers +/// are extracted. Hard links whose targets appear later in the archive are +/// collected during the first pass and resolved after all regular entries +/// have been extracted. Permission modes from tar headers are recorded in +/// `permissions` keyed by relative path. 
+fn extract_tar( + reader: R, + rootfs: &Path, + symlinks: &mut Vec, + permissions: &mut HashMap, +) -> anyhow::Result<()> { let mut archive = tar::Archive::new(reader); archive.set_preserve_permissions(true); archive.set_unpack_xattrs(true); @@ -340,17 +407,31 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { } } } + // Also prune in-memory symlinks under this directory so + // they are not resurrected by materialize_symlinks. + symlinks.retain(|s| !s.rel_path.starts_with(parent)); + // Prune permissions for files under the cleared directory. + permissions.retain(|p, _| !p.starts_with(parent)); } continue; } if let Some(target_name) = file_name.strip_prefix(".wh.") { // Regular whiteout: delete the specific file/directory if let Some(parent) = path.parent() { - let target = rootfs.join(parent).join(target_name); + let whiteout_rel = parent.join(target_name); + let target = rootfs.join(&whiteout_rel); if target.is_dir() { let _ = std::fs::remove_dir_all(&target); + // Prune symlinks under the removed directory. + symlinks.retain(|s| !s.rel_path.starts_with(&whiteout_rel)); + // Prune permissions under the removed directory. + permissions.retain(|p, _| !p.starts_with(&whiteout_rel)); } else { let _ = std::fs::remove_file(&target); + // Prune the exact symlink entry if present. + symlinks.retain(|s| s.rel_path != whiteout_rel); + // Prune the exact permissions entry. + permissions.remove(&whiteout_rel); } } continue; @@ -364,11 +445,12 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { std::fs::create_dir_all(parent)?; } + let entry_type = entry.header().entry_type(); + // Handle hard links: copy the link target instead of creating an OS // hard link. The tar crate's unpack() tries std::fs::hard_link which // can fail if the target hasn't been extracted yet (ordering issue), // and the litebox filesystem doesn't support hard links anyway. 
- let entry_type = entry.header().entry_type(); if entry_type == tar::EntryType::Link { let link_name = entry .link_name()? @@ -383,20 +465,50 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { target.display() ) })?; + // Copy permission mode from the link source. + let link_rel = normalize_path(&link_name); + if let Some(&mode) = permissions.get(&link_rel) { + permissions.insert(path.clone(), mode); + } } else { // Target hasn't been extracted yet — defer to second pass. deferred_links.push(DeferredHardLink { target, link_source, + link_name: link_name.clone(), }); } continue; } - // Normal file/directory/symlink: use the standard unpack + // Track symlinks in memory instead of creating OS symlinks. + // OS symlinks on Windows require special privileges and don't handle + // Unix-style relative paths reliably, so we resolve them ourselves + // after all layers are extracted. + if entry_type == tar::EntryType::Symlink { + let link_target = entry + .link_name()? + .context("symlink entry has no link name")? + .into_owned(); + // A later layer may override this symlink, so remove any stale + // entry with the same rel_path. + symlinks.retain(|s| s.rel_path != path); + symlinks.push(DeferredSymlink { + rel_path: path.clone(), + link_target, + }); + continue; + } + + // Normal file/directory: use the standard unpack entry .unpack(&target) .with_context(|| format!("failed to unpack entry: {path_str}"))?; + + // Record the permission mode from the tar header for cross-platform use. + if let Ok(mode) = entry.header().mode() { + permissions.insert(path.clone(), mode); + } } // Second pass: resolve deferred hard links now that all entries are extracted. @@ -412,6 +524,12 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { link.target.display() ) })?; + // Copy permission mode from the link source. 
+ let link_rel = normalize_path(&link.link_name); + if let Some(&mode) = permissions.get(&link_rel) { + let target_rel = link.target.strip_prefix(rootfs).unwrap_or(&link.target); + permissions.insert(target_rel.to_path_buf(), mode); + } } else { // Target still doesn't exist after the full layer extraction — // this is unusual but not fatal; warn and skip. @@ -426,20 +544,244 @@ fn extract_tar(reader: R, rootfs: &Path) -> anyhow::Result<()> { Ok(()) } +/// Resolve a symlink target within the rootfs using the symlink map. +/// +/// Handles both absolute targets (e.g., `/lib/x86_64-linux-gnu/ld.so`) and +/// relative targets (e.g., `../lib/x86_64-linux-gnu/ld.so`). Follows symlink +/// chains up to `max_depth` hops. +fn resolve_symlink_in_rootfs( + rel_path: &Path, + rootfs: &Path, + symlink_map: &HashMap, + max_depth: u32, +) -> Option { + if max_depth == 0 { + return None; + } + + // Empty rel_path would resolve to the rootfs directory itself — treat + // as unresolvable to avoid accidentally matching the entire rootfs. + if rel_path.as_os_str().is_empty() { + return None; + } + + // Check if this rel_path is itself a symlink + if let Some(link_target) = symlink_map.get(rel_path) { + // Resolve the target to a new rel_path + let resolved_rel = if is_unix_absolute(link_target) { + strip_unix_root(link_target) + } else { + // Relative target: resolve from parent of the symlink + let parent = rel_path.parent().unwrap_or(Path::new("")); + normalize_path(&parent.join(link_target)) + }; + // Recurse to follow chains + return resolve_symlink_in_rootfs(&resolved_rel, rootfs, symlink_map, max_depth - 1); + } + + // Not a symlink — check if any ancestor is a symlink (e.g., `lib64/foo` where + // `lib64` → `usr/lib64`). 
+ let components: Vec<_> = rel_path.components().collect(); + for i in 1..components.len() { + let prefix: PathBuf = components[..i].iter().collect(); + if let Some(link_target) = symlink_map.get(&prefix) { + let resolved_prefix = if is_unix_absolute(link_target) { + strip_unix_root(link_target) + } else { + let parent = prefix.parent().unwrap_or(Path::new("")); + normalize_path(&parent.join(link_target)) + }; + let suffix: PathBuf = components[i..].iter().collect(); + let new_rel = resolved_prefix.join(suffix); + return resolve_symlink_in_rootfs(&new_rel, rootfs, symlink_map, max_depth - 1); + } + } + + let host_path = rootfs.join(rel_path); + if host_path.exists() { + Some(host_path) + } else { + None + } +} + +/// Check if a path starts with `/` (Unix-style absolute). +/// +/// On Windows, `Path::is_absolute()` requires a drive letter, so Unix-style +/// paths like `/lib/foo` are not detected as absolute. This helper checks +/// the raw string instead. +fn is_unix_absolute(path: &Path) -> bool { + path.as_os_str() + .to_str() + .is_some_and(|s| s.starts_with('/')) + || path.is_absolute() +} + +/// Strip the leading `/` from a Unix-style absolute path to make it +/// rootfs-relative. Returns the path unchanged if it doesn't start with `/`. +fn strip_unix_root(path: &Path) -> PathBuf { + if let Some(stripped) = path.as_os_str().to_str().and_then(|s| s.strip_prefix('/')) { + return PathBuf::from(stripped); + } + path.strip_prefix("/").unwrap_or(path).to_path_buf() +} + +/// Normalize a path by resolving `.` and `..` components without touching the +/// filesystem (no symlink resolution, no existence checks). Strips any root +/// component so the result is always a relative path. 
+fn normalize_path(path: &Path) -> PathBuf { + let mut result = Vec::new(); + for component in path.components() { + match component { + std::path::Component::ParentDir => { + result.pop(); + } + std::path::Component::CurDir | std::path::Component::RootDir => {} + c => result.push(c), + } + } + result.iter().collect() +} + +/// Materialize all deferred symlinks by copying or creating directories. +/// +/// This is called after all OCI layers have been extracted, so every real file +/// should be on disk. Symlinks are resolved through the in-memory map (handling +/// chains like `lib64` → `usr/lib64` → real dir) and then: +/// - File symlinks: the target file is copied to the symlink location. +/// The resolved target's permission mode is also recorded for the symlink path. +/// - Directory symlinks: an empty directory is created (its contents will be +/// expanded by `scan_rootfs`'s dir-symlink logic). +fn materialize_symlinks( + symlink_map: &HashMap, + rootfs: &Path, + permissions: &mut HashMap, + verbose: bool, +) -> anyhow::Result<()> { + for (rel_path, link_target) in symlink_map { + let host_path = rootfs.join(rel_path); + if host_path.exists() { + // A later layer may have replaced the symlink with a real file. + continue; + } + + if let Some(resolved) = resolve_symlink_in_rootfs( + rel_path, + rootfs, + symlink_map, + 32, // max chain depth + ) { + if let Some(parent) = host_path.parent() { + std::fs::create_dir_all(parent)?; + } + + if resolved.is_dir() { + // Directory symlink: create directory placeholder. + // scan_rootfs will discover this is a "dir symlink" and expand + // it through the symlink_map. 
+ std::fs::create_dir_all(&host_path)?; + if verbose { + eprintln!( + " [symlink→dir] {} -> {}", + rel_path.display(), + link_target.display() + ); + } + } else if resolved.is_file() { + std::fs::copy(&resolved, &host_path).with_context(|| { + format!( + "failed to materialize symlink {} -> {}", + rel_path.display(), + resolved.display() + ) + })?; + // Record the resolved target's permission mode for this symlink path. + let resolved_rel = resolved + .strip_prefix(rootfs) + .unwrap_or(&resolved) + .to_path_buf(); + if let Some(&mode) = permissions.get(&resolved_rel) { + permissions.insert(rel_path.clone(), mode); + } + if verbose { + eprintln!( + " [symlink→file] {} -> {}", + rel_path.display(), + link_target.display() + ); + } + } + } else if verbose { + eprintln!( + " [symlink-broken] {} -> {} (unresolvable)", + rel_path.display(), + link_target.display() + ); + } + } + + Ok(()) +} + +/// Look up the Unix permission mode for a file. +/// +/// Prefers the tar-header–derived `permissions` map (keyed by rootfs-relative +/// path) which is accurate on all platforms. Falls back to `file_mode()` on +/// the host path (accurate on Unix, heuristic on Windows), and finally +/// defaults to 0o644 if neither source is available. +fn lookup_mode(rel_path: &Path, host_path: &Path, permissions: &HashMap) -> u32 { + if let Some(&mode) = permissions.get(rel_path) { + return mode & 0o7777; + } + if let Ok(metadata) = std::fs::metadata(host_path) { + return super::file_mode(&metadata) & 0o7777; + } + 0o644 +} + /// Scan an extracted rootfs directory and build a file map for packaging. /// /// Walks the rootfs directory tree and collects all regular files with their -/// paths and permission bits. Symlinks are resolved within the rootfs context -/// and flattened into regular file copies (the litebox tar RO filesystem does -/// not support symlinks). +/// paths and permission bits. 
After `materialize_symlinks` has been called, +/// file symlinks are already materialized as regular file copies on disk. +/// +/// `symlink_map` provides the original symlink mapping from extraction so +/// that **directory symlinks** (e.g., `lib64` → `usr/lib64`) can be expanded: +/// all files under the target directory are duplicated under the symlink's +/// path prefix so that paths like `lib64/ld-linux-x86-64.so.2` exist in the tar. /// -/// **Directory symlinks** (e.g., `/lib64` → `/usr/lib64`) are expanded: all -/// files under the target directory are duplicated under the symlink's path -/// prefix so that paths like `/lib64/ld-linux-x86-64.so.2` exist in the tar. -pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result { +/// `permissions` provides Unix permission modes captured from tar headers +/// during extraction, so permission bits are accurate on non-Unix hosts. +#[allow(clippy::implicit_hasher)] +pub fn scan_rootfs( + rootfs: &Path, + symlink_map: &HashMap, + permissions: &HashMap, + verbose: bool, +) -> anyhow::Result { let mut files = BTreeMap::new(); - // Collect directory symlinks to expand after the initial walk. + + // Identify directory symlinks and their resolved targets on disk. let mut dir_symlinks: Vec<(PathBuf, PathBuf)> = Vec::new(); + for (rel_path, link_target) in symlink_map { + let host_path = rootfs.join(rel_path); + if host_path.is_dir() { + // This dir symlink was materialized as an empty directory. + // Resolve the target to find the real directory to expand from. 
+ if let Some(resolved) = + resolve_symlink_in_rootfs(rel_path, rootfs, symlink_map, 32).filter(|r| r.is_dir()) + { + if verbose { + eprintln!( + " [dir-symlink] {} -> {}", + rel_path.display(), + link_target.display() + ); + } + dir_symlinks.push((host_path, resolved)); + } + } + } for entry in walkdir::WalkDir::new(rootfs) .follow_links(false) @@ -454,10 +796,11 @@ pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result anyhow::Result {}", resolved.display()); } @@ -552,6 +890,8 @@ pub fn scan_rootfs(rootfs: &Path, verbose: bool) -> anyhow::Result anyhow::Result Option usr/lib64, and rootfs/usr/lib64/libc.so exists on disk. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/lib64")).unwrap(); + std::fs::write(rootfs.join("usr/lib64/libc.so"), b"fake").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("lib64"), PathBuf::from("usr/lib64")); + + // Resolving "lib64" itself should follow to rootfs/usr/lib64 (dir). + let resolved = resolve_symlink_in_rootfs(Path::new("lib64"), rootfs, &symlink_map, 32); + assert!(resolved.is_some()); + assert_eq!(resolved.unwrap(), rootfs.join("usr/lib64")); + } + + #[test] + fn resolve_symlink_chain() { + // a -> b, b -> c, rootfs/c exists. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("c"), b"data").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("a"), PathBuf::from("b")); + symlink_map.insert(PathBuf::from("b"), PathBuf::from("c")); + + let resolved = resolve_symlink_in_rootfs(Path::new("a"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("c"))); + } + + #[test] + fn resolve_symlink_max_depth_prevents_infinite_loop() { + // a -> b, b -> a (cycle). 
+ let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("a"), PathBuf::from("b")); + symlink_map.insert(PathBuf::from("b"), PathBuf::from("a")); + + let tmp = tempfile::tempdir().unwrap(); + let resolved = resolve_symlink_in_rootfs(Path::new("a"), tmp.path(), &symlink_map, 32); + assert!(resolved.is_none()); + } + + #[test] + fn resolve_symlink_absolute_target() { + // link -> /usr/bin/sh, rootfs/usr/bin/sh exists. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/bin")).unwrap(); + std::fs::write(rootfs.join("usr/bin/sh"), b"elf").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("bin/sh"), PathBuf::from("/usr/bin/sh")); + + let resolved = resolve_symlink_in_rootfs(Path::new("bin/sh"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("usr/bin/sh"))); + } + + #[test] + fn resolve_symlink_relative_target() { + // usr/lib64/libfoo.so -> ../lib/libfoo.so, rootfs/usr/lib/libfoo.so exists. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/lib")).unwrap(); + std::fs::write(rootfs.join("usr/lib/libfoo.so"), b"elf").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert( + PathBuf::from("usr/lib64/libfoo.so"), + PathBuf::from("../lib/libfoo.so"), + ); + + let resolved = + resolve_symlink_in_rootfs(Path::new("usr/lib64/libfoo.so"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("usr/lib/libfoo.so"))); + } + + #[test] + fn resolve_symlink_ancestor_is_symlink() { + // lib64 -> usr/lib64, resolve "lib64/foo.so" where rootfs/usr/lib64/foo.so exists. 
+ let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::create_dir_all(rootfs.join("usr/lib64")).unwrap(); + std::fs::write(rootfs.join("usr/lib64/foo.so"), b"elf").unwrap(); + + let mut symlink_map = HashMap::new(); + symlink_map.insert(PathBuf::from("lib64"), PathBuf::from("usr/lib64")); + + let resolved = + resolve_symlink_in_rootfs(Path::new("lib64/foo.so"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("usr/lib64/foo.so"))); + } + + #[test] + fn resolve_symlink_empty_path_returns_none() { + let tmp = tempfile::tempdir().unwrap(); + let symlink_map = HashMap::new(); + let resolved = resolve_symlink_in_rootfs(Path::new(""), tmp.path(), &symlink_map, 32); + assert!(resolved.is_none()); + } + + #[test] + fn resolve_symlink_not_a_symlink_returns_host_path() { + // Regular file, not in symlink_map — should return host_path directly. + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("hello.txt"), b"hi").unwrap(); + + let symlink_map = HashMap::new(); + let resolved = resolve_symlink_in_rootfs(Path::new("hello.txt"), rootfs, &symlink_map, 32); + assert_eq!(resolved, Some(rootfs.join("hello.txt"))); + } + + #[test] + fn resolve_symlink_nonexistent_returns_none() { + let tmp = tempfile::tempdir().unwrap(); + let symlink_map = HashMap::new(); + let resolved = + resolve_symlink_in_rootfs(Path::new("does/not/exist"), tmp.path(), &symlink_map, 32); + assert!(resolved.is_none()); + } + + // --- lookup_mode --- + + #[test] + fn lookup_mode_prefers_permissions_map() { + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("file.sh"), b"#!/bin/sh").unwrap(); + + let mut permissions = HashMap::new(); + permissions.insert(PathBuf::from("file.sh"), 0o100755u32); + + // The permissions map value (masked) should win over filesystem metadata. 
+ let mode = lookup_mode(Path::new("file.sh"), &rootfs.join("file.sh"), &permissions); + assert_eq!(mode, 0o755); + } + + #[test] + fn lookup_mode_falls_back_to_filesystem() { + let tmp = tempfile::tempdir().unwrap(); + let rootfs = tmp.path(); + std::fs::write(rootfs.join("file.txt"), b"data").unwrap(); + + let permissions = HashMap::new(); // empty + let mode = lookup_mode( + Path::new("file.txt"), + &rootfs.join("file.txt"), + &permissions, + ); + // On Unix the file should have some mode; just check it's non-zero. + assert!(mode > 0); + } + + #[test] + fn lookup_mode_defaults_to_644_when_nothing_available() { + let permissions = HashMap::new(); + let mode = lookup_mode( + Path::new("nonexistent"), + Path::new("/no/such/file"), + &permissions, + ); + assert_eq!(mode, 0o644); + } } diff --git a/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs b/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs index 210b51f14..1d7433447 100644 --- a/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs +++ b/litebox_platform_linux_kernel/src/arch/x86/mm/paging.rs @@ -2,28 +2,28 @@ // Licensed under the MIT license. 
use litebox::mm::linux::{PageFaultError, PageRange, VmFlags, VmemPageFaultHandler}; -use litebox::platform::{RawConstPointer as _, page_mgmt}; +use litebox::platform::{page_mgmt, RawConstPointer as _}; use x86_64::{ - PhysAddr, VirtAddr, structures::{ idt::PageFaultErrorCode, paging::{ - FrameAllocator, FrameDeallocator, MappedPageTable, Mapper, Page, PageSize, PageTable, - PageTableFlags, PhysFrame, Size4KiB, Translate, mapper::{ FlagUpdateError, MapToError, PageTableFrameMapping, TranslateResult, UnmapError as X64UnmapError, }, + FrameAllocator, FrameDeallocator, MappedPageTable, Mapper, Page, PageSize, PageTable, + PageTableFlags, PhysFrame, Size4KiB, Translate, }, }, + PhysAddr, VirtAddr, }; use crate::{ - UserMutPtr, mm::{ - MemoryProvider, pgtable::{PageTableAllocator, PageTableImpl}, + MemoryProvider, }, + UserMutPtr, }; #[cfg(not(test))] diff --git a/litebox_platform_linux_kernel/src/host/snp/ghcb.rs b/litebox_platform_linux_kernel/src/host/snp/ghcb.rs index 6ac0b7fe3..541301eaa 100644 --- a/litebox_platform_linux_kernel/src/host/snp/ghcb.rs +++ b/litebox_platform_linux_kernel/src/host/snp/ghcb.rs @@ -4,8 +4,8 @@ use litebox::utils::TruncateExt as _; use crate::arch::{ - PhysAddr, VirtAddr, instructions::{rdmsr, vc_vmgexit, wrmsr}, + PhysAddr, VirtAddr, }; // GHCB MSR @@ -63,7 +63,11 @@ fn ghcb_msr_call(request: u64) -> u64 { } fn num_to_char(n: u8) -> u8 { - if n < 10 { n + b'0' } else { n - 10 + b'a' } + if n < 10 { + n + b'0' + } else { + n - 10 + b'a' + } } pub fn num_to_buf(buf: &mut [u8; 40], mut n: u64, base: u64) -> usize { diff --git a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs index 19673641a..2ea23e200 100644 --- a/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs +++ b/litebox_platform_linux_kernel/src/host/snp/snp_impl.rs @@ -2,7 +2,7 @@ // Licensed under the MIT license. //! 
An implementation of [`HostInterface`] for SNP VMM -use ::alloc::boxed::Box; +use alloc::boxed::Box; use core::{ arch::asm, cell::{Cell, OnceCell}, @@ -38,8 +38,8 @@ type ArgsArray = [u64; MAX_ARGS_SIZE]; #[cfg(not(test))] mod alloc { - use crate::HostInterface; use crate::mm::MemoryProvider; + use crate::HostInterface; use litebox::utils::TruncateExt as _; const HEAP_ORDER: usize = super::bindings::SNP_VMPL_ALLOC_MAX_ORDER as usize + 12 + 1; diff --git a/litebox_platform_linux_kernel/src/lib.rs b/litebox_platform_linux_kernel/src/lib.rs index 12f6bc2d1..e54a6f56a 100644 --- a/litebox_platform_linux_kernel/src/lib.rs +++ b/litebox_platform_linux_kernel/src/lib.rs @@ -10,15 +10,15 @@ use core::sync::atomic::AtomicU64; use core::{arch::asm, sync::atomic::AtomicU32}; use litebox::mm::linux::PageRange; -use litebox::platform::RawPointerProvider; use litebox::platform::page_mgmt::FixedAddressBehavior; +use litebox::platform::RawPointerProvider; use litebox::platform::{ DebugLogProvider, IPInterfaceProvider, ImmediatelyWokenUp, PageManagementProvider, Provider, Punchthrough, PunchthroughProvider, PunchthroughToken, RawMutexProvider, TimeProvider, UnblockedOrTimedOut, }; -use litebox_common_linux::PunchthroughSyscall; use litebox_common_linux::errno::Errno; +use litebox_common_linux::PunchthroughSyscall; extern crate alloc; @@ -85,6 +85,12 @@ impl<'a, Host: HostInterface> PunchthroughToken for LinuxPunchthroughToken<'a, H impl Provider for LinuxKernel {} +impl litebox::platform::RawMessageProvider for LinuxKernel {} + +impl litebox::platform::AddressSpaceProvider for LinuxKernel { + type AddressSpaceId = u32; +} + // TODO: implement pointer validation to ensure the pointers are in user space. 
type UserConstPtr = litebox::platform::common_providers::userspace_pointers::UserConstPtr< litebox::platform::common_providers::userspace_pointers::NoValidation, @@ -424,6 +430,7 @@ impl PageManagementProvider for initial_permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, litebox::platform::page_mgmt::AllocationError> { let range = PageRange::new(suggested_range.start, suggested_range.end) diff --git a/litebox_platform_linux_kernel/src/mm/tests.rs b/litebox_platform_linux_kernel/src/mm/tests.rs index 3987b530b..740474298 100644 --- a/litebox_platform_linux_kernel/src/mm/tests.rs +++ b/litebox_platform_linux_kernel/src/mm/tests.rs @@ -7,29 +7,28 @@ use alloc::vec; use alloc::vec::Vec; use arrayvec::ArrayVec; use litebox::{ - LiteBox, mm::{ - PageManager, allocator::SafeZoneAllocator, linux::{ - CreatePagesFlags, NonZeroAddress, NonZeroPageSize, PAGE_SIZE, PageFaultError, - PageRange, VmFlags, + CreatePagesFlags, NonZeroAddress, NonZeroPageSize, PageFaultError, PageRange, VmFlags, + PAGE_SIZE, }, + PageManager, }, platform::RawConstPointer, + LiteBox, }; use spin::mutex::SpinMutex; use crate::{ - HostInterface, UserMutPtr, arch::{ + mm::paging::{vmflags_to_pteflags, X64PageTable}, MappedFrame, Page, PageFaultErrorCode, PageTableFlags, PhysAddr, Size4KiB, TranslateResult, VirtAddr, - mm::paging::{X64PageTable, vmflags_to_pteflags}, }, host::mock::{MockHostInterface, MockKernel}, - mm::{MemoryProvider, pgtable::PageTableAllocator}, - mock_log_println, + mm::{pgtable::PageTableAllocator, MemoryProvider}, + mock_log_println, HostInterface, UserMutPtr, }; use super::pgtable::PageTableImpl; @@ -161,14 +160,12 @@ fn test_page_table() { let new_vmflags = VmFlags::empty(); let new_pteflags = vmflags_to_pteflags(new_vmflags) | PageTableFlags::PRESENT; unsafe { - assert!( - pgtable - .mprotect_pages( - PageRange::new(start_addr + 
2 * PAGE_SIZE, start_addr + 6 * PAGE_SIZE).unwrap(), - new_vmflags - ) - .is_ok() - ); + assert!(pgtable + .mprotect_pages( + PageRange::new(start_addr + 2 * PAGE_SIZE, start_addr + 6 * PAGE_SIZE).unwrap(), + new_vmflags + ) + .is_ok()); } for page in PageRange::::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap() { check_flags(&pgtable, page, pteflags); @@ -182,14 +179,12 @@ fn test_page_table() { // remap pages let new_addr: usize = 0x20_1000; unsafe { - assert!( - pgtable - .remap_pages( - PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), - PageRange::new(new_addr, new_addr + 2 * PAGE_SIZE).unwrap() - ) - .is_ok() - ); + assert!(pgtable + .remap_pages( + PageRange::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap(), + PageRange::new(new_addr, new_addr + 2 * PAGE_SIZE).unwrap() + ) + .is_ok()); } for page in PageRange::::new(start_addr, start_addr + 2 * PAGE_SIZE).unwrap() { assert!(matches!( @@ -246,15 +241,13 @@ fn test_vmm_page_fault() { )); // Access non-present page w/ mapping - assert!( - unsafe { - vmm.handle_page_fault( - start_addr + 2 * PAGE_SIZE, - PageFaultErrorCode::USER_MODE.bits(), - ) - } - .is_ok() - ); + assert!(unsafe { + vmm.handle_page_fault( + start_addr + 2 * PAGE_SIZE, + PageFaultErrorCode::USER_MODE.bits(), + ) + } + .is_ok()); // insert stack mapping let stack_addr: usize = 0x1000_0000; @@ -272,12 +265,10 @@ fn test_vmm_page_fault() { } // [0x1_0000, 0x1_4000), [0x1000_0000, 0x1000_4000) // Test stack growth - assert!( - unsafe { - vmm.handle_page_fault(stack_addr - PAGE_SIZE, PageFaultErrorCode::USER_MODE.bits()) - } - .is_ok() - ); + assert!(unsafe { + vmm.handle_page_fault(stack_addr - PAGE_SIZE, PageFaultErrorCode::USER_MODE.bits()) + } + .is_ok()); assert_eq!( vmm.mappings() .iter() diff --git a/litebox_platform_linux_userland/src/lib.rs b/litebox_platform_linux_userland/src/lib.rs index c3e60a83a..1e34d53ad 100644 --- a/litebox_platform_linux_userland/src/lib.rs +++ b/litebox_platform_linux_userland/src/lib.rs @@ 
-445,6 +445,12 @@ impl LinuxUserland { impl litebox::platform::Provider for LinuxUserland {} +impl litebox::platform::RawMessageProvider for LinuxUserland {} + +impl litebox::platform::AddressSpaceProvider for LinuxUserland { + type AddressSpaceId = u32; +} + impl litebox::platform::SignalProvider for LinuxUserland { type Signal = litebox_common_linux::signal::Signal; @@ -537,6 +543,8 @@ core::arch::global_asm!( " .section .tbss .align 8 +saved_r11: + .quad 0 scratch: .quad 0 host_sp: @@ -651,6 +659,10 @@ syscall_callback: // expectations of `interrupt_signal_handler`. mov BYTE PTR gs:in_guest@tpoff, 0 + // Save guest R11 (syscall call-site address from rewriter trampoline) + // before it is clobbered by the fsbase/gsbase save sequence below. + mov gs:saved_r11@tpoff, r11 + // Restore host fs base. rdfsbase r11 mov gs:guest_fsbase@tpoff, r11 @@ -660,6 +672,25 @@ syscall_callback: // Switch to the top of the guest context. mov r11, rsp mov rsp, fs:guest_context_top@tpoff + jmp .Lsyscall_save_regs + + .globl syscall_callback_redzone +syscall_callback_redzone: + // Same as syscall_callback, but the trampoline has already reserved + // 128 bytes below RSP to protect the SysV red zone. + mov BYTE PTR gs:in_guest@tpoff, 0 + mov gs:saved_r11@tpoff, r11 + rdfsbase r11 + mov gs:guest_fsbase@tpoff, r11 + rdgsbase r11 + wrfsbase r11 + + // The trampoline lowered RSP by 128 bytes with LEA, so recover the + // architectural guest stack pointer before saving pt_regs. 
+ lea r11, [rsp + 128] + mov rsp, fs:guest_context_top@tpoff + +.Lsyscall_save_regs: // TODO: save float and vector registers (xsave or fxsave) // Save caller-saved registers @@ -678,7 +709,7 @@ syscall_callback: push r8 // pt_regs->r8 push r9 // pt_regs->r9 push r10 // pt_regs->r10 - push [rsp + 88] // pt_regs->r11 = rflags + push QWORD PTR gs:saved_r11@tpoff // pt_regs->r11 (syscall call-site from rewriter) push rbx // pt_regs->bx push rbp // pt_regs->bp push r12 // pt_regs->r12 @@ -1703,6 +1734,7 @@ impl litebox::platform::PageManagementProvider for Li initial_permissions: MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, litebox::platform::page_mgmt::AllocationError> { let flags = MapFlags::MAP_PRIVATE @@ -1721,6 +1753,11 @@ impl litebox::platform::PageManagementProvider for Li MapFlags::MAP_POPULATE } else { MapFlags::empty() + } + | if noreserve { + MapFlags::MAP_NORESERVE + } else { + MapFlags::empty() }; let r = unsafe { syscalls::syscall6( @@ -1967,6 +2004,8 @@ impl litebox::platform::StdioProvider for LinuxUserland { unsafe extern "C" { // Defined in asm blocks above fn syscall_callback() -> isize; + #[cfg(target_arch = "x86_64")] + fn syscall_callback_redzone() -> isize; fn exception_callback(); fn interrupt_callback(); fn switch_to_guest_start(); @@ -2047,7 +2086,14 @@ impl ThreadContext<'_> { impl litebox::platform::SystemInfoProvider for LinuxUserland { fn get_syscall_entry_point(&self) -> usize { - syscall_callback as *const () as usize + #[cfg(target_arch = "x86_64")] + { + syscall_callback_redzone as *const () as usize + } + #[cfg(target_arch = "x86")] + { + syscall_callback as *const () as usize + } } fn get_vdso_address(&self) -> Option { @@ -2714,7 +2760,12 @@ unsafe fn interrupt_signal_handler( // FUTURE: handle trampoline code, too. 
This is somewhat less important // because it's probably fine for the shim to observe a guest context that // is inside the trampoline. - if ip == syscall_callback as *const () as usize { + #[cfg(target_arch = "x86")] + let is_at_syscall_callback = ip == syscall_callback as *const () as usize; + #[cfg(target_arch = "x86_64")] + let is_at_syscall_callback = ip == syscall_callback_redzone as *const () as usize + || ip == syscall_callback as *const () as usize; + if is_at_syscall_callback { // No need to clear `in_guest` or set interrupt; the syscall handler will // clear `in_guest` and call into the shim. return; diff --git a/litebox_platform_lvbs/src/lib.rs b/litebox_platform_lvbs/src/lib.rs index 2487086d7..d636aa3ca 100644 --- a/litebox_platform_lvbs/src/lib.rs +++ b/litebox_platform_lvbs/src/lib.rs @@ -1228,6 +1228,7 @@ impl PageManagementProvider for initial_permissions: litebox::platform::page_mgmt::MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, litebox::platform::page_mgmt::AllocationError> { let range = PageRange::new(suggested_range.start, suggested_range.end) @@ -1351,6 +1352,14 @@ impl litebox::platform::SystemInfoProvider for LinuxKernel< } } +impl litebox::platform::RawMessageProvider for LinuxKernel {} + +impl litebox::platform::AddressSpaceProvider for LinuxKernel { + // All methods default to `Err(NotSupported)` — real implementation comes + // when LVBS multi-process (separate page tables) is added. + type AddressSpaceId = u32; +} + #[cfg(feature = "optee_syscall")] /// Checks whether the given physical addresses are contiguous with respect to ALIGN. 
fn is_contiguous(addrs: &[PhysPageAddr]) -> bool { diff --git a/litebox_platform_windows_userland/src/lib.rs b/litebox_platform_windows_userland/src/lib.rs index 9d827e057..59a9bf6ca 100644 --- a/litebox_platform_windows_userland/src/lib.rs +++ b/litebox_platform_windows_userland/src/lib.rs @@ -331,6 +331,12 @@ impl WindowsUserland { } } +impl litebox::platform::RawMessageProvider for WindowsUserland {} + +impl litebox::platform::AddressSpaceProvider for WindowsUserland { + type AddressSpaceId = u32; +} + impl litebox::platform::Provider for WindowsUserland {} impl litebox::platform::SignalProvider for WindowsUserland { @@ -549,19 +555,32 @@ unsafe extern "C-unwind" fn run_thread_arch(thread_ctx: &mut ThreadContext, tls_ jmp .Ldone // This entry point is called from the guest when it issues a syscall - // instruction. + // instruction. The rewriter trampoline has already: + // 1. Reserved 128 bytes below RSP to protect the SysV red zone + // 2. Loaded the call-site restart address into R11 (for SA_RESTART) + // 3. Loaded the return address into RCX // - // At entry, the register context is the guest context with the - // return address in rcx. r11 is an available scratch register (it would - // contain rflags if the syscall instruction had actually been issued). - .globl syscall_callback -syscall_callback: + // All other registers hold guest state. + .globl syscall_callback_redzone +syscall_callback_redzone: + // Save guest R11 (restart address from rewriter trampoline) into + // TEB.ArbitraryUserPointer (gs:[0x28]) before the TLS index lookup + // clobbers R11. This slot is per-thread and the window is very + // narrow: only ~20 instructions of inline asm with no API calls, + // no Rust code, and no DLL activity, so the ntdll loader (which + // also uses this slot for debugger communication) cannot interfere. + mov gs:[0x28], r11 // Get the TLS state from the TLS slot and clear the in-guest flag. 
mov r11d, DWORD PTR [rip + {TLS_INDEX}] mov r11, QWORD PTR gs:[r11 * 8 + TEB_TLS_SLOTS_OFFSET] mov BYTE PTR [r11 + {IS_IN_GUEST}], 0 - // Set rsp to the top of the guest context. + // Recover the architectural guest stack pointer (undo the 128-byte + // red zone reservation) and store it in SCRATCH. LEA is used instead + // of ADD to avoid clobbering RFLAGS before pushfq. + lea rsp, [rsp + 128] mov QWORD PTR [r11 + {SCRATCH}], rsp + +.Lsyscall_callback_common: mov rsp, QWORD PTR [r11 + {GUEST_CONTEXT_TOP}] // TODO: save float and vector registers (xsave or fxsave) @@ -581,7 +600,8 @@ syscall_callback: push r8 // pt_regs->r8 push r9 // pt_regs->r9 push r10 // pt_regs->r10 - push [rsp + 88] // pt_regs->r11 = rflags + mov r10, gs:[0x28] // recover guest R11 saved at entry + push r10 // pt_regs->r11 = guest R11 (restart addr from rewriter) push rbx // pt_regs->bx push rbp // pt_regs->bp push r12 @@ -1647,6 +1667,7 @@ impl litebox::platform::PageManagementProvider for Wi initial_permissions: MemoryRegionPermissions, can_grow_down: bool, populate_pages_immediately: bool, + _noreserve: bool, fixed_address_behavior: FixedAddressBehavior, ) -> Result, AllocationError> { debug_assert!(ALIGN.is_multiple_of(self.sys_info.read().unwrap().dwPageSize as usize)); @@ -1947,7 +1968,7 @@ impl litebox::mm::allocator::MemoryProvider for WindowsUserland { unsafe extern "C" { // Defined in asm blocks above - fn syscall_callback() -> isize; + fn syscall_callback_redzone() -> isize; fn exception_callback() -> isize; fn interrupt_callback(); fn switch_to_guest_start(); @@ -2037,7 +2058,7 @@ impl ThreadContext<'_> { impl litebox::platform::SystemInfoProvider for WindowsUserland { fn get_syscall_entry_point(&self) -> usize { - syscall_callback as *const () as usize + syscall_callback_redzone as *const () as usize } fn get_vdso_address(&self) -> Option { @@ -2166,6 +2187,7 @@ mod tests { MemoryRegionPermissions::WRITE, false, true, + false, FixedAddressBehavior::Hint, ) .unwrap() @@ -2192,6 
+2214,7 @@ mod tests { MemoryRegionPermissions::WRITE, false, true, + false, FixedAddressBehavior::Hint, ) .unwrap() @@ -2224,6 +2247,7 @@ mod tests { MemoryRegionPermissions::WRITE, false, true, + false, FixedAddressBehavior::Hint, ) .unwrap() diff --git a/litebox_rtld_audit/.gitignore b/litebox_rtld_audit/.gitignore deleted file mode 100644 index 140f8cf80..000000000 --- a/litebox_rtld_audit/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.so diff --git a/litebox_rtld_audit/Makefile b/litebox_rtld_audit/Makefile deleted file mode 100644 index b3a3ad3a3..000000000 --- a/litebox_rtld_audit/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -SRC = rtld_audit.c -OUT_DIR ?= . -OUTPUT = $(OUT_DIR)/litebox_rtld_audit.so -CC ?= cc -CFLAGS ?= -Wall -Werror -fPIC -shared -nostdlib -ARCH ?= $(shell uname -m) -ifeq ($(ARCH),x86_64) - CFLAGS += -m64 -else - $(error Unsupported target architecture: $(ARCH)) -endif -ifdef DEBUG - CFLAGS += -DDEBUG -endif -all: $(OUTPUT) - -$(OUTPUT): $(SRC) - $(CC) $(CFLAGS) -o $@ $< - -clean: - rm -f $(OUTPUT) - -.PHONY: all clean diff --git a/litebox_rtld_audit/rtld_audit.c b/litebox_rtld_audit/rtld_audit.c deleted file mode 100644 index 51713f941..000000000 --- a/litebox_rtld_audit/rtld_audit.c +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#define _GNU_SOURCE -#include -#include -#include - -// The magic number used to identify the LiteBox trampoline. -// This must match `TRAMPOLINE_MAGIC` in `litebox_syscall_rewriter` and `litebox_common_linux`. 
-// Value 0x30584f424554494c is "LITEBOX0" in little-endian (bytes: 'L','I','T','E','B','O','X','0') -#define TRAMPOLINE_MAGIC ((uint64_t)0x30584f424554494c) - -#if !defined(__x86_64__) -# error "rtld_audit.c: build target must be x86_64" -#endif - -// Linux syscall numbers (x86_64) -#define SYS_openat 257 -#define SYS_read 0 -#define SYS_write 1 -#define SYS_close 3 -#define SYS_fstat 5 -#define SYS_mmap 9 -#define SYS_mprotect 10 -#define SYS_munmap 11 -#define SYS_exit_group 231 -#define AT_FDCWD -100 - -// Maximum valid userspace address (48-bit address space) -#define MAX_USERSPACE_ADDR 0x7FFFFFFFFFFFUL - -// Trampoline header layout for x86_64: magic(8) + file_offset(8) + vaddr(8) + size(8) = 32 bytes -struct __attribute__((packed)) TrampolineHeader { - uint64_t magic; - uint64_t file_offset; - uint64_t vaddr; - uint64_t trampoline_size; -}; - -// Linux flags -#define MAP_PRIVATE 0x02 -#define MAP_FIXED 0x10 -#define PROT_READ 0x1 -#define PROT_WRITE 0x2 -#define PROT_EXEC 0x4 - -typedef long (*syscall_stub_t)(void); -static syscall_stub_t syscall_entry = 0; -static char interp[256] = {0}; // Buffer for interpreter path - -#ifdef DEBUG -#define syscall_print(str, len) \ - do_syscall(SYS_write, 1, (long)(str), len, 0, 0, 0) -#else -#define syscall_print(str, len) -#endif - -static long do_syscall(long num, long a1, long a2, long a3, long a4, long a5, - long a6) { - if (!syscall_entry) - return -1; - - register long rax __asm__("rax") = num; - register long rdi __asm__("rdi") = a1; - register long rsi __asm__("rsi") = a2; - register long rdx __asm__("rdx") = a3; - register long r10 __asm__("r10") = a4; - register long r8 __asm__("r8") = a5; - register long r9 __asm__("r9") = a6; - - __asm__ volatile("leaq 1f(%%rip), %%rcx\n" - "jmp *%[entry]\n" - "1:\n" - : "+r"(rax) - : [entry] "r"(syscall_entry), "r"(rdi), "r"(rsi), "r"(rdx), - "r"(r10), "r"(r8), "r"(r9) - : "rcx", "r11", "memory"); - return rax; -} - -/* Re-implement some utility functions and re-define the 
structures to avoid - * dependency on libc. */ - -// Define the FileStat structure -struct FileStat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; - unsigned int __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - long st_blocks; /* Number 512-byte blocks allocated. */ - - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - long __unused[3]; -}; - -int memcmp(const void *s1, const void *s2, size_t n) { - const unsigned char *p1 = s1; - const unsigned char *p2 = s2; - while (n--) { - if (*p1 != *p2) { - return *p1 - *p2; - } - p1++; - p2++; - } - return 0; -} - -int strcmp(const char *s1, const char *s2) { - while (*s1 && (*s1 == *s2)) { - s1++; - s2++; - } - return *(unsigned char *)s1 - *(unsigned char *)s2; -} - -char *strncpy(char *dest, const char *src, size_t n) { - char *d = dest; - const char *s = src; - while (n-- && *s) { - *d++ = *s++; - } - while (n--) { - *d++ = '\0'; - } - return dest; -} - -static uint64_t read_u64(const void *p) { - uint64_t v; - __builtin_memcpy(&v, p, 8); - return v; -} - -static size_t align_up(size_t val, size_t align) { - size_t result = (val + align - 1) & ~(align - 1); - // Check for overflow (result < val means we wrapped) - if (result < val) return (size_t)-1; - return result; -} - -unsigned int la_version(unsigned int version __attribute__((unused))) { - return LAV_CURRENT; -} - -/// print value in hex -void print_hex(uint64_t data) { -#ifdef DEBUG - for (int i = 15; i >= 0; i--) { - unsigned char byte = (data >> (i * 4)) & 0xF; - if (byte < 10) { - syscall_print((&"0123456789"[byte]), 1); - } else { - syscall_print((&"abcdef"[byte - 10]), 1); - } - } - syscall_print("\n", 1); -#endif -} - -/// @brief Parse object to find the syscall entry point and the interpreter -/// path. 
-/// -/// The trampoline is already mapped by the litebox loader at (base + vaddr). -/// The entry point is at offset 0 of the mapped trampoline. The litebox loader -/// already validated the magic when parsing the file header. -int parse_object(const struct link_map *map) { - unsigned long max_addr = 0; - Elf64_Ehdr *eh = (Elf64_Ehdr *)map->l_addr; - if (memcmp(eh->e_ident, - "\x7f" - "ELF", - 4) != 0) { - syscall_print("[audit] not an ELF file\n", 24); - return 1; - } - Elf64_Phdr *phdrs = (Elf64_Phdr *)((char *)map->l_addr + eh->e_phoff); - for (int i = 0; i < eh->e_phnum; i++) { - if (phdrs[i].p_type == PT_LOAD) { - unsigned long vaddr_end = (phdrs[i].p_vaddr + phdrs[i].p_memsz); - if (vaddr_end > max_addr) { - max_addr = vaddr_end; - } - } else if (phdrs[i].p_type == PT_INTERP) { - strncpy(interp, (char *)map->l_addr + phdrs[i].p_vaddr, - sizeof(interp) - 1); - interp[sizeof(interp) - 1] = '\0'; // Ensure null termination - } - } - max_addr = align_up(max_addr, 0x1000); - void *trampoline_addr = (void *)map->l_addr + max_addr; - // The trampoline code has the syscall entry point at offset 0. - syscall_entry = (syscall_stub_t)read_u64(trampoline_addr); - if (syscall_entry == 0) { - syscall_print("[audit] syscall entry is null\n", 30); - return 1; - } - print_hex((uint64_t)syscall_entry); - return 0; -} - -unsigned int la_objopen(struct link_map *map, - Lmid_t lmid __attribute__((unused)), - uintptr_t *cookie __attribute__((unused))) { - syscall_print("[audit] la_objopen called\n", 26); - const char *path = map->l_name; - - if (!path || path[0] == '\0') { - // main binary should be called first. - if (map->l_addr != 0) { - // `map->l_addr` is zero for the main binary if it is not position - // independent. 
- if (parse_object(map) != 0) { - syscall_print("[audit] failed to parse main binary\n", 36); - return 0; - } - syscall_print("[audit] main binary is patched by libOS\n", 40); - syscall_print("[audit] interp=", 15); - syscall_print(interp, sizeof(interp) - 1); - syscall_print("\n", 1); - } - return 0; // main binary is patched by libOS - } - - if (syscall_entry == 0) { - // failed to get the syscall entry point from the main binary - // fall back to get it from ld-*.so, which should be called next. - if (parse_object(map) != 0) { - syscall_print("[audit] failed to parse ld\n", 27); - return 0; - } - syscall_print("[audit] ld is patched by libOS: \n", 33); - syscall_print(path, 32); - syscall_print("\n", 1); - return 0; // ld.so is patched by libOS - } - - if (interp[0] != '\0' && strcmp(path, interp) == 0) { - // successfully get the entry point and interpreter from the main binary - syscall_print("[audit] ld-*.so is patched by libOS\n", 36); - return 0; // ld.so is patched by libOS - } - - // Other shared libraries - syscall_print("[audit] la_objopen: path=", 25); - syscall_print(path, 32); - syscall_print("\n", 1); - - if (!syscall_entry) { - return 0; - } - - int fd = do_syscall(SYS_openat, AT_FDCWD, (long)path, 0, 0, 0, 0); - if (fd < 0) { - syscall_print("[audit] failed to open file\n", 28); - return 0; - } - - struct FileStat st; - if (do_syscall(SYS_fstat, fd, (long)&st, 0, 0, 0, 0) < 0) { - syscall_print("[audit] fstat failed\n", 21); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - long file_size = st.st_size; - - // File must be large enough to contain at least a trampoline header - if (file_size < (long)sizeof(struct TrampolineHeader)) { - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // The trampoline header is at the end of the file (last 32 bytes for x86_64). - // File layout: [ELF][padding][trampoline code][header] - // Read the last page that contains the header. 
- long header_offset = file_size - sizeof(struct TrampolineHeader); - long header_page_offset = header_offset & ~0xFFFUL; - - // Map the page containing the header - void *header_page = (void *)do_syscall(SYS_mmap, 0, 0x1000, PROT_READ, MAP_PRIVATE, fd, header_page_offset); - if ((uintptr_t)header_page >= (uintptr_t)-4096) { - syscall_print("[audit] mmap header page failed\n", 32); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Read header from the mapped page - long header_in_page_offset = header_offset - header_page_offset; - const struct TrampolineHeader *header = (const struct TrampolineHeader *)((const char *)header_page + header_in_page_offset); - - // Check magic - if (header->magic != TRAMPOLINE_MAGIC) { - // If the prefix matches but the version differs, fail explicitly. - if (memcmp(header, "LITEBOX", 7) == 0) { - syscall_print("[audit] invalid trampoline version\n", 36); - do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - // No trampoline found - do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Copy fields before unmapping - uint64_t tramp_file_offset = header->file_offset; - uint64_t tramp_vaddr = header->vaddr; - uint64_t tramp_size_raw = header->trampoline_size; - - do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0); - syscall_print("[audit] found trampoline header at end of file\n", 47); - - // Validate trampoline size - if (tramp_size_raw == 0) { - syscall_print("[audit] trampoline code size invalid\n", 37); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Verify file offset is page-aligned - if ((tramp_file_offset & 0xFFF) != 0) { - syscall_print("[audit] trampoline code not page-aligned\n", 41); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // The trampoline code should immediately precede the header. 
- if (tramp_file_offset + tramp_size_raw != (uint64_t)header_offset) { - syscall_print("[audit] trampoline extends beyond header\n", 41); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Validate tramp_vaddr is within reasonable userspace bounds and page-aligned - if (tramp_vaddr > MAX_USERSPACE_ADDR || (tramp_vaddr & 0xFFF) != 0) { - syscall_print("[audit] trampoline vaddr out of bounds\n", 39); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - uint64_t tramp_addr = map->l_addr + tramp_vaddr; - uint64_t tramp_size = align_up(tramp_size_raw, 0x1000); - - // Check for overflow in align_up or address calculation - if (tramp_size == (size_t)-1 || tramp_addr < map->l_addr) { - syscall_print("[audit] trampoline size/addr overflow\n", 38); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Use MAP_FIXED to place the trampoline at the exact required address. - // The loader ensures this range is not used by other mappings. - void *mapped = - (void *)do_syscall(SYS_mmap, tramp_addr, tramp_size, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, tramp_file_offset); - if ((uintptr_t)mapped >= (uintptr_t)-4096) { - syscall_print("[audit] mmap failed for trampoline\n", 35); - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; - } - - // Write the syscall entry point at the start of the trampoline code - __builtin_memcpy((char *)mapped, (const void *)&syscall_entry, 8); - do_syscall(SYS_mprotect, (long)mapped, tramp_size, PROT_READ | PROT_EXEC, 0, - 0, 0); - syscall_print("[audit] trampoline patched and protected\n", 41); - - do_syscall(SYS_close, fd, 0, 0, 0, 0, 0); - return 0; -} diff --git a/litebox_runner_linux_on_windows_userland/src/lib.rs b/litebox_runner_linux_on_windows_userland/src/lib.rs index c5afcbc71..826d42923 100644 --- a/litebox_runner_linux_on_windows_userland/src/lib.rs +++ b/litebox_runner_linux_on_windows_userland/src/lib.rs @@ -14,16 +14,16 @@ use std::path::PathBuf; /// Run Linux programs with LiteBox 
on unmodified Windows. /// -/// The program binary and all its dependencies (including `litebox_rtld_audit.so`) -/// must be provided inside a tar archive via `--initial-files`. The program path -/// refers to a path inside the tar archive. +/// The program binary and all its dependencies must be provided inside a tar +/// archive via `--initial-files`. The program path refers to a path inside the +/// tar archive. #[derive(Parser, Debug)] pub struct CliArgs { /// The program and arguments passed to it (e.g., `/bin/ls --color`). /// /// The program path refers to a path inside the tar archive provided via /// `--initial-files`. All binaries must be pre-rewritten with the syscall - /// rewriter and the tar must include `litebox_rtld_audit.so`. + /// rewriter. #[arg(required = true, trailing_var_arg = true, value_hint = clap::ValueHint::CommandWithArguments)] pub program_and_arguments: Vec, /// Environment variables passed to the program (`K=V` pairs; can be invoked multiple times) @@ -35,7 +35,7 @@ pub struct CliArgs { /// Allow using unstable options #[arg(short = 'Z', long = "unstable")] pub unstable: bool, - /// Tar archive containing the program, its shared libraries, and litebox_rtld_audit.so. + /// Tar archive containing the program and its shared libraries. /// /// All ELF binaries should be pre-rewritten with the syscall rewriter /// (e.g., via `litebox-packager`). @@ -60,7 +60,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { let platform = Platform::new(); litebox_platform_multiplex::set_platform(platform); - let mut shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); + let shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); let litebox = shim_builder.litebox(); // The program path is a Unix-style path inside the tar archive. 
@@ -83,7 +83,6 @@ pub fn run(cli_args: CliArgs) -> Result<()> { }; let initial_file_system = std::sync::Arc::new(initial_file_system); - shim_builder.set_load_filter(fixup_env); let shim = shim_builder.build(); let argv = cli_args .program_and_arguments @@ -128,13 +127,3 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } std::process::exit(program.process.wait()) } - -fn fixup_env(envp: &mut Vec) { - // Always inject LD_AUDIT so the dynamic linker loads the audit library - // that sets up trampolines for rewritten binaries. - let p = c"LD_AUDIT=/lib/litebox_rtld_audit.so"; - let has_ld_audit = envp.iter().any(|var| var.as_c_str() == p); - if !has_ld_audit { - envp.push(p.into()); - } -} diff --git a/litebox_runner_linux_on_windows_userland/tests/loader.rs b/litebox_runner_linux_on_windows_userland/tests/loader.rs index e6f470e34..1a0849aef 100644 --- a/litebox_runner_linux_on_windows_userland/tests/loader.rs +++ b/litebox_runner_linux_on_windows_userland/tests/loader.rs @@ -4,9 +4,8 @@ //! Tests for the Windows userland runner. //! //! **NOTE:** These tests depend on pre-built Linux ELF binaries in `tests/test-bins/`, -//! including `litebox_rtld_audit.so`, shared libraries (`libc.so.6`, `ld-linux-x86-64.so.2`), -//! and test executables. These binaries must be rebuilt on Linux and re-committed whenever -//! the corresponding source code changes (e.g., `litebox_rtld_audit/rtld_audit.c`). +//! including shared libraries (`libc.so.6`, `ld-linux-x86-64.so.2`) +//! and test executables. 
#![cfg(all(target_os = "windows", target_arch = "x86_64"))] @@ -198,7 +197,6 @@ fn test_static_linked_prog_with_rewriter() { fn run_dynamic_linked_prog_with_rewriter( libs_to_rewrite: &[(&str, &str)], - libs_without_rewrite: &[(&str, &str)], exec_name: &str, cmd_args: &[&str], install_files: fn(std::path::PathBuf), @@ -276,22 +274,6 @@ fn run_dynamic_linked_prog_with_rewriter( ); } - // Copy libraries that are not needed to be rewritten (`litebox_rtld_audit.so`) - // to the tar directory - for (file, prefix) in libs_without_rewrite { - let src = test_dir.join(file); - let dst_dir = tar_src_path.join(prefix.trim_start_matches('/')); - let dst = dst_dir.join(file); - std::fs::create_dir_all(&dst_dir).unwrap(); - let _ = std::fs::remove_file(&dst); - println!( - "Copying {} to {}", - src.to_str().unwrap(), - dst.to_str().unwrap() - ); - std::fs::copy(&src, &dst).unwrap(); - } - // Install the required files (e.g., scripts) to tar directory's /out install_files(tar_src_path.join("out")); @@ -361,14 +343,6 @@ fn test_testcase_dynamic_with_rewriter() { ("libc.so.6", "/lib/x86_64-linux-gnu"), ("ld-linux-x86-64.so.2", "/lib64"), ]; - let libs_without_rewrite = [("litebox_rtld_audit.so", "/lib")]; - // Run - run_dynamic_linked_prog_with_rewriter( - &libs_to_rewrite, - &libs_without_rewrite, - exec_name, - &[], - |_| {}, - ); + run_dynamic_linked_prog_with_rewriter(&libs_to_rewrite, exec_name, &[], |_| {}); } diff --git a/litebox_runner_linux_userland/build.rs b/litebox_runner_linux_userland/build.rs deleted file mode 100644 index 3360e452a..000000000 --- a/litebox_runner_linux_userland/build.rs +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -use std::path::PathBuf; - -const RTLD_AUDIT_DIR: &str = "../litebox_rtld_audit"; - -fn main() { - let mut make_cmd = std::process::Command::new("make"); - let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap()); - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - if target_arch != "x86_64" { - // XXX: Currently 32-bit x86 is unsupported (unimplemented), skip building - return; - } - make_cmd - .current_dir(RTLD_AUDIT_DIR) - .env("OUT_DIR", &out_dir) - .env("ARCH", target_arch); - if std::env::var("PROFILE").unwrap_or_default() == "debug" { - make_cmd.env("DEBUG", "1"); - } else { - // Explicitly remove DEBUG to prevent inheriting it from the - // parent environment, which would cause the C library to be - // built with debug prints enabled. - make_cmd.env_remove("DEBUG"); - } - // Force rebuild in case CFLAGS changed (e.g., debug -> release) but - // the source did not. - let _ = std::fs::remove_file(out_dir.join("litebox_rtld_audit.so")); - let output = make_cmd - .output() - .expect("Failed to execute make for rtld_audit"); - assert!( - output.status.success(), - "failed to build rtld_audit.so via make:\nstdout: {}\nstderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr), - ); - assert!( - out_dir.join("litebox_rtld_audit.so").exists(), - "Build failed to create necessary file" - ); - - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/rtld_audit.c"); - println!("cargo:rerun-if-changed={RTLD_AUDIT_DIR}/Makefile"); - println!("cargo:rerun-if-changed=build.rs"); -} diff --git a/litebox_runner_linux_userland/src/lib.rs b/litebox_runner_linux_userland/src/lib.rs index 28521c370..edc670bae 100644 --- a/litebox_runner_linux_userland/src/lib.rs +++ b/litebox_runner_linux_userland/src/lib.rs @@ -89,9 +89,6 @@ pub enum InterceptionBackend { Rewriter, } -static REQUIRE_RTLD_AUDIT: core::sync::atomic::AtomicBool = - core::sync::atomic::AtomicBool::new(false); - struct MmappedFile { data: &'static [u8], 
abs_path: PathBuf, @@ -130,14 +127,14 @@ pub fn run(cli_args: CliArgs) -> Result<()> { ) } - // --program-from-tar loads pre-rewritten binaries that depend on litebox_rtld_audit.so, - // which is only injected by the rewriter backend. + // --program-from-tar loads pre-rewritten binaries that require the rewriter + // backend's runtime trampoline setup. if cli_args.program_from_tar && !matches!(cli_args.interception_backend, InterceptionBackend::Rewriter) { anyhow::bail!( "--program-from-tar requires --interception-backend=rewriter \ - (the packaged binary is pre-rewritten and needs the audit library)" + (the packaged binary is pre-rewritten and needs the rewriter runtime)" ); } @@ -228,7 +225,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } litebox_platform_multiplex::set_platform(platform); - let mut shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); + let shim_builder = litebox_shim_linux::LinuxShimBuilder::new(); let litebox = shim_builder.litebox(); let initial_file_system = { let mut in_mem = litebox::fs::in_mem::FileSystem::new(litebox); @@ -307,34 +304,10 @@ pub fn run(cli_args: CliArgs) -> Result<()> { } }); - // When using the rewriter backend, automatically include litebox_rtld_audit.so - // in the filesystem so tests and users don't need to include it in tar files + // When using the rewriter backend, the shim's mmap hook handles + // syscall patching at runtime — no audit library needed. 
match cli_args.interception_backend { - InterceptionBackend::Rewriter => { - #[cfg(not(target_arch = "x86_64"))] - eprintln!("WARN: litebox_rtld_audit not currently supported on non-x86_64 arch"); - #[cfg(target_arch = "x86_64")] - in_mem.with_root_privileges(|fs| { - let rwxr_xr_x = Mode::RWXU | Mode::RGRP | Mode::XGRP | Mode::ROTH | Mode::XOTH; - let _ = fs.mkdir("/lib", rwxr_xr_x); - let fd = fs - .open( - "/lib/litebox_rtld_audit.so", - litebox::fs::OFlags::WRONLY | litebox::fs::OFlags::CREAT, - rwxr_xr_x, - ) - .expect("Failed to create /lib/litebox_rtld_audit.so"); - fs.initialize_primarily_read_heavy_file( - &fd, - include_bytes!(concat!(env!("OUT_DIR"), "/litebox_rtld_audit.so")).into(), - ); - fs.close(&fd) - .expect("Failed to close /lib/litebox_rtld_audit.so"); - }); - } - InterceptionBackend::Seccomp => { - // No need to include rtld_audit.so for seccomp backend - } + InterceptionBackend::Rewriter | InterceptionBackend::Seccomp => {} } let tar_ro = litebox::fs::tar_ro::FileSystem::new(litebox, tar_data.into()); @@ -358,7 +331,6 @@ pub fn run(cli_args: CliArgs) -> Result<()> { let initial_file_system = std::sync::Arc::new(initial_file_system); - shim_builder.set_load_filter(fixup_env); let shim = shim_builder.build(); let shutdown = std::sync::Arc::new(core::sync::atomic::AtomicBool::new(false)); @@ -397,7 +369,7 @@ pub fn run(cli_args: CliArgs) -> Result<()> { match cli_args.interception_backend { InterceptionBackend::Seccomp => platform.enable_seccomp_based_syscall_interception(), InterceptionBackend::Rewriter => { - REQUIRE_RTLD_AUDIT.store(true, core::sync::atomic::Ordering::SeqCst); + // Runtime patching is handled by the shim's mmap hook — nothing to do here. } } @@ -478,14 +450,3 @@ fn pin_thread_to_cpu(cpu: usize) { } } } - -fn fixup_env(envp: &mut Vec) { - // Enable the audit library to load trampoline code for rewritten binaries. 
- if REQUIRE_RTLD_AUDIT.load(core::sync::atomic::Ordering::SeqCst) { - let p = c"LD_AUDIT=/lib/litebox_rtld_audit.so"; - let has_ld_audit = envp.iter().any(|var| var.as_c_str() == p); - if !has_ld_audit { - envp.push(p.into()); - } - } -} diff --git a/litebox_runner_linux_userland/tests/common/mod.rs b/litebox_runner_linux_userland/tests/common/mod.rs index e9b6a9810..3f761f64a 100644 --- a/litebox_runner_linux_userland/tests/common/mod.rs +++ b/litebox_runner_linux_userland/tests/common/mod.rs @@ -80,7 +80,7 @@ fn find_rewriter_source_files() -> Vec { /// Compile C code into an executable with caching pub fn compile(src_path: &str, unique_name: &str, exec_or_lib: bool, nolibc: bool) -> PathBuf { - let dir_path = std::env::var("OUT_DIR").unwrap(); + let dir_path = env!("CARGO_TARGET_TMPDIR").to_string(); let path = std::path::Path::new(dir_path.as_str()).join(unique_name); let output = path.to_str().unwrap(); diff --git a/litebox_runner_linux_userland/tests/loader.rs b/litebox_runner_linux_userland/tests/loader.rs index 9850ba843..2ff79f97c 100644 --- a/litebox_runner_linux_userland/tests/loader.rs +++ b/litebox_runner_linux_userland/tests/loader.rs @@ -234,7 +234,7 @@ void _start() { #[test] fn test_syscall_rewriter() { - let dir_path = std::env::var("OUT_DIR").unwrap(); + let dir_path = env!("CARGO_TARGET_TMPDIR").to_string(); let src_path = std::path::Path::new(dir_path.as_str()).join("hello_exec_nolibc.c"); std::fs::write(src_path.clone(), HELLO_WORLD_NOLIBC).unwrap(); let path = std::path::Path::new(dir_path.as_str()).join("hello_exec_nolibc"); diff --git a/litebox_runner_linux_userland/tests/run.rs b/litebox_runner_linux_userland/tests/run.rs index 219d27da1..3da964a4f 100644 --- a/litebox_runner_linux_userland/tests/run.rs +++ b/litebox_runner_linux_userland/tests/run.rs @@ -32,7 +32,7 @@ impl Runner { Backend::Rewriter => "rewriter", Backend::Seccomp => "seccomp", }; - let dir_path = PathBuf::from(std::env::var_os("OUT_DIR").unwrap()); + let dir_path = 
PathBuf::from(env!("CARGO_TARGET_TMPDIR")); let path = match backend { Backend::Seccomp => target.to_path_buf(), Backend::Rewriter => { @@ -206,7 +206,7 @@ fn find_c_test_files(dir: &str) -> Vec { files } -// our rtld_audit does not support x86 yet +// Syscall rewriting does not support x86 yet #[cfg(target_arch = "x86_64")] #[test] fn test_dynamic_lib_with_rewriter() { diff --git a/litebox_shim_linux/Cargo.toml b/litebox_shim_linux/Cargo.toml index 94d889a7f..ff0b4ea4e 100644 --- a/litebox_shim_linux/Cargo.toml +++ b/litebox_shim_linux/Cargo.toml @@ -16,6 +16,7 @@ syscalls = { version = "0.6", default-features = false } seq-macro = "0.3" ringbuf = { version = "0.4.8", default-features = false, features = ["alloc"] } zerocopy = { version = "0.8", default-features = false, features = ["derive"] } +litebox_syscall_rewriter = { version = "0.1.0", path = "../litebox_syscall_rewriter", default-features = false } [features] default = ["platform_linux_userland"] diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs index 2834f7b72..59c10023f 100644 --- a/litebox_shim_linux/src/lib.rs +++ b/litebox_shim_linux/src/lib.rs @@ -200,6 +200,7 @@ impl LinuxShimBuilder { next_thread_id: 2.into(), // start from 2, as 1 is used by the main thread litebox: self.litebox, unix_addr_table: litebox::sync::RwLock::new(syscalls::unix::UnixAddrTable::new()), + elf_patch_cache: litebox::sync::Mutex::new(alloc::collections::BTreeMap::new()), }); LinuxShim(global) } @@ -257,6 +258,7 @@ impl LinuxShim { fs: Arc::new(syscalls::file::FsState::new()).into(), files: files.into(), signals: syscalls::signal::SignalState::new_process(), + suppress_elf_runtime_patch: Cell::new(false), }, }; entrypoints.task.load_program( @@ -343,9 +345,11 @@ fn default_fs( } // Special override so that `GETFL` can return stdio-specific flags +#[derive(Clone)] pub(crate) struct StdioStatusFlags(litebox::fs::OFlags); /// Status flags for pipes +#[derive(Clone)] pub(crate) struct PipeStatusFlags(pub 
litebox::fs::OFlags); impl syscalls::file::FilesState { @@ -1059,6 +1063,8 @@ struct GlobalState { next_thread_id: core::sync::atomic::AtomicI32, /// UNIX domain socket address table unix_addr_table: litebox::sync::RwLock>, + /// Per-process collection of ELF patching state for runtime syscall rewriting. + elf_patch_cache: litebox::sync::Mutex, } struct Task { @@ -1082,6 +1088,9 @@ struct Task { files: RefCell>>, /// Signal state signals: syscalls::signal::SignalState, + /// Suppresses runtime ELF patching in `do_mmap_file` while the ELF loader + /// is actively loading a binary (prevents double-mapping the trampoline). + suppress_elf_runtime_patch: Cell, } impl Drop for Task { @@ -1121,6 +1130,7 @@ mod test_utils { fs: Arc::new(syscalls::file::FsState::new()).into(), files: files.into(), signals: syscalls::signal::SignalState::new_process(), + suppress_elf_runtime_patch: Cell::new(false), global: self, } } @@ -1145,6 +1155,7 @@ mod test_utils { fs: self.fs.clone(), files: self.files.clone(), signals: self.signals.clone_for_new_task(), + suppress_elf_runtime_patch: Cell::new(false), }; Some(task) } diff --git a/litebox_shim_linux/src/loader/elf.rs b/litebox_shim_linux/src/loader/elf.rs index 0d62030a8..b9a345483 100644 --- a/litebox_shim_linux/src/loader/elf.rs +++ b/litebox_shim_linux/src/loader/elf.rs @@ -7,10 +7,14 @@ use alloc::{ffi::CString, vec::Vec}; use litebox::{ fs::{Mode, OFlags}, mm::linux::{CreatePagesFlags, MappingError, PAGE_SIZE}, - platform::{RawConstPointer as _, SystemInfoProvider as _}, + platform::{RawConstPointer as _, RawMutPointer as _, SystemInfoProvider as _}, utils::{ReinterpretSignedExt, TruncateExt}, }; -use litebox_common_linux::{MapFlags, errno::Errno, loader::ElfParsedFile}; +use litebox_common_linux::{ + MapFlags, + errno::Errno, + loader::{ElfParsedFile, ReadAt as _}, +}; use thiserror::Error; use crate::{ @@ -148,6 +152,79 @@ impl litebox_common_linux::loader::MapMemory for ElfFile<'_, FS> { } } +/// A 
[`MapMemory`](litebox_common_linux::loader::MapMemory) wrapper that reads +/// file-backed data from an in-memory buffer instead of from a file descriptor. +/// Used when the loader has patched the ELF binary on the fly (e.g. syscall +/// rewriting of the dynamic linker). +/// +/// `reserve`, `map_zero`, and `protect` are delegated to the underlying +/// [`ElfFile`]; `map_file` is replaced by `map_zero` + a memory copy from the +/// patched buffer. +struct PatchedMapper<'a, 'b, FS: ShimFS> { + inner: &'b mut ElfFile<'a, FS>, + data: &'b [u8], +} + +impl litebox_common_linux::loader::MapMemory for PatchedMapper<'_, '_, FS> { + type Error = Errno; + + fn reserve(&mut self, len: usize, align: usize) -> Result { + self.inner.reserve(len, align) + } + + fn map_file( + &mut self, + address: usize, + len: usize, + offset: u64, + prot: &litebox_common_linux::loader::Protection, + ) -> Result<(), Self::Error> { + // Allocate anonymous RW pages, copy from the in-memory buffer, then + // apply the requested protection. + self.inner.map_zero( + address, + len, + &litebox_common_linux::loader::Protection { + read: true, + write: true, + execute: false, + }, + )?; + + let offset: usize = offset.truncate(); + if offset < self.data.len() { + let end = core::cmp::min(offset + len, self.data.len()); + let src = &self.data[offset..end]; + let dest = MutPtr::::from_usize(address); + dest.copy_from_slice(0, src).ok_or(Errno::EFAULT)?; + } + + // Set final permissions if different from the writable mapping above. 
+ if !prot.write || prot.execute { + self.inner.protect(address, len, prot)?; + } + Ok(()) + } + + fn map_zero( + &mut self, + address: usize, + len: usize, + prot: &litebox_common_linux::loader::Protection, + ) -> Result<(), Self::Error> { + self.inner.map_zero(address, len, prot) + } + + fn protect( + &mut self, + address: usize, + len: usize, + prot: &litebox_common_linux::loader::Protection, + ) -> Result<(), Self::Error> { + self.inner.protect(address, len, prot) + } +} + /// Struct to hold the information needed to start the program /// (entry point and user stack top). pub struct ElfLoadInfo { @@ -165,6 +242,9 @@ pub(crate) struct ElfLoader<'a, FS: ShimFS> { struct FileAndParsed<'a, FS: ShimFS> { file: ElfFile<'a, FS>, parsed: ElfParsedFile, + /// When the rewriter backend is active and the binary was not pre-patched, + /// the loader patches it on the fly and loads from this in-memory copy. + patched_data: Option>, } impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { @@ -172,8 +252,125 @@ impl<'a, FS: ShimFS> FileAndParsed<'a, FS> { let file = ElfFile::new(task, path).map_err(ElfLoaderError::OpenError)?; let mut parsed = litebox_common_linux::loader::ElfParsedFile::parse(&mut &file) .map_err(ElfLoaderError::ParseError)?; - parsed.parse_trampoline(&mut &file, task.global.platform.get_syscall_entry_point())?; - Ok(Self { file, parsed }) + + let syscall_entry_point = task.global.platform.get_syscall_entry_point(); + let trampoline_result = parsed.parse_trampoline(&mut &file, syscall_entry_point); + + // If the rewriter backend is active (syscall_entry_point != 0) and the + // binary lacks a trampoline, patch it on the fly so that both the main + // program and the dynamic linker are covered. + // + // Only attempt runtime patching for UnpatchedBinary — other errors + // (BadTrampolineVersion, BadTrampoline, Io) indicate a corrupt or + // incompatible pre-patched binary that should not be re-patched. 
+ let patched_data = if syscall_entry_point != 0 + && matches!( + trampoline_result, + Err(litebox_common_linux::loader::ElfParseError::UnpatchedBinary) + ) { + let size: usize = (&mut &file) + .size() + .map_err(ElfLoaderError::OpenError)? + .truncate(); + let mut buf = alloc::vec![0u8; size]; + (&mut &file) + .read_at(0, &mut buf) + .map_err(ElfLoaderError::OpenError)?; + + let mut skipped_addrs = alloc::vec::Vec::new(); + match litebox_syscall_rewriter::hook_syscalls_in_elf(&buf, None, &mut skipped_addrs) { + Ok(patched) => { + if !skipped_addrs.is_empty() { + litebox::log_println!( + task.global.platform, + "warning: {} unpatchable syscall instruction(s) (addresses: {:?})", + skipped_addrs.len(), + skipped_addrs, + ); + } + // Re-parse the patched binary and extract its trampoline. + parsed = + litebox_common_linux::loader::ElfParsedFile::parse(&mut patched.as_slice()) + .map_err(ElfLoaderError::ParseError)?; + parsed + .parse_trampoline(&mut patched.as_slice(), syscall_entry_point) + .map_err(ElfLoaderError::ParseError)?; + Some(patched) + } + Err( + litebox_syscall_rewriter::Error::UnsupportedBunExecutable + | litebox_syscall_rewriter::Error::UnsupportedObjectFile + | litebox_syscall_rewriter::Error::NoTextSectionFound + | litebox_syscall_rewriter::Error::NoSyscallInstructionsFound + | litebox_syscall_rewriter::Error::AlreadyHooked, + ) => { + // These are expected non-fatal cases: + // - BUN: can't be statically patched but the runtime mmap + // hook will patch code segments as they are mapped. + // - Object files / no .text / no syscalls / already hooked: + // nothing to patch. + None + } + Err(e) => { + // Unexpected rewriter failure (parse error, disassembly + // failure, etc.). Proceed without a trampoline — the + // runtime mmap hook may still patch individual segments. 
+ litebox::log_println!( + task.global.platform, + "warning: syscall rewriter failed: {}; \ + falling back to runtime patching", + e + ); + None + } + } + } else if syscall_entry_point != 0 { + // Rewriter is active and trampoline_result is either Ok or an error + // other than UnpatchedBinary (e.g. BadTrampolineVersion, BadTrampoline, Io). + // Propagate any such error rather than silently proceeding. + trampoline_result.map_err(ElfLoaderError::ParseError)?; + None + } else { + None + }; + + Ok(Self { + file, + parsed, + patched_data, + }) + } + + /// Load the ELF into guest memory, choosing the right mapper depending on + /// whether the binary was patched in memory. + fn load_mapped( + &mut self, + platform: &(impl litebox::platform::RawPointerProvider + litebox::platform::SystemInfoProvider), + ) -> Result { + // Suppress runtime ELF patching (maybe_patch_exec_segment) when the + // loader will map the trampoline itself via load_trampoline(). Without + // this, both paths would map the same region — the second MAP_FIXED + // destroys the first mapping. + // + // When patched_data is Some the PatchedMapper path doesn't go through + // do_mmap_file so the flag is a no-op, but setting it is harmless and + // keeps the logic simple. + self.file + .task + .suppress_elf_runtime_patch + .set(self.patched_data.is_some() || self.parsed.has_trampoline()); + let result = if let Some(ref data) = self.patched_data { + let mut mapper = PatchedMapper { + inner: &mut self.file, + data, + }; + self.parsed.load(&mut mapper, &mut &*platform) + } else { + self.parsed.load(&mut self.file, &mut &*platform) + }; + self.file.task.suppress_elf_runtime_patch.set(false); + + Ok(result?) + } } @@ -204,18 +401,11 @@ impl<'a, FS: ShimFS> ElfLoader<'a, FS> { let global = &self.main.file.task.global; // Load the main ELF file first so that it gets privileged addresses.
- let info = self - .main - .parsed - .load(&mut self.main.file, &mut &*global.platform)?; + let info = self.main.load_mapped(global.platform)?; // Load the interpreter ELF file, if any. let interp = if let Some(interp) = &mut self.interp { - Some( - interp - .parsed - .load(&mut interp.file, &mut &*global.platform)?, - ) + Some(interp.load_mapped(global.platform)?) } else { None }; diff --git a/litebox_shim_linux/src/syscalls/file.rs b/litebox_shim_linux/src/syscalls/file.rs index 03bf151ad..d1f219579 100644 --- a/litebox_shim_linux/src/syscalls/file.rs +++ b/litebox_shim_linux/src/syscalls/file.rs @@ -536,6 +536,10 @@ impl Task { /// Handle syscall `close` pub(crate) fn sys_close(&self, fd: i32) -> Result<(), Errno> { + // Finalize any in-progress ELF patching for this fd (mprotect + // trampoline RW→RX) before closing the descriptor. + self.finalize_elf_patch(fd); + let Ok(raw_fd) = u32::try_from(fd).and_then(usize::try_from) else { return Err(Errno::EBADF); }; diff --git a/litebox_shim_linux/src/syscalls/mm.rs b/litebox_shim_linux/src/syscalls/mm.rs index ce6c3513c..ab6069b24 100644 --- a/litebox_shim_linux/src/syscalls/mm.rs +++ b/litebox_shim_linux/src/syscalls/mm.rs @@ -4,10 +4,11 @@ //! Implementation of memory management related syscalls, eg., `mmap`, `munmap`, etc. //! Most of these syscalls which are not backed by files are implemented in [`litebox_common_linux::mm`]. +use alloc::collections::BTreeMap; use litebox::{ mm::linux::{MappingError, PAGE_SIZE, PageRange}, platform::{ - PageManagementProvider, RawConstPointer, RawMutPointer, + PageManagementProvider, RawConstPointer, RawMutPointer, SystemInfoProvider, page_mgmt::{FixedAddressBehavior, MemoryRegionPermissions}, }, }; @@ -17,6 +18,32 @@ use crate::MutPtr; use crate::ShimFS; use crate::Task; +/// Per-fd state for the shim's runtime ELF syscall rewriter. +/// +/// Tracks base address and trampoline write cursor for each ELF file that +/// has executable segments mapped via `do_mmap_file()`. 
+pub(crate) struct ElfPatchState { + /// Whether this file is already pre-patched (trampoline magic found at file tail). + pub pre_patched: bool, + /// For pre-patched binaries: file offset and size of the trampoline data. + pub trampoline_file_offset: u64, + pub trampoline_file_size: usize, + /// Start address of the trampoline region (runtime). + pub trampoline_addr: usize, + /// Current write position within the trampoline (byte offset from `trampoline_addr`). + pub trampoline_cursor: usize, + /// Whether the trampoline region has been allocated. + pub trampoline_mapped: bool, + /// Total number of trampoline bytes currently mapped. + pub trampoline_mapped_len: usize, + /// Whether any runtime-generated stubs were successfully linked from code + /// in this fd to the trampoline. + pub runtime_patches_committed: bool, +} + +/// Per-process collection of ELF patching state, keyed by fd number. +pub(crate) type ElfPatchCache = BTreeMap; + #[inline] fn align_up(addr: usize, align: usize) -> usize { debug_assert!(align.is_power_of_two()); @@ -76,12 +103,42 @@ impl Task { fd: i32, offset: usize, ) -> Result, MappingError> { - if let Some(cow_result) = + let is_exec = prot.contains(ProtFlags::PROT_EXEC); + + // Perform the normal mmap first (CoW or memcpy fallback). + let result = if let Some(cow_result) = self.try_cow_mmap_file(suggested_addr, len, &prot, &flags, fd, offset) { - return cow_result; + cow_result? + } else { + self.do_mmap_file_memcpy(suggested_addr, len, prot, flags, fd, offset)? + }; + + // Runtime syscall rewriting: patch PROT_EXEC segments in-place. + // Suppressed during ELF loader's load() sequence because the loader + // maps the trampoline itself via load_trampoline(). Running both + // paths would double-map the trampoline, with the second MAP_FIXED + // destroying the first mapping. 
+ if !self.suppress_elf_runtime_patch.get() { + if is_exec { + let syscall_entry = self.global.platform.get_syscall_entry_point(); + if syscall_entry != 0 + && !self.maybe_patch_exec_segment(result, len, fd, offset, syscall_entry) + { + // Trampoline setup failed for a pre-patched binary whose + // .text already contains JMPs to the trampoline address. + // Continuing would guarantee a SIGSEGV on the first + // rewritten syscall, so fail the mmap instead. + let _ = self.sys_munmap(result, len); + return Err(MappingError::OutOfMemory); + } + } else if offset == 0 { + // First mmap at offset 0: record the base address for later patching. + self.init_elf_patch_state(fd, result.as_usize()); + } } - self.do_mmap_file_memcpy(suggested_addr, len, prot, flags, fd, offset) + + Ok(result) } /// Attempt to create a CoW mapping for a file with static backing data. @@ -352,6 +409,474 @@ impl Task { ) -> Result<(), Errno> { litebox_common_linux::mm::sys_madvise(&self.global.pm, addr, len, advice) } + + // ── Runtime ELF syscall patching ───────────────────────────────────── + + /// Initialize ELF patch state for an fd on its first mmap at offset 0. + /// + /// Reads the ELF header to determine the trampoline address (page-aligned + /// end of the highest PT_LOAD segment) and checks the file tail for the + /// trampoline magic to determine if it's pre-patched. + /// + /// x86_64 only: assumes 64-bit ELF layout and program header offsets. + #[allow(clippy::cast_possible_truncation)] + fn init_elf_patch_state(&self, fd: i32, base_addr: usize) { + // Quick check: skip if already initialized. + if self.global.elf_patch_cache.lock().contains_key(&fd) { + return; + } + + // Read the ELF header (first 64 bytes covers both 32-bit and 64-bit). 
+ let mut ehdr_buf = [0u8; 64]; + match self.sys_read(fd, &mut ehdr_buf, Some(0)) { + Ok(n) if n == ehdr_buf.len() => {} + _ => return, // Not readable or short read, skip + } + + // Verify ELF magic + if &ehdr_buf[0..4] != b"\x7fELF" { + return; // Not an ELF file + } + + // Parse as 64-bit ELF (runtime patching is x86-64 only). + let e_phoff = u64::from_le_bytes(ehdr_buf[32..40].try_into().unwrap()) as usize; + let e_phentsize = u16::from_le_bytes(ehdr_buf[54..56].try_into().unwrap()) as usize; + let e_phnum = u16::from_le_bytes(ehdr_buf[56..58].try_into().unwrap()) as usize; + let e_type = u16::from_le_bytes(ehdr_buf[16..18].try_into().unwrap()); + + // Read program headers to find max PT_LOAD end + let phdrs_size = e_phentsize * e_phnum; + if phdrs_size == 0 || phdrs_size > 0x10000 { + return; // Sanity check + } + let mut phdrs_buf = alloc::vec![0u8; phdrs_size]; + match self.sys_read(fd, &mut phdrs_buf, Some(e_phoff)) { + Ok(n) if n == phdrs_buf.len() => {} + _ => return, + } + + // Find highest PT_LOAD end (p_vaddr + p_memsz) + let mut max_load_end: u64 = 0; + for i in 0..e_phnum { + let ph = &phdrs_buf[i * e_phentsize..][..e_phentsize]; + let p_type = u32::from_le_bytes(ph[0..4].try_into().unwrap()); + if p_type != 1 { + // PT_LOAD = 1 + continue; + } + let p_vaddr = u64::from_le_bytes(ph[16..24].try_into().unwrap()); + let p_memsz = u64::from_le_bytes(ph[40..48].try_into().unwrap()); + let end = p_vaddr + p_memsz; + if end > max_load_end { + max_load_end = end; + } + } + + if max_load_end == 0 { + return; // No PT_LOAD segments + } + + // For ET_DYN (PIE/shared libs), p_vaddr is relative to base_addr. + // For ET_EXEC, p_vaddr is absolute and base_addr is 0. 
+ let trampoline_vaddr = if e_type == 3 { + // ET_DYN + base_addr + (max_load_end as usize).next_multiple_of(PAGE_SIZE) + } else { + // ET_EXEC + (max_load_end as usize).next_multiple_of(PAGE_SIZE) + }; + + // Check if file is pre-patched by reading the last 32 bytes for magic + let (pre_patched, tramp_file_offset, tramp_vaddr, tramp_file_size) = + self.check_trampoline_magic(fd); + + // For pre-patched binaries, use the vaddr from the header instead. + let trampoline_vaddr = if pre_patched { + if e_type == 3 { + base_addr + tramp_vaddr as usize + } else { + tramp_vaddr as usize + } + } else { + trampoline_vaddr + }; + + // Insert under lock (re-check for races). + let mut cache = self.global.elf_patch_cache.lock(); + cache.entry(fd).or_insert(ElfPatchState { + pre_patched, + trampoline_file_offset: tramp_file_offset, + trampoline_file_size: tramp_file_size as usize, + trampoline_addr: trampoline_vaddr, + trampoline_cursor: 0, + trampoline_mapped: false, + trampoline_mapped_len: 0, + runtime_patches_committed: false, + }); + } + + /// Check if a file has the LITEBOX trampoline magic at its tail. + /// Returns (is_pre_patched, file_offset, vaddr, trampoline_size). 
+ fn check_trampoline_magic(&self, fd: i32) -> (bool, u64, u64, u64) { + let Ok(stat) = self.sys_fstat(fd) else { + return (false, 0, 0, 0); + }; + let file_size = stat.st_size; + if file_size < 32 { + return (false, 0, 0, 0); + } + let mut tail = [0u8; 32]; + match self.sys_read(fd, &mut tail, Some(file_size - 32)) { + Ok(n) if n == tail.len() => {} + _ => return (false, 0, 0, 0), + } + if &tail[0..8] != litebox_syscall_rewriter::TRAMPOLINE_MAGIC { + return (false, 0, 0, 0); + } + // Parse header: magic(8) | file_offset(8) | vaddr(8) | size(8) + let file_offset = u64::from_le_bytes(tail[8..16].try_into().unwrap()); + let vaddr = u64::from_le_bytes(tail[16..24].try_into().unwrap()); + let trampoline_size = u64::from_le_bytes(tail[24..32].try_into().unwrap()); + (true, file_offset, vaddr, trampoline_size) + } + + /// Patch an executable segment in-place after it has been mapped. + /// + /// For pre-patched binaries: maps the trampoline from the file and writes + /// the syscall entry point. + /// For unpatched binaries: calls `patch_code_segment()` to rewrite syscall + /// instructions and places the generated stubs in the trampoline region. + /// + /// Returns `true` on success or non-fatal skip. Returns `false` when a + /// pre-patched binary's trampoline could not be set up — the caller must + /// fail the mapping because the code already contains JMPs to the + /// trampoline address. + #[allow(clippy::cast_possible_truncation)] + fn maybe_patch_exec_segment( + &self, + mapped_addr: MutPtr, + len: usize, + fd: i32, + offset: usize, + syscall_entry: usize, + ) -> bool { + // Initialize patch state if this is the first mmap for this fd. 
+ if offset == 0 { + self.init_elf_patch_state(fd, mapped_addr.as_usize()); + } + + let mut cache = self.global.elf_patch_cache.lock(); + let Some(state) = cache.get_mut(&fd) else { + return true; // No patch state — not an ELF we're tracking + }; + + if state.pre_patched { + // Pre-patched binary: map the trampoline data from the file. + if !state.trampoline_mapped && state.trampoline_file_size > 0 { + let tramp_addr = state.trampoline_addr; + let tramp_len = align_up(state.trampoline_file_size, PAGE_SIZE); + + // Allocate RW region at the trampoline address. + let alloc_result = self + .do_mmap_anonymous( + Some(tramp_addr), + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED, + ) + .or_else(|_| { + self.do_mmap_anonymous( + Some(tramp_addr), + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE, + ) + }); + let Ok(alloc_ptr) = alloc_result else { + return false; + }; + let actual_addr = alloc_ptr.as_usize(); + if actual_addr != tramp_addr { + let _ = self.sys_munmap(MutPtr::::from_usize(actual_addr), tramp_len); + return false; + } + + // Read trampoline data from the file. + let mut tramp_data = alloc::vec![0u8; state.trampoline_file_size]; + let file_off = state.trampoline_file_offset as usize; + let tramp_ptr = MutPtr::::from_usize(tramp_addr); + match self.sys_read(fd, &mut tramp_data, Some(file_off)) { + Ok(n) if n == tramp_data.len() => {} + _ => { + let _ = self.sys_munmap(tramp_ptr, tramp_len); + return false; + } + } + + // Write syscall entry point to the first 8 bytes. + if tramp_data.len() >= 8 { + tramp_data[..8].copy_from_slice(&syscall_entry.to_le_bytes()); + } + + // Write to the mapped region. + if tramp_ptr.copy_from_slice(0, &tramp_data).is_none() { + let _ = self.sys_munmap(tramp_ptr, tramp_len); + return false; + } + + // Protect as RX immediately. 
+ if self + .sys_mprotect( + tramp_ptr, + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ) + .is_err() + { + let _ = self.sys_munmap(tramp_ptr, tramp_len); + return false; + } + + state.trampoline_mapped = true; + state.trampoline_mapped_len = tramp_len; + } + return true; + } + + // ── Runtime patching path (unpatched binaries) ─────────────── + + // Allocate the trampoline region if not yet done. + let addr_usize = mapped_addr.as_usize(); + if !state.trampoline_mapped { + let tramp_addr = state.trampoline_addr; + + // Try MAP_FIXED first — works when ensure_space_after reserved + // PROT_NONE space (shared libraries). Falls back to a hint-based + // allocation for the ElfLoader path where no headroom is reserved. + let actual_addr = self + .do_mmap_anonymous( + Some(tramp_addr), + PAGE_SIZE, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED, + ) + .or_else(|_| { + self.do_mmap_anonymous( + Some(tramp_addr), + PAGE_SIZE, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE, + ) + }); + let actual_addr = match actual_addr { + Ok(ptr) => ptr.as_usize(), + Err(_) => return true, + }; + + // Verify the trampoline is within JMP rel32 range (+-2GB) of the code. + let distance = actual_addr.abs_diff(addr_usize); + if distance > 0x7FFF_0000 { + let _ = self.sys_munmap(MutPtr::::from_usize(actual_addr), PAGE_SIZE); + return true; + } + + state.trampoline_addr = actual_addr; + + // Write the 8-byte syscall entry point at the start. 
+ let entry_ptr = MutPtr::::from_usize(actual_addr); + if entry_ptr + .copy_from_slice(0, &syscall_entry.to_le_bytes()) + .is_none() + { + let _ = self.sys_munmap(MutPtr::::from_usize(actual_addr), PAGE_SIZE); + return true; + } + state.trampoline_cursor = 8; // stubs start after the 8-byte entry + state.trampoline_mapped = true; + state.trampoline_mapped_len = PAGE_SIZE; + } + + let restore_trampoline_rx = |task: &Self, state: &ElfPatchState| { + if state.trampoline_mapped_len > 0 { + let _ = task.sys_mprotect( + MutPtr::::from_usize(state.trampoline_addr), + state.trampoline_mapped_len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + } + }; + + // Make the trampoline RW for writing stubs. + if state.trampoline_mapped_len > 0 + && self + .sys_mprotect( + MutPtr::::from_usize(state.trampoline_addr), + state.trampoline_mapped_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + ) + .is_err() + { + return true; + } + + // Make the code segment writable for in-place patching. + if self + .sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + ) + .is_err() + { + return true; + } + + // Read the mapped code into a buffer, patch it, write back. 
+ let Some(code_owned) = mapped_addr.to_owned_slice(len) else { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + }; + let mut code_buf = code_owned.into_vec(); + let original_code = code_buf.clone(); + + let code_vaddr = addr_usize as u64; + let trampoline_write_vaddr = (state.trampoline_addr + state.trampoline_cursor) as u64; + let syscall_entry_addr = state.trampoline_addr as u64; + + let mut skipped_addrs = alloc::vec::Vec::new(); + let patch_result = litebox_syscall_rewriter::patch_code_segment( + &mut code_buf, + code_vaddr, + trampoline_write_vaddr, + syscall_entry_addr, + &mut skipped_addrs, + ); + if !skipped_addrs.is_empty() { + litebox::log_println!( + self.global.platform, + "warning: {} syscall instruction(s) could not be patched (addresses: {:?})", + skipped_addrs.len(), + skipped_addrs, + ); + } + match patch_result { + Ok(stubs) if !stubs.is_empty() => { + let Some(new_cursor) = state.trampoline_cursor.checked_add(stubs.len()) else { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + }; + let tramp_pages_needed = align_up(new_cursor, PAGE_SIZE); + if tramp_pages_needed > state.trampoline_mapped_len { + let extra_start = state.trampoline_addr + state.trampoline_mapped_len; + let extra_len = tramp_pages_needed - state.trampoline_mapped_len; + if self + .do_mmap_anonymous( + Some(extra_start), + extra_len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_ANONYMOUS | MapFlags::MAP_PRIVATE | MapFlags::MAP_FIXED, + ) + .is_err() + { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + state.trampoline_mapped_len = tramp_pages_needed; + } + + // Write stubs before patching the code so rewritten jumps + // never target an 
uninitialized trampoline. + let tramp_write_ptr = + MutPtr::::from_usize(state.trampoline_addr + state.trampoline_cursor); + if tramp_write_ptr.copy_from_slice(0, &stubs).is_none() { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + + // Write patched code back to the mapped region. + if mapped_addr.copy_from_slice(0, &code_buf).is_none() { + let _ = mapped_addr.copy_from_slice(0, &original_code); + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + state.trampoline_cursor = new_cursor; + state.runtime_patches_committed = true; + } + Ok(_) => { + // No syscalls found — no patching needed. + } + Err(_) => { + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + return true; + } + } + + // Restore the code segment to RX. + let _ = self.sys_mprotect( + mapped_addr, + len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + restore_trampoline_rx(self, state); + true + } + + /// Finalize the ELF patching state for `fd`. + /// + /// For a runtime-allocated (not pre-patched) trampoline region: mprotect it + /// to RX if stubs were committed, otherwise unmap the unused region. + /// The cache entry is removed regardless.
+ pub(crate) fn finalize_elf_patch(&self, fd: i32) { + let state = self.global.elf_patch_cache.lock().remove(&fd); + if let Some(state) = state + && state.trampoline_mapped + && !state.pre_patched + { + let tramp_len = state.trampoline_mapped_len; + if tramp_len > 0 { + if !state.runtime_patches_committed { + let _ = + self.sys_munmap(MutPtr::::from_usize(state.trampoline_addr), tramp_len); + return; + } + let _ = self.sys_mprotect( + MutPtr::::from_usize(state.trampoline_addr), + tramp_len, + ProtFlags::PROT_READ | ProtFlags::PROT_EXEC, + ); + } + } + } } #[cfg(test)] diff --git a/litebox_shim_linux/src/syscalls/net.rs b/litebox_shim_linux/src/syscalls/net.rs index d1b04e894..9192971c1 100644 --- a/litebox_shim_linux/src/syscalls/net.rs +++ b/litebox_shim_linux/src/syscalls/net.rs @@ -155,7 +155,7 @@ impl SocketAddress { } } -#[derive(Default)] +#[derive(Default, Clone)] pub(super) struct SocketOptions { pub(super) reuse_address: bool, pub(super) keep_alive: bool, @@ -171,7 +171,9 @@ pub(super) struct SocketOptions { pub(super) linger_timeout: Option, } +#[derive(Clone)] pub(crate) struct SocketOFlags(pub OFlags); +#[derive(Clone)] pub(crate) struct SocketProxy(pub Arc>); pub(super) enum SocketOptionValue { diff --git a/litebox_shim_linux/src/syscalls/process.rs b/litebox_shim_linux/src/syscalls/process.rs index 70f878cde..a6605a409 100644 --- a/litebox_shim_linux/src/syscalls/process.rs +++ b/litebox_shim_linux/src/syscalls/process.rs @@ -770,6 +770,7 @@ impl Task { fs: fs.into(), files: self.files.clone(), // TODO: !CLONE_FILES support signals: self.signals.clone_for_new_task(), + suppress_elf_runtime_patch: core::cell::Cell::new(false), }, }), ) @@ -1291,7 +1292,7 @@ impl Task { let Some(count) = core::num::NonZeroU32::new(count) else { return Ok(0); }; - self.global.futex_manager.wake(addr, count, None)? as usize + self.global.futex_manager.wake(addr, count, None, 0)? 
as usize } FutexArgs::Wait { addr, @@ -1306,6 +1307,7 @@ impl Task { addr, val, None, + 0, )?; 0 } @@ -1333,6 +1335,7 @@ impl Task { addr, val, core::num::NonZeroU32::new(bitmask), + 0, )?; 0 } diff --git a/litebox_syscall_rewriter/src/lib.rs b/litebox_syscall_rewriter/src/lib.rs index f6de6503e..dcf703377 100644 --- a/litebox_syscall_rewriter/src/lib.rs +++ b/litebox_syscall_rewriter/src/lib.rs @@ -201,6 +201,7 @@ pub fn hook_syscalls_in_elf( } // Patch syscalls in-place in buf + let mut syscall_insns_found = false; for s in &text_sections { let section_data = section_slice_mut(buf, s)?; match hook_syscalls_in_section( @@ -214,11 +215,42 @@ pub fn hook_syscalls_in_elf( &mut trampoline_data, skipped_addrs, ) { - Ok(()) | Err(Error::NoSyscallInstructionsFound) => {} + Ok(()) => { + syscall_insns_found = true; + } + Err(Error::NoSyscallInstructionsFound) => {} Err(e) => return Err(e), } } + if !syscall_insns_found { + // No syscall instructions found. Append a header-only marker so the + // loader can distinguish "checked by rewriter, nothing to patch" from + // "never processed." The trampoline_size=0 sentinel tells the loader + // to skip trampoline mapping entirely. + // Use the original input (not `buf`) to avoid emitting the phdr + // alignment fixup that was only needed for the `object` crate parser. + let mut out = input_binary.to_vec(); + if arch == Arch::X86_64 { + let header = TrampolineHeader64 { + magic: *TRAMPOLINE_MAGIC, + file_offset: 0, + vaddr: 0, + trampoline_size: 0, + }; + out.extend_from_slice(header.as_bytes()); + } else { + let header = TrampolineHeader32 { + magic: *TRAMPOLINE_MAGIC, + file_offset: 0, + vaddr: 0, + trampoline_size: 0, + }; + out.extend_from_slice(header.as_bytes()); + } + return Ok(out); + } + // Patch fork → vfork: overwrite the first bytes of __libc_fork with a // JMP to __libc_vfork. This prevents glibc's fork wrapper from running // post-fork handlers that corrupt shared state under vfork semantics. 
@@ -453,18 +485,90 @@ fn hook_syscalls_in_section( let replace_start = replace_start.unwrap(); let replace_len = usize::try_from(replace_end - replace_start).unwrap(); + let copied_presyscall_insts_have_ip_rel_mem = arch == Arch::X86_64 + && instruction_slice_has_ip_rel_memory_operand( + instructions + .iter() + .take(i) + .skip_while(|prev_inst| prev_inst.ip() < replace_start), + ); + let target_addr = trampoline_base_addr + trampoline_data.len() as u64; - // Copy the original instructions to the trampoline + // Copy the pre-syscall instructions to the trampoline. + // When any instruction has a RIP-relative memory operand, we + // re-encode them so the displacement targets the same absolute + // address from the new trampoline location. if replace_start < inst.ip() { - trampoline_data.extend_from_slice( - &section_data[usize::try_from(replace_start - section_base_addr).unwrap() - ..usize::try_from(inst.ip() - section_base_addr).unwrap()], - ); + if copied_presyscall_insts_have_ip_rel_mem { + let mut reencoded = Vec::new(); + let mut ok = true; + let mut encoder = iced_x86::Encoder::new(64); + for pre_inst in instructions + .iter() + .take(i) + .skip_while(|p| p.ip() < replace_start) + { + let tramp_ip = target_addr + reencoded.len() as u64; + if encoder.encode(pre_inst, tramp_ip).is_err() { + ok = false; + break; + } + let bytes = encoder.take_buffer(); + if bytes.len() != pre_inst.len() { + ok = false; + break; + } + reencoded.extend_from_slice(&bytes); + } + if !ok { + match hook_syscall_and_after( + arch, + control_transfer_targets, + section_base_addr, + section_data, + trampoline_base_addr, + syscall_entry_addr, + trampoline_data, + &instructions, + i, + ) { + Ok(()) => {} + Err(Error::InsufficientBytesBeforeOrAfter(_)) => { + replace_with_ud2(section_data, section_base_addr, inst); + skipped_addrs.push(inst.ip()); + } + Err(e) => return Err(e), + } + continue; + } + trampoline_data.extend_from_slice(&reencoded); + } else { + trampoline_data.extend_from_slice( +
&section_data[usize::try_from(replace_start - section_base_addr).unwrap() + ..usize::try_from(inst.ip() - section_base_addr).unwrap()], + ); + } } let return_addr = inst.next_ip(); if arch == Arch::X86_64 { + // Reserve the SysV red zone before entering the shim so async + // guest signal delivery / interrupt handling cannot clobber + // stack locals parked below the architectural RSP. + // LEA RSP, [RSP - 0x80] = 48 8D 64 24 80 + trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x64, 0x24, 0x80]); + + // Put the address of the original JMP (call-site) into R11 so + // that SA_RESTART can rewind ctx.rip to re-enter the trampoline. + // The real `syscall` instruction clobbers R11 with RFLAGS, so + // this register is free from the guest's perspective. + // LEA R11, [RIP + disp32] = 4C 8D 1D + let r11_disp = i64::try_from(replace_start).unwrap() + - i64::try_from(trampoline_base_addr + trampoline_data.len() as u64 + 7).unwrap(); + trampoline_data.extend_from_slice(&[0x4C, 0x8D, 0x1D]); // LEA R11, [RIP + disp32] + trampoline_data.extend_from_slice(&(i32::try_from(r11_disp).unwrap().to_le_bytes())); + // Put jump back location into rcx. let jmp_back_offset = i64::try_from(return_addr).unwrap() - i64::try_from(trampoline_base_addr + trampoline_data.len() as u64 + 7).unwrap(); @@ -569,8 +673,8 @@ fn fixup_phdr_alignment(buf: &mut [u8]) { let new_start = old_start + padding; let new_end = new_start + usize::try_from(phdr_size).expect("phdr_size must fit in usize"); - if old_end > buf.len() || new_end > buf.len() { - return; // corrupt phdr table or not enough room + if new_end > buf.len() { + return; // not enough room } // Move the phdr table forward (use copy_within since src and dst overlap). @@ -657,7 +761,7 @@ fn find_fork_vfork_patch( Some((fork_file_offset, rel32)) } -/// Check if the input binary has the Bun footer marker at the end. +/// Check if the input binary has the Bun footer marker near the end.
fn has_bun_footer_marker(input_binary: &[u8]) -> bool { input_binary.len() >= BUN_FOOTER_MARKER.len() && input_binary[input_binary.len() - BUN_FOOTER_MARKER.len()..] == *BUN_FOOTER_MARKER @@ -946,10 +1050,50 @@ fn hook_syscall_and_after( } let replace_end = replace_end.unwrap(); + // This function copies post-syscall instructions to the trampoline as raw + // bytes (no re-encoding). That only works for position-independent + // instructions. If any post-syscall instruction has a RIP-relative memory + // operand, the raw bytes would reference the wrong address from the + // trampoline's location, so fall back to hook_syscall_before_and_after + // which re-encodes both sides with corrected displacements. + let copied_postsyscall_insts_have_ip_rel_mem = arch == Arch::X86_64 + && instruction_slice_has_ip_rel_memory_operand( + instructions + .iter() + .skip(inst_index + 1) + .take_while(|next_inst| next_inst.ip() < replace_end), + ); + if copied_postsyscall_insts_have_ip_rel_mem { + return hook_syscall_before_and_after( + arch, + control_transfer_targets, + section_base_addr, + section_data, + trampoline_base_addr, + syscall_entry_addr, + trampoline_data, + instructions, + inst_index, + ); + } let target_addr = trampoline_base_addr + trampoline_data.len() as u64; if arch == Arch::X86_64 { + // Reserve the SysV red zone before entering the shim so async guest + // signal delivery / interrupt handling cannot clobber stack locals + // parked below the architectural RSP. + // LEA RSP, [RSP - 0x80] = 48 8D 64 24 80 + trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x64, 0x24, 0x80]); + + // Put the address of the original JMP (call-site) into R11 so + // that SA_RESTART can rewind ctx.rip to re-enter the trampoline. 
+ // LEA R11, [RIP + disp32] = 4C 8D 1D + let r11_disp = i64::try_from(replace_start).unwrap() + - i64::try_from(trampoline_base_addr + trampoline_data.len() as u64 + 7).unwrap(); + trampoline_data.extend_from_slice(&[0x4C, 0x8D, 0x1D]); // LEA R11, [RIP + disp32] + trampoline_data.extend_from_slice(&(i32::try_from(r11_disp).unwrap().to_le_bytes())); + // Put jump back location into rcx, via lea rcx, [next instruction] trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x0D]); // LEA RCX, [RIP + disp32] trampoline_data.extend_from_slice(&6u32.to_le_bytes()); @@ -1013,6 +1157,14 @@ fn hook_syscall_and_after( Ok(()) } +fn instruction_slice_has_ip_rel_memory_operand<'a>( + instructions: impl IntoIterator, +) -> bool { + instructions + .into_iter() + .any(iced_x86::Instruction::is_ip_rel_memory_operand) +} + #[allow(clippy::too_many_arguments)] fn hook_syscall_before_and_after( arch: Arch, diff --git a/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap b/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap index 9f933eb4d..aebaab30d 100644 --- a/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap +++ b/litebox_syscall_rewriter/tests/snapshots/snapshot_tests__hello-diff.snap @@ -24,7 +24,7 @@ expression: diff - 401e78: 31 ff xor %edi,%edi - 401e7a: 89 d0 mov %edx,%eax - 401e7c: 0f 05 syscall -+ 401e78: ++ 401e78: + 401e7d: 90 nop 401e7e: eb f8 jmp 401e78 <__libc_start_call_main+0x88> 401e80: 31 c0 xor %eax,%eax @@ -35,7 +35,7 @@ expression: diff 403ee0: bf 01 50 00 00 mov $0x5001,%edi - 403ee5: b8 9e 00 00 00 mov $0x9e,%eax - 403eea: 0f 05 syscall -+ 403ee5: ++ 403ee5: + 403eea: 90 nop + 403eeb: 90 nop 403eec: 44 89 ef mov %r13d,%edi @@ -47,7 +47,7 @@ expression: diff 4043ce: 48 89 36 mov %rsi,(%rsi) - 4043d1: 48 89 76 10 mov %rsi,0x10(%rsi) - 4043d5: 0f 05 syscall -+ 4043d1: ++ 4043d1: + 4043d6: 90 nop 4043d7: 85 c0 test %eax,%eax 4043d9: 74 24 je 4043ff <__libc_setup_tls+0x1df> @@ -56,7 +56,7 @@ expression: 
diff 4043e5: b8 01 00 00 00 mov $0x1,%eax - 4043ea: 48 8d 35 c7 d1 07 00 lea 0x7d1c7(%rip),%rsi # 4815b8 - 4043f1: 0f 05 syscall -+ 4043ea: ++ 4043ea: + 4043ef: 90 nop + 4043f0: 90 nop + 4043f1: 90 nop @@ -64,7 +64,7 @@ expression: diff 4043f3: bf 7f 00 00 00 mov $0x7f,%edi - 4043f8: b8 e7 00 00 00 mov $0xe7,%eax - 4043fd: 0f 05 syscall -+ 4043f8: ++ 4043f8: + 4043fd: 90 nop + 4043fe: 90 nop 4043ff: e8 dc ba 01 00 call 41fee0 <__tls_init_tp> @@ -76,7 +76,7 @@ expression: diff 4044ba: b8 01 00 00 00 mov $0x1,%eax - 4044bf: 48 8d 35 f2 d0 07 00 lea 0x7d0f2(%rip),%rsi # 4815b8 - 4044c6: 0f 05 syscall -+ 4044bf: ++ 4044bf: + 4044c4: 90 nop + 4044c5: 90 nop + 4044c6: 90 nop @@ -84,7 +84,7 @@ expression: diff 4044c8: bf 7f 00 00 00 mov $0x7f,%edi - 4044cd: b8 e7 00 00 00 mov $0xe7,%eax - 4044d2: 0f 05 syscall -+ 4044cd: ++ 4044cd: + 4044d2: 90 nop + 4044d3: 90 nop 4044d4: e9 70 fe ff ff jmp 404349 <__libc_setup_tls+0x129> @@ -96,7 +96,7 @@ expression: diff 40a3e7: bf 02 00 00 00 mov $0x2,%edi - 40a3ec: 44 89 c8 mov %r9d,%eax - 40a3ef: 0f 05 syscall -+ 40a3ec: ++ 40a3ec: 40a3f1: 48 83 f8 fc cmp $0xfffffffffffffffc,%rax 40a3f5: 74 e9 je 40a3e0 <__libc_message_impl+0x150> 40a3f7: 45 31 c9 xor %r9d,%r9d @@ -106,7 +106,7 @@ expression: diff 40a5cf: be 80 00 00 00 mov $0x80,%esi - 40a5d4: b8 ca 00 00 00 mov $0xca,%eax - 40a5d9: 0f 05 syscall -+ 40a5d4: ++ 40a5d4: + 40a5d9: 90 nop + 40a5da: 90 nop 40a5db: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -118,7 +118,7 @@ expression: diff 40a635: b8 ca 00 00 00 mov $0xca,%eax - 40a63a: 40 80 f6 80 xor $0x80,%sil - 40a63e: 0f 05 syscall -+ 40a63a: ++ 40a63a: + 40a63f: 90 nop 40a640: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 40a646: 76 d6 jbe 40a61e <__lll_lock_wait+0xe> @@ -129,7 +129,7 @@ expression: diff 40a67c: be 81 00 00 00 mov $0x81,%esi - 40a681: b8 ca 00 00 00 mov $0xca,%eax - 40a686: 0f 05 syscall -+ 40a681: ++ 40a681: + 40a686: 90 nop + 40a687: 90 nop 40a688: c3 ret @@ -141,7 +141,7 @@ expression: diff 40a69b: ba 
01 00 00 00 mov $0x1,%edx - 40a6a0: b8 ca 00 00 00 mov $0xca,%eax - 40a6a5: 0f 05 syscall -+ 40a6a0: ++ 40a6a0: + 40a6a5: 90 nop + 40a6a6: 90 nop 40a6a7: c3 ret @@ -153,7 +153,7 @@ expression: diff 40bbdb: c6 05 3e 4c 0a 00 01 movb $0x1,0xa4c3e(%rip) # 4b0820 <__malloc_initialized> - 40bbe2: b8 3e 01 00 00 mov $0x13e,%eax - 40bbe7: 0f 05 syscall -+ 40bbe2: ++ 40bbe2: + 40bbe7: 90 nop + 40bbe8: 90 nop 40bbe9: 48 8d 5d d0 lea -0x30(%rbp),%rbx @@ -165,7 +165,7 @@ expression: diff 4181de: 66 90 xchg %ax,%ax - 4181e0: b8 e4 00 00 00 mov $0xe4,%eax - 4181e5: 0f 05 syscall -+ 4181e0: ++ 4181e0: + 4181e5: 90 nop + 4181e6: 90 nop 4181e7: 85 c0 test %eax,%eax @@ -177,7 +177,7 @@ expression: diff 418249: 89 d0 mov %edx,%eax - 41824b: 0f 05 syscall - 41824d: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax -+ 41824b: ++ 41824b: + 418250: 90 nop + 418251: 90 nop + 418252: 90 nop @@ -190,7 +190,7 @@ expression: diff 418260: f3 0f 1e fa endbr64 - 418264: b8 05 00 00 00 mov $0x5,%eax - 418269: 0f 05 syscall -+ 418264: ++ 418264: + 418269: 90 nop + 41826a: 90 nop 41826b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -202,7 +202,7 @@ expression: diff 418290: f3 0f 1e fa endbr64 - 418294: b8 03 00 00 00 mov $0x3,%eax - 418299: 0f 05 syscall -+ 418294: ++ 418294: + 418299: 90 nop + 41829a: 90 nop 41829b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -214,7 +214,7 @@ expression: diff 4182f9: 74 25 je 418320 <__fcntl64_nocancel+0x60> - 4182fb: b8 48 00 00 00 mov $0x48,%eax - 418300: 0f 05 syscall -+ 4182fb: ++ 4182fb: + 418300: 90 nop + 418301: 90 nop 418302: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -226,7 +226,7 @@ expression: diff 418324: be 10 00 00 00 mov $0x10,%esi - 418329: b8 48 00 00 00 mov $0x48,%eax - 41832e: 0f 05 syscall -+ 418329: ++ 418329: + 41832e: 90 nop + 41832f: 90 nop 418330: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -238,7 +238,7 @@ expression: diff 41837e: 74 20 je 4183a0 <__fcntl64_nocancel_adjusted+0x40> - 418380: b8 48 00 00 00 mov $0x48,%eax - 
418385: 0f 05 syscall -+ 418380: ++ 418380: + 418385: 90 nop + 418386: 90 nop 418387: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -250,7 +250,7 @@ expression: diff 4183a4: be 10 00 00 00 mov $0x10,%esi - 4183a9: b8 48 00 00 00 mov $0x48,%eax - 4183ae: 0f 05 syscall -+ 4183a9: ++ 4183a9: + 4183ae: 90 nop + 4183af: 90 nop 4183b0: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -262,7 +262,7 @@ expression: diff 41841a: 48 89 fe mov %rdi,%rsi - 41841d: bf 9c ff ff ff mov $0xffffff9c,%edi - 418422: 0f 05 syscall -+ 41841d: ++ 41841d: + 418422: 90 nop + 418423: 90 nop 418424: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -275,7 +275,7 @@ expression: diff - 418480: f3 0f 1e fa endbr64 - 418484: 31 c0 xor %eax,%eax - 418486: 0f 05 syscall -+ 418480: ++ 418480: + 418485: 90 nop + 418486: 90 nop + 418487: 90 nop @@ -288,7 +288,7 @@ expression: diff 4184b0: f3 0f 1e fa endbr64 - 4184b4: b8 0c 00 00 00 mov $0xc,%eax - 4184b9: 0f 05 syscall -+ 4184b4: ++ 4184b4: + 4184b9: 90 nop + 4184ba: 90 nop 4184bb: 48 89 05 96 83 09 00 mov %rax,0x98396(%rip) # 4b0858 <__curbrk> @@ -300,7 +300,7 @@ expression: diff 41871a: 48 8d 95 f0 ef ff ff lea -0x1010(%rbp),%rdx - 418721: b8 cc 00 00 00 mov $0xcc,%eax - 418726: 0f 05 syscall -+ 418721: ++ 418721: + 418726: 90 nop + 418727: 90 nop 418728: 85 c0 test %eax,%eax @@ -312,7 +312,7 @@ expression: diff 418b40: f3 0f 1e fa endbr64 - 418b44: b8 1c 00 00 00 mov $0x1c,%eax - 418b49: 0f 05 syscall -+ 418b44: ++ 418b44: + 418b49: 90 nop + 418b4a: 90 nop 418b4b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -324,7 +324,7 @@ expression: diff 418b92: 48 89 df mov %rbx,%rdi - 418b95: b8 09 00 00 00 mov $0x9,%eax - 418b9a: 0f 05 syscall -+ 418b95: ++ 418b95: + 418b9a: 90 nop + 418b9b: 90 nop 418b9c: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -336,7 +336,7 @@ expression: diff 418bed: b8 09 00 00 00 mov $0x9,%eax - 418bf2: 41 83 ca 40 or $0x40,%r10d - 418bf6: 0f 05 syscall -+ 418bf2: ++ 418bf2: + 418bf7: 90 nop 418bf8: 48 3d 00 f0 ff ff cmp 
$0xfffffffffffff000,%rax 418bfe: 76 a4 jbe 418ba4 <__mmap64+0x34> @@ -347,7 +347,7 @@ expression: diff 418c30: f3 0f 1e fa endbr64 - 418c34: b8 0a 00 00 00 mov $0xa,%eax - 418c39: 0f 05 syscall -+ 418c34: ++ 418c34: + 418c39: 90 nop + 418c3a: 90 nop 418c3b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -359,7 +359,7 @@ expression: diff 418c60: f3 0f 1e fa endbr64 - 418c64: b8 0b 00 00 00 mov $0xb,%eax - 418c69: 0f 05 syscall -+ 418c64: ++ 418c64: + 418c69: 90 nop + 418c6a: 90 nop 418c6b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -371,7 +371,7 @@ expression: diff 418d47: 45 31 c0 xor %r8d,%r8d - 418d4a: b8 19 00 00 00 mov $0x19,%eax - 418d4f: 0f 05 syscall -+ 418d4a: ++ 418d4a: + 418d4f: 90 nop + 418d50: 90 nop 418d51: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -383,7 +383,7 @@ expression: diff 418e23: bf 41 4d 56 53 mov $0x53564d41,%edi - 418e28: b8 9d 00 00 00 mov $0x9d,%eax - 418e2d: 0f 05 syscall -+ 418e28: ++ 418e28: + 418e2d: 90 nop + 418e2e: 90 nop 418e2f: 83 f8 ea cmp $0xffffffea,%eax @@ -395,7 +395,7 @@ expression: diff 418e50: f3 0f 1e fa endbr64 - 418e54: b8 63 00 00 00 mov $0x63,%eax - 418e59: 0f 05 syscall -+ 418e54: ++ 418e54: + 418e59: 90 nop + 418e5a: 90 nop 418e5b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -407,7 +407,7 @@ expression: diff 41e494: 48 8d 9d e0 ef ff ff lea -0x1020(%rbp),%rbx - 41e49b: 48 89 da mov %rbx,%rdx - 41e49e: 0f 05 syscall -+ 41e49b: ++ 41e49b: 41e4a0: 85 c0 test %eax,%eax 41e4a2: 7e 5c jle 41e500 <_dl_get_origin+0xa0> 41e4a4: 0f b6 95 e0 ef ff ff movzbl -0x1020(%rbp),%edx @@ -417,7 +417,7 @@ expression: diff 41e6db: 48 8d b5 d0 f6 ff ff lea -0x930(%rbp),%rsi - 41e6e2: b8 14 00 00 00 mov $0x14,%eax - 41e6e7: 0f 05 syscall -+ 41e6e2: ++ 41e6e2: + 41e6e7: 90 nop + 41e6e8: 90 nop 41e6e9: 48 81 c4 38 09 00 00 add $0x938,%rsp @@ -429,7 +429,7 @@ expression: diff 41ff24: 48 8d bb d0 02 00 00 lea 0x2d0(%rbx),%rdi - 41ff2b: b8 da 00 00 00 mov $0xda,%eax - 41ff30: 0f 05 syscall -+ 41ff2b: ++ 41ff2b: + 
41ff30: 90 nop + 41ff31: 90 nop 41ff32: 89 83 d0 02 00 00 mov %eax,0x2d0(%rbx) @@ -441,7 +441,7 @@ expression: diff 41ff81: 66 0f 6c c0 punpcklqdq %xmm0,%xmm0 - 41ff85: 0f 11 83 d8 02 00 00 movups %xmm0,0x2d8(%rbx) - 41ff8c: 0f 05 syscall -+ 41ff85: ++ 41ff85: + 41ff8a: 90 nop + 41ff8b: 90 nop + 41ff8c: 90 nop @@ -455,7 +455,7 @@ expression: diff 41fff4: 48 89 df mov %rbx,%rdi - 41fff7: b8 4e 01 00 00 mov $0x14e,%eax - 41fffc: 0f 05 syscall -+ 41fff7: ++ 41fff7: + 41fffc: 90 nop + 41fffd: 90 nop 41fffe: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -467,7 +467,7 @@ expression: diff 421344: bf 02 50 00 00 mov $0x5002,%edi - 421349: b8 9e 00 00 00 mov $0x9e,%eax - 42134e: 0f 05 syscall -+ 421349: ++ 421349: + 42134e: 90 nop + 42134f: 90 nop 421350: 89 c7 mov %eax,%edi @@ -479,7 +479,7 @@ expression: diff 4213a5: 48 89 e5 mov %rsp,%rbp - 4213a8: 48 8d 75 f8 lea -0x8(%rbp),%rsi - 4213ac: 0f 05 syscall -+ 4213a8: ++ 4213a8: + 4213ad: 90 nop 4213ae: 48 85 c0 test %rax,%rax 4213b1: 74 15 je 4213c8 <_dl_cet_setup_features+0x38> @@ -491,7 +491,7 @@ expression: diff - 4213f7: bf 03 50 00 00 mov $0x5003,%edi - 4213fc: 89 d0 mov %edx,%eax - 4213fe: 0f 05 syscall -+ 4213f7: ++ 4213f7: + 4213fc: 90 nop + 4213fd: 90 nop + 4213fe: 90 nop @@ -506,13 +506,13 @@ expression: diff - 421455: 31 ff xor %edi,%edi - 421457: 89 f0 mov %esi,%eax - 421459: 0f 05 syscall -+ 421455: ++ 421455: + 42145a: 90 nop 42145b: 48 89 c2 mov %rax,%rdx - 42145e: 48 8d 3c 18 lea (%rax,%rbx,1),%rdi - 421462: 89 f0 mov %esi,%eax - 421464: 0f 05 syscall -+ 42145e: ++ 42145e: + 421463: 90 nop + 421464: 90 nop + 421465: 90 nop @@ -525,7 +525,7 @@ expression: diff 421481: 48 89 de mov %rbx,%rsi - 421484: b8 09 00 00 00 mov $0x9,%eax - 421489: 0f 05 syscall -+ 421484: ++ 421484: + 421489: 90 nop + 42148a: 90 nop 42148b: 31 d2 xor %edx,%edx @@ -537,7 +537,7 @@ expression: diff 444c16: 48 8d 35 b3 0a 04 00 lea 0x40ab3(%rip),%rsi # 4856d0 - 444c1d: b8 0e 00 00 00 mov $0xe,%eax - 444c22: 0f 05 syscall -+ 444c1d: ++ 444c1d: + 
444c22: 90 nop + 444c23: 90 nop 444c24: 31 c0 xor %eax,%eax @@ -549,7 +549,7 @@ expression: diff 444c63: bf 02 00 00 00 mov $0x2,%edi - 444c68: b8 0e 00 00 00 mov $0xe,%eax - 444c6d: 0f 05 syscall -+ 444c68: ++ 444c68: + 444c6d: 90 nop + 444c6e: 90 nop 444c6f: 48 8b 45 d8 mov -0x28(%rbp),%rax @@ -561,7 +561,7 @@ expression: diff 444ca8: 89 de mov %ebx,%esi - 444caa: b8 ea 00 00 00 mov $0xea,%eax - 444caf: 0f 05 syscall -+ 444caa: ++ 444caa: + 444caf: 90 nop + 444cb0: 90 nop 444cb1: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -572,7 +572,7 @@ expression: diff 444cbe: 66 90 xchg %ax,%ax - 444cc0: b8 ba 00 00 00 mov $0xba,%eax - 444cc5: 0f 05 syscall -+ 444cc0: ++ 444cc0: + 444cc5: 90 nop + 444cc6: 90 nop 444cc7: 89 c3 mov %eax,%ebx @@ -582,7 +582,7 @@ expression: diff 444cd3: 89 c7 mov %eax,%edi - 444cd5: b8 ea 00 00 00 mov $0xea,%eax - 444cda: 0f 05 syscall -+ 444cd5: ++ 444cd5: + 444cda: 90 nop + 444cdb: 90 nop 444cdc: 89 c3 mov %eax,%ebx @@ -594,7 +594,7 @@ expression: diff 444d78: 4c 89 fa mov %r15,%rdx - 444d7b: 48 8d 35 4e 09 04 00 lea 0x4094e(%rip),%rsi # 4856d0 - 444d82: 0f 05 syscall -+ 444d7b: ++ 444d7b: + 444d80: 90 nop + 444d81: 90 nop + 444d82: 90 nop @@ -608,7 +608,7 @@ expression: diff 444dc4: bf 02 00 00 00 mov $0x2,%edi - 444dc9: b8 0e 00 00 00 mov $0xe,%eax - 444dce: 0f 05 syscall -+ 444dc9: ++ 444dc9: + 444dce: 90 nop + 444dcf: 90 nop 444dd0: 48 8b 45 c8 mov -0x38(%rbp),%rax @@ -620,7 +620,7 @@ expression: diff 444e08: 89 de mov %ebx,%esi - 444e0a: b8 ea 00 00 00 mov $0xea,%eax - 444e0f: 0f 05 syscall -+ 444e0a: ++ 444e0a: + 444e0f: 90 nop + 444e10: 90 nop 444e11: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -630,7 +630,7 @@ expression: diff 444e1e: eb 8a jmp 444daa <__pthread_kill+0x8a> - 444e20: b8 ba 00 00 00 mov $0xba,%eax - 444e25: 0f 05 syscall -+ 444e20: ++ 444e20: + 444e25: 90 nop + 444e26: 90 nop 444e27: 89 c3 mov %eax,%ebx @@ -640,7 +640,7 @@ expression: diff 444e33: 89 c7 mov %eax,%edi - 444e35: b8 ea 00 00 00 mov $0xea,%eax - 444e3a: 0f 05 syscall 
-+ 444e35: ++ 444e35: + 444e3a: 90 nop + 444e3b: 90 nop 444e3c: 41 89 c6 mov %eax,%r14d @@ -652,7 +652,7 @@ expression: diff 445107: f7 d6 not %esi - 445109: 81 e6 80 00 00 00 and $0x80,%esi - 44510f: 0f 05 syscall -+ 445109: ++ 445109: + 44510e: 90 nop + 44510f: 90 nop + 445110: 90 nop @@ -665,7 +665,7 @@ expression: diff 4452e4: 48 89 df mov %rbx,%rdi - 4452e7: b8 ca 00 00 00 mov $0xca,%eax - 4452ec: 0f 05 syscall -+ 4452e7: ++ 4452e7: + 4452ec: 90 nop + 4452ed: 90 nop 4452ee: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -677,7 +677,7 @@ expression: diff 4454ff: be 07 00 00 00 mov $0x7,%esi - 445504: b8 ca 00 00 00 mov $0xca,%eax - 445509: 0f 05 syscall -+ 445504: ++ 445504: + 445509: 90 nop + 44550a: 90 nop 44550b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -689,7 +689,7 @@ expression: diff 445aa9: 81 e6 80 00 00 00 and $0x80,%esi - 445aaf: 40 80 f6 81 xor $0x81,%sil - 445ab3: 0f 05 syscall -+ 445aaf: ++ 445aaf: + 445ab4: 90 nop 445ab5: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 445abb: 0f 87 0e 02 00 00 ja 445ccf <__pthread_mutex_unlock_full+0x3bf> @@ -700,7 +700,7 @@ expression: diff 445cfd: 4c 89 c7 mov %r8,%rdi - 445d00: b8 ca 00 00 00 mov $0xca,%eax - 445d05: 0f 05 syscall -+ 445d00: ++ 445d00: + 445d05: 90 nop + 445d06: 90 nop 445d07: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -712,7 +712,7 @@ expression: diff 445d29: 4c 89 c7 mov %r8,%rdi - 445d2c: b8 ca 00 00 00 mov $0xca,%eax - 445d31: 0f 05 syscall -+ 445d2c: ++ 445d2c: + 445d31: 90 nop + 445d32: 90 nop 445d33: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -724,7 +724,7 @@ expression: diff 44600f: 48 89 df mov %rbx,%rdi - 446012: b8 ca 00 00 00 mov $0xca,%eax - 446017: 0f 05 syscall -+ 446012: ++ 446012: + 446017: 90 nop + 446018: 90 nop 446019: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -736,7 +736,7 @@ expression: diff 4460b0: 48 89 df mov %rbx,%rdi - 4460b3: b8 ca 00 00 00 mov $0xca,%eax - 4460b8: 0f 05 syscall -+ 4460b3: ++ 4460b3: + 4460b8: 90 nop + 4460b9: 90 nop 
4460ba: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -748,7 +748,7 @@ expression: diff 446142: be 81 00 00 00 mov $0x81,%esi - 446147: b8 ca 00 00 00 mov $0xca,%eax - 44614c: 0f 05 syscall -+ 446147: ++ 446147: + 44614c: 90 nop + 44614d: 90 nop 44614e: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -760,7 +760,7 @@ expression: diff 4462e3: c1 e6 07 shl $0x7,%esi - 4462e6: 40 80 f6 81 xor $0x81,%sil - 4462ea: 0f 05 syscall -+ 4462e6: ++ 4462e6: + 4462eb: 90 nop 4462ec: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 4462f2: 0f 86 2e ff ff ff jbe 446226 <___pthread_rwlock_rdlock+0x46> @@ -771,7 +771,7 @@ expression: diff 446437: 40 80 f6 81 xor $0x81,%sil - 44643b: b8 ca 00 00 00 mov $0xca,%eax - 446440: 0f 05 syscall -+ 44643b: ++ 44643b: + 446440: 90 nop + 446441: 90 nop 446442: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -783,7 +783,7 @@ expression: diff 44648a: b8 ca 00 00 00 mov $0xca,%eax - 44648f: 40 80 f6 81 xor $0x81,%sil - 446493: 0f 05 syscall -+ 44648f: ++ 44648f: + 446494: 90 nop 446495: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 44649b: 76 83 jbe 446420 <___pthread_rwlock_unlock+0x50> @@ -794,7 +794,7 @@ expression: diff 446511: 40 80 f6 81 xor $0x81,%sil - 446515: b8 ca 00 00 00 mov $0xca,%eax - 44651a: 0f 05 syscall -+ 446515: ++ 446515: + 44651a: 90 nop + 44651b: 90 nop 44651c: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -806,7 +806,7 @@ expression: diff 446577: b8 ca 00 00 00 mov $0xca,%eax - 44657c: 40 80 f6 81 xor $0x81,%sil - 446580: 0f 05 syscall -+ 44657c: ++ 44657c: + 446581: 90 nop 446582: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 446588: 0f 86 6c ff ff ff jbe 4464fa <___pthread_rwlock_unlock+0x12a> @@ -817,7 +817,7 @@ expression: diff 446855: 40 80 f6 81 xor $0x81,%sil - 446859: b8 ca 00 00 00 mov $0xca,%eax - 44685e: 0f 05 syscall -+ 446859: ++ 446859: + 44685e: 90 nop + 44685f: 90 nop 446860: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -829,7 +829,7 @@ expression: diff 446880: 40 80 f6 81 xor $0x81,%sil - 
446884: b8 ca 00 00 00 mov $0xca,%eax - 446889: 0f 05 syscall -+ 446884: ++ 446884: + 446889: 90 nop + 44688a: 90 nop 44688b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -841,7 +841,7 @@ expression: diff 446924: 40 80 f6 81 xor $0x81,%sil - 446928: b8 ca 00 00 00 mov $0xca,%eax - 44692d: 0f 05 syscall -+ 446928: ++ 446928: + 44692d: 90 nop + 44692e: 90 nop 44692f: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -853,7 +853,7 @@ expression: diff 446a0b: 41 ba 08 00 00 00 mov $0x8,%r10d - 446a11: b8 0e 00 00 00 mov $0xe,%eax - 446a16: 0f 05 syscall -+ 446a11: ++ 446a11: + 446a16: 90 nop + 446a17: 90 nop 446a18: 89 c2 mov %eax,%edx @@ -865,7 +865,7 @@ expression: diff 45ba2c: 48 0f 47 d0 cmova %rax,%rdx - 45ba30: b8 d9 00 00 00 mov $0xd9,%eax - 45ba35: 0f 05 syscall -+ 45ba30: ++ 45ba30: + 45ba35: 90 nop + 45ba36: 90 nop 45ba37: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -877,7 +877,7 @@ expression: diff 45bb50: f3 0f 1e fa endbr64 - 45bb54: b8 27 00 00 00 mov $0x27,%eax - 45bb59: 0f 05 syscall -+ 45bb54: ++ 45bb54: + 45bb59: 90 nop + 45bb5a: 90 nop 45bb5b: c3 ret @@ -889,7 +889,7 @@ expression: diff 45bba0: f3 0f 1e fa endbr64 - 45bba4: b8 8f 00 00 00 mov $0x8f,%eax - 45bba9: 0f 05 syscall -+ 45bba4: ++ 45bba4: + 45bba9: 90 nop + 45bbaa: 90 nop 45bbab: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -901,7 +901,7 @@ expression: diff 45bbd0: f3 0f 1e fa endbr64 - 45bbd4: b8 91 00 00 00 mov $0x91,%eax - 45bbd9: 0f 05 syscall -+ 45bbd4: ++ 45bbd4: + 45bbd9: 90 nop + 45bbda: 90 nop 45bbdb: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -913,7 +913,7 @@ expression: diff 45bc00: f3 0f 1e fa endbr64 - 45bc04: b8 92 00 00 00 mov $0x92,%eax - 45bc09: 0f 05 syscall -+ 45bc04: ++ 45bc04: + 45bc09: 90 nop + 45bc0a: 90 nop 45bc0b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -925,7 +925,7 @@ expression: diff 45bc30: f3 0f 1e fa endbr64 - 45bc34: b8 93 00 00 00 mov $0x93,%eax - 45bc39: 0f 05 syscall -+ 45bc34: ++ 45bc34: + 45bc39: 90 nop + 45bc3a: 90 nop 
45bc3b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -937,7 +937,7 @@ expression: diff 45bc60: f3 0f 1e fa endbr64 - 45bc64: b8 90 00 00 00 mov $0x90,%eax - 45bc69: 0f 05 syscall -+ 45bc64: ++ 45bc64: + 45bc69: 90 nop + 45bc6a: 90 nop 45bc6b: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax @@ -949,7 +949,7 @@ expression: diff 45bd0d: 48 8b bd 08 ff ff ff mov -0xf8(%rbp),%rdi - 45bd14: b8 4f 00 00 00 mov $0x4f,%eax - 45bd19: 0f 05 syscall -+ 45bd14: ++ 45bd14: + 45bd19: 90 nop + 45bd1a: 90 nop 45bd1b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -961,7 +961,7 @@ expression: diff 45c510: f3 0f 1e fa endbr64 - 45c514: b8 08 00 00 00 mov $0x8,%eax - 45c519: 0f 05 syscall -+ 45c514: ++ 45c514: + 45c519: 90 nop + 45c51a: 90 nop 45c51b: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -973,7 +973,7 @@ expression: diff 45c5a9: bf 9c ff ff ff mov $0xffffff9c,%edi - 45c5ae: b8 01 01 00 00 mov $0x101,%eax - 45c5b3: 0f 05 syscall -+ 45c5ae: ++ 45c5ae: + 45c5b3: 90 nop + 45c5b4: 90 nop 45c5b5: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -985,7 +985,7 @@ expression: diff 45c619: bf 9c ff ff ff mov $0xffffff9c,%edi - 45c61e: b8 01 01 00 00 mov $0x101,%eax - 45c623: 0f 05 syscall -+ 45c61e: ++ 45c61e: + 45c623: 90 nop + 45c624: 90 nop 45c625: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -997,7 +997,7 @@ expression: diff 45c6b9: 74 51 je 45c70c <__libc_openat64+0x8c> - 45c6bb: b8 01 01 00 00 mov $0x101,%eax - 45c6c0: 0f 05 syscall -+ 45c6bb: ++ 45c6bb: + 45c6c0: 90 nop + 45c6c1: 90 nop 45c6c2: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1009,7 +1009,7 @@ expression: diff 45c72d: 8b 7d a8 mov -0x58(%rbp),%edi - 45c730: b8 01 01 00 00 mov $0x101,%eax - 45c735: 0f 05 syscall -+ 45c730: ++ 45c730: + 45c735: 90 nop + 45c736: 90 nop 45c737: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1021,7 +1021,7 @@ expression: diff 45c79d: 31 c0 xor %eax,%eax - 45c79f: 0f 05 syscall - 45c7a1: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax -+ 45c79f: ++ 45c79f: + 
45c7a4: 90 nop + 45c7a5: 90 nop + 45c7a6: 90 nop @@ -1035,7 +1035,7 @@ expression: diff - 45c7d3: 8b 7d f8 mov -0x8(%rbp),%edi - 45c7d6: 31 c0 xor %eax,%eax - 45c7d8: 0f 05 syscall -+ 45c7d3: ++ 45c7d3: + 45c7d8: 90 nop + 45c7d9: 90 nop 45c7da: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1047,7 +1047,7 @@ expression: diff 45c85b: 74 13 je 45c870 <__libc_write+0x20> - 45c85d: b8 01 00 00 00 mov $0x1,%eax - 45c862: 0f 05 syscall -+ 45c85d: ++ 45c85d: + 45c862: 90 nop + 45c863: 90 nop 45c864: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1059,7 +1059,7 @@ expression: diff 45c893: 8b 7d f8 mov -0x8(%rbp),%edi - 45c896: b8 01 00 00 00 mov $0x1,%eax - 45c89b: 0f 05 syscall -+ 45c896: ++ 45c896: + 45c89b: 90 nop + 45c89c: 90 nop 45c89d: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1071,7 +1071,7 @@ expression: diff 45c920: 74 26 je 45c948 <__openat64_nocancel+0x58> - 45c922: b8 01 01 00 00 mov $0x101,%eax - 45c927: 0f 05 syscall -+ 45c922: ++ 45c922: + 45c927: 90 nop + 45c928: 90 nop 45c929: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1083,7 +1083,7 @@ expression: diff 45c984: 49 89 ca mov %rcx,%r10 - 45c987: b8 11 00 00 00 mov $0x11,%eax - 45c98c: 0f 05 syscall -+ 45c987: ++ 45c987: + 45c98c: 90 nop + 45c98d: 90 nop 45c98e: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1095,7 +1095,7 @@ expression: diff 45c9c0: f3 0f 1e fa endbr64 - 45c9c4: b8 01 00 00 00 mov $0x1,%eax - 45c9c9: 0f 05 syscall -+ 45c9c4: ++ 45c9c4: + 45c9c9: 90 nop + 45c9ca: 90 nop 45c9cb: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1107,7 +1107,7 @@ expression: diff 45ca13: 48 8d 55 d0 lea -0x30(%rbp),%rdx - 45ca17: b8 10 00 00 00 mov $0x10,%eax - 45ca1c: 0f 05 syscall -+ 45ca17: ++ 45ca17: + 45ca1c: 90 nop + 45ca1d: 90 nop 45ca1e: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1120,7 +1120,7 @@ expression: diff - 45cabb: b8 2e 01 00 00 mov $0x12e,%eax - 45cac0: 31 ff xor %edi,%edi - 45cac2: 0f 05 syscall -+ 45cabb: ++ 45cabb: + 45cac0: 90 nop + 45cac1: 90 nop + 
45cac2: 90 nop @@ -1134,7 +1134,7 @@ expression: diff 45ffa0: 48 8d 78 1c lea 0x1c(%rax),%rdi - 45ffa4: b8 ca 00 00 00 mov $0xca,%eax - 45ffa9: 0f 05 syscall -+ 45ffa4: ++ 45ffa4: + 45ffa9: 90 nop + 45ffaa: 90 nop 45ffab: 48 8d 3d 6e ab 04 00 lea 0x4ab6e(%rip),%rdi # 4aab20 <_dl_load_lock> @@ -1146,7 +1146,7 @@ expression: diff 46306a: be 80 00 00 00 mov $0x80,%esi - 46306f: 44 89 c8 mov %r9d,%eax - 463072: 0f 05 syscall -+ 46306f: ++ 46306f: 463074: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 46307a: 76 dc jbe 463058 <__thread_gscope_wait+0x88> 46307c: 83 f8 f5 cmp $0xfffffff5,%eax @@ -1156,7 +1156,7 @@ expression: diff 46310a: be 80 00 00 00 mov $0x80,%esi - 46310f: 44 89 c8 mov %r9d,%eax - 463112: 0f 05 syscall -+ 46310f: ++ 46310f: 463114: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 46311a: 76 dc jbe 4630f8 <__thread_gscope_wait+0x128> 46311c: 83 f8 f5 cmp $0xfffffff5,%eax @@ -1166,7 +1166,7 @@ expression: diff 00000000004669d0 <__restore_rt>: - 4669d0: 48 c7 c0 0f 00 00 00 mov $0xf,%rax - 4669d7: 0f 05 syscall -+ 4669d0: ++ 4669d0: + 4669d5: 90 nop + 4669d6: 90 nop + 4669d7: 90 nop @@ -1180,7 +1180,7 @@ expression: diff 466aad: 41 ba 08 00 00 00 mov $0x8,%r10d - 466ab3: b8 0d 00 00 00 mov $0xd,%eax - 466ab8: 0f 05 syscall -+ 466ab3: ++ 466ab3: + 466ab8: 90 nop + 466ab9: 90 nop 466aba: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax @@ -1192,7 +1192,7 @@ expression: diff 46cb16: be 80 00 00 00 mov $0x80,%esi - 46cb1b: 44 89 c0 mov %r8d,%eax - 46cb1e: 0f 05 syscall -+ 46cb1b: ++ 46cb1b: 46cb20: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 46cb26: 77 0d ja 46cb35 <__pthread_disable_asynccancel+0x65> 46cb28: 8b 0f mov (%rdi),%ecx @@ -1202,7 +1202,7 @@ expression: diff 46ccdf: 44 31 c6 xor %r8d,%esi - 46cce2: 45 31 c0 xor %r8d,%r8d - 46cce5: 0f 05 syscall -+ 46cce2: ++ 46cce2: 46cce7: 85 c0 test %eax,%eax 46cce9: 7f 27 jg 46cd12 <__futex_abstimed_wait64+0x62> 46cceb: 83 f8 ea cmp $0xffffffea,%eax @@ -1212,7 +1212,7 @@ expression: diff 46cd89: 44 89 e2 mov 
%r12d,%edx - 46cd8c: b8 ca 00 00 00 mov $0xca,%eax - 46cd91: 0f 05 syscall -+ 46cd8c: ++ 46cd8c: + 46cd91: 90 nop + 46cd92: 90 nop 46cd93: 48 89 c3 mov %rax,%rbx @@ -1224,7 +1224,7 @@ expression: diff 46ce17: 44 89 e2 mov %r12d,%edx - 46ce1a: b8 ca 00 00 00 mov $0xca,%eax - 46ce1f: 0f 05 syscall -+ 46ce1a: ++ 46ce1a: + 46ce1f: 90 nop + 46ce20: 90 nop 46ce21: 44 89 ef mov %r13d,%edi @@ -1236,7 +1236,7 @@ expression: diff 46ce6c: 31 d2 xor %edx,%edx - 46ce6e: b8 ca 00 00 00 mov $0xca,%eax - 46ce73: 0f 05 syscall -+ 46ce6e: ++ 46ce6e: + 46ce73: 90 nop + 46ce74: 90 nop 46ce75: 83 f8 da cmp $0xffffffda,%eax @@ -1248,7 +1248,7 @@ expression: diff 46f344: 41 89 ca mov %ecx,%r10d - 46f347: b8 06 01 00 00 mov $0x106,%eax - 46f34c: 0f 05 syscall -+ 46f347: ++ 46f347: + 46f34c: 90 nop + 46f34d: 90 nop 46f34e: 3d 00 f0 ff ff cmp $0xfffff000,%eax @@ -1260,7 +1260,7 @@ expression: diff 472975: 48 8d 78 1c lea 0x1c(%rax),%rdi - 472979: b8 ca 00 00 00 mov $0xca,%eax - 47297e: 0f 05 syscall -+ 472979: ++ 472979: + 47297e: 90 nop + 47297f: 90 nop 472980: eb 8c jmp 47290e <_dl_fixup+0x10e> @@ -1272,7 +1272,7 @@ expression: diff 476c10: 48 8d 78 1c lea 0x1c(%rax),%rdi - 476c14: b8 ca 00 00 00 mov $0xca,%eax - 476c19: 0f 05 syscall -+ 476c14: ++ 476c14: + 476c19: 90 nop + 476c1a: 90 nop 476c1b: 48 83 7d 98 00 cmpq $0x0,-0x68(%rbp) @@ -1284,7 +1284,7 @@ expression: diff 476e4a: 48 8d 78 1c lea 0x1c(%rax),%rdi - 476e4e: b8 ca 00 00 00 mov $0xca,%eax - 476e53: 0f 05 syscall -+ 476e4e: ++ 476e4e: + 476e53: 90 nop + 476e54: 90 nop 476e55: 48 83 7d 98 00 cmpq $0x0,-0x68(%rbp)