diff --git a/litebox/src/mm/linux.rs b/litebox/src/mm/linux.rs
index 389bf9694..be29d1ad2 100644
--- a/litebox/src/mm/linux.rs
+++ b/litebox/src/mm/linux.rs
@@ -58,10 +58,16 @@ bitflags::bitflags! {
 impl VmFlags {
     /// Compute the default `VM_MAY*` and `VM_SHARED` flags for a mapping.
     ///
-    /// Write permission (`VM_MAYWRITE`) is restricted only for shared **file-backed**
-    /// mappings, because writes cannot be propagated back to the underlying file.
-    pub(super) fn may_flags_for_mapping(shared: bool, file_backed: bool) -> Self {
-        let restrict_write = shared && file_backed;
+    /// Write permission (`VM_MAYWRITE`) is restricted for shared file-backed
+    /// mappings opened read-only, because writeback to a non-writable fd
+    /// cannot succeed. When the fd is writable, the shim's explicit writeback
+    /// mechanism handles flushing dirty pages to the file.
+    pub(super) fn may_flags_for_mapping(
+        shared: bool,
+        file_backed: bool,
+        fd_writable: bool,
+    ) -> Self {
+        let restrict_write = shared && file_backed && !fd_writable;
         let may = if restrict_write {
             Self::VM_MAY_ACCESS_FLAGS & !Self::VM_MAYWRITE
         } else {
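
A quick way to read the new rule is as a truth table. Hypothetical unit test against the patched helper (not part of this diff):

```rust
#[test]
fn may_write_follows_fd_writability() {
    // Shared + file-backed + read-only fd: write permission is withheld.
    assert!(!VmFlags::may_flags_for_mapping(true, true, false).contains(VmFlags::VM_MAYWRITE));
    // Shared + file-backed + writable fd: write permission is granted.
    assert!(VmFlags::may_flags_for_mapping(true, true, true).contains(VmFlags::VM_MAYWRITE));
    // Private and anonymous mappings were never restricted.
    assert!(VmFlags::may_flags_for_mapping(false, true, false).contains(VmFlags::VM_MAYWRITE));
    assert!(VmFlags::may_flags_for_mapping(true, false, false).contains(VmFlags::VM_MAYWRITE));
}
```
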
@@ -122,10 +128,11 @@ impl From<VmFlags> for MemoryRegionPermissions {
 }
 
 const DEFAULT_RESERVED_SPACE_SIZE: usize = 0x100_0000; // 16 MiB
+const MAP_32BIT_ADDR_LIMIT: usize = 0x8000_0000;
 
 bitflags::bitflags! {
     /// Options for page creation.
-    pub struct CreatePagesFlags: u8 {
+    pub struct CreatePagesFlags: u16 {
         /// Force the mapping to be created at the given address, resulting in any
         /// existing overlapping mappings being removed.
         const FIXED_ADDR = 1 << 0;
@@ -143,6 +150,15 @@ bitflags::bitflags! {
         const NOREPLACE = 1 << 5;
         /// The mapping is shared.
         const SHARED = 1 << 6;
+        /// The underlying fd is writable. When set for a shared file-backed
+        /// mapping, the VMA gets `VM_MAYWRITE` so `mprotect(PROT_WRITE)` can
+        /// succeed later.
+        const FD_WRITABLE = 1 << 7;
+        /// Request a sparse reservation without reserving swap/commit upfront
+        /// when the platform supports it.
+        const NORESERVE = 1 << 8;
+        /// Keep the mapping below Linux's MAP_32BIT 2 GiB ceiling.
+        const LOW_2G = 1 << 9;
     }
 }
 
@@ -268,6 +284,8 @@ pub(super) struct VmArea {
     flags: VmFlags,
     /// Whether this area is backed by a file
     is_file_backed: bool,
+    /// Whether this mapping was created with MAP_NORESERVE semantics.
+    noreserve: bool,
 }
 
 impl VmArea {
@@ -283,14 +301,28 @@ impl VmArea {
         self.is_file_backed
     }
 
+    /// Check whether this area uses sparse noreserve semantics.
+    #[inline]
+    pub(super) fn noreserve(self) -> bool {
+        self.noreserve
+    }
+
     /// Create a new [`VmArea`] with the given flags.
     #[inline]
     pub(super) fn new(flags: VmFlags, is_file_backed: bool) -> Self {
         Self {
             flags,
             is_file_backed,
+            noreserve: false,
         }
     }
+
+    /// Return a copy of this VMA with the requested MAP_NORESERVE semantics.
+    #[inline]
+    pub(super) fn with_noreserve(mut self, noreserve: bool) -> Self {
+        self.noreserve = noreserve;
+        self
+    }
 }
 
 /// Virtual Memory Manager
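
Downstream, a shim translating `mmap(2)` arguments might compose the new bits as follows. Sketch only: `map_flags` and `fd_is_writable` stand in for the caller's state, and the `MapFlags::MAP_NORESERVE` / `MAP_32BIT` variants are assumed, not taken from this patch:

```rust
let mut flags = CreatePagesFlags::MAP_FILE | CreatePagesFlags::SHARED;
if fd_is_writable {
    // Grants VM_MAYWRITE so a later mprotect(PROT_WRITE) can succeed.
    flags |= CreatePagesFlags::FD_WRITABLE;
}
if map_flags.contains(MapFlags::MAP_NORESERVE) {
    flags |= CreatePagesFlags::NORESERVE; // sparse: no upfront commit charge
}
if map_flags.contains(MapFlags::MAP_32BIT) {
    flags |= CreatePagesFlags::LOW_2G; // keep the whole mapping below 0x8000_0000
}
```
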
@@ -300,38 +332,96 @@ impl VmArea
 pub(super) struct Vmem<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> {
     /// Memory backend that provides the actual memory.
     pub(super) platform: &'static Platform,
+    /// Initial program break address. `brk` may not shrink below this value.
+    pub(super) brk_base: usize,
     /// Current program break address.
     pub(super) brk: usize,
+    /// Page-aligned heap frontier that is actually backed or intentionally
+    /// reserved for contiguous `brk` growth.
+    pub(super) brk_frontier: usize,
     /// Virtual memory areas.
     vmas: RangeMap<usize, VmArea>,
+    /// Lower bound (inclusive) of the VA range this Vmem manages.
+    addr_min: usize,
+    /// Upper bound (exclusive) of the VA range this Vmem manages.
+    addr_max: usize,
 }
 
 impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem<Platform, ALIGN> {
     pub(super) const STACK_GUARD_GAP: usize = 256 << 12;
 
-    /// Create a new [`Vmem`] instance with the given memory [backend](PageManagementProvider).
-    pub(super) fn new(platform: &'static Platform) -> Self {
+    /// Create a new [`Vmem`] instance with the given memory [backend](PageManagementProvider)
+    /// and VA range.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the range is empty, not page-aligned, or exceeds the
+    /// platform's `TASK_ADDR_MIN..TASK_ADDR_MAX`.
+    pub(super) fn new(platform: &'static Platform, range: Range<usize>) -> Self {
+        assert!(
+            range.start < range.end,
+            "Vmem: range must be non-empty ({:#x}..{:#x})",
+            range.start,
+            range.end,
+        );
+        assert!(
+            range.start.is_multiple_of(ALIGN) && range.end.is_multiple_of(ALIGN),
+            "Vmem: range bounds must be aligned to {ALIGN} bytes ({:#x}..{:#x})",
+            range.start,
+            range.end,
+        );
+        assert!(
+            range.start >= Platform::TASK_ADDR_MIN,
+            "Vmem: range start {:#x} < TASK_ADDR_MIN {:#x}",
+            range.start,
+            Platform::TASK_ADDR_MIN,
+        );
+        assert!(
+            range.end <= Platform::TASK_ADDR_MAX,
+            "Vmem: range end {:#x} > TASK_ADDR_MAX {:#x}",
+            range.end,
+            Platform::TASK_ADDR_MAX,
+        );
         let mut vmem = Self {
             vmas: RangeMap::new(),
+            brk_base: 0,
             brk: 0,
+            brk_frontier: 0,
             platform,
+            addr_min: range.start,
+            addr_max: range.end,
         };
         for each in platform.reserved_pages() {
             assert!(
                 each.start % ALIGN == 0 && each.end % ALIGN == 0,
                 "Vmem: reserved range is not aligned to {ALIGN} bytes"
             );
+            // Clip reserved ranges to the configured process range. Entries
+            // entirely outside addr_min..addr_max are skipped; partially
+            // overlapping entries are clamped.
+            let clamped_start = each.start.max(vmem.addr_min);
+            let clamped_end = each.end.min(vmem.addr_max);
+            if clamped_start >= clamped_end {
+                continue;
+            }
             vmem.vmas.insert(
-                each.start..each.end,
-                VmArea {
-                    flags: VmFlags::empty(),
-                    is_file_backed: false,
-                },
+                clamped_start..clamped_end,
+                VmArea::new(VmFlags::empty(), false),
             );
         }
         vmem
     }
 
+    /// Lower bound (inclusive) of the VA range managed by this Vmem.
+    pub(super) fn addr_min(&self) -> usize {
+        self.addr_min
+    }
+
+    /// Upper bound (exclusive) of the VA range managed by this Vmem.
+    pub(super) fn addr_max(&self) -> usize {
+        self.addr_max
+    }
+
     /// Gets an iterator over all pairs of ([`Range`], [`VmArea`]),
     /// ordered by key range.
     pub(super) fn iter(&self) -> impl Iterator<Item = (&Range<usize>, &VmArea)> {
@@ -421,8 +511,17 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
             let start = r.start.max(range.start);
             let end = r.end.min(range.end);
             let new_range = PageRange::new(start, end).unwrap();
-            unsafe { self.insert_mapping(new_range, vma, false, FixedAddressBehavior::Replace) }
-                .expect("failed to reset pages");
+            unsafe {
+                self.insert_mapping(
+                    new_range,
+                    vma,
+                    false,
+                    vma.noreserve(),
+                    FixedAddressBehavior::Replace,
+                    false,
+                )
+            }
+            .expect("failed to reset pages");
         }
         if unmapped_error {
             Err(VmemResetError::AlreadyUnallocated)
@@ -450,13 +549,15 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
         suggested_range: PageRange<ALIGN>,
         vma: VmArea,
         populate_pages_immediately: bool,
+        noreserve: bool,
         fixed_address_behavior: FixedAddressBehavior,
+        require_low_2g: bool,
     ) -> Result<PageAddr<ALIGN>, AllocationError> {
         let (start, end) = (suggested_range.start, suggested_range.end);
-        if start < Platform::TASK_ADDR_MIN {
+        if start < self.addr_min {
             return Err(AllocationError::BelowMinAddress);
         }
-        if end > Platform::TASK_ADDR_MAX {
+        if end > self.addr_max {
             return Err(AllocationError::AboveMaxAddress);
         }
         let platform_fixed_address_behavior = match fixed_address_behavior {
@@ -509,7 +610,7 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
             MemoryRegionPermissions::from_bits(permissions).unwrap(),
             vma.flags.contains(VmFlags::VM_GROWSDOWN),
             populate_pages_immediately,
-            false,
+            noreserve,
             platform_fixed_address_behavior,
         )
         .map_err(|err| match err {
@@ -518,9 +619,20 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
         })?;
         let new_start = ret.as_usize();
         let new_end = new_start + suggested_range.len();
+        // When the platform returns an address outside the managed VA range
+        // (e.g., VirtualAlloc chose a different location because the hinted
+        // address was occupied by the host), free the allocation and report
+        // out-of-memory instead of recording a VMA at an invalid address.
+        if new_start < self.addr_min
+            || new_end > self.addr_max
+            || (require_low_2g && new_end > MAP_32BIT_ADDR_LIMIT)
+        {
+            unsafe {
+                let _ = self.platform.deallocate_pages(new_start..new_end);
+            }
+            return Err(AllocationError::OutOfMemory);
+        }
         self.vmas.insert(new_start..new_end, vma);
-        debug_assert!(new_start >= Platform::TASK_ADDR_MIN);
-        debug_assert!(new_end <= Platform::TASK_ADDR_MAX);
         Ok(ret)
     }
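
The new guard matters on hosts that treat the requested address as advisory. Made-up numbers for the case it catches: the managed range is `0x1_0000..0x2_0000_0000`, the hint was `0x1000_0000`, and the host allocator lands at `0x7ffe_0000_0000`; recording that VMA would poison later lookups, so the pages are freed and the caller sees `OutOfMemory`, which the retry loop in the next hunk can absorb for non-fixed requests:

```rust
fn main() {
    // All constants made up for illustration.
    let (addr_min, addr_max) = (0x1_0000_usize, 0x2_0000_0000_usize);
    let (new_start, new_end) = (0x7ffe_0000_0000_usize, 0x7ffe_0001_0000_usize);
    // The guard fires, so the pages are freed and OutOfMemory is returned.
    assert!(new_start < addr_min || new_end > addr_max);
}
```
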
@@ -567,30 +679,69 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
                 0
             })
             .unwrap();
-        let new_addr = self
-            .get_unmmaped_area(
-                suggested_address,
-                total_length,
-                flags.contains(CreatePagesFlags::FIXED_ADDR),
-            )
-            .ok_or(AllocationError::OutOfMemory)?;
-        // new_addr must be ALIGN aligned
-        let new_range = PageRange::new(new_addr, new_addr + length.as_usize()).unwrap();
-        unsafe {
-            self.insert_mapping(
-                new_range,
-                vma,
-                flags.contains(CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY),
-                if flags.contains(CreatePagesFlags::FIXED_ADDR) {
-                    if flags.contains(CreatePagesFlags::NOREPLACE) {
-                        FixedAddressBehavior::NoReplace
-                    } else {
-                        FixedAddressBehavior::Replace
+        let fixed_addr = flags.contains(CreatePagesFlags::FIXED_ADDR);
+        let require_low_2g = flags.contains(CreatePagesFlags::LOW_2G);
+        let fixed_address_behavior = if fixed_addr {
+            if flags.contains(CreatePagesFlags::NOREPLACE) {
+                FixedAddressBehavior::NoReplace
+            } else {
+                FixedAddressBehavior::Replace
+            }
+        } else if require_low_2g {
+            FixedAddressBehavior::NoReplace
+        } else {
+            FixedAddressBehavior::Hint
+        };
+        let mut next_top_down_max_start = None;
+        let mut pending_hint = if fixed_addr { None } else { suggested_address };
+        loop {
+            let candidate_hint = if fixed_addr {
+                suggested_address
+            } else if next_top_down_max_start.is_none() {
+                pending_hint.take()
+            } else {
+                None
+            };
+            let used_hint = candidate_hint.is_some();
+            let new_addr = self
+                .get_unmmaped_area(
+                    candidate_hint,
+                    total_length,
+                    fixed_addr,
+                    require_low_2g,
+                    next_top_down_max_start,
+                )
+                .ok_or(AllocationError::OutOfMemory)?;
+            // new_addr must be ALIGN aligned
+            let new_range = PageRange::new(new_addr, new_addr + length.as_usize()).unwrap();
+            match unsafe {
+                self.insert_mapping(
+                    new_range,
+                    vma,
+                    flags.contains(CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY),
+                    flags.contains(CreatePagesFlags::NORESERVE),
+                    fixed_address_behavior,
+                    require_low_2g,
+                )
+            } {
+                Ok(ret) => return Ok(ret),
+                // Non-fixed mappings treat the requested address as a hint.
+                // If the host allocator can't realize that hint inside the
+                // guest partition, fall back to another guest VA instead of
+                // surfacing ENOMEM immediately.
+                Err(AllocationError::AddressInUseByPlatform | AllocationError::OutOfMemory)
+                    if !fixed_addr =>
+                {
+                    if used_hint {
+                        continue;
                     }
-                } else {
-                    FixedAddressBehavior::Hint
-                },
-            )
+                    let Some(max_start) = new_addr.checked_sub(ALIGN) else {
+                        return Err(AllocationError::OutOfMemory);
+                    };
+                    next_top_down_max_start = Some(max_start);
+                }
+                Err(err) => return Err(err),
+            }
         }
     }
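
The retry protocol is easiest to read as a trace. One plausible non-fixed allocation on a contended host (addresses invented):

```rust
// iteration 1: caller's hint 0x1000_0000 -> platform: AddressInUseByPlatform
//              used_hint == true, so retry immediately without the hint
// iteration 2: top-down search proposes 0x1_f000_0000 -> platform still fails
//              used_hint == false, so cap the next search:
//              next_top_down_max_start = Some(0x1_f000_0000 - ALIGN)
// iteration 3: search below the cap proposes 0x1_efff_0000 -> platform accepts
```

Because every failed top-down candidate strictly lowers the cap, the loop can only run until candidates fall below `addr_min`, at which point `get_unmmaped_area` returns `None` and the caller sees `OutOfMemory`.
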
@@ -657,7 +808,14 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
             // litebox mappings in this range, this may fail if there are
             // platform mappings in the way.
             match unsafe {
-                self.insert_mapping(range, *cur_vma, false, FixedAddressBehavior::NoReplace)
+                self.insert_mapping(
+                    range,
+                    *cur_vma,
+                    false,
+                    cur_vma.noreserve(),
+                    FixedAddressBehavior::NoReplace,
+                    false,
+                )
             } {
                 Ok(_) => {}
                 Err(AllocationError::OutOfMemory) => return Err(VmemResizeError::OutOfMemory),
@@ -670,7 +828,7 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
                     AllocationError::Unaligned
                     | AllocationError::BelowMinAddress
                     | AllocationError::AboveMaxAddress,
-                ) => unreachable!(),
+                ) => return Err(VmemResizeError::OutOfRange),
             }
             return Ok(());
         }
@@ -715,7 +873,7 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
             unimplemented!("file-backed mapping move is not supported yet");
         }
         let new_addr = self
-            .get_unmmaped_area(suggested_new_address, new_size, false)
+            .get_unmmaped_area(suggested_new_address, new_size, false, false, None)
             .ok_or(VmemMoveError::OutOfMemory)?;
         let new_range = PageRange::<ALIGN>::new(new_addr, new_addr + new_size.as_usize()).unwrap();
         let new_addr = unsafe {
@@ -792,6 +950,7 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
             VmArea {
                 flags: new_flags,
                 is_file_backed: vma.is_file_backed,
+                noreserve: vma.noreserve,
             },
         );
         if !before.is_empty() {
@@ -834,20 +993,22 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
     ) -> Result<PageAddr<ALIGN>, MappingError> {
         let shared = flags.contains(CreatePagesFlags::SHARED);
         let file_backed = flags.contains(CreatePagesFlags::MAP_FILE);
+        let fd_writable = flags.contains(CreatePagesFlags::FD_WRITABLE);
         unsafe {
             self.create_mapping(
                 suggested_new_address,
                 length,
                 VmArea::new(
                     VmFlags::from(perms)
-                        | VmFlags::may_flags_for_mapping(shared, file_backed)
+                        | VmFlags::may_flags_for_mapping(shared, file_backed, fd_writable)
                         | if flags.contains(CreatePagesFlags::IS_STACK) {
                             VmFlags::VM_GROWSDOWN
                         } else {
                            VmFlags::empty()
                         },
                     flags.contains(CreatePagesFlags::MAP_FILE),
-                ),
+                )
+                .with_noreserve(flags.contains(CreatePagesFlags::NORESERVE)),
                 flags,
             )
         }
@@ -889,37 +1050,65 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
         suggested_address: Option<PageAddr<ALIGN>>,
         length: NonZeroPageSize<ALIGN>,
         fixed_addr: bool,
+        require_low_2g: bool,
+        max_start: Option<usize>,
     ) -> Option<usize> {
         let size = length.as_usize();
-        if size > Platform::TASK_ADDR_MAX {
+        let range_span = self.addr_max - self.addr_min;
+        if size > range_span {
             return None;
         }
         if let Some(suggested_address) = suggested_address {
-            if (Platform::TASK_ADDR_MAX - size) < suggested_address.0 {
-                return None;
+            let hint = suggested_address.0;
+            let hint_end = hint.saturating_add(size);
+            let addr_max = if require_low_2g {
+                self.addr_max.min(MAP_32BIT_ADDR_LIMIT)
+            } else {
+                self.addr_max
+            };
+            let in_range = hint >= self.addr_min
+                && hint_end <= addr_max
+                && max_start.is_none_or(|max_start| hint <= max_start);
+
+            if fixed_addr {
+                // Fixed: always honour the hint; insert_mapping will reject
+                // out-of-range addresses with BelowMinAddress / AboveMaxAddress.
+                return Some(hint);
             }
-            if fixed_addr
-                || !self
-                    .vmas
-                    .overlaps(&(suggested_address.0..(suggested_address.0 + size)))
-            {
-                return Some(suggested_address.0);
+            // Non-fixed: use the hint only when it fits inside the range and
+            // doesn't overlap existing mappings. Otherwise fall through to the
+            // top-down search.
+            if in_range && !self.vmas.overlaps(&(hint..hint_end)) {
+                return Some(hint);
             }
         } else if fixed_addr {
             // MAP_FIXED with addr=0: return 0 so insert_mapping rejects it
-            // via the TASK_ADDR_MIN check (BelowMinAddress → EPERM).
+            // via the addr_min check (BelowMinAddress → EPERM).
             return Some(0);
         }
         // top down
         // 1. check [last_end, TASK_SIZE_MAX)
-        let (low_limit, high_limit) = (
-            Platform::TASK_ADDR_MIN,
-            Platform::TASK_ADDR_MAX - length.as_usize(),
-        );
-        debug_assert!(Platform::TASK_ADDR_MIN % ALIGN == 0);
-        debug_assert!(Platform::TASK_ADDR_MAX % ALIGN == 0);
-        let last_end = self.vmas.last_range_value().map_or(low_limit, |r| r.0.end);
+        let low_limit = self.addr_min;
+        let mut high_limit = self.addr_max - length.as_usize();
+        if require_low_2g {
+            if low_limit >= MAP_32BIT_ADDR_LIMIT || size > MAP_32BIT_ADDR_LIMIT - low_limit {
+                return None;
+            }
+            high_limit = high_limit.min(MAP_32BIT_ADDR_LIMIT - size);
+        }
+        if let Some(max_start) = max_start {
+            high_limit = high_limit.min(max_start);
+        }
+        debug_assert!(self.addr_min.is_multiple_of(ALIGN));
+        debug_assert!(self.addr_max.is_multiple_of(ALIGN));
+        let search_end = high_limit + size;
+        let last_end = self
+            .vmas
+            .iter()
+            .rev()
+            .find(|(r, _)| r.start < search_end)
+            .map_or(low_limit, |(r, _)| r.end);
         if last_end <= high_limit {
             return Some(high_limit);
         }
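
Worked numbers for the `LOW_2G` clamp above (runnable; the managed bound and sizes are invented):

```rust
fn main() {
    const MAP_32BIT_ADDR_LIMIT: usize = 0x8000_0000;
    let addr_max = 0x7_0000_0000_usize; // made-up managed upper bound
    let size = 0x4000_usize; // four 4 KiB pages
    // Without LOW_2G the top-down search starts at addr_max - size:
    assert_eq!(addr_max - size, 0x6_FFFF_C000);
    // With LOW_2G it is clamped so the mapping ends below 2 GiB:
    assert_eq!((addr_max - size).min(MAP_32BIT_ADDR_LIMIT - size), 0x7FFF_C000);
}
```
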
@@ -982,6 +1171,8 @@ pub(super) enum VmemResizeError {
     RangeOccupied(Range<usize>),
     #[error("out of memory")]
     OutOfMemory,
+    #[error("expanded range exceeds address space limits")]
+    OutOfRange,
 }
 
 /// Error for moving mappings
@@ -1031,11 +1222,21 @@ pub enum MappingError {
 
 /// Enable [`super::PageManager`] to handle page faults if its platform implements this trait
 pub trait VmemPageFaultHandler {
+    /// Whether the platform wants guest user-mode page faults to go through
+    /// [`super::PageManager::handle_page_fault`] in addition to kernel faults.
+    ///
+    /// Userland Windows uses this to service grow-down guest stacks through the
+    /// shared PageManager bookkeeping even though the fault originated from
+    /// guest user mode rather than shim code.
+    const HANDLE_USER_PAGE_FAULTS: bool = false;
+
     /// Handle a page fault for the given address.
     ///
     /// # Safety
     ///
-    /// This should only be called from the kernel page fault handler.
+    /// This should only be called from platform page-fault recovery paths that
+    /// have already classified the fault and determined that the PageManager
+    /// should attempt to service it.
     unsafe fn handle_page_fault(
         &self,
         fault_addr: usize,
diff --git a/litebox/src/mm/mod.rs b/litebox/src/mm/mod.rs
index a46b3c855..be1208cf5 100644
--- a/litebox/src/mm/mod.rs
+++ b/litebox/src/mm/mod.rs
@@ -34,16 +34,41 @@ where
     Platform: RawSyncPrimitivesProvider + PageManagementProvider<ALIGN>,
 {
     vmem: RwLock<linux::Vmem<Platform, ALIGN>>,
+    /// Lower bound (inclusive) of the VA range (cached for lock-free checks).
+    addr_min: usize,
+    /// Upper bound (exclusive) of the VA range (cached for lock-free checks).
+    addr_max: usize,
 }
 
 impl<Platform, const ALIGN: usize> PageManager<Platform, ALIGN>
 where
     Platform: RawSyncPrimitivesProvider + PageManagementProvider<ALIGN>,
 {
-    /// Create a new `PageManager` instance.
-    pub fn new(litebox: &LiteBox<Platform>) -> Self {
-        let vmem = RwLock::new(linux::Vmem::new(litebox.x.platform));
-        Self { vmem }
+    /// Lower bound (inclusive) of the managed VA range.
+    pub fn addr_min(&self) -> usize {
+        self.addr_min
+    }
+
+    /// Upper bound (exclusive) of the managed VA range.
+    pub fn addr_max(&self) -> usize {
+        self.addr_max
+    }
+
+    /// Create a new `PageManager` instance for the given VA `range`.
+    ///
+    /// For a single-process setup, pass the full platform range
+    /// `Platform::TASK_ADDR_MIN..Platform::TASK_ADDR_MAX`. For multi-process
+    /// setups, pass the sub-range obtained from
+    /// [`AddressSpaceProvider::address_space_range()`](crate::platform::AddressSpaceProvider::address_space_range).
+    pub fn new(litebox: &LiteBox<Platform>, range: Range<usize>) -> Self {
+        let addr_min = range.start;
+        let addr_max = range.end;
+        let vmem = RwLock::new(linux::Vmem::new(litebox.x.platform, range));
+        Self {
+            vmem,
+            addr_min,
+            addr_max,
+        }
     }
 
     /// Create a mapping with the given flags.
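
Call-site shape for the two setups the doc comment names (`MyPlatform`, `PAGE_SIZE`, and `litebox` are placeholders):

```rust
// Single process: one PageManager owns the platform's whole task range.
let pm = PageManager::<MyPlatform, PAGE_SIZE>::new(
    &litebox,
    MyPlatform::TASK_ADDR_MIN..MyPlatform::TASK_ADDR_MAX,
);
assert_eq!(pm.addr_min(), MyPlatform::TASK_ADDR_MIN);

// Multi process: each PageManager gets a disjoint sub-range of the task
// range, e.g. the slice handed out by AddressSpaceProvider::address_space_range()
// (exact signature not shown in this patch).
```
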
@@ -289,7 +314,40 @@ where
     pub fn set_initial_brk(&self, brk: usize) {
         let mut vmem = self.vmem.write();
         assert_eq!(vmem.brk, 0, "initial brk is already set");
+        vmem.brk_base = brk;
         vmem.brk = brk;
+        vmem.brk_frontier = brk.next_multiple_of(linux::PAGE_SIZE);
+    }
+
+    /// Returns the current logical program break without modifying mappings.
+    pub fn current_brk(&self) -> usize {
+        self.vmem.read().brk
+    }
+
+    /// Returns the current page-aligned heap frontier that `brk` growth uses.
+    pub fn current_brk_frontier(&self) -> usize {
+        self.vmem.read().brk_frontier
+    }
+
+    /// Advance the minimum program break, current break, and heap frontier to
+    /// at least `min_brk` without allocating new heap pages.
+    ///
+    /// This is used only when already-mapped non-heap pages, such as a
+    /// trampoline at the heap frontier, occupy space that future `brk` growth
+    /// must skip over. The skipped gap becomes a persistent floor: later
+    /// `brk` shrinks must not re-enter it, or subsequent growth would collide
+    /// with the same non-heap pages again.
+    pub fn ensure_brk_past(&self, min_brk: usize) {
+        let mut vmem = self.vmem.write();
+        if vmem.brk_base < min_brk {
+            vmem.brk_base = min_brk;
+        }
+        if vmem.brk < min_brk {
+            vmem.brk = min_brk;
+        }
+        if vmem.brk_frontier < min_brk {
+            vmem.brk_frontier = min_brk;
+        }
     }
 
     /// Set the program break to the given address.
@@ -319,10 +377,15 @@ where
             // Calling `brk` with 0 can be used to find the current location of the program break.
             return Ok(vmem.brk);
         }
+        if brk < vmem.brk_base {
+            // Linux keeps the current break unchanged when the request is
+            // below the permitted heap base.
+            return Ok(vmem.brk);
+        }
 
-        let old_brk = vmem.brk.next_multiple_of(linux::PAGE_SIZE);
+        let old_brk = vmem.brk_frontier;
         let new_brk = brk.next_multiple_of(linux::PAGE_SIZE);
-        if vmem.brk >= brk {
+        if new_brk < old_brk {
             // Shrink the memory region
             let brk = match unsafe {
                 vmem.remove_mapping(
@@ -331,6 +394,7 @@ where
             } {
                 Ok(()) => {
                     vmem.brk = brk;
+                    vmem.brk_frontier = new_brk;
                     brk
                 }
                 Err(_) => {
@@ -340,20 +404,23 @@ where
             return Ok(brk);
         }
 
-        if vmem.overlapping(old_brk..new_brk).next().is_some() {
-            return Err(MappingError::OutOfMemory);
-        }
-        if let Some(range) = PageRange::<ALIGN>::new(old_brk, new_brk) {
-            let (suggested_address, length) = range.start_and_length();
-            let perms = MemoryRegionPermissions::READ | MemoryRegionPermissions::WRITE;
-            unsafe {
-                vmem.create_pages(
-                    Some(suggested_address),
-                    length,
-                    CreatePagesFlags::FIXED_ADDR | CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY,
-                    perms,
-                )
-            }?;
+        if new_brk > old_brk {
+            if vmem.overlapping(old_brk..new_brk).next().is_some() {
+                return Err(MappingError::OutOfMemory);
+            }
+            if let Some(range) = PageRange::<ALIGN>::new(old_brk, new_brk) {
+                let (suggested_address, length) = range.start_and_length();
+                let perms = MemoryRegionPermissions::READ | MemoryRegionPermissions::WRITE;
+                unsafe {
+                    vmem.create_pages(
+                        Some(suggested_address),
+                        length,
+                        CreatePagesFlags::FIXED_ADDR | CreatePagesFlags::POPULATE_PAGES_IMMEDIATELY,
+                        perms,
+                    )
+                }?;
+            }
+            vmem.brk_frontier = new_brk;
         }
         vmem.brk = brk;
         Ok(brk)
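
A worked `brk` sequence under the new bookkeeping (addresses invented; `brk(...)` stands in for the set-program-break method above):

```rust
pm.set_initial_brk(0x5000_1234);
// brk_base = brk = 0x5000_1234, brk_frontier = 0x5000_2000 (rounded up to a page)

brk(0x5000_3000);
// grow: maps [0x5000_2000, 0x5000_3000), then brk = brk_frontier = 0x5000_3000

brk(0x5000_0000);
// below brk_base: mappings untouched, returns the current break (0x5000_3000)
```
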
@@ -381,7 +448,9 @@ where
 
         // reset brk
         let mut vmem = self.vmem.write();
+        vmem.brk_base = 0;
         vmem.brk = 0;
+        vmem.brk_frontier = 0;
         Ok(())
     }
 
@@ -442,6 +511,24 @@ where
             Err(linux::VmemResizeError::NotExist(_)) => Err(RemapError::AlreadyUnallocated),
             Err(linux::VmemResizeError::InvalidAddr { .. }) => Err(RemapError::AlreadyAllocated),
             Err(linux::VmemResizeError::OutOfMemory) => Err(RemapError::OutOfMemory),
+            Err(linux::VmemResizeError::OutOfRange) => {
+                // Expanded range exceeds address space limits — try moving
+                if !may_move {
+                    return Err(RemapError::OutOfMemory);
+                }
+                match unsafe {
+                    vmem.move_mappings(
+                        old_range,
+                        None,
+                        NonZeroPageSize::new(new_size).ok_or(RemapError::Unaligned)?,
+                    )
+                } {
+                    Ok(new_addr) => Ok(new_addr),
+                    Err(linux::VmemMoveError::OutOfMemory) => Err(RemapError::OutOfMemory),
+                    Err(linux::VmemMoveError::UnAligned) => Err(RemapError::Unaligned),
+                    Err(linux::VmemMoveError::RemapError(err)) => Err(err),
+                }
+            }
         }
     }
 
@@ -605,6 +692,7 @@ where
     ///
     /// The `range` must be an already-mapped region with the given `permissions`.
     #[must_use]
+    #[allow(clippy::fn_params_excessive_bools)]
     pub unsafe fn register_existing_mapping(
         &self,
         range: PageRange<ALIGN>,
@@ -612,9 +700,11 @@ where
         is_file_backed: bool,
         replace: bool,
         shared: bool,
+        fd_writable: bool,
     ) -> Option<()> {
         let vma = VmArea::new(
-            VmFlags::from(permissions) | VmFlags::may_flags_for_mapping(shared, is_file_backed),
+            VmFlags::from(permissions)
+                | VmFlags::may_flags_for_mapping(shared, is_file_backed, fd_writable),
             is_file_backed,
         );
         let mut vmem = self.vmem.write();
@@ -665,14 +755,17 @@ where
     ///
     /// # Safety
     ///
-    /// This should only be called from the kernel page fault handler.
+    /// This must only be called while servicing a real page fault for the
+    /// current address space, and `fault_addr` / `error_code` must come from
+    /// that trap context. Depending on the platform, that may be a kernel fault
+    /// handler or an opted-in user-mode fault path.
     pub unsafe fn handle_page_fault(
         &self,
         fault_addr: usize,
         error_code: u64,
     ) -> Result<(), PageFaultError> {
         let fault_addr = fault_addr & !(ALIGN - 1);
-        if !(Platform::TASK_ADDR_MIN..Platform::TASK_ADDR_MAX).contains(&fault_addr) {
+        if !(self.addr_min..self.addr_max).contains(&fault_addr) {
             return Err(PageFaultError::AccessError("Invalid address"));
         }
 
@@ -680,7 +773,7 @@ where
         // Find the range closest to the fault address
         let (start, vma) = {
             let (r, vma) = vmem
-                .overlapping(fault_addr..Platform::TASK_ADDR_MAX)
+                .overlapping(fault_addr..vmem.addr_max())
                 .next()
                 .ok_or(PageFaultError::AccessError("no mapping"))?;
             (r.start, *vma)
@@ -692,7 +785,7 @@ where
         }
 
         if !vmem
-            .overlapping(Platform::TASK_ADDR_MIN..fault_addr)
+            .overlapping(vmem.addr_min()..fault_addr)
             .next_back()
             .is_none_or(|(prev_range, prev_vma)| {
                 // Enforce gap between stack and other preceding non-stack mappings.
@@ -708,15 +801,19 @@ where
             let Some(range) = PageRange::new(fault_addr, start) else {
                 unreachable!()
             };
-            if let Err(err) = unsafe {
+            if unsafe {
                 vmem.insert_mapping(
                     range,
                     vma,
                     false,
+                    vma.noreserve(),
                     crate::platform::page_mgmt::FixedAddressBehavior::NoReplace,
+                    false,
                 )
-            } {
-                unimplemented!("failed to grow stack: {:?}", err)
+            }
+            .is_err()
+            {
+                return Err(PageFaultError::AllocationFailed);
             }
         }
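
Sketch of the platform trap hook this contract expects (`on_page_fault`, `MyPlatform`, and the delivery comment are placeholders; `handle_page_fault` and its `Result<(), PageFaultError>` return are from this patch):

```rust
unsafe fn on_page_fault(pm: &PageManager<MyPlatform, PAGE_SIZE>, addr: usize, code: u64) {
    // Grow-down stacks and other demand mappings are serviced here; anything
    // the PageManager refuses becomes an access violation for the guest.
    if unsafe { pm.handle_page_fault(addr, code) }.is_err() {
        // deliver SIGSEGV / terminate the guest task
    }
}
```

User-mode faults take this path only on platforms that set `HANDLE_USER_PAGE_FAULTS`.
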
diff --git a/litebox/src/mm/tests.rs b/litebox/src/mm/tests.rs
index a6e9f772e..198b617a2 100644
--- a/litebox/src/mm/tests.rs
+++ b/litebox/src/mm/tests.rs
@@ -86,7 +86,10 @@ fn collect_mappings(vmm: &Vmem<DummyVmemBackend, PAGE_SIZE>) -> Vec
 fn test_vmm_mapping() {
     let start_addr: usize = 0x1_0000;
     let range = PageRange::new(start_addr, start_addr + 12 * PAGE_SIZE).unwrap();
-    let mut vmm = Vmem::new(&DummyVmemBackend);
+    let mut vmm = Vmem::new(
+        &DummyVmemBackend,
+        DummyVmemBackend::TASK_ADDR_MIN..DummyVmemBackend::TASK_ADDR_MAX,
+    );
 
     // []
     unsafe {
@@ -97,7 +100,9 @@ fn test_vmm_mapping() {
                 false,
             ),
             false,
+            false,
             crate::platform::page_mgmt::FixedAddressBehavior::Replace,
+            false,
         )
     }
     .unwrap();
diff --git a/litebox_platform_linux_kernel/src/mm/tests.rs b/litebox_platform_linux_kernel/src/mm/tests.rs
index 3987b530b..033d7908a 100644
--- a/litebox_platform_linux_kernel/src/mm/tests.rs
+++ b/litebox_platform_linux_kernel/src/mm/tests.rs
@@ -218,7 +218,11 @@ fn test_vmm_page_fault() {
     let p4 = PageTableAllocator::<MockKernel>::allocate_frame(true).unwrap();
     let platform = MockKernel::new(p4.start_address());
     let litebox = LiteBox::new(platform);
-    let vmm = PageManager::<_, PAGE_SIZE>::new(&litebox);
+    let vmm = PageManager::<_, PAGE_SIZE>::new(
+        &litebox,
+        <MockKernel as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MIN
+            ..<MockKernel as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MAX,
+    );
     unsafe {
         assert_eq!(
             vmm.create_writable_pages(
diff --git a/litebox_platform_lvbs/src/mm/tests.rs b/litebox_platform_lvbs/src/mm/tests.rs
index 7dccc55e7..333bfdc06 100644
--- a/litebox_platform_lvbs/src/mm/tests.rs
+++ b/litebox_platform_lvbs/src/mm/tests.rs
@@ -238,7 +238,11 @@ fn test_vmm_page_fault() {
         x86_64::PhysAddr::new(0),
     );
     let litebox = LiteBox::new(platform);
-    let vmm = PageManager::<_, PAGE_SIZE>::new(&litebox);
+    let vmm = PageManager::<_, PAGE_SIZE>::new(
+        &litebox,
+        <MockKernel as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MIN
+            ..<MockKernel as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MAX,
+    );
     unsafe {
         assert_eq!(
             vmm.create_writable_pages(
diff --git a/litebox_shim_linux/src/lib.rs b/litebox_shim_linux/src/lib.rs
index 59c10023f..ae34f4792 100644
--- a/litebox_shim_linux/src/lib.rs
+++ b/litebox_shim_linux/src/lib.rs
@@ -191,7 +191,11 @@ impl LinuxShimBuilder {
         net.set_platform_interaction(litebox::net::PlatformInteraction::Manual);
         let global = Arc::new(GlobalState {
             platform: self.platform,
-            pm: PageManager::new(&self.litebox),
+            pm: PageManager::new(
+                &self.litebox,
+                <Platform as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MIN
+                    ..<Platform as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MAX,
+            ),
             futex_manager: FutexManager::new(),
             pipes: Pipes::new(&self.litebox),
             net: litebox::sync::Mutex::new(net),
diff --git a/litebox_shim_linux/src/syscalls/mm.rs b/litebox_shim_linux/src/syscalls/mm.rs
index ab6069b24..c65f3957a 100644
--- a/litebox_shim_linux/src/syscalls/mm.rs
+++ b/litebox_shim_linux/src/syscalls/mm.rs
@@ -238,6 +238,7 @@ impl Task {
                 true,
                 fixed_behavior == FixedAddressBehavior::Replace,
                 flags.contains(MapFlags::MAP_SHARED),
+                false,
             )
         }
         .unwrap();
diff --git a/litebox_shim_optee/src/lib.rs b/litebox_shim_optee/src/lib.rs
index 9b68fb5e7..38dde9801 100644
--- a/litebox_shim_optee/src/lib.rs
+++ b/litebox_shim_optee/src/lib.rs
@@ -142,7 +142,11 @@ impl OpteeShimBuilder {
     pub fn build(self) -> OpteeShim {
         let global = Arc::new(GlobalState {
             platform: self.platform,
-            pm: PageManager::new(&self.litebox),
+            pm: PageManager::new(
+                &self.litebox,
+                <Platform as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MIN
+                    ..<Platform as PageManagementProvider<PAGE_SIZE>>::TASK_ADDR_MAX,
+            ),
             _litebox: self.litebox,
             ta_uuid_map: TaUuidMap::new(),
         });
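
End to end, existing call sites only gain a trailing argument. A sketch of the `register_existing_mapping` call with real fd writability threaded through, instead of the hardcoded `false` in the `syscalls/mm.rs` hunk above (`file.status_flags().writable()` is a hypothetical accessor):

```rust
unsafe {
    pm.register_existing_mapping(
        range,
        perms,
        /* is_file_backed */ true,
        /* replace */ fixed_behavior == FixedAddressBehavior::Replace,
        /* shared */ flags.contains(MapFlags::MAP_SHARED),
        /* fd_writable */ file.status_flags().writable(), // hypothetical accessor
    )
}
.unwrap();
```
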