From 3a7607bd01ef0152aba6d0bd85c6ba8fd2c7e7a2 Mon Sep 17 00:00:00 2001 From: Jingyu Ma Date: Wed, 25 Mar 2026 10:53:13 -0700 Subject: [PATCH] [Windows] Search for closest free page in x64 JIT memory allocation The old x86_64 Windows implementation of allocate_jit_memory scanned linearly from func_addr - 2GB upward, which often allocated memory ~2GB away from the function. This could land in/near the stack region, disrupting the stack guard page and causing STATUS_STACK_OVERFLOW (0xc00000fd) during parallel test execution. This change replaces the linear scan with a bidirectional closest-first search (matching the existing aarch64 Windows and Unix implementations). The allocator now searches outward from the function address at +offset and -offset, finding the closest available page first. This is an improved version of PR #122 that fixes two additional issues: - Searches both directions (not just downward) for robustness - Avoids an infinite loop when checked_sub fails by keeping offset increment outside the inner direction loop Also adds tests: - Unit test verifying JIT allocation is close to source (<128MB, not ~2GB) - Unit test verifying JIT allocation is not near the stack region - Integration test for stack growth after patching - Integration test for concurrent patching with deep stack usage (8 threads) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/injector_core/common.rs | 164 +++++++++++++++++++++++++++++++----- tests/stack_safety.rs | 121 ++++++++++++++++++++++++++ 2 files changed, 262 insertions(+), 23 deletions(-) create mode 100644 tests/stack_safety.rs diff --git a/src/injector_core/common.rs b/src/injector_core/common.rs index c72f42c..fc6d3b4 100644 --- a/src/injector_core/common.rs +++ b/src/injector_core/common.rs @@ -213,33 +213,47 @@ fn allocate_jit_memory_windows(_src: &FuncPtrInternal, code_size: usize) -> *mut #[cfg(target_arch = "x86_64")] { - let max_range: usize = 0x8000_0000; // ±2GB - let original_addr = _src.as_ptr() as usize; - let page_size = unsafe { get_page_size() }; - let mut addr = original_addr.saturating_sub(max_range); - - while addr <= original_addr + max_range { - let ptr = unsafe { - VirtualAlloc( - addr as *mut c_void, - code_size, - MEM_COMMIT | MEM_RESERVE, - PAGE_EXECUTE_READWRITE, - ) - }; - - if !ptr.is_null() { - let allocated = ptr as usize; - if allocated.abs_diff(original_addr) <= max_range { - return ptr as *mut u8; + let max_range: u64 = 0x8000_0000; // ±2GB + let original_addr = _src.as_ptr() as u64; + let page_size = unsafe { get_page_size() as u64 }; + + // Search outward from the function address to find the CLOSEST free page. + // This avoids allocating far from the function (e.g., in/near stack memory), + // which could disrupt the stack guard page and cause STATUS_STACK_OVERFLOW. + let mut offset: u64 = 0; + while offset <= max_range { + for &dir in &[1i64, -1i64] { + let hint = if dir > 0 { + original_addr.checked_add(offset) + } else if offset > 0 { + original_addr.checked_sub(offset) } else { - unsafe { - VirtualFree(ptr, 0, MEM_RELEASE); + continue; // Already tried offset=0 with dir=1 + }; + + let Some(hint_addr) = hint else { continue }; + + let ptr = unsafe { + VirtualAlloc( + hint_addr as *mut c_void, + code_size, + MEM_COMMIT | MEM_RESERVE, + PAGE_EXECUTE_READWRITE, + ) + }; + if !ptr.is_null() { + let allocated = ptr as u64; + let diff = allocated.abs_diff(original_addr); + if diff <= max_range { + return ptr as *mut u8; + } else { + unsafe { + VirtualFree(ptr, 0, MEM_RELEASE); + } } } } - - addr += page_size; + offset += page_size; } panic!("Failed to allocate executable memory within ±2GB of original function address on x86_64 Windows"); @@ -496,3 +510,107 @@ unsafe fn clear_cache(start: *mut u8, end: *mut u8) { core::arch::asm!("dsb sy", "isb", options(nostack, nomem)); } } + +#[cfg(test)] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +mod tests { + use super::*; + + /// A dummy function used as the "source" address for JIT allocation tests. + #[inline(never)] + fn dummy_target_function() -> i32 { + std::hint::black_box(42) + } + + /// Verify that `allocate_jit_memory` returns an address close to the source function, + /// not at the far end of the ±2GB range where it could collide with the stack. + /// + /// The old x86_64 Windows implementation scanned linearly from `func_addr - 2GB`, + /// which could return memory near the stack guard pages. The fixed implementation + /// searches outward from the function address, so the result should be much closer. + #[test] + fn test_jit_allocation_is_close_to_source() { + let func_ptr = unsafe { + FuncPtrInternal::new( + std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(), + ) + }; + let func_addr = func_ptr.as_ptr() as u64; + + let jit_ptr = allocate_jit_memory(&func_ptr, 256); + assert!(!jit_ptr.is_null(), "JIT allocation should succeed"); + + let jit_addr = jit_ptr as u64; + let distance = func_addr.abs_diff(jit_addr); + + // The allocation should be relatively close — within 128MB. + // The old buggy code would often return addresses ~2GB away, near the stack. + let max_acceptable_distance: u64 = 128 * 1024 * 1024; // 128MB + assert!( + distance <= max_acceptable_distance, + "JIT memory should be allocated close to the function. \ + Function at {func_addr:#x}, JIT at {jit_addr:#x}, distance: {distance} bytes ({} MB). \ + Expected within {max_acceptable_distance} bytes ({} MB).", + distance / (1024 * 1024), + max_acceptable_distance / (1024 * 1024), + ); + + // Clean up + unsafe { + #[cfg(any(target_os = "linux", target_os = "macos"))] + { + libc::munmap(jit_ptr as *mut c_void, 256); + } + #[cfg(target_os = "windows")] + { + VirtualFree(jit_ptr as *mut c_void, 0, MEM_RELEASE); + } + } + } + + /// Verify that JIT allocation does NOT land in the current thread's stack region. + /// This directly tests the root cause of the STATUS_STACK_OVERFLOW crash: the old + /// algorithm could allocate JIT memory in/near the stack, disrupting the guard page. + #[test] + fn test_jit_allocation_not_in_stack_region() { + let func_ptr = unsafe { + FuncPtrInternal::new( + std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(), + ) + }; + + // Use a stack local's address to approximate the stack location + let stack_local: u64 = 0; + let stack_addr = &stack_local as *const u64 as u64; + + let jit_ptr = allocate_jit_memory(&func_ptr, 256); + assert!(!jit_ptr.is_null(), "JIT allocation should succeed"); + + let jit_addr = jit_ptr as u64; + // Stack on Windows x86_64 is typically 1-8MB. Use a conservative 16MB guard zone. + let stack_guard_zone: u64 = 16 * 1024 * 1024; + let distance_to_stack = jit_addr.abs_diff(stack_addr); + + assert!( + distance_to_stack > stack_guard_zone, + "JIT memory should NOT be near the stack! \ + JIT at {jit_addr:#x}, stack approx at {stack_addr:#x}, \ + distance: {distance_to_stack} bytes ({} MB). \ + Must be > {stack_guard_zone} bytes ({} MB) from stack.", + distance_to_stack / (1024 * 1024), + stack_guard_zone / (1024 * 1024), + ); + + // Clean up + unsafe { + #[cfg(any(target_os = "linux", target_os = "macos"))] + { + libc::munmap(jit_ptr as *mut c_void, 256); + } + #[cfg(target_os = "windows")] + { + VirtualFree(jit_ptr as *mut c_void, 0, MEM_RELEASE); + } + } + } +} diff --git a/tests/stack_safety.rs b/tests/stack_safety.rs new file mode 100644 index 0000000..7da17ce --- /dev/null +++ b/tests/stack_safety.rs @@ -0,0 +1,121 @@ +// Test that JIT memory allocation does not interfere with stack growth. +// +// The old x86_64 Windows implementation of `allocate_jit_memory` scanned linearly +// from `func_addr - 2GB`, which could allocate memory in/near the stack region, +// disrupting the stack guard page and causing STATUS_STACK_OVERFLOW (0xc00000fd) +// during parallel test execution. +// +// These tests verify that after patching functions, deep stack usage still works. +#![cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "arm"))] + +use std::sync::{Arc, Barrier}; +use std::thread; + +use injectorpp::interface::injector::*; + +#[inline(never)] +fn get_value_stack_test() -> i32 { + std::hint::black_box(1) +} + +#[inline(never)] +fn get_other_value_stack_test() -> i32 { + std::hint::black_box(2) +} + +#[inline(never)] +fn is_enabled_stack_test() -> bool { + std::hint::black_box(false) +} + +/// Consume stack space via recursion. Each frame is ~256 bytes due to the array. +/// At depth 2000, this uses ~512KB of the thread's stack. +#[inline(never)] +fn consume_stack(depth: u32) -> u64 { + if depth == 0 { + return 1; + } + // Force the compiler to keep a sizable stack frame. + let buf = std::hint::black_box([0u8; 256]); + let result = consume_stack(depth - 1); + std::hint::black_box(buf[0] as u64) + result +} + +/// After patching a function, verify that deep recursion still works. +/// With the old buggy JIT allocator, the stack guard page could be disrupted, +/// causing a stack overflow even at moderate recursion depths. +#[test] +fn test_stack_growth_works_after_patching() { + let mut injector = InjectorPP::new(); + injector + .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32)) + .will_execute(injectorpp::fake!( + func_type: fn() -> i32, + returns: 42 + )); + + // Verify the fake works + assert_eq!(get_value_stack_test(), 42); + + // Now do deep recursion — this should NOT cause STATUS_STACK_OVERFLOW. + // If JIT memory was allocated in the stack region (old bug), this would crash. + let result = consume_stack(2000); + assert!(result > 0, "Deep recursion should succeed after patching"); +} + +/// Multiple threads concurrently patch different functions and then exercise +/// deep stack usage. This replicates the conditions of the original crash: +/// parallel tests + injectorpp patching + significant stack consumption. +#[test] +fn test_concurrent_patching_with_deep_stack_usage() { + let thread_count = 8; + let barrier = Arc::new(Barrier::new(thread_count)); + let mut handles = Vec::new(); + + for i in 0..thread_count { + let b = barrier.clone(); + handles.push(thread::spawn(move || { + let mut injector = InjectorPP::new(); + + // Each thread patches a function + match i % 3 { + 0 => { + injector + .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32)) + .will_execute(injectorpp::fake!( + func_type: fn() -> i32, + returns: 100 + )); + } + 1 => { + injector + .when_called(injectorpp::func!(fn(get_other_value_stack_test)() -> i32)) + .will_execute(injectorpp::fake!( + func_type: fn() -> i32, + returns: 200 + )); + } + _ => { + injector + .when_called(injectorpp::func!(fn(is_enabled_stack_test)() -> bool)) + .will_return_boolean(true); + } + }; + + // Synchronize: all threads have patched before anyone recurses + b.wait(); + + // Deep recursion — would crash if JIT memory disrupted the stack guard page + let result = consume_stack(1500); + assert!( + result > 0, + "Thread {i} should complete deep recursion without stack overflow" + ); + })); + } + + for (i, h) in handles.into_iter().enumerate() { + h.join() + .unwrap_or_else(|_| panic!("Thread {i} panicked — possible stack overflow")); + } +}