Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 141 additions & 23 deletions src/injector_core/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,33 +213,47 @@ fn allocate_jit_memory_windows(_src: &FuncPtrInternal, code_size: usize) -> *mut

#[cfg(target_arch = "x86_64")]
{
let max_range: usize = 0x8000_0000; // ±2GB
let original_addr = _src.as_ptr() as usize;
let page_size = unsafe { get_page_size() };
let mut addr = original_addr.saturating_sub(max_range);

while addr <= original_addr + max_range {
let ptr = unsafe {
VirtualAlloc(
addr as *mut c_void,
code_size,
MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE,
)
};

if !ptr.is_null() {
let allocated = ptr as usize;
if allocated.abs_diff(original_addr) <= max_range {
return ptr as *mut u8;
let max_range: u64 = 0x8000_0000; // ±2GB
let original_addr = _src.as_ptr() as u64;
let page_size = unsafe { get_page_size() as u64 };

// Search outward from the function address to find the CLOSEST free page.
// This avoids allocating far from the function (e.g., in/near stack memory),
// which could disrupt the stack guard page and cause STATUS_STACK_OVERFLOW.
let mut offset: u64 = 0;
while offset <= max_range {
for &dir in &[1i64, -1i64] {
let hint = if dir > 0 {
original_addr.checked_add(offset)
} else if offset > 0 {
original_addr.checked_sub(offset)
} else {
unsafe {
VirtualFree(ptr, 0, MEM_RELEASE);
continue; // Already tried offset=0 with dir=1
};

let Some(hint_addr) = hint else { continue };

let ptr = unsafe {
VirtualAlloc(
hint_addr as *mut c_void,
code_size,
MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE,
)
};
if !ptr.is_null() {
let allocated = ptr as u64;
let diff = allocated.abs_diff(original_addr);
if diff <= max_range {
return ptr as *mut u8;
} else {
unsafe {
VirtualFree(ptr, 0, MEM_RELEASE);
}
}
}
}

addr += page_size;
offset += page_size;
}

panic!("Failed to allocate executable memory within ±2GB of original function address on x86_64 Windows");
Expand Down Expand Up @@ -496,3 +510,107 @@ unsafe fn clear_cache(start: *mut u8, end: *mut u8) {
core::arch::asm!("dsb sy", "isb", options(nostack, nomem));
}
}

#[cfg(test)]
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
mod tests {
    use super::*;

    /// Dummy "source" function whose address anchors the JIT allocation tests.
    #[inline(never)]
    fn dummy_target_function() -> i32 {
        std::hint::black_box(42)
    }

    /// Platform-specific release of a region returned by `allocate_jit_memory`.
    fn release_jit(ptr: *mut u8, size: usize) {
        unsafe {
            #[cfg(any(target_os = "linux", target_os = "macos"))]
            {
                libc::munmap(ptr as *mut c_void, size);
            }
            #[cfg(target_os = "windows")]
            {
                // MEM_RELEASE frees the whole reservation; size must be 0.
                let _ = size;
                VirtualFree(ptr as *mut c_void, 0, MEM_RELEASE);
            }
        }
    }

    /// The allocator must return memory CLOSE to the source function rather
    /// than at the far edge of the ±2GB window. The old x86_64 Windows code
    /// scanned linearly from `func_addr - 2GB` and could land in/near stack
    /// memory; the fixed code searches outward from the function address.
    #[test]
    fn test_jit_allocation_is_close_to_source() {
        let src_fn = unsafe {
            FuncPtrInternal::new(
                std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(),
            )
        };
        let func_addr = src_fn.as_ptr() as u64;

        let alloc = allocate_jit_memory(&src_fn, 256);
        assert!(!alloc.is_null(), "JIT allocation should succeed");

        let jit_addr = alloc as u64;
        let distance = func_addr.abs_diff(jit_addr);

        // Close means within 128MB; the old buggy code would often return
        // addresses ~2GB away, near the stack.
        let max_acceptable_distance: u64 = 128 * 1024 * 1024; // 128MB
        assert!(
            distance <= max_acceptable_distance,
            "JIT memory should be allocated close to the function. \
            Function at {func_addr:#x}, JIT at {jit_addr:#x}, distance: {distance} bytes ({} MB). \
            Expected within {max_acceptable_distance} bytes ({} MB).",
            distance / (1024 * 1024),
            max_acceptable_distance / (1024 * 1024),
        );

        release_jit(alloc, 256);
    }

    /// The allocation must not land inside (or right next to) the current
    /// thread's stack — the root cause of the STATUS_STACK_OVERFLOW crash:
    /// JIT pages near the stack can disrupt the guard page.
    #[test]
    fn test_jit_allocation_not_in_stack_region() {
        let src_fn = unsafe {
            FuncPtrInternal::new(
                std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(),
            )
        };

        // A local's address approximates where the stack currently lives.
        let probe: u64 = 0;
        let stack_addr = &probe as *const u64 as u64;

        let alloc = allocate_jit_memory(&src_fn, 256);
        assert!(!alloc.is_null(), "JIT allocation should succeed");

        let jit_addr = alloc as u64;
        // Windows x86_64 stacks are typically 1-8MB; 16MB is a conservative
        // exclusion zone.
        let stack_guard_zone: u64 = 16 * 1024 * 1024;
        let distance_to_stack = jit_addr.abs_diff(stack_addr);

        assert!(
            distance_to_stack > stack_guard_zone,
            "JIT memory should NOT be near the stack! \
            JIT at {jit_addr:#x}, stack approx at {stack_addr:#x}, \
            distance: {distance_to_stack} bytes ({} MB). \
            Must be > {stack_guard_zone} bytes ({} MB) from stack.",
            distance_to_stack / (1024 * 1024),
            stack_guard_zone / (1024 * 1024),
        );

        release_jit(alloc, 256);
    }
}
121 changes: 121 additions & 0 deletions tests/stack_safety.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Test that JIT memory allocation does not interfere with stack growth.
//
// The old x86_64 Windows implementation of `allocate_jit_memory` scanned linearly
// from `func_addr - 2GB`, which could allocate memory in/near the stack region,
// disrupting the stack guard page and causing STATUS_STACK_OVERFLOW (0xc00000fd)
// during parallel test execution.
//
// These tests verify that after patching functions, deep stack usage still works.
#![cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "arm"))]

use std::sync::{Arc, Barrier};
use std::thread;

use injectorpp::interface::injector::*;

/// Patch target for the stack-safety tests; `black_box` + `inline(never)`
/// keep it a real, addressable function the injector can rewrite.
#[inline(never)]
fn get_value_stack_test() -> i32 {
    let real_value = 1;
    std::hint::black_box(real_value)
}

/// Second patch target, so concurrent tests can fake distinct functions.
#[inline(never)]
fn get_other_value_stack_test() -> i32 {
    let real_value = 2;
    std::hint::black_box(real_value)
}

/// Boolean patch target; unpatched it always reports "disabled".
#[inline(never)]
fn is_enabled_stack_test() -> bool {
    let real_value = false;
    std::hint::black_box(real_value)
}

/// Burn stack space via recursion: every frame pins a 256-byte array, so
/// depth 2000 touches roughly 512KB of the thread's stack. Always returns 1
/// (the base case), which callers use as a "recursion completed" signal.
#[inline(never)]
fn consume_stack(remaining: u32) -> u64 {
    match remaining {
        0 => 1,
        _ => {
            // `black_box` stops the optimizer from eliding the frame buffer.
            let frame_pad = std::hint::black_box([0u8; 256]);
            let below = consume_stack(remaining - 1);
            below + std::hint::black_box(frame_pad[0] as u64)
        }
    }
}

/// Patch a function, then recurse deeply. With the old buggy JIT allocator
/// the stack guard page could be disrupted, so even moderate recursion
/// depths crashed with STATUS_STACK_OVERFLOW; this must now succeed.
#[test]
fn test_stack_growth_works_after_patching() {
    let mut patcher = InjectorPP::new();
    patcher
        .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32))
        .will_execute(injectorpp::fake!(
            func_type: fn() -> i32,
            returns: 42
        ));

    // The fake must be active before we stress the stack.
    assert_eq!(get_value_stack_test(), 42);

    // Deep recursion — if JIT memory had been allocated in the stack region
    // (the old bug), this would crash instead of returning.
    let deep = consume_stack(2000);
    assert!(deep > 0, "Deep recursion should succeed after patching");
}

/// Multiple threads concurrently patch different functions and then exercise
/// deep stack usage. This replicates the conditions of the original crash:
/// parallel tests + injectorpp patching + significant stack consumption.
///
/// NOTE(review): with 8 threads and `i % 3` dispatch, several threads patch
/// the SAME target concurrently from separate `InjectorPP` instances —
/// presumably the library serializes this internally; confirm it is supported.
#[test]
fn test_concurrent_patching_with_deep_stack_usage() {
    let thread_count = 8;
    // Barrier so every thread finishes patching before any thread recurses.
    let barrier = Arc::new(Barrier::new(thread_count));
    let mut handles = Vec::new();

    for i in 0..thread_count {
        let b = barrier.clone();
        handles.push(thread::spawn(move || {
            let mut injector = InjectorPP::new();

            // Each thread patches one of three functions, chosen round-robin.
            match i % 3 {
                0 => {
                    injector
                        .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32))
                        .will_execute(injectorpp::fake!(
                            func_type: fn() -> i32,
                            returns: 100
                        ));
                }
                1 => {
                    injector
                        .when_called(injectorpp::func!(fn(get_other_value_stack_test)() -> i32))
                        .will_execute(injectorpp::fake!(
                            func_type: fn() -> i32,
                            returns: 200
                        ));
                }
                _ => {
                    injector
                        .when_called(injectorpp::func!(fn(is_enabled_stack_test)() -> bool))
                        .will_return_boolean(true);
                }
            };

            // Synchronize: all threads have patched before anyone recurses.
            b.wait();

            // Deep recursion — would crash if JIT memory disrupted the stack
            // guard page (the original STATUS_STACK_OVERFLOW failure mode).
            let result = consume_stack(1500);
            assert!(
                result > 0,
                "Thread {i} should complete deep recursion without stack overflow"
            );
        }));
    }

    // Join every worker and surface any per-thread panic with its index.
    for (i, h) in handles.into_iter().enumerate() {
        h.join()
            .unwrap_or_else(|_| panic!("Thread {i} panicked — possible stack overflow"));
    }
}
Loading