Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 141 additions & 23 deletions src/injector_core/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,33 +213,47 @@ fn allocate_jit_memory_windows(_src: &FuncPtrInternal, code_size: usize) -> *mut

#[cfg(target_arch = "x86_64")]
{
let max_range: usize = 0x8000_0000; // ±2GB
let original_addr = _src.as_ptr() as usize;
let page_size = unsafe { get_page_size() };
let mut addr = original_addr.saturating_sub(max_range);

while addr <= original_addr + max_range {
let ptr = unsafe {
VirtualAlloc(
addr as *mut c_void,
code_size,
MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE,
)
};

if !ptr.is_null() {
let allocated = ptr as usize;
if allocated.abs_diff(original_addr) <= max_range {
return ptr as *mut u8;
let max_range: u64 = 0x8000_0000; // ±2GB
let original_addr = _src.as_ptr() as u64;
let page_size = unsafe { get_page_size() as u64 };

// Search outward from the function address to find the CLOSEST free page.
// This avoids allocating far from the function (e.g., in/near stack memory),
// which could disrupt the stack guard page and cause STATUS_STACK_OVERFLOW.
let mut offset: u64 = 0;
while offset <= max_range {
for &dir in &[1i64, -1i64] {
let hint = if dir > 0 {
original_addr.checked_add(offset)
} else if offset > 0 {
original_addr.checked_sub(offset)
} else {
unsafe {
VirtualFree(ptr, 0, MEM_RELEASE);
continue; // Already tried offset=0 with dir=1
};

let Some(hint_addr) = hint else { continue };

let ptr = unsafe {
VirtualAlloc(
hint_addr as *mut c_void,
code_size,
MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE,
)
};
if !ptr.is_null() {
let allocated = ptr as u64;
let diff = allocated.abs_diff(original_addr);
if diff <= max_range {
return ptr as *mut u8;
} else {
unsafe {
VirtualFree(ptr, 0, MEM_RELEASE);
}
}
}
}

addr += page_size;
offset += page_size;
}

panic!("Failed to allocate executable memory within ±2GB of original function address on x86_64 Windows");
Expand Down Expand Up @@ -496,3 +510,107 @@ unsafe fn clear_cache(start: *mut u8, end: *mut u8) {
core::arch::asm!("dsb sy", "isb", options(nostack, nomem));
}
}

#[cfg(test)]
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
mod tests {
    use super::*;

    /// Dummy "source" function whose address anchors the JIT allocation tests.
    #[inline(never)]
    fn dummy_target_function() -> i32 {
        std::hint::black_box(42)
    }

    /// Platform-specific release of a region returned by `allocate_jit_memory`.
    fn release_jit(ptr: *mut u8, size: usize) {
        unsafe {
            #[cfg(any(target_os = "linux", target_os = "macos"))]
            {
                libc::munmap(ptr as *mut c_void, size);
            }
            #[cfg(target_os = "windows")]
            {
                // MEM_RELEASE frees the whole reservation; size must be 0.
                let _ = size;
                VirtualFree(ptr as *mut c_void, 0, MEM_RELEASE);
            }
        }
    }

    /// The allocator must return memory CLOSE to the source function rather
    /// than at the far edge of the ±2GB window. The old x86_64 Windows code
    /// scanned linearly from `func_addr - 2GB` and could land in/near stack
    /// memory; the fixed code searches outward from the function address.
    #[test]
    fn test_jit_allocation_is_close_to_source() {
        let src_fn = unsafe {
            FuncPtrInternal::new(
                std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(),
            )
        };
        let func_addr = src_fn.as_ptr() as u64;

        let alloc = allocate_jit_memory(&src_fn, 256);
        assert!(!alloc.is_null(), "JIT allocation should succeed");

        let jit_addr = alloc as u64;
        let distance = func_addr.abs_diff(jit_addr);

        // Close means within 128MB; the old buggy code would often return
        // addresses ~2GB away, near the stack.
        let max_acceptable_distance: u64 = 128 * 1024 * 1024; // 128MB
        assert!(
            distance <= max_acceptable_distance,
            "JIT memory should be allocated close to the function. \
            Function at {func_addr:#x}, JIT at {jit_addr:#x}, distance: {distance} bytes ({} MB). \
            Expected within {max_acceptable_distance} bytes ({} MB).",
            distance / (1024 * 1024),
            max_acceptable_distance / (1024 * 1024),
        );

        release_jit(alloc, 256);
    }

    /// The allocation must not land inside (or right next to) the current
    /// thread's stack — the root cause of the STATUS_STACK_OVERFLOW crash:
    /// JIT pages near the stack can disrupt the guard page.
    #[test]
    fn test_jit_allocation_not_in_stack_region() {
        let src_fn = unsafe {
            FuncPtrInternal::new(
                std::ptr::NonNull::new(dummy_target_function as *mut ()).unwrap(),
            )
        };

        // A local's address approximates where the stack currently lives.
        let probe: u64 = 0;
        let stack_addr = &probe as *const u64 as u64;

        let alloc = allocate_jit_memory(&src_fn, 256);
        assert!(!alloc.is_null(), "JIT allocation should succeed");

        let jit_addr = alloc as u64;
        // Windows x86_64 stacks are typically 1-8MB; 16MB is a conservative
        // exclusion zone.
        let stack_guard_zone: u64 = 16 * 1024 * 1024;
        let distance_to_stack = jit_addr.abs_diff(stack_addr);

        assert!(
            distance_to_stack > stack_guard_zone,
            "JIT memory should NOT be near the stack! \
            JIT at {jit_addr:#x}, stack approx at {stack_addr:#x}, \
            distance: {distance_to_stack} bytes ({} MB). \
            Must be > {stack_guard_zone} bytes ({} MB) from stack.",
            distance_to_stack / (1024 * 1024),
            stack_guard_zone / (1024 * 1024),
        );

        release_jit(alloc, 256);
    }
}
121 changes: 121 additions & 0 deletions tests/stack_safety.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Test that JIT memory allocation does not interfere with stack growth.
//
// The old x86_64 Windows implementation of `allocate_jit_memory` scanned linearly
// from `func_addr - 2GB`, which could allocate memory in/near the stack region,
// disrupting the stack guard page and causing STATUS_STACK_OVERFLOW (0xc00000fd)
// during parallel test execution.
//
// These tests verify that after patching functions, deep stack usage still works.
#![cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "arm"))]

use std::sync::{Arc, Barrier};
use std::thread;

use injectorpp::interface::injector::*;

/// Patch target for the stack-safety tests; `black_box` + `inline(never)`
/// keep it a real, addressable function the injector can rewrite.
#[inline(never)]
fn get_value_stack_test() -> i32 {
    let real_value = 1;
    std::hint::black_box(real_value)
}

/// Second patch target, so concurrent tests can fake distinct functions.
#[inline(never)]
fn get_other_value_stack_test() -> i32 {
    let real_value = 2;
    std::hint::black_box(real_value)
}

/// Boolean patch target; unpatched it always reports "disabled".
#[inline(never)]
fn is_enabled_stack_test() -> bool {
    let real_value = false;
    std::hint::black_box(real_value)
}

/// Burn stack space via recursion: every frame pins a 256-byte array, so
/// depth 2000 touches roughly 512KB of the thread's stack. Always returns 1
/// (the base case), which callers use as a "recursion completed" signal.
#[inline(never)]
fn consume_stack(remaining: u32) -> u64 {
    match remaining {
        0 => 1,
        _ => {
            // `black_box` stops the optimizer from eliding the frame buffer.
            let frame_pad = std::hint::black_box([0u8; 256]);
            let below = consume_stack(remaining - 1);
            below + std::hint::black_box(frame_pad[0] as u64)
        }
    }
}

/// Patch a function, then recurse deeply. With the old buggy JIT allocator
/// the stack guard page could be disrupted, so even moderate recursion
/// depths crashed with STATUS_STACK_OVERFLOW; this must now succeed.
#[test]
fn test_stack_growth_works_after_patching() {
    let mut patcher = InjectorPP::new();
    patcher
        .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32))
        .will_execute(injectorpp::fake!(
            func_type: fn() -> i32,
            returns: 42
        ));

    // The fake must be active before we stress the stack.
    assert_eq!(get_value_stack_test(), 42);

    // Deep recursion — if JIT memory had been allocated in the stack region
    // (the old bug), this would crash instead of returning.
    let deep = consume_stack(2000);
    assert!(deep > 0, "Deep recursion should succeed after patching");
}

/// Multiple threads concurrently patch different functions and then exercise
/// deep stack usage. This replicates the conditions of the original crash:
/// parallel tests + injectorpp patching + significant stack consumption.
///
/// NOTE(review): with 8 threads and `i % 3` dispatch, several threads patch
/// the SAME target concurrently from separate `InjectorPP` instances —
/// presumably the library serializes this internally; confirm it is supported.
#[test]
fn test_concurrent_patching_with_deep_stack_usage() {
    let thread_count = 8;
    // Barrier so every thread finishes patching before any thread recurses.
    let barrier = Arc::new(Barrier::new(thread_count));
    let mut handles = Vec::new();

    for i in 0..thread_count {
        let b = barrier.clone();
        handles.push(thread::spawn(move || {
            let mut injector = InjectorPP::new();

            // Each thread patches one of three functions, chosen round-robin.
            match i % 3 {
                0 => {
                    injector
                        .when_called(injectorpp::func!(fn(get_value_stack_test)() -> i32))
                        .will_execute(injectorpp::fake!(
                            func_type: fn() -> i32,
                            returns: 100
                        ));
                }
                1 => {
                    injector
                        .when_called(injectorpp::func!(fn(get_other_value_stack_test)() -> i32))
                        .will_execute(injectorpp::fake!(
                            func_type: fn() -> i32,
                            returns: 200
                        ));
                }
                _ => {
                    injector
                        .when_called(injectorpp::func!(fn(is_enabled_stack_test)() -> bool))
                        .will_return_boolean(true);
                }
            };

            // Synchronize: all threads have patched before anyone recurses.
            b.wait();

            // Deep recursion — would crash if JIT memory disrupted the stack
            // guard page (the original STATUS_STACK_OVERFLOW failure mode).
            let result = consume_stack(1500);
            assert!(
                result > 0,
                "Thread {i} should complete deep recursion without stack overflow"
            );
        }));
    }

    // Join every worker and surface any per-thread panic with its index.
    for (i, h) in handles.into_iter().enumerate() {
        h.join()
            .unwrap_or_else(|_| panic!("Thread {i} panicked — possible stack overflow"));
    }
}
Loading