Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
PKG_CONFIG_PATH: /usr/lib/arm-linux-gnueabihf/pkgconfig
RUST_BACKTRACE: full
run: |
cargo test -j 1 --target ${{ matrix.target }} --tests -- --nocapture --test-threads=1
cargo test --target ${{ matrix.target }} --tests -- --nocapture

- name: Test (other targets)
if: matrix.target != 'armv7-unknown-linux-gnueabihf' && matrix.target != 'thumbv7neon-unknown-linux-gnueabihf'
Expand Down
81 changes: 71 additions & 10 deletions src/injector_core/thread_local_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,12 @@ fn create_trampoline(func_addr: *mut u8, _method_key: usize) -> (*mut u8, usize,

// The jump-back uses jmp [rip+0] + 8-byte address = 14 bytes
let jump_back_size = 14;
let trampoline_total = copy_size + jump_back_size;
// Reserve extra space for indirect call/jmp stubs. When a CALL/JMP rel32
// in the copied code targets an address that is too far from the trampoline
// for a 32-bit displacement, we emit a `MOV RAX, imm64; JMP RAX` stub
// (12 bytes each). 64 bytes holds up to five such stubs (5 x 12 = 60 bytes);
// a sixth overflow trips the assertion in `fixup_rip_relative_instructions`.
let stub_space = 64;
let trampoline_total = copy_size + jump_back_size + stub_space;

// Allocate executable memory for the trampoline (near original for ±2GB reach)
let near_src =
Expand All @@ -1448,7 +1453,16 @@ fn create_trampoline(func_addr: *mut u8, _method_key: usize) -> (*mut u8, usize,
// instruction's position. After copying to the trampoline at a different address,
// we must adjust disp32 so it still points to the same absolute target.
let delta = func_addr as isize - trampoline as isize;
fixup_rip_relative_instructions(trampoline, &original_code, copy_size, delta);
let stub_start = copy_size + jump_back_size;
fixup_rip_relative_instructions(
trampoline,
&original_code,
copy_size,
delta,
func_addr,
stub_start,
trampoline_total,
);

// Append jump back to original + copy_size
// Using: jmp [rip+0] (FF 25 00 00 00 00) + 8-byte target address
Expand Down Expand Up @@ -1489,13 +1503,26 @@ fn create_trampoline(func_addr: *mut u8, _method_key: usize) -> (*mut u8, usize,
/// and the coverage counter is too far from the trampoline for a 32-bit
/// displacement. NOP-ing the counter increment is safe — it only affects
/// profiling accuracy, not functional behavior.
///
/// For CALL/JMP rel32 overflow, NOP-ing would break program logic (the callee
/// never executes, so its side effects and return value are lost). Instead,
/// we emit an indirect stub at the end of the trampoline:
/// `MOV RAX, <absolute_target>; JMP RAX` (12 bytes)
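/// and rewrite the CALL/JMP to target the stub. The stub preserves CALL
/// control flow: `CALL stub` pushes the return address, then `JMP RAX`
/// transfers control to the target; when the callee returns, execution
/// resumes in the trampoline right after the CALL.
///
/// NOTE: the stub overwrites RAX before the target runs, so the redirect is
/// only transparent when the target does not read RAX on entry (for example,
/// System V variadic callees read AL as the vector-register count).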
fn fixup_rip_relative_instructions(
trampoline: *mut u8,
original_code: &[u8],
copy_size: usize,
delta: isize,
func_addr: *mut u8,
stub_start: usize,
trampoline_alloc_size: usize,
) {
let mut offset = 0;
let mut stub_cursor = stub_start;
while offset < copy_size {
let insn = &original_code[offset..];
let insn_len = x86_64_insn_len(insn);
Expand All @@ -1512,7 +1539,9 @@ fn fixup_rip_relative_instructions(
if new_disp >= i32::MIN as i64 && new_disp <= i32::MAX as i64 {
disp_ptr.write_unaligned(new_disp as i32);
} else {
// Overflow: NOP out the entire instruction in the trampoline
// Overflow: NOP out the entire instruction in the trampoline.
// This is safe for coverage/profiling counter increments
// (lock inc [rip+disp32]) which don't affect program logic.
for i in 0..insn_len {
*trampoline.add(offset + i) = 0x90; // NOP
}
Expand All @@ -1533,10 +1562,38 @@ fn fixup_rip_relative_instructions(
if new_rel >= i32::MIN as i64 && new_rel <= i32::MAX as i64 {
rel_ptr.write_unaligned(new_rel as i32);
} else {
// Overflow: NOP out the entire instruction
for i in 0..insn_len {
*trampoline.add(offset + i) = 0x90;
}
// Overflow: emit an indirect stub and redirect the CALL/JMP.
// Calculate the absolute target address from the original code.
let rip_after_insn =
func_addr as usize + offset + insn_len;
let absolute_target =
(rip_after_insn as i64 + old_rel as i64) as u64;

assert!(
stub_cursor + 12 <= trampoline_alloc_size,
"Trampoline stub space exhausted (too many CALL/JMP rel32 overflows)"
);

// Write stub: MOV RAX, imm64 (48 B8 + 8 bytes) + JMP RAX (FF E0)
let stub_ptr = trampoline.add(stub_cursor);
*stub_ptr = 0x48; // REX.W
*stub_ptr.add(1) = 0xB8; // MOV RAX, imm64
std::ptr::copy_nonoverlapping(
absolute_target.to_le_bytes().as_ptr(),
stub_ptr.add(2),
8,
);
*stub_ptr.add(10) = 0xFF; // JMP RAX
*stub_ptr.add(11) = 0xE0;

// Rewrite the CALL/JMP in the trampoline to target the stub.
// new_rel = stub_addr - rip_after_insn_in_trampoline
let rip_in_trampoline = trampoline as usize + offset + insn_len;
let stub_rel =
(trampoline as usize + stub_cursor) as i64 - rip_in_trampoline as i64;
rel_ptr.write_unaligned(stub_rel as i32);

stub_cursor += 12;
}
}
}
Expand All @@ -1552,9 +1609,13 @@ fn fixup_rip_relative_instructions(
if new_rel >= i32::MIN as i64 && new_rel <= i32::MAX as i64 {
rel_ptr.write_unaligned(new_rel as i32);
} else {
for i in 0..insn_len {
*trampoline.add(offset + i) = 0x90;
}
// Jcc overflow is extremely rare in function prologues.
// Panic rather than silently breaking control flow.
panic!(
"Jcc rel32 displacement overflow in trampoline fixup \
(function at {:p}, offset {}). This case is not yet handled.",
func_addr, offset
);
}
}
}
Expand Down
38 changes: 31 additions & 7 deletions tests/thread_safety.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1008,20 +1008,40 @@ fn concurrent_target() -> i32 {
/// Issue #42 scenario: One thread calls foo() in a tight loop, another thread
/// repeatedly creates/drops fakes. Without thread-local dispatch, this would
/// cause access violations or stack overruns.
///
/// This test verifies that concurrent setup/teardown of fakes does NOT cause
/// crashes, access violations, or incorrect return values.
#[test]
fn test_concurrent_call_during_setup_teardown_issue_42() {
let done = Arc::new(AtomicBool::new(false));
let error_count = Arc::new(AtomicUsize::new(0));

// Thread 1: continuously calls the function without faking
// First, install the dispatcher once so the caller thread never
// races with the initial non-atomic code patch.
{
let mut injector = InjectorPP::new();
injector
.when_called(injectorpp::func!(fn(concurrent_target)() -> i32))
.will_execute(injectorpp::fake!(
func_type: fn() -> i32,
returns: 99
));
assert_eq!(concurrent_target(), 99);
// Drop — dispatcher remains installed, TLS cleared
}
// Verify trampoline returns correct value on this thread
assert_eq!(concurrent_target(), 42);

// Thread 1: continuously calls the function without faking.
// After dispatcher installation, this thread always goes through
// the trampoline and should see the original value (42).
let d1 = done.clone();
let e1 = error_count.clone();
let caller = thread::spawn(move || {
while !d1.load(Ordering::SeqCst) {
while !d1.load(Ordering::Relaxed) {
let val = concurrent_target();
// Should always see 42 (original) since this thread has no fake
if val != 42 {
e1.fetch_add(1, Ordering::SeqCst);
e1.fetch_add(1, Ordering::Relaxed);
}
}
});
Expand All @@ -1037,14 +1057,18 @@ fn test_concurrent_call_during_setup_teardown_issue_42() {
));
// While the fake is active, this thread should see 99
assert_eq!(concurrent_target(), 99);
// Drop injector — restores for this thread
// Drop injector — removes this thread's TLS entry
}

done.store(true, Ordering::SeqCst);
caller.join().unwrap();

assert_eq!(error_count.load(Ordering::SeqCst), 0);
// After all fakes are dropped, original is restored
assert_eq!(
error_count.load(Ordering::SeqCst),
0,
"Caller thread without fake should always see the original value (42)"
);
// After all fakes are dropped, original is restored for this thread
assert_eq!(concurrent_target(), 42);
}

Expand Down
Loading