Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions litebox_common_linux/src/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,10 @@ impl ElfParsedFile {
)
};

// Validate trampoline size
// Size=0 sentinel: the rewriter processed this binary but found no
// syscall instructions, so there is no trampoline region to map.
if trampoline_size == 0 {
return Err(ElfParseError::BadTrampoline);
return Ok(());
}

// Verify the file offset is page-aligned (as required by the rewriter)
Expand Down
39 changes: 29 additions & 10 deletions litebox_platform_linux_userland/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,8 @@ core::arch::global_asm!(
"
.section .tbss
.align 8
saved_restart_addr:
.quad 0
scratch:
.quad 0
host_sp:
Expand Down Expand Up @@ -563,7 +565,7 @@ fn get_guest_fsbase() -> usize {
///
/// This saves all non-volatile register state then switches to the guest
/// context. When the guest makes a syscall, it jumps back into the middle of
/// this routine, at `syscall_callback`. This code then updates the guest
/// this routine, at the syscall callback. This code then updates the guest
/// context structure, switches back to the host stack, and calls the syscall
/// handler.
///
Expand Down Expand Up @@ -616,22 +618,34 @@ unsafe extern "C-unwind" fn run_thread_arch(
// At entry, the register context is the guest context with the
// return address in rcx. r11 is an available scratch register (it would
// contain rflags if the syscall instruction had actually been issued).
.globl syscall_callback
syscall_callback:
.globl syscall_callback_redzone
syscall_callback_redzone:
// the trampoline has already reserved 128 bytes below RSP to protect the
// SysV red zone.
// Clear in_guest flag. This must be the first instruction to match the
// expectations of `interrupt_signal_handler`.
mov BYTE PTR gs:in_guest@tpoff, 0

// Save guest R11 (syscall call-site restart address from the rewriter
// trampoline) to TLS before it is clobbered by the fsbase/gsbase save
// sequence below. This value is not placed in pt_regs (which holds
// RFLAGS in the r11 slot per the kernel ABI); instead it is kept in
// TLS for future SA_RESTART support.
mov gs:saved_restart_addr@tpoff, r11

// Restore host fs base.
rdfsbase r11
mov gs:guest_fsbase@tpoff, r11
rdgsbase r11
wrfsbase r11

// Switch to the top of the guest context.
mov r11, rsp
// The trampoline lowered RSP by 128 bytes with LEA, so recover the
// architectural guest stack pointer before saving pt_regs.
lea r11, [rsp + 128]
mov rsp, fs:guest_context_top@tpoff

.Lsyscall_save_regs:

// TODO: save float and vector registers (xsave or fxsave)
// Save caller-saved registers
push 0x2b // pt_regs->ss = __USER_DS
Expand All @@ -649,7 +663,7 @@ syscall_callback:
push r8 // pt_regs->r8
push r9 // pt_regs->r9
push r10 // pt_regs->r10
push [rsp + 88] // pt_regs->r11 = rflags
push [rsp + 88] // pt_regs->r11 = rflags (matching real syscall ABI)
push rbx // pt_regs->bx
push rbp // pt_regs->bp
push r12 // pt_regs->r12
Expand Down Expand Up @@ -811,7 +825,7 @@ fn thread_start(
let shim = init_thread.init();

run_thread_inner(shim.as_ref(), &mut ctx, false);
// TODO: have syscall_callback return if we need to terminate the process.
// TODO: have the syscall callback return if we need to terminate the process.
// We should return this value to the caller so load_program can return it
// to the user.
}
Expand Down Expand Up @@ -1584,7 +1598,8 @@ impl litebox::platform::StdioProvider for LinuxUserland {

unsafe extern "C" {
// Defined in asm blocks above
fn syscall_callback() -> isize;
#[cfg(target_arch = "x86_64")]
fn syscall_callback_redzone() -> isize;
fn exception_callback();
fn interrupt_callback();
fn switch_to_guest_start();
Expand Down Expand Up @@ -1665,7 +1680,10 @@ impl ThreadContext<'_> {

impl litebox::platform::SystemInfoProvider for LinuxUserland {
fn get_syscall_entry_point(&self) -> usize {
syscall_callback as *const () as usize
#[cfg(target_arch = "x86_64")]
{
syscall_callback_redzone as *const () as usize
}
}

fn get_vdso_address(&self) -> Option<usize> {
Expand Down Expand Up @@ -2186,7 +2204,8 @@ unsafe fn interrupt_signal_handler(
// FUTURE: handle trampoline code, too. This is somewhat less important
// because it's probably fine for the shim to observe a guest context that
// is inside the trampoline.
if ip == syscall_callback as *const () as usize {
#[cfg(target_arch = "x86_64")]
if ip == syscall_callback_redzone as *const () as usize {
// No need to clear `in_guest` or set interrupt; the syscall handler will
// clear `in_guest` and call into the shim.
return;
Expand Down
46 changes: 35 additions & 11 deletions litebox_platform_windows_userland/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,10 @@ struct TlsState {
host_bp: Cell<*mut u128>,
guest_context_top: Cell<*mut litebox_common_linux::PtRegs>,
scratch: Cell<usize>,
/// Syscall call-site restart address from the rewriter trampoline,
/// saved here for future SA_RESTART support. Not stored in pt_regs
/// (which holds RFLAGS in the r11 slot per the kernel ABI).
saved_restart_addr: Cell<usize>,
is_in_guest: Cell<bool>,
interrupt: Cell<bool>,
continue_context:
Expand All @@ -433,6 +437,7 @@ impl TlsState {
host_bp: Cell::new(core::ptr::null_mut()),
guest_context_top: core::ptr::null_mut::<litebox_common_linux::PtRegs>().into(),
scratch: 0.into(),
saved_restart_addr: 0.into(),
is_in_guest: false.into(),
interrupt: false.into(),
continue_context: Box::default(),
Expand Down Expand Up @@ -480,7 +485,7 @@ fn get_tls_ptr() -> Option<*const TlsState> {
///
/// This saves all non-volatile register state then switches to the guest
/// context. When the guest makes a syscall, it jumps back into the middle of
/// this routine, at `syscall_callback`. This code then updates the guest
/// this routine, at the syscall callback. This code then updates the guest
/// context structure, switches back to the host stack, and calls the syscall
/// handler.
///
Expand Down Expand Up @@ -549,19 +554,37 @@ unsafe extern "C-unwind" fn run_thread_arch(thread_ctx: &mut ThreadContext, tls_
jmp .Ldone

// This entry point is called from the guest when it issues a syscall
// instruction.
// instruction. The rewriter trampoline has already:
// 1. Reserved 128 bytes below RSP to protect the SysV red zone
// 2. Loaded the call-site restart address into R11 (for SA_RESTART)
// 3. Loaded the return address into RCX
//
// At entry, the register context is the guest context with the
// return address in rcx. r11 is an available scratch register (it would
// contain rflags if the syscall instruction had actually been issued).
.globl syscall_callback
syscall_callback:
// All other registers hold guest state.
.globl syscall_callback_redzone
syscall_callback_redzone:
// Save guest R11 (restart address from rewriter trampoline) to
// TEB.ArbitraryUserPointer (gs:[0x28]) before the TLS index lookup
// clobbers R11. This slot is per-thread and the window is very
// narrow: only ~20 instructions of inline asm with no API calls,
// no Rust code, and no DLL activity, so the ntdll loader (which
// also uses this slot for debugger communication) cannot interfere.
mov gs:[0x28], r11
// Get the TLS state from the TLS slot and clear the in-guest flag.
mov r11d, DWORD PTR [rip + {TLS_INDEX}]
mov r11, QWORD PTR gs:[r11 * 8 + TEB_TLS_SLOTS_OFFSET]
mov BYTE PTR [r11 + {IS_IN_GUEST}], 0
// Set rsp to the top of the guest context.
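// Move the restart address from the TEB slot into the TLS state. No GPR
// is free here (R11 holds the TLS pointer, every other register holds
// guest state), so a push/pop pair performs the memory-to-memory copy.
// RSP is back to its entry value once the pair completes, so the stack
// pointer recovery below is unaffected.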
push QWORD PTR gs:[0x28]
pop QWORD PTR [r11 + {SAVED_RESTART_ADDR}]
// Recover the architectural guest stack pointer (undo the 128-byte
// red zone reservation) and store it in SCRATCH. LEA is used instead
// of ADD to avoid clobbering RFLAGS before pushfq.
lea rsp, [rsp + 128]
mov QWORD PTR [r11 + {SCRATCH}], rsp

.Lsyscall_callback_common:
mov rsp, QWORD PTR [r11 + {GUEST_CONTEXT_TOP}]

// TODO: save float and vector registers (xsave or fxsave)
Expand All @@ -581,7 +604,7 @@ syscall_callback:
push r8 // pt_regs->r8
push r9 // pt_regs->r9
push r10 // pt_regs->r10
push [rsp + 88] // pt_regs->r11 = rflags
push [rsp + 88] // pt_regs->r11 = rflags (matching real syscall ABI)
push rbx // pt_regs->bx
push rbp // pt_regs->bp
push r12
Expand Down Expand Up @@ -646,6 +669,7 @@ interrupt_callback:
HOST_BP = const core::mem::offset_of!(TlsState, host_bp),
GUEST_CONTEXT_TOP = const core::mem::offset_of!(TlsState, guest_context_top),
SCRATCH = const core::mem::offset_of!(TlsState, scratch),
SAVED_RESTART_ADDR = const core::mem::offset_of!(TlsState, saved_restart_addr),
IS_IN_GUEST = const core::mem::offset_of!(TlsState, is_in_guest),
);
}
Expand Down Expand Up @@ -1938,7 +1962,7 @@ impl litebox::mm::allocator::MemoryProvider for WindowsUserland {

unsafe extern "C" {
// Defined in asm blocks above
fn syscall_callback() -> isize;
fn syscall_callback_redzone() -> isize;
fn exception_callback() -> isize;
fn interrupt_callback();
fn switch_to_guest_start();
Expand Down Expand Up @@ -2028,7 +2052,7 @@ impl ThreadContext<'_> {

impl litebox::platform::SystemInfoProvider for WindowsUserland {
fn get_syscall_entry_point(&self) -> usize {
syscall_callback as *const () as usize
syscall_callback_redzone as *const () as usize
}

fn get_vdso_address(&self) -> Option<usize> {
Expand Down
7 changes: 4 additions & 3 deletions litebox_rtld_audit/rtld_audit.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ static long do_syscall(long num, long a1, long a2, long a3, long a4, long a5,
register long r8 __asm__("r8") = a5;
register long r9 __asm__("r9") = a6;

__asm__ volatile("leaq 1f(%%rip), %%rcx\n"
__asm__ volatile("leaq -128(%%rsp), %%rsp\n" // reserve red zone
"leaq 1f(%%rip), %%r11\n" // R11 = restart addr (call-site)
"leaq 1f(%%rip), %%rcx\n" // RCX = return addr
"jmp *%[entry]\n"
"1:\n"
: "+r"(rax)
Expand Down Expand Up @@ -324,9 +326,8 @@ unsigned int la_objopen(struct link_map *map,
do_syscall(SYS_munmap, (long)header_page, 0x1000, 0, 0, 0, 0);
syscall_print("[audit] found trampoline header at end of file\n", 47);

// Validate trampoline size
// Validate trampoline size (size=0 is a sentinel meaning "no syscalls to patch")
if (tramp_size_raw == 0) {
syscall_print("[audit] trampoline code size invalid\n", 37);
do_syscall(SYS_close, fd, 0, 0, 0, 0, 0);
return 0;
}
Expand Down
Binary file modified litebox_runner_linux_on_windows_userland/tests/test-bins/litebox_rtld_audit.so
100644 → 100755
Binary file not shown.
72 changes: 68 additions & 4 deletions litebox_syscall_rewriter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ pub fn hook_syscalls_in_elf(input_binary: &[u8], trampoline: Option<u64>) -> Res
}
// Patch syscalls in-place in buf
let mut skipped_addrs = Vec::new();
let mut syscall_insns_found = false;
for s in &text_sections {
let section_data = section_slice_mut(buf, s)?;
match hook_syscalls_in_section(
Expand All @@ -228,13 +229,34 @@ pub fn hook_syscalls_in_elf(input_binary: &[u8], trampoline: Option<u64>) -> Res
dl_sysinfo_int80,
&mut trampoline_data,
) {
Ok(addrs) => skipped_addrs.extend(addrs),
Ok(addrs) => {
skipped_addrs.extend(addrs);
syscall_insns_found = true;
}
Err(InternalError::NoSyscallInstructionsFound) => {}
Err(InternalError::Public(e)) => return Err(e),
Err(e) => unreachable!("unexpected internal error: {e:?}"),
}
}

if !syscall_insns_found {
// No syscall instructions found. Append a header-only marker so the
// loader can distinguish "checked by rewriter, nothing to patch" from
// "never processed." The trampoline_size=0 sentinel tells the loader
// to skip trampoline mapping entirely.
// Use the original input (not `buf`) to avoid emitting the phdr
// alignment fixup that was only needed for the `object` crate parser.
let mut out = input_binary.to_vec();
let header = TrampolineHeader64 {
magic: *TRAMPOLINE_MAGIC,
file_offset: 0,
vaddr: 0,
trampoline_size: 0,
};
out.extend_from_slice(header.as_bytes());
return Ok(out);
}

// Build output: [patched ELF][padding to page boundary][trampoline code][header]
let mut out = buf.to_vec();
let remain = out.len() % 0x1000;
Expand Down Expand Up @@ -346,7 +368,9 @@ fn is_already_hooked(input_binary: &[u8], arch: Arch) -> bool {
};

if trampoline_size == 0 {
return false;
// Size=0 sentinel: the rewriter processed this binary but found no
// syscall instructions. It is already hooked (nothing to do).
return true;
}
if file_offset % 0x1000 != 0 {
return false;
Expand Down Expand Up @@ -475,6 +499,8 @@ fn hook_syscalls_in_section(

let return_addr = inst.next_ip();
if arch == Arch::X86_64 {
emit_trampoline_preamble(trampoline_base_addr, replace_start, trampoline_data)?;

// Put jump back location into rcx.
let jmp_back_base = checked_add_u64(
trampoline_base_addr,
Expand Down Expand Up @@ -616,8 +642,8 @@ fn fixup_phdr_alignment(buf: &mut [u8]) {
return;
};

if old_end > buf.len() || new_end > buf.len() {
return; // corrupt phdr table or not enough room
if new_end > buf.len() {
return; // not enough room
}

// Only relocate when the overwritten bytes are padding. Otherwise this would corrupt the file
Expand Down Expand Up @@ -714,6 +740,42 @@ fn checked_sub_u64(base: u64, subtrahend: u64, context: &'static str) -> Result<
.ok_or_else(|| Error::AddressOverflow(format!("{context} address underflow")))
}

/// Emit the trampoline preamble: reserve the SysV red zone and load R11 with
/// the call-site restart address.
///
/// The red zone reservation (`LEA RSP, [RSP - 0x80]`) prevents async guest
/// signal delivery / interrupt handling from clobbering stack locals parked
/// below the architectural RSP.
///
/// R11 is loaded with `call_site_addr` (the address of the original JMP that
/// entered the trampoline) so that SA_RESTART can rewind `ctx.rip` to re-enter
/// the trampoline. The real `syscall` instruction clobbers R11 with RFLAGS, so
/// this register is free from the guest's perspective.
///
/// CONTRACT: R11 carries the call-site restart address from this point until
/// the platform callback saves it to a dedicated TLS variable
/// (`saved_restart_addr`). The platform MUST preserve R11 before any clobbering
/// instructions (fsbase swap, TLS lookup).
fn emit_trampoline_preamble(
trampoline_base_addr: u64,
call_site_addr: u64,
trampoline_data: &mut Vec<u8>,
) -> Result<()> {
// LEA RSP, [RSP - 0x80]
trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x64, 0x24, 0x80]);

// LEA R11, [RIP + disp32] — disp32 targets call_site_addr
let r11_rip = checked_add_u64(
trampoline_base_addr,
trampoline_data.len() as u64 + 7,
"trampoline R11 displacement base",
)?;
let r11_disp = i64::try_from(call_site_addr).unwrap() - i64::try_from(r11_rip).unwrap();
trampoline_data.extend_from_slice(&[0x4C, 0x8D, 0x1D]);
trampoline_data.extend_from_slice(&(i32::try_from(r11_disp).unwrap().to_le_bytes()));
Ok(())
}

fn rel32_bytes(target: u64, base: u64, context: &'static str) -> Result<[u8; 4]> {
let disp = i128::from(target) - i128::from(base);
let disp = i32::try_from(disp).map_err(|_| {
Expand Down Expand Up @@ -939,6 +1001,8 @@ fn hook_syscall_and_after(
)?;

if arch == Arch::X86_64 {
emit_trampoline_preamble(trampoline_base_addr, replace_start, trampoline_data)?;

// Put jump back location into rcx, via lea rcx, [next instruction]
trampoline_data.extend_from_slice(&[0x48, 0x8D, 0x0D]); // LEA RCX, [RIP + disp32]
trampoline_data.extend_from_slice(&6u32.to_le_bytes());
Expand Down
Loading
Loading