From dbc8d476dc60c9d394d9830448e13e46bd173f65 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Fri, 13 Feb 2026 14:44:27 +0100 Subject: [PATCH 1/2] feat(prof): add I/O profiling for macOS Co-authored-by: Claude Opus 4.6 --- profiling/src/io/{got.rs => got_elf64.rs} | 7 +- profiling/src/io/got_macho.rs | 526 ++++++++++++++++++++++ profiling/src/io/mod.rs | 41 +- profiling/src/lib.rs | 4 +- profiling/src/profiling/mod.rs | 24 +- 5 files changed, 570 insertions(+), 32 deletions(-) rename profiling/src/io/{got.rs => got_elf64.rs} (98%) create mode 100644 profiling/src/io/got_macho.rs diff --git a/profiling/src/io/got.rs b/profiling/src/io/got_elf64.rs similarity index 98% rename from profiling/src/io/got.rs rename to profiling/src/io/got_elf64.rs index 3cf3df0ec27..7c2bc98bb31 100644 --- a/profiling/src/io/got.rs +++ b/profiling/src/io/got_elf64.rs @@ -1,3 +1,4 @@ +use super::GotSymbolOverwrite; use crate::bindings::{ Elf64_Dyn, Elf64_Rela, Elf64_Sym, Elf64_Xword, DT_JMPREL, DT_NULL, DT_PLTRELSZ, DT_STRTAB, DT_SYMTAB, PT_DYNAMIC, R_AARCH64_JUMP_SLOT, R_X86_64_JUMP_SLOT, @@ -15,12 +16,6 @@ fn elf64_r_sym(info: Elf64_Xword) -> u32 { (info >> 32) as u32 } -pub struct GotSymbolOverwrite { - pub symbol_name: &'static str, - pub new_func: *mut (), - pub orig_func: *mut *mut (), -} - /// Override the GOT entry for symbols specified in `overwrites`. /// /// See: https://cs4401.walls.ninja/notes/lecture/basics_global_offset_table.html diff --git a/profiling/src/io/got_macho.rs b/profiling/src/io/got_macho.rs new file mode 100644 index 00000000000..e14137c1ae0 --- /dev/null +++ b/profiling/src/io/got_macho.rs @@ -0,0 +1,526 @@ +// Mach-O symbol rebinding for macOS (the equivalent of GOT hooking on ELF/Linux). +// +// On macOS, calls to external functions (e.g. `read()`) go through pointer slots that the +// dynamic linker (`dyld`) fills in. By overwriting these pointers we redirect calls to our +// instrumented versions. 
This is the same idea as fishhook: https://github.com/facebook/fishhook +// +// ## Mach-O layout (the parts we care about) +// +// mach_header_64 +// ├─ LC_SEGMENT_64 __TEXT, __DATA, __DATA_CONST, __LINKEDIT +// ├─ LC_SYMTAB → symbol table + string table (in __LINKEDIT) +// └─ LC_DYSYMTAB → indirect symbol table (in __LINKEDIT) +// +// Symbol pointer slots live in __DATA/__DATA_CONST sections of type `S_LAZY_SYMBOL_POINTERS` +// (resolved on first call) or `S_NON_LAZY_SYMBOL_POINTERS` (resolved at load time). To map a +// slot to its symbol name we follow a chain of indirections: +// +// slot[i] ──▶ indirect_symtab[section.reserved1 + i] ──▶ symtab[idx].n_strx ──▶ "_recv" +// +// All three tables (symbol table, string table, indirect symbol table) live in __LINKEDIT. +// Their file offsets come from LC_SYMTAB/LC_DYSYMTAB; at runtime we convert them via: +// +// linkedit_base = slide + __LINKEDIT.vmaddr - __LINKEDIT.fileoff +// runtime_ptr = linkedit_base + file_offset +// +// The "slide" is the ASLR offset: each image is loaded at a random displacement from its +// preferred base address, and all in-file virtual addresses must be adjusted by this amount. +// +// ## __DATA_CONST and copy-on-write +// +// `__DATA` is writable — we patch it directly. `__DATA_CONST` is made read-only by dyld after +// binding, so we must temporarily call `vm_protect()` with `VM_PROT_COPY` before writing. +// +// `VM_PROT_COPY` triggers copy-on-write: macOS shares physical pages of the same dylib across +// processes, so a plain writable mapping would corrupt every process. With COW, the kernel +// lazily allocates a private page on first write: +// +// Before write: After write: +// Process A ─┐ Process A ──▶ [private page: patched ptr] +// ├──▶ [shared page] +// Process B ─┘ Process B ──▶ [shared page: original ptr] +// +// Restoring `VM_PROT_READ` afterwards makes our private copy read-only again — we keep the +// patched version and other processes are unaffected. 
+// +// ## References +// +// - Apple Mach-O format: https://developer.apple.com/documentation/kernel/mach_header_64 +// - Apple dyld source: https://opensource.apple.com/source/dyld/ +// - Mach-O headers: <mach-o/loader.h>, <mach-o/nlist.h> + +// The libc crate deprecates Mach-O types in favor of the mach2 crate, but we only use a few +// types and don't want to add a dependency just for that. +#![allow(deprecated)] + +use super::GotSymbolOverwrite; +use libc::{c_char, c_void}; +use log::{error, trace}; +use std::ffi::CStr; + +// ----- Mach-O load command types ----- +// These identify what kind of metadata a load command carries. +// See: <mach-o/loader.h> + +/// LC_SYMTAB: Points to the symbol table and string table in __LINKEDIT. +const LC_SYMTAB: u32 = 0x2; +/// LC_DYSYMTAB: Points to the dynamic symbol table (including the indirect symbol table). +const LC_DYSYMTAB: u32 = 0xB; +/// LC_SEGMENT_64: Describes a 64-bit memory segment (__TEXT, __DATA, __LINKEDIT, etc.). +const LC_SEGMENT_64: u32 = 0x19; + +// ----- Section types (lower 8 bits of section.flags) ----- +// These identify the kind of data stored in a section. + +/// Lazily-bound function pointers. dyld resolves these on first call. +const S_LAZY_SYMBOL_POINTERS: u32 = 0x7; +/// Non-lazily-bound function pointers. dyld resolves these at load time. +const S_NON_LAZY_SYMBOL_POINTERS: u32 = 0x6; + +// ----- Special indirect symbol table entries ----- +// Some entries in the indirect symbol table don't refer to real symbols. + +/// The pointer is for a local (non-external) symbol — skip it. +const INDIRECT_SYMBOL_LOCAL: u32 = 0x80000000; +/// The pointer is for an absolute symbol — skip it. +const INDIRECT_SYMBOL_ABS: u32 = 0x40000000; + +/// VM_PROT_COPY: When combined with vm_protect, creates a copy-on-write mapping. +/// Required to make __DATA_CONST temporarily writable without affecting shared pages. 
+const VM_PROT_COPY: libc::vm_prot_t = 0x10; + +// ----- Mach-O structures not available in the `libc` crate ----- +// These mirror the C structs from <mach-o/loader.h> and <mach-o/nlist.h>. +// They must be `#[repr(C)]` to match the exact memory layout the dynamic linker produces. + +/// Corresponds to `struct symtab_command` in <mach-o/loader.h>. +/// Tells us where the symbol table and string table are located in the file (as offsets +/// into __LINKEDIT). +#[repr(C)] +struct SymtabCommand { + cmd: u32, + cmdsize: u32, + /// File offset of the symbol table (array of Nlist64 entries). + symoff: u32, + /// Number of symbol table entries. + nsyms: u32, + /// File offset of the string table (null-terminated strings, indexed by Nlist64.n_strx). + stroff: u32, + /// Size of the string table in bytes. + strsize: u32, +} + +/// Corresponds to `struct dysymtab_command` in <mach-o/loader.h>. +/// We only care about the indirect symbol table fields (`indirectsymoff`/`nindirectsyms`). +#[repr(C)] +struct DysymtabCommand { + cmd: u32, + cmdsize: u32, + ilocalsym: u32, + nlocalsym: u32, + iextdefsym: u32, + nextdefsym: u32, + iundefsym: u32, + nundefsym: u32, + tocoff: u32, + ntoc: u32, + modtaboff: u32, + nmodtab: u32, + extrefsymoff: u32, + nextrefsyms: u32, + /// File offset of the indirect symbol table (array of u32 symbol-table indices). + indirectsymoff: u32, + /// Number of entries in the indirect symbol table. + nindirectsyms: u32, + extreloff: u32, + nextrel: u32, + locreloff: u32, + nlocrel: u32, +} + +/// Corresponds to `struct nlist_64` in <mach-o/nlist.h>. +/// Each entry in the symbol table describes one symbol. +#[repr(C)] +struct Nlist64 { + /// Index into the string table where this symbol's name starts. + /// Note: Mach-O symbol names have a leading underscore (e.g. `_recv` for the C function + /// `recv`). + n_strx: u32, + n_type: u8, + n_sect: u8, + n_desc: u16, + n_value: u64, +} + +/// Corresponds to `struct section_64` in <mach-o/loader.h>. +/// Describes a section within a segment (e.g. `__la_symbol_ptr` within `__DATA`). 
+#[repr(C)] +struct Section64 { + sectname: [u8; 16], + segname: [u8; 16], + /// Virtual memory address of this section (before ASLR slide). + addr: u64, + /// Size of this section in bytes. For symbol pointer sections, dividing by pointer size + /// gives the number of pointer entries. + size: u64, + offset: u32, + align: u32, + reloff: u32, + nreloc: u32, + /// Section type in the lower 8 bits (S_LAZY_SYMBOL_POINTERS, etc.) plus attributes in + /// upper bits. + flags: u32, + /// For symbol pointer sections: the index into the indirect symbol table where this + /// section's entries begin. + reserved1: u32, + reserved2: u32, + reserved3: u32, +} + +// ----- macOS dyld API ----- +// These functions are provided by the dynamic linker and let us enumerate all loaded images +// (the main executable and every loaded dylib). +extern "C" { + /// Returns the number of currently loaded Mach-O images. + fn _dyld_image_count() -> u32; + /// Returns a pointer to the mach_header_64 for the image at the given index. + fn _dyld_get_image_header(image_index: u32) -> *const libc::mach_header_64; + /// Returns the ASLR slide for the image at the given index. + fn _dyld_get_image_vmaddr_slide(image_index: u32) -> isize; + /// Returns the file path of the image at the given index. + fn _dyld_get_image_name(image_index: u32) -> *const c_char; + + /// Mach kernel call to change memory protection on a range of pages. + /// Used to make __DATA_CONST temporarily writable. + fn vm_protect( + target_task: libc::mach_port_t, + address: libc::mach_vm_address_t, + size: libc::mach_vm_size_t, + set_maximum: libc::boolean_t, + new_protection: libc::vm_prot_t, + ) -> libc::kern_return_t; + /// Returns the Mach port for the current task (needed by vm_protect). + fn mach_task_self() -> libc::mach_port_t; +} + +/// Rebind symbols in all currently loaded Mach-O images. 
+/// +/// This implements a [fishhook](https://github.com/facebook/fishhook)-style approach: +/// for each loaded dylib/binary, parse its Mach-O load commands to find `__la_symbol_ptr` +/// and `__nl_symbol_ptr` sections, resolve symbol names via the indirect symbol table, and +/// patch matching entries to redirect to our instrumented functions. +/// +/// Note: This only hooks images that are already loaded. Dylibs loaded later via `dlopen()` +/// will NOT be hooked. This is fine for our use case because PHP extensions are loaded before +/// the first RINIT where we call this. +/// +/// # Safety +/// This function modifies symbol pointers in loaded images. It must only be called once during +/// initialization, from a single thread, before the hooked functions are called concurrently. +/// The pointer arithmetic is safe because all images were successfully loaded by dyld — if an +/// image had an invalid Mach-O structure, dyld would have rejected it. +pub unsafe fn rebind_symbols(overwrites: &mut Vec) { + // Use dladdr on one of our own functions to find our image's base address, so we can + // skip patching our own image (we don't want to hook our own calls to libc). 
+ let mut my_info: libc::Dl_info = std::mem::zeroed(); + if libc::dladdr(rebind_symbols as *const c_void, &mut my_info) == 0 { + error!("Did not find my own `dladdr` and therefore can't hook into the GOT."); + return; + } + let my_base_addr = my_info.dli_fbase as usize; + + // Iterate over every loaded Mach-O image (main executable + all dylibs) + let image_count = _dyld_image_count(); + for i in 0..image_count { + let header = _dyld_get_image_header(i); + if header.is_null() { + continue; + } + + // Skip our own image — we don't want to intercept our own libc calls + if header as usize == my_base_addr { + continue; + } + + let slide = _dyld_get_image_vmaddr_slide(i); + let name_ptr = _dyld_get_image_name(i); + let name = if name_ptr.is_null() { + "[Unknown]" + } else { + CStr::from_ptr(name_ptr).to_str().unwrap_or("[Unknown]") + }; + + if rebind_symbols_for_image(header, slide, overwrites) { + trace!("Hooked into {name}"); + } else { + trace!("Hooking {name} skipped or failed"); + } + } +} + +/// Rebind symbols for a single Mach-O image. +/// +/// This function does two passes over the load commands: +/// +/// **Pass 1**: Locate the three pieces of metadata we need: +/// - `LC_SYMTAB` → symbol table (maps index → symbol name via string table) +/// - `LC_DYSYMTAB` → indirect symbol table (maps symbol-pointer slot → symbol table index) +/// - `__LINKEDIT` segment → base address for computing runtime pointers from file offsets +/// +/// **Pass 2**: Walk `__DATA` and `__DATA_CONST` segments, find symbol pointer sections +/// (`__la_symbol_ptr` / `__nl_symbol_ptr`), and patch matching entries. +unsafe fn rebind_symbols_for_image( + header: *const libc::mach_header_64, + slide: isize, + overwrites: &mut Vec, +) -> bool { + if (*header).magic != libc::MH_MAGIC_64 { + trace!("Skipping image: not a 64-bit Mach-O (magic: {:#x})", (*header).magic); + return false; + } + + // Load commands are stored sequentially right after the mach_header_64. 
Each command has + // a `cmd` type and `cmdsize` telling us how many bytes to skip to reach the next command. + let mut cmd_ptr = (header as *const u8).add(std::mem::size_of::()); + let ncmds = (*header).ncmds; + + let mut symtab_cmd: *const SymtabCommand = std::ptr::null(); + let mut dysymtab_cmd: *const DysymtabCommand = std::ptr::null(); + let mut linkedit_base: usize = 0; + let mut linkedit_found = false; + + // ---- Pass 1: Locate LC_SYMTAB, LC_DYSYMTAB, and __LINKEDIT segment ---- + for _ in 0..ncmds { + let lc = &*(cmd_ptr as *const libc::load_command); + + match lc.cmd { + LC_SYMTAB => { + symtab_cmd = cmd_ptr as *const SymtabCommand; + } + LC_DYSYMTAB => { + dysymtab_cmd = cmd_ptr as *const DysymtabCommand; + } + LC_SEGMENT_64 => { + let seg = &*(cmd_ptr as *const libc::segment_command_64); + let segname = seg_name(seg); + if segname == "__LINKEDIT" { + // Compute the base address for __LINKEDIT data at runtime. + // File offsets in LC_SYMTAB/LC_DYSYMTAB are relative to the start of the + // file. At runtime, __LINKEDIT is mapped at (vmaddr + slide). By subtracting + // the file offset of __LINKEDIT itself, we get a base we can add any file + // offset to in order to get a valid runtime pointer. + linkedit_base = + (slide as usize).wrapping_add(seg.vmaddr as usize) - seg.fileoff as usize; + linkedit_found = true; + } + } + _ => {} + } + + cmd_ptr = cmd_ptr.add(lc.cmdsize as usize); + } + + if symtab_cmd.is_null() || dysymtab_cmd.is_null() || !linkedit_found { + trace!( + "Failed to locate required Mach-O sections (LC_SYMTAB: {}, LC_DYSYMTAB: {}, __LINKEDIT: {})", + !symtab_cmd.is_null(), + !dysymtab_cmd.is_null(), + linkedit_found, + ); + return false; + } + + // Convert file offsets from LC_SYMTAB and LC_DYSYMTAB into runtime pointers using + // linkedit_base (see diagram in the module-level documentation). 
+ let symtab = (linkedit_base + (*symtab_cmd).symoff as usize) as *const Nlist64; + let strtab = (linkedit_base + (*symtab_cmd).stroff as usize) as *const c_char; + let indirect_symtab = + (linkedit_base + (*dysymtab_cmd).indirectsymoff as usize) as *const u32; + + // ---- Pass 2: Find symbol pointer sections in __DATA / __DATA_CONST and patch them ---- + cmd_ptr = (header as *const u8).add(std::mem::size_of::()); + let mut hooked = false; + + for _ in 0..ncmds { + let lc = &*(cmd_ptr as *const libc::load_command); + + if lc.cmd == LC_SEGMENT_64 { + let seg = &*(cmd_ptr as *const libc::segment_command_64); + let segname = seg_name(seg); + + // Symbol pointers live in __DATA (writable) or __DATA_CONST (read-only after + // dyld finishes binding). Other segments (__TEXT, __LINKEDIT) don't contain + // symbol pointers we can patch. + if segname == "__DATA" || segname == "__DATA_CONST" { + // Sections are stored immediately after the segment_command_64 struct + let sections_ptr = cmd_ptr.add(std::mem::size_of::()) + as *const Section64; + + for j in 0..seg.nsects { + let section = &*sections_ptr.add(j as usize); + let section_type = section.flags & 0xFF; // Lower 8 bits = section type + + // Only process symbol pointer sections (this includes `__got`, whose type is + // S_NON_LAZY_SYMBOL_POINTERS) — skip others like __mod_init_func, __const, etc. + if section_type != S_LAZY_SYMBOL_POINTERS + && section_type != S_NON_LAZY_SYMBOL_POINTERS + { + continue; + } + + if rebind_symbols_in_section( + section, + slide, + symtab, + strtab, + indirect_symtab, + overwrites, + segname == "__DATA_CONST", + ) { + hooked = true; + } + } + } + } + + cmd_ptr = cmd_ptr.add(lc.cmdsize as usize); + } + + hooked +} + +/// Rebind symbols in a single `__la_symbol_ptr` or `__nl_symbol_ptr` section. +/// +/// For each pointer slot in the section: +/// 1. Look up the corresponding entry in the indirect symbol table (at offset `section.reserved1 +/// + i`) to get a symbol table index. +/// 2. 
Use that index into the symbol table (nlist64 array) to find the symbol's string table +/// offset (`n_strx`). +/// 3. Read the symbol name from the string table. Mach-O symbols have a leading underscore +/// (e.g. `_recv` for C's `recv()`), which we strip for comparison. +/// 4. If the name matches one of our overwrites, save the current pointer as the "original" +/// function and replace it with our instrumented version. +unsafe fn rebind_symbols_in_section( + section: &Section64, + slide: isize, + symtab: *const Nlist64, + strtab: *const c_char, + indirect_symtab: *const u32, + overwrites: &mut Vec, + is_data_const: bool, +) -> bool { + // Each slot in the section is one pointer (8 bytes on 64-bit). The number of slots is + // the section size divided by pointer size. + let num_indirect_syms = section.size as usize / std::mem::size_of::<*mut c_void>(); + + // The indirect symbol table entries for this section start at index `section.reserved1`. + // Entry `indirect_sym_indices[i]` tells us which symbol table entry corresponds to slot `i`. + let indirect_sym_indices = indirect_symtab.add(section.reserved1 as usize); + + // The actual pointer slots in memory (adjusted by ASLR slide). 
+ let symbol_ptrs = + ((slide as usize).wrapping_add(section.addr as usize)) as *mut *mut c_void; + + let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize; + let mut hooked = false; + + for i in 0..num_indirect_syms { + // Step 1: Get the symbol table index from the indirect symbol table + let symtab_index = *indirect_sym_indices.add(i); + + // Skip special entries that don't refer to real external symbols + if symtab_index == INDIRECT_SYMBOL_LOCAL + || symtab_index == INDIRECT_SYMBOL_ABS + || symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS) + { + continue; + } + + // Step 2: Look up the symbol in the symbol table to get its name + let nlist = &*symtab.add(symtab_index as usize); + let name_ptr = strtab.add(nlist.n_strx as usize); + let name = match CStr::from_ptr(name_ptr).to_str() { + Ok(n) => n, + Err(_) => continue, + }; + + // Step 3: Strip the Mach-O leading underscore (e.g. "_recv" → "recv") so we can + // compare against our overwrite list which uses plain C names. + let name = name.strip_prefix('_').unwrap_or(name); + + // Step 4: Check if this symbol is one we want to hook + for overwrite in overwrites.iter_mut() { + if name != overwrite.symbol_name { + continue; + } + + let slot = symbol_ptrs.add(i); + + // __DATA_CONST is read-only at this point (dyld marks it read-only after initial + // binding). We need to temporarily make the page writable using vm_protect with + // VM_PROT_COPY, which creates a copy-on-write mapping so we only affect our + // process, not shared pages. 
+ if is_data_const { + let page_start = (slot as usize) & !(page_size - 1); + let result = vm_protect( + mach_task_self(), + page_start as libc::mach_vm_address_t, + page_size as libc::mach_vm_size_t, + 0, + libc::VM_PROT_READ | libc::VM_PROT_WRITE | VM_PROT_COPY, + ); + if result != 0 { + trace!("vm_protect failed for {name}: kern_return {result}"); + continue; + } + } + + trace!( + "Overriding symbol pointer for {} at {:p} pointing to {:p} (orig function at {:p})", + overwrite.symbol_name, + slot, + *slot, + *overwrite.orig_func, + ); + + // Save the original function pointer from the slot before we overwrite it. + // This is written on every matching image (last writer wins), but that's fine: + // by first RINIT all lazy bindings for common libc functions are resolved, so + // every image's slot points to the same canonical address in libSystem. + *overwrite.orig_func = *slot as *mut (); + *slot = overwrite.new_func as *mut c_void; + hooked = true; + + // Restore read-only protection for __DATA_CONST + if is_data_const { + let page_start = (slot as usize) & !(page_size - 1); + vm_protect( + mach_task_self(), + page_start as libc::mach_vm_address_t, + page_size as libc::mach_vm_size_t, + 0, + libc::VM_PROT_READ, + ); + } + + // Each slot can only match one overwrite, so stop searching + break; + } + } + + hooked +} + +/// Extract the segment name from a `segment_command_64` as a `&str`. +/// +/// Segment names are 16-byte fixed arrays (e.g. `__DATA\0\0\0\0\0\0\0\0\0\0`), so we find the +/// first null byte and return only the valid portion. +fn seg_name(seg: &libc::segment_command_64) -> &str { + let bytes = &seg.segname; + let len = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); + // SAFETY: segment names are always ASCII; cast from &[i8] to &[u8] is safe + // because i8 and u8 have the same size and alignment. 
+ let bytes: &[u8] = + unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const u8, len) }; + std::str::from_utf8(bytes).unwrap_or("") +} diff --git a/profiling/src/io/mod.rs b/profiling/src/io/mod.rs index 625e08903c6..e63f098b587 100644 --- a/profiling/src/io/mod.rs +++ b/profiling/src/io/mod.rs @@ -1,9 +1,11 @@ -pub mod got; +#[cfg(target_os = "linux")] +pub mod got_elf64; +#[cfg(target_os = "macos")] +pub mod got_macho; use crate::profiling::Profiler; use crate::{zend, RefCellExt, REQUEST_LOCALS}; use ahash::{HashMap, HashMapExt}; -use got::GotSymbolOverwrite; use libc::{c_int, c_void, fstat, stat, S_IFMT, S_IFSOCK}; use rand::rngs::ThreadRng; use rand_distr::{Distribution, Poisson}; @@ -30,12 +32,15 @@ struct ErrnoBackup { impl ErrnoBackup { /// Snapshots the current `errno` value. #[inline] - fn new() -> Self { - // SAFETY: errno location is initialized in program/thread startup. - let location = unsafe { libc::__errno_location() }; - // SAFETY: errno pointer is safe to read from same thread. - let errno = unsafe { location.read() }; - Self { errno, location } + unsafe fn new() -> Self { + #[cfg(target_os = "linux")] + let location = libc::__errno_location(); + #[cfg(target_os = "macos")] + let location = libc::__error(); + Self { + errno: *location, + location, + } } } @@ -48,16 +53,24 @@ impl Drop for ErrnoBackup { } } -static mut ORIG_POLL: unsafe extern "C" fn(*mut libc::pollfd, u64, c_int) -> i32 = libc::poll; +pub struct GotSymbolOverwrite { + pub symbol_name: &'static str, + pub new_func: *mut (), + pub orig_func: *mut *mut (), +} + +static mut ORIG_POLL: unsafe extern "C" fn(*mut libc::pollfd, libc::nfds_t, c_int) -> i32 = + libc::poll; + /// The `poll()` libc call has only every been observed when reading/writing to/from a socket, -/// never when reading/writing to a file. There is two known cases in PHP: +/// never when reading/writing to a file. There are two known cases in PHP: /// - the PHP stream layer (e.g. 
`file_get_contents("proto://url")`) /// - the curl extension in `curl_exec()`/`curl_multi_exec()` /// /// The `nfds` argument is usually 1, in case of a `curl_multi_exec()` call it is >= 1 and exactly /// the number of concurrent requests. In rare cases the `nfds` argument is 0 and fds a /// NULL-pointer. This is basically and "old trick" to ms precision sleep() and currently ignored. -unsafe extern "C" fn observed_poll(fds: *mut libc::pollfd, nfds: u64, timeout: c_int) -> i32 { +unsafe extern "C" fn observed_poll(fds: *mut libc::pollfd, nfds: libc::nfds_t, timeout: c_int) -> i32 { let start = Instant::now(); let ret = ORIG_POLL(fds, nfds, timeout); let _errno_backup = ErrnoBackup::new(); @@ -692,10 +705,14 @@ pub fn io_prof_first_rinit() { orig_func: ptr::addr_of_mut!(ORIG_POLL) as *mut _ as *mut *mut (), }, ]; + #[cfg(target_os = "linux")] libc::dl_iterate_phdr( - Some(got::callback), + Some(got_elf64::callback), &mut overwrites as *mut _ as *mut libc::c_void, ); + + #[cfg(target_os = "macos")] + got_macho::rebind_symbols(&mut overwrites); }; } } diff --git a/profiling/src/lib.rs b/profiling/src/lib.rs index f65d42de8ca..4f64daafb98 100644 --- a/profiling/src/lib.rs +++ b/profiling/src/lib.rs @@ -15,7 +15,7 @@ mod string_set; #[macro_use] mod allocation; -#[cfg(all(feature = "io_profiling", target_os = "linux"))] +#[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] mod io; mod exception; @@ -673,7 +673,7 @@ extern "C" fn rinit(_type: c_int, _module_number: c_int) -> ZendResult { exception::exception_profiling_first_rinit(); - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] io::io_prof_first_rinit(); allocation::first_rinit(system_settings); diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index ab291cad6a7..e63fa4a585f 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -46,7 
+46,7 @@ use std::sync::{Arc, Barrier, OnceLock}; use std::thread::JoinHandle; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -#[cfg(all(target_os = "linux", feature = "io_profiling"))] +#[cfg(all(any(target_os = "linux", target_os = "macos"), feature = "io_profiling"))] use crate::io::{ FILE_READ_SIZE_PROFILING_INTERVAL, FILE_READ_TIME_PROFILING_INTERVAL, FILE_WRITE_SIZE_PROFILING_INTERVAL, FILE_WRITE_TIME_PROFILING_INTERVAL, @@ -332,7 +332,7 @@ impl TimeCollector { (get_offset("alloc-samples"), get_offset("alloc-size")); // check if we have the IO sample types - #[cfg(all(target_os = "linux", feature = "io_profiling"))] + #[cfg(all(any(target_os = "linux", target_os = "macos"), feature = "io_profiling"))] let ( socket_read_time_offset, socket_read_time_samples_offset, @@ -400,7 +400,7 @@ impl TimeCollector { } } - #[cfg(all(target_os = "linux", feature = "io_profiling"))] + #[cfg(all(any(target_os = "linux", target_os = "macos"), feature = "io_profiling"))] { let add_io_upscaling_rule = |profile: &mut InternalProfile, @@ -1404,7 +1404,7 @@ impl Profiler { } } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_socket_read_time(&self, ed: *mut zend_execute_data, socket_io_read_time: i64) { self.collect_io(ed, |vals| { vals.socket_read_time = socket_io_read_time; @@ -1412,7 +1412,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_socket_write_time(&self, ed: *mut zend_execute_data, socket_io_write_time: i64) { self.collect_io(ed, |vals| { vals.socket_write_time = socket_io_write_time; @@ -1420,7 +1420,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_file_read_time(&self, 
ed: *mut zend_execute_data, file_io_read_time: i64) { self.collect_io(ed, |vals| { vals.file_read_time = file_io_read_time; @@ -1428,7 +1428,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_file_write_time(&self, ed: *mut zend_execute_data, file_io_write_time: i64) { self.collect_io(ed, |vals| { vals.file_write_time = file_io_write_time; @@ -1436,7 +1436,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_socket_read_size(&self, ed: *mut zend_execute_data, socket_io_read_size: i64) { self.collect_io(ed, |vals| { vals.socket_read_size = socket_io_read_size; @@ -1444,7 +1444,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_socket_write_size(&self, ed: *mut zend_execute_data, socket_io_write_size: i64) { self.collect_io(ed, |vals| { vals.socket_write_size = socket_io_write_size; @@ -1452,7 +1452,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_file_read_size(&self, ed: *mut zend_execute_data, file_io_read_size: i64) { self.collect_io(ed, |vals| { vals.file_read_size = file_io_read_size; @@ -1460,7 +1460,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_file_write_size(&self, ed: *mut zend_execute_data, file_io_write_size: i64) { self.collect_io(ed, |vals| { vals.file_write_size = file_io_write_size; @@ -1468,7 +1468,7 @@ impl Profiler { }) } - #[cfg(all(feature = "io_profiling", 
target_os = "linux"))] + #[cfg(all(feature = "io_profiling", any(target_os = "linux", target_os = "macos")))] pub fn collect_io(&self, execute_data: *mut zend_execute_data, set_value: F) where F: FnOnce(&mut SampleValues), From 3252a71e0c0443cc21057363fd1879a92febce20 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Mon, 2 Mar 2026 10:26:25 +0100 Subject: [PATCH 2/2] use `mach2` crate instead of deprecated Mach-O types from `libc` crate --- Cargo.lock | 12 +++++++++++- profiling/Cargo.toml | 3 +++ profiling/src/io/got_macho.rs | 35 +++++++++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7fa7a3447f6..6b9ba0c9879 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1332,6 +1332,7 @@ dependencies = [ "libdd-library-config-ffi", "libdd-profiling", "log 0.4.25", + "mach2", "perfcnt", "rand 0.8.5", "rand_distr", @@ -3270,6 +3271,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mach2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a1b95cd5421ec55b445b5ae102f5ea0e768de1f82bd3001e11f426c269c3aea" +dependencies = [ + "libc 0.2.177", +] + [[package]] name = "manual_future" version = "0.1.1" @@ -4203,7 +4213,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "059a34f111a9dee2ce1ac2826a68b24601c4298cfeb1a587c3cb493d5ab46f52" dependencies = [ "libc 0.2.177", - "nix 0.29.0", + "nix 0.30.1", ] [[package]] diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index eb431378862..e41cb405526 100644 --- a/profiling/Cargo.toml +++ b/profiling/Cargo.toml @@ -40,6 +40,9 @@ thiserror = "2" tracing = { version = "0.1", optional = true } uuid = { version = "1.0", features = ["v4"] } +[target.'cfg(target_vendor = "apple")'.dependencies] +mach2 = "0.5" # version 0.6 requires edition="2024" + 
[dependencies.tracing-subscriber] version = "0.3" optional = true diff --git a/profiling/src/io/got_macho.rs b/profiling/src/io/got_macho.rs index e14137c1ae0..4d953d42740 100644 --- a/profiling/src/io/got_macho.rs +++ b/profiling/src/io/got_macho.rs @@ -6,7 +6,7 @@ // // ## Mach-O layout (the parts we care about) // -// mach_header_64 +// mach_header // ├─ LC_SEGMENT_64 __TEXT, __DATA, __DATA_CONST, __LINKEDIT // ├─ LC_SYMTAB → symbol table + string table (in __LINKEDIT) // └─ LC_DYSYMTAB → indirect symbol table (in __LINKEDIT) @@ -51,11 +51,12 @@ // The libc crate deprecates Mach-O types in favor of the mach2 crate, but we only use a few // types and don't want to add a dependency just for that. -#![allow(deprecated)] +// #![allow(deprecated)] use super::GotSymbolOverwrite; use libc::{c_char, c_void}; use log::{error, trace}; +use mach2::loader; use std::ffi::CStr; // ----- Mach-O load command types ----- @@ -152,6 +153,24 @@ struct Nlist64 { n_value: u64, } +/// Corresponds to `struct mach_header_64` in . +/// Used only for computing the correct load-command offset on 64-bit images. +/// We intentionally define this locally: +/// - `libc::mach_header_64` has the right layout but is deprecated. +/// - `mach2::loader::mach_header` is missing the 64-bit `reserved` field (28 bytes vs 32), +/// so using it for `size_of`/offset calculations would misalign load-command parsing. +#[repr(C)] +struct MachHeader64 { + magic: u32, + cputype: libc::cpu_type_t, + cpusubtype: libc::cpu_subtype_t, + filetype: u32, + ncmds: u32, + sizeofcmds: u32, + flags: u32, + reserved: u32, +} + /// Corresponds to `struct section_64` in . /// Describes a section within a segment (e.g. `__la_symbol_ptr` within `__DATA`). #[repr(C)] @@ -183,8 +202,8 @@ struct Section64 { extern "C" { /// Returns the number of currently loaded Mach-O images. fn _dyld_image_count() -> u32; - /// Returns a pointer to the mach_header_64 for the image at the given index. 
- fn _dyld_get_image_header(image_index: u32) -> *const libc::mach_header_64; + /// Returns a pointer to the mach_header for the image at the given index. + fn _dyld_get_image_header(image_index: u32) -> *const loader::mach_header; /// Returns the ASLR slide for the image at the given index. fn _dyld_get_image_vmaddr_slide(image_index: u32) -> isize; /// Returns the file path of the image at the given index. @@ -270,7 +289,7 @@ pub unsafe fn rebind_symbols(overwrites: &mut Vec) { /// **Pass 2**: Walk `__DATA` and `__DATA_CONST` segments, find symbol pointer sections /// (`__la_symbol_ptr` / `__nl_symbol_ptr`), and patch matching entries. unsafe fn rebind_symbols_for_image( - header: *const libc::mach_header_64, + header: *const loader::mach_header, slide: isize, overwrites: &mut Vec, ) -> bool { @@ -281,7 +300,7 @@ unsafe fn rebind_symbols_for_image( // Load commands are stored sequentially right after the mach_header_64. Each command has // a `cmd` type and `cmdsize` telling us how many bytes to skip to reach the next command. - let mut cmd_ptr = (header as *const u8).add(std::mem::size_of::()); + let mut cmd_ptr = (header as *const u8).add(std::mem::size_of::()); let ncmds = (*header).ncmds; let mut symtab_cmd: *const SymtabCommand = std::ptr::null(); @@ -338,7 +357,7 @@ unsafe fn rebind_symbols_for_image( (linkedit_base + (*dysymtab_cmd).indirectsymoff as usize) as *const u32; // ---- Pass 2: Find symbol pointer sections in __DATA / __DATA_CONST and patch them ---- - cmd_ptr = (header as *const u8).add(std::mem::size_of::()); + cmd_ptr = (header as *const u8).add(std::mem::size_of::()); let mut hooked = false; for _ in 0..ncmds { @@ -406,7 +425,7 @@ unsafe fn rebind_symbols_in_section( symtab: *const Nlist64, strtab: *const c_char, indirect_symtab: *const u32, - overwrites: &mut Vec, + overwrites: &mut [GotSymbolOverwrite], is_data_const: bool, ) -> bool { // Each slot in the section is one pointer (8 bytes on 64-bit). The number of slots is