diff --git a/Cargo.lock b/Cargo.lock index c270d3c3db..73f686164a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1199,6 +1199,20 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "datadog-ipc" version = "0.1.0" @@ -1292,6 +1306,7 @@ dependencies = [ "criterion", "criterion-perf-events", "crossbeam-channel", + "dashmap", "datadog-php-profiling", "env_logger 0.11.6", "http", diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index b1869fd063..ae9369dd8b 100644 --- a/profiling/Cargo.toml +++ b/profiling/Cargo.toml @@ -22,6 +22,7 @@ cfg-if = { version = "1.0" } cpu-time = { version = "1.0" } chrono = { version = "0.4" } crossbeam-channel = { version = "0.5", default-features = false, features = ["std"] } +dashmap = { version = "6.1" } http = { version = "1.4" } libdd-alloc = { git = "https://github.com/DataDog/libdatadog", tag = "v27.0.0" } libdd-profiling = { git = "https://github.com/DataDog/libdatadog", tag = "v27.0.0" } diff --git a/profiling/src/allocation/allocation_ge84.rs b/profiling/src/allocation/allocation_ge84.rs index b73fae8627..e290649440 100644 --- a/profiling/src/allocation/allocation_ge84.rs +++ b/profiling/src/allocation/allocation_ge84.rs @@ -1,4 +1,4 @@ -use crate::allocation::{allocation_profiling_stats_should_collect, collect_allocation}; +use crate::allocation::{allocation_profiling_stats_should_collect, collect_allocation, free_allocation}; use crate::bindings as zend; use crate::PROFILER_NAME; use core::ptr; @@ -286,7 +286,7 @@ unsafe extern "C" fn alloc_prof_malloc(len: size_t) -> *mut c_void { } if allocation_profiling_stats_should_collect(len) { - collect_allocation(len); + collect_allocation(ptr, len); } ptr @@ 
-316,6 +316,11 @@ unsafe fn alloc_prof_orig_alloc(len: size_t) -> *mut c_void { /// custom handlers won't be installed. We cannot just point to the original /// `zend::_zend_mm_free()` as the function definitions differ. unsafe extern "C" fn alloc_prof_free(ptr: *mut c_void) { + // Check if this was a tracked allocation (before freeing!) + if !ptr.is_null() { + free_allocation(ptr); + } + tls_zend_mm_state_get!(free)(ptr); } @@ -348,12 +353,21 @@ unsafe extern "C" fn alloc_prof_realloc(prev_ptr: *mut c_void, len: size_t) -> * // during startup, minit, rinit, ... current_execute_data is null // we are only interested in allocations during userland operations - if zend::ddog_php_prof_get_current_execute_data().is_null() || ptr::eq(ptr, prev_ptr) { + if zend::ddog_php_prof_get_current_execute_data().is_null() { return ptr; } - if allocation_profiling_stats_should_collect(len) { - collect_allocation(len); + // If pointer changed, treat as free(old) + alloc(new) + if !ptr::eq(ptr, prev_ptr) { + // Untrack the old allocation if it was tracked + if !prev_ptr.is_null() { + free_allocation(prev_ptr); + } + + // Sample the new allocation + if allocation_profiling_stats_should_collect(len) { + collect_allocation(ptr, len); + } } ptr diff --git a/profiling/src/allocation/allocation_le83.rs b/profiling/src/allocation/allocation_le83.rs index 9f13b0a42d..8739ddb538 100644 --- a/profiling/src/allocation/allocation_le83.rs +++ b/profiling/src/allocation/allocation_le83.rs @@ -1,4 +1,4 @@ -use crate::allocation::{allocation_profiling_stats_should_collect, collect_allocation}; +use crate::allocation::{allocation_profiling_stats_should_collect, collect_allocation, free_allocation}; use crate::bindings::{ self as zend, datadog_php_install_handler, datadog_php_zif_handler, ddog_php_prof_copy_long_into_zval, @@ -300,7 +300,7 @@ unsafe extern "C" fn alloc_prof_malloc(len: size_t) -> *mut c_void { } if allocation_profiling_stats_should_collect(len) { - collect_allocation(len); + 
collect_allocation(ptr, len); } ptr @@ -330,6 +330,11 @@ unsafe fn alloc_prof_orig_alloc(len: size_t) -> *mut c_void { /// custom handlers won't be installed. We cannot just point to the original /// `zend::_zend_mm_free()` as the function definitions differ. unsafe extern "C" fn alloc_prof_free(ptr: *mut c_void) { + // Check if this was a tracked allocation (before freeing!) + if !ptr.is_null() { + free_allocation(ptr); + } + tls_zend_mm_state_get!(free)(ptr); } @@ -358,12 +363,21 @@ unsafe extern "C" fn alloc_prof_realloc(prev_ptr: *mut c_void, len: size_t) -> * // during startup, minit, rinit, ... current_execute_data is null // we are only interested in allocations during userland operations - if zend::ddog_php_prof_get_current_execute_data().is_null() || ptr::eq(ptr, prev_ptr) { + if zend::ddog_php_prof_get_current_execute_data().is_null() { return ptr; } - if allocation_profiling_stats_should_collect(len) { - collect_allocation(len); + // If pointer changed, treat as free(old) + alloc(new) + if !ptr::eq(ptr, prev_ptr) { + // Untrack the old allocation if it was tracked + if !prev_ptr.is_null() { + free_allocation(prev_ptr); + } + + // Sample the new allocation + if allocation_profiling_stats_should_collect(len) { + collect_allocation(ptr, len); + } } ptr diff --git a/profiling/src/allocation/mod.rs b/profiling/src/allocation/mod.rs index 5dd94e2cb4..6c508b5d2f 100644 --- a/profiling/src/allocation/mod.rs +++ b/profiling/src/allocation/mod.rs @@ -135,8 +135,13 @@ impl AllocationProfilingStats { } } +/// Collect an allocation sample and optionally track it for live heap profiling. 
+/// +/// # Arguments +/// * `ptr` - The pointer returned by the allocator (used for live heap tracking) +/// * `len` - The size of the allocation in bytes #[cold] -pub fn collect_allocation(len: size_t) { +pub fn collect_allocation(ptr: *mut c_void, len: size_t) { if let Some(profiler) = Profiler::get() { // Check if there's a pending time interrupt that we can handle now // instead of waiting for an interrupt handler. This is slightly more @@ -150,6 +155,7 @@ pub fn collect_allocation(len: size_t) { unsafe { profiler.collect_allocations( zend::ddog_php_prof_get_current_execute_data(), + ptr, 1_i64, len as i64, (interrupt_count > 0).then_some(interrupt_count), @@ -158,6 +164,14 @@ pub fn collect_allocation(len: size_t) { } } +/// Called when memory is freed. If this pointer was tracked for live heap, +/// sends the deallocation sample to cancel out the original allocation. +pub fn free_allocation(ptr: *mut c_void) { + if let Some(profiler) = Profiler::get() { + profiler.free_allocation(ptr); + } +} + #[cfg(not(php_zend_mm_set_custom_handlers_ex))] pub fn alloc_prof_startup() { allocation_le83::alloc_prof_startup(); diff --git a/profiling/src/config.rs b/profiling/src/config.rs index 3ddbb4abdc..a71079e1fc 100644 --- a/profiling/src/config.rs +++ b/profiling/src/config.rs @@ -45,6 +45,7 @@ pub struct SystemSettings { pub profiling_experimental_cpu_time_enabled: bool, pub profiling_allocation_enabled: bool, pub profiling_allocation_sampling_distance: NonZeroU32, + pub profiling_heap_live_enabled: bool, pub profiling_timeline_enabled: bool, pub profiling_exception_enabled: bool, pub profiling_exception_message_enabled: bool, @@ -69,6 +70,7 @@ impl SystemSettings { profiling_experimental_cpu_time_enabled: false, profiling_allocation_enabled: false, profiling_allocation_sampling_distance: NonZeroU32::MAX, + profiling_heap_live_enabled: false, profiling_timeline_enabled: false, profiling_exception_enabled: false, profiling_exception_message_enabled: false, @@ -98,6 
+100,7 @@ impl SystemSettings { profiling_experimental_cpu_time_enabled: profiling_experimental_cpu_time_enabled(), profiling_allocation_enabled: profiling_allocation_enabled(), profiling_allocation_sampling_distance: profiling_allocation_sampling_distance(), + profiling_heap_live_enabled: profiling_heap_live_enabled(), profiling_timeline_enabled: profiling_timeline_enabled(), profiling_exception_enabled: profiling_exception_enabled(), profiling_exception_message_enabled: profiling_exception_message_enabled(), @@ -405,6 +408,7 @@ pub(crate) enum ConfigId { ProfilingExperimentalCpuTimeEnabled, ProfilingAllocationEnabled, ProfilingAllocationSamplingDistance, + ProfilingHeapLiveEnabled, ProfilingTimelineEnabled, ProfilingExceptionEnabled, ProfilingExceptionMessageEnabled, @@ -437,6 +441,7 @@ impl ConfigId { ProfilingExperimentalCpuTimeEnabled => b"DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED\0", ProfilingAllocationEnabled => b"DD_PROFILING_ALLOCATION_ENABLED\0", ProfilingAllocationSamplingDistance => b"DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE\0", + ProfilingHeapLiveEnabled => b"DD_PROFILING_HEAP_LIVE_ENABLED\0", ProfilingTimelineEnabled => b"DD_PROFILING_TIMELINE_ENABLED\0", ProfilingExceptionEnabled => b"DD_PROFILING_EXCEPTION_ENABLED\0", ProfilingExceptionMessageEnabled => b"DD_PROFILING_EXCEPTION_MESSAGE_ENABLED\0", @@ -475,6 +480,7 @@ static DEFAULT_SYSTEM_SETTINGS: SystemSettings = SystemSettings { profiling_allocation_enabled: true, // SAFETY: value is > 0. 
profiling_allocation_sampling_distance: unsafe { NonZeroU32::new_unchecked(1024 * 4096) }, + profiling_heap_live_enabled: false, profiling_timeline_enabled: true, profiling_exception_enabled: true, profiling_exception_message_enabled: false, @@ -553,6 +559,17 @@ unsafe fn profiling_allocation_sampling_distance() -> NonZeroU32 { unsafe { NonZeroU32::new_unchecked(int) } } +/// # Safety +/// This function must only be called after config has been initialized in +/// rinit, and before it is uninitialized in mshutdown. +unsafe fn profiling_heap_live_enabled() -> bool { + profiling_allocation_enabled() + && get_system_bool( + ProfilingHeapLiveEnabled, + DEFAULT_SYSTEM_SETTINGS.profiling_heap_live_enabled, + ) +} + /// # Safety /// This function must only be called after config has been initialized in /// rinit, and before it is uninitialized in mshutdown. @@ -1014,6 +1031,18 @@ pub(crate) fn minit(module_number: libc::c_int) { displayer: None, env_config_fallback: None, }, + zai_config_entry { + id: transmute::(ProfilingHeapLiveEnabled), + name: ProfilingHeapLiveEnabled.env_var_name(), + type_: ZAI_CONFIG_TYPE_BOOL, + default_encoded_value: ZaiStr::literal(b"0\0"), + aliases: ptr::null_mut(), + aliases_count: 0, + ini_change: Some(zai_config_system_ini_change), + parser: None, + displayer: None, + env_config_fallback: None, + }, zai_config_entry { id: transmute::(ProfilingTimelineEnabled), name: ProfilingTimelineEnabled.env_var_name(), diff --git a/profiling/src/lib.rs b/profiling/src/lib.rs index 76049aafd9..aeb1093393 100644 --- a/profiling/src/lib.rs +++ b/profiling/src/lib.rs @@ -860,6 +860,19 @@ unsafe extern "C" fn minfo(module_ptr: *mut zend::ModuleEntry) { no_all } ); + zend::php_info_print_table_row( + 2, + c"Heap Live Profiling Enabled".as_ptr(), + if system_settings.profiling_heap_live_enabled { + yes + } else if !system_settings.profiling_allocation_enabled { + c"false (requires allocation profiling)".as_ptr() + } else if system_settings.profiling_enabled { 
+ no + } else { + no_all + }, + ); zend::php_info_print_table_row( 2, c"Timeline Enabled".as_ptr(), diff --git a/profiling/src/profiling/backtrace.rs b/profiling/src/profiling/backtrace.rs index 9dfcc359d9..4897a31822 100644 --- a/profiling/src/profiling/backtrace.rs +++ b/profiling/src/profiling/backtrace.rs @@ -1,7 +1,7 @@ use crate::profiling::stack_walking::ZendFrame; use core::ops::Deref; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Backtrace { frames: Vec, } diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 4c7546d6e1..831c54926b 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -27,6 +27,7 @@ use core::mem::forget; use core::{ptr, str}; use cpu_time::ThreadTime; use crossbeam_channel::{Receiver, Sender, TrySendError}; +use dashmap::DashMap; use libdd_profiling::api::{ Function, Label as ApiLabel, Location, Period, Sample, SampleType as ApiSampleType, UpscalingInfo, ValueType as ApiValueType, @@ -36,7 +37,7 @@ use libdd_profiling::internal::Profile as InternalProfile; use log::{debug, info, trace, warn}; use std::borrow::Cow; use std::collections::HashMap; -use std::hash::Hash; +use std::hash::{BuildHasher, Hash, Hasher}; use std::num::NonZeroI64; use std::sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, AtomicU64, Ordering}; use std::sync::{Arc, Barrier, OnceLock}; @@ -59,6 +60,49 @@ pub const NO_TIMESTAMP: i64 = 0; // magnitude for the capacity. const UPLOAD_CHANNEL_CAPACITY: usize = 8; +/// A fast, non-cryptographic hasher optimized for pointer addresses. +/// Since pointers are already well-distributed and typically aligned, +/// we use a simple bit mixing approach instead of expensive hashing. 
+#[derive(Default)] +struct PointerHasher(u64); + +impl Hasher for PointerHasher { + #[inline] + fn write(&mut self, _bytes: &[u8]) { + unreachable!("PointerHasher only supports write_usize"); + } + + #[inline] + fn write_usize(&mut self, ptr: usize) { + // Multiply by an odd 64-bit constant so the address entropy reaches the + // high bits DashMap shards on and hashbrown tags with, then fold it down. + let mixed = (ptr as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15); + self.0 = mixed ^ (mixed >> 32); + } + + #[inline] + fn finish(&self) -> u64 { + self.0 + } +} + +/// BuildHasher that creates PointerHasher instances. +/// DashMap requires Clone for its internal sharding. +#[derive(Clone)] +struct PointerHasherBuilder; + +impl BuildHasher for PointerHasherBuilder { + type Hasher = PointerHasher; + + #[inline] + fn build_hasher(&self) -> Self::Hasher { + PointerHasher(0) + } +} + +/// Type alias for the heap tracker with our fast pointer hasher +type HeapTracker = DashMap; + /// The global profiler. Profiler gets made during the first rinit after an /// minit, and is destroyed on mshutdown. static mut PROFILER: OnceLock = OnceLock::new(); @@ -93,13 +137,15 @@ pub(crate) fn update_cpu_time_counter(last: &mut Option, counter: &A /// 1. Always enabled types. /// 2. On by default types. /// 3. Off by default types. -#[derive(Default, Debug)] +#[derive(Clone, Default, Debug)] pub struct SampleValues { interrupt_count: i64, wall_time: i64, cpu_time: i64, alloc_samples: i64, alloc_size: i64, + heap_live_samples: i64, + heap_live_size: i64, timeline: i64, exception: i64, socket_read_time: i64, @@ -228,6 +274,26 @@ pub enum ProfilerMessage { Wake, } +/// Tracked allocation for batched heap-live sample emission. +/// Unlike the .NET profiler which tracks CLR objects via weak handles, +/// we track raw allocation pointers. Samples are emitted in batches +/// at profile export time, not on each allocation/free. 
+#[derive(Clone, Debug)] +pub struct LiveHeapSample { + /// The profile index (sample_types + tags) for adding to correct profile + pub key: ProfileIndex, + /// The captured stack trace at allocation time + pub frames: Backtrace, + /// The labels at allocation time (thread id, span id, etc.) + pub labels: Vec