diff --git a/crates/perry-codegen/src/expr/class_field_inline_guard.rs b/crates/perry-codegen/src/expr/class_field_inline_guard.rs new file mode 100644 index 000000000..885ff31c6 --- /dev/null +++ b/crates/perry-codegen/src/expr/class_field_inline_guard.rs @@ -0,0 +1,176 @@ +//! #5093: codegen-inlined class-field shape guard. +//! +//! Monomorphic `this.field` reads/writes on a known class instance previously +//! routed every access through a cross-crate +//! `js_typed_feedback_class_field_{get,set}_guard` *call* before touching the +//! raw slot. Measurements in #5093 showed the call itself — not its body — was +//! the dominant cost on the `09_method_calls` benchmark (~290× Node). This +//! emits the cheap part of the guard's contract as inline IR: when the +//! monomorphic shape holds (and, for raw-f64 fields, the per-object typed-layout +//! intact bit is set), control branches straight to the fast slot load/store, +//! skipping the call. Because every shape operand is loaded from a loop-invariant +//! receiver, once the surrounding method is inlined (#5092) LLVM LICM can hoist +//! the whole shape check out of the hot loop, collapsing the body to a bare +//! `load`/`fadd`/`store` plus the volatile enable-flag test (re-read per access). +//! +//! The inline check is a strict subset of `class_field_fast_contract` (runtime +//! `typed_feedback/guards.rs`): if it passes, the guard call would have returned +//! "fast". On any miss it falls through to the unchanged guard-call path, so the +//! optimization is purely additive — it can never take the fast path the guard +//! would have rejected. The single per-object `GC_OBJ_TYPED_LAYOUT_INTACT` bit +//! (runtime `gc/layout.rs`) stands in for the thread-local raw-f64 layout probe: +//! it is set exactly when the object's canonical typed descriptor is installed +//! and cleared on any downgrade, so "intact bit set + class_id/keys match" ⟹ +//! "slot K is raw-f64" for any field the class declares as a raw-f64 candidate. 
+ +use crate::types::{I1, I16, I32, I64, I8}; + +use super::FnCtx; + +// Mirror of the runtime constants the inline check reproduces. Kept as literal +// decimals because the emitted IR is textual. +const POINTER_TAG_HI16: &str = "32765"; // 0x7FFD — NaN-box tag for heap pointers +const HANDLE_BAND_TOP: &str = "1048575"; // 0x0FFFFF — handles are <= this; objects are above +const GC_TYPE_OBJECT: &str = "2"; +const GC_FLAG_FORWARDED_I8: &str = "-128"; // 0x80 as i8 +const OBJECT_TYPE_REGULAR: &str = "1"; +const TYPED_LAYOUT_INTACT_BIT: &str = "4096"; // GC_OBJ_TYPED_LAYOUT_INTACT (0x1000) +const OBJ_FLAG_FROZEN_BIT: &str = "1"; // OBJ_FLAG_FROZEN (0x01) +const F64_EXP_MASK: &str = "9218868437227405312"; // 0x7FF0_0000_0000_0000 + +/// Emit the inline class-field shape pre-check. +/// +/// Before calling, the caller must have already created `fast_label` (the slot +/// load/store block) and computed `obj_bits` (i64 bitcast of the receiver +/// NaN-box) and `obj_handle` (the low-48 masked pointer) in a block that +/// dominates everything that follows. On success the emitted IR branches to +/// `fast_label`; on any miss it branches to a freshly created "guardcall" block. +/// +/// Returns the guardcall block's label and leaves `ctx.current_block` set to it, +/// so the caller emits the unchanged `js_typed_feedback_class_field_*_guard` +/// call path next. +/// +/// `set_value_bits` is `Some(bits)` only for the property-set raw-f64 path: it +/// adds the not-frozen and plain-finite-number checks the set fast contract +/// requires (a non-number must downgrade through the boxed setter, never a raw +/// store). 
+#[allow(clippy::too_many_arguments)] +pub(crate) fn emit_class_field_inline_precheck( + ctx: &mut FnCtx, + obj_bits: &str, + obj_handle: &str, + expected_class_id: &str, + expected_keys: &str, + field_index: u32, + require_raw_f64: bool, + set_value_bits: Option<&str>, + fast_label: &str, +) -> String { + let deref_idx = ctx.new_block("class_field_inline.deref"); + let guardcall_idx = ctx.new_block("class_field_inline.guardcall"); + let deref_label = ctx.block_label(deref_idx); + let guardcall_label = ctx.block_label(guardcall_idx); + let field_index_str = field_index.to_string(); + + // Gate the dereference: a basic block has no short-circuit, so the field + // loads below must only run once we know (a) the inline path is enabled and + // (b) the receiver is a real heap object (POINTER_TAG and above the handle + // band). Otherwise fall to the guard call, which classifies non-pointer / + // handle receivers safely and (under PERRY_VERIFY_TYPED_INTACT) runs the + // intact-bit verifier. + // + // The enable flag is checked *first* so the escape hatch + // (PERRY_DISABLE_CLASS_FIELD_INLINE) and verify mode cleanly bypass the + // inline reads entirely. It is a `volatile` load: the runtime flips it + // (sticky 0 -> 1) the moment descriptors / typed-feedback come into use, so + // LLVM must not hoist a stale 0 across a mid-execution flip — matching the + // relaxed-atomic read the guard itself performs. 
+ { + let blk = ctx.block(); + let flag = blk.load_volatile(I8, "@PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED"); + let flag_ok = blk.icmp_eq(I8, &flag, "0"); + let tag = blk.lshr(I64, obj_bits, "48"); + let is_ptr = blk.icmp_eq(I64, &tag, POINTER_TAG_HI16); + let above_band = blk.icmp_ugt(I64, obj_handle, HANDLE_BAND_TOP); + let ptr_safe = blk.and(I1, &is_ptr, &above_band); + let can_inline = blk.and(I1, &ptr_safe, &flag_ok); + blk.cond_br(&can_inline, &deref_label, &guardcall_label); + } + + ctx.current_block = deref_idx; + { + let blk = ctx.block(); + let obj_ptr = blk.inttoptr(I64, obj_handle); + + // GcHeader (precedes the object by 8 bytes): obj_type @-8 (i8), + // gc_flags @-7 (i8), _reserved @-6 (i16). + let gtype_ptr = blk.gep(I8, &obj_ptr, &[(I64, "-8")]); + let gtype = blk.load(I8, &gtype_ptr); + let gtype_ok = blk.icmp_eq(I8, &gtype, GC_TYPE_OBJECT); + + let gflags_ptr = blk.gep(I8, &obj_ptr, &[(I64, "-7")]); + let gflags = blk.load(I8, &gflags_ptr); + let fwd = blk.and(I8, &gflags, GC_FLAG_FORWARDED_I8); + let not_fwd = blk.icmp_eq(I8, &fwd, "0"); + + let res_ptr = blk.gep(I8, &obj_ptr, &[(I64, "-6")]); + let reserved = blk.load(I16, &res_ptr); + + // ObjectHeader: object_type @0 (i32)==REGULAR, class_id @4 (i32), + // field_count @12 (i32), keys_array @16 (i64). + let object_type = blk.load(I32, &obj_ptr); + let ot_ok = blk.icmp_eq(I32, &object_type, OBJECT_TYPE_REGULAR); + + let cid_ptr = blk.gep(I8, &obj_ptr, &[(I64, "4")]); + let class_id = blk.load(I32, &cid_ptr); + let cid_ok = blk.icmp_eq(I32, &class_id, expected_class_id); + + let fc_ptr = blk.gep(I8, &obj_ptr, &[(I64, "12")]); + let field_count = blk.load(I32, &fc_ptr); + let fc_ok = blk.icmp_ugt(I32, &field_count, &field_index_str); + + let ka_ptr = blk.gep(I8, &obj_ptr, &[(I64, "16")]); + let keys_array = blk.load(I64, &ka_ptr); + let ka_ok = blk.icmp_eq(I64, &keys_array, expected_keys); + + // (The process-global enable flag was already checked at the gate above, + // before this dereference.) 
+ let mut acc = blk.and(I1, &gtype_ok, &not_fwd); + acc = blk.and(I1, &acc, &ot_ok); + acc = blk.and(I1, &acc, &cid_ok); + acc = blk.and(I1, &acc, &fc_ok); + acc = blk.and(I1, &acc, &ka_ok); + + if require_raw_f64 { + // The slot is read/written as a raw double, so the per-object typed + // layout must be intact (no downgrade to a NaN-boxed value). + let intact = blk.and(I16, &reserved, TYPED_LAYOUT_INTACT_BIT); + let intact_ok = blk.icmp_ne(I16, &intact, "0"); + acc = blk.and(I1, &acc, &intact_ok); + } + + if let Some(value_bits) = set_value_bits { + // Frozen objects must route through the boxed setter (which is a + // no-op for frozen instances), never a raw store. + let frozen = blk.and(I16, &reserved, OBJ_FLAG_FROZEN_BIT); + let not_frozen = blk.icmp_eq(I16, &frozen, "0"); + acc = blk.and(I1, &acc, &not_frozen); + + if require_raw_f64 { + // Only a plain finite number may be stored raw. Non-finite + // (exponent all-ones: ±Inf/NaN — rare) and every NaN-boxed tag + // share the all-ones exponent, so a single mask/compare both + // keeps the fast path correct and routes the boxed/downgrade + // cases to the guard call. 
+ let exp = blk.and(I64, value_bits, F64_EXP_MASK); + let finite = blk.icmp_ne(I64, &exp, F64_EXP_MASK); + acc = blk.and(I1, &acc, &finite); + } + } + + blk.cond_br(&acc, fast_label, &guardcall_label); + } + + ctx.current_block = guardcall_idx; + guardcall_label +} diff --git a/crates/perry-codegen/src/expr/mod.rs b/crates/perry-codegen/src/expr/mod.rs index 7a024e577..d21125284 100644 --- a/crates/perry-codegen/src/expr/mod.rs +++ b/crates/perry-codegen/src/expr/mod.rs @@ -1413,6 +1413,7 @@ mod index_get; mod index_set; mod instance_misc1; pub(crate) use instance_misc1::builtin_parent_reserved_class_id; +mod class_field_inline_guard; mod js_runtime; mod literals_vars; mod logical_collections; diff --git a/crates/perry-codegen/src/expr/property_get.rs b/crates/perry-codegen/src/expr/property_get.rs index c82cd2f72..55f7b5ff5 100644 --- a/crates/perry-codegen/src/expr/property_get.rs +++ b/crates/perry-codegen/src/expr/property_get.rs @@ -1575,7 +1575,10 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { .as_ref() .is_some_and(crate::typed_shape::type_is_raw_f64_candidate); let requires_raw_f64_str = if requires_raw_f64 { "1" } else { "0" }; - let (obj_bits, obj_handle, key_raw, guard_ok) = { + // #5093: build the guard operands once, up front, so both + // the inline shape pre-check and the guard-call fallback + // can reference them. 
+ let (obj_bits, obj_handle, key_raw, expected_keys) = { let blk = ctx.block(); let obj_bits = blk.bitcast_double_to_i64(&recv_box); let obj_handle = blk.and(I64, &obj_bits, POINTER_MASK_I64); @@ -1583,28 +1586,45 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { let key_bits = blk.bitcast_double_to_i64(&key_box); let key_raw = blk.and(I64, &key_bits, POINTER_MASK_I64); let expected_keys = blk.load(I64, &format!("@{}", keys_global_name)); - let guard_ok = blk.call( - I32, - "js_typed_feedback_class_field_get_guard", - &[ - (I64, &site_id), - (DOUBLE, &recv_box), - (I32, &expected_class_id_str), - (I64, &expected_keys), - (I64, &key_raw), - (I32, &field_idx_str), - (I32, requires_raw_f64_str), - ], - ); - (obj_bits, obj_handle, key_raw, guard_ok) + (obj_bits, obj_handle, key_raw, expected_keys) }; - let guard_pass = ctx.block().icmp_ne(I32, &guard_ok, "0"); let fast_idx = ctx.new_block("class_field_get.fast"); let fallback_idx = ctx.new_block("class_field_get.fallback"); let merge_idx = ctx.new_block("class_field_get.merge"); let fast_label = ctx.block_label(fast_idx); let fallback_label = ctx.block_label(fallback_idx); let merge_label = ctx.block_label(merge_idx); + + // #5093: inline shape pre-check. On a monomorphic hit it + // branches straight to the fast slot load, skipping the + // cross-crate guard call; on a miss it leaves the current + // block at the guard-call path below (unchanged). 
+ let _guardcall_label = + crate::expr::class_field_inline_guard::emit_class_field_inline_precheck( + ctx, + &obj_bits, + &obj_handle, + &expected_class_id_str, + &expected_keys, + field_index, + requires_raw_f64, + None, + &fast_label, + ); + let guard_ok = ctx.block().call( + I32, + "js_typed_feedback_class_field_get_guard", + &[ + (I64, &site_id), + (DOUBLE, &recv_box), + (I32, &expected_class_id_str), + (I64, &expected_keys), + (I64, &key_raw), + (I32, &field_idx_str), + (I32, requires_raw_f64_str), + ], + ); + let guard_pass = ctx.block().icmp_ne(I32, &guard_ok, "0"); ctx.block() .cond_br(&guard_pass, &fast_label, &fallback_label); diff --git a/crates/perry-codegen/src/expr/property_set.rs b/crates/perry-codegen/src/expr/property_set.rs index b2dee78e3..ac9acf264 100644 --- a/crates/perry-codegen/src/expr/property_set.rs +++ b/crates/perry-codegen/src/expr/property_set.rs @@ -310,42 +310,67 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { .as_ref() .is_some_and(crate::typed_shape::type_is_raw_f64_candidate); let requires_raw_f64_str = if requires_raw_f64 { "1" } else { "0" }; - let (key_raw, guard_ok) = { + // #5093: build the guard operands once, up front, so both + // the inline shape pre-check and the guard-call fallback + // can reference them. 
+ let (obj_bits, obj_handle, key_raw, expected_keys, val_bits) = { let blk = ctx.block(); + let obj_bits = blk.bitcast_double_to_i64(&recv_box); + let obj_handle = blk.and(I64, &obj_bits, POINTER_MASK_I64); let key_box = blk.load(DOUBLE, &key_handle_global); let key_bits = blk.bitcast_double_to_i64(&key_box); let key_raw = blk.and(I64, &key_bits, POINTER_MASK_I64); let expected_keys = blk.load(I64, &format!("@{}", keys_global_name)); - let guard_ok = blk.call( - I32, - "js_typed_feedback_class_field_set_guard", - &[ - (I64, &site_id), - (DOUBLE, &recv_box), - (I32, &expected_class_id_str), - (I64, &expected_keys), - (I64, &key_raw), - (I32, &field_idx_str), - (DOUBLE, &val_double), - (I32, requires_raw_f64_str), - ], - ); - (key_raw, guard_ok) + let val_bits = blk.bitcast_double_to_i64(&val_double); + (obj_bits, obj_handle, key_raw, expected_keys, val_bits) }; - let guard_pass = ctx.block().icmp_ne(I32, &guard_ok, "0"); let fast_idx = ctx.new_block("class_field_set.fast"); let fallback_idx = ctx.new_block("class_field_set.fallback"); let merge_idx = ctx.new_block("class_field_set.merge"); let fast_label = ctx.block_label(fast_idx); let fallback_label = ctx.block_label(fallback_idx); let merge_label = ctx.block_label(merge_idx); + + // #5093: inline shape pre-check, raw-f64 fields only. The + // boxed-store path keeps the guard call (its setter-in- + // chain handling and write barrier aren't reproduced + // inline). On a hit this branches straight to the raw + // store, skipping the call; on a miss the guard-call path + // below runs unchanged. 
+ if requires_raw_f64 { + let _guardcall_label = + crate::expr::class_field_inline_guard::emit_class_field_inline_precheck( + ctx, + &obj_bits, + &obj_handle, + &expected_class_id_str, + &expected_keys, + field_index, + true, + Some(&val_bits), + &fast_label, + ); + } + let guard_ok = ctx.block().call( + I32, + "js_typed_feedback_class_field_set_guard", + &[ + (I64, &site_id), + (DOUBLE, &recv_box), + (I32, &expected_class_id_str), + (I64, &expected_keys), + (I64, &key_raw), + (I32, &field_idx_str), + (DOUBLE, &val_double), + (I32, requires_raw_f64_str), + ], + ); + let guard_pass = ctx.block().icmp_ne(I32, &guard_ok, "0"); ctx.block() .cond_br(&guard_pass, &fast_label, &fallback_label); ctx.current_block = fast_idx; let blk = ctx.block(); - let obj_bits = blk.bitcast_double_to_i64(&recv_box); - let obj_handle = blk.and(I64, &obj_bits, POINTER_MASK_I64); let obj_ptr = blk.inttoptr(I64, &obj_handle); let header_skip = "24".to_string(); let fields_base = blk.gep(I8, &obj_ptr, &[(I64, &header_skip)]); @@ -411,7 +436,6 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { ctx.current_block = fallback_idx; let blk = ctx.block(); - let obj_bits = blk.bitcast_double_to_i64(&recv_box); blk.call_void("js_typed_feedback_record_fallback_call", &[(I64, &site_id)]); blk.call_void( "js_object_set_field_by_name", diff --git a/crates/perry-codegen/src/runtime_decls/objects.rs b/crates/perry-codegen/src/runtime_decls/objects.rs index 0d7e32288..c4a68c8c8 100644 --- a/crates/perry-codegen/src/runtime_decls/objects.rs +++ b/crates/perry-codegen/src/runtime_decls/objects.rs @@ -23,6 +23,12 @@ use super::*; /// The inline bump allocator now handles most object allocation directly; /// `js_object_alloc(0, N)` is the fallback for dynamic cases. pub fn declare_phase_b_objects(module: &mut LlModule) { + // #5093: sticky runtime flag (i8, 0 = enabled) gating the codegen-inlined + // class-field shape-guard fast path. 
The inline guard loads this directly + // and falls back to the full `js_typed_feedback_class_field_*_guard` call + // when it is non-zero (descriptors / typed-feedback in use). Defined in + // perry-runtime as `PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED`. + module.add_external_global("PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED", I8); module.declare_function("js_object_alloc", I64, &[I32, I32]); // #3149: `Object(value)` plain-call coercion. Takes & returns a NaN-boxed // JSValue (DOUBLE): nullish/primitive -> fresh {}, object passes through. diff --git a/crates/perry-runtime/src/gc/layout.rs b/crates/perry-runtime/src/gc/layout.rs index 52d470b31..7279d6111 100644 --- a/crates/perry-runtime/src/gc/layout.rs +++ b/crates/perry-runtime/src/gc/layout.rs @@ -14,6 +14,48 @@ pub(super) const GC_LAYOUT_UNKNOWN: u16 = 0x0000; pub const GC_LAYOUT_POINTER_FREE: u16 = 0x4000; pub(crate) const GC_LAYOUT_SIDE_MASK: u16 = 0x8000; +// #5093: per-object "typed shape layout intact" flag, stored in a free bit of +// `GcHeader._reserved` (bit 12; bits 0..11 are object freeze/seal/proto/ +// descriptor flags + the copy survival age, bits 14..15 the layout state). Set +// whenever a `TypedLayoutDescriptor` is installed for the object — i.e. its +// canonical raw-f64 / pointer layout is known-valid — and cleared whenever that +// descriptor is removed. Every downgrade routes through `layout_set_typed_unknown` +// or the `layout_*` remove helpers below, all of which clear it, so the invariant +// intact bit set ⟹ TYPED_LAYOUTS holds this object's canonical descriptor +// holds at all times. The descriptor's raw-f64 mask is exactly the compile-time +// canonical mask codegen emits for the class, so combined with a class_id/ +// keys_array match the codegen-inlined class-field shape guard can conclude +// "slot K is raw-f64" from this single bit — no cross-crate guard call, no +// thread-local hashmap probe — for any field K the class declares as a raw-f64 +// candidate. 
The bit travels with `_reserved` across copying/evacuating GC (the +// collector copies the whole reserved word), and `layout_transfer` re-syncs it +// defensively after moving the descriptor. +pub const GC_OBJ_TYPED_LAYOUT_INTACT: u16 = 0x1000; + +#[inline] +pub(super) unsafe fn header_set_typed_layout_intact(header: *mut GcHeader) { + (*header)._reserved |= GC_OBJ_TYPED_LAYOUT_INTACT; +} + +#[inline] +pub(super) unsafe fn header_clear_typed_layout_intact(header: *mut GcHeader) { + (*header)._reserved &= !GC_OBJ_TYPED_LAYOUT_INTACT; +} + +// Clear the intact bit given only a user pointer (looks the header up). Used by +// the one remove path (`layout_clear_for_ptr`) that doesn't already hold a +// header. No-op for addresses too low to carry a Gc header. +#[inline] +pub(super) fn clear_typed_layout_intact_for_user(user_ptr: usize) { + if user_ptr < GC_HEADER_SIZE + 0x1000 { + return; + } + unsafe { + let header = header_from_user_ptr(user_ptr as *const u8); + (*header)._reserved &= !GC_OBJ_TYPED_LAYOUT_INTACT; + } +} + #[derive(Clone)] pub(super) enum LayoutSlotMask { Inline(u64), @@ -311,12 +353,14 @@ pub(crate) unsafe fn layout_init_pointer_free(user_ptr: *mut u8) { TYPED_LAYOUTS.with(|m| { m.borrow_mut().remove(&(user_ptr as usize)); }); + header_clear_typed_layout_intact(header); } pub(crate) unsafe fn layout_mark_unknown(user_ptr: *mut u8) { let Some(header) = layout_header_for_user(user_ptr as usize) else { return; }; + header_clear_typed_layout_intact(header); let state = (*header)._reserved & GC_LAYOUT_STATE_MASK; if state == GC_LAYOUT_UNKNOWN { TYPED_LAYOUTS.with(|m| { @@ -352,6 +396,7 @@ pub(crate) fn layout_clear_for_ptr(user_ptr: usize) { TYPED_LAYOUTS.with(|m| { m.borrow_mut().remove(&user_ptr); }); + clear_typed_layout_intact_for_user(user_ptr); } pub(crate) fn layout_has_typed_descriptor(user_ptr: usize) -> bool { @@ -363,6 +408,7 @@ pub(crate) fn layout_has_typed_descriptor(user_ptr: usize) -> bool { pub(super) unsafe fn 
layout_set_typed_unknown(header: *mut GcHeader, user_ptr: usize) { set_layout_state(header, GC_LAYOUT_UNKNOWN); + header_clear_typed_layout_intact(header); TYPED_LAYOUTS.with(|m| { m.borrow_mut().remove(&user_ptr); }); @@ -523,6 +569,7 @@ unsafe fn init_typed_shape_layout( TYPED_LAYOUTS.with(|m| { m.borrow_mut().insert(user_ptr, descriptor); }); + header_set_typed_layout_intact(header); if pointer_mask.is_empty() { set_layout_state(header, GC_LAYOUT_POINTER_FREE); LAYOUT_SLOT_MASKS.with(|m| { @@ -627,6 +674,7 @@ pub extern "C" fn js_gc_init_unboxed_object_layout( TYPED_LAYOUTS.with(|m| { m.borrow_mut().insert(user_ptr, descriptor); }); + header_set_typed_layout_intact(header); if pointer_mask.is_empty() { set_layout_state(header, GC_LAYOUT_POINTER_FREE); LAYOUT_SLOT_MASKS.with(|m| { @@ -653,6 +701,9 @@ pub(super) unsafe fn layout_rebuild_from_slots_with_policy( TYPED_LAYOUTS.with(|m| { m.borrow_mut().remove(&(user_ptr as usize)); }); + // The rebuild reconstructs only the pointer mask (no raw-f64 layout), so the + // object no longer has a canonical typed descriptor: drop the intact bit. + header_clear_typed_layout_intact(header); if slots.is_null() || slot_count == 0 { set_layout_state(header, GC_LAYOUT_POINTER_FREE); LAYOUT_SLOT_MASKS.with(|m| { @@ -718,13 +769,26 @@ pub(crate) unsafe fn layout_transfer(old_user: *mut u8, new_user: *mut u8) { } else { crate::array::clear_array_numeric_layout_ptr(new_user as usize); } - TYPED_LAYOUTS.with(|m| { + let new_has_typed = TYPED_LAYOUTS.with(|m| { let mut typed = m.borrow_mut(); typed.remove(&(new_user as usize)); if let Some(layout) = typed.remove(&(old_user as usize)) { typed.insert(new_user as usize, layout); + true + } else { + false } }); + // Keep the intact bit in lock-step with the moved descriptor. Copying GC + // normally propagates `_reserved` (so the bit already rode along), but + // re-sync defensively for callers that allocate the destination fresh + // (e.g. 
array growth) so a stale/missing bit can never desync from the map. + if new_has_typed { + header_set_typed_layout_intact(new_header); + } else { + header_clear_typed_layout_intact(new_header); + } + header_clear_typed_layout_intact(old_header); LAYOUT_SLOT_MASKS.with(|m| { let mut masks = m.borrow_mut(); masks.remove(&(new_user as usize)); @@ -767,6 +831,21 @@ pub(crate) fn layout_visit_pointer_slots_for_user( layout_visit_pointer_slots(user_ptr, slot_count, visit) } +/// #5093: read the per-object "typed shape layout intact" bit. This is the same +/// bit the codegen-inlined class-field shape guard tests; exposed for the +/// `PERRY_VERIFY_TYPED_INTACT=1` self-check in the typed-feedback fast contract, +/// which asserts the bit never claims a raw-f64 layout the side table disagrees +/// with. +pub(crate) fn layout_typed_intact_for_user(user_ptr: usize) -> bool { + if user_ptr < GC_HEADER_SIZE + 0x1000 { + return false; + } + unsafe { + let header = header_from_user_ptr(user_ptr as *const u8); + (*header)._reserved & GC_OBJ_TYPED_LAYOUT_INTACT != 0 + } +} + pub(crate) fn layout_typed_raw_f64_slot_for_user(user_ptr: usize, slot_index: usize) -> bool { TYPED_LAYOUTS.with(|m| { m.borrow() diff --git a/crates/perry-runtime/src/gc/mod.rs b/crates/perry-runtime/src/gc/mod.rs index 4ab5a78a5..800bce312 100644 --- a/crates/perry-runtime/src/gc/mod.rs +++ b/crates/perry-runtime/src/gc/mod.rs @@ -439,9 +439,37 @@ pub fn gc_init() { #[no_mangle] pub extern "C" fn js_gc_init() { crate::node_submodules::diagnostics_channel_init_main_thread(); + // #5093: force every class-field access back through the full guard call — + // i.e. 
disable the codegen-inlined fast path — when: + // - typed-feedback tracing is on (the guard observes every access), or + // - the intact-bit verifier is on (`PERRY_VERIFY_TYPED_INTACT`): the + // verifier lives in the guard's fast contract, so inline hits would skip + // it; disabling the inline path routes every access through it, or + // - the explicit escape hatch `PERRY_DISABLE_CLASS_FIELD_INLINE` is set to + // a truthy value (perf bisection / A-B measurement). `=0`/`=false`/`=off` + // leave the fast path enabled. + if crate::typed_feedback::typed_feedback_active() + || env_flag_enabled("PERRY_VERIFY_TYPED_INTACT") + || env_flag_enabled("PERRY_DISABLE_CLASS_FIELD_INLINE") + { + crate::object::disable_class_field_inline_guard(); + } gc_init(); } +/// #5093: parse a boolean-ish env var by value (not mere presence): true for +/// `1`/`true`/`on`/`yes` (case-insensitive), false for unset / `0`/`false`/`off` +/// / `no` / empty / anything else. +fn env_flag_enabled(name: &str) -> bool { + match std::env::var(name) { + Ok(v) => matches!( + v.trim().to_ascii_lowercase().as_str(), + "1" | "true" | "on" | "yes" + ), + Err(_) => false, + } +} + /// FFI: get GC stats #[no_mangle] pub extern "C" fn js_gc_stats( diff --git a/crates/perry-runtime/src/object/mod.rs b/crates/perry-runtime/src/object/mod.rs index 2ef0e0413..f82fadf01 100644 --- a/crates/perry-runtime/src/object/mod.rs +++ b/crates/perry-runtime/src/object/mod.rs @@ -12,7 +12,7 @@ use crate::JSValue; use std::cell::{Cell, RefCell, UnsafeCell}; use std::collections::HashMap; use std::ptr; -use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, AtomicU8, Ordering}; use std::sync::RwLock; // Submodules (issue #1103): behavior-preserving split of the former @@ -681,6 +681,30 @@ pub(crate) fn descriptors_in_use() -> bool { GLOBAL_DESCRIPTORS_IN_USE.load(Ordering::Relaxed) } +/// #5093: sticky process-global that disables the codegen-inlined 
class-field +/// shape-guard fast path. The emitted IR reads this byte directly (a single +/// volatile load, re-read on every access and never hoisted) via the +/// `@PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED` symbol and falls back to the full +/// `js_typed_feedback_class_field_{get,set}_guard` call whenever it is non-zero. +/// It flips to 1 the moment either (a) any accessor / property descriptor comes +/// into use — the guard then has to perform descriptor-aware dispatch the inline +/// path doesn't model — or (b) `js_gc_init` sees typed-feedback tracing (the guard +/// records observations the inline path would skip), `PERRY_VERIFY_TYPED_INTACT`, or +/// `PERRY_DISABLE_CLASS_FIELD_INLINE`. All are monotonic, so the flag is set-only. +#[no_mangle] +pub static PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED: AtomicU8 = AtomicU8::new(0); + +/// Disable the codegen-inlined class-field fast path process-wide (see +/// [`PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED`]). Idempotent. +pub(crate) fn disable_class_field_inline_guard() { + PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED.store(1, Ordering::Relaxed); +} + +/// True when the inline class-field fast path is still permitted. 
+pub(crate) fn class_field_inline_guard_enabled() -> bool { + PERRY_CLASS_FIELD_INLINE_GUARD_DISABLED.load(Ordering::Relaxed) == 0 +} + /// #5054: a descriptor (any kind) has been installed on the canonical /// `Object.prototype` — inherited setters / non-writable data props there /// must intercept writes of keys missing on the receiver, so the dynamic @@ -725,6 +749,7 @@ pub(crate) fn set_property_attrs(obj: usize, key: String, attrs: PropertyAttrs) note_descriptor_target(obj); PROPERTY_ATTRS_IN_USE.with(|c| c.set(true)); GLOBAL_DESCRIPTORS_IN_USE.store(true, Ordering::Relaxed); + disable_class_field_inline_guard(); PROPERTY_DESCRIPTORS.with(|m| { m.borrow_mut().insert((obj, key), attrs); }); @@ -830,6 +855,7 @@ pub(crate) fn set_accessor_descriptor(obj: usize, key: String, acc: AccessorDesc note_descriptor_target(obj); ACCESSORS_IN_USE.with(|c| c.set(true)); GLOBAL_DESCRIPTORS_IN_USE.store(true, Ordering::Relaxed); + disable_class_field_inline_guard(); ACCESSOR_DESCRIPTORS.with(|m| { m.borrow_mut().insert((obj, key), acc); }); diff --git a/crates/perry-runtime/src/typed_feedback.rs b/crates/perry-runtime/src/typed_feedback.rs index 88385a126..bba2164b4 100644 --- a/crates/perry-runtime/src/typed_feedback.rs +++ b/crates/perry-runtime/src/typed_feedback.rs @@ -43,6 +43,13 @@ fn typed_feedback_enabled() -> bool { } } +/// #5093: whether typed-feedback tracing is active. Read once at `js_gc_init` +/// to disable the codegen-inlined class-field fast path (which would skip the +/// observation recording the guard does in this mode). 
+pub(crate) fn typed_feedback_active() -> bool { + typed_feedback_enabled() +} + #[cfg(test)] pub(crate) static TYPED_FEEDBACK_TEST_LOCK: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(())); diff --git a/crates/perry-runtime/src/typed_feedback/guards.rs b/crates/perry-runtime/src/typed_feedback/guards.rs index f37ba8fc1..09fe2ad0b 100644 --- a/crates/perry-runtime/src/typed_feedback/guards.rs +++ b/crates/perry-runtime/src/typed_feedback/guards.rs @@ -302,10 +302,32 @@ fn class_field_fast_contract( return false; } let obj = object_addr as *const ObjectHeader; - (*obj).object_type == crate::error::OBJECT_TYPE_REGULAR + let shape_ok = (*obj).object_type == crate::error::OBJECT_TYPE_REGULAR && (*obj).class_id == expected_class_id && std::ptr::eq((*obj).keys_array as *const ArrayHeader, expected_keys) - && expected_field_index < (*obj).field_count + && expected_field_index < (*obj).field_count; + // #5093 self-check: the codegen-inlined fast path concludes "slot K is + // raw-f64" purely from the per-object intact bit (plus a class_id/keys + // match). Under PERRY_VERIFY_TYPED_INTACT=1, assert that whenever this + // contract sees a shape match for a raw-f64 candidate field with the + // intact bit set, the side table actually agrees the slot is raw-f64 — + // i.e. the inline path could never read a NaN-boxed value as a raw + // double. Any drift aborts loudly during the test sweep. 
+ if require_raw_f64 && shape_ok && verify_typed_intact_enabled() { + let intact = crate::gc::layout_typed_intact_for_user(object_addr); + let raw = crate::gc::layout_typed_raw_f64_slot_for_user( + object_addr, + expected_field_index as usize, + ); + if intact && !raw { + eprintln!( + "PERRY_VERIFY_TYPED_INTACT: intact bit set on class {} but slot {} is not raw-f64 in the side table (inline fast path would corrupt)", + expected_class_id, expected_field_index + ); + std::process::abort(); + } + } + shape_ok && (!require_raw_f64 || crate::gc::layout_typed_raw_f64_slot_for_user( object_addr, @@ -314,6 +336,36 @@ fn class_field_fast_contract( } } +#[cfg(not(test))] +fn verify_typed_intact_enabled() -> bool { + use std::sync::atomic::{AtomicU8, Ordering}; + static STATE: AtomicU8 = AtomicU8::new(0); + match STATE.load(Ordering::Relaxed) { + 0 => { + // Parse by value so `=0`/`=false`/`=off` don't enable the verifier, + // matching `env_flag_enabled` in `gc/mod.rs` (which also disables the + // inline fast path when this is on, so the verifier sees every access). + let on = std::env::var("PERRY_VERIFY_TYPED_INTACT") + .map(|v| { + matches!( + v.trim().to_ascii_lowercase().as_str(), + "1" | "true" | "on" | "yes" + ) + }) + .unwrap_or(false); + STATE.store(if on { 2 } else { 1 }, Ordering::Relaxed); + on + } + 2 => true, + _ => false, + } +} + +#[cfg(test)] +fn verify_typed_intact_enabled() -> bool { + false +} + #[no_mangle] pub extern "C" fn js_typed_feedback_class_field_get_guard( site_id: u64, diff --git a/test-files/test_class_field_raw_f64_downgrade.ts b/test-files/test_class_field_raw_f64_downgrade.ts new file mode 100644 index 000000000..5711f14d9 --- /dev/null +++ b/test-files/test_class_field_raw_f64_downgrade.ts @@ -0,0 +1,51 @@ +// #5093: the codegen-inlined class-field shape guard reads a `number`-typed +// (raw-f64) field directly as a raw double when the per-object typed-layout +// intact bit is set. 
This test exercises the downgrade trap: writing a +// non-number into a `number`-typed slot through an `any` alias must clear that +// bit so a subsequent read returns the boxed value, never the pointer bits +// reinterpreted as a double. + +class Box { + v: number; + constructor(x: number) { + this.v = x; + } +} + +const b = new Box(42); + +// Warm the raw-f64 fast path (read + write) so any LICM-hoisted shape check is +// established before the downgrade. +let acc = 0; +for (let i = 0; i < 1000; i++) { + b.v = b.v + 1; + acc = acc + b.v; +} +console.log("after-loop-v:" + b.v); // 1042 +console.log("acc:" + acc); + +// Downgrade: store a non-number through an `any` alias. The boxed setter must +// run, the slot must stop being raw-f64, and the intact bit must clear. +const a: any = b; +a.v = "hello"; +console.log("downgraded-v:" + b.v); // hello (NOT a garbage number) +console.log("typeof:" + typeof b.v); // string + +// Re-promote: storing a number again must read back correctly either way. +b.v = 7; +console.log("repromoted-v:" + b.v); // 7 +console.log("repromoted-typeof:" + typeof b.v); // number + +// A fresh instance must still take the fast path correctly and independently. +const c = new Box(100); +console.log("fresh-v:" + c.v); // 100 +console.log("orig-after-fresh:" + b.v); // 7 + +// Store null / object through the alias too — both are non-numbers that must +// downgrade safely. +const d = new Box(5); +const da: any = d; +da.v = null; +console.log("null-v:" + d.v); // null +da.v = { tag: "obj" }; +console.log("obj-v:" + d.v.tag); // obj