From 37aa31d8f56bca7dcf9c43d0a3ecf059392df10d Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 14:13:26 +0200 Subject: [PATCH 01/10] fix: model uninitialized typed property reads --- src/codegen/driver_support.rs | 22 +++- src/codegen/expr.rs | 2 +- src/codegen/expr/objects.rs | 3 +- src/codegen/expr/objects/access.rs | 74 +++++++++++++ src/codegen/expr/objects/allocation.rs | 21 ++++ src/codegen/expr/objects/static_properties.rs | 101 ++++++++++++++++++ src/codegen/mod.rs | 2 +- .../assignments/static_properties/arrays.rs | 1 + .../static_properties/arrays/indexed.rs | 1 + .../assignments/static_properties/assign.rs | 3 + .../static_properties/late_bound.rs | 10 ++ .../objects/property_access/nullsafe.rs | 29 ++++- tests/codegen/oop/modifiers_and_properties.rs | 66 ++++++++++++ 13 files changed, 326 insertions(+), 9 deletions(-) diff --git a/src/codegen/driver_support.rs b/src/codegen/driver_support.rs index 6d3e29bf..d87d4b79 100644 --- a/src/codegen/driver_support.rs +++ b/src/codegen/driver_support.rs @@ -21,6 +21,7 @@ use super::platform::{Arch, Target}; use super::runtime; const X86_64_HEAP_MAGIC_HI32: u64 = 0x454C5048; +pub(crate) const UNINITIALIZED_TYPED_PROPERTY_SENTINEL: i64 = 0x7fff_ffff_ffff_fffd; pub(super) fn emit_write_literal_stderr(emitter: &mut Emitter, label: &str, len: usize) { match emitter.target.arch { @@ -139,6 +140,7 @@ pub(super) fn emit_static_property_initializers( ctx: &mut Context, ) { let mut initializers = Vec::new(); + let mut uninitialized_static_properties = Vec::new(); let mut sorted_classes: Vec<(&String, &ClassInfo)> = ctx.classes.iter().collect(); sorted_classes.sort_by_key(|(class_name, _)| class_name.as_str()); for (class_name, class_info) in sorted_classes { @@ -151,7 +153,11 @@ pub(super) fn emit_static_property_initializers( if declaring_class != class_name { continue; } - let Some(default_expr) = class_info.static_defaults.get(index).cloned().flatten() else { + let default_expr = 
class_info.static_defaults.get(index).cloned().flatten(); + if default_expr.is_none() && class_info.declared_static_properties.contains(property_name) { + uninitialized_static_properties.push((class_name.clone(), property_name.clone())); + } + let Some(default_expr) = default_expr else { continue; }; let declared = class_info.declared_static_properties.contains(property_name); @@ -165,6 +171,17 @@ pub(super) fn emit_static_property_initializers( } } + for (class_name, property_name) in uninitialized_static_properties { + emitter.comment(&format!( + "mark static property {}::${} uninitialized", + class_name, property_name + )); + let marker_reg = abi::int_result_reg(emitter); + abi::emit_load_int_immediate(emitter, marker_reg, UNINITIALIZED_TYPED_PROPERTY_SENTINEL); + let symbol = crate::names::static_property_symbol(&class_name, &property_name); + abi::emit_store_reg_to_symbol(emitter, marker_reg, &symbol, 8); + } + for (class_name, property_name, prop_ty, default_expr, declared) in initializers { emitter.comment(&format!( "initialize static property {}::${}", @@ -179,6 +196,9 @@ pub(super) fn emit_static_property_initializers( }; let symbol = crate::names::static_property_symbol(&class_name, &property_name); abi::emit_store_result_to_symbol(emitter, &symbol, &store_ty, false); + if !matches!(store_ty.codegen_repr(), PhpType::Str) { + abi::emit_store_zero_to_symbol(emitter, &symbol, 8); + } } } diff --git a/src/codegen/expr.rs b/src/codegen/expr.rs index 4edf2781..11839e3d 100644 --- a/src/codegen/expr.rs +++ b/src/codegen/expr.rs @@ -218,7 +218,7 @@ pub fn emit_expr( objects::emit_nullsafe_property_access(object, property, emitter, ctx, data) } ExprKind::StaticPropertyAccess { receiver, property } => { - objects::emit_static_property_access(receiver, property, emitter, ctx) + objects::emit_static_property_access(receiver, property, emitter, ctx, data) } ExprKind::MethodCall { object, diff --git a/src/codegen/expr/objects.rs b/src/codegen/expr/objects.rs index 
d0e53dab..a9453958 100644 --- a/src/codegen/expr/objects.rs +++ b/src/codegen/expr/objects.rs @@ -397,8 +397,9 @@ pub(super) fn emit_static_property_access( property: &str, emitter: &mut Emitter, ctx: &mut Context, + data: &mut DataSection, ) -> PhpType { - static_properties::emit_static_property_access(receiver, property, emitter, ctx) + static_properties::emit_static_property_access(receiver, property, emitter, ctx, data) } pub(super) fn emit_enum_case( diff --git a/src/codegen/expr/objects/access.rs b/src/codegen/expr/objects/access.rs index 785c006e..d325f8a4 100644 --- a/src/codegen/expr/objects/access.rs +++ b/src/codegen/expr/objects/access.rs @@ -14,6 +14,7 @@ use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; use crate::codegen::functions; use crate::codegen::platform::Arch; +use crate::codegen::UNINITIALIZED_TYPED_PROPERTY_SENTINEL; use crate::parser::ast::Expr; use crate::types::PhpType; @@ -331,8 +332,11 @@ pub(super) fn emit_loaded_object_property_access( property, prop_ty, offset, + class_info.declared_properties.contains(property), false, class_info.reference_properties.contains(property), + ctx, + data, emitter, ) } @@ -342,8 +346,11 @@ fn emit_loaded_object_property_value( property: &str, prop_ty: PhpType, offset: usize, + is_declared: bool, needs_deref: bool, is_reference: bool, + ctx: &mut Context, + data: &mut DataSection, emitter: &mut Emitter, ) -> PhpType { if needs_deref { @@ -358,6 +365,12 @@ fn emit_loaded_object_property_value( let object_reg = abi::int_result_reg(emitter); + if is_declared { + emit_uninitialized_typed_property_guard( + class_name, property, offset, object_reg, emitter, ctx, data, + ); + } + if is_reference { let pointer_reg = abi::symbol_scratch_reg(emitter); abi::emit_load_from_address(emitter, pointer_reg, object_reg, offset); @@ -419,3 +432,64 @@ fn emit_loaded_object_property_value( prop_ty } + +fn emit_uninitialized_typed_property_guard( + class_name: &str, + property: &str, + offset: 
usize, + object_reg: &str, + emitter: &mut Emitter, + ctx: &mut Context, + data: &mut DataSection, +) { + let initialized_label = ctx.next_label("typed_prop_initialized"); + let marker_reg = abi::secondary_scratch_reg(emitter); + let sentinel_reg = abi::tertiary_scratch_reg(emitter); + abi::emit_load_from_address(emitter, marker_reg, object_reg, offset + 8); + abi::emit_load_int_immediate(emitter, sentinel_reg, UNINITIALIZED_TYPED_PROPERTY_SENTINEL); + match emitter.target.arch { + Arch::AArch64 => { + emitter.instruction(&format!("cmp {}, {}", marker_reg, sentinel_reg)); // check whether the typed property still carries the uninitialized marker + emitter.instruction(&format!("b.ne {}", initialized_label)); // continue the property read once the slot has been initialized + } + Arch::X86_64 => { + emitter.instruction(&format!("cmp {}, {}", marker_reg, sentinel_reg)); // check whether the typed property still carries the uninitialized marker + emitter.instruction(&format!("jne {}", initialized_label)); // continue the property read once the slot has been initialized + } + } + emit_uninitialized_typed_property_fatal(class_name, property, emitter, data); + emitter.label(&initialized_label); +} + +fn emit_uninitialized_typed_property_fatal( + class_name: &str, + property: &str, + emitter: &mut Emitter, + data: &mut DataSection, +) { + let message = format!( + "Fatal error: Typed property {}::${} must not be accessed before initialization\n", + class_name, property + ); + let (label, len) = data.add_string(message.as_bytes()); + match emitter.target.arch { + Arch::AArch64 => { + emitter.instruction("mov x0, #2"); // fd = stderr for the typed-property initialization fatal + abi::emit_symbol_address(emitter, "x1", &label); // point write() at the typed-property initialization diagnostic + emitter.instruction(&format!("mov x2, #{}", len)); // pass the diagnostic byte length to write() + emitter.syscall(4); + emitter.instruction("mov x0, #1"); // exit status 1 indicates 
abnormal termination + emitter.syscall(1); + } + Arch::X86_64 => { + abi::emit_symbol_address(emitter, "rsi", &label); // point write() at the typed-property initialization diagnostic + emitter.instruction(&format!("mov edx, {}", len)); // pass the diagnostic byte length to write() + emitter.instruction("mov edi, 2"); // fd = stderr for the typed-property initialization fatal + emitter.instruction("mov eax, 1"); // Linux x86_64 syscall 1 = write + emitter.instruction("syscall"); // emit the fatal diagnostic before terminating + emitter.instruction("mov edi, 1"); // exit status 1 indicates abnormal termination + emitter.instruction("mov eax, 60"); // Linux x86_64 syscall 60 = exit + emitter.instruction("syscall"); // terminate after the typed-property initialization fatal + } + } +} diff --git a/src/codegen/expr/objects/allocation.rs b/src/codegen/expr/objects/allocation.rs index 2d06ac87..d8d71f32 100644 --- a/src/codegen/expr/objects/allocation.rs +++ b/src/codegen/expr/objects/allocation.rs @@ -14,6 +14,7 @@ use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; use crate::codegen::expr::calls::args as call_args; use crate::codegen::platform::Arch; +use crate::codegen::UNINITIALIZED_TYPED_PROPERTY_SENTINEL; use crate::names::method_symbol; use crate::parser::ast::{Expr, ExprKind, TypeExpr}; use crate::types::PhpType; @@ -107,6 +108,9 @@ pub(super) fn emit_new_object_core( // -- zero-initialize all property slots -- for i in 0..num_props { let offset = 8 + i * 16; + let property_name = &class_info.properties[i].0; + let starts_uninitialized = class_info.declared_properties.contains(property_name) + && class_info.defaults.get(i).is_some_and(|default| default.is_none()); match emitter.target.arch { Arch::AArch64 => { emitter.instruction("ldr x9, [sp]"); // peek object pointer @@ -119,6 +123,23 @@ pub(super) fn emit_new_object_core( emitter.instruction(&format!("mov QWORD PTR [r11 + {}], 0", offset + 8)); // zero-initialize the high word / 
runtime metadata slot } } + if starts_uninitialized { + let marker_reg = abi::temp_int_reg(emitter.target); + abi::emit_load_int_immediate(emitter, marker_reg, UNINITIALIZED_TYPED_PROPERTY_SENTINEL); + match emitter.target.arch { + Arch::AArch64 => { + emitter.instruction("ldr x9, [sp]"); // peek object pointer before marking this typed property uninitialized + } + Arch::X86_64 => { + emitter.instruction("mov r11, QWORD PTR [rsp]"); // peek object pointer before marking this typed property uninitialized + } + } + let object_reg = match emitter.target.arch { + Arch::AArch64 => "x9", + Arch::X86_64 => "r11", + }; + abi::emit_store_to_address(emitter, marker_reg, object_reg, offset + 8); + } } // -- allocate the dyn_props hashtable if the class declares diff --git a/src/codegen/expr/objects/static_properties.rs b/src/codegen/expr/objects/static_properties.rs index b8742be1..c9862ba1 100644 --- a/src/codegen/expr/objects/static_properties.rs +++ b/src/codegen/expr/objects/static_properties.rs @@ -10,8 +10,10 @@ use crate::codegen::abi; use crate::codegen::context::Context; +use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; use crate::codegen::platform::Arch; +use crate::codegen::UNINITIALIZED_TYPED_PROPERTY_SENTINEL; use crate::names::static_property_symbol; use crate::parser::ast::{StaticReceiver, Visibility}; use crate::types::PhpType; @@ -32,6 +34,7 @@ pub(super) fn emit_static_property_access( property: &str, emitter: &mut Emitter, ctx: &mut Context, + data: &mut DataSection, ) -> PhpType { let Some((class_name, declaring_class, prop_ty)) = resolve_static_property(receiver, property, ctx, emitter) @@ -43,6 +46,9 @@ pub(super) fn emit_static_property_access( let branches = dynamic_static_property_branches(receiver, property, &declaring_class, ctx); if branches.is_empty() { let symbol = static_property_symbol(&declaring_class, property); + if static_property_has_declared_type(&declaring_class, property, ctx) { + 
emit_uninitialized_static_property_guard(&declaring_class, property, &symbol, emitter, ctx, data); + } abi::emit_load_symbol_to_result(emitter, &symbol, &prop_ty); } else if emit_called_class_id_into(emitter, ctx, class_id_work_reg(emitter)) { emit_dynamic_load_static_property_result( @@ -53,10 +59,14 @@ pub(super) fn emit_static_property_access( &prop_ty, emitter, ctx, + data, ); } else { emitter.comment("WARNING: missing forwarded called class id"); let symbol = static_property_symbol(&declaring_class, property); + if static_property_has_declared_type(&declaring_class, property, ctx) { + emit_uninitialized_static_property_guard(&declaring_class, property, &symbol, emitter, ctx, data); + } abi::emit_load_symbol_to_result(emitter, &symbol, &prop_ty); } prop_ty @@ -70,6 +80,7 @@ fn emit_dynamic_load_static_property_result( prop_ty: &PhpType, emitter: &mut Emitter, ctx: &mut Context, + data: &mut DataSection, ) { let done = ctx.next_label("static_prop_load_done"); let mut labels = Vec::new(); @@ -79,6 +90,16 @@ fn emit_dynamic_load_static_property_result( labels.push((label, branch)); } let fallback_symbol = static_property_symbol(fallback_declaring_class, property); + if static_property_has_declared_type(fallback_declaring_class, property, ctx) { + emit_uninitialized_static_property_guard( + fallback_declaring_class, + property, + &fallback_symbol, + emitter, + ctx, + data, + ); + } abi::emit_load_symbol_to_result(emitter, &fallback_symbol, prop_ty); emit_jump(emitter, &done); for (label, branch) in labels { @@ -88,6 +109,16 @@ fn emit_dynamic_load_static_property_result( continue; } let symbol = static_property_symbol(&branch.declaring_class, property); + if static_property_has_declared_type(&branch.declaring_class, property, ctx) { + emit_uninitialized_static_property_guard( + &branch.declaring_class, + property, + &symbol, + emitter, + ctx, + data, + ); + } abi::emit_load_symbol_to_result(emitter, &symbol, prop_ty); emit_jump(emitter, &done); } @@ -297,3 +328,73 
@@ fn emit_private_static_property_access_fatal(emitter: &mut Emitter) { } } } + +fn static_property_has_declared_type( + declaring_class: &str, + property: &str, + ctx: &Context, +) -> bool { + ctx.classes + .get(declaring_class) + .is_some_and(|class_info| class_info.declared_static_properties.contains(property)) +} + +fn emit_uninitialized_static_property_guard( + class_name: &str, + property: &str, + symbol: &str, + emitter: &mut Emitter, + ctx: &mut Context, + data: &mut DataSection, +) { + let initialized_label = ctx.next_label("static_prop_initialized"); + let marker_reg = abi::secondary_scratch_reg(emitter); + let sentinel_reg = abi::tertiary_scratch_reg(emitter); + abi::emit_load_symbol_to_reg(emitter, marker_reg, symbol, 8); + abi::emit_load_int_immediate(emitter, sentinel_reg, UNINITIALIZED_TYPED_PROPERTY_SENTINEL); + match emitter.target.arch { + Arch::AArch64 => { + emitter.instruction(&format!("cmp {}, {}", marker_reg, sentinel_reg)); // check whether the static typed property is still uninitialized + emitter.instruction(&format!("b.ne {}", initialized_label)); // continue the static property read once initialized + } + Arch::X86_64 => { + emitter.instruction(&format!("cmp {}, {}", marker_reg, sentinel_reg)); // check whether the static typed property is still uninitialized + emitter.instruction(&format!("jne {}", initialized_label)); // continue the static property read once initialized + } + } + emit_uninitialized_static_property_fatal(class_name, property, emitter, data); + emitter.label(&initialized_label); +} + +fn emit_uninitialized_static_property_fatal( + class_name: &str, + property: &str, + emitter: &mut Emitter, + data: &mut DataSection, +) { + let message = format!( + "Fatal error: Typed static property {}::${} must not be accessed before initialization\n", + class_name, property + ); + let (label, len) = data.add_string(message.as_bytes()); + match emitter.target.arch { + Arch::AArch64 => { + emitter.instruction("mov x0, #2"); // fd = 
stderr for the static typed-property initialization fatal + abi::emit_symbol_address(emitter, "x1", &label); // point write() at the static typed-property diagnostic + emitter.instruction(&format!("mov x2, #{}", len)); // pass the diagnostic byte length to write() + emitter.syscall(4); + emitter.instruction("mov x0, #1"); // exit status 1 indicates abnormal termination + emitter.syscall(1); + } + Arch::X86_64 => { + abi::emit_symbol_address(emitter, "rsi", &label); // point write() at the static typed-property diagnostic + emitter.instruction(&format!("mov edx, {}", len)); // pass the diagnostic byte length to write() + emitter.instruction("mov edi, 2"); // fd = stderr for the static typed-property initialization fatal + emitter.instruction("mov eax, 1"); // Linux x86_64 syscall 1 = write + emitter.instruction("syscall"); // emit the fatal diagnostic before terminating + emitter.instruction("mov edi, 1"); // exit status 1 indicates abnormal termination + emitter.instruction("mov eax, 60"); // Linux x86_64 syscall 60 = exit + emitter.instruction("syscall"); // terminate after the static typed-property initialization fatal + } + } +} diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index accb493d..b4f67ce5 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -85,7 +85,7 @@ pub(crate) use driver_support::{ emit_box_current_expr_value_as_mixed_for_container, emit_box_current_value_as_mixed, emit_box_iterable_value_for_mixed_container, emit_box_runtime_payload_as_mixed, emit_normalized_hash_key, emit_release_pushed_refcounted_temp_after_array_push, - runtime_value_tag, + runtime_value_tag, UNINITIALIZED_TYPED_PROPERTY_SENTINEL, }; pub use driver_support::generate_runtime; use platform::Target; diff --git a/src/codegen/stmt/assignments/static_properties/arrays.rs b/src/codegen/stmt/assignments/static_properties/arrays.rs index 1b464a4f..a7851428 100644 --- a/src/codegen/stmt/assignments/static_properties/arrays.rs +++ 
b/src/codegen/stmt/assignments/static_properties/arrays.rs @@ -106,6 +106,7 @@ pub(crate) fn emit_static_property_array_push_stmt( } else { let symbol = static_property_symbol(&declaring_class, property); abi::emit_store_reg_to_symbol(emitter, abi::int_result_reg(emitter), &symbol, 0); + abi::emit_store_zero_to_symbol(emitter, &symbol, 8); } } diff --git a/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs b/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs index d97d8f93..2e0e9df5 100644 --- a/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs +++ b/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs @@ -147,6 +147,7 @@ fn publish_static_array_pointer( } else { let symbol = static_property_symbol(declaring_class, property); abi::emit_store_reg_to_symbol(emitter, source_reg, &symbol, 0); + abi::emit_store_zero_to_symbol(emitter, &symbol, 8); } } diff --git a/src/codegen/stmt/assignments/static_properties/assign.rs b/src/codegen/stmt/assignments/static_properties/assign.rs index bcdbb09d..cff38ba0 100644 --- a/src/codegen/stmt/assignments/static_properties/assign.rs +++ b/src/codegen/stmt/assignments/static_properties/assign.rs @@ -95,5 +95,8 @@ pub(crate) fn emit_static_property_assign_stmt( } else { let symbol = static_property_symbol(&declaring_class, property); abi::emit_store_result_to_symbol(emitter, &symbol, &val_ty, true); + if !matches!(val_ty.codegen_repr(), PhpType::Str) { + abi::emit_store_zero_to_symbol(emitter, &symbol, 8); + } } } diff --git a/src/codegen/stmt/assignments/static_properties/late_bound.rs b/src/codegen/stmt/assignments/static_properties/late_bound.rs index defa5bea..afd8c531 100644 --- a/src/codegen/stmt/assignments/static_properties/late_bound.rs +++ b/src/codegen/stmt/assignments/static_properties/late_bound.rs @@ -113,6 +113,7 @@ pub(super) fn emit_dynamic_store_result_to_static_property( } let fallback_symbol = static_property_symbol(fallback_declaring_class, property); 
abi::emit_store_result_to_symbol(emitter, &fallback_symbol, ty, release_previous); + clear_uninitialized_marker_after_static_store(emitter, &fallback_symbol, ty); emit_jump(emitter, &done); for (label, branch) in labels { emitter.label(&label); @@ -122,6 +123,7 @@ pub(super) fn emit_dynamic_store_result_to_static_property( } let symbol = static_property_symbol(&branch.declaring_class, property); abi::emit_store_result_to_symbol(emitter, &symbol, ty, release_previous); + clear_uninitialized_marker_after_static_store(emitter, &symbol, ty); emit_jump(emitter, &done); } emitter.label(&done); @@ -145,6 +147,7 @@ pub(super) fn emit_dynamic_store_reg_to_static_property( } let fallback_symbol = static_property_symbol(fallback_declaring_class, property); abi::emit_store_reg_to_symbol(emitter, source_reg, &fallback_symbol, 0); + abi::emit_store_zero_to_symbol(emitter, &fallback_symbol, 8); emit_jump(emitter, &done); for (label, branch) in labels { emitter.label(&label); @@ -154,11 +157,18 @@ pub(super) fn emit_dynamic_store_reg_to_static_property( } let symbol = static_property_symbol(&branch.declaring_class, property); abi::emit_store_reg_to_symbol(emitter, source_reg, &symbol, 0); + abi::emit_store_zero_to_symbol(emitter, &symbol, 8); emit_jump(emitter, &done); } emitter.label(&done); } +fn clear_uninitialized_marker_after_static_store(emitter: &mut Emitter, symbol: &str, ty: &PhpType) { + if !matches!(ty.codegen_repr(), PhpType::Str) { + abi::emit_store_zero_to_symbol(emitter, symbol, 8); + } +} + fn emit_branch_if_class_id_matches( emitter: &mut Emitter, class_id_reg: &str, diff --git a/tests/codegen/objects/property_access/nullsafe.rs b/tests/codegen/objects/property_access/nullsafe.rs index 191dc640..3518e58f 100644 --- a/tests/codegen/objects/property_access/nullsafe.rs +++ b/tests/codegen/objects/property_access/nullsafe.rs @@ -35,7 +35,7 @@ class Profile { public string $name = "Ada"; } class User { - public ?Profile $profile; + public ?Profile $profile = null; } 
$with = new User(); $with->profile = new Profile(); @@ -48,6 +48,26 @@ echo $without->profile?->name ?? "none"; assert_eq!(out, "Ada|none"); } +#[test] +fn test_nullsafe_property_access_does_not_suppress_uninitialized_typed_property() { + let err = compile_and_run_expect_failure( + r#"profile?->name ?? "none"; +"#, + ); + assert!( + err.contains("Fatal error: Typed property User::$profile must not be accessed before initialization"), + "{err}" + ); +} + #[test] fn test_nullsafe_method_call_skips_arguments_when_receiver_is_null() { let out = compile_and_run( @@ -124,10 +144,10 @@ class Address { public string $city = "Rome"; } class Profile { - public ?Address $address; + public ?Address $address = null; } class User { - public ?Profile $profile; + public ?Profile $profile = null; } $with = new User(); $profile = new Profile(); @@ -150,7 +170,7 @@ class Profile { public string $name = "Ada"; } class User { - public ?Profile $profile; + public ?Profile $profile = null; public function profile(): ?Profile { return $this->profile; } @@ -388,4 +408,3 @@ read(new Root()); assert_eq!(out.stdout, "noisy|21"); assert_eq!(out.stderr, ""); } - diff --git a/tests/codegen/oop/modifiers_and_properties.rs b/tests/codegen/oop/modifiers_and_properties.rs index dc220175..8ce9a03d 100644 --- a/tests/codegen/oop/modifiers_and_properties.rs +++ b/tests/codegen/oop/modifiers_and_properties.rs @@ -128,6 +128,72 @@ echo $user->email; assert_eq!(out, "Ada:42:1:ada@example.test"); } +#[test] +fn test_uninitialized_typed_instance_property_is_fatal() { + let err = compile_and_run_expect_failure( + r#"value; +"#, + ); + assert!( + err.contains("Fatal error: Typed property Box::$value must not be accessed before initialization"), + "{err}" + ); +} + +#[test] +fn test_typed_instance_property_initialized_to_zero_reads_normally() { + let out = compile_and_run( + r#"value = 0; +echo $box->value; +"#, + ); + assert_eq!(out, "0"); +} + +#[test] +fn test_uninitialized_typed_static_property_is_fatal() 
{ + let err = compile_and_run_expect_failure( + r#" Date: Sat, 16 May 2026 14:13:36 +0200 Subject: [PATCH 02/10] fix: fold PHP overflow and loose comparisons --- src/optimize/fold/ops.rs | 25 ++++++++++-- src/optimize/fold/scalar.rs | 81 +++++++++++++++++++++++++++++++++++-- tests/codegen/operators.rs | 37 ++++++++++++++++- 3 files changed, 136 insertions(+), 7 deletions(-) diff --git a/src/optimize/fold/ops.rs b/src/optimize/fold/ops.rs index 8ccb7e21..51a45d91 100644 --- a/src/optimize/fold/ops.rs +++ b/src/optimize/fold/ops.rs @@ -93,9 +93,18 @@ fn try_fold_numeric_binop(op: &BinOp, left: &Expr, right: &Expr) -> Option Option { match op { - BinOp::Add => left.checked_add(right).map(ExprKind::IntLiteral), - BinOp::Sub => left.checked_sub(right).map(ExprKind::IntLiteral), - BinOp::Mul => left.checked_mul(right).map(ExprKind::IntLiteral), + BinOp::Add => left + .checked_add(right) + .map(ExprKind::IntLiteral) + .or_else(|| fold_int_overflow_to_float(op, left, right)), + BinOp::Sub => left + .checked_sub(right) + .map(ExprKind::IntLiteral) + .or_else(|| fold_int_overflow_to_float(op, left, right)), + BinOp::Mul => left + .checked_mul(right) + .map(ExprKind::IntLiteral) + .or_else(|| fold_int_overflow_to_float(op, left, right)), BinOp::Div => { if right == 0 { None @@ -115,6 +124,16 @@ fn try_fold_int_numeric_binop(op: &BinOp, left: i64, right: i64) -> Option Option { + let result = match op { + BinOp::Add => left as f64 + right as f64, + BinOp::Sub => left as f64 - right as f64, + BinOp::Mul => left as f64 * right as f64, + _ => return None, + }; + result.is_finite().then_some(ExprKind::FloatLiteral(result)) +} + fn try_fold_int_mod(left: &Expr, right: &Expr) -> Option { let (left, right) = (int_literal(left)?, int_literal(right)?); if right == 0 { diff --git a/src/optimize/fold/scalar.rs b/src/optimize/fold/scalar.rs index f3ebb70e..021944f4 100644 --- a/src/optimize/fold/scalar.rs +++ b/src/optimize/fold/scalar.rs @@ -76,15 +76,90 @@ pub(in crate::optimize) fn 
loose_eq(left: &Expr, right: &Expr) -> Option { let left = scalar_value(left)?; let right = scalar_value(right)?; match (&left, &right) { + (ScalarValue::Bool(left), right) => Some(*left == right.truthy()), + (left, ScalarValue::Bool(right)) => Some(left.truthy() == *right), (ScalarValue::Null, ScalarValue::Null) => Some(true), - (ScalarValue::Bool(left), ScalarValue::Bool(right)) => Some(left == right), - (ScalarValue::String(left), ScalarValue::String(right)) => Some(left == right), + (ScalarValue::Null, ScalarValue::String(right)) => Some(right.is_empty()), + (ScalarValue::String(left), ScalarValue::Null) => Some(left.is_empty()), + (ScalarValue::Null, ScalarValue::Int(right)) => Some(*right == 0), + (ScalarValue::Int(left), ScalarValue::Null) => Some(*left == 0), + (ScalarValue::Null, ScalarValue::Float(right)) => Some(*right == 0.0), + (ScalarValue::Float(left), ScalarValue::Null) => Some(*left == 0.0), + (ScalarValue::String(left), ScalarValue::String(right)) => { + match (php_numeric_string(left), php_numeric_string(right)) { + (Some(left), Some(right)) => Some(left == right), + _ => Some(left == right), + } + } (ScalarValue::Int(left), ScalarValue::Int(right)) => Some(left == right), (ScalarValue::Float(left), ScalarValue::Float(right)) => Some(left == right), (ScalarValue::Int(left), ScalarValue::Float(right)) => Some(*left as f64 == *right), (ScalarValue::Float(left), ScalarValue::Int(right)) => Some(*left == *right as f64), - _ => None, + (ScalarValue::Int(left), ScalarValue::String(right)) => { + php_numeric_string(right).map(|right| *left as f64 == right).or(Some(false)) + } + (ScalarValue::String(left), ScalarValue::Int(right)) => { + php_numeric_string(left).map(|left| left == *right as f64).or(Some(false)) + } + (ScalarValue::Float(left), ScalarValue::String(right)) => { + php_numeric_string(right).map(|right| *left == right).or(Some(false)) + } + (ScalarValue::String(left), ScalarValue::Float(right)) => { + php_numeric_string(left).map(|left| left 
== *right).or(Some(false)) + } + } +} + +fn php_numeric_string(value: &str) -> Option { + let trimmed = value.trim_matches(|c: char| c.is_ascii_whitespace()); + if trimmed.is_empty() { + return None; + } + + let bytes = trimmed.as_bytes(); + let mut idx = 0; + if matches!(bytes[idx], b'+' | b'-') { + idx += 1; + if idx == bytes.len() { + return None; + } + } + + let mut digits = 0; + while idx < bytes.len() && bytes[idx].is_ascii_digit() { + idx += 1; + digits += 1; + } + + if idx < bytes.len() && bytes[idx] == b'.' { + idx += 1; + while idx < bytes.len() && bytes[idx].is_ascii_digit() { + idx += 1; + digits += 1; + } + } + if digits == 0 { + return None; + } + + if idx < bytes.len() && matches!(bytes[idx], b'e' | b'E') { + idx += 1; + if idx < bytes.len() && matches!(bytes[idx], b'+' | b'-') { + idx += 1; + } + let exp_start = idx; + while idx < bytes.len() && bytes[idx].is_ascii_digit() { + idx += 1; + } + if idx == exp_start { + return None; + } + } + + if idx != bytes.len() { + return None; } + trimmed.parse::().ok().filter(|value| value.is_finite()) } pub(in crate::optimize) fn compare_numeric( diff --git a/tests/codegen/operators.rs b/tests/codegen/operators.rs index d07c771f..f291be23 100644 --- a/tests/codegen/operators.rs +++ b/tests/codegen/operators.rs @@ -77,6 +77,18 @@ fn test_nested_arithmetic() { assert_eq!(out, "10"); } +#[test] +fn test_constant_int_add_overflow_promotes_to_float() { + let out = compile_and_run("= 2;"); assert_eq!(out, ""); } - From 3b7b12de4dd84bdc7471967e3d99edecdd6d0fed Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 14:13:44 +0200 Subject: [PATCH 03/10] docs: update runtime compatibility notes --- ROADMAP.md | 2 +- docs/php/classes.md | 4 ++-- docs/php/types.md | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 1f92d50c..308c7ac1 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -441,7 +441,7 @@ runtime helpers, and standard-library surfaces. 
- [x] PHP attributes runtime introspection — implement `ReflectionClass::getAttributes()`, `ReflectionMethod::getAttributes()`, `ReflectionProperty::getAttributes()`, plus `ReflectionAttribute::newInstance()`. Class/member declarations expose attribute names and supported literal args through helper builtins and Reflection objects; `ReflectionAttribute::newInstance()` constructs the attribute class on demand from the captured literal args. - [x] Mixed indexed/associative array union — model `array + array` across indexed/hash representations while preserving PHP's shared int/string key space and left-key precedence - [X] Callable parity follow-up — support captured method/static first-class callables in the remaining callback runtimes (`array_reduce()`, `array_walk()`, `usort()`, `uksort()`, `uasort()`), direct callable expression calls such as `($obj->method(...))()`, non-local method receivers such as `(new Foo())->method(...)`, nullsafe first-class callables, broader builtin first-class callable wrappers, and the remaining `call_user_func_array()` by-reference callback gaps -- [ ] Runtime-value compatibility polishing v2 — continue with PHP's uninitialized typed-property state, integer overflow promotion, broader loose-comparison semantics, and future warning/notice sites as they are added +- [ ] Runtime-value compatibility polishing v2 — uninitialized typed instance/static property reads now fail with PHP-style fatal diagnostics; constant-folded integer overflow promotes to float; constant-folded scalar loose comparisons use broader PHP bool/null/numeric-string rules. Continue with non-folded runtime integer overflow promotion, runtime loose-comparison helper semantics, and future warning/notice sites as they are added. 
- [ ] Broader date and regex PHP parity — expand `strtotime()` relative formats and PCRE-compatible regex features/captures/backreferences (JSON parity now closed: see v0.8.x base + v0.20.x polish) - [x] JSON encoder optimization — folded `__rt_json_assoc_is_list_shape` into the main associative-array encoding walk. `__rt_json_encode_assoc` now emits a provisional object form, tracks whether keys remain `0..count-1` while iterating the hash once, and compacts the finished buffer in-place to `[...]` only for real list-shape payloads. Object-shape inputs still stay object form, and `JSON_FORCE_OBJECT` disables compaction. - [x] JSON decoder optimization — fused the `__rt_json_validate` pre-pass into `__rt_json_decode_mixed` for `json_decode()`. The wrapper now calls the checked structural decoder directly; the decoder trims the input once, validates scalar strings/numbers at the point where they are decoded, enforces depth around containers, records syntax/depth/UTF-16 errors internally, and returns null-on-error for the PHP-facing wrapper. `json_validate()` keeps the standalone RFC 8259 validator surface. diff --git a/docs/php/classes.md b/docs/php/classes.md index 1548523d..4f6be8b9 100644 --- a/docs/php/classes.md +++ b/docs/php/classes.md @@ -173,7 +173,7 @@ class User { } ``` -Property type declarations are checked at compile time for both instance and static properties. Defaults and later assignments must be compatible with the declared type, including constructor assignments through untyped parameters. Nullable shorthand (`?T`) and union storage use the compiler's boxed mixed representation internally. `void` and `callable` property types are rejected. +Property type declarations are checked at compile time for both instance and static properties. Defaults and later assignments must be compatible with the declared type, including constructor assignments through untyped parameters. 
Typed properties without an explicit default start in PHP's uninitialized state; reading an instance or static property before the first assignment is a fatal runtime error, while assigning values such as `0`, `false`, `""`, or `null` to compatible nullable storage initializes the slot normally. Nullable shorthand (`?T`) and union storage use the compiler's boxed mixed representation internally. `void` and `callable` property types are rejected. ### Property redeclaration @@ -220,7 +220,7 @@ Counter::$count = 5; echo Counter::bump(); // 6 ``` -Supported receivers are `ClassName::$prop`, `self::$prop`, `parent::$prop`, and `static::$prop`. Static property visibility and declared types are checked at compile time. Inherited static properties share the declaring class storage until a subclass redeclares the property. Redeclarations follow PHP rules: non-private inherited properties keep invariant declared types, cannot reduce visibility, and cannot override `final` properties. Private static properties redeclared in subclasses are independent slots; `static::$prop` is still late-bound and reports a fatal runtime error if the current method scope cannot access the matched private slot. +Supported receivers are `ClassName::$prop`, `self::$prop`, `parent::$prop`, and `static::$prop`. Static property visibility and declared types are checked at compile time. Typed static properties without defaults use the same uninitialized-read fatal as typed instance properties. Inherited static properties share the declaring class storage until a subclass redeclares the property. Redeclarations follow PHP rules: non-private inherited properties keep invariant declared types, cannot reduce visibility, and cannot override `final` properties. Private static properties redeclared in subclasses are independent slots; `static::$prop` is still late-bound and reports a fatal runtime error if the current method scope cannot access the matched private slot. 
Static properties in elephc, like in PHP, are always mutable — even on a `readonly class`. PHP's `readonly` modifier only constrains instance properties; declaring `public readonly static` is a compile error in both PHP and elephc. diff --git a/docs/php/types.md b/docs/php/types.md index 8d950d9c..8d28fc82 100644 --- a/docs/php/types.md +++ b/docs/php/types.md @@ -161,10 +161,9 @@ Aliases: `(integer)`, `(double)`, `(real)`, `(boolean)`. ### Known incompatibilities with PHP - `$argv[0]` returns the compiled binary path, not the `.php` file path. -- Integer overflow wraps instead of promoting to float. -- Loose comparison (`==`) between different types coerces both sides to integer. +- Non-folded runtime integer overflow wraps instead of promoting to float; constant-folded integer overflow promotes to float. +- Non-folded runtime loose comparison (`==`) between different types coerces both sides to integer; constant-folded scalar literals use broader PHP-style bool/null/numeric-string rules. - `??=` is checked against typed assignment storage for variables, object properties, static properties, and non-append array elements. For concrete local variable types, the fallback must keep the same type or be a literal `null`. -- elephc does not model PHP's uninitialized typed-property state; property slots without explicit defaults start from the compiler's existing zero/null-like object-slot initialization until assigned. - Plain array numeric casts (`(int)$array`, `(float)$array`) follow elephc's existing array cast semantics (return the element count rather than PHP's `0`/`1`). Direct `iterable` numeric casts use PHP's empty/non-empty `0`/`1` semantics. - `FiberError` is currently modeled as an `Exception` subclass in elephc; PHP models `FiberError` under `Error`. 
From 34a0156ab0efc243b759ba8274ddd7db5e1ab784 Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 14:30:17 +0200 Subject: [PATCH 04/10] fix: promote runtime integer overflow --- src/codegen/expr/binops/arithmetic.rs | 88 +++++- src/codegen/expr/helpers.rs | 19 +- .../runtime/arrays/mixed_numeric_binops.rs | 258 ++++++++++++++++++ src/codegen/runtime/arrays/mod.rs | 2 + src/codegen/runtime/emitters.rs | 1 + src/codegen/runtime/x86_minimal.rs | 1 + tests/codegen/operators.rs | 24 ++ 7 files changed, 387 insertions(+), 6 deletions(-) create mode 100644 src/codegen/runtime/arrays/mixed_numeric_binops.rs diff --git a/src/codegen/expr/binops/arithmetic.rs b/src/codegen/expr/binops/arithmetic.rs index 0220af71..01d73a3e 100644 --- a/src/codegen/expr/binops/arithmetic.rs +++ b/src/codegen/expr/binops/arithmetic.rs @@ -134,17 +134,31 @@ pub(super) fn emit_numeric_binop( data: &mut DataSection, ) -> PhpType { let left_ty = emit_expr(left, emitter, ctx, data); - coerce_numeric_mixed_to_int(emitter, &left_ty); - let use_float = left_ty == PhpType::Float; + let dynamic_candidate = matches!(op, BinOp::Add | BinOp::Sub | BinOp::Mul); + let left_stack_ty = if dynamic_candidate && left_ty == PhpType::Void { + coerce_null_to_zero(emitter, &left_ty); + PhpType::Int + } else if dynamic_candidate { + left_ty.clone() + } else { + coerce_numeric_mixed_to_int(emitter, &left_ty); + left_ty.clone() + }; + let use_float = left_stack_ty == PhpType::Float; if use_float { abi::emit_push_float_reg(emitter, abi::float_result_reg(emitter)); } else { - abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + abi::emit_push_result_value(emitter, &left_stack_ty); } let right_ty = emit_expr(right, emitter, ctx, data); + + if should_emit_mixed_numeric_binop(op, &left_stack_ty, &right_ty) { + return emit_mixed_numeric_binop(op, &left_stack_ty, &right_ty, emitter); + } + coerce_numeric_mixed_to_int(emitter, &right_ty); - if left_ty == PhpType::Float || right_ty == PhpType::Float 
|| *op == BinOp::Div { + if left_stack_ty == PhpType::Float || right_ty == PhpType::Float || *op == BinOp::Div { if right_ty != PhpType::Float { emit_promote_int_to_float( emitter, @@ -153,7 +167,7 @@ pub(super) fn emit_numeric_binop( ); } abi::emit_push_float_reg(emitter, abi::float_result_reg(emitter)); - if left_ty == PhpType::Float { + if left_stack_ty == PhpType::Float { let left_float_reg = match emitter.target.arch { Arch::AArch64 => "d1", Arch::X86_64 => "xmm1", @@ -211,6 +225,70 @@ pub(super) fn emit_numeric_binop( } } +fn should_emit_mixed_numeric_binop(op: &BinOp, left_ty: &PhpType, right_ty: &PhpType) -> bool { + if !matches!(op, BinOp::Add | BinOp::Sub | BinOp::Mul) { + return false; + } + if matches!(left_ty, PhpType::Mixed | PhpType::Union(_)) + || matches!(right_ty, PhpType::Mixed | PhpType::Union(_)) + { + return true; + } + is_integerish_numeric(left_ty) && is_integerish_numeric(right_ty) +} + +fn is_integerish_numeric(ty: &PhpType) -> bool { + matches!(ty, PhpType::Int | PhpType::Bool | PhpType::Void) +} + +fn emit_mixed_numeric_binop( + op: &BinOp, + left_stack_ty: &PhpType, + right_ty: &PhpType, + emitter: &mut Emitter, +) -> PhpType { + if !matches!(right_ty, PhpType::Mixed | PhpType::Union(_)) { + crate::codegen::emit_box_current_value_as_mixed(emitter, right_ty); + } + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + pop_saved_numeric_operand(emitter, left_stack_ty); + if !matches!(left_stack_ty, PhpType::Mixed | PhpType::Union(_)) { + crate::codegen::emit_box_current_value_as_mixed(emitter, left_stack_ty); + } + match emitter.target.arch { + Arch::AArch64 => { + abi::emit_pop_reg(emitter, "x1"); + } + Arch::X86_64 => { + abi::emit_pop_reg(emitter, "rdi"); + } + } + let helper = match op { + BinOp::Add => "__rt_mixed_numeric_add", + BinOp::Sub => "__rt_mixed_numeric_sub", + BinOp::Mul => "__rt_mixed_numeric_mul", + _ => unreachable!(), + }; + abi::emit_call_label(emitter, helper); + PhpType::Mixed +} + +fn 
pop_saved_numeric_operand(emitter: &mut Emitter, ty: &PhpType) { + match ty.codegen_repr() { + PhpType::Float => { + abi::emit_pop_float_reg(emitter, abi::float_result_reg(emitter)); + } + PhpType::Str => { + let (ptr_reg, len_reg) = abi::string_result_regs(emitter); + abi::emit_pop_reg_pair(emitter, ptr_reg, len_reg); + } + PhpType::Void | PhpType::Never => {} + _ => { + abi::emit_pop_reg(emitter, abi::int_result_reg(emitter)); + } + } +} + pub(super) fn emit_concat_binop( left: &Expr, right: &Expr, diff --git a/src/codegen/expr/helpers.rs b/src/codegen/expr/helpers.rs index 8723b9c0..d4445f47 100644 --- a/src/codegen/expr/helpers.rs +++ b/src/codegen/expr/helpers.rs @@ -53,7 +53,24 @@ pub(crate) fn coerce_result_to_type( if source_ty == target_ty { return; } - if matches!(target_ty, PhpType::Mixed | PhpType::Union(_)) { + if matches!(source_ty, PhpType::Mixed | PhpType::Union(_)) { + match target_ty.codegen_repr() { + PhpType::Int | PhpType::Resource(_) => { + crate::codegen::abi::emit_call_label(emitter, "__rt_mixed_cast_int"); + } + PhpType::Bool => { + crate::codegen::abi::emit_call_label(emitter, "__rt_mixed_cast_bool"); + } + PhpType::Float => { + crate::codegen::abi::emit_call_label(emitter, "__rt_mixed_cast_float"); + } + PhpType::Str => { + super::coerce_to_string(emitter, ctx, data, source_ty); + } + PhpType::Mixed | PhpType::Union(_) => {} + _ => {} + } + } else if matches!(target_ty, PhpType::Mixed | PhpType::Union(_)) { crate::codegen::emit_box_current_value_as_mixed(emitter, source_ty); } else if *target_ty == PhpType::Str { super::coerce_to_string(emitter, ctx, data, source_ty); diff --git a/src/codegen/runtime/arrays/mixed_numeric_binops.rs b/src/codegen/runtime/arrays/mixed_numeric_binops.rs new file mode 100644 index 00000000..44e93d6b --- /dev/null +++ b/src/codegen/runtime/arrays/mixed_numeric_binops.rs @@ -0,0 +1,258 @@ +//! Purpose: +//! Emits runtime helpers for arithmetic on boxed Mixed numeric values. +//! 
Centralizes PHP integer-overflow promotion for dynamic int|float results. +//! +//! Called from: +//! - `crate::codegen::runtime::emitters::emit_runtime()` via `crate::codegen::runtime::arrays`. +//! +//! Key details: +//! - Helpers return a boxed Mixed cell so callers can observe either integer or double at runtime. + +use crate::codegen::emit::Emitter; +use crate::codegen::{abi, platform::Arch}; + +/// mixed_numeric_binops: add/sub/mul boxed numeric payloads and return Mixed. +/// Input: AArch64 x0=left Mixed*, x1=right Mixed* +/// x86_64 rax=left Mixed*, rdi=right Mixed* +/// Output: boxed Mixed pointer in the integer result register +pub fn emit_mixed_numeric_binops(emitter: &mut Emitter) { + if emitter.target.arch == Arch::X86_64 { + emit_mixed_numeric_binops_linux_x86_64(emitter); + return; + } + + emitter.blank(); + emitter.comment("--- runtime: mixed_numeric_binops ---"); + + emit_aarch64_entry(emitter, "__rt_mixed_numeric_add", 0); + emit_aarch64_entry(emitter, "__rt_mixed_numeric_sub", 1); + emit_aarch64_entry(emitter, "__rt_mixed_numeric_mul", 2); + + emitter.label("__rt_mixed_numeric_common"); + emitter.instruction("str x0, [sp, #0]"); // save the boxed left operand pointer for unboxing and casts + emitter.instruction("str x1, [sp, #8]"); // save the boxed right operand pointer for unboxing and casts + emitter.instruction("str x9, [sp, #16]"); // save the selected arithmetic opcode across helper calls + + // -- classify operands so float payloads force floating-point arithmetic -- + emitter.instruction("bl __rt_mixed_unbox"); // inspect the left boxed payload tag and value words + emitter.instruction("str x0, [sp, #24]"); // save the left runtime value tag for numeric dispatch + emitter.instruction("ldr x0, [sp, #8]"); // load the boxed right operand pointer for unboxing + emitter.instruction("bl __rt_mixed_unbox"); // inspect the right boxed payload tag and value words + emitter.instruction("str x0, [sp, #32]"); // save the right runtime value tag for 
numeric dispatch + emitter.instruction("ldr x9, [sp, #24]"); // reload the left runtime value tag + emitter.instruction("cmp x9, #2"); // does the left operand hold a double payload? + emitter.instruction("b.eq __rt_mixed_numeric_float_path"); // any double payload makes the whole operation double-valued + emitter.instruction("ldr x9, [sp, #32]"); // reload the right runtime value tag + emitter.instruction("cmp x9, #2"); // does the right operand hold a double payload? + emitter.instruction("b.eq __rt_mixed_numeric_float_path"); // any double payload makes the whole operation double-valued + + // -- integer path with PHP overflow promotion -- + emitter.instruction("ldr x0, [sp, #0]"); // reload the boxed left operand before casting to integer + emitter.instruction("bl __rt_mixed_cast_int"); // coerce the left operand using the current integer numeric rules + emitter.instruction("str x0, [sp, #40]"); // save the left integer payload across the right cast + emitter.instruction("ldr x0, [sp, #8]"); // reload the boxed right operand before casting to integer + emitter.instruction("bl __rt_mixed_cast_int"); // coerce the right operand using the current integer numeric rules + emitter.instruction("mov x2, x0"); // keep the right integer operand in x2 for arithmetic and overflow fallback + emitter.instruction("ldr x1, [sp, #40]"); // reload the left integer operand into x1 + emitter.instruction("ldr x9, [sp, #16]"); // reload the selected arithmetic opcode + emitter.instruction("cmp x9, #1"); // is this helper handling subtraction? + emitter.instruction("b.eq __rt_mixed_numeric_int_sub"); // branch to the subtraction overflow sequence + emitter.instruction("cmp x9, #2"); // is this helper handling multiplication? 
+ emitter.instruction("b.eq __rt_mixed_numeric_int_mul"); // branch to the multiplication overflow sequence + + emitter.label("__rt_mixed_numeric_int_add"); + emitter.instruction("adds x0, x1, x2"); // compute integer addition and set overflow flags + emitter.instruction("b.vs __rt_mixed_numeric_int_overflow"); // promote to double when signed addition overflowed + emitter.instruction("b __rt_mixed_numeric_box_int"); // box the in-range integer result + + emitter.label("__rt_mixed_numeric_int_sub"); + emitter.instruction("subs x0, x1, x2"); // compute integer subtraction and set overflow flags + emitter.instruction("b.vs __rt_mixed_numeric_int_overflow"); // promote to double when signed subtraction overflowed + emitter.instruction("b __rt_mixed_numeric_box_int"); // box the in-range integer result + + emitter.label("__rt_mixed_numeric_int_mul"); + emitter.instruction("mul x0, x1, x2"); // compute the low half of the signed integer product + emitter.instruction("smulh x3, x1, x2"); // compute the high half needed for overflow detection + emitter.instruction("cmp x3, x0, asr #63"); // high half must equal the sign extension of the low half + emitter.instruction("b.ne __rt_mixed_numeric_int_overflow"); // promote to double when signed multiplication overflowed + + emitter.label("__rt_mixed_numeric_box_int"); + emitter.instruction("mov x1, x0"); // move the integer result into the Mixed helper payload register + emitter.instruction("mov x2, xzr"); // integer payloads do not use a high word + emitter.instruction("mov x0, #0"); // runtime tag 0 = integer + emitter.instruction("bl __rt_mixed_from_value"); // box the integer result into a Mixed cell + emitter.instruction("b __rt_mixed_numeric_done"); // restore the helper frame and return the boxed result + + emitter.label("__rt_mixed_numeric_int_overflow"); + emitter.instruction("scvtf d0, x1"); // convert the original left integer to double for PHP overflow promotion + emitter.instruction("scvtf d1, x2"); // convert the 
original right integer to double for PHP overflow promotion + emitter.instruction("ldr x9, [sp, #16]"); // reload the selected arithmetic opcode for the double fallback + emitter.instruction("cmp x9, #1"); // is this overflow fallback for subtraction? + emitter.instruction("b.eq __rt_mixed_numeric_float_sub_loaded"); // use floating-point subtraction for an overflowing integer subtraction + emitter.instruction("cmp x9, #2"); // is this overflow fallback for multiplication? + emitter.instruction("b.eq __rt_mixed_numeric_float_mul_loaded"); // use floating-point multiplication for an overflowing integer multiplication + emitter.instruction("b __rt_mixed_numeric_float_add_loaded"); // use floating-point addition for an overflowing integer addition + + // -- float path: cast both operands to double, then box the double result -- + emitter.label("__rt_mixed_numeric_float_path"); + emitter.instruction("ldr x0, [sp, #0]"); // reload the boxed left operand before casting to double + emitter.instruction("bl __rt_mixed_cast_float"); // coerce the left operand to double + emitter.instruction("str d0, [sp, #48]"); // save the left double across the right cast + emitter.instruction("ldr x0, [sp, #8]"); // reload the boxed right operand before casting to double + emitter.instruction("bl __rt_mixed_cast_float"); // coerce the right operand to double + emitter.instruction("fmov d1, d0"); // keep the right double operand in d1 + emitter.instruction("ldr d0, [sp, #48]"); // reload the left double operand into d0 + emitter.instruction("ldr x9, [sp, #16]"); // reload the selected arithmetic opcode for double arithmetic + emitter.instruction("cmp x9, #1"); // is this helper handling subtraction? + emitter.instruction("b.eq __rt_mixed_numeric_float_sub_loaded"); // branch to the floating-point subtraction sequence + emitter.instruction("cmp x9, #2"); // is this helper handling multiplication? 
+ emitter.instruction("b.eq __rt_mixed_numeric_float_mul_loaded"); // branch to the floating-point multiplication sequence + + emitter.label("__rt_mixed_numeric_float_add_loaded"); + emitter.instruction("fadd d0, d0, d1"); // compute the double addition result + emitter.instruction("b __rt_mixed_numeric_box_float"); // box the double result + + emitter.label("__rt_mixed_numeric_float_sub_loaded"); + emitter.instruction("fsub d0, d0, d1"); // compute the double subtraction result + emitter.instruction("b __rt_mixed_numeric_box_float"); // box the double result + + emitter.label("__rt_mixed_numeric_float_mul_loaded"); + emitter.instruction("fmul d0, d0, d1"); // compute the double multiplication result + + emitter.label("__rt_mixed_numeric_box_float"); + emitter.instruction("fmov x1, d0"); // move the double bits into the Mixed helper payload register + emitter.instruction("mov x2, xzr"); // double payloads do not use a high word + emitter.instruction("mov x0, #2"); // runtime tag 2 = double + emitter.instruction("bl __rt_mixed_from_value"); // box the double result into a Mixed cell + + emitter.label("__rt_mixed_numeric_done"); + emitter.instruction("ldp x29, x30, [sp, #64]"); // restore frame pointer and return address + emitter.instruction("add sp, sp, #80"); // release the helper stack frame + emitter.instruction("ret"); // return to generated code with boxed Mixed result in x0 +} + +fn emit_aarch64_entry(emitter: &mut Emitter, label: &str, opcode: i64) { + emitter.label_global(label); + emitter.instruction("sub sp, sp, #80"); // allocate a helper frame for operands, tags, and saved FP state + emitter.instruction("stp x29, x30, [sp, #64]"); // save frame pointer and return address + emitter.instruction("add x29, sp, #64"); // establish a stable helper frame pointer + abi::emit_load_int_immediate(emitter, "x9", opcode); + emitter.instruction("b __rt_mixed_numeric_common"); // enter the shared mixed numeric implementation +} + +fn 
emit_mixed_numeric_binops_linux_x86_64(emitter: &mut Emitter) { + emitter.blank(); + emitter.comment("--- runtime: mixed_numeric_binops ---"); + + emit_x86_64_entry(emitter, "__rt_mixed_numeric_add", 0); + emit_x86_64_entry(emitter, "__rt_mixed_numeric_sub", 1); + emit_x86_64_entry(emitter, "__rt_mixed_numeric_mul", 2); + + emitter.label("__rt_mixed_numeric_common_linux_x86_64"); + emitter.instruction("mov QWORD PTR [rbp - 8], rax"); // save the boxed left operand pointer for unboxing and casts + emitter.instruction("mov QWORD PTR [rbp - 16], rdi"); // save the boxed right operand pointer for unboxing and casts + emitter.instruction("mov QWORD PTR [rbp - 24], r10"); // save the selected arithmetic opcode across helper calls + + // -- classify operands so float payloads force floating-point arithmetic -- + emitter.instruction("call __rt_mixed_unbox"); // inspect the left boxed payload tag and value words + emitter.instruction("mov QWORD PTR [rbp - 32], rax"); // save the left runtime value tag for numeric dispatch + emitter.instruction("mov rax, QWORD PTR [rbp - 16]"); // load the boxed right operand pointer for unboxing + emitter.instruction("call __rt_mixed_unbox"); // inspect the right boxed payload tag and value words + emitter.instruction("mov QWORD PTR [rbp - 40], rax"); // save the right runtime value tag for numeric dispatch + emitter.instruction("cmp QWORD PTR [rbp - 32], 2"); // does the left operand hold a double payload? + emitter.instruction("je __rt_mixed_numeric_float_path_linux_x86_64"); // any double payload makes the whole operation double-valued + emitter.instruction("cmp QWORD PTR [rbp - 40], 2"); // does the right operand hold a double payload? 
+ emitter.instruction("je __rt_mixed_numeric_float_path_linux_x86_64"); // any double payload makes the whole operation double-valued + + // -- integer path with PHP overflow promotion -- + emitter.instruction("mov rax, QWORD PTR [rbp - 8]"); // reload the boxed left operand before casting to integer + emitter.instruction("call __rt_mixed_cast_int"); // coerce the left operand using the current integer numeric rules + emitter.instruction("mov QWORD PTR [rbp - 48], rax"); // save the left integer payload across the right cast + emitter.instruction("mov rax, QWORD PTR [rbp - 16]"); // reload the boxed right operand before casting to integer + emitter.instruction("call __rt_mixed_cast_int"); // coerce the right operand using the current integer numeric rules + emitter.instruction("mov r11, rax"); // keep the right integer operand in r11 + emitter.instruction("mov r10, QWORD PTR [rbp - 48]"); // reload the left integer operand into r10 + emitter.instruction("mov r8, r10"); // preserve the original left integer for overflow promotion + emitter.instruction("mov r9, r11"); // preserve the original right integer for overflow promotion + emitter.instruction("cmp QWORD PTR [rbp - 24], 1"); // is this helper handling subtraction? + emitter.instruction("je __rt_mixed_numeric_int_sub_linux_x86_64"); // branch to the subtraction overflow sequence + emitter.instruction("cmp QWORD PTR [rbp - 24], 2"); // is this helper handling multiplication? 
+ emitter.instruction("je __rt_mixed_numeric_int_mul_linux_x86_64"); // branch to the multiplication overflow sequence + + emitter.label("__rt_mixed_numeric_int_add_linux_x86_64"); + emitter.instruction("add r10, r11"); // compute integer addition and set overflow flags + emitter.instruction("jo __rt_mixed_numeric_int_overflow_linux_x86_64"); // promote to double when signed addition overflowed + emitter.instruction("jmp __rt_mixed_numeric_box_int_linux_x86_64"); // box the in-range integer result + + emitter.label("__rt_mixed_numeric_int_sub_linux_x86_64"); + emitter.instruction("sub r10, r11"); // compute integer subtraction and set overflow flags + emitter.instruction("jo __rt_mixed_numeric_int_overflow_linux_x86_64"); // promote to double when signed subtraction overflowed + emitter.instruction("jmp __rt_mixed_numeric_box_int_linux_x86_64"); // box the in-range integer result + + emitter.label("__rt_mixed_numeric_int_mul_linux_x86_64"); + emitter.instruction("mov rax, r10"); // move the left operand into rax for one-operand signed multiply + emitter.instruction("imul r11"); // compute signed multiplication and set overflow flags + emitter.instruction("jo __rt_mixed_numeric_int_overflow_linux_x86_64"); // promote to double when signed multiplication overflowed + emitter.instruction("mov r10, rax"); // keep the in-range product in the integer result scratch + + emitter.label("__rt_mixed_numeric_box_int_linux_x86_64"); + emitter.instruction("mov rdi, r10"); // move the integer result into the Mixed helper payload register + emitter.instruction("xor rsi, rsi"); // integer payloads do not use a high word + emitter.instruction("mov rax, 0"); // runtime tag 0 = integer + emitter.instruction("call __rt_mixed_from_value"); // box the integer result into a Mixed cell + emitter.instruction("jmp __rt_mixed_numeric_done_linux_x86_64"); // restore the helper frame and return the boxed result + + emitter.label("__rt_mixed_numeric_int_overflow_linux_x86_64"); + 
emitter.instruction("cvtsi2sd xmm0, r8"); // convert the original left integer to double for PHP overflow promotion + emitter.instruction("cvtsi2sd xmm1, r9"); // convert the original right integer to double for PHP overflow promotion + emitter.instruction("cmp QWORD PTR [rbp - 24], 1"); // is this overflow fallback for subtraction? + emitter.instruction("je __rt_mixed_numeric_float_sub_loaded_linux_x86_64"); // use floating-point subtraction for an overflowing integer subtraction + emitter.instruction("cmp QWORD PTR [rbp - 24], 2"); // is this overflow fallback for multiplication? + emitter.instruction("je __rt_mixed_numeric_float_mul_loaded_linux_x86_64"); // use floating-point multiplication for an overflowing integer multiplication + emitter.instruction("jmp __rt_mixed_numeric_float_add_loaded_linux_x86_64"); // use floating-point addition for an overflowing integer addition + + // -- float path: cast both operands to double, then box the double result -- + emitter.label("__rt_mixed_numeric_float_path_linux_x86_64"); + emitter.instruction("mov rax, QWORD PTR [rbp - 8]"); // reload the boxed left operand before casting to double + emitter.instruction("call __rt_mixed_cast_float"); // coerce the left operand to double + emitter.instruction("movsd QWORD PTR [rbp - 56], xmm0"); // save the left double across the right cast + emitter.instruction("mov rax, QWORD PTR [rbp - 16]"); // reload the boxed right operand before casting to double + emitter.instruction("call __rt_mixed_cast_float"); // coerce the right operand to double + emitter.instruction("movapd xmm1, xmm0"); // keep the right double operand in xmm1 + emitter.instruction("movsd xmm0, QWORD PTR [rbp - 56]"); // reload the left double operand into xmm0 + emitter.instruction("cmp QWORD PTR [rbp - 24], 1"); // is this helper handling subtraction? 
+ emitter.instruction("je __rt_mixed_numeric_float_sub_loaded_linux_x86_64"); // branch to the floating-point subtraction sequence + emitter.instruction("cmp QWORD PTR [rbp - 24], 2"); // is this helper handling multiplication? + emitter.instruction("je __rt_mixed_numeric_float_mul_loaded_linux_x86_64"); // branch to the floating-point multiplication sequence + + emitter.label("__rt_mixed_numeric_float_add_loaded_linux_x86_64"); + emitter.instruction("addsd xmm0, xmm1"); // compute the double addition result + emitter.instruction("jmp __rt_mixed_numeric_box_float_linux_x86_64"); // box the double result + + emitter.label("__rt_mixed_numeric_float_sub_loaded_linux_x86_64"); + emitter.instruction("subsd xmm0, xmm1"); // compute the double subtraction result + emitter.instruction("jmp __rt_mixed_numeric_box_float_linux_x86_64"); // box the double result + + emitter.label("__rt_mixed_numeric_float_mul_loaded_linux_x86_64"); + emitter.instruction("mulsd xmm0, xmm1"); // compute the double multiplication result + + emitter.label("__rt_mixed_numeric_box_float_linux_x86_64"); + emitter.instruction("movq rdi, xmm0"); // move the double bits into the Mixed helper payload register + emitter.instruction("xor rsi, rsi"); // double payloads do not use a high word + emitter.instruction("mov rax, 2"); // runtime tag 2 = double + emitter.instruction("call __rt_mixed_from_value"); // box the double result into a Mixed cell + + emitter.label("__rt_mixed_numeric_done_linux_x86_64"); + emitter.instruction("add rsp, 80"); // release the helper stack frame + emitter.instruction("pop rbp"); // restore the caller frame pointer + emitter.instruction("ret"); // return to generated code with boxed Mixed result in rax +} + +fn emit_x86_64_entry(emitter: &mut Emitter, label: &str, opcode: i64) { + emitter.label_global(label); + emitter.instruction("push rbp"); // save the caller frame pointer before nested runtime calls + emitter.instruction("mov rbp, rsp"); // establish a stable helper frame 
pointer + emitter.instruction("sub rsp, 80"); // allocate aligned helper slots for operands, tags, and FP state + abi::emit_load_int_immediate(emitter, "r10", opcode); + emitter.instruction("jmp __rt_mixed_numeric_common_linux_x86_64"); // enter the shared mixed numeric implementation +} diff --git a/src/codegen/runtime/arrays/mod.rs b/src/codegen/runtime/arrays/mod.rs index 9df2e82d..05df8916 100644 --- a/src/codegen/runtime/arrays/mod.rs +++ b/src/codegen/runtime/arrays/mod.rs @@ -118,6 +118,7 @@ mod mixed_cast_string; mod mixed_free_deep; mod mixed_count; mod mixed_is_empty; +mod mixed_numeric_binops; mod mixed_strict_eq; mod mixed_unbox; mod mixed_write_stdout; @@ -233,6 +234,7 @@ pub use mixed_cast_string::emit_mixed_cast_string; pub use mixed_count::emit_mixed_count; pub use mixed_free_deep::emit_mixed_free_deep; pub use mixed_is_empty::emit_mixed_is_empty; +pub use mixed_numeric_binops::emit_mixed_numeric_binops; pub use mixed_strict_eq::emit_mixed_strict_eq; pub use mixed_unbox::emit_mixed_unbox; pub use mixed_write_stdout::emit_mixed_write_stdout; diff --git a/src/codegen/runtime/emitters.rs b/src/codegen/runtime/emitters.rs index 6e1933a9..09ae7ffd 100644 --- a/src/codegen/runtime/emitters.rs +++ b/src/codegen/runtime/emitters.rs @@ -244,6 +244,7 @@ pub(crate) fn emit_runtime(emitter: &mut Emitter) { arrays::emit_mixed_count(emitter); arrays::emit_mixed_free_deep(emitter); arrays::emit_mixed_is_empty(emitter); + arrays::emit_mixed_numeric_binops(emitter); arrays::emit_mixed_strict_eq(emitter); arrays::emit_mixed_unbox(emitter); arrays::emit_mixed_write_stdout(emitter); diff --git a/src/codegen/runtime/x86_minimal.rs b/src/codegen/runtime/x86_minimal.rs index a22e0eb3..287d6e7d 100644 --- a/src/codegen/runtime/x86_minimal.rs +++ b/src/codegen/runtime/x86_minimal.rs @@ -203,6 +203,7 @@ pub(super) fn emit_runtime_linux_x86_64_minimal(emitter: &mut Emitter) { arrays::emit_mixed_free_deep(emitter); arrays::emit_mixed_unbox(emitter); 
arrays::emit_mixed_cast_string(emitter); + arrays::emit_mixed_numeric_binops(emitter); arrays::emit_mixed_write_stdout(emitter); arrays::emit_mixed_strict_eq(emitter); arrays::emit_iterable_unsupported_kind(emitter); diff --git a/tests/codegen/operators.rs b/tests/codegen/operators.rs index f291be23..0a1ac2b1 100644 --- a/tests/codegen/operators.rs +++ b/tests/codegen/operators.rs @@ -89,6 +89,30 @@ fn test_constant_int_multiply_overflow_promotes_to_float() { assert_eq!(out, "double"); } +#[test] +fn test_runtime_int_add_overflow_promotes_to_float() { + let out = compile_and_run(" Date: Sat, 16 May 2026 14:38:38 +0200 Subject: [PATCH 05/10] fix: broaden runtime loose comparisons --- src/codegen/expr/binops/comparison.rs | 311 ++++++++++++++++++- src/codegen/runtime/emitters.rs | 2 + src/codegen/runtime/strings/mod.rs | 4 + src/codegen/runtime/strings/str_loose_eq.rs | 99 ++++++ src/codegen/runtime/strings/str_to_number.rs | 100 ++++++ src/codegen/runtime/x86_minimal.rs | 2 + tests/codegen/operators.rs | 36 +++ 7 files changed, 553 insertions(+), 1 deletion(-) create mode 100644 src/codegen/runtime/strings/str_loose_eq.rs create mode 100644 src/codegen/runtime/strings/str_to_number.rs diff --git a/src/codegen/expr/binops/comparison.rs b/src/codegen/expr/binops/comparison.rs index 2d141666..2b3b8cae 100644 --- a/src/codegen/expr/binops/comparison.rs +++ b/src/codegen/expr/binops/comparison.rs @@ -16,7 +16,7 @@ use super::target::{ emit_float_compare, emit_pop_left_float_for_comparison, emit_promote_int_to_float, emit_set_bool_from_flags, emit_set_float_bool_from_flags, }; -use super::super::{coerce_null_to_zero, emit_expr, BinOp, Expr, PhpType}; +use super::super::{coerce_null_to_zero, coerce_to_truthiness, emit_expr, BinOp, Expr, PhpType}; /// PHP loose comparison coerces both sides to a common type. /// Simplified: coerce everything to int, then compare. 
@@ -44,6 +44,326 @@ fn coerce_to_int_for_loose_cmp(emitter: &mut Emitter, ty: &PhpType) {
     }
 }
 
+/// Loose `==`/`!=` with a bool on the left: both operands are reduced to truthiness and compared.
+fn emit_bool_left_loose_equality(
+    _left: &Expr,
+    op: &BinOp,
+    right: &Expr,
+    left_ty: &PhpType,
+    emitter: &mut Emitter,
+    ctx: &mut Context,
+    data: &mut DataSection,
+) -> PhpType {
+    coerce_to_truthiness(emitter, ctx, left_ty);
+    abi::emit_push_reg(emitter, abi::int_result_reg(emitter));
+    let right_ty = emit_expr(right, emitter, ctx, data);
+    coerce_to_truthiness(emitter, ctx, &right_ty);
+    compare_saved_truthiness_with_current(op, emitter);
+    PhpType::Bool
+}
+
+/// Loose `==`/`!=` with a string on the left: saves it, evaluates `right`, and dispatches on its type.
+fn emit_string_left_loose_equality(
+    op: &BinOp,
+    right: &Expr,
+    emitter: &mut Emitter,
+    ctx: &mut Context,
+    data: &mut DataSection,
+) -> PhpType {
+    let (left_ptr, left_len) = abi::string_result_regs(emitter);
+    abi::emit_push_reg_pair(emitter, left_ptr, left_len);
+    let right_ty = emit_expr(right, emitter, ctx, data);
+    match right_ty {
+        PhpType::Bool => {
+            coerce_to_truthiness(emitter, ctx, &right_ty);
+            abi::emit_push_reg(emitter, abi::int_result_reg(emitter));
+            load_saved_left_string(emitter, 16);
+            coerce_to_truthiness(emitter, ctx, &PhpType::Str);
+            compare_saved_right_truthiness_with_current_left(op, emitter);
+            abi::emit_release_temporary_stack(emitter, 16);
+        }
+        PhpType::Void => {
+            pop_saved_left_string(emitter);
+            emit_compare_current_string_length_to_zero(op, emitter);
+        }
+        PhpType::Str => {
+            call_str_loose_eq_with_saved_left(op, emitter);
+        }
+        PhpType::Int | PhpType::Float => {
+            push_current_number_as_float(emitter, &right_ty);
+            load_saved_left_string(emitter, 16);
+            abi::emit_call_label(emitter, "__rt_str_to_number");
+            compare_parsed_string_with_saved_float(op, emitter, ctx);
+            abi::emit_release_temporary_stack(emitter, 16);
+        }
+        _ => {
+            pop_saved_left_string(emitter);
+            emit_set_loose_bool_literal(op, false, emitter);
+        }
+    }
+    PhpType::Bool
+}
+
+/// Loose `==`/`!=` of a saved int/float/null left operand against a bool right operand, by truthiness.
+/// The left value is read from its stack slot beneath the pushed right truthiness, then released.
+fn emit_bool_right_loose_equality(
+    op: &BinOp,
+    left_ty: &PhpType,
+    emitter: &mut Emitter,
+    ctx: &mut Context,
+) -> PhpType {
+    coerce_to_truthiness(emitter, ctx, &PhpType::Bool);
+    abi::emit_push_reg(emitter, abi::int_result_reg(emitter));
+    // The saved left operand now sits below the right truthiness that was just pushed, so it is
+    // read in place at offset 16; popping here would hand back the right operand instead.
+    load_saved_left_for_truthiness(emitter, left_ty, 16);
+    coerce_to_truthiness(emitter, ctx, left_ty);
+    compare_saved_right_truthiness_with_current_left(op, emitter);
+    abi::emit_release_temporary_stack(emitter, 16);
+    PhpType::Bool
+}
+
+/// Loose `==`/`!=` with a string on the right and a saved non-bool, non-string left operand.
+fn emit_right_string_loose_equality(
+    op: &BinOp,
+    left_ty: &PhpType,
+    emitter: &mut Emitter,
+    ctx: &mut Context,
+) -> PhpType {
+    if *left_ty == PhpType::Void {
+        discard_saved_left_numeric(emitter, left_ty);
+        emit_compare_current_string_length_to_zero(op, emitter);
+    } else if matches!(left_ty, PhpType::Int | PhpType::Float) {
+        abi::emit_call_label(emitter, "__rt_str_to_number");
+        compare_parsed_string_with_saved_left_number(op, left_ty, emitter, ctx);
+    } else {
+        discard_saved_left_numeric(emitter, left_ty);
+        emit_set_loose_bool_literal(op, false, emitter);
+    }
+    PhpType::Bool
+}
+
+/// Pops the saved left truthiness and compares it with the truthiness in the result register.
+fn compare_saved_truthiness_with_current(op: &BinOp, emitter: &mut Emitter) {
+    let left_reg = match emitter.target.arch {
+        Arch::AArch64 => "x1",
+        Arch::X86_64 => "r10",
+    };
+    abi::emit_pop_reg(emitter, left_reg);
+    emitter.instruction(&format!("cmp {}, {}", left_reg, abi::int_result_reg(emitter))); // compare left truthiness against right truthiness
+    emit_set_bool_from_flags(emitter, loose_equality_condition(op));
+}
+
+/// Pops the saved right truthiness and compares the left truthiness in the result register with it.
+fn compare_saved_right_truthiness_with_current_left(op: &BinOp, emitter: &mut Emitter) {
+    let right_reg = match emitter.target.arch {
+        Arch::AArch64 => "x1",
+        Arch::X86_64 => "r10",
+    };
+    abi::emit_pop_reg(emitter, right_reg);
+    emitter.instruction(&format!("cmp {}, {}", abi::int_result_reg(emitter), right_reg)); // compare left truthiness against right truthiness
+    emit_set_bool_from_flags(emitter, loose_equality_condition(op));
+}
+
+/// Moves the current right string and the popped left string into `__rt_str_loose_eq` arguments and calls it.
+fn call_str_loose_eq_with_saved_left(op: &BinOp, emitter: &mut Emitter) {
+    match emitter.target.arch {
+        Arch::AArch64 => {
+            emitter.instruction("mov x3, x1"); // move the right string pointer into the loose string helper argument
+            emitter.instruction("mov x4, x2"); // move the right string length into the loose string helper argument
+            abi::emit_pop_reg_pair(emitter, "x1", "x2");
+        }
+        Arch::X86_64 => {
+            emitter.instruction("mov r10, rax"); // preserve the right string pointer while arranging helper arguments
+            emitter.instruction("mov rcx, rdx"); // move the right string length into the fourth helper argument
+            abi::emit_pop_reg_pair(emitter, "rdi", "rsi");
+            emitter.instruction("mov rdx, r10"); // move the right string pointer into the third helper argument
+        }
+    }
+    abi::emit_call_label(emitter, "__rt_str_loose_eq");
+    invert_loose_bool_if_needed(op, emitter);
+}
+
+/// Reloads the saved left string pointer/length from the temporary stack at `offset` without popping.
+fn load_saved_left_string(emitter: &mut Emitter, offset: usize) {
+    let (ptr_reg, len_reg) = abi::string_result_regs(emitter);
+    abi::emit_load_temporary_stack_slot(emitter, ptr_reg, offset);
+    abi::emit_load_temporary_stack_slot(emitter, len_reg, offset + 8);
+}
+
+/// Pops the saved left string back into the string result registers.
+fn pop_saved_left_string(emitter: &mut Emitter) {
+    let (ptr_reg, len_reg) = abi::string_result_regs(emitter);
+    abi::emit_pop_reg_pair(emitter, ptr_reg, len_reg);
+}
+
+/// Sets the result from whether the current string length is zero (null compared with a string).
+fn emit_compare_current_string_length_to_zero(op: &BinOp, emitter: &mut Emitter) {
+    let (_, len_reg) = abi::string_result_regs(emitter);
+    match emitter.target.arch {
+        Arch::AArch64 => {
+            emitter.instruction(&format!("cmp {}, #0", len_reg)); // compare string length against the empty string for null loose equality
+        }
+        Arch::X86_64 => {
+            emitter.instruction(&format!("cmp {}, 0", len_reg)); // compare string length against the empty string for null loose equality
+        }
+    }
+    emit_set_bool_from_flags(emitter, loose_equality_condition(op));
+}
+
+/// Pushes the current number as a float, promoting it first unless `ty` is already `Float`.
+fn push_current_number_as_float(emitter: &mut Emitter, ty: &PhpType) {
+    if *ty != PhpType::Float {
+        emit_promote_int_to_float(
+            emitter,
+            abi::float_result_reg(emitter),
+            abi::int_result_reg(emitter),
+        );
+    }
+    abi::emit_push_float_reg(emitter, abi::float_result_reg(emitter));
+}
+
+/// Pops the saved float and compares it with the `__rt_str_to_number` result; a zero flag yields "not equal".
+fn compare_parsed_string_with_saved_float(
+    op: &BinOp,
+    emitter: &mut Emitter,
+    ctx: &mut Context,
+) {
+    let false_label = ctx.next_label("loose_numeric_string_false");
+    let done_label = ctx.next_label("loose_numeric_string_done");
+    let saved_float_reg = match emitter.target.arch {
+        Arch::AArch64 => "d1",
+        Arch::X86_64 => "xmm1",
+    };
+    abi::emit_pop_float_reg(emitter, saved_float_reg);
+    emit_branch_if_current_flag_false(emitter, &false_label);
+    emit_compare_saved_float_with_parsed_string(emitter);
+    emit_set_float_bool_from_flags(emitter, loose_equality_condition(op));
+    abi::emit_jump(emitter, &done_label); // skip the non-numeric-string false branch
+    emitter.label(&false_label);
+    emit_set_loose_bool_literal(op, false, emitter);
+    emitter.label(&done_label);
+}
+
+/// Pops the saved left number (promoting ints to float) and compares it with the `__rt_str_to_number` result.
+fn compare_parsed_string_with_saved_left_number(
+    op: &BinOp,
+    left_ty: &PhpType,
+    emitter: &mut Emitter,
+    ctx: &mut Context,
+) {
+    let false_label = ctx.next_label("loose_numeric_string_false");
+    let done_label = ctx.next_label("loose_numeric_string_done");
+    let saved_float_reg = match emitter.target.arch {
+        Arch::AArch64 => "d1",
+        Arch::X86_64 => "xmm1",
+    };
+    if *left_ty == PhpType::Float {
+        abi::emit_pop_float_reg(emitter, saved_float_reg);
+    } else {
+        let left_int_reg = match emitter.target.arch {
+            Arch::AArch64 => "x1",
+            Arch::X86_64 => "r10",
+        };
+        abi::emit_pop_reg(emitter, left_int_reg);
+        emit_promote_int_to_float(emitter, saved_float_reg, left_int_reg);
+    }
+    emit_branch_if_current_flag_false(emitter, &false_label);
+    emit_compare_saved_float_with_parsed_string(emitter);
+    emit_set_float_bool_from_flags(emitter, loose_equality_condition(op));
+    abi::emit_jump(emitter, &done_label); // skip the non-numeric-string false branch
+    emitter.label(&false_label);
+    emit_set_loose_bool_literal(op, false, emitter);
+    emitter.label(&done_label);
+}
+
+/// Branches to `label` when the integer result register (x0/rax) holds zero.
+fn emit_branch_if_current_flag_false(emitter: &mut Emitter, label: &str) {
+    match emitter.target.arch {
+        Arch::AArch64 => {
+            emitter.instruction("cmp x0, #0"); // test whether string-to-number parsing failed
+            emitter.instruction(&format!("b.eq {}", label)); // branch when the string was not numeric
+        }
+        Arch::X86_64 => {
+            emitter.instruction("test rax, rax"); // test whether string-to-number parsing failed
+            emitter.instruction(&format!("je {}", label)); // branch when the string was not numeric
+        }
+    }
+}
+
+/// Emits the float compare of the saved operand (d1/xmm1) against the parsed value (d0/xmm0).
+fn emit_compare_saved_float_with_parsed_string(emitter: &mut Emitter) {
+    match emitter.target.arch {
+        Arch::AArch64 => {
+            emitter.instruction("fcmp d1, d0"); // compare numeric operand against parsed numeric string
+        }
+        Arch::X86_64 => {
+            emitter.instruction("ucomisd xmm1, xmm0"); // compare numeric operand against parsed numeric string
+        }
+    }
+}
+
+/// Reloads the saved left operand from the temporary stack at `offset` into the result register(s) for its type.
+fn load_saved_left_for_truthiness(emitter: &mut Emitter, left_ty: &PhpType, offset: usize) {
+    match left_ty {
+        PhpType::Float => {
+            abi::emit_load_temporary_stack_slot(emitter, abi::float_result_reg(emitter), offset);
+        }
+        PhpType::Str => {
+            load_saved_left_string(emitter, offset);
+        }
+        _ => {
+            abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), offset);
+        }
+    }
+}
+
+/// Pops the saved left operand when its value is no longer needed.
+fn discard_saved_left_numeric(emitter: &mut Emitter, left_ty: &PhpType) {
+    if *left_ty == PhpType::Float {
+        abi::emit_pop_float_reg(emitter, abi::float_result_reg(emitter));
+    } else {
+        abi::emit_pop_reg(emitter, abi::int_result_reg(emitter));
+    }
+}
+
+/// Flips a 0/1 equality result in the result register when `op` is `!=`.
+fn invert_loose_bool_if_needed(op: &BinOp, emitter: &mut Emitter) {
+    if *op == BinOp::NotEq {
+        match emitter.target.arch {
+            Arch::AArch64 => {
+                emitter.instruction("eor x0, x0, #1"); // invert normalized loose equality for !=
+            }
+            Arch::X86_64 => {
+                emitter.instruction("xor rax, 1"); // invert normalized loose equality for !=
+            }
+        }
+    }
+}
+
+/// Loads a constant result: `equality_value` for `==`, its negation for `!=`.
+fn emit_set_loose_bool_literal(op: &BinOp, equality_value: bool, emitter: &mut Emitter) {
+    let result = match op {
+        BinOp::Eq => equality_value,
+        BinOp::NotEq => !equality_value,
+        _ => unreachable!(),
+    };
+    abi::emit_load_int_immediate(
+        emitter,
+        abi::int_result_reg(emitter),
+        if result { 1 } else { 0 },
+    );
+}
+
+/// Maps `==`/`!=` to the `eq`/`ne` condition name passed to the flag-to-bool helpers.
+fn loose_equality_condition(op: &BinOp) -> &'static str {
+    match op {
+        BinOp::Eq => "eq",
+        BinOp::NotEq => "ne",
+        _ => unreachable!(),
+    }
+}
+
 pub(super) fn emit_loose_equality_binop(
     left: &Expr,
     op: &BinOp,
@@ -53,6 +373,12 @@ pub(super) fn emit_loose_equality_binop(
     data: &mut DataSection,
 ) -> PhpType {
     let left_ty = emit_expr(left, emitter, ctx, data);
+    if left_ty == PhpType::Bool {
+        return emit_bool_left_loose_equality(left, op, right, &left_ty, emitter, ctx, data);
+    }
+    if left_ty == PhpType::Str {
+        return emit_string_left_loose_equality(op, right, emitter, ctx, data);
+    }
     let left_numeric = matches!(
         left_ty,
         PhpType::Int | PhpType::Float | PhpType::Bool | PhpType::Void
@@ -74,6 +400,13 @@ pub(super) fn emit_loose_equality_binop(
     );
     coerce_null_to_zero(emitter, &right_ty);
 
+    if right_ty == PhpType::Bool && matches!(left_ty, PhpType::Int | PhpType::Float | PhpType::Void) {
+        return emit_bool_right_loose_equality(op, &left_ty, emitter, ctx);
+    }
+    if right_ty == PhpType::Str {
+        return emit_right_string_loose_equality(op, &left_ty, emitter, ctx);
+    }
+
     if left_numeric && right_numeric && (left_ty == PhpType::Float || right_ty == PhpType::Float) {
         if right_ty != PhpType::Float {
             emit_promote_int_to_float(
diff --git a/src/codegen/runtime/emitters.rs b/src/codegen/runtime/emitters.rs
index 09ae7ffd..6937887d 100644
--- a/src/codegen/runtime/emitters.rs
+++ b/src/codegen/runtime/emitters.rs
@@ -40,6 +40,8 @@ pub(crate) fn emit_runtime(emitter: &mut Emitter) {
     strings::emit_concat(emitter);
     strings::emit_atoi(emitter);
     strings::emit_str_eq(emitter);
+    strings::emit_str_to_number(emitter);
+    strings::emit_str_loose_eq(emitter);
     strings::emit_number_format(emitter);
     strings::emit_strcopy(emitter);
     strings::emit_str_persist(emitter);
diff --git a/src/codegen/runtime/strings/mod.rs b/src/codegen/runtime/strings/mod.rs
index 7ad30f4b..1bc92149 100644
--- a/src/codegen/runtime/strings/mod.rs
+++ b/src/codegen/runtime/strings/mod.rs
@@ -12,6 +12,8 @@ mod itoa;
 mod
concat;
 mod ftoa;
 mod str_eq;
+mod str_loose_eq;
+mod str_to_number;
 mod number_format;
 mod atoi;
 mod strcopy;
@@ -68,6 +70,8 @@ pub use itoa::emit_itoa;
 pub use concat::emit_concat;
 pub use ftoa::emit_ftoa;
 pub use str_eq::emit_str_eq;
+pub use str_loose_eq::emit_str_loose_eq;
+pub use str_to_number::emit_str_to_number;
 pub use number_format::emit_number_format;
 pub use atoi::emit_atoi;
 pub use strcopy::emit_strcopy;
diff --git a/src/codegen/runtime/strings/str_loose_eq.rs b/src/codegen/runtime/strings/str_loose_eq.rs
new file mode 100644
index 00000000..6d18a329
--- /dev/null
+++ b/src/codegen/runtime/strings/str_loose_eq.rs
@@ -0,0 +1,99 @@
+//! Purpose:
+//! Emits PHP loose equality for two runtime strings.
+//! Numeric strings compare by numeric value; non-numeric strings compare byte-for-byte.
+//!
+//! Called from:
+//! - `crate::codegen::runtime::emitters::emit_runtime()` via `crate::codegen::runtime::strings`.
+//!
+//! Key details:
+//! - Both operands must be parsed before falling back to byte equality so numeric-looking strings follow PHP 8 rules.
+
+use crate::codegen::{abi, emit::Emitter, platform::Arch};
+
+/// str_loose_eq: compare two PHP strings with loose-comparison semantics.
+/// Input: AArch64 x1/x2=left, x3/x4=right
+/// x86_64 rdi/rsi=left, rdx/rcx=right
+/// Output: integer result register = 1 when loosely equal, 0 otherwise
+pub fn emit_str_loose_eq(emitter: &mut Emitter) {
+    if emitter.target.arch == Arch::X86_64 {
+        emit_str_loose_eq_linux_x86_64(emitter);
+        return;
+    }
+
+    emitter.blank();
+    emitter.comment("--- runtime: str_loose_eq ---");
+    emitter.label_global("__rt_str_loose_eq");
+
+    emitter.instruction("sub sp, sp, #80"); // allocate helper slots for both strings and parsed numeric state
+    emitter.instruction("stp x29, x30, [sp, #64]"); // save frame pointer and return address
+    emitter.instruction("add x29, sp, #64"); // establish a stable helper frame pointer
+    emitter.instruction("stp x1, x2, [sp, #0]"); // save the left string pointer and length
+    emitter.instruction("stp x3, x4, [sp, #16]"); // save the right string pointer and length
+
+    emitter.instruction("bl __rt_str_to_number"); // parse the left string as a PHP numeric string
+    emitter.instruction("str x0, [sp, #32]"); // save whether the left string parsed as numeric
+    emitter.instruction("str d0, [sp, #40]"); // save the parsed left numeric value
+    emitter.instruction("ldp x1, x2, [sp, #16]"); // reload the right string into the parser input registers
+    emitter.instruction("bl __rt_str_to_number"); // parse the right string as a PHP numeric string
+    emitter.instruction("ldr x9, [sp, #32]"); // reload the left numeric-string flag
+    emitter.instruction("cbz x9, __rt_str_loose_eq_bytes"); // non-numeric left strings compare by bytes
+    emitter.instruction("cbz x0, __rt_str_loose_eq_bytes"); // non-numeric right strings compare by bytes
+    emitter.instruction("ldr d1, [sp, #40]"); // reload the parsed left numeric value
+    emitter.instruction("fcmp d1, d0"); // compare the numeric values for equality
+    emitter.instruction("cset x0, eq"); // produce true only when the parsed numeric values match
+    emitter.instruction("b __rt_str_loose_eq_done"); // skip the byte-comparison fallback
+
+    emitter.label("__rt_str_loose_eq_bytes");
+    emitter.instruction("ldp x1, x2, [sp, #0]"); // reload the left string pointer and length
+    emitter.instruction("ldp x3, x4, [sp, #16]"); // reload the right string pointer and length
+    emitter.instruction("bl __rt_str_eq"); // compare non-numeric strings byte-for-byte
+
+    emitter.label("__rt_str_loose_eq_done");
+    emitter.instruction("ldp x29, x30, [sp, #64]"); // restore frame pointer and return address
+    emitter.instruction("add sp, sp, #80"); // release the helper stack frame
+    emitter.instruction("ret"); // return the loose string equality result in x0
+}
+
+fn emit_str_loose_eq_linux_x86_64(emitter: &mut Emitter) {
+    emitter.blank();
+    emitter.comment("--- runtime: str_loose_eq ---");
+    emitter.label_global("__rt_str_loose_eq");
+
+    emitter.instruction("push rbp"); // save the caller frame pointer before nested runtime calls
+    emitter.instruction("mov rbp, rsp"); // establish a stable helper frame pointer
+    emitter.instruction("sub rsp, 80"); // allocate aligned helper slots for both strings and parsed numeric state
+    emitter.instruction("mov QWORD PTR [rbp - 8], rdi"); // save the left string pointer
+    emitter.instruction("mov QWORD PTR [rbp - 16], rsi"); // save the left string length
+    emitter.instruction("mov QWORD PTR [rbp - 24], rdx"); // save the right string pointer
+    emitter.instruction("mov QWORD PTR [rbp - 32], rcx"); // save the right string length
+
+    emitter.instruction("mov rax, rdi"); // move the left string pointer into the parser input register
+    emitter.instruction("mov rdx, rsi"); // move the left string length into the parser input register
+    abi::emit_call_label(emitter, "__rt_str_to_number"); // parse the left string as a PHP numeric string
+    emitter.instruction("mov QWORD PTR [rbp - 40], rax"); // save whether the left string parsed as numeric
+    emitter.instruction("movsd QWORD PTR [rbp - 48], xmm0"); // save the parsed left numeric value
+    emitter.instruction("mov rax, QWORD PTR [rbp - 24]"); // reload the right string pointer into the parser input register
+    emitter.instruction("mov rdx, QWORD PTR [rbp - 32]"); // reload the right string length into the parser input register
+    abi::emit_call_label(emitter, "__rt_str_to_number"); // parse the right string as a PHP numeric string
+    emitter.instruction("cmp QWORD PTR [rbp - 40], 0"); // did the left string parse as numeric?
+    emitter.instruction("je __rt_str_loose_eq_bytes_linux_x86_64"); // non-numeric left strings compare by bytes
+    emitter.instruction("test rax, rax"); // did the right string parse as numeric?
+    emitter.instruction("je __rt_str_loose_eq_bytes_linux_x86_64"); // non-numeric right strings compare by bytes
+    emitter.instruction("movsd xmm1, QWORD PTR [rbp - 48]"); // reload the parsed left numeric value
+    emitter.instruction("ucomisd xmm1, xmm0"); // compare the parsed numeric values
+    emitter.instruction("sete al"); // produce true only when the numeric values match
+    emitter.instruction("movzx rax, al"); // widen the boolean byte into the full result register
+    emitter.instruction("jmp __rt_str_loose_eq_done_linux_x86_64"); // skip the byte-comparison fallback
+
+    emitter.label("__rt_str_loose_eq_bytes_linux_x86_64");
+    emitter.instruction("mov rdi, QWORD PTR [rbp - 8]"); // reload the left string pointer
+    emitter.instruction("mov rsi, QWORD PTR [rbp - 16]"); // reload the left string length
+    emitter.instruction("mov rdx, QWORD PTR [rbp - 24]"); // reload the right string pointer
+    emitter.instruction("mov rcx, QWORD PTR [rbp - 32]"); // reload the right string length
+    abi::emit_call_label(emitter, "__rt_str_eq"); // compare non-numeric strings byte-for-byte
+
+    emitter.label("__rt_str_loose_eq_done_linux_x86_64");
+    emitter.instruction("add rsp, 80"); // release the helper stack frame
+    emitter.instruction("pop rbp"); // restore the caller frame pointer
+    emitter.instruction("ret"); // return the loose string equality result in rax
+}
diff --git a/src/codegen/runtime/strings/str_to_number.rs b/src/codegen/runtime/strings/str_to_number.rs
new file mode 100644
index 00000000..d2fd8ace
--- /dev/null
+++ b/src/codegen/runtime/strings/str_to_number.rs
@@ -0,0 +1,127 @@
+//! Purpose:
+//! Emits string numeric-detection helpers used by PHP loose comparison.
+//! Converts pointer/length PHP strings through libc `strtod` while rejecting trailing junk.
+//!
+//! Called from:
+//! - `crate::codegen::runtime::emitters::emit_runtime()` via `crate::codegen::runtime::strings`.
+//!
+//! Key details:
+//! - The helper returns both a numeric flag and the parsed double without losing PHP byte-string bounds.
+//! - Bytes consumed by `strtod` are re-scanned so its hexadecimal, `inf`, and `nan` spellings are not reported as numeric.
+//! - NOTE(review): scanning runs on the NUL-terminated copy, so an embedded NUL presumably ends the string early; confirm against `__rt_cstr`.
+
+use crate::codegen::emit::Emitter;
+use crate::codegen::platform::Arch;
+
+/// str_to_number: parse a PHP string as a PHP-8-style numeric string.
+/// Input: AArch64 x1=ptr, x2=len; x86_64 rax=ptr, rdx=len
+/// Output: integer result register = 1 when numeric, 0 otherwise; d0/xmm0 = parsed number
+pub fn emit_str_to_number(emitter: &mut Emitter) {
+    if emitter.target.arch == Arch::X86_64 {
+        emit_str_to_number_linux_x86_64(emitter);
+        return;
+    }
+
+    emitter.blank();
+    emitter.comment("--- runtime: str_to_number ---");
+    emitter.label_global("__rt_str_to_number");
+
+    emitter.instruction("sub sp, sp, #32"); // allocate helper slots for the C string start and strtod end pointer
+    emitter.instruction("stp x29, x30, [sp, #16]"); // save frame pointer and return address
+    emitter.instruction("add x29, sp, #16"); // establish a stable helper frame pointer
+    emitter.instruction("bl __rt_cstr"); // copy the bounded PHP string into the C-string scratch buffer
+    emitter.instruction("str x0, [sp, #0]"); // save the C-string start pointer for the no-consumption check
+    emitter.instruction("add x1, sp, #8"); // pass the address of the local end-pointer slot to strtod
+    emitter.bl_c("strtod");
+    emitter.instruction("ldr x9, [sp, #8]"); // load the end pointer returned by strtod
+    emitter.instruction("ldr x10, [sp, #0]"); // reload the C-string start pointer
+    emitter.instruction("cmp x9, x10"); // reject strings where strtod consumed no numeric bytes
+    emitter.instruction("b.eq __rt_str_to_number_false"); // no consumed bytes means this is not a numeric string
+
+    emitter.label("__rt_str_to_number_consumed_loop");
+    emitter.instruction("cmp x10, x9"); // stop once every byte strtod consumed has been checked
+    emitter.instruction("b.hs __rt_str_to_number_trailing_loop"); // consumed bytes are acceptable, validate the tail next
+    emitter.instruction("ldrb w11, [x10], #1"); // load the next consumed byte and advance the validation cursor
+    emitter.instruction("orr w12, w11, #0x20"); // fold ASCII letters to lowercase; digits, signs, and dots are unchanged
+    emitter.instruction("cmp w12, #101"); // 'e'/'E' is the only letter a decimal number may contain
+    emitter.instruction("b.eq __rt_str_to_number_consumed_loop"); // keep scanning after an exponent marker
+    emitter.instruction("sub w12, w12, #97"); // normalize so that 'a' through 'z' map to 0 through 25
+    emitter.instruction("cmp w12, #25"); // any other letter means strtod accepted hex, inf, or nan syntax
+    emitter.instruction("b.ls __rt_str_to_number_false"); // those spellings are not PHP numeric strings
+    emitter.instruction("b __rt_str_to_number_consumed_loop"); // non-letter bytes are fine, keep scanning
+
+    emitter.label("__rt_str_to_number_trailing_loop");
+    emitter.instruction("ldrb w11, [x9], #1"); // load the next trailing byte and advance the scan cursor
+    emitter.instruction("cbz w11, __rt_str_to_number_true"); // end of C string means all trailing bytes were acceptable
+    emitter.instruction("cmp w11, #32"); // ASCII space is allowed after the numeric payload
+    emitter.instruction("b.eq __rt_str_to_number_trailing_loop"); // keep scanning after an allowed space
+    emitter.instruction("sub w12, w11, #9"); // normalize ASCII tab/newline/form-feed/carriage-return range
+    emitter.instruction("cmp w12, #4"); // values 9 through 13 are accepted trailing whitespace
+    emitter.instruction("b.ls __rt_str_to_number_trailing_loop"); // keep scanning after accepted control whitespace
+
+    emitter.label("__rt_str_to_number_false");
+    emitter.instruction("mov x0, #0"); // report that the string is not numeric
+    emitter.instruction("b __rt_str_to_number_done"); // restore the helper frame and return
+
+    emitter.label("__rt_str_to_number_true");
+    emitter.instruction("mov x0, #1"); // report that the string parsed as a complete numeric string
+
+    emitter.label("__rt_str_to_number_done");
+    emitter.instruction("ldp x29, x30, [sp, #16]"); // restore frame pointer and return address
+    emitter.instruction("add sp, sp, #32"); // release the helper stack frame
+    emitter.instruction("ret"); // return the numeric flag while preserving the parsed double in d0
+}
+
+fn emit_str_to_number_linux_x86_64(emitter: &mut Emitter) {
+    emitter.blank();
+    emitter.comment("--- runtime: str_to_number ---");
+    emitter.label_global("__rt_str_to_number");
+
+    emitter.instruction("push rbp"); // save the caller frame pointer before nested libc calls
+    emitter.instruction("mov rbp, rsp"); // establish a stable helper frame pointer
+    emitter.instruction("sub rsp, 32"); // allocate aligned helper slots for start and end pointers
+    emitter.instruction("call __rt_cstr"); // copy the bounded PHP string into the C-string scratch buffer
+    emitter.instruction("mov QWORD PTR [rbp - 8], rax"); // save the C-string start pointer for the no-consumption check
+    emitter.instruction("lea rsi, [rbp - 16]"); // pass the address of the local end-pointer slot to strtod
+    emitter.instruction("mov rdi, rax"); // pass the C-string start pointer as strtod's first argument
+    emitter.instruction("call strtod"); // parse the C string as a double through libc
+    emitter.instruction("mov r8, QWORD PTR [rbp - 16]"); // load the end pointer returned by strtod
+    emitter.instruction("cmp r8, QWORD PTR [rbp - 8]"); // reject strings where strtod consumed no numeric bytes
+    emitter.instruction("je __rt_str_to_number_false_linux_x86_64"); // no consumed bytes means this is not a numeric string
+    emitter.instruction("mov r10, QWORD PTR [rbp - 8]"); // start validating the consumed bytes at the C-string start
+
+    emitter.label("__rt_str_to_number_consumed_loop_linux_x86_64");
+    emitter.instruction("cmp r10, r8"); // stop once every byte strtod consumed has been checked
+    emitter.instruction("jae __rt_str_to_number_trailing_loop_linux_x86_64"); // consumed bytes are acceptable, validate the tail next
+    emitter.instruction("movzx r9d, BYTE PTR [r10]"); // load the next consumed byte without sign extension
+    emitter.instruction("add r10, 1"); // advance the consumed-byte validation cursor
+    emitter.instruction("or r9d, 32"); // fold ASCII letters to lowercase; digits, signs, and dots are unchanged
+    emitter.instruction("cmp r9d, 101"); // 'e'/'E' is the only letter a decimal number may contain
+    emitter.instruction("je __rt_str_to_number_consumed_loop_linux_x86_64"); // keep scanning after an exponent marker
+    emitter.instruction("sub r9d, 97"); // normalize so that 'a' through 'z' map to 0 through 25
+    emitter.instruction("cmp r9d, 25"); // any other letter means strtod accepted hex, inf, or nan syntax
+    emitter.instruction("jbe __rt_str_to_number_false_linux_x86_64"); // those spellings are not PHP numeric strings
+    emitter.instruction("jmp __rt_str_to_number_consumed_loop_linux_x86_64"); // non-letter bytes are fine, keep scanning
+
+    emitter.label("__rt_str_to_number_trailing_loop_linux_x86_64");
+    emitter.instruction("movzx r9d, BYTE PTR [r8]"); // load the next trailing byte without sign extension
+    emitter.instruction("add r8, 1"); // advance the trailing-byte scan cursor
+    emitter.instruction("test r9d, r9d"); // check whether the scan reached the C-string terminator
+    emitter.instruction("je __rt_str_to_number_true_linux_x86_64"); // end of C string means all trailing bytes were acceptable
+    emitter.instruction("cmp r9d, 32"); // ASCII space is allowed after the numeric payload
+    emitter.instruction("je __rt_str_to_number_trailing_loop_linux_x86_64"); // keep scanning after an allowed space
+    emitter.instruction("sub r9d, 9"); // normalize ASCII tab/newline/form-feed/carriage-return range
+    emitter.instruction("cmp r9d, 4"); // values 9 through 13 are accepted trailing whitespace
+    emitter.instruction("jbe __rt_str_to_number_trailing_loop_linux_x86_64"); // keep scanning after accepted control whitespace
+
+    emitter.label("__rt_str_to_number_false_linux_x86_64");
+    emitter.instruction("xor rax, rax"); // report that the string is not numeric
+    emitter.instruction("jmp __rt_str_to_number_done_linux_x86_64"); // restore the helper frame and return
+
+    emitter.label("__rt_str_to_number_true_linux_x86_64");
+    emitter.instruction("mov rax, 1"); // report that the string parsed as a complete numeric string
+
+    emitter.label("__rt_str_to_number_done_linux_x86_64");
+    emitter.instruction("add rsp, 32"); // release the helper stack frame
+    emitter.instruction("pop rbp"); // restore the caller frame pointer
+    emitter.instruction("ret"); // return the numeric flag while preserving the parsed double in xmm0
+}
diff --git a/src/codegen/runtime/x86_minimal.rs b/src/codegen/runtime/x86_minimal.rs
index 287d6e7d..92137d0f 100644
--- a/src/codegen/runtime/x86_minimal.rs
+++ b/src/codegen/runtime/x86_minimal.rs
@@ -57,6 +57,8 @@ pub(super) fn emit_runtime_linux_x86_64_minimal(emitter: &mut Emitter) {
     strings::emit_atoi(emitter);
     strings::emit_str_persist(emitter);
     strings::emit_str_eq(emitter);
+    strings::emit_str_to_number(emitter);
+    strings::emit_str_loose_eq(emitter);
     strings::emit_strtolower(emitter);
     strings::emit_strpos(emitter);
     strings::emit_strrpos(emitter);
diff --git a/tests/codegen/operators.rs b/tests/codegen/operators.rs
index 0a1ac2b1..a9513cf3 100644
--- a/tests/codegen/operators.rs
+++ b/tests/codegen/operators.rs
@@ -279,6 +279,42 @@ fn test_constant_loose_eq_number_and_numeric_string_is_true() {
     assert_eq!(out, "bool(true)\n");
 }
 
+#[test]
+fn test_runtime_loose_eq_non_numeric_strings_compare_by_bytes() {
+    let out = compile_and_run(" Date: Sat, 16 May 2026 14:39:18 +0200
Subject: [PATCH 06/10] docs: close runtime compatibility polishing v2
---
 ROADMAP.md | 2 +-
 docs/php/operators.md | 10 +++++-----
 docs/php/types.md | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ROADMAP.md b/ROADMAP.md
index 308c7ac1..d403ebee
100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -441,7 +441,7 @@ runtime helpers, and standard-library surfaces. - [x] PHP attributes runtime introspection — implement `ReflectionClass::getAttributes()`, `ReflectionMethod::getAttributes()`, `ReflectionProperty::getAttributes()`, plus `ReflectionAttribute::newInstance()`. Class/member declarations expose attribute names and supported literal args through helper builtins and Reflection objects; `ReflectionAttribute::newInstance()` constructs the attribute class on demand from the captured literal args. - [x] Mixed indexed/associative array union — model `array + array` across indexed/hash representations while preserving PHP's shared int/string key space and left-key precedence - [X] Callable parity follow-up — support captured method/static first-class callables in the remaining callback runtimes (`array_reduce()`, `array_walk()`, `usort()`, `uksort()`, `uasort()`), direct callable expression calls such as `($obj->method(...))()`, non-local method receivers such as `(new Foo())->method(...)`, nullsafe first-class callables, broader builtin first-class callable wrappers, and the remaining `call_user_func_array()` by-reference callback gaps -- [ ] Runtime-value compatibility polishing v2 — uninitialized typed instance/static property reads now fail with PHP-style fatal diagnostics; constant-folded integer overflow promotes to float; constant-folded scalar loose comparisons use broader PHP bool/null/numeric-string rules. Continue with non-folded runtime integer overflow promotion, runtime loose-comparison helper semantics, and future warning/notice sites as they are added. 
+- [x] Runtime-value compatibility polishing v2 — uninitialized typed instance/static property reads fail with PHP-style fatal diagnostics; constant-folded and non-folded runtime integer `+`/`-`/`*` overflow promotes to double; scalar loose comparisons cover PHP bool truthiness, null-vs-empty-string, numeric-string, and non-numeric string byte-comparison rules at constant-fold and runtime helper sites. Warning/notice sites added so far route through the suppressible runtime diagnostics channel. - [ ] Broader date and regex PHP parity — expand `strtotime()` relative formats and PCRE-compatible regex features/captures/backreferences (JSON parity now closed: see v0.8.x base + v0.20.x polish) - [x] JSON encoder optimization — folded `__rt_json_assoc_is_list_shape` into the main associative-array encoding walk. `__rt_json_encode_assoc` now emits a provisional object form, tracks whether keys remain `0..count-1` while iterating the hash once, and compacts the finished buffer in-place to `[...]` only for real list-shape payloads. Object-shape inputs still stay object form, and `JSON_FORCE_OBJECT` disables compaction. - [x] JSON decoder optimization — fused the `__rt_json_validate` pre-pass into `__rt_json_decode_mixed` for `json_decode()`. The wrapper now calls the checked structural decoder directly; the decoder trims the input once, validates scalar strings/numbers at the point where they are decoded, enforces depth around containers, records syntax/depth/UTF-16 errors internally, and returns null-on-error for the PHP-facing wrapper. `json_validate()` keeps the standalone RFC 8259 validator surface. 
diff --git a/docs/php/operators.md b/docs/php/operators.md index 14dd6d5f..a1c60de7 100644 --- a/docs/php/operators.md +++ b/docs/php/operators.md @@ -9,9 +9,9 @@ sidebar: | Operator | Example | Notes | |---|---|---| -| `+` | `$a + $b` | Numeric addition, or PHP array union when both operands are arrays | -| `-` | `$a - $b` | Subtraction | -| `*` | `$a * $b` | Multiplication | +| `+` | `$a + $b` | Numeric addition, or PHP array union when both operands are arrays. Integer overflow promotes to `double`. | +| `-` | `$a - $b` | Subtraction. Integer overflow promotes to `double`. | +| `*` | `$a * $b` | Multiplication. Integer overflow promotes to `double`. | | `/` | `$a / $b` | Division (always returns float) | | `%` | `$a % $b` | Modulo | | `**` | `$a ** $b` | Exponentiation (right-associative, returns float) | @@ -21,8 +21,8 @@ sidebar: | Operator | Example | Notes | |---|---|---| -| `==` | `$a == $b` | Loose equality (cross-type: coerces to int) | -| `!=` | `$a != $b` | Inequality | +| `==` | `$a == $b` | Loose equality using PHP-style scalar coercions for bool, null, numeric strings, and non-numeric strings | +| `!=` | `$a != $b` | Loose inequality using the same scalar coercions as `==` | | `===` | `$a === $b` | Strict equality (type and value) | | `!==` | `$a !== $b` | Strict inequality | | `<` | `$a < $b` | Less than | diff --git a/docs/php/types.md b/docs/php/types.md index 8d28fc82..8ef7e702 100644 --- a/docs/php/types.md +++ b/docs/php/types.md @@ -161,8 +161,8 @@ Aliases: `(integer)`, `(double)`, `(real)`, `(boolean)`. ### Known incompatibilities with PHP - `$argv[0]` returns the compiled binary path, not the `.php` file path. -- Non-folded runtime integer overflow wraps instead of promoting to float; constant-folded integer overflow promotes to float. -- Non-folded runtime loose comparison (`==`) between different types coerces both sides to integer; constant-folded scalar literals use broader PHP-style bool/null/numeric-string rules. 
+- Integer `+`, `-`, and `*` overflow promotes to `double` for both constant-folded and non-folded runtime scalar arithmetic. +- Scalar loose comparison (`==`, `!=`) follows PHP-style bool truthiness, null-vs-empty-string, numeric-string, and non-numeric string byte-comparison rules for constant-folded literals and non-folded runtime scalar operands. - `??=` is checked against typed assignment storage for variables, object properties, static properties, and non-append array elements. For concrete local variable types, the fallback must keep the same type or be a literal `null`. - Plain array numeric casts (`(int)$array`, `(float)$array`) follow elephc's existing array cast semantics (return the element count rather than PHP's `0`/`1`). Direct `iterable` numeric casts use PHP's empty/non-empty `0`/`1` semantics. - `FiberError` is currently modeled as an `Exception` subclass in elephc; PHP models `FiberError` under `Error`. From 86d360236c738ce8d03038d81329ffe3b0d01aad Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 14:46:14 +0200 Subject: [PATCH 07/10] fix: preserve mixed arithmetic locals --- src/codegen/expr/binops/arithmetic.rs | 16 +++++++++---- src/codegen/functions/locals.rs | 34 ++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/codegen/expr/binops/arithmetic.rs b/src/codegen/expr/binops/arithmetic.rs index 01d73a3e..21dc7075 100644 --- a/src/codegen/expr/binops/arithmetic.rs +++ b/src/codegen/expr/binops/arithmetic.rs @@ -251,7 +251,7 @@ fn emit_mixed_numeric_binop( crate::codegen::emit_box_current_value_as_mixed(emitter, right_ty); } abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); - pop_saved_numeric_operand(emitter, left_stack_ty); + load_saved_numeric_operand(emitter, left_stack_ty, 16); if !matches!(left_stack_ty, PhpType::Mixed | PhpType::Union(_)) { crate::codegen::emit_box_current_value_as_mixed(emitter, left_stack_ty); } @@ -263,6 +263,7 @@ fn emit_mixed_numeric_binop( 
abi::emit_pop_reg(emitter, "rdi"); } } + abi::emit_release_temporary_stack(emitter, 16); let helper = match op { BinOp::Add => "__rt_mixed_numeric_add", BinOp::Sub => "__rt_mixed_numeric_sub", @@ -273,18 +274,23 @@ fn emit_mixed_numeric_binop( PhpType::Mixed } -fn pop_saved_numeric_operand(emitter: &mut Emitter, ty: &PhpType) { +fn load_saved_numeric_operand(emitter: &mut Emitter, ty: &PhpType, offset: usize) { match ty.codegen_repr() { PhpType::Float => { - abi::emit_pop_float_reg(emitter, abi::float_result_reg(emitter)); + abi::emit_load_temporary_stack_slot( + emitter, + abi::float_result_reg(emitter), + offset, + ); } PhpType::Str => { let (ptr_reg, len_reg) = abi::string_result_regs(emitter); - abi::emit_pop_reg_pair(emitter, ptr_reg, len_reg); + abi::emit_load_temporary_stack_slot(emitter, ptr_reg, offset); + abi::emit_load_temporary_stack_slot(emitter, len_reg, offset + 8); } PhpType::Void | PhpType::Never => {} _ => { - abi::emit_pop_reg(emitter, abi::int_result_reg(emitter)); + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), offset); } } } diff --git a/src/codegen/functions/locals.rs b/src/codegen/functions/locals.rs index abe3ef7f..8913a27e 100644 --- a/src/codegen/functions/locals.rs +++ b/src/codegen/functions/locals.rs @@ -8,8 +8,8 @@ //! Key details: //! - Any lowering path that introduces storage must be represented here before stack offsets are assigned. -use crate::codegen::context::Context; -use crate::parser::ast::{CallableTarget, Expr, ExprKind, InstanceOfTarget, StmtKind}; +use crate::codegen::context::{Context, HeapOwnership}; +use crate::parser::ast::{BinOp, CallableTarget, Expr, ExprKind, InstanceOfTarget, StmtKind}; use crate::types::{FunctionSig, PhpType}; use super::types::{codegen_declared_type, codegen_static_type, infer_local_type}; @@ -29,9 +29,27 @@ pub fn collect_local_vars( StmtKind::IncludeOnceMark { .. 
} => {} StmtKind::Assign { name, value } => { collect_assignment_expr_vars(value, ctx, sig); + let needs_mixed_numeric_slot = runtime_numeric_result_may_widen(value, sig, ctx); if !ctx.variables.contains_key(name) { let static_ty = infer_local_type(value, sig, Some(ctx)); - ctx.alloc_var_with_static_type(name, static_ty.codegen_repr(), static_ty); + let slot_ty = if needs_mixed_numeric_slot { + PhpType::Mixed + } else { + static_ty.codegen_repr() + }; + ctx.alloc_var_with_static_type(name, slot_ty, static_ty); + } else if needs_mixed_numeric_slot { + let static_ty = ctx + .variables + .get(name) + .map(|var| var.static_ty.clone()) + .unwrap_or_else(|| infer_local_type(value, sig, Some(ctx))); + ctx.update_var_type_static_and_ownership( + name, + PhpType::Mixed, + static_ty, + HeapOwnership::for_type(&PhpType::Mixed), + ); } } StmtKind::TypedAssign { @@ -388,6 +406,16 @@ fn infer_conditional_assignment_temp_type( } } +fn runtime_numeric_result_may_widen(value: &Expr, sig: &FunctionSig, ctx: &Context) -> bool { + matches!( + value.kind, + ExprKind::BinaryOp { + op: BinOp::Add | BinOp::Sub | BinOp::Mul, + .. 
+ } + ) && infer_local_type(value, sig, Some(ctx)) == PhpType::Int +} + fn collect_named_builtin_or_extern_call_temps( name: &str, call_span: crate::span::Span, From ab2c93028688cd0adfa7da14fd518988af83c968 Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 15:46:39 +0200 Subject: [PATCH 08/10] fix: guard heap small-bin reuse --- src/codegen/runtime/arrays/heap_alloc.rs | 30 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/codegen/runtime/arrays/heap_alloc.rs b/src/codegen/runtime/arrays/heap_alloc.rs index 4551f862..2aaf08b0 100644 --- a/src/codegen/runtime/arrays/heap_alloc.rs +++ b/src/codegen/runtime/arrays/heap_alloc.rs @@ -61,8 +61,16 @@ pub fn emit_heap_alloc(emitter: &mut Emitter) { crate::codegen::abi::emit_symbol_address(emitter, "x9", "_heap_small_bins"); emitter.instruction("add x9, x9, x13"); // x9 = address of the first candidate bin head emitter.label("__rt_heap_alloc_small_bin_loop"); - emitter.instruction("ldr x10, [x9]"); // x10 = current small-bin head block (0 if this bin is empty) - emitter.instruction("cbnz x10, __rt_heap_alloc_small_bin_found"); // use the first available cached block in this size class or larger + emitter.instruction("mov x16, x9"); // x16 tracks the previous next-pointer slot while scanning this bin + emitter.label("__rt_heap_alloc_small_bin_scan"); + emitter.instruction("ldr x10, [x16]"); // x10 = current cached block header or null when this bin is exhausted + emitter.instruction("cbz x10, __rt_heap_alloc_small_bin_next_class"); // try the next larger bin when this bin has no fitting block + emitter.instruction("ldr w11, [x10]"); // load the cached block payload size before reusing it + emitter.instruction("cmp x11, x0"); // does the cached block satisfy the requested payload size? 
+ emitter.instruction("b.hs __rt_heap_alloc_small_bin_found"); // yes — reuse this cached block safely + emitter.instruction("add x16, x10, #16"); // advance the previous next-pointer slot to current->next + emitter.instruction("b __rt_heap_alloc_small_bin_scan"); // keep searching this bin for a large-enough cached block + emitter.label("__rt_heap_alloc_small_bin_next_class"); emitter.instruction("cmp x13, #24"); // have we already checked the <=64-byte bin? emitter.instruction("b.eq __rt_heap_alloc_fl_start"); // yes — fall back to the general free list emitter.instruction("add x13, x13, #8"); // advance to the next larger small-bin class @@ -71,7 +79,7 @@ pub fn emit_heap_alloc(emitter: &mut Emitter) { emitter.label("__rt_heap_alloc_small_bin_found"); emitter.instruction("ldr x11, [x10, #16]"); // x11 = cached_small_block->next within this size class - emitter.instruction("str x11, [x9]"); // pop the cached block from the segregated small bin + emitter.instruction("str x11, [x16]"); // unlink the cached block from its segregated small-bin chain emitter.instruction("mov w13, #1"); // initial refcount = 1 for the reused block emitter.instruction("str w13, [x10, #4]"); // restore the live refcount in the reused header emitter.instruction("str xzr, [x10, #8]"); // reset heap kind to raw until a typed constructor overwrites it @@ -225,9 +233,17 @@ fn emit_heap_alloc_linux_x86_64(emitter: &mut Emitter) { crate::codegen::abi::emit_symbol_address(emitter, "r9", "_heap_small_bins"); emitter.instruction("add r9, r8"); // r9 = address of the first candidate small-bin head slot emitter.label("__rt_heap_alloc_small_bin_loop"); - emitter.instruction("mov r10, QWORD PTR [r9]"); // r10 = current cached small-bin block header or null if this bin is empty - emitter.instruction("test r10, r10"); // does this size class currently hold a cached reusable block? 
- emitter.instruction("jnz __rt_heap_alloc_small_bin_found"); // yes — reuse the first cached block in this class or a larger one + emitter.instruction("mov rcx, r9"); // rcx tracks the previous next-pointer slot while scanning this bin + emitter.label("__rt_heap_alloc_small_bin_scan"); + emitter.instruction("mov r10, QWORD PTR [rcx]"); // r10 = current cached block header or null when this bin is exhausted + emitter.instruction("test r10, r10"); // did this bin scan run out of cached blocks? + emitter.instruction("jz __rt_heap_alloc_small_bin_next_class"); // try the next larger bin when this bin has no fitting block + emitter.instruction("mov r11d, DWORD PTR [r10]"); // load the cached block payload size before reusing it + emitter.instruction("cmp r11, rax"); // does the cached block satisfy the requested payload size? + emitter.instruction("jae __rt_heap_alloc_small_bin_found"); // yes — reuse this cached block safely + emitter.instruction("lea rcx, [r10 + 16]"); // advance the previous next-pointer slot to current->next + emitter.instruction("jmp __rt_heap_alloc_small_bin_scan"); // keep searching this bin for a large-enough cached block + emitter.label("__rt_heap_alloc_small_bin_next_class"); emitter.instruction("cmp r8, 24"); // have we already checked the largest <=64-byte cache bin? 
emitter.instruction("je __rt_heap_alloc_fl_start"); // yes — fall back to the general free list emitter.instruction("add r8, 8"); // advance to the next larger small-bin class offset @@ -236,7 +252,7 @@ fn emit_heap_alloc_linux_x86_64(emitter: &mut Emitter) { emitter.label("__rt_heap_alloc_small_bin_found"); emitter.instruction("mov r11, QWORD PTR [r10 + 16]"); // load the cached block's next pointer within this size class - emitter.instruction("mov QWORD PTR [r9], r11"); // pop the cached block from the selected small bin + emitter.instruction("mov QWORD PTR [rcx], r11"); // unlink the cached block from its segregated small-bin chain emitter.instruction("mov DWORD PTR [r10 + 4], 1"); // restore a live refcount of one in the reused heap header emitter.instruction(&format!("mov r11, 0x{:x}", X86_64_HEAP_MAGIC_HI32 << 32)); // materialize the x86_64 heap marker while leaving the low kind bits clear emitter.instruction("mov QWORD PTR [r10 + 8], r11"); // stamp the reused heap header as an owned raw heap allocation From e6e80ae6f410710c5a077162e970adeae2ec395c Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 15:46:54 +0200 Subject: [PATCH 09/10] fix: stabilize mixed value coercions --- src/codegen/builtins/pointers/ptr_set.rs | 27 +++++- src/codegen/expr.rs | 2 +- src/codegen/expr/binops/arithmetic.rs | 44 +++++++++- src/codegen/expr/calls/args/common.rs | 83 ++++++++++++++++++- src/codegen/expr/calls/args/emit.rs | 7 +- src/codegen/expr/calls/closure.rs | 1 + src/codegen/expr/calls/function.rs | 1 + src/codegen/expr/calls/indirect.rs | 1 + src/codegen/expr/helpers.rs | 26 ++++++ src/codegen/expr/objects/allocation.rs | 1 + src/codegen/expr/objects/dispatch/prep.rs | 1 + src/codegen/functions/locals.rs | 14 +--- src/codegen/stmt/arrays/assign/assoc.rs | 9 +- src/codegen/stmt/arrays/assign/buffer.rs | 14 +++- .../stmt/arrays/assign/indexed/prepare.rs | 22 ++++- src/codegen/stmt/assignments/locals.rs | 45 +++++++++- 
.../assignments/properties/arrays/indexed.rs | 17 +++- .../stmt/assignments/properties/assign.rs | 31 +++++-- .../static_properties/arrays/indexed.rs | 17 +++- .../assignments/static_properties/assign.rs | 15 +++- src/codegen/stmt/control_flow/loops/exits.rs | 22 ++++- src/codegen/stmt/helpers.rs | 53 +++++++++++- 22 files changed, 407 insertions(+), 46 deletions(-) diff --git a/src/codegen/builtins/pointers/ptr_set.rs b/src/codegen/builtins/pointers/ptr_set.rs index b6439ff8..cbf8c9fe 100644 --- a/src/codegen/builtins/pointers/ptr_set.rs +++ b/src/codegen/builtins/pointers/ptr_set.rs @@ -11,9 +11,10 @@ use crate::codegen::context::Context; use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; -use crate::codegen::expr::emit_expr; +use crate::codegen::expr::{coerce_result_to_type, emit_expr, expr_result_heap_ownership}; use crate::codegen::{abi, platform::Arch}; -use crate::parser::ast::Expr; +use crate::codegen::context::HeapOwnership; +use crate::parser::ast::{BinOp, Expr, ExprKind}; use crate::types::PhpType; pub fn emit( @@ -30,7 +31,27 @@ pub fn emit( abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); // preserve the validated destination pointer while the stored value expression is evaluated // -- evaluate value to write -- - emit_expr(&args[1], emitter, ctx, data); + let value_ty = emit_expr(&args[1], emitter, ctx, data); + let release_mixed_after_coerce = matches!(value_ty, PhpType::Mixed | PhpType::Union(_)) + && (expr_result_heap_ownership(&args[1]) == HeapOwnership::Owned + || matches!( + args[1].kind, + ExprKind::BinaryOp { + op: BinOp::Add | BinOp::Sub | BinOp::Mul, + .. 
+ } + )); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); // preserve the boxed Mixed value so it can be released after integer coercion + } + coerce_result_to_type(emitter, ctx, data, &value_ty, &PhpType::Int); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); // preserve the coerced integer payload while releasing the temporary Mixed box + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_reg(emitter, abi::int_result_reg(emitter)); // restore the coerced integer payload after temporary Mixed cleanup + abi::emit_release_temporary_stack(emitter, 16); + } // -- store value at pointer address -- match emitter.target.arch { diff --git a/src/codegen/expr.rs b/src/codegen/expr.rs index 11839e3d..94ddf292 100644 --- a/src/codegen/expr.rs +++ b/src/codegen/expr.rs @@ -30,7 +30,7 @@ use super::emit::Emitter; use crate::parser::ast::{BinOp, Expr, ExprKind}; use crate::types::PhpType; -pub(crate) use helpers::coerce_result_to_type; +pub(crate) use helpers::{can_coerce_result_to_type, coerce_result_to_type}; pub(crate) use objects::{emit_method_call_with_pushed_args, push_magic_property_name_arg}; pub(crate) use ownership::expr_result_heap_ownership; pub use coerce::{coerce_null_to_zero, coerce_to_string, coerce_to_truthiness}; diff --git a/src/codegen/expr/binops/arithmetic.rs b/src/codegen/expr/binops/arithmetic.rs index 21dc7075..b90c27c1 100644 --- a/src/codegen/expr/binops/arithmetic.rs +++ b/src/codegen/expr/binops/arithmetic.rs @@ -247,12 +247,14 @@ fn emit_mixed_numeric_binop( right_ty: &PhpType, emitter: &mut Emitter, ) -> PhpType { - if !matches!(right_ty, PhpType::Mixed | PhpType::Union(_)) { + let right_was_boxed = !matches!(right_ty, PhpType::Mixed | PhpType::Union(_)); + let left_was_boxed = !matches!(left_stack_ty, PhpType::Mixed | PhpType::Union(_)); + if 
right_was_boxed { crate::codegen::emit_box_current_value_as_mixed(emitter, right_ty); } abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); load_saved_numeric_operand(emitter, left_stack_ty, 16); - if !matches!(left_stack_ty, PhpType::Mixed | PhpType::Union(_)) { + if left_was_boxed { crate::codegen::emit_box_current_value_as_mixed(emitter, left_stack_ty); } match emitter.target.arch { @@ -270,10 +272,48 @@ fn emit_mixed_numeric_binop( BinOp::Mul => "__rt_mixed_numeric_mul", _ => unreachable!(), }; + if left_was_boxed { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } + if right_was_boxed { + match emitter.target.arch { + Arch::AArch64 => { + abi::emit_push_reg(emitter, "x1"); + } + Arch::X86_64 => { + abi::emit_push_reg(emitter, "rdi"); + } + } + } abi::emit_call_label(emitter, helper); + release_temporary_numeric_operand_boxes(emitter, left_was_boxed, right_was_boxed); PhpType::Mixed } +fn release_temporary_numeric_operand_boxes( + emitter: &mut Emitter, + left_was_boxed: bool, + right_was_boxed: bool, +) { + if !left_was_boxed && !right_was_boxed { + return; + } + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + let mut offset = 16; + if right_was_boxed { + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), offset); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + offset += 16; + } + if left_was_boxed { + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), offset); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + } + abi::emit_pop_reg(emitter, abi::int_result_reg(emitter)); + let operand_stack_bytes = 16 * usize::from(left_was_boxed) + 16 * usize::from(right_was_boxed); + abi::emit_release_temporary_stack(emitter, operand_stack_bytes); +} + fn load_saved_numeric_operand(emitter: &mut Emitter, ty: &PhpType, offset: usize) { match ty.codegen_repr() { PhpType::Float => { diff --git a/src/codegen/expr/calls/args/common.rs 
b/src/codegen/expr/calls/args/common.rs index 85bc36ff..b16a8c24 100644 --- a/src/codegen/expr/calls/args/common.rs +++ b/src/codegen/expr/calls/args/common.rs @@ -9,8 +9,12 @@ //! - Argument checks must happen at PHP-observable points without skipping later side effects. use crate::codegen::emit::Emitter; -use crate::codegen::{abi, context::Context, data_section::DataSection}; -use crate::parser::ast::Expr; +use crate::codegen::{ + abi, + context::{Context, HeapOwnership}, + data_section::DataSection, +}; +use crate::parser::ast::{BinOp, Expr, ExprKind}; use crate::types::{FunctionSig, PhpType}; pub(crate) fn declared_target_ty<'a>( @@ -33,6 +37,18 @@ pub(crate) fn declared_target_ty<'a>( }) } +pub(crate) fn call_target_ty<'a>( + sig: Option<&'a FunctionSig>, + param_idx: usize, + include_inferred: bool, +) -> Option<&'a PhpType> { + if include_inferred { + sig.and_then(|sig| sig.params.get(param_idx).map(|(_, ty)| ty)) + } else { + declared_target_ty(sig, param_idx) + } +} + pub(crate) fn push_arg_value(emitter: &mut Emitter, ty: &PhpType) { abi::emit_push_result_value(emitter, ty); } @@ -79,6 +95,9 @@ pub(crate) fn coerce_current_value_to_target( ) -> (PhpType, bool) { let source_repr = source_ty.codegen_repr(); let pushed_ty = target_ty + .filter(|target_ty| { + super::super::super::can_coerce_result_to_type(source_ty, target_ty) + }) .map(PhpType::codegen_repr) .or_else(|| { if matches!(source_repr, PhpType::Void) { @@ -110,11 +129,71 @@ pub(crate) fn push_expr_arg( data: &mut DataSection, ) -> PhpType { let source_ty = super::super::super::emit_expr(arg, emitter, ctx, data); + let release_mixed_after_coerce = + should_release_owned_mixed_after_arg_coerce(arg, &source_ty, target_ty); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } let (pushed_ty, boxed_to_mixed) = coerce_current_value_to_target(emitter, ctx, data, &source_ty, target_ty); + if release_mixed_after_coerce { + 
release_preserved_mixed_after_arg_coercion(emitter, &pushed_ty); + } if !boxed_to_mixed { super::super::super::retain_borrowed_heap_arg(emitter, arg, &source_ty); } push_arg_value(emitter, &pushed_ty); pushed_ty } + +fn should_release_owned_mixed_after_arg_coerce( + arg: &Expr, + source_ty: &PhpType, + target_ty: Option<&PhpType>, +) -> bool { + let source_repr = source_ty.codegen_repr(); + let Some(target_repr) = target_ty.map(PhpType::codegen_repr) else { + return false; + }; + matches!(source_repr, PhpType::Mixed | PhpType::Union(_)) + && !matches!(target_repr, PhpType::Mixed | PhpType::Union(_)) + && target_ty.is_some_and(|target_ty| { + super::super::super::can_coerce_result_to_type(source_ty, target_ty) + }) + && (super::super::super::expr_result_heap_ownership(arg) == HeapOwnership::Owned + || matches!( + arg.kind, + ExprKind::BinaryOp { + op: BinOp::Add | BinOp::Sub | BinOp::Mul, + .. + } + )) +} + +fn release_preserved_mixed_after_arg_coercion(emitter: &mut Emitter, target_ty: &PhpType) { + match target_ty.codegen_repr() { + PhpType::Float => { + abi::emit_push_float_reg(emitter, abi::float_result_reg(emitter)); + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_float_reg(emitter, abi::float_result_reg(emitter)); + abi::emit_release_temporary_stack(emitter, 16); + } + PhpType::Str => { + let (ptr_reg, len_reg) = abi::string_result_regs(emitter); + abi::emit_call_label(emitter, "__rt_str_persist"); // detach string casts from the mixed cell before releasing the boxed owner + abi::emit_push_reg_pair(emitter, ptr_reg, len_reg); + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_reg_pair(emitter, ptr_reg, len_reg); + abi::emit_release_temporary_stack(emitter, 16); + } + _ => { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + 
abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_reg(emitter, abi::int_result_reg(emitter)); + abi::emit_release_temporary_stack(emitter, 16); + } + } +} diff --git a/src/codegen/expr/calls/args/emit.rs b/src/codegen/expr/calls/args/emit.rs index 5f3335e1..ba37a1ce 100644 --- a/src/codegen/expr/calls/args/emit.rs +++ b/src/codegen/expr/calls/args/emit.rs @@ -14,7 +14,7 @@ use crate::parser::ast::{Expr, ExprKind}; use crate::types::call_args; use crate::types::{FunctionSig, PhpType}; -use super::common::{declared_target_ty, emit_ref_arg_variable_address, push_arg_value, push_expr_arg}; +use super::common::{call_target_ty, emit_ref_arg_variable_address, push_arg_value, push_expr_arg}; use super::named; use super::normalize::{has_named_args, prepare_call_args}; use super::spread::{emit_spread_into_named_params, emit_spread_tail_variadic_array_arg}; @@ -27,6 +27,7 @@ pub(crate) fn emit_pushed_call_args( regular_param_count: usize, ref_arg_context_label: &str, retain_non_variable_ref_args: bool, + coerce_inferred_params: bool, emitter: &mut Emitter, ctx: &mut Context, data: &mut DataSection, @@ -61,6 +62,7 @@ pub(crate) fn emit_pushed_call_args( sig, ref_arg_context_label, retain_non_variable_ref_args, + coerce_inferred_params, emitter, ctx, data, @@ -123,6 +125,7 @@ pub(crate) fn emit_pushed_non_variadic_args( sig: Option<&FunctionSig>, ref_arg_context_label: &str, retain_non_variable_ref_args: bool, + coerce_inferred_params: bool, emitter: &mut Emitter, ctx: &mut Context, data: &mut DataSection, @@ -134,7 +137,7 @@ pub(crate) fn emit_pushed_non_variadic_args( .and_then(|sig| sig.ref_params.get(idx)) .copied() .unwrap_or(false); - let target_ty = declared_target_ty(sig, idx); + let target_ty = call_target_ty(sig, idx, coerce_inferred_params); if is_ref { if let ExprKind::Variable(var_name) = &arg.kind { diff --git a/src/codegen/expr/calls/closure.rs 
b/src/codegen/expr/calls/closure.rs index ede49052..78412244 100644 --- a/src/codegen/expr/calls/closure.rs +++ b/src/codegen/expr/calls/closure.rs @@ -316,6 +316,7 @@ pub(super) fn emit_closure_call( regular_param_count, "closure ref arg", true, + false, emitter, ctx, data, diff --git a/src/codegen/expr/calls/function.rs b/src/codegen/expr/calls/function.rs index e82e5cb9..4b955c95 100644 --- a/src/codegen/expr/calls/function.rs +++ b/src/codegen/expr/calls/function.rs @@ -39,6 +39,7 @@ pub(super) fn emit_function_call( args::regular_param_count(sig.as_ref(), args_exprs.len()), "ref arg", false, + true, emitter, ctx, data, diff --git a/src/codegen/expr/calls/indirect.rs b/src/codegen/expr/calls/indirect.rs index c5ab92fa..292109ae 100644 --- a/src/codegen/expr/calls/indirect.rs +++ b/src/codegen/expr/calls/indirect.rs @@ -41,6 +41,7 @@ pub(super) fn emit_loaded_expr_call( args::regular_param_count(callee_sig.as_ref(), args_exprs.len()), "indirect ref arg", true, + false, emitter, ctx, data, diff --git a/src/codegen/expr/helpers.rs b/src/codegen/expr/helpers.rs index d4445f47..62c8ff20 100644 --- a/src/codegen/expr/helpers.rs +++ b/src/codegen/expr/helpers.rs @@ -58,6 +58,9 @@ pub(crate) fn coerce_result_to_type( PhpType::Int | PhpType::Resource(_) => { crate::codegen::abi::emit_call_label(emitter, "__rt_mixed_cast_int"); } + PhpType::Pointer(_) => { + crate::codegen::abi::emit_call_label(emitter, "__rt_mixed_cast_int"); + } PhpType::Bool => { crate::codegen::abi::emit_call_label(emitter, "__rt_mixed_cast_bool"); } @@ -83,3 +86,26 @@ pub(crate) fn coerce_result_to_type( crate::codegen::abi::emit_int_result_to_float_result(emitter); // convert the integer-like result into the active target float-result register } } + +pub(crate) fn can_coerce_result_to_type(source_ty: &PhpType, target_ty: &PhpType) -> bool { + if source_ty == target_ty { + return true; + } + if matches!(source_ty, PhpType::Mixed | PhpType::Union(_)) { + return matches!( + target_ty.codegen_repr(), + 
PhpType::Int + | PhpType::Resource(_) + | PhpType::Pointer(_) + | PhpType::Bool + | PhpType::Float + | PhpType::Str + | PhpType::Mixed + | PhpType::Union(_) + ); + } + matches!(target_ty, PhpType::Mixed | PhpType::Union(_)) + || *target_ty == PhpType::Str + || (*target_ty == PhpType::Float + && matches!(source_ty, PhpType::Int | PhpType::Bool | PhpType::Void)) +} diff --git a/src/codegen/expr/objects/allocation.rs b/src/codegen/expr/objects/allocation.rs index d8d71f32..f8bf6d85 100644 --- a/src/codegen/expr/objects/allocation.rs +++ b/src/codegen/expr/objects/allocation.rs @@ -257,6 +257,7 @@ pub(super) fn emit_new_object_core( regular_param_count, "constructor ref arg", false, + true, emitter, ctx, data, diff --git a/src/codegen/expr/objects/dispatch/prep.rs b/src/codegen/expr/objects/dispatch/prep.rs index 55c84dbe..28a51c3f 100644 --- a/src/codegen/expr/objects/dispatch/prep.rs +++ b/src/codegen/expr/objects/dispatch/prep.rs @@ -29,6 +29,7 @@ pub(super) fn eval_and_push_args( super::super::super::calls::args::regular_param_count(sig, args.len()), "method ref arg", true, + true, emitter, ctx, data, diff --git a/src/codegen/functions/locals.rs b/src/codegen/functions/locals.rs index 8913a27e..98239227 100644 --- a/src/codegen/functions/locals.rs +++ b/src/codegen/functions/locals.rs @@ -8,7 +8,7 @@ //! Key details: //! - Any lowering path that introduces storage must be represented here before stack offsets are assigned. 
-use crate::codegen::context::{Context, HeapOwnership}; +use crate::codegen::context::Context; use crate::parser::ast::{BinOp, CallableTarget, Expr, ExprKind, InstanceOfTarget, StmtKind}; use crate::types::{FunctionSig, PhpType}; use super::types::{codegen_declared_type, codegen_static_type, infer_local_type}; @@ -38,18 +38,6 @@ pub fn collect_local_vars( static_ty.codegen_repr() }; ctx.alloc_var_with_static_type(name, slot_ty, static_ty); - } else if needs_mixed_numeric_slot { - let static_ty = ctx - .variables - .get(name) - .map(|var| var.static_ty.clone()) - .unwrap_or_else(|| infer_local_type(value, sig, Some(ctx))); - ctx.update_var_type_static_and_ownership( - name, - PhpType::Mixed, - static_ty, - HeapOwnership::for_type(&PhpType::Mixed), - ); } } StmtKind::TypedAssign { diff --git a/src/codegen/stmt/arrays/assign/assoc.rs b/src/codegen/stmt/arrays/assign/assoc.rs index 0dc20c76..7ab4dbc2 100644 --- a/src/codegen/stmt/arrays/assign/assoc.rs +++ b/src/codegen/stmt/arrays/assign/assoc.rs @@ -12,7 +12,7 @@ use crate::codegen::abi; use crate::codegen::context::Context; use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; -use crate::codegen::expr::emit_expr; +use crate::codegen::expr::{coerce_result_to_type, emit_expr}; use crate::codegen::platform::Arch; use crate::parser::ast::Expr; use crate::types::PhpType; @@ -42,6 +42,13 @@ pub(super) fn emit_assoc_array_assign( abi::emit_push_reg_pair(emitter, key_ptr_reg, key_len_reg); // preserve the computed key pointer and length while evaluating the value expression let mut val_ty = emit_expr(value, emitter, ctx, data); + if matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(target.elem_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(&val_ty, &target.elem_ty) + { + coerce_result_to_type(emitter, ctx, data, &val_ty, &target.elem_ty); + val_ty = target.elem_ty.clone(); + } let boxed_iterable = 
crate::codegen::emit_box_iterable_value_for_mixed_container(emitter, &mut val_ty); if !boxed_iterable diff --git a/src/codegen/stmt/arrays/assign/buffer.rs b/src/codegen/stmt/arrays/assign/buffer.rs index 03516856..7efbb488 100644 --- a/src/codegen/stmt/arrays/assign/buffer.rs +++ b/src/codegen/stmt/arrays/assign/buffer.rs @@ -12,7 +12,7 @@ use crate::codegen::abi; use crate::codegen::context::Context; use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; -use crate::codegen::expr::emit_expr; +use crate::codegen::expr::{coerce_result_to_type, emit_expr}; use crate::parser::ast::Expr; use crate::types::PhpType; @@ -39,9 +39,17 @@ pub(super) fn emit_buffer_array_assign( abi::load_at_offset(emitter, buffer_reg, target.offset); // load the buffer header pointer from the local slot } abi::emit_push_reg(emitter, buffer_reg); // preserve the buffer pointer while evaluating the index - emit_expr(index, emitter, ctx, data); + let index_ty = emit_expr(index, emitter, ctx, data); + coerce_result_to_type(emitter, ctx, data, &index_ty, &PhpType::Int); abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); // preserve the computed element index across value evaluation - let val_ty = emit_expr(value, emitter, ctx, data); + let mut val_ty = emit_expr(value, emitter, ctx, data); + if matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(target.elem_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(&val_ty, &target.elem_ty) + { + coerce_result_to_type(emitter, ctx, data, &val_ty, &target.elem_ty); + val_ty = target.elem_ty.clone(); + } match &val_ty { PhpType::Float => { abi::emit_push_float_reg(emitter, abi::float_result_reg(emitter)); // preserve the float payload across address computation diff --git a/src/codegen/stmt/arrays/assign/indexed/prepare.rs b/src/codegen/stmt/arrays/assign/indexed/prepare.rs index be27ca0c..9f3ff4d1 100644 --- a/src/codegen/stmt/arrays/assign/indexed/prepare.rs +++ 
b/src/codegen/stmt/arrays/assign/indexed/prepare.rs @@ -12,7 +12,7 @@ use crate::codegen::abi; use crate::codegen::context::Context; use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; -use crate::codegen::expr::emit_expr; +use crate::codegen::expr::{coerce_result_to_type, emit_expr}; use crate::codegen::platform::Arch; use crate::parser::ast::Expr; use crate::types::PhpType; @@ -53,9 +53,17 @@ pub(super) fn prepare_indexed_array_assign( abi::store_at_offset(emitter, "x0", target.offset); // persist the unique array pointer in the local slot } emitter.instruction("str x0, [sp, #-16]!"); // push array pointer onto stack - emit_expr(index, emitter, ctx, data); + let index_ty = emit_expr(index, emitter, ctx, data); + coerce_result_to_type(emitter, ctx, data, &index_ty, &PhpType::Int); emitter.instruction("str x0, [sp, #-16]!"); // push computed index onto stack let mut val_ty = emit_expr(value, emitter, ctx, data); + if matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(target.elem_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(&val_ty, &target.elem_ty) + { + coerce_result_to_type(emitter, ctx, data, &val_ty, &target.elem_ty); + val_ty = target.elem_ty.clone(); + } let boxed_iterable = crate::codegen::emit_box_iterable_value_for_mixed_container(emitter, &mut val_ty); let effective_store_ty = @@ -185,9 +193,17 @@ fn prepare_indexed_array_assign_linux_x86_64( abi::store_at_offset(emitter, "rax", target.offset); // persist the unique indexed-array pointer in the local slot } abi::emit_push_reg(emitter, "rax"); // preserve the unique indexed-array pointer while evaluating the target index - emit_expr(index, emitter, ctx, data); + let index_ty = emit_expr(index, emitter, ctx, data); + coerce_result_to_type(emitter, ctx, data, &index_ty, &PhpType::Int); abi::emit_push_reg(emitter, "rax"); // preserve the computed target index while evaluating the assigned value let mut val_ty = 
emit_expr(value, emitter, ctx, data); + if matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(target.elem_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(&val_ty, &target.elem_ty) + { + coerce_result_to_type(emitter, ctx, data, &val_ty, &target.elem_ty); + val_ty = target.elem_ty.clone(); + } let boxed_iterable = crate::codegen::emit_box_iterable_value_for_mixed_container(emitter, &mut val_ty); let effective_store_ty = diff --git a/src/codegen/stmt/assignments/locals.rs b/src/codegen/stmt/assignments/locals.rs index 1404d6ff..915f5439 100644 --- a/src/codegen/stmt/assignments/locals.rs +++ b/src/codegen/stmt/assignments/locals.rs @@ -12,7 +12,7 @@ use super::super::super::abi; use super::super::super::context::Context; use super::super::super::data_section::DataSection; use super::super::super::emit::Emitter; -use super::super::super::expr::emit_expr; +use super::super::super::expr::{coerce_result_to_type, emit_expr}; use super::super::super::functions; use super::super::PhpType; use crate::names::Name; @@ -78,6 +78,27 @@ pub(crate) fn emit_assign_stmt( if ref_needs_mixed_box { super::super::super::emit_box_current_value_as_mixed(emitter, &ty); ty = PhpType::Mixed; + } else if matches!(ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(old_ty, PhpType::Mixed | PhpType::Union(_)) + && super::super::super::expr::can_coerce_result_to_type(&ty, &old_ty) + { + let release_mixed_after_coerce = + super::super::helpers::should_release_owned_mixed_after_coerce( + value, + &ty, + &old_ty, + ); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } + coerce_result_to_type(emitter, ctx, data, &ty, &old_ty); + if release_mixed_after_coerce { + super::super::helpers::release_preserved_mixed_after_coercion( + emitter, + &old_ty, + ); + } + ty = old_ty.clone(); } else { super::super::helpers::retain_borrowed_heap_result(emitter, value, &ty); } @@ -127,6 +148,28 @@ 
pub(crate) fn emit_assign_stmt( }; let offset = var.stack_offset; let old_ty = var.ty.clone(); + if matches!(ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(old_ty, PhpType::Mixed | PhpType::Union(_)) + && super::super::super::expr::can_coerce_result_to_type(&ty, &old_ty) + { + let release_mixed_after_coerce = + super::super::helpers::should_release_owned_mixed_after_coerce( + value, + &ty, + &old_ty, + ); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } + coerce_result_to_type(emitter, ctx, data, &ty, &old_ty); + if release_mixed_after_coerce { + super::super::helpers::release_preserved_mixed_after_coercion( + emitter, + &old_ty, + ); + } + ty = old_ty.clone(); + } if ctx.static_vars.contains(name) { if !dest_needs_mixed_box { diff --git a/src/codegen/stmt/assignments/properties/arrays/indexed.rs b/src/codegen/stmt/assignments/properties/arrays/indexed.rs index 663cbac8..a3f887a4 100644 --- a/src/codegen/stmt/assignments/properties/arrays/indexed.rs +++ b/src/codegen/stmt/assignments/properties/arrays/indexed.rs @@ -13,7 +13,7 @@ use crate::codegen::abi; use crate::codegen::context::Context; use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; -use crate::codegen::expr::emit_expr; +use crate::codegen::expr::{coerce_result_to_type, emit_expr}; use crate::codegen::platform::Arch; use crate::codegen::stmt::helpers; use crate::parser::ast::{Expr, ExprKind}; @@ -195,6 +195,21 @@ fn prepare_property_array_assign_value( elem_ty: &PhpType, ) -> PhpType { let mut val_ty = emit_expr(value, emitter, ctx, data); + if matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(elem_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(&val_ty, elem_ty) + { + let release_mixed_after_coerce = + helpers::should_release_owned_mixed_after_coerce(value, &val_ty, elem_ty); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, 
abi::int_result_reg(emitter)); + } + coerce_result_to_type(emitter, ctx, data, &val_ty, elem_ty); + if release_mixed_after_coerce { + helpers::release_preserved_mixed_after_coercion(emitter, elem_ty); + } + val_ty = elem_ty.clone(); + } let boxed_iterable = crate::codegen::emit_box_iterable_value_for_mixed_container(emitter, &mut val_ty); if !boxed_iterable diff --git a/src/codegen/stmt/assignments/properties/assign.rs b/src/codegen/stmt/assignments/properties/assign.rs index f2014b11..eb617a30 100644 --- a/src/codegen/stmt/assignments/properties/assign.rs +++ b/src/codegen/stmt/assignments/properties/assign.rs @@ -99,8 +99,18 @@ pub(crate) fn emit_property_assign_stmt( && !matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) }); if let Some(target_ty) = &declared_target_ty { - coerce_result_to_type(emitter, ctx, data, &val_ty, target_ty); - val_ty = target_ty.clone(); + if crate::codegen::expr::can_coerce_result_to_type(&val_ty, target_ty) { + let release_mixed_after_coerce = + helpers::should_release_owned_mixed_after_coerce(value, &val_ty, target_ty); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } + coerce_result_to_type(emitter, ctx, data, &val_ty, target_ty); + if release_mixed_after_coerce { + helpers::release_preserved_mixed_after_coercion(emitter, target_ty); + } + val_ty = target_ty.clone(); + } } if magic_set_class.is_none() && !boxed_to_mixed { helpers::retain_borrowed_heap_result(emitter, value, &val_ty); @@ -240,8 +250,18 @@ fn emit_nullable_property_assign_stmt( && !matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) }); if let Some(target_ty) = &declared_target_ty { - coerce_result_to_type(emitter, ctx, data, &val_ty, target_ty); - val_ty = target_ty.clone(); + if crate::codegen::expr::can_coerce_result_to_type(&val_ty, target_ty) { + let release_mixed_after_coerce = + helpers::should_release_owned_mixed_after_coerce(value, &val_ty, target_ty); + if release_mixed_after_coerce { + 
abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } + coerce_result_to_type(emitter, ctx, data, &val_ty, target_ty); + if release_mixed_after_coerce { + helpers::release_preserved_mixed_after_coercion(emitter, target_ty); + } + val_ty = target_ty.clone(); + } } if magic_set_class.is_none() && !boxed_to_mixed { helpers::retain_borrowed_heap_result(emitter, value, &val_ty); @@ -329,9 +349,6 @@ fn declared_property_type(object: &Expr, property: &str, ctx: &Context) -> Optio fn declared_property_type_for_class(class_name: &str, property: &str, ctx: &Context) -> Option { let class_info = ctx.classes.get(class_name)?; - if !class_info.declared_properties.contains(property) { - return None; - } class_info .properties .iter() diff --git a/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs b/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs index 2e0e9df5..fac54cc8 100644 --- a/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs +++ b/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs @@ -13,7 +13,7 @@ use crate::codegen::abi; use crate::codegen::context::Context; use crate::codegen::data_section::DataSection; use crate::codegen::emit::Emitter; -use crate::codegen::expr::emit_expr; +use crate::codegen::expr::{coerce_result_to_type, emit_expr}; use crate::codegen::platform::Arch; use crate::codegen::stmt::helpers; use crate::names::static_property_symbol; @@ -186,6 +186,21 @@ fn prepare_static_array_assign_value( elem_ty: &PhpType, ) -> PhpType { let mut val_ty = emit_expr(value, emitter, ctx, data); + if matches!(val_ty, PhpType::Mixed | PhpType::Union(_)) + && !matches!(elem_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(&val_ty, elem_ty) + { + let release_mixed_after_coerce = + helpers::should_release_owned_mixed_after_coerce(value, &val_ty, elem_ty); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } + 
coerce_result_to_type(emitter, ctx, data, &val_ty, elem_ty); + if release_mixed_after_coerce { + helpers::release_preserved_mixed_after_coercion(emitter, elem_ty); + } + val_ty = elem_ty.clone(); + } let boxed_iterable = crate::codegen::emit_box_iterable_value_for_mixed_container(emitter, &mut val_ty); if !boxed_iterable diff --git a/src/codegen/stmt/assignments/static_properties/assign.rs b/src/codegen/stmt/assignments/static_properties/assign.rs index cff38ba0..ef17ef6d 100644 --- a/src/codegen/stmt/assignments/static_properties/assign.rs +++ b/src/codegen/stmt/assignments/static_properties/assign.rs @@ -53,7 +53,7 @@ pub(crate) fn emit_static_property_assign_stmt( return; } - let Some((class_name, declaring_class, prop_ty, declared)) = + let Some((class_name, declaring_class, prop_ty, _declared)) = resolve::resolve_static_property(receiver, property, ctx, emitter) else { return; @@ -67,11 +67,18 @@ pub(crate) fn emit_static_property_assign_stmt( ); let mut val_ty = emit_expr(value, emitter, ctx, data); - let boxed_to_mixed = declared - && matches!(prop_ty, PhpType::Mixed | PhpType::Union(_)) + let boxed_to_mixed = matches!(prop_ty, PhpType::Mixed | PhpType::Union(_)) && !matches!(val_ty, PhpType::Mixed | PhpType::Union(_)); - if declared { + if crate::codegen::expr::can_coerce_result_to_type(&val_ty, &prop_ty) { + let release_mixed_after_coerce = + helpers::should_release_owned_mixed_after_coerce(value, &val_ty, &prop_ty); + if release_mixed_after_coerce { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + } coerce_result_to_type(emitter, ctx, data, &val_ty, &prop_ty); + if release_mixed_after_coerce { + helpers::release_preserved_mixed_after_coercion(emitter, &prop_ty); + } val_ty = prop_ty.clone(); } if !boxed_to_mixed { diff --git a/src/codegen/stmt/control_flow/loops/exits.rs b/src/codegen/stmt/control_flow/loops/exits.rs index b51e91d5..8bd89b35 100644 --- a/src/codegen/stmt/control_flow/loops/exits.rs +++ 
b/src/codegen/stmt/control_flow/loops/exits.rs @@ -40,7 +40,27 @@ pub(crate) fn emit_return_stmt( crate::codegen::abi::emit_call_label(emitter, "__rt_str_persist"); // persist borrowed string before locals are freed } let target_ty = ctx.return_type.clone(); - coerce_result_to_type(emitter, ctx, data, &ty, &target_ty); + if crate::codegen::expr::can_coerce_result_to_type(&ty, &target_ty) { + let release_mixed_after_coerce = !matches!(target_ty, PhpType::Mixed | PhpType::Union(_)) + && super::super::super::helpers::should_release_owned_mixed_after_coerce( + e, + &ty, + &target_ty, + ); + if release_mixed_after_coerce { + crate::codegen::abi::emit_push_reg( + emitter, + crate::codegen::abi::int_result_reg(emitter), + ); + } + coerce_result_to_type(emitter, ctx, data, &ty, &target_ty); + if release_mixed_after_coerce { + super::super::super::helpers::release_preserved_mixed_after_coercion( + emitter, + &target_ty, + ); + } + } } if let Some(label) = &ctx.return_label { let sp_total: usize = ctx.loop_stack.iter().map(|l| l.sp_adjust).sum(); diff --git a/src/codegen/stmt/helpers.rs b/src/codegen/stmt/helpers.rs index e9050988..e7637b85 100644 --- a/src/codegen/stmt/helpers.rs +++ b/src/codegen/stmt/helpers.rs @@ -9,7 +9,7 @@ //! - Helpers must state register clobbers through code structure and preserve heap ownership assumptions. 
use crate::codegen::platform::Arch; -use crate::parser::ast::Expr; +use crate::parser::ast::{BinOp, Expr, ExprKind}; use crate::types::PhpType; use super::super::abi; @@ -27,6 +27,57 @@ pub(super) fn local_slot_ownership_after_store(ty: &PhpType) -> HeapOwnership { HeapOwnership::local_owner_for_type(ty) } +pub(super) fn release_preserved_mixed_after_coercion(emitter: &mut Emitter, target_ty: &PhpType) { + match target_ty.codegen_repr() { + PhpType::Float => { + abi::emit_push_float_reg(emitter, abi::float_result_reg(emitter)); + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_float_reg(emitter, abi::float_result_reg(emitter)); + abi::emit_release_temporary_stack(emitter, 16); + } + PhpType::Str => { + let (ptr_reg, len_reg) = abi::string_result_regs(emitter); + abi::emit_call_label(emitter, "__rt_str_persist"); // detach string casts from the mixed cell before releasing the boxed owner + abi::emit_push_reg_pair(emitter, ptr_reg, len_reg); + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_reg_pair(emitter, ptr_reg, len_reg); + abi::emit_release_temporary_stack(emitter, 16); + } + PhpType::Void | PhpType::Never => { + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 0); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_release_temporary_stack(emitter, 16); + } + _ => { + abi::emit_push_reg(emitter, abi::int_result_reg(emitter)); + abi::emit_load_temporary_stack_slot(emitter, abi::int_result_reg(emitter), 16); + abi::emit_decref_if_refcounted(emitter, &PhpType::Mixed); + abi::emit_pop_reg(emitter, abi::int_result_reg(emitter)); + abi::emit_release_temporary_stack(emitter, 16); + } + } +} + +pub(super) fn should_release_owned_mixed_after_coerce( + value: &Expr, + source_ty: &PhpType, + target_ty: &PhpType, +) 
-> bool { + matches!(source_ty, PhpType::Mixed | PhpType::Union(_)) + && crate::codegen::expr::can_coerce_result_to_type(source_ty, target_ty) + && !matches!(target_ty.codegen_repr(), PhpType::Mixed | PhpType::Union(_)) + && (expr_result_heap_ownership(value) == HeapOwnership::Owned + || matches!( + value.kind, + ExprKind::BinaryOp { + op: BinOp::Add | BinOp::Sub | BinOp::Mul, + .. + } + )) +} + pub(super) fn indexed_array_runtime_value_tag(ty: &PhpType) -> i64 { crate::codegen::runtime_value_tag(ty) as i64 } From efc8fd9bcbdb776eeb8c75ec0552647e7ab260a4 Mon Sep 17 00:00:00 2001 From: Vincenzo Petrucci Date: Sat, 16 May 2026 19:07:27 +0200 Subject: [PATCH 10/10] refactor: reuse static property marker clearing --- .../assignments/static_properties/arrays.rs | 4 ++- .../static_properties/arrays/indexed.rs | 9 +++++- .../assignments/static_properties/assign.rs | 4 +-- .../static_properties/late_bound.rs | 11 ++++++-- tests/codegen/oop/modifiers_and_properties.rs | 28 +++++++++++++++++++ 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/codegen/stmt/assignments/static_properties/arrays.rs b/src/codegen/stmt/assignments/static_properties/arrays.rs index a7851428..072fc770 100644 --- a/src/codegen/stmt/assignments/static_properties/arrays.rs +++ b/src/codegen/stmt/assignments/static_properties/arrays.rs @@ -100,13 +100,14 @@ pub(crate) fn emit_static_property_array_push_stmt( abi::int_result_reg(emitter), &declaring_class, &branches, + &prop_ty, emitter, ctx, ); } else { let symbol = static_property_symbol(&declaring_class, property); abi::emit_store_reg_to_symbol(emitter, abi::int_result_reg(emitter), &symbol, 0); - abi::emit_store_zero_to_symbol(emitter, &symbol, 8); + late_bound::clear_uninitialized_marker_after_static_store(emitter, &symbol, &prop_ty); } } @@ -192,6 +193,7 @@ pub(crate) fn emit_static_property_array_assign_stmt( &declaring_class, &branches, class_id_saved, + &prop_ty, &elem_ty, index, value, diff --git 
a/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs b/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs index fac54cc8..1afa06ed 100644 --- a/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs +++ b/src/codegen/stmt/assignments/static_properties/arrays/indexed.rs @@ -25,6 +25,7 @@ pub(super) fn emit_static_indexed_array_assign( declaring_class: &str, branches: &[StaticPropertyBranch], class_id_saved: bool, + prop_ty: &PhpType, elem_ty: &PhpType, index: &Expr, value: &Expr, @@ -41,6 +42,7 @@ pub(super) fn emit_static_indexed_array_assign( branches, class_id_saved, 0, + prop_ty, "x0", emitter, ctx, @@ -61,6 +63,7 @@ pub(super) fn emit_static_indexed_array_assign( branches, class_id_saved, 64, + prop_ty, "x10", emitter, ctx, @@ -82,6 +85,7 @@ pub(super) fn emit_static_indexed_array_assign( branches, class_id_saved, 0, + prop_ty, "rax", emitter, ctx, @@ -101,6 +105,7 @@ pub(super) fn emit_static_indexed_array_assign( branches, class_id_saved, 48, + prop_ty, "r10", emitter, ctx, @@ -121,6 +126,7 @@ fn publish_static_array_pointer( branches: &[StaticPropertyBranch], class_id_saved: bool, class_id_stack_offset: usize, + prop_ty: &PhpType, source_reg: &str, emitter: &mut Emitter, ctx: &mut Context, @@ -141,13 +147,14 @@ fn publish_static_array_pointer( source_reg, declaring_class, branches, + prop_ty, emitter, ctx, ); } else { let symbol = static_property_symbol(declaring_class, property); abi::emit_store_reg_to_symbol(emitter, source_reg, &symbol, 0); - abi::emit_store_zero_to_symbol(emitter, &symbol, 8); + late_bound::clear_uninitialized_marker_after_static_store(emitter, &symbol, prop_ty); } } diff --git a/src/codegen/stmt/assignments/static_properties/assign.rs b/src/codegen/stmt/assignments/static_properties/assign.rs index ef17ef6d..1f1fdaf4 100644 --- a/src/codegen/stmt/assignments/static_properties/assign.rs +++ b/src/codegen/stmt/assignments/static_properties/assign.rs @@ -102,8 +102,6 @@ pub(crate) fn 
emit_static_property_assign_stmt( } else { let symbol = static_property_symbol(&declaring_class, property); abi::emit_store_result_to_symbol(emitter, &symbol, &val_ty, true); - if !matches!(val_ty.codegen_repr(), PhpType::Str) { - abi::emit_store_zero_to_symbol(emitter, &symbol, 8); - } + late_bound::clear_uninitialized_marker_after_static_store(emitter, &symbol, &val_ty); } } diff --git a/src/codegen/stmt/assignments/static_properties/late_bound.rs b/src/codegen/stmt/assignments/static_properties/late_bound.rs index afd8c531..aabb0cec 100644 --- a/src/codegen/stmt/assignments/static_properties/late_bound.rs +++ b/src/codegen/stmt/assignments/static_properties/late_bound.rs @@ -135,6 +135,7 @@ pub(super) fn emit_dynamic_store_reg_to_static_property( source_reg: &str, fallback_declaring_class: &str, branches: &[StaticPropertyBranch], + ty: &PhpType, emitter: &mut Emitter, ctx: &mut Context, ) { @@ -147,7 +148,7 @@ pub(super) fn emit_dynamic_store_reg_to_static_property( } let fallback_symbol = static_property_symbol(fallback_declaring_class, property); abi::emit_store_reg_to_symbol(emitter, source_reg, &fallback_symbol, 0); - abi::emit_store_zero_to_symbol(emitter, &fallback_symbol, 8); + clear_uninitialized_marker_after_static_store(emitter, &fallback_symbol, ty); emit_jump(emitter, &done); for (label, branch) in labels { emitter.label(&label); @@ -157,13 +158,17 @@ pub(super) fn emit_dynamic_store_reg_to_static_property( } let symbol = static_property_symbol(&branch.declaring_class, property); abi::emit_store_reg_to_symbol(emitter, source_reg, &symbol, 0); - abi::emit_store_zero_to_symbol(emitter, &symbol, 8); + clear_uninitialized_marker_after_static_store(emitter, &symbol, ty); emit_jump(emitter, &done); } emitter.label(&done); } -fn clear_uninitialized_marker_after_static_store(emitter: &mut Emitter, symbol: &str, ty: &PhpType) { +pub(super) fn clear_uninitialized_marker_after_static_store( + emitter: &mut Emitter, + symbol: &str, + ty: &PhpType, +) { if 
!matches!(ty.codegen_repr(), PhpType::Str) { abi::emit_store_zero_to_symbol(emitter, symbol, 8); } diff --git a/tests/codegen/oop/modifiers_and_properties.rs b/tests/codegen/oop/modifiers_and_properties.rs index 8ce9a03d..0a4fb594 100644 --- a/tests/codegen/oop/modifiers_and_properties.rs +++ b/tests/codegen/oop/modifiers_and_properties.rs @@ -194,6 +194,34 @@ echo Box::$value; assert_eq!(out, "0"); } +#[test] +fn test_nullable_static_property_default_null_is_initialized() { + let out = compile_and_run( + r#"