From 6c906b0f2c76e129cbbb02457a5691ee59798577 Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Sun, 14 Jun 2026 14:02:11 +0200 Subject: [PATCH 1/4] perf(codegen): inline the slot load for guarded numeric array index reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A numeric `arr[i]` read is lowered as a typed-feedback guard followed, on the fast path, by a call to `js_array_numeric_get_f64_unboxed`. But the `numeric_array_index_get_guard` on the way into the fast block already proves everything that helper re-checks: a plain non-forwarded `Array`, in raw-f64 numeric layout, with `index` in bounds (`plain_array_index_guard(.., in_bounds=true)` && `js_array_is_numeric_f64_layout`). So the helper's hot path just does `return *elements_ptr.add(index)` after re-validating — a redundant call per read. Emit that slot load inline (`inttoptr` + `load double` at `arr + 8 + index*8`) instead, matching the helper exactly. Raw-f64 arrays are dense (no HOLE slots) and hold raw f64s, so no hole→undefined translation is needed; the non-raw path already inlines (with the hole check) and is unchanged. Out-of-bounds / negative / non-number indices fail the guard and route to the boxed fallback exactly as before. 04_array_read: 149ms -> 91ms (~1.6x). numeric_array_numeric is read+write so it is now write-bound (neutral). Part of closing the AOT array-element-access gap (see #5094). Verified: 29/29 suite benchmarks match Node; OOB/negative/hole reads on numeric arrays return undefined identical to Node; codegen tests pass (the hot-path test now asserts the inline load instead of the unboxed-get call). 
--- crates/perry-codegen/src/expr/index_get.rs | 19 ++++++++++++++----- crates/perry-codegen/tests/typed_feedback.rs | 6 +++++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/crates/perry-codegen/src/expr/index_get.rs b/crates/perry-codegen/src/expr/index_get.rs index e0152a7253..115fa4956a 100644 --- a/crates/perry-codegen/src/expr/index_get.rs +++ b/crates/perry-codegen/src/expr/index_get.rs @@ -290,11 +290,20 @@ fn lower_guarded_array_index_get( let arr_bits = fast_blk.bitcast_double_to_i64(arr_box); let arr_handle = fast_blk.and(I64, &arr_bits, POINTER_MASK_I64); let fast_val = if require_numeric_layout { - fast_blk.call( - DOUBLE, - "js_array_numeric_get_f64_unboxed", - &[(I64, &arr_handle), (I32, idx_i32)], - ) + // The `numeric_array_index_get_guard` on the way into this block already + // proved: a plain, non-forwarded `Array`, in raw-f64 numeric layout, + // with `index` in bounds (`plain_array_index_guard(.., in_bounds=true)` + // && `js_array_is_numeric_f64_layout`). So load the slot inline instead + // of calling `js_array_numeric_get_f64_unboxed`, whose hot path + // re-validates exactly those same conditions and then does this load. + // Raw-f64 arrays are dense (no HOLE slots) and the slot holds a raw f64, + // matching the runtime helper's `return *elements_ptr.add(index)`. 
+ let idx_i64 = fast_blk.zext(I32, idx_i32, I64); + let byte_offset = fast_blk.shl(I64, &idx_i64, "3"); + let with_header = fast_blk.add(I64, &byte_offset, "8"); + let element_addr = fast_blk.add(I64, &arr_handle, &with_header); + let element_ptr = fast_blk.inttoptr(I64, &element_addr); + fast_blk.load(DOUBLE, &element_ptr) } else { let idx_i64 = fast_blk.zext(I32, idx_i32, I64); let byte_offset = fast_blk.shl(I64, &idx_i64, "3"); diff --git a/crates/perry-codegen/tests/typed_feedback.rs b/crates/perry-codegen/tests/typed_feedback.rs index a442577767..93b7e109ac 100644 --- a/crates/perry-codegen/tests/typed_feedback.rs +++ b/crates/perry-codegen/tests/typed_feedback.rs @@ -535,5 +535,9 @@ fn typed_feedback_guards_computed_numeric_array_index_hot_path() { assert!(ir.contains("call i32 @js_typed_feedback_numeric_array_index_get_guard")); assert!(ir.contains("call double @js_typed_feedback_array_index_get_fallback_boxed")); - assert!(ir.contains("call double @js_array_numeric_get_f64_unboxed")); + // The numeric fast path no longer calls `js_array_numeric_get_f64_unboxed`: + // the guard already proved raw-f64 layout + in-bounds, so the slot is loaded + // inline (a direct `load double` from the element address). + assert!(!ir.contains("call double @js_array_numeric_get_f64_unboxed")); + assert!(ir.contains("load double")); } From 5dfcfd65e6ed13e6bdc5020a48b910627b4905c9 Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Mon, 15 Jun 2026 15:41:14 +0200 Subject: [PATCH 2/4] test(compiler-output): update numeric_array_uses_unboxed_get for inline GET The inline numeric array read no longer emits a call to js_array_numeric_get_f64_unboxed; the slot is loaded inline after the guard. Update the native-region-proof IR check to assert the guard (js_typed_feedback_numeric_array_index_get_guard) is present and that the helper call is elided, instead of requiring the now-removed call. 
The native-rep structural record still tags the logical consumer, so numeric_array_get_fast_f64 continues to prove the checked_native fast path. --- benchmarks/compiler_output/workloads.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/compiler_output/workloads.toml b/benchmarks/compiler_output/workloads.toml index 0cb8064928..c92138be11 100644 --- a/benchmarks/compiler_output/workloads.toml +++ b/benchmarks/compiler_output/workloads.toml @@ -625,8 +625,9 @@ detail = "numeric Array.push uses the guarded raw-f64 helper" [[workloads.numeric_arrays.ir_checks]] name = "numeric_array_uses_unboxed_get" -contains = "js_array_numeric_get_f64_unboxed" -detail = "numeric indexed read uses the guarded raw-f64 helper" +contains = "js_typed_feedback_numeric_array_index_get_guard" +regex_none = ["call double @js_array_numeric_get_f64_unboxed"] +detail = "numeric indexed read takes the guarded raw-f64 fast path and loads the slot inline (helper call elided)" [[workloads.numeric_arrays.ir_checks]] name = "numeric_array_uses_unboxed_set" From 73c239deddb40a5bf35c74fb11df02a0ff04f904 Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Mon, 15 Jun 2026 15:53:48 +0200 Subject: [PATCH 3/4] test(compiler-output): assert inline load double in numeric GET fast path Per CodeRabbit: prove the optimization contract positively, not just by helper removal. Add a regex asserting the guarded fast block (bidx.num.fast) computes the element pointer via inttoptr and performs an inline 'load double' before branching to the merge block. Keeps the guard presence + helper-call-absence checks. Validated against the optimized llvm_after IR the harness gates on. 
--- benchmarks/compiler_output/workloads.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/compiler_output/workloads.toml b/benchmarks/compiler_output/workloads.toml index c92138be11..38dac5600f 100644 --- a/benchmarks/compiler_output/workloads.toml +++ b/benchmarks/compiler_output/workloads.toml @@ -626,8 +626,9 @@ detail = "numeric Array.push uses the guarded raw-f64 helper" [[workloads.numeric_arrays.ir_checks]] name = "numeric_array_uses_unboxed_get" contains = "js_typed_feedback_numeric_array_index_get_guard" +regex = '''bidx\.num\.fast\.\d+:[\s\S]*?inttoptr i64 %\w+ to ptr\s*\n\s*%\w+ = load double, ptr %\w+[^\n]*\n\s*br label %bidx\.num\.merge''' regex_none = ["call double @js_array_numeric_get_f64_unboxed"] -detail = "numeric indexed read takes the guarded raw-f64 fast path and loads the slot inline (helper call elided)" +detail = "numeric indexed read takes the guarded raw-f64 fast path and loads the slot inline (inttoptr + load double in bidx.num.fast; helper call elided)" [[workloads.numeric_arrays.ir_checks]] name = "numeric_array_uses_unboxed_set" From 966e1aa0695639b86c773a398668e1f45c27542f Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Mon, 15 Jun 2026 16:00:07 +0200 Subject: [PATCH 4/4] test(compiler-output): update harness fixture for inline numeric GET test_generic_native_rep_checks_require_configured_records fed synthetic IR with the old 'call double @js_array_numeric_get_f64_unboxed' shape, which no longer matches the updated numeric_array_uses_unboxed_get check (guard present + inline inttoptr/load double in bidx.num.fast + helper call elided). Update the fixture IR to the inlined fast-path shape; push/set still exercise their guarded raw-f64 helpers. Native-rep records keep the logical get consumer tag, so the structural checks are unchanged. 
--- tests/test_compiler_output_regression.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_compiler_output_regression.py b/tests/test_compiler_output_regression.py index 26c0f10fe9..479b4e52de 100644 --- a/tests/test_compiler_output_regression.py +++ b/tests/test_compiler_output_regression.py @@ -928,11 +928,28 @@ def test_native_rep_unchecked_unknown_bounds_fails_gate(self): ) def test_generic_native_rep_checks_require_configured_records(self): + # The numeric indexed read is inlined: a guarded fast block computes the + # element pointer (inttoptr) and performs a direct `load double` instead + # of calling js_array_numeric_get_f64_unboxed. Push/set still go through + # their guarded raw-f64 helpers. ir = """ define i32 @main() { entry: call i64 @js_array_numeric_push_f64_unboxed(i64 1, double 2.0) - call double @js_array_numeric_get_f64_unboxed(i64 1, i32 0) + %g = call i32 @js_typed_feedback_numeric_array_index_get_guard(i64 1, double 0.0, double 0.0, i32 0, i32 1) + %gc = icmp ne i32 %g, 0 + br i1 %gc, label %bidx.num.fast.1, label %bidx.num.fallback.2 + +bidx.num.fast.1: + %addr = add i64 1, 8 + %p = inttoptr i64 %addr to ptr + %v = load double, ptr %p, align 8 + br label %bidx.num.merge.3 + +bidx.num.fallback.2: + br label %bidx.num.merge.3 + +bidx.num.merge.3: call i32 @js_array_numeric_set_f64_unboxed(i64 1, i32 0, double 3.0) ret i32 0 }