From de8af6920d13de5b16b769057280ee0a5ae4fbc6 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Sun, 28 Jun 2026 13:43:02 +0100 Subject: [PATCH 1/6] perf(vm): unbox typed-array element reads and writes on the computed-access path Typed-array element access in the bytecode VM fell through to the generic TGocciaObjectValue computed-access branch, allocating an IntToStr index name plus a heap TGocciaNumberLiteralValue on every read, and boxing the scalar via RegisterToValue on every write. Arithmetic and comparisons were already 100% scalar (ADR 0001/0005), so these boundary boxings dominated allocation-heavy typed-array workloads (issue #800). Add unboxed fast paths to ExecGetComputedProperty / ExecSetComputedProperty for TGocciaTypedArrayValue receivers at array-index keys: reads go straight into a register scalar via the new RegisterFromDouble; numeric-scalar writes store directly (ToNumber on a Number is side-effect-free, so the spec's observable conversion is preserved). BigInt kinds, non-index keys, non-scalar values, and out-of-range / detached / immutable cases fall through to the unchanged boxed path, so all value semantics are preserved. Dedup the element store while here: integer NaN/Infinity coercion now lives only in WriteBinaryNumberElement, and the element read/write paths single-validate via ReadElementUnchecked / WriteElementUnchecked. sort_large_countingsort.js (prod, bytecode): allocations 7,471,627 -> 4,719,119 (-36.8%); x86_64 jobs=4 at the 20s deadline 14.1s -> 10.4s. typed-arrays.js element access: read +57%, write +35%, Float64 write +26%. No regressions: full JS suite 11,009/11,009 in both modes; test262 staging, TypedArray, Array, and DataView identical before/after. 
Closes #800 Co-Authored-By: Claude Opus 4.8 --- source/units/Goccia.VM.Registers.pas | 25 +++ source/units/Goccia.VM.pas | 30 +++- .../units/Goccia.Values.TypedArrayValue.pas | 98 ++++++---- .../TypedArray/element-access-unboxed.js | 167 ++++++++++++++++++ 4 files changed, 283 insertions(+), 37 deletions(-) create mode 100644 tests/built-ins/TypedArray/element-access-unboxed.js diff --git a/source/units/Goccia.VM.Registers.pas b/source/units/Goccia.VM.Registers.pas index 2b5baacfa..460382d68 100644 --- a/source/units/Goccia.VM.Registers.pas +++ b/source/units/Goccia.VM.Registers.pas @@ -40,6 +40,7 @@ function RegisterHole: TGocciaRegister; inline; function RegisterBoolean(const AValue: Boolean): TGocciaRegister; inline; function RegisterInt(const AValue: Int64): TGocciaRegister; inline; function RegisterFloat(const AValue: Double): TGocciaRegister; inline; +function RegisterFromDouble(const AValue: Double): TGocciaRegister; inline; function RegisterObject(const AValue: TGocciaValue): TGocciaRegister; inline; function ValueToRegister(const AValue: TGocciaValue): TGocciaRegister; inline; function RegisterToValue(const ARegister: TGocciaRegister): TGocciaValue; inline; @@ -83,6 +84,30 @@ function RegisterFloat(const AValue: Double): TGocciaRegister; inline; Result.FloatValue := AValue; end; +function RegisterFromDouble(const AValue: Double): TGocciaRegister; inline; +var + Bits: Int64 absolute AValue; +begin + // Build a register directly from a raw Double without ever allocating a heap + // TGocciaNumberLiteralValue. Mirrors the number branch of VMValueToRegisterFast: + // exact integers in LongInt range become grkInt (so downstream scalar opcodes and + // the Zero/One singletons engage on later boxing), and -0.0 stays float to keep + // its sign bit. NaN/Infinity/non-integers stay float. 
+ if AValue = 0.0 then + begin + if Bits < 0 then + Exit(RegisterFloat(AValue)); // -0.0: preserve the sign bit as a float + Exit(RegisterInt(0)); + end; + if AValue = 1.0 then + Exit(RegisterInt(1)); + if (not IsNaN(AValue)) and (not IsInfinite(AValue)) and + (Frac(AValue) = 0.0) and + (AValue >= Low(LongInt)) and (AValue <= High(LongInt)) then + Exit(RegisterInt(Trunc(AValue))); + Result := RegisterFloat(AValue); +end; + function RegisterObject(const AValue: TGocciaValue): TGocciaRegister; inline; begin Result.Kind := grkObject; diff --git a/source/units/Goccia.VM.pas b/source/units/Goccia.VM.pas index b05e37f22..0f72bc50f 100644 --- a/source/units/Goccia.VM.pas +++ b/source/units/Goccia.VM.pas @@ -521,7 +521,8 @@ implementation Goccia.Values.ProxyValue, Goccia.Values.Shape, Goccia.Values.ToObject, - Goccia.Values.ToPrimitive; + Goccia.Values.ToPrimitive, + Goccia.Values.TypedArrayValue; const BYTECODE_PRIVATE_SLOT_PREFIX = '#slot:'; @@ -7656,11 +7657,24 @@ procedure TGocciaVM.ExecGetComputedProperty(const ADest: Integer; Key: TGocciaPropertyKey; KeyName: string; ReceiverArray: TGocciaArrayValue; + FastIndex: Integer; + FastElement: Double; begin if (caoThrowOnNullUndefined in AOptions) and (AObjReg.Kind in [grkUndefined, grkNull]) then ThrowTypeError(SErrorCannotConvertNullOrUndefined, SSuggestCheckNullBeforeAccess) + else if (AObjReg.Kind = grkObject) and + (AObjReg.ObjectValue is TGocciaTypedArrayValue) and + TryGetArrayIndexRegister(AKeyReg, FastIndex) and + TGocciaTypedArrayValue(AObjReg.ObjectValue) + .TryReadIndexedScalar(FastIndex, FastElement) then + // Typed-array unboxed element read: the element goes straight into the + // destination register as a scalar, with no heap TGocciaNumberLiteralValue and + // no IntToStr index name. Non-index keys, BigInt kinds, and out-of-range indices + // fall through to the generic object branch below, which handles length, methods, + // `undefined` for out-of-range reads, BigInt boxing, and symbol keys unchanged. 
+ FRegisters[ADest] := RegisterFromDouble(FastElement) else if (AObjReg.Kind = grkObject) and (AObjReg.ObjectValue is TGocciaArrayValue) then begin @@ -7752,7 +7766,21 @@ procedure TGocciaVM.ExecSetComputedProperty(const ATargetIndex: Integer; Value: TGocciaValue; TargetValue: TGocciaValue; BoxedTarget: TGocciaObjectValue; + FastIndex: Integer; begin + // Typed-array unboxed element write: a numeric-scalar value going to a valid + // integer index stores directly, with no heap TGocciaNumberLiteralValue and no + // IntToStr index name. ToNumber on a Number is side-effect-free, so the spec's + // observable conversion is preserved. BigInt kinds (a Number value must throw), + // non-index keys, and non-scalar values fall through to the boxed path below. + if (FRegisters[ATargetIndex].Kind = grkObject) and + (FRegisters[ATargetIndex].ObjectValue is TGocciaTypedArrayValue) and + RegisterIsNumericScalar(AValueReg) and + TryGetArrayIndexRegister(AKeyReg, FastIndex) and + TGocciaTypedArrayValue(FRegisters[ATargetIndex].ObjectValue) + .TryWriteIndexedScalar(FastIndex, RegisterToDouble(AValueReg)) then + Exit; + Value := RegisterToValue(AValueReg); if (FRegisters[ATargetIndex].Kind = grkObject) and (FRegisters[ATargetIndex].ObjectValue is TGocciaArrayValue) then diff --git a/source/units/Goccia.Values.TypedArrayValue.pas b/source/units/Goccia.Values.TypedArrayValue.pas index f74f1dfdc..ea2b552b1 100644 --- a/source/units/Goccia.Values.TypedArrayValue.pas +++ b/source/units/Goccia.Values.TypedArrayValue.pas @@ -44,7 +44,9 @@ TGocciaTypedArrayValue = class(TGocciaInstanceValue) function HasValidBackingRange(const ALength: Integer): Boolean; function HasValidElementIndex(const AIndex: Integer): Boolean; + function ReadElementUnchecked(const AIndex: Integer): Double; function ReadElement(const AIndex: Integer): Double; + procedure WriteElementUnchecked(const AIndex: Integer; const AValue: Double); procedure WriteElement(const AIndex: Integer; const AValue: Double); procedure 
WriteNumberLiteral(const AIndex: Integer; const ANum: TGocciaNumberLiteralValue); @@ -98,6 +100,18 @@ TGocciaTypedArrayValue = class(TGocciaInstanceValue) property ByteOffset: Integer read FByteOffset; property Length: Integer read GetLength; property Kind: TGocciaTypedArrayKind read FKind; + + // Boxing-free element fast paths for the bytecode VM computed-access cores. + // TryReadIndexedScalar yields the element as a raw Double for non-BigInt kinds + // and a valid in-range index; it returns False (caller falls back to GetProperty) + // for BigInt kinds and out-of-range indices. TryWriteIndexedScalar stores an + // already-numeric scalar value (ToNumber on a Number is side-effect-free, so the + // observable conversion the spec requires is preserved) with the same coercion as + // WriteNumberLiteral; it returns False for BigInt kinds so the caller takes the + // throwing slow path, and True (handled) for non-BigInt kinds whether or not the + // index is in range or the backing buffer is immutable. + function TryReadIndexedScalar(const AIndex: Integer; out AValue: Double): Boolean; + function TryWriteIndexedScalar(const AIndex: Integer; const AValue: Double): Boolean; published function TypedArrayAt(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; function TypedArrayFill(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; @@ -401,66 +415,78 @@ function TGocciaTypedArrayValue.HasValidElementIndex(const AIndex: Integer): Boo { Element read/write via buffer } -function TGocciaTypedArrayValue.ReadElement(const AIndex: Integer): Double; +function TGocciaTypedArrayValue.ReadElementUnchecked(const AIndex: Integer): Double; var Offset: Integer; begin - if not HasValidElementIndex(AIndex) then - Exit(0); - + // Precondition: AIndex is in range (the caller validated HasValidElementIndex). + // One sync + read, with no redundant bounds re-check on the hot element path. 
SyncBufferData; Offset := FByteOffset + AIndex * BytesPerElement(FKind); Result := ReadBinaryNumberElement(FBufferData, Offset, ToBinaryElementKind(FKind), TYPED_ARRAY_LITTLE_ENDIAN); end; -procedure TGocciaTypedArrayValue.WriteElement(const AIndex: Integer; const AValue: Double); -var - Offset: Integer; +function TGocciaTypedArrayValue.ReadElement(const AIndex: Integer): Double; begin if not HasValidElementIndex(AIndex) then - Exit; + Exit(0); + Result := ReadElementUnchecked(AIndex); +end; +procedure TGocciaTypedArrayValue.WriteElementUnchecked(const AIndex: Integer; const AValue: Double); +var + Offset: Integer; +begin + // Precondition: AIndex is in range (the caller validated HasValidElementIndex). + // Integer coercion of the ToNumber result — non-finite -> 0 for integer kinds, + // Uint8Clamped clamping +Infinity to 255, float kinds verbatim — is performed by + // WriteBinaryNumberElement, so it is not repeated here. One sync + write. SyncBufferData; Offset := FByteOffset + AIndex * BytesPerElement(FKind); WriteBinaryNumberElement(FBufferData, Offset, ToBinaryElementKind(FKind), AValue, TYPED_ARRAY_LITTLE_ENDIAN); end; +procedure TGocciaTypedArrayValue.WriteElement(const AIndex: Integer; const AValue: Double); +begin + if not HasValidElementIndex(AIndex) then + Exit; + WriteElementUnchecked(AIndex, AValue); +end; + procedure TGocciaTypedArrayValue.WriteNumberLiteral(const AIndex: Integer; const ANum: TGocciaNumberLiteralValue); -var - Offset: Integer; - ToWrite: Double; begin if not HasValidElementIndex(AIndex) then Exit; + WriteElementUnchecked(AIndex, ANum.Value); +end; - // Map the coerced ToNumber result to the value SetValueInBuffer stores: float - // kinds keep the value (including NaN/+/-Infinity) verbatim, while integer - // kinds store 0 for any non-finite input, except Uint8Clamped which clamps - // +Infinity to 255. 
Selecting the value first lets the index validation, the - // backing-store sync, and the byte-offset computation run exactly once per - // store instead of being repeated by a nested WriteElement re-dispatch. - if IsFloatKind(FKind) then - ToWrite := ANum.Value - else if ANum.IsNaN then - ToWrite := 0 - else if ANum.IsInfinity then - begin - if FKind = takUint8Clamped then - ToWrite := 255 - else - ToWrite := 0; - end - else if ANum.IsNegativeInfinity then - ToWrite := 0 - else - ToWrite := ANum.Value; +function TGocciaTypedArrayValue.TryReadIndexedScalar(const AIndex: Integer; out AValue: Double): Boolean; +begin + // BigInt kinds yield TGocciaBigIntValue, never a Double, so they fall back to the + // boxed path; an out-of-range index falls back so the caller yields `undefined`. + if IsBigIntKind(FKind) or (not HasValidElementIndex(AIndex)) then + Exit(False); + AValue := ReadElementUnchecked(AIndex); + Result := True; +end; - SyncBufferData; - Offset := FByteOffset + AIndex * BytesPerElement(FKind); - WriteBinaryNumberElement(FBufferData, Offset, ToBinaryElementKind(FKind), - ToWrite, TYPED_ARRAY_LITTLE_ENDIAN); +function TGocciaTypedArrayValue.TryWriteIndexedScalar(const AIndex: Integer; const AValue: Double): Boolean; +begin + // A Number value into a BigInt typed array must throw (ToBigInt(Number) throws), so + // signal not-handled and let the caller take the boxed, throwing slow path. + if IsBigIntKind(FKind) then + Exit(False); + // Non-BigInt integer-indexed [[Set]] is always "handled": an out-of-range index is + // ignored and an immutable backing buffer skips the store, both reporting success + // per ES2026 10.4.5.9 / the Immutable ArrayBuffers proposal. 
+ Result := True; + if not HasValidElementIndex(AIndex) then + Exit; + if IsTypedArrayBackedByImmutableArrayBuffer(Self) then + Exit; + WriteElementUnchecked(AIndex, AValue); end; function TGocciaTypedArrayValue.ReadBigIntElement(const AIndex: Integer): Int64; diff --git a/tests/built-ins/TypedArray/element-access-unboxed.js b/tests/built-ins/TypedArray/element-access-unboxed.js new file mode 100644 index 000000000..312dbd4da --- /dev/null +++ b/tests/built-ins/TypedArray/element-access-unboxed.js @@ -0,0 +1,167 @@ +// Regression coverage for unboxed typed-array element reads and writes: the +// results must stay observably identical in both execution modes. + +describe("TypedArray unboxed element fast path", () => { + describe("signed zero round-trips", () => { + test("float array preserves -0 read back through an index", () => { + const ta = new Float64Array(1); + ta[0] = -0; + expect(Object.is(ta[0], -0)).toBe(true); + expect(Object.is(ta[0], 0)).toBe(false); + }); + + test("float32 array preserves -0", () => { + const ta = new Float32Array(1); + ta[0] = -0; + expect(Object.is(ta[0], -0)).toBe(true); + }); + + test("integer array normalizes -0 to +0", () => { + const ta = new Int32Array(1); + ta[0] = -0; + expect(Object.is(ta[0], 0)).toBe(true); + expect(Object.is(ta[0], -0)).toBe(false); + }); + }); + + describe("unboxed reads feed comparisons and equality", () => { + test("strict equality against a number literal", () => { + const ta = new Int16Array([-32768, 0, 32767]); + expect(ta[0] === -32768).toBe(true); + expect(ta[1] === 0).toBe(true); + expect(ta[2] === 32767).toBe(true); + expect(ta[0] === 0).toBe(false); + }); + + test("relational comparison of two elements (counting-sort scan shape)", () => { + const ta = new Uint16Array([0, 1, 1, 7, 65535]); + let sorted = true; + let scanned = 0; + // forEach so the workload actually runs (traditional for is opt-in here). 
+ [0, 1, 2, 3].forEach((i) => { + scanned += 1; + if (ta[i] > ta[i + 1]) sorted = false; + }); + expect(scanned).toBe(4); + expect(sorted).toBe(true); + }); + + test("element used directly in arithmetic stays unboxed-correct", () => { + const ta = new Int32Array([10, 20, 30]); + expect(ta[0] + ta[1] + ta[2]).toBe(60); + expect(ta[2] - ta[0]).toBe(20); + }); + + test("float NaN read compares as not-equal to itself", () => { + const ta = new Float64Array(1); + ta[0] = NaN; + expect(ta[0] === ta[0]).toBe(false); + expect(Number.isNaN(ta[0])).toBe(true); + }); + + test("float Infinity read compares correctly", () => { + const ta = new Float32Array([Infinity, -Infinity]); + expect(ta[0] > 0).toBe(true); + expect(ta[1] < 0).toBe(true); + expect(ta[0] === Infinity).toBe(true); + }); + }); + + describe("writes from a variable (register-resident scalar)", () => { + test("integer value held in a let binding", () => { + const ta = new Int8Array(3); + let v = 127; + ta[0] = v; + ta[1] = v - 255; + ta[2] = v + 1; + expect(ta[0]).toBe(127); + expect(ta[1]).toBe(-128); + expect(ta[2]).toBe(-128); + }); + + test("float value held in a let binding", () => { + const ta = new Float64Array(1); + let v = 3.5; + ta[0] = v; + expect(ta[0]).toBe(3.5); + }); + + test("computed index from a variable", () => { + const ta = new Uint16Array(4); + [0, 1, 2, 3].forEach((i) => { + ta[i] = i * 100; + }); + expect(ta[0]).toBe(0); + expect(ta[1]).toBe(100); + expect(ta[3]).toBe(300); + }); + }); + + describe("Float16Array index access", () => { + test("round-trips representable half-precision values", () => { + const ta = new Float16Array(3); + ta[0] = 1.5; + ta[1] = -2; + ta[2] = 0.5; + expect(ta[0]).toBe(1.5); + expect(ta[1]).toBe(-2); + expect(ta[2]).toBe(0.5); + }); + + test("stores and reads -Infinity (counting-sort smallest)", () => { + const ta = new Float16Array(1); + ta[0] = -Infinity; + expect(ta[0]).toBe(-Infinity); + }); + }); + + describe("non-scalar values still coerce via the slow 
path", () => { + test("boolean value coerces with ToNumber", () => { + const ta = new Int32Array(2); + ta[0] = true; + ta[1] = false; + expect(ta[0]).toBe(1); + expect(ta[1]).toBe(0); + }); + + test("null and undefined coerce with ToNumber", () => { + const intArr = new Int32Array(2); + intArr[0] = null; + intArr[1] = undefined; + expect(intArr[0]).toBe(0); + expect(intArr[1]).toBe(0); + + const floatArr = new Float64Array(1); + floatArr[0] = undefined; + expect(Number.isNaN(floatArr[0])).toBe(true); + }); + + test("object with valueOf coerces with ToNumber", () => { + const ta = new Uint8Array(1); + ta[0] = { valueOf: () => 200 }; + expect(ta[0]).toBe(200); + }); + + test("number value into a BigInt array still throws TypeError", () => { + const ta = new BigInt64Array(1); + expect(() => { ta[0] = 5; }).toThrow(TypeError); + }); + }); + + describe("large array index access (counting-sort scale)", () => { + test("fill, mutate the middle, and read back across a big buffer", () => { + const len = 1 << 16; + const ta = new Int16Array(len); + ta.fill(-32768); + const offset = 10000; + const indices = Array.from({ length: 256 }, (_, i) => i); + indices.forEach((i) => { + ta[offset + i] = i; + }); + expect(ta[0]).toBe(-32768); + expect(ta[offset]).toBe(0); + expect(ta[offset + 255]).toBe(255); + expect(ta[len - 1]).toBe(-32768); + }); + }); +}); From 5bb736ba502d7e2cb1103543b64a073487051845 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Sun, 28 Jun 2026 13:45:12 +0100 Subject: [PATCH 2/6] docs(bytecode-vm): note the typed-array unboxed element fast path Document the TryReadIndexedScalar/TryWriteIndexedScalar fast path on the computed-access cores so the bytecode VM doc stays accurate after the typed-array element unboxing change. 
Co-Authored-By: Claude Opus 4.8 --- docs/bytecode-vm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/bytecode-vm.md b/docs/bytecode-vm.md index dee0dcbb8..5d4a1edc7 100644 --- a/docs/bytecode-vm.md +++ b/docs/bytecode-vm.md @@ -132,7 +132,7 @@ Hits and fills serve only exact-class `TGocciaObjectValue` / `TGocciaVMLiteralOb Cached pointers (scope, shape) are compared for identity only and never dereferenced. Scope cache entries carry an entry-version stamp against allocator address reuse; shape entries need none, because shapes are never freed within an engine's lifetime, function templates never outlive their engine, and cross-realm maps stop shape tracking before a foreign realm can cache their owner layout. -Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. +Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. A non-BigInt `TGocciaTypedArrayValue` receiver at an array-index key takes an unboxed element fast path (`TryReadIndexedScalar`/`TryWriteIndexedScalar`): reads move the element straight into a register scalar and numeric-scalar writes store it directly, so neither allocates the heap `TGocciaNumberLiteralValue` or index-name string the generic object branch would. 
BigInt kinds, non-index keys, and non-scalar write values fall through to the boxed path, as do reads at an index that is not a valid element index (for example out of range), which yield `undefined` there. Numeric-scalar writes to a non-BigInt array at an invalid index or into an immutable backing buffer are completed on the fast path as silent no-ops rather than falling through. All value semantics are preserved, including the observable `ToNumber` ordering of integer-indexed `[[Set]]`. The current optimization target is reducing bytecode-mode suite time further without diverging interpreter and bytecode semantics. From 4b89bad186bbd520570faeffce9c40185784d2a4 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Sun, 28 Jun 2026 17:10:11 +0100 Subject: [PATCH 3/6] docs(adr): reject shared value caches as a runtime optimization Record ADR 0080 capturing why interning/pooling boxed TGocciaValue instances to reduce allocation count does not improve runtime in this FPC codebase, so the C/C++ "fewer allocations => faster" intuition is not imported a fourth time. Generalizes ADR 0013 (reject string interning) to boxed numbers with the #900 spike data: a small-int + Infinity/NaN cache on the bytecode VM RegisterToValue path cut allocations 25% but moved runtime +2.2% (flat-to-worse, interleaved medians). Notes the narrow exceptions that do pay off (SmallInt 0-255, special-value singletons / ADR 0002) and the interleaved-measurement guardrail, and cross-links core-patterns.md. 
Co-Authored-By: Claude Opus 4.8 --- ...-reject-value-caches-for-allocation-reduction.md | 13 +++++++++++++ docs/adr/README.md | 1 + docs/core-patterns.md | 2 ++ 3 files changed, 16 insertions(+) create mode 100644 docs/adr/0080-reject-value-caches-for-allocation-reduction.md diff --git a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md new file mode 100644 index 000000000..71e213e89 --- /dev/null +++ b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md @@ -0,0 +1,13 @@ +# Reject shared value caches as a runtime optimization + +**Date:** 2026-06-28 +**Area:** `runtime` +**Pull Request:** [#900](https://github.com/frostney/GocciaScript/pull/900) + +Reducing allocation *count* is not, by itself, a runtime lever in this engine, so shared caches of boxed `TGocciaValue` instances — interning or pooling them to avoid allocation — are rejected as a performance optimization. The project has now explored value caching at least three times: the fixed `SmallInt` 0–255 cache (kept, deliberately narrow), dictionary-based string interning (rejected in [ADR 0013](0013-reject-string-interning.md) at −4% across 172 benchmarks), and the boxed-number cache widening described below (rejected). Each confirms the same conclusion; this ADR exists so the C/C++ intuition that "fewer allocations ⇒ faster" is not imported a fourth time. + +Alongside the [#900](https://github.com/frostney/GocciaScript/pull/900) typed-array element unboxing, a lazy, GC-pinned cache of boxed small integers (range −32768..1024) plus `±Infinity`/`NaN` singleton reuse was spiked into the bytecode VM's `RegisterToValue` — the register→`TGocciaValue` boxing site that feeds call arguments. 
On the `sm/TypedArray/sort_large_countingsort.js` workload it cut heap allocations 4,719,119 → 3,534,333 (**−25%, deterministic**), yet runtime did not move: interleaved medians 6920 ms → 7072 ms (**+2.2%, flat-to-worse**), a fibonacci benchmark +0.6% (noise), and boot time unchanged. FreePascal's allocator plus the mark-and-sweep GC make these short-lived boxed values cheap to create and reclaim, so the cache's per-box branch (range check + array index + nil check) offsets whatever the avoided allocation saved — the same mechanism that made string interning a regression. + +The narrow exceptions already in the codebase remain in force and are **not** superseded: the fixed-size `SmallInt` 0–255 cache used by `RuntimeCopy` on the literal hot path, and the special-value singletons of [ADR 0002](0002-singleton-special-values.md) (`NaN`, `±0`, `±Infinity`, `Zero`/`One`, `true`/`false`, `null`/`undefined`). Those pay off precisely because they are tiny, fixed-array, single-comparison lookups with a very high hit rate on the path they sit on. Widening the range, moving the cache to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache. + +Guardrail for any future attempt: measure with **interleaved** before/after binaries (alternate per repetition, compare medians via the runner's `--bare`), never sequential batches. The first, sequential measurement here falsely showed −13% on the test and +63% on a fibonacci bench purely from machine-load drift, which interleaving erased. Allocation count is deterministic and hardware-independent, but it is not, on its own, evidence of a runtime win. 
[core-patterns.md § String Interning — Attempted and Rejected](../core-patterns.md#string-interning--attempted-and-rejected). [garbage-collector.md](../garbage-collector.md). diff --git a/docs/adr/README.md b/docs/adr/README.md index 5252f9477..077f5753e 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -89,3 +89,4 @@ Durable architecture and implementation decisions for GocciaScript. New ADRs use - [0077 — SameValueZero-keyed ordered store for Map and Set](0077-samevaluezero-ordered-collections.md) - [0078 — Thread-local cleanup registry for managed threadvars](0078-thread-local-cleanup-registry.md) - [0079 — Keep speculatively-scanned tokens across parenthesized-group probes](0079-keep-speculatively-scanned-tokens.md) +- [0080 — Reject shared value caches as a runtime optimization](0080-reject-value-caches-for-allocation-reduction.md) diff --git a/docs/core-patterns.md b/docs/core-patterns.md index d8fcb486d..541a62fdc 100644 --- a/docs/core-patterns.md +++ b/docs/core-patterns.md @@ -392,6 +392,8 @@ String interning (caching `TGocciaStringLiteralValue` instances in a `TDictionar **Do not re-attempt** dictionary-based string interning. If string allocation becomes a measurable bottleneck in future profiling, consider instead: (a) pre-allocated singletons for a small fixed set of ultra-common strings (like `SmallInt` but for `"length"`, `"undefined"`, etc.), or (b) arena/pool allocation for `TGocciaStringLiteralValue` objects to reduce per-object GC overhead without per-string hashing. +The same result holds for **boxed numbers**: widening the `SmallInt` cache and reusing `±Infinity`/`NaN` singletons in the bytecode VM's `RegisterToValue` boxing path cut allocations ~25% on an allocation-heavy typed-array test but produced **no runtime improvement** (interleaved median +2.2%). 
Reducing allocation *count* is not, by itself, a runtime lever in this codebase — see [ADR 0080](adr/0080-reject-value-caches-for-allocation-reduction.md) for the data, the narrow exceptions that do pay off, and the interleaved-measurement guardrail. + ## Related documents - [Architecture](architecture.md) — Pipelines, main layers, design direction, duplication boundaries From 0c18204fe0c714cfc5cb4a0141f066363b41bfdc Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Sun, 28 Jun 2026 17:18:07 +0100 Subject: [PATCH 4/6] docs: correct the phantom SmallInt 0-255 number cache claim The docs (core-patterns.md, garbage-collector.md) described a "SmallInt cache for 0-255" used by RuntimeCopy as an accepted, working optimization. It never existed: there is no array-of-TGocciaNumberLiteralValue anywhere in git history, and the claim was introduced by a docs-only commit (#302, Apr 2026) with no implementation. The real number value-reuse is RuntimeCopy returning the ADR 0002 special-value singletons (0, 1, NaN, +/-Infinity, -0); all other numbers allocate via Create. Correct both docs and ADR 0080 (which had repeated the phantom claim) to describe only the singletons that actually exist, and note that a spiked 0-255 range cache showed no runtime gain. Surfaced while verifying ADR 0080's "narrow exception". 
Co-Authored-By: Claude Opus 4.8 --- .../0080-reject-value-caches-for-allocation-reduction.md | 4 ++-- docs/core-patterns.md | 8 ++++---- docs/garbage-collector.md | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md index 71e213e89..0571eb3ef 100644 --- a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md +++ b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md @@ -4,10 +4,10 @@ **Area:** `runtime` **Pull Request:** [#900](https://github.com/frostney/GocciaScript/pull/900) -Reducing allocation *count* is not, by itself, a runtime lever in this engine, so shared caches of boxed `TGocciaValue` instances — interning or pooling them to avoid allocation — are rejected as a performance optimization. The project has now explored value caching at least three times: the fixed `SmallInt` 0–255 cache (kept, deliberately narrow), dictionary-based string interning (rejected in [ADR 0013](0013-reject-string-interning.md) at −4% across 172 benchmarks), and the boxed-number cache widening described below (rejected). Each confirms the same conclusion; this ADR exists so the C/C++ intuition that "fewer allocations ⇒ faster" is not imported a fourth time. +Reducing allocation *count* is not, by itself, a runtime lever in this engine, so shared caches of boxed `TGocciaValue` instances — interning or pooling them to avoid allocation — are rejected as a performance optimization. The only value reuse the engine actually has is the handful of special-value singletons returned by `RuntimeCopy` and the register-boxing paths (`0`, `1`, `NaN`, `±Infinity`, `-0`; see [ADR 0002](0002-singleton-special-values.md)). 
Every attempt to add caching *beyond* that fixed set has been measured and rejected: dictionary-based string interning ([ADR 0013](0013-reject-string-interning.md), −4% across 172 benchmarks) and the boxed-number range cache described below. A `SmallInt` 0–255 cache that earlier docs described as if implemented never actually existed in the source (corrected alongside this ADR) — itself a sign of how readily the C/C++ "fewer allocations ⇒ faster" intuition takes hold. This ADR exists so it is not imported again. Alongside the [#900](https://github.com/frostney/GocciaScript/pull/900) typed-array element unboxing, a lazy, GC-pinned cache of boxed small integers (range −32768..1024) plus `±Infinity`/`NaN` singleton reuse was spiked into the bytecode VM's `RegisterToValue` — the register→`TGocciaValue` boxing site that feeds call arguments. On the `sm/TypedArray/sort_large_countingsort.js` workload it cut heap allocations 4,719,119 → 3,534,333 (**−25%, deterministic**), yet runtime did not move: interleaved medians 6920 ms → 7072 ms (**+2.2%, flat-to-worse**), a fibonacci benchmark +0.6% (noise), and boot time unchanged. FreePascal's allocator plus the mark-and-sweep GC make these short-lived boxed values cheap to create and reclaim, so the cache's per-box branch (range check + array index + nil check) offsets whatever the avoided allocation saved — the same mechanism that made string interning a regression. -The narrow exceptions already in the codebase remain in force and are **not** superseded: the fixed-size `SmallInt` 0–255 cache used by `RuntimeCopy` on the literal hot path, and the special-value singletons of [ADR 0002](0002-singleton-special-values.md) (`NaN`, `±0`, `±Infinity`, `Zero`/`One`, `true`/`false`, `null`/`undefined`). Those pay off precisely because they are tiny, fixed-array, single-comparison lookups with a very high hit rate on the path they sit on. 
Widening the range, moving the cache to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache. +The one form of value reuse that does pay off — and is **not** superseded — is the special-value singleton set of [ADR 0002](0002-singleton-special-values.md) (`0`, `1`, `NaN`, `±0`, `±Infinity`, plus `true`/`false`, `null`/`undefined`). It works precisely because it is a tiny, fixed set matched by direct comparison with a very high hit rate on the path it sits on — not an array, not a range, not content-keyed. Widening it to an integer range, moving the reuse to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache. Guardrail for any future attempt: measure with **interleaved** before/after binaries (alternate per repetition, compare medians via the runner's `--bare`), never sequential batches. The first, sequential measurement here falsely showed −13% on the test and +63% on a fibonacci bench purely from machine-load drift, which interleaving erased. Allocation count is deterministic and hardware-independent, but it is not, on its own, evidence of a runtime win. [core-patterns.md § String Interning — Attempted and Rejected](../core-patterns.md#string-interning--attempted-and-rejected). [garbage-collector.md](../garbage-collector.md). 
diff --git a/docs/core-patterns.md b/docs/core-patterns.md index 541a62fdc..51783080d 100644 --- a/docs/core-patterns.md +++ b/docs/core-patterns.md @@ -386,13 +386,13 @@ String interning (caching `TGocciaStringLiteralValue` instances in a `TDictionar - **Dictionary lookup cost exceeds allocation cost.** FreePascal's allocator is fast. A `TDictionary.TryGetValue` call involves hashing the string (O(n) in string length) plus a hash-table probe, which is more expensive than simply allocating a short-lived `TGocciaStringLiteralValue` and letting the GC reclaim it later. - **Low hit rate on hot paths.** `ToStringLiteral` on numbers produces mostly unique strings (`"42"`, `"3.14"`, etc.) that never hit the cache, paying the hash cost with zero benefit. This path is called frequently in arithmetic-heavy benchmarks. - **`RuntimeCopy` is the wrong interception point.** Every string literal evaluation goes through `RuntimeCopy`. Adding a dictionary lookup to this universal hot path penalizes all string operations, including those that create one-off strings (concatenation results, method return values). -- **GC pressure is not the bottleneck.** The SmallInt cache works for numbers because integer equality is a single comparison. String equality requires content comparison, so the lookup cost scales with string length rather than being O(1). +- **GC pressure is not the bottleneck.** The number special-value singletons work because the check is a single equality against a fixed set. String equality requires content comparison, so the lookup cost scales with string length rather than being O(1). -**The `SmallInt` cache works because:** integer comparison is a single machine instruction, the cache is a fixed-size array (no hashing), and the hit rate for integers 0–255 is very high in typical code. None of these properties hold for arbitrary strings. 
+**The number special-value singletons work because:** they are a tiny fixed set (`0`, `1`, `NaN`, `±Infinity`, `-0`) matched by direct comparison in `RuntimeCopy` — no hashing, no array, no range — with a high hit rate in typical code. There is **no** general small-integer (e.g. 0–255) range cache: earlier revisions of this doc and `garbage-collector.md` described one, but it was never implemented, and a spike that added it (plus `±Infinity`/`NaN` reuse on the VM boxing path) measured **no runtime gain** — see the boxed-numbers note below. None of the singletons' properties hold for arbitrary strings. -**Do not re-attempt** dictionary-based string interning. If string allocation becomes a measurable bottleneck in future profiling, consider instead: (a) pre-allocated singletons for a small fixed set of ultra-common strings (like `SmallInt` but for `"length"`, `"undefined"`, etc.), or (b) arena/pool allocation for `TGocciaStringLiteralValue` objects to reduce per-object GC overhead without per-string hashing. +**Do not re-attempt** dictionary-based string interning. If string allocation becomes a measurable bottleneck in future profiling, consider instead: (a) pre-allocated singletons for a small fixed set of ultra-common strings (like the number special-value singletons but for `"length"`, `"undefined"`, etc.), or (b) arena/pool allocation for `TGocciaStringLiteralValue` objects to reduce per-object GC overhead without per-string hashing. -The same result holds for **boxed numbers**: widening the `SmallInt` cache and reusing `±Infinity`/`NaN` singletons in the bytecode VM's `RegisterToValue` boxing path cut allocations ~25% on an allocation-heavy typed-array test but produced **no runtime improvement** (interleaved median +2.2%). 
Reducing allocation *count* is not, by itself, a runtime lever in this codebase — see [ADR 0080](adr/0080-reject-value-caches-for-allocation-reduction.md) for the data, the narrow exceptions that do pay off, and the interleaved-measurement guardrail. +The same result holds for **boxed numbers**: adding a small-integer range cache and reusing `±Infinity`/`NaN` singletons in the bytecode VM's `RegisterToValue` boxing path cut allocations ~25% on an allocation-heavy typed-array test but produced **no runtime improvement** (interleaved median +2.2%). Reducing allocation *count* is not, by itself, a runtime lever in this codebase — see [ADR 0080](adr/0080-reject-value-caches-for-allocation-reduction.md) for the data, the narrow exceptions that do pay off, and the interleaved-measurement guardrail. ## Related documents diff --git a/docs/garbage-collector.md b/docs/garbage-collector.md index ac57047df..79d139ec8 100644 --- a/docs/garbage-collector.md +++ b/docs/garbage-collector.md @@ -44,7 +44,7 @@ end; - **`AfterConstruction` / `BeforeDestruction`** — Every value auto-registers with the thread-local `TGarbageCollector.Instance` upon creation and unregisters before destruction so root sets cannot retain stale object pointers. - **`MarkReferences`** — Base implementation sets `FGCMark := GCCurrentMark` (marking the object as alive for the current collection). `AdvanceMark` increments the shared `GCCurrentMark` while the collector lock is held, and `TGarbageCollector.Instance` uses that mark while traversing objects. Subclasses override `MarkReferences` to also mark values they reference (e.g., `TGocciaObjectValue` marks its prototype and property values, `TGocciaFunctionValue` marks its closure scope, `TGocciaArrayValue` marks its elements). The `if GCMarked then Exit;` guard at the top of each override prevents re-visiting objects in cyclic reference graphs. - **`TraceWeakReferences` / `SweepWeakReferences`** — Optional hooks for weak containers and weak references. 
The default implementations do nothing. WeakMap uses `TraceWeakReferences` as an ephemeron pass: if a key is already marked by normal roots, its value is marked, but the key is never marked by the map. WeakMap and WeakSet use `SweepWeakReferences` to remove entries whose keys/values remain unmarked. WeakRef clears an unmarked target, and FinalizationRegistry removes dead cells while enqueueing cleanup jobs for their held values. -- **`RuntimeCopy`** — Creates a fresh GC-managed copy of the value. Used by the evaluator when evaluating literal expressions: AST-owned literal values are not tracked by the GC, so `RuntimeCopy` produces a runtime value that is. The default implementation returns `Self` (for singletons and complex values). Primitives override this: numbers use the `SmallInt` cache for 0-255, booleans return singletons, strings create new instances (cheap due to copy-on-write). +- **`RuntimeCopy`** — Creates a fresh GC-managed copy of the value. Used by the evaluator when evaluating literal expressions: AST-owned literal values are not tracked by the GC, so `RuntimeCopy` produces a runtime value that is. The default implementation returns `Self` (for singletons and complex values). Primitives override this: numbers reuse the special-value singletons (`0`, `1`, `NaN`, `±Infinity`, `-0`) and otherwise create a fresh instance, booleans return singletons, strings create new instances (cheap due to copy-on-write). ## Contributor Rules @@ -151,7 +151,7 @@ The separate `memory.heap` JSON object comes from FreePascal's `GetHeapStatus`, The parser creates `TGocciaValue` instances (numbers, strings, booleans) and stores them inside `TGocciaLiteralExpression` AST nodes. These values are owned by the AST, not the GC. 
`TGocciaLiteralExpression.Create` calls `TGarbageCollector.Instance.UnregisterObject` to remove the value from GC tracking, and `TGocciaLiteralExpression.Destroy` frees the value (unless it is a singleton like `UndefinedValue`, `TrueValue`, or `FalseValue`). -When the evaluator encounters a literal expression, it calls `Value.RuntimeCopy` to produce a fresh GC-managed runtime value. This cleanly separates compile-time constants (owned by the AST) from runtime values (managed by the GC). The overhead is minimal: integers 0-255 hit the `SmallInt` cache (zero allocation), booleans return singletons, and strings benefit from FreePascal's copy-on-write semantics. +When the evaluator encounters a literal expression, it calls `Value.RuntimeCopy` to produce a fresh GC-managed runtime value. This cleanly separates compile-time constants (owned by the AST) from runtime values (managed by the GC). The overhead is minimal: `0`, `1`, and the special values (`NaN`, `±Infinity`, `-0`) reuse singletons (zero allocation), other numbers allocate cheaply, booleans return singletons, and strings benefit from FreePascal's copy-on-write semantics. ## Related Documents From 3f4b371571ac7d3826ae66e6178af3012a4888a1 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Sun, 28 Jun 2026 17:45:17 +0100 Subject: [PATCH 5/6] docs(adr): quantify the special-value singleton cache effect in ADR 0080 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measured both sides of the boundary so the "kept exception" is data-backed, not asserted: disabling the singleton reuse costs +786k allocations and ~1.4-1.7% on the allocation-heavy counting-sort test (within noise on typical integer code), while widening it to a small-integer range removed more allocations for no runtime gain (+2.2%). Even the kept cache barely moves runtime — it is retained because it is free, not because it is a meaningful speedup. 
Co-Authored-By: Claude Opus 4.8 --- docs/adr/0080-reject-value-caches-for-allocation-reduction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md index 0571eb3ef..d9e51398e 100644 --- a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md +++ b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md @@ -8,6 +8,6 @@ Reducing allocation *count* is not, by itself, a runtime lever in this engine, s Alongside the [#900](https://github.com/frostney/GocciaScript/pull/900) typed-array element unboxing, a lazy, GC-pinned cache of boxed small integers (range −32768..1024) plus `±Infinity`/`NaN` singleton reuse was spiked into the bytecode VM's `RegisterToValue` — the register→`TGocciaValue` boxing site that feeds call arguments. On the `sm/TypedArray/sort_large_countingsort.js` workload it cut heap allocations 4,719,119 → 3,534,333 (**−25%, deterministic**), yet runtime did not move: interleaved medians 6920 ms → 7072 ms (**+2.2%, flat-to-worse**), a fibonacci benchmark +0.6% (noise), and boot time unchanged. FreePascal's allocator plus the mark-and-sweep GC make these short-lived boxed values cheap to create and reclaim, so the cache's per-box branch (range check + array index + nil check) offsets whatever the avoided allocation saved — the same mechanism that made string interning a regression. -The one form of value reuse that does pay off — and is **not** superseded — is the special-value singleton set of [ADR 0002](0002-singleton-special-values.md) (`0`, `1`, `NaN`, `±0`, `±Infinity`, plus `true`/`false`, `null`/`undefined`). It works precisely because it is a tiny, fixed set matched by direct comparison with a very high hit rate on the path it sits on — not an array, not a range, not content-keyed. 
Widening it to an integer range, moving the reuse to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache. +The one form of value reuse worth keeping — and **not** superseded — is the special-value singleton set of [ADR 0002](0002-singleton-special-values.md) (`0`, `1`, `NaN`, `±0`, `±Infinity`, plus `true`/`false`, `null`/`undefined`), reused by `RuntimeCopy` and `RegisterToValue`. It is a tiny, fixed set matched by direct comparison with a high hit rate on the path it sits on — not an array, not a range, not content-keyed. The boundary was measured on both sides: *disabling* the singleton reuse (always allocating) costs +786k allocations and only ~1.4–1.7% of runtime on the allocation-heavy `sort_large_countingsort.js` test (within noise on typical integer code), so keeping it is a small, essentially free win; *widening* it to a small-integer range (the spike above) removed more allocations (−1.18M) for no runtime gain (+2.2%). So even the kept cache barely moves runtime, and everything past the narrow fixed set is pure cost — the singleton set is the measured sweet spot, kept because it is free rather than because it is a meaningful speedup. If boxed-value allocation ever shows up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache. Guardrail for any future attempt: measure with **interleaved** before/after binaries (alternate per repetition, compare medians via the runner's `--bare`), never sequential batches. The first, sequential measurement here falsely showed −13% on the test and +63% on a fibonacci bench purely from machine-load drift, which interleaving erased.
Allocation count is deterministic and hardware-independent, but it is not, on its own, evidence of a runtime win. [core-patterns.md § String Interning — Attempted and Rejected](../core-patterns.md#string-interning--attempted-and-rejected). [garbage-collector.md](../garbage-collector.md). From f718c41099ddbcda78ff85a3dd030e4bd8f3ff47 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Sun, 28 Jun 2026 17:58:07 +0100 Subject: [PATCH 6/6] docs(bytecode-vm): split typed-array read/write fallback rules Address PR review (coderabbitai): the computed-access note conflated the read and write fast-path fallbacks. Reads fall through to the boxed path for BigInt kinds, non-index keys, and out-of-range/detached indices; non-BigInt scalar writes are handled in place even for out-of-range or immutable cases (store skipped, success reported) and only fall through for BigInt kinds, non-index keys, or non-scalar values. Wording now matches the TryReadIndexedScalar/TryWriteIndexedScalar contracts. Co-Authored-By: Claude Opus 4.8 --- docs/bytecode-vm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/bytecode-vm.md b/docs/bytecode-vm.md index 5d4a1edc7..7d4445cb4 100644 --- a/docs/bytecode-vm.md +++ b/docs/bytecode-vm.md @@ -132,7 +132,7 @@ Hits and fills serve only exact-class `TGocciaObjectValue` / `TGocciaVMLiteralOb Cached pointers (scope, shape) are compared for identity only and never dereferenced. Scope cache entries carry an entry-version stamp against allocator address reuse; shape entries need none, because shapes are never freed within an engine's lifetime, function templates never outlive their engine, and cross-realm maps stop shape tracking before a foreign realm can cache their owner layout. 
-Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. A non-BigInt `TGocciaTypedArrayValue` receiver at an array-index key takes an unboxed element fast path (`TryReadIndexedScalar`/`TryWriteIndexedScalar`): reads move the element straight into a register scalar and numeric-scalar writes store it directly, so neither allocates the heap `TGocciaNumberLiteralValue` or index-name string the generic object branch would. BigInt kinds, non-index keys, non-scalar write values, and out-of-range/detached/immutable cases fall through to the boxed path, preserving all value semantics including the observable `ToNumber` ordering of integer-indexed `[[Set]]`. +Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. A non-BigInt `TGocciaTypedArrayValue` receiver at an array-index key takes an unboxed element fast path (`TryReadIndexedScalar`/`TryWriteIndexedScalar`): reads move the element straight into a register scalar and numeric-scalar writes store it directly, so neither allocates the heap `TGocciaNumberLiteralValue` or index-name string the generic object branch would. BigInt kinds, non-index keys, and non-scalar write values fall through to the boxed path; an out-of-range or detached **read** does too (yielding `undefined`). 
A non-BigInt scalar **write**, however, keeps its integer-indexed exotic semantics in place even for an out-of-range index or immutable backing buffer — the store is skipped and reported as successful, never boxed. All value semantics are preserved, including the observable `ToNumber` ordering of integer-indexed `[[Set]]`. The current optimization target is reducing bytecode-mode suite time further without diverging interpreter and bytecode semantics.