From de8af6920d13de5b16b769057280ee0a5ae4fbc6 Mon Sep 17 00:00:00 2001
From: Johannes Stein <johannesstein@freeze-dev.com>
Date: Sun, 28 Jun 2026 13:43:02 +0100
Subject: [PATCH 1/6] perf(vm): unbox typed-array element reads and writes on
 the computed-access path

Typed-array element access in the bytecode VM fell through to the generic
TGocciaObjectValue computed-access branch, allocating an IntToStr index name plus
a heap TGocciaNumberLiteralValue on every read, and boxing the scalar via
RegisterToValue on every write. Arithmetic and comparisons were already 100%
scalar (ADR 0001/0005), so these boundary boxings dominated allocation-heavy
typed-array workloads (issue #800).

Add unboxed fast paths to ExecGetComputedProperty / ExecSetComputedProperty for
TGocciaTypedArrayValue receivers at array-index keys: reads go straight into a
register scalar via the new RegisterFromDouble; numeric-scalar writes store
directly (ToNumber on a Number is side-effect-free, so the spec's observable
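conversion is preserved). BigInt kinds, non-index keys, and non-scalar values
fall through to the unchanged boxed path, as do reads whose index fails the
element-index check (out-of-range / detached), which yield undefined there.
Numeric-scalar writes whose index fails that check, or whose backing buffer is
immutable, are completed on the fast path as silent no-ops, matching
integer-indexed [[Set]], so all value semantics are preserved.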

Dedup the element store while here: integer NaN/Infinity coercion now lives only
in WriteBinaryNumberElement, and the element read/write paths single-validate via
ReadElementUnchecked / WriteElementUnchecked.

sort_large_countingsort.js (prod, bytecode): allocations 7,471,627 -> 4,719,119
(-36.8%); x86_64 jobs=4 at the 20s deadline 14.1s -> 10.4s. typed-arrays.js
element access: read +57%, write +35%, Float64 write +26%. No regressions: full
JS suite 11,009/11,009 in both modes; test262 staging, TypedArray, Array, and
DataView identical before/after.

Closes #800

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 source/units/Goccia.VM.Registers.pas          |  25 +++
 source/units/Goccia.VM.pas                    |  30 +++-
 .../units/Goccia.Values.TypedArrayValue.pas   |  98 ++++++----
 .../TypedArray/element-access-unboxed.js      | 167 ++++++++++++++++++
 4 files changed, 283 insertions(+), 37 deletions(-)
 create mode 100644 tests/built-ins/TypedArray/element-access-unboxed.js

diff --git a/source/units/Goccia.VM.Registers.pas b/source/units/Goccia.VM.Registers.pas
index 2b5baacfa..460382d68 100644
--- a/source/units/Goccia.VM.Registers.pas
+++ b/source/units/Goccia.VM.Registers.pas
@@ -40,6 +40,7 @@ function RegisterHole: TGocciaRegister; inline;
 function RegisterBoolean(const AValue: Boolean): TGocciaRegister; inline;
 function RegisterInt(const AValue: Int64): TGocciaRegister; inline;
 function RegisterFloat(const AValue: Double): TGocciaRegister; inline;
+function RegisterFromDouble(const AValue: Double): TGocciaRegister; inline;
 function RegisterObject(const AValue: TGocciaValue): TGocciaRegister; inline;
 function ValueToRegister(const AValue: TGocciaValue): TGocciaRegister; inline;
 function RegisterToValue(const ARegister: TGocciaRegister): TGocciaValue; inline;
@@ -83,6 +84,30 @@ function RegisterFloat(const AValue: Double): TGocciaRegister; inline;
   Result.FloatValue := AValue;
 end;
 
+function RegisterFromDouble(const AValue: Double): TGocciaRegister; inline;
+var
+  Bits: Int64 absolute AValue;
+begin
+  // Build a register directly from a raw Double without ever allocating a heap
+  // TGocciaNumberLiteralValue. Mirrors the number branch of VMValueToRegisterFast:
+  // exact integers in LongInt range become grkInt (so downstream scalar opcodes and
+  // the Zero/One singletons engage on later boxing), and -0.0 stays float to keep
+  // its sign bit. NaN/Infinity/non-integers stay float.
+  if AValue = 0.0 then
+  begin
+    if Bits < 0 then
+      Exit(RegisterFloat(AValue)); // -0.0: preserve the sign bit as a float
+    Exit(RegisterInt(0));
+  end;
+  if AValue = 1.0 then
+    Exit(RegisterInt(1));
+  if (not IsNaN(AValue)) and (not IsInfinite(AValue)) and
+     (Frac(AValue) = 0.0) and
+     (AValue >= Low(LongInt)) and (AValue <= High(LongInt)) then
+    Exit(RegisterInt(Trunc(AValue)));
+  Result := RegisterFloat(AValue);
+end;
+
 function RegisterObject(const AValue: TGocciaValue): TGocciaRegister; inline;
 begin
   Result.Kind := grkObject;
diff --git a/source/units/Goccia.VM.pas b/source/units/Goccia.VM.pas
index b05e37f22..0f72bc50f 100644
--- a/source/units/Goccia.VM.pas
+++ b/source/units/Goccia.VM.pas
@@ -521,7 +521,8 @@ implementation
   Goccia.Values.ProxyValue,
   Goccia.Values.Shape,
   Goccia.Values.ToObject,
-  Goccia.Values.ToPrimitive;
+  Goccia.Values.ToPrimitive,
+  Goccia.Values.TypedArrayValue;
 
 const
   BYTECODE_PRIVATE_SLOT_PREFIX = '#slot:';
@@ -7656,11 +7657,24 @@ procedure TGocciaVM.ExecGetComputedProperty(const ADest: Integer;
   Key: TGocciaPropertyKey;
   KeyName: string;
   ReceiverArray: TGocciaArrayValue;
+  FastIndex: Integer;
+  FastElement: Double;
 begin
   if (caoThrowOnNullUndefined in AOptions) and
      (AObjReg.Kind in [grkUndefined, grkNull]) then
     ThrowTypeError(SErrorCannotConvertNullOrUndefined,
       SSuggestCheckNullBeforeAccess)
+  else if (AObjReg.Kind = grkObject) and
+          (AObjReg.ObjectValue is TGocciaTypedArrayValue) and
+          TryGetArrayIndexRegister(AKeyReg, FastIndex) and
+          TGocciaTypedArrayValue(AObjReg.ObjectValue)
+            .TryReadIndexedScalar(FastIndex, FastElement) then
+    // Typed-array unboxed element read: the element goes straight into the
+    // destination register as a scalar, with no heap TGocciaNumberLiteralValue and
+    // no IntToStr index name. Non-index keys, BigInt kinds, and out-of-range indices
+    // fall through to the generic object branch below, which handles length, methods,
+    // `undefined` for out-of-range reads, BigInt boxing, and symbol keys unchanged.
+    FRegisters[ADest] := RegisterFromDouble(FastElement)
   else if (AObjReg.Kind = grkObject) and
           (AObjReg.ObjectValue is TGocciaArrayValue) then
   begin
@@ -7752,7 +7766,21 @@ procedure TGocciaVM.ExecSetComputedProperty(const ATargetIndex: Integer;
   Value: TGocciaValue;
   TargetValue: TGocciaValue;
   BoxedTarget: TGocciaObjectValue;
+  FastIndex: Integer;
 begin
+  // Typed-array unboxed element write: a numeric-scalar value going to a valid
+  // integer index stores directly, with no heap TGocciaNumberLiteralValue and no
+  // IntToStr index name. ToNumber on a Number is side-effect-free, so the spec's
+  // observable conversion is preserved. BigInt kinds (a Number value must throw),
+  // non-index keys, and non-scalar values fall through to the boxed path below.
+  if (FRegisters[ATargetIndex].Kind = grkObject) and
+     (FRegisters[ATargetIndex].ObjectValue is TGocciaTypedArrayValue) and
+     RegisterIsNumericScalar(AValueReg) and
+     TryGetArrayIndexRegister(AKeyReg, FastIndex) and
+     TGocciaTypedArrayValue(FRegisters[ATargetIndex].ObjectValue)
+       .TryWriteIndexedScalar(FastIndex, RegisterToDouble(AValueReg)) then
+    Exit;
+
   Value := RegisterToValue(AValueReg);
   if (FRegisters[ATargetIndex].Kind = grkObject) and
      (FRegisters[ATargetIndex].ObjectValue is TGocciaArrayValue) then
diff --git a/source/units/Goccia.Values.TypedArrayValue.pas b/source/units/Goccia.Values.TypedArrayValue.pas
index f74f1dfdc..ea2b552b1 100644
--- a/source/units/Goccia.Values.TypedArrayValue.pas
+++ b/source/units/Goccia.Values.TypedArrayValue.pas
@@ -44,7 +44,9 @@   TGocciaTypedArrayValue = class(TGocciaInstanceValue)
     function HasValidBackingRange(const ALength: Integer): Boolean;
     function HasValidElementIndex(const AIndex: Integer): Boolean;
 
+    function ReadElementUnchecked(const AIndex: Integer): Double;
     function ReadElement(const AIndex: Integer): Double;
+    procedure WriteElementUnchecked(const AIndex: Integer; const AValue: Double);
     procedure WriteElement(const AIndex: Integer; const AValue: Double);
     procedure WriteNumberLiteral(const AIndex: Integer; const ANum: TGocciaNumberLiteralValue);
 
@@ -98,6 +100,18 @@   TGocciaTypedArrayValue = class(TGocciaInstanceValue)
     property ByteOffset: Integer read FByteOffset;
     property Length: Integer read GetLength;
     property Kind: TGocciaTypedArrayKind read FKind;
+
+    // Boxing-free element fast paths for the bytecode VM computed-access cores.
+    // TryReadIndexedScalar yields the element as a raw Double for non-BigInt kinds
+    // and a valid in-range index; it returns False (caller falls back to GetProperty)
+    // for BigInt kinds and out-of-range indices. TryWriteIndexedScalar stores an
+    // already-numeric scalar value (ToNumber on a Number is side-effect-free, so the
+    // observable conversion the spec requires is preserved) with the same coercion as
+    // WriteNumberLiteral; it returns False for BigInt kinds so the caller takes the
+    // throwing slow path, and True (handled) for non-BigInt kinds whether or not the
+    // index is in range or the backing buffer is immutable.
+    function TryReadIndexedScalar(const AIndex: Integer; out AValue: Double): Boolean;
+    function TryWriteIndexedScalar(const AIndex: Integer; const AValue: Double): Boolean;
   published
     function TypedArrayAt(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue;
     function TypedArrayFill(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue;
@@ -401,66 +415,78 @@ function TGocciaTypedArrayValue.HasValidElementIndex(const AIndex: Integer): Boo
 
 { Element read/write via buffer }
 
-function TGocciaTypedArrayValue.ReadElement(const AIndex: Integer): Double;
+function TGocciaTypedArrayValue.ReadElementUnchecked(const AIndex: Integer): Double;
 var
   Offset: Integer;
 begin
-  if not HasValidElementIndex(AIndex) then
-    Exit(0);
-
+  // Precondition: AIndex is in range (the caller validated HasValidElementIndex).
+  // One sync + read, with no redundant bounds re-check on the hot element path.
   SyncBufferData;
   Offset := FByteOffset + AIndex * BytesPerElement(FKind);
   Result := ReadBinaryNumberElement(FBufferData, Offset,
     ToBinaryElementKind(FKind), TYPED_ARRAY_LITTLE_ENDIAN);
 end;
 
-procedure TGocciaTypedArrayValue.WriteElement(const AIndex: Integer; const AValue: Double);
-var
-  Offset: Integer;
+function TGocciaTypedArrayValue.ReadElement(const AIndex: Integer): Double;
 begin
   if not HasValidElementIndex(AIndex) then
-    Exit;
+    Exit(0);
+  Result := ReadElementUnchecked(AIndex);
+end;
 
+procedure TGocciaTypedArrayValue.WriteElementUnchecked(const AIndex: Integer; const AValue: Double);
+var
+  Offset: Integer;
+begin
+  // Precondition: AIndex is in range (the caller validated HasValidElementIndex).
+  // Integer coercion of the ToNumber result — non-finite -> 0 for integer kinds,
+  // Uint8Clamped clamping +Infinity to 255, float kinds verbatim — is performed by
+  // WriteBinaryNumberElement, so it is not repeated here. One sync + write.
   SyncBufferData;
   Offset := FByteOffset + AIndex * BytesPerElement(FKind);
   WriteBinaryNumberElement(FBufferData, Offset, ToBinaryElementKind(FKind),
     AValue, TYPED_ARRAY_LITTLE_ENDIAN);
 end;
 
+procedure TGocciaTypedArrayValue.WriteElement(const AIndex: Integer; const AValue: Double);
+begin
+  if not HasValidElementIndex(AIndex) then
+    Exit;
+  WriteElementUnchecked(AIndex, AValue);
+end;
+
 procedure TGocciaTypedArrayValue.WriteNumberLiteral(const AIndex: Integer; const ANum: TGocciaNumberLiteralValue);
-var
-  Offset: Integer;
-  ToWrite: Double;
 begin
   if not HasValidElementIndex(AIndex) then
     Exit;
+  WriteElementUnchecked(AIndex, ANum.Value);
+end;
 
-  // Map the coerced ToNumber result to the value SetValueInBuffer stores: float
-  // kinds keep the value (including NaN/+/-Infinity) verbatim, while integer
-  // kinds store 0 for any non-finite input, except Uint8Clamped which clamps
-  // +Infinity to 255. Selecting the value first lets the index validation, the
-  // backing-store sync, and the byte-offset computation run exactly once per
-  // store instead of being repeated by a nested WriteElement re-dispatch.
-  if IsFloatKind(FKind) then
-    ToWrite := ANum.Value
-  else if ANum.IsNaN then
-    ToWrite := 0
-  else if ANum.IsInfinity then
-  begin
-    if FKind = takUint8Clamped then
-      ToWrite := 255
-    else
-      ToWrite := 0;
-  end
-  else if ANum.IsNegativeInfinity then
-    ToWrite := 0
-  else
-    ToWrite := ANum.Value;
+function TGocciaTypedArrayValue.TryReadIndexedScalar(const AIndex: Integer; out AValue: Double): Boolean;
+begin
+  // BigInt kinds yield TGocciaBigIntValue, never a Double, so they fall back to the
+  // boxed path; an out-of-range index falls back so the caller yields `undefined`.
+  if IsBigIntKind(FKind) or (not HasValidElementIndex(AIndex)) then
+    Exit(False);
+  AValue := ReadElementUnchecked(AIndex);
+  Result := True;
+end;
 
-  SyncBufferData;
-  Offset := FByteOffset + AIndex * BytesPerElement(FKind);
-  WriteBinaryNumberElement(FBufferData, Offset, ToBinaryElementKind(FKind),
-    ToWrite, TYPED_ARRAY_LITTLE_ENDIAN);
+function TGocciaTypedArrayValue.TryWriteIndexedScalar(const AIndex: Integer; const AValue: Double): Boolean;
+begin
+  // A Number value into a BigInt typed array must throw (ToBigInt(Number) throws), so
+  // signal not-handled and let the caller take the boxed, throwing slow path.
+  if IsBigIntKind(FKind) then
+    Exit(False);
+  // Non-BigInt integer-indexed [[Set]] is always "handled": an out-of-range index is
+  // ignored and an immutable backing buffer skips the store, both reporting success
+  // per ES2026 10.4.5.9 / the Immutable ArrayBuffers proposal.
+  Result := True;
+  if not HasValidElementIndex(AIndex) then
+    Exit;
+  if IsTypedArrayBackedByImmutableArrayBuffer(Self) then
+    Exit;
+  WriteElementUnchecked(AIndex, AValue);
 end;
 
 function TGocciaTypedArrayValue.ReadBigIntElement(const AIndex: Integer): Int64;
diff --git a/tests/built-ins/TypedArray/element-access-unboxed.js b/tests/built-ins/TypedArray/element-access-unboxed.js
new file mode 100644
index 000000000..312dbd4da
--- /dev/null
+++ b/tests/built-ins/TypedArray/element-access-unboxed.js
@@ -0,0 +1,167 @@
+// Regression coverage for unboxed typed-array element reads and writes: the
+// results must stay observably identical in both execution modes.
+
+describe("TypedArray unboxed element fast path", () => {
+  describe("signed zero round-trips", () => {
+    test("float array preserves -0 read back through an index", () => {
+      const ta = new Float64Array(1);
+      ta[0] = -0;
+      expect(Object.is(ta[0], -0)).toBe(true);
+      expect(Object.is(ta[0], 0)).toBe(false);
+    });
+
+    test("float32 array preserves -0", () => {
+      const ta = new Float32Array(1);
+      ta[0] = -0;
+      expect(Object.is(ta[0], -0)).toBe(true);
+    });
+
+    test("integer array normalizes -0 to +0", () => {
+      const ta = new Int32Array(1);
+      ta[0] = -0;
+      expect(Object.is(ta[0], 0)).toBe(true);
+      expect(Object.is(ta[0], -0)).toBe(false);
+    });
+  });
+
+  describe("unboxed reads feed comparisons and equality", () => {
+    test("strict equality against a number literal", () => {
+      const ta = new Int16Array([-32768, 0, 32767]);
+      expect(ta[0] === -32768).toBe(true);
+      expect(ta[1] === 0).toBe(true);
+      expect(ta[2] === 32767).toBe(true);
+      expect(ta[0] === 0).toBe(false);
+    });
+
+    test("relational comparison of two elements (counting-sort scan shape)", () => {
+      const ta = new Uint16Array([0, 1, 1, 7, 65535]);
+      let sorted = true;
+      let scanned = 0;
+      // forEach so the workload actually runs (traditional for is opt-in here).
+      [0, 1, 2, 3].forEach((i) => {
+        scanned += 1;
+        if (ta[i] > ta[i + 1]) sorted = false;
+      });
+      expect(scanned).toBe(4);
+      expect(sorted).toBe(true);
+    });
+
+    test("element used directly in arithmetic stays unboxed-correct", () => {
+      const ta = new Int32Array([10, 20, 30]);
+      expect(ta[0] + ta[1] + ta[2]).toBe(60);
+      expect(ta[2] - ta[0]).toBe(20);
+    });
+
+    test("float NaN read compares as not-equal to itself", () => {
+      const ta = new Float64Array(1);
+      ta[0] = NaN;
+      expect(ta[0] === ta[0]).toBe(false);
+      expect(Number.isNaN(ta[0])).toBe(true);
+    });
+
+    test("float Infinity read compares correctly", () => {
+      const ta = new Float32Array([Infinity, -Infinity]);
+      expect(ta[0] > 0).toBe(true);
+      expect(ta[1] < 0).toBe(true);
+      expect(ta[0] === Infinity).toBe(true);
+    });
+  });
+
+  describe("writes from a variable (register-resident scalar)", () => {
+    test("integer value held in a let binding", () => {
+      const ta = new Int8Array(3);
+      let v = 127;
+      ta[0] = v;
+      ta[1] = v - 255;
+      ta[2] = v + 1;
+      expect(ta[0]).toBe(127);
+      expect(ta[1]).toBe(-128);
+      expect(ta[2]).toBe(-128);
+    });
+
+    test("float value held in a let binding", () => {
+      const ta = new Float64Array(1);
+      let v = 3.5;
+      ta[0] = v;
+      expect(ta[0]).toBe(3.5);
+    });
+
+    test("computed index from a variable", () => {
+      const ta = new Uint16Array(4);
+      [0, 1, 2, 3].forEach((i) => {
+        ta[i] = i * 100;
+      });
+      expect(ta[0]).toBe(0);
+      expect(ta[1]).toBe(100);
+      expect(ta[3]).toBe(300);
+    });
+  });
+
+  describe("Float16Array index access", () => {
+    test("round-trips representable half-precision values", () => {
+      const ta = new Float16Array(3);
+      ta[0] = 1.5;
+      ta[1] = -2;
+      ta[2] = 0.5;
+      expect(ta[0]).toBe(1.5);
+      expect(ta[1]).toBe(-2);
+      expect(ta[2]).toBe(0.5);
+    });
+
+    test("stores and reads -Infinity (counting-sort smallest)", () => {
+      const ta = new Float16Array(1);
+      ta[0] = -Infinity;
+      expect(ta[0]).toBe(-Infinity);
+    });
+  });
+
+  describe("non-scalar values still coerce via the slow path", () => {
+    test("boolean value coerces with ToNumber", () => {
+      const ta = new Int32Array(2);
+      ta[0] = true;
+      ta[1] = false;
+      expect(ta[0]).toBe(1);
+      expect(ta[1]).toBe(0);
+    });
+
+    test("null and undefined coerce with ToNumber", () => {
+      const intArr = new Int32Array(2);
+      intArr[0] = null;
+      intArr[1] = undefined;
+      expect(intArr[0]).toBe(0);
+      expect(intArr[1]).toBe(0);
+
+      const floatArr = new Float64Array(1);
+      floatArr[0] = undefined;
+      expect(Number.isNaN(floatArr[0])).toBe(true);
+    });
+
+    test("object with valueOf coerces with ToNumber", () => {
+      const ta = new Uint8Array(1);
+      ta[0] = { valueOf: () => 200 };
+      expect(ta[0]).toBe(200);
+    });
+
+    test("number value into a BigInt array still throws TypeError", () => {
+      const ta = new BigInt64Array(1);
+      expect(() => { ta[0] = 5; }).toThrow(TypeError);
+    });
+  });
+
+  describe("large array index access (counting-sort scale)", () => {
+    test("fill, mutate the middle, and read back across a big buffer", () => {
+      const len = 1 << 16;
+      const ta = new Int16Array(len);
+      ta.fill(-32768);
+      const offset = 10000;
+      const indices = Array.from({ length: 256 }, (_, i) => i);
+      indices.forEach((i) => {
+        ta[offset + i] = i;
+      });
+      expect(ta[0]).toBe(-32768);
+      expect(ta[offset]).toBe(0);
+      expect(ta[offset + 255]).toBe(255);
+      expect(ta[len - 1]).toBe(-32768);
+    });
+  });
+});

From 5bb736ba502d7e2cb1103543b64a073487051845 Mon Sep 17 00:00:00 2001
From: Johannes Stein <johannesstein@freeze-dev.com>
Date: Sun, 28 Jun 2026 13:45:12 +0100
Subject: [PATCH 2/6] docs(bytecode-vm): note the typed-array unboxed element
 fast path

Document the TryReadIndexedScalar/TryWriteIndexedScalar fast path on the
computed-access cores so the bytecode VM doc stays accurate after the
typed-array element unboxing change.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 docs/bytecode-vm.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/bytecode-vm.md b/docs/bytecode-vm.md
index dee0dcbb8..5d4a1edc7 100644
--- a/docs/bytecode-vm.md
+++ b/docs/bytecode-vm.md
@@ -132,7 +132,7 @@ Hits and fills serve only exact-class `TGocciaObjectValue` / `TGocciaVMLiteralOb
 
 Cached pointers (scope, shape) are compared for identity only and never dereferenced. Scope cache entries carry an entry-version stamp against allocator address reuse; shape entries need none, because shapes are never freed within an engine's lifetime, function templates never outlive their engine, and cross-realm maps stop shape tracking before a foreign realm can cache their owner layout.
 
-Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies.
+Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. A non-BigInt `TGocciaTypedArrayValue` receiver at an array-index key takes an unboxed element fast path (`TryReadIndexedScalar`/`TryWriteIndexedScalar`): reads move the element straight into a register scalar and numeric-scalar writes store it directly, so neither allocates the heap `TGocciaNumberLiteralValue` or index-name string the generic object branch would. BigInt kinds, non-index keys, and non-scalar write values fall through to the boxed path, as do out-of-range/detached reads; numeric-scalar writes to an out-of-range/detached index or an immutable buffer are completed on the fast path as silent no-ops, preserving all value semantics including the observable `ToNumber` ordering of integer-indexed `[[Set]]`.
 
 The current optimization target is reducing bytecode-mode suite time further without diverging interpreter and bytecode semantics.
 

From 4b89bad186bbd520570faeffce9c40185784d2a4 Mon Sep 17 00:00:00 2001
From: Johannes Stein <johannesstein@freeze-dev.com>
Date: Sun, 28 Jun 2026 17:10:11 +0100
Subject: [PATCH 3/6] docs(adr): reject shared value caches as a runtime
 optimization

Record ADR 0080 capturing why interning/pooling boxed TGocciaValue instances to
reduce allocation count does not improve runtime in this FPC codebase, so the
C/C++ "fewer allocations => faster" intuition is not imported a fourth time.

Generalizes ADR 0013 (reject string interning) to boxed numbers with the #900
spike data: a small-int + Infinity/NaN cache on the bytecode VM RegisterToValue
path cut allocations 25% but moved runtime +2.2% (flat-to-worse, interleaved
medians). Notes the narrow exceptions that do pay off (SmallInt 0-255,
special-value singletons / ADR 0002) and the interleaved-measurement guardrail,
and cross-links core-patterns.md.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 ...-reject-value-caches-for-allocation-reduction.md | 13 +++++++++++++
 docs/adr/README.md                                  |  1 +
 docs/core-patterns.md                               |  2 ++
 3 files changed, 16 insertions(+)
 create mode 100644 docs/adr/0080-reject-value-caches-for-allocation-reduction.md

diff --git a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
new file mode 100644
index 000000000..71e213e89
--- /dev/null
+++ b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
@@ -0,0 +1,13 @@
+# Reject shared value caches as a runtime optimization
+
+**Date:** 2026-06-28
+**Area:** `runtime`
+**Pull Request:** [#900](https://github.com/frostney/GocciaScript/pull/900)
+
+Reducing allocation *count* is not, by itself, a runtime lever in this engine, so shared caches of boxed `TGocciaValue` instances — interning or pooling them to avoid allocation — are rejected as a performance optimization. The project has now explored value caching at least three times: the fixed `SmallInt` 0–255 cache (kept, deliberately narrow), dictionary-based string interning (rejected in [ADR 0013](0013-reject-string-interning.md) at −4% across 172 benchmarks), and the boxed-number cache widening described below (rejected). Each confirms the same conclusion; this ADR exists so the C/C++ intuition that "fewer allocations ⇒ faster" is not imported a fourth time.
+
+Alongside the [#900](https://github.com/frostney/GocciaScript/pull/900) typed-array element unboxing, a lazy, GC-pinned cache of boxed small integers (range −32768..1024) plus `±Infinity`/`NaN` singleton reuse was spiked into the bytecode VM's `RegisterToValue` — the register→`TGocciaValue` boxing site that feeds call arguments. On the `sm/TypedArray/sort_large_countingsort.js` workload it cut heap allocations 4,719,119 → 3,534,333 (**−25%, deterministic**), yet runtime did not move: interleaved medians 6920 ms → 7072 ms (**+2.2%, flat-to-worse**), a fibonacci benchmark +0.6% (noise), and boot time unchanged. FreePascal's allocator plus the mark-and-sweep GC make these short-lived boxed values cheap to create and reclaim, so the cache's per-box branch (range check + array index + nil check) offsets whatever the avoided allocation saved — the same mechanism that made string interning a regression.
+
+The narrow exceptions already in the codebase remain in force and are **not** superseded: the fixed-size `SmallInt` 0–255 cache used by `RuntimeCopy` on the literal hot path, and the special-value singletons of [ADR 0002](0002-singleton-special-values.md) (`NaN`, `±0`, `±Infinity`, `Zero`/`One`, `true`/`false`, `null`/`undefined`). Those pay off precisely because they are tiny, fixed-array, single-comparison lookups with a very high hit rate on the path they sit on. Widening the range, moving the cache to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache.
+
+Guardrail for any future attempt: measure with **interleaved** before/after binaries (alternate per repetition, compare medians via the runner's `--bare`), never sequential batches. The first, sequential measurement here falsely showed −13% on the test and +63% on a fibonacci bench purely from machine-load drift, which interleaving erased. Allocation count is deterministic and hardware-independent, but it is not, on its own, evidence of a runtime win. [core-patterns.md § String Interning — Attempted and Rejected](../core-patterns.md#string-interning--attempted-and-rejected). [garbage-collector.md](../garbage-collector.md).
diff --git a/docs/adr/README.md b/docs/adr/README.md
index 5252f9477..077f5753e 100644
--- a/docs/adr/README.md
+++ b/docs/adr/README.md
@@ -89,3 +89,4 @@ Durable architecture and implementation decisions for GocciaScript. New ADRs use
 - [0077 — SameValueZero-keyed ordered store for Map and Set](0077-samevaluezero-ordered-collections.md)
 - [0078 — Thread-local cleanup registry for managed threadvars](0078-thread-local-cleanup-registry.md)
 - [0079 — Keep speculatively-scanned tokens across parenthesized-group probes](0079-keep-speculatively-scanned-tokens.md)
+- [0080 — Reject shared value caches as a runtime optimization](0080-reject-value-caches-for-allocation-reduction.md)
diff --git a/docs/core-patterns.md b/docs/core-patterns.md
index d8fcb486d..541a62fdc 100644
--- a/docs/core-patterns.md
+++ b/docs/core-patterns.md
@@ -392,6 +392,8 @@ String interning (caching `TGocciaStringLiteralValue` instances in a `TDictionar
 
 **Do not re-attempt** dictionary-based string interning. If string allocation becomes a measurable bottleneck in future profiling, consider instead: (a) pre-allocated singletons for a small fixed set of ultra-common strings (like `SmallInt` but for `"length"`, `"undefined"`, etc.), or (b) arena/pool allocation for `TGocciaStringLiteralValue` objects to reduce per-object GC overhead without per-string hashing.
 
+The same result holds for **boxed numbers**: widening the `SmallInt` cache and reusing `±Infinity`/`NaN` singletons in the bytecode VM's `RegisterToValue` boxing path cut allocations ~25% on an allocation-heavy typed-array test but produced **no runtime improvement** (interleaved median +2.2%). Reducing allocation *count* is not, by itself, a runtime lever in this codebase — see [ADR 0080](adr/0080-reject-value-caches-for-allocation-reduction.md) for the data, the narrow exceptions that do pay off, and the interleaved-measurement guardrail.
+
 ## Related documents
 
 - [Architecture](architecture.md) — Pipelines, main layers, design direction, duplication boundaries

From 0c18204fe0c714cfc5cb4a0141f066363b41bfdc Mon Sep 17 00:00:00 2001
From: Johannes Stein <johannesstein@freeze-dev.com>
Date: Sun, 28 Jun 2026 17:18:07 +0100
Subject: [PATCH 4/6] docs: correct the phantom SmallInt 0-255 number cache
 claim

The docs (core-patterns.md, garbage-collector.md) described a "SmallInt cache
for 0-255" used by RuntimeCopy as an accepted, working optimization. It never
existed: there is no array-of-TGocciaNumberLiteralValue anywhere in git history,
and the claim was introduced by a docs-only commit (#302, Apr 2026) with no
implementation. The real number value-reuse is RuntimeCopy returning the
ADR 0002 special-value singletons (0, 1, NaN, +/-Infinity, -0); all other
numbers allocate via Create.

Correct both docs and ADR 0080 (which had repeated the phantom claim) to describe
only the singletons that actually exist, and note that a spiked 0-255 range cache
showed no runtime gain. Surfaced while verifying ADR 0080's "narrow exception".

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../0080-reject-value-caches-for-allocation-reduction.md  | 4 ++--
 docs/core-patterns.md                                     | 8 ++++----
 docs/garbage-collector.md                                 | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
index 71e213e89..0571eb3ef 100644
--- a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
+++ b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
@@ -4,10 +4,10 @@
 **Area:** `runtime`
 **Pull Request:** [#900](https://github.com/frostney/GocciaScript/pull/900)
 
-Reducing allocation *count* is not, by itself, a runtime lever in this engine, so shared caches of boxed `TGocciaValue` instances — interning or pooling them to avoid allocation — are rejected as a performance optimization. The project has now explored value caching at least three times: the fixed `SmallInt` 0–255 cache (kept, deliberately narrow), dictionary-based string interning (rejected in [ADR 0013](0013-reject-string-interning.md) at −4% across 172 benchmarks), and the boxed-number cache widening described below (rejected). Each confirms the same conclusion; this ADR exists so the C/C++ intuition that "fewer allocations ⇒ faster" is not imported a fourth time.
+Reducing allocation *count* is not, by itself, a runtime lever in this engine, so shared caches of boxed `TGocciaValue` instances — interning or pooling them to avoid allocation — are rejected as a performance optimization. The only value reuse the engine actually has is the handful of special-value singletons returned by `RuntimeCopy` and the register-boxing paths (`0`, `1`, `NaN`, `±Infinity`, `-0`; see [ADR 0002](0002-singleton-special-values.md)). Every attempt to add caching *beyond* that fixed set has been measured and rejected: dictionary-based string interning ([ADR 0013](0013-reject-string-interning.md), −4% across 172 benchmarks) and the boxed-number range cache described below. A `SmallInt` 0–255 cache that earlier docs described as if implemented never actually existed in the source (corrected alongside this ADR) — itself a sign of how readily the C/C++ "fewer allocations ⇒ faster" intuition takes hold. This ADR exists so it is not imported again.
 
 Alongside the [#900](https://github.com/frostney/GocciaScript/pull/900) typed-array element unboxing, a lazy, GC-pinned cache of boxed small integers (range −32768..1024) plus `±Infinity`/`NaN` singleton reuse was spiked into the bytecode VM's `RegisterToValue` — the register→`TGocciaValue` boxing site that feeds call arguments. On the `sm/TypedArray/sort_large_countingsort.js` workload it cut heap allocations 4,719,119 → 3,534,333 (**−25%, deterministic**), yet runtime did not move: interleaved medians 6920 ms → 7072 ms (**+2.2%, flat-to-worse**), a fibonacci benchmark +0.6% (noise), and boot time unchanged. FreePascal's allocator plus the mark-and-sweep GC make these short-lived boxed values cheap to create and reclaim, so the cache's per-box branch (range check + array index + nil check) offsets whatever the avoided allocation saved — the same mechanism that made string interning a regression.
 
-The narrow exceptions already in the codebase remain in force and are **not** superseded: the fixed-size `SmallInt` 0–255 cache used by `RuntimeCopy` on the literal hot path, and the special-value singletons of [ADR 0002](0002-singleton-special-values.md) (`NaN`, `±0`, `±Infinity`, `Zero`/`One`, `true`/`false`, `null`/`undefined`). Those pay off precisely because they are tiny, fixed-array, single-comparison lookups with a very high hit rate on the path they sit on. Widening the range, moving the cache to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache.
+The one form of value reuse that does pay off — and is **not** superseded — is the special-value singleton set of [ADR 0002](0002-singleton-special-values.md) (`0`, `1`, `NaN`, `±0`, `±Infinity`, plus `true`/`false`, `null`/`undefined`). It works precisely because it is a tiny, fixed set matched by direct comparison with a very high hit rate on the path it sits on — not an array, not a range, not content-keyed. Widening it to an integer range, moving the reuse to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache.
 
 Guardrail for any future attempt: measure with **interleaved** before/after binaries (alternate per repetition, compare medians via the runner's `--bare`), never sequential batches. The first, sequential measurement here falsely showed −13% on the test and +63% on a fibonacci bench purely from machine-load drift, which interleaving erased. Allocation count is deterministic and hardware-independent, but it is not, on its own, evidence of a runtime win. [core-patterns.md § String Interning — Attempted and Rejected](../core-patterns.md#string-interning--attempted-and-rejected). [garbage-collector.md](../garbage-collector.md).
diff --git a/docs/core-patterns.md b/docs/core-patterns.md
index 541a62fdc..51783080d 100644
--- a/docs/core-patterns.md
+++ b/docs/core-patterns.md
@@ -386,13 +386,13 @@ String interning (caching `TGocciaStringLiteralValue` instances in a `TDictionar
 - **Dictionary lookup cost exceeds allocation cost.** FreePascal's allocator is fast. A `TDictionary.TryGetValue` call involves hashing the string (O(n) in string length) plus a hash-table probe, which is more expensive than simply allocating a short-lived `TGocciaStringLiteralValue` and letting the GC reclaim it later.
 - **Low hit rate on hot paths.** `ToStringLiteral` on numbers produces mostly unique strings (`"42"`, `"3.14"`, etc.) that never hit the cache, paying the hash cost with zero benefit. This path is called frequently in arithmetic-heavy benchmarks.
 - **`RuntimeCopy` is the wrong interception point.** Every string literal evaluation goes through `RuntimeCopy`. Adding a dictionary lookup to this universal hot path penalizes all string operations, including those that create one-off strings (concatenation results, method return values).
-- **GC pressure is not the bottleneck.** The SmallInt cache works for numbers because integer equality is a single comparison. String equality requires content comparison, so the lookup cost scales with string length rather than being O(1).
+- **GC pressure is not the bottleneck.** The number special-value singletons work because the check is a single equality against a fixed set. String equality requires content comparison, so the lookup cost scales with string length rather than being O(1).
 
-**The `SmallInt` cache works because:** integer comparison is a single machine instruction, the cache is a fixed-size array (no hashing), and the hit rate for integers 0–255 is very high in typical code. None of these properties hold for arbitrary strings.
+**The number special-value singletons work because:** they are a tiny fixed set (`0`, `1`, `NaN`, `±Infinity`, `-0`) matched by direct comparison in `RuntimeCopy` — no hashing, no array, no range — with a high hit rate in typical code. There is **no** general small-integer (e.g. 0–255) range cache: earlier revisions of this doc and `garbage-collector.md` described one, but it was never implemented, and a spike that added a small-integer range cache (plus `±Infinity`/`NaN` reuse on the VM boxing path) measured **no runtime gain** — see the boxed-numbers note below. None of the singletons' properties hold for arbitrary strings.
 
-**Do not re-attempt** dictionary-based string interning. If string allocation becomes a measurable bottleneck in future profiling, consider instead: (a) pre-allocated singletons for a small fixed set of ultra-common strings (like `SmallInt` but for `"length"`, `"undefined"`, etc.), or (b) arena/pool allocation for `TGocciaStringLiteralValue` objects to reduce per-object GC overhead without per-string hashing.
+**Do not re-attempt** dictionary-based string interning. If string allocation becomes a measurable bottleneck in future profiling, consider instead: (a) pre-allocated singletons for a small fixed set of ultra-common strings (like the number special-value singletons but for `"length"`, `"undefined"`, etc.), or (b) arena/pool allocation for `TGocciaStringLiteralValue` objects to reduce per-object GC overhead without per-string hashing.
 
-The same result holds for **boxed numbers**: widening the `SmallInt` cache and reusing `±Infinity`/`NaN` singletons in the bytecode VM's `RegisterToValue` boxing path cut allocations ~25% on an allocation-heavy typed-array test but produced **no runtime improvement** (interleaved median +2.2%). Reducing allocation *count* is not, by itself, a runtime lever in this codebase — see [ADR 0080](adr/0080-reject-value-caches-for-allocation-reduction.md) for the data, the narrow exceptions that do pay off, and the interleaved-measurement guardrail.
+The same result holds for **boxed numbers**: adding a small-integer range cache and reusing `±Infinity`/`NaN` singletons in the bytecode VM's `RegisterToValue` boxing path cut allocations ~25% on an allocation-heavy typed-array test but produced **no runtime improvement** (interleaved median +2.2%). Reducing allocation *count* is not, by itself, a runtime lever in this codebase — see [ADR 0080](adr/0080-reject-value-caches-for-allocation-reduction.md) for the data, the one narrow exception that does pay off (the special-value singletons), and the interleaved-measurement guardrail.
 
 ## Related documents
 
diff --git a/docs/garbage-collector.md b/docs/garbage-collector.md
index ac57047df..79d139ec8 100644
--- a/docs/garbage-collector.md
+++ b/docs/garbage-collector.md
@@ -44,7 +44,7 @@ end;
 - **`AfterConstruction` / `BeforeDestruction`** — Every value auto-registers with the thread-local `TGarbageCollector.Instance` upon creation and unregisters before destruction so root sets cannot retain stale object pointers.
 - **`MarkReferences`** — Base implementation sets `FGCMark := GCCurrentMark` (marking the object as alive for the current collection). `AdvanceMark` increments the shared `GCCurrentMark` while the collector lock is held, and `TGarbageCollector.Instance` uses that mark while traversing objects. Subclasses override `MarkReferences` to also mark values they reference (e.g., `TGocciaObjectValue` marks its prototype and property values, `TGocciaFunctionValue` marks its closure scope, `TGocciaArrayValue` marks its elements). The `if GCMarked then Exit;` guard at the top of each override prevents re-visiting objects in cyclic reference graphs.
 - **`TraceWeakReferences` / `SweepWeakReferences`** — Optional hooks for weak containers and weak references. The default implementations do nothing. WeakMap uses `TraceWeakReferences` as an ephemeron pass: if a key is already marked by normal roots, its value is marked, but the key is never marked by the map. WeakMap and WeakSet use `SweepWeakReferences` to remove entries whose keys/values remain unmarked. WeakRef clears an unmarked target, and FinalizationRegistry removes dead cells while enqueueing cleanup jobs for their held values.
-- **`RuntimeCopy`** — Creates a fresh GC-managed copy of the value. Used by the evaluator when evaluating literal expressions: AST-owned literal values are not tracked by the GC, so `RuntimeCopy` produces a runtime value that is. The default implementation returns `Self` (for singletons and complex values). Primitives override this: numbers use the `SmallInt` cache for 0-255, booleans return singletons, strings create new instances (cheap due to copy-on-write).
+- **`RuntimeCopy`** — Creates a fresh GC-managed copy of the value. Used by the evaluator when evaluating literal expressions: AST-owned literal values are not tracked by the GC, so `RuntimeCopy` produces a runtime value that is. The default implementation returns `Self` (for singletons and complex values). Primitives override this: numbers reuse the special-value singletons (`0`, `1`, `NaN`, `±Infinity`, `-0`) and otherwise create a fresh instance, booleans return singletons, strings create new instances (cheap due to copy-on-write).
 
 ## Contributor Rules
 
@@ -151,7 +151,7 @@ The separate `memory.heap` JSON object comes from FreePascal's `GetHeapStatus`,
 
 The parser creates `TGocciaValue` instances (numbers, strings, booleans) and stores them inside `TGocciaLiteralExpression` AST nodes. These values are owned by the AST, not the GC. `TGocciaLiteralExpression.Create` calls `TGarbageCollector.Instance.UnregisterObject` to remove the value from GC tracking, and `TGocciaLiteralExpression.Destroy` frees the value (unless it is a singleton like `UndefinedValue`, `TrueValue`, or `FalseValue`).
 
-When the evaluator encounters a literal expression, it calls `Value.RuntimeCopy` to produce a fresh GC-managed runtime value. This cleanly separates compile-time constants (owned by the AST) from runtime values (managed by the GC). The overhead is minimal: integers 0-255 hit the `SmallInt` cache (zero allocation), booleans return singletons, and strings benefit from FreePascal's copy-on-write semantics.
+When the evaluator encounters a literal expression, it calls `Value.RuntimeCopy` to produce a fresh GC-managed runtime value. This cleanly separates compile-time constants (owned by the AST) from runtime values (managed by the GC). The overhead is minimal: `0`, `1`, and the special values (`NaN`, `±Infinity`, `-0`) reuse singletons (zero allocation), other numbers allocate cheaply, booleans return singletons, and strings benefit from FreePascal's copy-on-write semantics.
 
 ## Related Documents
 

From 3f4b371571ac7d3826ae66e6178af3012a4888a1 Mon Sep 17 00:00:00 2001
From: Johannes Stein <johannesstein@freeze-dev.com>
Date: Sun, 28 Jun 2026 17:45:17 +0100
Subject: [PATCH 5/6] docs(adr): quantify the special-value singleton cache
 effect in ADR 0080
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Measured both sides of the boundary so the "kept exception" is data-backed, not
asserted: disabling the singleton reuse costs +786k allocations and ~1.4-1.7% on
the allocation-heavy counting-sort test (within noise on typical integer code),
while widening it to a small-integer range removed more allocations for no
runtime gain (+2.2%). Even the kept cache barely moves runtime — it is retained
because it is free, not because it is a meaningful speedup.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 docs/adr/0080-reject-value-caches-for-allocation-reduction.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
index 0571eb3ef..d9e51398e 100644
--- a/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
+++ b/docs/adr/0080-reject-value-caches-for-allocation-reduction.md
@@ -8,6 +8,6 @@ Reducing allocation *count* is not, by itself, a runtime lever in this engine, s
 
 Alongside the [#900](https://github.com/frostney/GocciaScript/pull/900) typed-array element unboxing, a lazy, GC-pinned cache of boxed small integers (range −32768..1024) plus `±Infinity`/`NaN` singleton reuse was spiked into the bytecode VM's `RegisterToValue` — the register→`TGocciaValue` boxing site that feeds call arguments. On the `sm/TypedArray/sort_large_countingsort.js` workload it cut heap allocations 4,719,119 → 3,534,333 (**−25%, deterministic**), yet runtime did not move: interleaved medians 6920 ms → 7072 ms (**+2.2%, flat-to-worse**), a fibonacci benchmark +0.6% (noise), and boot time unchanged. FreePascal's allocator plus the mark-and-sweep GC make these short-lived boxed values cheap to create and reclaim, so the cache's per-box branch (range check + array index + nil check) offsets whatever the avoided allocation saved — the same mechanism that made string interning a regression.
 
-The one form of value reuse that does pay off — and is **not** superseded — is the special-value singleton set of [ADR 0002](0002-singleton-special-values.md) (`0`, `1`, `NaN`, `±0`, `±Infinity`, plus `true`/`false`, `null`/`undefined`). It works precisely because it is a tiny, fixed set matched by direct comparison with a very high hit rate on the path it sits on — not an array, not a range, not content-keyed. Widening it to an integer range, moving the reuse to a different boxing site, or keying by content loses every one of those properties and lands back in allocator-mitigated territory. If boxed-value allocation ever does show up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache.
+The one form of value reuse worth keeping — and **not** superseded — is the special-value singleton set of [ADR 0002](0002-singleton-special-values.md) (`0`, `-0`, `1`, `NaN`, `±Infinity`, plus `true`/`false`, `null`/`undefined`), reused by `RuntimeCopy` and `RegisterToValue`. It is a tiny, fixed set matched by direct comparison with a high hit rate on the path it sits on — not an array, not a range, not content-keyed. The boundary was measured on both sides: *disabling* the singleton reuse (always allocating) adds 786k allocations but costs only ~1.4–1.7% of runtime on the allocation-heavy `sort_large_countingsort.js` test (within noise on typical integer code), so keeping it is a small, essentially free win; *widening* it to a small-integer range (the spike above) removed more allocations (−1.18M) for no runtime gain (+2.2%). So even the kept cache barely moves runtime, and everything past the narrow fixed set is pure cost — the singleton set is the measured sweet spot, kept because it is free rather than because it is a meaningful speedup. If boxed-value allocation ever shows up as a *measured* bottleneck, the lever to evaluate is arena/pool allocation that lowers per-object GC cost without a per-box lookup — not a content- or range-keyed value cache.
 
 Guardrail for any future attempt: measure with **interleaved** before/after binaries (alternate per repetition, compare medians via the runner's `--bare`), never sequential batches. The first, sequential measurement here falsely showed −13% on the test and +63% on a fibonacci bench purely from machine-load drift, which interleaving erased. Allocation count is deterministic and hardware-independent, but it is not, on its own, evidence of a runtime win. [core-patterns.md § String Interning — Attempted and Rejected](../core-patterns.md#string-interning--attempted-and-rejected). [garbage-collector.md](../garbage-collector.md).

From f718c41099ddbcda78ff85a3dd030e4bd8f3ff47 Mon Sep 17 00:00:00 2001
From: Johannes Stein <johannesstein@freeze-dev.com>
Date: Sun, 28 Jun 2026 17:58:07 +0100
Subject: [PATCH 6/6] docs(bytecode-vm): split typed-array read/write fallback
 rules

Address PR review (coderabbitai): the computed-access note conflated the read and
write fast-path fallbacks. Reads fall through to the boxed path for BigInt kinds,
non-index keys, and out-of-range/detached indices; non-BigInt scalar writes are
handled in place even for out-of-range or immutable cases (store skipped, success
reported) and only fall through for BigInt kinds, non-index keys, or non-scalar
values. Wording now matches the TryReadIndexedScalar/TryWriteIndexedScalar
contracts.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 docs/bytecode-vm.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/bytecode-vm.md b/docs/bytecode-vm.md
index 5d4a1edc7..7d4445cb4 100644
--- a/docs/bytecode-vm.md
+++ b/docs/bytecode-vm.md
@@ -132,7 +132,7 @@ Hits and fills serve only exact-class `TGocciaObjectValue` / `TGocciaVMLiteralOb
 
 Cached pointers (scope, shape) are compared for identity only and never dereferenced. Scope cache entries carry an entry-version stamp against allocator address reuse; shape entries need none, because shapes are never freed within an engine's lifetime, function templates never outlive their engine, and cross-realm maps stop shape tracking before a foreign realm can cache their owner layout.
 
-Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. A non-BigInt `TGocciaTypedArrayValue` receiver at an array-index key takes an unboxed element fast path (`TryReadIndexedScalar`/`TryWriteIndexedScalar`): reads move the element straight into a register scalar and numeric-scalar writes store it directly, so neither allocates the heap `TGocciaNumberLiteralValue` or index-name string the generic object branch would. BigInt kinds, non-index keys, non-scalar write values, and out-of-range/detached/immutable cases fall through to the boxed path, preserving all value semantics including the observable `ToNumber` ordering of integer-indexed `[[Set]]`.
+Computed property access (`OP_ARRAY_GET`/`OP_ARRAY_SET`, `OP_GET_INDEX`/`OP_SET_INDEX`, `OP_DEL_INDEX`) shares one key-classification and receiver-dispatch implementation (`ClassifyPropertyKey` plus the `ExecGet/ExecSet/ExecDeleteComputedProperty` cores in `Goccia.VM.pas`); per-opcode semantic differences are explicit `TGocciaComputedAccessOptions`, not divergent copies. A non-BigInt `TGocciaTypedArrayValue` receiver at an array-index key takes an unboxed element fast path (`TryReadIndexedScalar`/`TryWriteIndexedScalar`): reads move the element straight into a register scalar and numeric-scalar writes store it directly, so neither allocates the heap `TGocciaNumberLiteralValue` or index-name string the generic object branch would. BigInt kinds, non-index keys, and non-scalar write values fall through to the boxed path; an out-of-range or detached **read** does too (yielding `undefined`). A non-BigInt scalar **write**, however, keeps its integer-indexed exotic semantics in place even for an out-of-range index or immutable backing buffer — the store is skipped and reported as successful, never boxed. All value semantics are preserved, including the observable `ToNumber` ordering of integer-indexed `[[Set]]`.
 
 The current optimization target is reducing bytecode-mode suite time further without diverging interpreter and bytecode semantics.