diff --git a/benchmarks/numbers.js b/benchmarks/numbers.js index 0d488398..d0b60949 100644 --- a/benchmarks/numbers.js +++ b/benchmarks/numbers.js @@ -2,6 +2,18 @@ description: Number operation benchmarks ---*/ +// Hoisted so the bench measures FormatDouble's shortest-round-trip path rather +// than the per-iteration cost of rebuilding the list and recomputing constants. +const ToStringNonIntegerSamples = [ + 0.1 + 0.2, + Math.PI, + Math.sqrt(2), + Math.E, + 123.456789012345, + 9.18742501042e222, + 5.7016275775556e-8, +]; + suite("number creation", () => { bench("integer arithmetic", { run: () => { @@ -54,6 +66,14 @@ suite("number prototype methods", () => { }, }); + bench("toString non-integer (shortest round-trip)", { + run: () => { + let total = 0; + for (const x of ToStringNonIntegerSamples) total += x.toString().length; + return total; + }, + }); + bench("valueOf", { run: () => { const a = (42).valueOf(); diff --git a/docs/adr/0080-formatdouble-first-hit-precision-scan.md b/docs/adr/0080-formatdouble-first-hit-precision-scan.md new file mode 100644 index 00000000..3d9fc3ed --- /dev/null +++ b/docs/adr/0080-formatdouble-first-hit-precision-scan.md @@ -0,0 +1,12 @@ +# FormatDouble first-hit precision scan + +**Date:** 2026-06-28 +**Area:** `engine` +**Issue:** [#812](https://github.com/frostney/GocciaScript/issues/812) +**Pull Request:** [#899](https://github.com/frostney/GocciaScript/pull/899) + +`FormatDouble` (`Goccia.Values.Primitives`) implements ES2026 §6.1.6.1.20 `Number::toString` for the non-integer case by finding the shortest decimal that round-trips: it scans the `Str(V:W)` precision width `W` from 9 (2 significant digits) to 24 (17 significant digits) and takes the **first** width whose output parses back to the original double. The normative step requires `k` (the digit count) to be "as small as possible", so the shortest representation is a conformance requirement, not a quality-of-implementation nicety. 
This path backs `Number.prototype.toString`, `String(x)`, template interpolation, property-key stringification, and `JSON.stringify` of floats; `toFixed`/`toExponential`/`toPrecision` use a separate `FormatDoubleToPrecision` path and are unaffected. + +Issue #812 proposed replacing the linear scan with a binary search over `W` ("same candidates, fewer probes", assumed low risk). It is not low risk: it is incorrect. A sweep of ~70M doubles (FPC 3.2.2, prod `-O4` with `NOFASTMATH`) found the round-trip predicate `Val(Str(V:W)) = V` is **not monotonic** in `W` — 14,241 general-case values have a width that round-trips, a wider width that does not, then a wider one that does again, because FPC `Str` is not correctly rounded at every width. The upward first-hit scan is robust to these holes (the first hit is still the smallest, hence shortest), but a binary search can probe a hole that lies above the true minimum, discard the lower widths, and converge onto a wider round-tripping width: for 115 of ~60M sampled doubles it selected a wider width and emitted a non-shortest string (for example `9.18742501042000e+222` instead of `9.18742501042e+222`, or `6.110371725116101e+201` instead of `6.1103717251161e+201`), violating "k as small as possible". Every probe-skipping variant (stride, galloping, scan-down-until-false) fails for the same reason. **Decision: the scan stays first-hit-from-the-bottom; binary search and probe-skipping are rejected for this function.** A correct single-pass alternative would be a Ryū/Grisu shortest-representation algorithm, which removes the dependence on `Str`'s per-width rounding entirely; that is a larger spec-exact rewrite left for a future decision. + +The performance concern behind #812 is addressed without changing the algorithm or its output. Each probe now reads `Str(V:W)` into a fixed `ShortString`, strips the right-justification padding in place, and parses with the locale-free `Val` instead of `Trim` + `TryStrToFloat`. 
`Val` selects the identical width — verified byte-for-byte against `TryStrToFloat` over 74.9M doubles with zero divergence — while avoiding the per-iteration heap allocation and the `TFormatSettings` scan. The cost of the probe loop itself is roughly halved; end-to-end the change is about **1.4× faster (−28% execution time)** on a `toString`-dominated float workload (2M `Number.prototype.toString` calls over 15–17-significant-digit doubles, bytecode, `--prod`); the engine's per-call string allocation and dispatch remain a roughly constant overhead around `FormatDouble`. `benchmarks/numbers.js` covers this path (the `toString non-integer` bench). Regression tests in `tests/built-ins/Number/prototype/toString.js` and `tests/built-ins/JSON/stringify.js` lock the exact shortest output for computed fractional values and for several of the non-monotonic "hole" doubles, so any future move to binary search (or any change that lengthens these strings) fails the suite immediately. diff --git a/docs/adr/README.md b/docs/adr/README.md index 5252f947..65b3db02 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -89,3 +89,4 @@ Durable architecture and implementation decisions for GocciaScript. 
New ADRs use - [0077 — SameValueZero-keyed ordered store for Map and Set](0077-samevaluezero-ordered-collections.md) - [0078 — Thread-local cleanup registry for managed threadvars](0078-thread-local-cleanup-registry.md) - [0079 — Keep speculatively-scanned tokens across parenthesized-group probes](0079-keep-speculatively-scanned-tokens.md) +- [0080 — FormatDouble first-hit precision scan](0080-formatdouble-first-hit-precision-scan.md) diff --git a/docs/contributing/code-style.md b/docs/contributing/code-style.md index 7a645dbf..9a9ff0e6 100644 --- a/docs/contributing/code-style.md +++ b/docs/contributing/code-style.md @@ -119,7 +119,7 @@ Use `FormatDouble` (from `Goccia.Values.Primitives`) for any float-to-string con Result := FloatToStr(AValue); Result := FormatFloat('0.###', AValue); -// Correct — ES2026 §6.1.6.1.13 Number::toString, always uses '.' +// Correct — ES2026 §6.1.6.1.20 Number::toString, always uses '.' Result := FormatDouble(AValue); // Correct — formatted output with invariant decimal separator diff --git a/source/units/Goccia.Values.Primitives.pas b/source/units/Goccia.Values.Primitives.pas index 6921d921..f212dd55 100644 --- a/source/units/Goccia.Values.Primitives.pas +++ b/source/units/Goccia.Values.Primitives.pas @@ -161,7 +161,7 @@ TGocciaStringLiteralValue = class(TGocciaValue) procedure PinPrimitiveSingletons; - // ES2026 §6.1.6.1.13 Number::toString(x) + // ES2026 §6.1.6.1.20 Number::toString(x) function FormatDouble(AValue: Double): string; function InvariantFormatSettings: TFormatSettings; @@ -193,7 +193,7 @@ function InvariantFormatSettings: TFormatSettings; Result.DecimalSeparator := '.'; end; -// ES2026 §6.1.6.1.13 Number::toString(x) +// ES2026 §6.1.6.1.20 Number::toString(x) function FormatDouble(AValue: Double): string; procedure FormatES(const AMantissa: string; AK, AN: Integer; ANeg: Boolean; @@ -231,7 +231,8 @@ function FormatDouble(AValue: Double): string; var IsNeg: Boolean; SciStr, Mantissa, TestStr: string; - Exp, N, K, I, W, 
EPos, D: Integer; + Buf: ShortString; + Exp, N, K, I, W, EPos, D, Code: Integer; Parsed: Double; FS: TFormatSettings; begin @@ -290,17 +291,45 @@ function FormatDouble(AValue: Double): string; Exit; end; - // General case: find the shortest round-tripping representation. - // Str(V:W) outputs scientific notation with (W - 7) significant digits - // (for 3-digit exponents) and correctly rounds at each precision level. - // W=9 gives the minimum (2 sig digits), W=24 gives the maximum (17). + // General case: find the shortest round-tripping representation by scanning + // precision upward and taking the FIRST width that parses back exactly. + // Str(V:W) emits scientific notation with (W - 7) significant digits (doubles + // always have a 3-digit decimal exponent); W=9 gives the minimum (2 sig + // digits), W=24 the maximum (17, which always round-trips). + // + // This scan must stay first-hit-from-the-bottom; it must NOT be replaced with + // a binary search or any probe-skipping scheme. FPC Str is not correctly + // rounded at every width, so the "parses back exactly" predicate is not + // monotonic in W. The first hit is still the shortest and spec-correct, but a + // binary search can converge above it and emit a non-shortest string, + // violating "k as small as possible" in ES2026 Number::toString. See + // docs/adr/0080-formatdouble-first-hit-precision-scan.md. + // + // Each probe reads Str into a fixed ShortString and parses with the + // locale-free Val instead of Trim + TryStrToFloat. Val selects the same width + // here (verified byte-for-byte over 74.9M doubles) while avoiding both the + // per-iteration heap allocation and the TFormatSettings scan; this is ~1.4x + // faster end-to-end on float-stringify-heavy workloads (see ADR 0080). 
for W := 9 to 24 do begin - Str(AValue:W, SciStr); - SciStr := Trim(SciStr); + Str(AValue:W, Buf); - if TryStrToFloat(SciStr, Parsed, FS) and (Parsed = AValue) then + // Str right-justifies within width W; AValue is positive here, so the only + // padding is leading spaces. Strip them in place (no heap allocation) before + // parsing: this keeps the round-trip test independent of how Val treats + // leading blanks, and leaves Buf ready for the mantissa extraction on a hit. + if (Length(Buf) > 0) and (Buf[1] = ' ') then begin + I := 2; + while (I <= Length(Buf)) and (Buf[I] = ' ') do + Inc(I); + Delete(Buf, 1, I - 1); + end; + + Val(Buf, Parsed, Code); + if (Code = 0) and (Parsed = AValue) then + begin + SciStr := Buf; EPos := Pos('E', SciStr); Mantissa := Copy(SciStr, 1, EPos - 1); Exp := StrToInt(Copy(SciStr, EPos + 1, Length(SciStr) - EPos)); diff --git a/tests/built-ins/JSON/stringify.js b/tests/built-ins/JSON/stringify.js index 671efade..4e706db0 100644 --- a/tests/built-ins/JSON/stringify.js +++ b/tests/built-ins/JSON/stringify.js @@ -87,6 +87,13 @@ test("JSON.stringify preserves round-trip precision for large fractional floatin expect(JSON.parse(JSON.stringify(value))).toBe(value); }); +test("JSON.stringify emits the shortest round-tripping form for fractional floating-point numbers", () => { + expect(JSON.stringify(0.1 + 0.2)).toBe("0.30000000000000004"); + expect(JSON.stringify(1 / 3)).toBe("0.3333333333333333"); + expect(JSON.stringify(9.18742501042e222)).toBe("9.18742501042e+222"); + expect(JSON.stringify(5.7016275775556e-8)).toBe("5.7016275775556e-8"); +}); + test("JSON.stringify strings with special characters", () => { expect(JSON.stringify("hello\nworld")).toBe('"hello\\nworld"'); expect(JSON.stringify("tab\there")).toBe('"tab\\there"'); diff --git a/tests/built-ins/Number/prototype/toString.js b/tests/built-ins/Number/prototype/toString.js index b3e9fea1..275a3318 100644 --- a/tests/built-ins/Number/prototype/toString.js +++ 
b/tests/built-ins/Number/prototype/toString.js @@ -93,6 +93,22 @@ describe("Number.prototype.toString", () => { expect((0.0000001).toString()).toBe("1e-7"); }); + test("toString returns the shortest round-tripping form for computed fractional values", () => { + expect((0.1 + 0.2).toString()).toBe("0.30000000000000004"); + expect((1 / 3).toString()).toBe("0.3333333333333333"); + expect((2 / 3).toString()).toBe("0.6666666666666666"); + expect(Math.PI.toString()).toBe("3.141592653589793"); + expect(Math.sqrt(2).toString()).toBe("1.4142135623730951"); + }); + + test("toString uses the fewest significant digits for scientific-notation values", () => { + expect(Number.MAX_VALUE.toString()).toBe("1.7976931348623157e+308"); + expect((9.18742501042e222).toString()).toBe("9.18742501042e+222"); + expect((6.1103717251161e201).toString()).toBe("6.1103717251161e+201"); + expect((7.5183158306161e142).toString()).toBe("7.5183158306161e+142"); + expect((5.7016275775556e-8).toString()).toBe("5.7016275775556e-8"); + }); + test("String() coercion matches toString for large integers", () => { expect(String(1e15)).toBe("1000000000000000"); expect(String(1e20)).toBe("100000000000000000000"); @@ -112,6 +128,16 @@ describe("Number.prototype.toString", () => { expect("" + 1e20).toBe("100000000000000000000"); expect("" + 1e21).toBe("1e+21"); }); + + test("String, template, and concatenation coercion share the non-integer shortest round-trip", () => { + expect(String(0.1 + 0.2)).toBe("0.30000000000000004"); + expect(`${0.1 + 0.2}`).toBe("0.30000000000000004"); + expect("" + (0.1 + 0.2)).toBe("0.30000000000000004"); + + expect(String(Math.PI)).toBe("3.141592653589793"); + expect(`${Math.PI}`).toBe("3.141592653589793"); + expect("" + Math.PI).toBe("3.141592653589793"); + }); }); describe("Number.prototype.toString non-finite radix", () => {