From 76f1bd9df92d06dd6f18e1c45651aa89e0ac1ca6 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Mon, 22 Jun 2026 16:47:55 +0800 Subject: [PATCH] fix: canonicalize double rendering Motivation: Double rendering was split across renderer implementations and depended on platform defaults. JVM emitted uppercase scientific notation, Scala.js/Wasm used different fixed/scientific thresholds, and large whole doubles could expose exact binary64 noise in some manifestation paths. Modification: Centralize JSON double spelling in RenderUtils.renderDouble. Canonicalize scientific notation to lowercase e with explicit sign and two-digit exponent padding. Use the ECMAScript [1e-6, 1e21) fixed-decimal window and BigDecimal.valueOf for out-of-Long whole doubles. Keep TOML aligned and update directional/golden tests. Restore the exact-long direct-write fast paths in Renderer, ByteRenderer, BaseByteRenderer, and BaseCharRenderer so hot JSON rendering stays GC-friendly and does not allocate a temporary String for integer-valued doubles. Result: sjsonnet now renders representative doubles consistently across JVM, JS, Wasm, and Graal without claiming Go/C++ exact-digit compatibility, while preserving the previous zero-allocation integer render path. Examples: - 0.000001 -> 0.000001 - 0.0000001 -> 1e-07 - 1e100 -> 1 followed by 100 zeros Tests: - ./mill -j1 __.checkFormat - ./mill sjsonnet.jvm[3.3.7].test - CI: JVM / Graal Native / JS / Wasm / Native builds passed before squash --- sjsonnet/src/sjsonnet/BaseByteRenderer.scala | 21 +-- sjsonnet/src/sjsonnet/BaseCharRenderer.scala | 21 +-- sjsonnet/src/sjsonnet/BaseRenderer.scala | 10 +- sjsonnet/src/sjsonnet/ByteRenderer.scala | 24 +--- sjsonnet/src/sjsonnet/Renderer.scala | 124 ++++++++++++++---- sjsonnet/src/sjsonnet/TomlRenderer.scala | 10 +- .../go_test_suite/pow6.jsonnet.golden | 2 +- .../double_scientific_notation_format.jsonnet | 30 +++++ ..._scientific_notation_format.jsonnet.golden | 1 + .../test_suite/unparse.jsonnet.golden | 2 +- .../test/src/sjsonnet/EvaluatorTests.scala | 10 +- .../test/src/sjsonnet/RendererTests.scala | 14 +- 12 files changed, 174 insertions(+), 95 deletions(-) create mode 100644 sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet create mode 100644 sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet.golden diff --git a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala index 5389f1a8..0e43d0a4 100644 --- a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala @@ -151,20 +151,13 @@ class BaseByteRenderer[T <: java.io.OutputStream]( case Double.NegativeInfinity => visitNonNullString("-Infinity", -1) case d if java.lang.Double.isNaN(d) => visitNonNullString("NaN", -1) case d => - val i = d.toLong - if (d == i) { - if (i == 0L && java.lang.Double.doubleToRawLongBits(d) != 0L) { - visitFloat64StringParts("-0", -1, -1, index) - } else writeLongDirect(i) - } else if (d % 1 == 0) { - visitFloat64StringParts( - BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString(), - -1, - -1, - index - ) - } else super.visitFloat64(d, index) - flushBuffer() + val l = d.toLong + if (RenderUtils.isExactLongDouble(d, l)) writeLongDirect(l) + else { + flushBuffer() + appendString(RenderUtils.renderDouble(d)) + flushBuffer() + } } flushByteBuilder() out diff --git a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala index f75990fd..5ef284ce 100644 --- a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala @@ -205,20 +205,13 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]( case Double.NegativeInfinity => visitNonNullString("-Infinity", -1) case d if java.lang.Double.isNaN(d) => visitNonNullString("NaN", -1) case d => - val i = d.toLong - if (d == i) { - if (i == 0L && java.lang.Double.doubleToRawLongBits(d) != 0L) { - visitFloat64StringParts("-0", -1, -1, index) - } else writeLongDirect(i) - } else if (d % 1 == 0) { - visitFloat64StringParts( - BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString(), - -1, - -1, - index - ) - } else super.visitFloat64(d, index) - flushBuffer() + val l = d.toLong + if (RenderUtils.isExactLongDouble(d, l)) writeLongDirect(l) + else { + flushBuffer() + appendString(RenderUtils.renderDouble(d)) + flushBuffer() + } } flushCharBuilder() out diff --git a/sjsonnet/src/sjsonnet/BaseRenderer.scala b/sjsonnet/src/sjsonnet/BaseRenderer.scala index 6afe4f00..a9f898b3 100644 --- a/sjsonnet/src/sjsonnet/BaseRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseRenderer.scala @@ -101,16 +101,10 @@ class BaseRenderer[T <: java.io.Writer](out: T, indent: Int = -1, escapeUnicode: case Double.NegativeInfinity => visitString("-Infinity", -1) case d if java.lang.Double.isNaN(d) => visitString("NaN", -1) case d => - val i = d.toLong - if (d == i) { - if (i == 0L && java.lang.Double.doubleToRawLongBits(d) != 0L) - visitFloat64StringParts("-0", -1, -1, index) - else - visitFloat64StringParts(i.toString, -1, -1, index) - } else super.visitFloat64(d, index) + flushBuffer() + out.append(RenderUtils.renderDouble(d)) flushBuffer() } - out } diff --git a/sjsonnet/src/sjsonnet/ByteRenderer.scala b/sjsonnet/src/sjsonnet/ByteRenderer.scala index 1d9990ac..9fcb7d8a 100644 --- a/sjsonnet/src/sjsonnet/ByteRenderer.scala +++ b/sjsonnet/src/sjsonnet/ByteRenderer.scala @@ -46,27 +46,9 @@ class ByteRenderer(out: OutputStream = new java.io.ByteArrayOutputStream(), inde /** Render a double value directly into the byte buffer (no OutputStream return). */ @inline private def renderDouble(d: Double): Unit = { - if (java.lang.Double.compare(d, -0.0) == 0) { - appendString("-0") - return - } - val i = d.toLong - if (d == i && d >= Long.MinValue.toDouble && d < 9223372036854775808.0) { - if (i == 0L && java.lang.Double.doubleToRawLongBits(d) != 0L) { - appendString("-0") - } else { - writeLongDirect(i) - } - } else if (d % 1 == 0) { - appendString( - new java.math.BigDecimal(d) - .setScale(0, java.math.RoundingMode.HALF_EVEN) - .toBigInteger - .toString - ) - } else { - appendString(d.toString) - } + val l = d.toLong + if (RenderUtils.isExactLongDouble(d, l)) writeLongDirect(l) + else appendString(RenderUtils.renderDouble(d)) } override def flushBuffer(): Unit = { diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala index f825a79c..25eca6d4 100644 --- a/sjsonnet/src/sjsonnet/Renderer.scala +++ b/sjsonnet/src/sjsonnet/Renderer.scala @@ -5,7 +5,7 @@ import java.io.{StringWriter, Writer} import upickle.core.{ArrVisitor, ObjVisitor} final class StringBuilderWriter(initialCapacity: Int = 16) extends Writer { - private[this] val builder = new java.lang.StringBuilder(initialCapacity) + private val builder = new java.lang.StringBuilder(initialCapacity) /** * Exposes the underlying [[java.lang.StringBuilder]] for callers that need direct @@ -56,27 +56,11 @@ class Renderer(out: Writer = new StringBuilderWriter(), indent: Int = -1) extends BaseCharRenderer(out, indent) { var newlineBuffered = false override def visitFloat64(d: Double, index: Int): Writer = { - flushBuffer() - if (java.lang.Double.compare(d, -0.0) == 0) { - elemBuilder.ensureLength(2) - elemBuilder.append('-') - elemBuilder.append('0') - flushCharBuilder() - return out - } - val i = d.toLong - if (d == i) { - if (i == 0L && java.lang.Double.doubleToRawLongBits(d) != 0L) { - appendString("-0") - } else { - RenderUtils.appendLong(elemBuilder, i) - } - } else if (d % 1 == 0) { - appendString( - BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString() - ) - } else { - appendString(d.toString) + val l = d.toLong + if (RenderUtils.isExactLongDouble(d, l)) writeLongDirect(l) + else { + flushBuffer() + appendString(RenderUtils.renderDouble(d)) } flushCharBuilder() out @@ -437,24 +421,108 @@ object RenderUtils { // Pre-cached string representations of small integers (0-255) private val intStrCache: Array[String] = Array.tabulate(256)(_.toString) + /** Normalize platform double strings to a stable shortest-round-trip decimal spelling. */ + private[sjsonnet] def formatDoubleString(s: String): String = { + val eIdx = s.indexOf('E') + if (eIdx >= 0) { + normalizeScientificString(s, eIdx) + } else { + val eIdxLower = s.indexOf('e') + if (eIdxLower >= 0) { + normalizeScientificString(s, eIdxLower) + } else s + } + } + + private def normalizeScientificString(s: String, eIdx: Int): String = { + val rawMantissa = s.substring(0, eIdx) + val exp = s.substring(eIdx + 1) + val expValue = Integer.parseInt(exp) + val fixed = formatFixedDecimal(rawMantissa, expValue) + if (fixed != null) return fixed + + val mantissa = trimTrailingFractionZeros(rawMantissa) + val expHasSign = exp.nonEmpty && (exp.charAt(0) == '-' || exp.charAt(0) == '+') + val expSign = if (exp.nonEmpty && exp.charAt(0) == '-') "-" else "+" + val expDigits = if (expHasSign) exp.substring(1) else exp + mantissa + "e" + expSign + padExponent(expDigits) + } + + private def formatFixedDecimal(mantissa: String, exp: Int): String = { + val negative = mantissa.startsWith("-") + val unsigned = if (negative) mantissa.substring(1) else mantissa + val dotIdx = unsigned.indexOf('.') + val digitsBeforeDecimal = if (dotIdx >= 0) dotIdx else unsigned.length + val decimalPoint = digitsBeforeDecimal + exp + + // ECMAScript Number.prototype.toString fixed-notation window: values in + // [1e-6, 1e21) are spelled in fixed decimal form, and values outside that + // window use scientific notation. JSON/Java/Scala.js follow the same + // convention, so this keeps sjsonnet's canonical spelling aligned with + // the platforms users compare against. + if (decimalPoint <= -6 || decimalPoint > 21) return null + + val digits = + if (dotIdx >= 0) unsigned.substring(0, dotIdx) + unsigned.substring(dotIdx + 1) + else unsigned + + val fixed = + if (decimalPoint <= 0) "0." + ("0" * -decimalPoint) + digits + else if (decimalPoint >= digits.length) digits + ("0" * (decimalPoint - digits.length)) + else digits.substring(0, decimalPoint) + "." + digits.substring(decimalPoint) + + val trimmed = trimTrailingFractionZeros(fixed) + if (negative) "-" + trimmed else trimmed + } + + private def trimTrailingFractionZeros(s: String): String = { + val dotIdx = s.indexOf('.') + if (dotIdx < 0) return s + + var end = s.length + while (end > dotIdx + 1 && s.charAt(end - 1) == '0') end -= 1 + if (end == dotIdx + 1) s.substring(0, dotIdx) + else s.substring(0, end) + } + + private def padExponent(expDigits: String): String = + if (expDigits.length < 2) "0" + expDigits else expDigits + /** - * Custom rendering of Doubles used in rendering + * Custom rendering of Doubles used in rendering. + * + * Keeps non-integer doubles in fixed decimal form for the common JSON range, and otherwise + * normalizes scientific notation to lowercase 'e', explicit sign, and minimum 2 exponent digits. */ def renderDouble(d: Double): String = { if (java.lang.Double.compare(d, -0.0) == 0) return "-0" val l = d.toLong - if (l.toDouble == d && d >= Long.MinValue.toDouble && d < 9223372036854775808.0) { - if (l == 0L && java.lang.Double.doubleToRawLongBits(d) != 0L) "-0" - else if (l >= 0 && l < 256) intStrCache(l.toInt) + if (isExactLongDouble(d, l)) { + if (l >= 0 && l < 256) intStrCache(l.toInt) else l.toString } else if (d % 1 == 0) { - new java.math.BigDecimal(d) + // valueOf uses the canonical decimal spelling of the double. The BigDecimal(double) + // constructor exposes the exact binary64 payload, which is surprising for Jsonnet output + // and regresses values such as 1e100 into long implementation-specific decimals. + // HALF_EVEN is fine here: doubles that reach this branch have magnitude >= 2^53, where + // the ULP is >= 1, so no representable value is an exact .5 tie — any rounding mode + // yields the same integer. + java.math.BigDecimal + .valueOf(d) .setScale(0, java.math.RoundingMode.HALF_EVEN) .toBigInteger .toString - } else d.toString + } else formatDoubleString(d.toString) } + private final val LongUpperExclusive = 9223372036854775808.0 + + private[sjsonnet] def isExactLongDouble(d: Double, l: Long): Boolean = + java.lang.Double.compare(d, -0.0) != 0 && + l.toDouble == d && + d >= Long.MinValue.toDouble && + d < LongUpperExclusive + /** Maximum number of digits in a Long value (Long.MinValue = -9223372036854775808, 20 chars). */ private final val MaxLongChars = 20 diff --git a/sjsonnet/src/sjsonnet/TomlRenderer.scala b/sjsonnet/src/sjsonnet/TomlRenderer.scala index a13256ce..5d89f4d8 100644 --- a/sjsonnet/src/sjsonnet/TomlRenderer.scala +++ b/sjsonnet/src/sjsonnet/TomlRenderer.scala @@ -64,8 +64,14 @@ class TomlRenderer( case d if java.lang.Double.compare(d, -0.0) == 0 => out.write("-0") case d if math.round(d).toDouble == d => out.write(java.lang.Long.toString(d.toLong)) case d if d % 1 == 0 => - out.write(BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()) - case d => out.write(java.lang.Double.toString(d)) + out.write( + java.math.BigDecimal + .valueOf(d) + .setScale(0, java.math.RoundingMode.HALF_EVEN) + .toBigInteger + .toString + ) + case d => out.write(RenderUtils.formatDoubleString(java.lang.Double.toString(d))) } flush } diff --git a/sjsonnet/test/resources/go_test_suite/pow6.jsonnet.golden b/sjsonnet/test/resources/go_test_suite/pow6.jsonnet.golden index d9e50fcc..2cf038ef 100644 --- a/sjsonnet/test/resources/go_test_suite/pow6.jsonnet.golden +++ b/sjsonnet/test/resources/go_test_suite/pow6.jsonnet.golden @@ -1 +1 @@ -179754255558440961003405141852969422645592344115533002291982892352895948502368198119887397397672731676249753610143161261923793099747740068033402194866587265229240935848595000635008995481919609139146652429199276611754200187461105610476104737808346329282049695442933380379957601526870077618444919963779365601280 +179754255558440960000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 diff --git a/sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet b/sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet new file mode 100644 index 00000000..c837c307 --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet @@ -0,0 +1,30 @@ +// Verify double rendering uses a stable, shortest-round-trip decimal policy: +// common JSON-range values stay in fixed decimal form; smaller values use +// normalized scientific notation with lowercase 'e', explicit sign, and minimum +// 2 exponent digits. +// Digit count differences (Java Ryu vs Go %g) are documented in sync_ignore. + +local fixedThreshold = "x=" + 0.000001; // Java: 1.0E-6, JS: 0.000001 +local fixedToString = std.toString(0.000001); +local fixedJsonEx = std.manifestJsonEx({a: 0.000001}, " "); +local fracSci = "x=" + 0.0000001; // Java: 1.0E-7, JS: 1e-7 +local expected1e100 = "1" + std.join("", std.makeArray(100, function(_) "0")); + +std.assertEqual(fixedThreshold, "x=0.000001") && +std.assertEqual(fixedToString, "0.000001") && +std.assertEqual(fixedJsonEx, '{\n "a": 0.000001\n}') && +std.assertEqual("x=" + -0.000001, "x=-0.000001") && +std.assertEqual("x=" + 0.00000123, "x=0.00000123") && +std.assertEqual(fracSci, "x=1e-07") && +std.assertEqual(std.toString(0.0000001), "1e-07") && +std.assertEqual(std.manifestJsonEx({a: 0.0000001}, " "), '{\n "a": 1e-07\n}') && +std.assertEqual("x=" + -0.0000001, "x=-1e-07") && +// Large whole numbers still use integer form (matching go-jsonnet) +std.assertEqual("x=" + 1e15, "x=1000000000000000") && +std.assertEqual("x=" + 1e20, "x=100000000000000000000") && +std.assertEqual(std.manifestJson(1e100), expected1e100) && +std.assertEqual(std.manifestToml({a: 1e100}), "a = " + expected1e100) && +// Non-scientific numbers unchanged +std.assertEqual("x=" + 0.1, "x=0.1") && +std.assertEqual("x=" + 1.5, "x=1.5") && +true diff --git a/sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet.golden new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/double_scientific_notation_format.jsonnet.golden @@ -0,0 +1 @@ +true diff --git a/sjsonnet/test/resources/test_suite/unparse.jsonnet.golden b/sjsonnet/test/resources/test_suite/unparse.jsonnet.golden index d15af1e6..73dc0a95 100644 --- a/sjsonnet/test/resources/test_suite/unparse.jsonnet.golden +++ b/sjsonnet/test/resources/test_suite/unparse.jsonnet.golden @@ -6,7 +6,7 @@ "null": null, "number": 0.3333333333333333, "pos_integer": 13212381932, - "small_number": 1.0E-14, + "small_number": 1e-14, "string": "'foo\n bar\n\n\"bar\u0005\"'\t P\b\f\r\\", "string2": "\"foo\n bar\n\n'bar\u0005\"'\t P\b\f\r\\", "string3": "\"foo\\n bar\\n\\n'bar\\u0005\\\"'\\t \\u0050\\b\\f\\r\\\\", diff --git a/sjsonnet/test/src/sjsonnet/EvaluatorTests.scala b/sjsonnet/test/src/sjsonnet/EvaluatorTests.scala index 957708b3..d44a3b9c 100644 --- a/sjsonnet/test/src/sjsonnet/EvaluatorTests.scala +++ b/sjsonnet/test/src/sjsonnet/EvaluatorTests.scala @@ -1136,13 +1136,13 @@ object EvaluatorTests extends TestSuite { } test("largeIntegerDoubleMaterialization") { - // std.manifestJson(1e308) should output full decimal, not "1.0E308" - // go-jsonnet and jrsonnet output 10^308 (1 followed by 308 zeros) - val expected1e308 = "1" + "0" * 308 - eval("""std.manifestJson(1e308)""") ==> ujson.Str(expected1e308) - // 1e100 should also work + // Large whole numbers that don't fit in Long should render as decimal integers, + // without Java scientific notation or exact binary64 noise. val expected1e100 = "1" + "0" * 100 + val expected1e308 = "1" + "0" * 308 eval("""std.manifestJson(1e100)""") ==> ujson.Str(expected1e100) + eval("""std.manifestJson(1e308)""") ==> ujson.Str(expected1e308) + eval("""std.manifestToml({a: 1e100})""") ==> ujson.Str("a = " + expected1e100) } test("assertBooleanTypeCheck") { diff --git a/sjsonnet/test/src/sjsonnet/RendererTests.scala b/sjsonnet/test/src/sjsonnet/RendererTests.scala index ee69e771..140e610e 100644 --- a/sjsonnet/test/src/sjsonnet/RendererTests.scala +++ b/sjsonnet/test/src/sjsonnet/RendererTests.scala @@ -1,6 +1,6 @@ package sjsonnet -import java.io.ByteArrayOutputStream +import java.io.{ByteArrayOutputStream, StringWriter} import utest._ object RendererTests extends TestSuite { @@ -66,6 +66,18 @@ object RendererTests extends TestSuite { ujson.transform(ujson.Num(1e15), new Renderer()).toString ==> "1000000000000000" } + test("baseRendererSpecialFloat64Values") { + def render(d: Double): String = { + val out = new StringWriter + new BaseRenderer(out).visitFloat64(d, -1) + out.toString + } + + render(Double.PositiveInfinity) ==> "\"Infinity\"" + render(Double.NegativeInfinity) ==> "\"-Infinity\"" + render(Double.NaN) ==> "\"NaN\"" + } + test("byteRendererFallbackPreservesCycleContext") { val interpreter = new Interpreter( Map(),