From d28d4f254f717259e9b5f146d9091579d1ed097f Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 21:25:56 +0100 Subject: [PATCH 1/3] Preserve NaN sign+payload and tiny-float sign in cbor_encode_half cbor_encode_half previously encoded all NaN values as 0x7E00, losing the sign bit and up to 10 payload bits. Negative tiny floats (too small for half precision) were also rounded to +0 instead of -0. - cbor_encode_half: map the top 10 bits of the single-precision mantissa to the half-precision mantissa, preserving both the quiet/signaling bit and the payload. Fall back to a quiet NaN only when the payload fits entirely in the bottom 13 bits (not representable in half precision). Preserve the sign bit in the tiny-float-to-zero rounding path. - _cbor_decode_half: reconstruct the single-precision float bit pattern from the half-precision NaN bits, enabling round-trip fidelity. Fixes https://github.com/PJK/libcbor/issues/215 Co-Authored-By: Claude Sonnet 4.6 --- src/cbor/encoding.c | 21 +++++++++++++++------ src/cbor/internal/loaders.c | 13 ++++++++++++- test/float_ctrl_encoders_test.c | 28 +++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c index 7629a16f..53e27e41 100644 --- a/src/cbor/encoding.c +++ b/src/cbor/encoding.c @@ -139,8 +139,17 @@ size_t cbor_encode_half(float value, unsigned char* buffer, val & 0x7FFFFFu; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */ if (exp == 0xFF) { /* Infinity or NaNs */ if (isnan(value)) { - // Note: Values of signaling NaNs are discarded. See `cbor_encode_single`. - res = (uint16_t)0x007e00; + /* Preserve the sign bit and NaN payload. The top 10 bits of the 23-bit + * single-precision mantissa map directly onto the 10-bit half-precision + * mantissa. If the payload fits entirely in the bottom 13 bits (which + * cannot be represented in half precision), fall back to a quiet NaN + * to avoid accidentally producing an infinity encoding (mantissa == 0). + * Note: signaling NaN payloads are preserved on a best-effort basis; + * some CPUs canonicalize them to quiet NaNs when loaded into registers. + * See https://github.com/PJK/libcbor/issues/215 */ + uint16_t half_mant = (uint16_t)(mant >> 13u); + if (half_mant == 0) half_mant = 0x0200u; /* quiet NaN fallback */ + res = (uint16_t)((val & 0x80000000u) >> 16u | 0x7C00u | half_mant); } else { // If the mantissa is non-zero, we have a NaN, but those are handled // above. See @@ -156,10 +165,10 @@ size_t cbor_encode_half(float value, unsigned char* buffer, // Now we know that 2^exp <= 0 logically if (logical_exp < -24) { - /* No unambiguous representation exists, this float is not a half float - and is too small to be represented using a half, round off to zero. - Consistent with the reference implementation. */ - res = 0; + /* Too small to represent even as a half-precision subnormal; round to + * zero. The sign bit is preserved so that small negative values round + * to negative zero rather than positive zero. */ + res = (uint16_t)((val & 0x80000000u) >> 16u); } else if (logical_exp < -14) { /* Offset the remaining decimal places by shifting the significand, the value is lost. This is an implementation decision that works around the diff --git a/src/cbor/internal/loaders.c b/src/cbor/internal/loaders.c index b89cf722..2a5b4f07 100644 --- a/src/cbor/internal/loaders.c +++ b/src/cbor/internal/loaders.c @@ -56,13 +56,24 @@ float _cbor_decode_half(unsigned char* halfp) { int half = (halfp[0] << 8) + halfp[1]; int exp = (half >> 10) & 0x1f; int mant = half & 0x3ff; + if (exp == 31 && mant != 0) { + /* NaN: reconstruct as a single-precision NaN, preserving the sign bit and + * the 10-bit half-precision payload by placing it in the top 10 bits of + * the 23-bit single-precision mantissa. This ensures round-trip fidelity + * with cbor_encode_half's NaN encoding. */ + union _cbor_float_helper helper = { + .as_uint = ((uint32_t)(half & 0x8000) << 16) | /* sign */ + 0x7F800000u | /* exponent (all 1s) */ + ((uint32_t)mant << 13)}; /* payload */ + return helper.as_float; + } double val; if (exp == 0) val = ldexp(mant, -24); else if (exp != 31) val = ldexp(mant + 1024, exp - 25); else - val = mant == 0 ? INFINITY : NAN; + val = INFINITY; /* exp == 31, mant == 0: the NaN case is handled above */ return (float)(half & 0x8000 ? -val : val); } diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c index f7d30247..923ef5bc 100644 --- a/test/float_ctrl_encoders_test.c +++ b/test/float_ctrl_encoders_test.c @@ -6,6 +6,7 @@ */ #include +#include #include "assertions.h" #include "cbor.h" @@ -99,11 +100,15 @@ static void test_half(void** _state _CBOR_UNUSED) { assert_half_float_codec_identity(); /* Smaller than the smallest and even the magnitude cannot be represented, - round off to zero */ + round off to zero. Sign bit is preserved. */ assert_size_equal(3, cbor_encode_half(1e-25f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x00}), 3); assert_half_float_codec_identity(); + assert_size_equal(3, cbor_encode_half(-1e-25f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x80, 0x00}), 3); + assert_half_float_codec_identity(); + assert_size_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x02}), 3); assert_half_float_codec_identity(); @@ -118,13 +123,34 @@ static void test_half(void** _state _CBOR_UNUSED) { } static void test_half_special(void** _state _CBOR_UNUSED) { + /* NAN is typically a quiet NaN with zero payload; encodes to the canonical + * half-precision quiet NaN. */ assert_size_equal(3, cbor_encode_half(NAN, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); assert_half_float_codec_identity(); + /* nanf("2") has payload 2 (in the low bits of the mantissa), which falls + * entirely in the bottom 13 bits that cannot be represented in half + * precision. The top 10 mantissa bits are 10_0000_0000 (just the quiet + * bit), so the encoded form is the same as NAN. */ assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); assert_half_float_codec_identity(); + + /* Negative NaN: sign bit must be preserved. */ + assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xFE, 0x00}), 3); + assert_half_float_codec_identity(); + + /* NaN with payload bits in the top 10 of the mantissa: payload is + * preserved in half precision. Bit pattern 0x7FC02000 has the quiet bit + * (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. */ + float nan_with_payload; + uint32_t nan_bits = 0x7FC02000u; + memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload)); + assert_size_equal(3, cbor_encode_half(nan_with_payload, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x01}), 3); + assert_half_float_codec_identity(); } static void test_half_infinity(void** _state _CBOR_UNUSED) { From 5d961226cee2409b19c44b034dbf87c4a464e45e Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 21:28:04 +0100 Subject: [PATCH 2/3] Add CHANGELOG entry for NaN half-encoding fix Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ca8bd1c..b497a825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,11 @@ Next - Compares two items structurally: encoding width, definite-vs-indefinite length, chunk boundaries, and map entry order all count - Runs in O(n) time in the encoded byte size with no additional allocations - See also: #96 +- BREAKING: [Fix NaN encoding in `cbor_encode_half` to preserve sign and payload bits](https://github.com/PJK/libcbor/pull/412) + - Previously, all NaN values were encoded as `0x7E00` (positive quiet NaN, zero payload). Now the sign bit and the top 10 mantissa bits are preserved in the half-precision encoding. + - `_cbor_decode_half` now reconstructs the NaN bit pattern faithfully, enabling encode/decode round-trips. Previously it always returned the C `NAN` constant. + - Very small normal floats that previously rounded to `+0` now round to `±0` depending on their sign. + - Clients that relied on all NaNs normalising to `0x7E00` will see different output. See #215. 0.13.0 (2025-08-30) --------------------- From 184d5db75b4f21290a2929c5a171d94e6dfa0937 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 22:03:42 +0100 Subject: [PATCH 3/3] Fix NaN tests for MIPS legacy NaN convention MIPS legacy uses bit 22 = 0 for quiet NaN (opposite of IEEE 754-2008), so the C NAN constant has a different bit pattern there. Tests that asserted cbor_encode_half(NAN) == {0xF9, 0x7E, 0x00} were relying on the old behavior of normalizing all NaN values to 0x7E00. Replace specific-byte assertions on NAN, -NAN, and nanf("2") with structural checks (valid CBOR half marker, exp=0x1F, non-zero mantissa) plus the existing encode/decode round-trip. Keep a deterministic specific-byte assertion for an explicit bit pattern (0x7FC02000) constructed via memcpy, which is independent of the platform's NAN. Co-Authored-By: Claude Sonnet 4.6 --- test/cbor_serialize_test.c | 7 ++++- test/float_ctrl_encoders_test.c | 45 +++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/test/cbor_serialize_test.c b/test/cbor_serialize_test.c index f84c7241..4dfd58f4 100644 --- a/test/cbor_serialize_test.c +++ b/test/cbor_serialize_test.c @@ -487,7 +487,12 @@ static void test_serialize_half(void** _state _CBOR_UNUSED) { cbor_set_float2(item, NAN); assert_size_equal(3, cbor_serialize(item, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); + /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN). + * Check structural validity: CBOR half marker, all exponent bits set, + * non-zero mantissa (distinguishes NaN from Infinity). */ + assert_int_equal(buffer[0], 0xF9); + assert_true((buffer[1] & 0x7C) == 0x7C); + assert_true((buffer[1] & 0x03) || buffer[2]); assert_size_equal(cbor_serialized_size(item), 3); cbor_decref(&item); } diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c index 923ef5bc..c55d468d 100644 --- a/test/float_ctrl_encoders_test.c +++ b/test/float_ctrl_encoders_test.c @@ -122,29 +122,46 @@ static void test_half(void** _state _CBOR_UNUSED) { assert_half_float_codec_identity(); } +/* Check that buffer holds a valid half-precision CBOR NaN with the expected + * sign (0 = positive, non-zero = negative), then verify encode/decode + * round-trip. The exact payload bits are not checked because the C NAN + * constant has platform-specific bit patterns (e.g. MIPS legacy NaN). */ +static void assert_half_nan_and_roundtrip(int negative) { + /* CBOR additional info 0xF9 = half-precision float */ + assert_int_equal(buffer[0], 0xF9); + /* Sign bit */ + if (negative) { + assert_true(buffer[1] & 0x80); + } else { + assert_false(buffer[1] & 0x80); + } + /* All five exponent bits must be set (= 0x1F) for NaN/Inf */ + assert_true((buffer[1] & 0x7C) == 0x7C); + /* Mantissa must be non-zero to distinguish NaN from Infinity */ + assert_true((buffer[1] & 0x03) || buffer[2]); + assert_half_float_codec_identity(); +} + static void test_half_special(void** _state _CBOR_UNUSED) { - /* NAN is typically a quiet NaN with zero payload; encodes to the canonical - * half-precision quiet NaN. */ + /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN + * uses bit 22 = 0 for quiet NaN, opposite of IEEE 754-2008). We only + * check that the result is a structurally valid half-precision NaN and + * that it round-trips correctly. */ assert_size_equal(3, cbor_encode_half(NAN, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(0); - /* nanf("2") has payload 2 (in the low bits of the mantissa), which falls - * entirely in the bottom 13 bits that cannot be represented in half - * precision. The top 10 mantissa bits are 10_0000_0000 (just the quiet - * bit), so the encoded form is the same as NAN. */ assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(0); /* Negative NaN: sign bit must be preserved. */ assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xFE, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(1); /* NaN with payload bits in the top 10 of the mantissa: payload is - * preserved in half precision. Bit pattern 0x7FC02000 has the quiet bit - * (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. */ + * preserved in half precision. Bit pattern 0x7FC02000 has the IEEE quiet + * bit (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. + * This specific bit pattern is used (rather than a C NaN constant) to get + * deterministic results across platforms. */ float nan_with_payload; uint32_t nan_bits = 0x7FC02000u; memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload));