diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ca8bd1c..b497a825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,11 @@ Next - Compares two items structurally: encoding width, definite-vs-indefinite length, chunk boundaries, and map entry order all count - Runs in O(n) time in the encoded byte size with no additional allocations - See also: #96 +- BREAKING: [Fix NaN encoding in `cbor_encode_half` to preserve sign and payload bits](https://github.com/PJK/libcbor/pull/412) + - Previously, all NaN values were encoded as `0x7E00` (positive quiet NaN, zero payload). Now the sign bit and the top 10 mantissa bits are preserved in the half-precision encoding. + - `_cbor_decode_half` now reconstructs the NaN bit pattern faithfully, enabling encode/decode round-trips. Previously it always returned the C `NAN` constant. + - Very small normal floats that previously rounded to `+0` now round to `±0` depending on their sign. + - Clients that relied on all NaNs normalising to `0x7E00` will see different output. See #215. 0.13.0 (2025-08-30) --------------------- diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c index 7629a16f..53e27e41 100644 --- a/src/cbor/encoding.c +++ b/src/cbor/encoding.c @@ -139,8 +139,17 @@ size_t cbor_encode_half(float value, unsigned char* buffer, val & 0x7FFFFFu; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */ if (exp == 0xFF) { /* Infinity or NaNs */ if (isnan(value)) { - // Note: Values of signaling NaNs are discarded. See `cbor_encode_single`. - res = (uint16_t)0x007e00; + /* Preserve the sign bit and NaN payload. The top 10 bits of the 23-bit + * single-precision mantissa map directly onto the 10-bit half-precision + * mantissa. If the payload fits entirely in the bottom 13 bits (which + * cannot be represented in half precision), fall back to a quiet NaN + * to avoid accidentally producing an infinity encoding (mantissa == 0). + * Note: signaling NaN payloads are preserved on a best-effort basis; + * some CPUs canonicalize them to quiet NaNs when loaded into registers. + * See https://github.com/PJK/libcbor/issues/215 */ + uint16_t half_mant = (uint16_t)(mant >> 13u); + if (half_mant == 0) half_mant = 0x0200u; /* quiet NaN fallback */ + res = (uint16_t)((val & 0x80000000u) >> 16u | 0x7C00u | half_mant); } else { // If the mantissa is non-zero, we have a NaN, but those are handled // above. See @@ -156,10 +165,10 @@ size_t cbor_encode_half(float value, unsigned char* buffer, // Now we know that 2^exp <= 0 logically if (logical_exp < -24) { - /* No unambiguous representation exists, this float is not a half float - and is too small to be represented using a half, round off to zero. - Consistent with the reference implementation. */ - res = 0; + /* Too small to represent even as a half-precision subnormal; round to + * zero. The sign bit is preserved so that small negative values round + * to negative zero rather than positive zero. */ + res = (uint16_t)((val & 0x80000000u) >> 16u); } else if (logical_exp < -14) { /* Offset the remaining decimal places by shifting the significand, the value is lost. This is an implementation decision that works around the diff --git a/src/cbor/internal/loaders.c b/src/cbor/internal/loaders.c index b89cf722..2a5b4f07 100644 --- a/src/cbor/internal/loaders.c +++ b/src/cbor/internal/loaders.c @@ -56,13 +56,24 @@ float _cbor_decode_half(unsigned char* halfp) { int half = (halfp[0] << 8) + halfp[1]; int exp = (half >> 10) & 0x1f; int mant = half & 0x3ff; + if (exp == 31 && mant != 0) { + /* NaN: reconstruct as a single-precision NaN, preserving the sign bit and + * the 10-bit half-precision payload by placing it in the top 10 bits of + * the 23-bit single-precision mantissa. This ensures round-trip fidelity + * with cbor_encode_half's NaN encoding. */ + union _cbor_float_helper helper = { + .as_uint = ((uint32_t)(half & 0x8000) << 16) | /* sign */ + 0x7F800000u | /* exponent (all 1s) */ + ((uint32_t)mant << 13)}; /* payload */ + return helper.as_float; + } double val; if (exp == 0) val = ldexp(mant, -24); else if (exp != 31) val = ldexp(mant + 1024, exp - 25); else - val = mant == 0 ? INFINITY : NAN; + val = INFINITY; /* exp == 31, mant == 0: the NaN case is handled above */ return (float)(half & 0x8000 ? -val : val); } diff --git a/test/cbor_serialize_test.c b/test/cbor_serialize_test.c index f84c7241..4dfd58f4 100644 --- a/test/cbor_serialize_test.c +++ b/test/cbor_serialize_test.c @@ -487,7 +487,12 @@ static void test_serialize_half(void** _state _CBOR_UNUSED) { cbor_set_float2(item, NAN); assert_size_equal(3, cbor_serialize(item, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); + /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN). + * Check structural validity: CBOR half marker, all exponent bits set, + * non-zero mantissa (distinguishes NaN from Infinity). */ + assert_int_equal(buffer[0], 0xF9); + assert_true((buffer[1] & 0x7C) == 0x7C); + assert_true((buffer[1] & 0x03) || buffer[2]); assert_size_equal(cbor_serialized_size(item), 3); cbor_decref(&item); } diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c index f7d30247..c55d468d 100644 --- a/test/float_ctrl_encoders_test.c +++ b/test/float_ctrl_encoders_test.c @@ -6,6 +6,7 @@ */ #include +#include #include "assertions.h" #include "cbor.h" @@ -99,11 +100,15 @@ static void test_half(void** _state _CBOR_UNUSED) { assert_half_float_codec_identity(); /* Smaller than the smallest and even the magnitude cannot be represented, - round off to zero */ + round off to zero. Sign bit is preserved. */ assert_size_equal(3, cbor_encode_half(1e-25f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x00}), 3); assert_half_float_codec_identity(); + assert_size_equal(3, cbor_encode_half(-1e-25f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x80, 0x00}), 3); + assert_half_float_codec_identity(); + assert_size_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x02}), 3); assert_half_float_codec_identity(); @@ -117,13 +122,51 @@ static void test_half(void** _state _CBOR_UNUSED) { assert_half_float_codec_identity(); } +/* Check that buffer holds a valid half-precision CBOR NaN with the expected + * sign (0 = positive, non-zero = negative), then verify encode/decode + * round-trip. The exact payload bits are not checked because the C NAN + * constant has platform-specific bit patterns (e.g. MIPS legacy NaN). */ +static void assert_half_nan_and_roundtrip(int negative) { + /* CBOR additional info 0xF9 = half-precision float */ + assert_int_equal(buffer[0], 0xF9); + /* Sign bit */ + if (negative) { + assert_true(buffer[1] & 0x80); + } else { + assert_false(buffer[1] & 0x80); + } + /* All five exponent bits must be set (= 0x1F) for NaN/Inf */ + assert_true((buffer[1] & 0x7C) == 0x7C); + /* Mantissa must be non-zero to distinguish NaN from Infinity */ + assert_true((buffer[1] & 0x03) || buffer[2]); + assert_half_float_codec_identity(); +} + static void test_half_special(void** _state _CBOR_UNUSED) { + /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN + * uses bit 22 = 0 for quiet NaN, opposite of IEEE 754-2008). We only + * check that the result is a structurally valid half-precision NaN and + * that it round-trips correctly. */ assert_size_equal(3, cbor_encode_half(NAN, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(0); assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); + assert_half_nan_and_roundtrip(0); + + /* Negative NaN: sign bit must be preserved. */ + assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512)); + assert_half_nan_and_roundtrip(1); + + /* NaN with payload bits in the top 10 of the mantissa: payload is + * preserved in half precision. Bit pattern 0x7FC02000 has the IEEE quiet + * bit (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. + * This specific bit pattern is used (rather than a C NaN constant) to get + * deterministic results across platforms. */ + float nan_with_payload; + uint32_t nan_bits = 0x7FC02000u; + memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload)); + assert_size_equal(3, cbor_encode_half(nan_with_payload, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x01}), 3); assert_half_float_codec_identity(); }