From d28d4f254f717259e9b5f146d9091579d1ed097f Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Sun, 22 Mar 2026 21:25:56 +0100
Subject: [PATCH 1/3] Preserve NaN sign+payload and tiny-float sign in
 cbor_encode_half

cbor_encode_half previously encoded all NaN values as 0x7E00, losing
the sign bit and up to 10 payload bits. Negative tiny floats (too small
for half precision) were also rounded to +0 instead of -0.

- cbor_encode_half: map the top 10 bits of the single-precision mantissa
  to the half-precision mantissa, preserving both the quiet/signaling bit
  and the payload. Fall back to a quiet NaN only when the payload fits
  entirely in the bottom 13 bits (not representable in half precision).
  Preserve the sign bit in the tiny-float-to-zero rounding path.
- _cbor_decode_half: reconstruct the single-precision float bit pattern
  from the half-precision NaN bits, enabling round-trip fidelity.

Fixes https://github.com/PJK/libcbor/issues/215

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/cbor/encoding.c             | 21 +++++++++++++++------
 src/cbor/internal/loaders.c     | 13 ++++++++++++-
 test/float_ctrl_encoders_test.c | 28 +++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c
index 7629a16f..53e27e41 100644
--- a/src/cbor/encoding.c
+++ b/src/cbor/encoding.c
@@ -139,8 +139,17 @@ size_t cbor_encode_half(float value, unsigned char* buffer,
       val & 0x7FFFFFu; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */
   if (exp == 0xFF) {   /* Infinity or NaNs */
     if (isnan(value)) {
-      // Note: Values of signaling NaNs are discarded. See `cbor_encode_single`.
-      res = (uint16_t)0x007e00;
+      /* Preserve the sign bit and NaN payload. The top 10 bits of the 23-bit
+       * single-precision mantissa map directly onto the 10-bit half-precision
+       * mantissa. If the payload fits entirely in the bottom 13 bits (which
+       * cannot be represented in half precision), fall back to a quiet NaN
+       * to avoid accidentally producing an infinity encoding (mantissa == 0).
+       * Note: signaling NaN payloads are preserved on a best-effort basis;
+       * some CPUs canonicalize them to quiet NaNs when loaded into registers.
+       * See https://github.com/PJK/libcbor/issues/215 */
+      uint16_t half_mant = (uint16_t)(mant >> 13u);
+      if (half_mant == 0) half_mant = 0x0200u; /* quiet NaN fallback */
+      res = (uint16_t)((val & 0x80000000u) >> 16u | 0x7C00u | half_mant);
     } else {
       // If the mantissa is non-zero, we have a NaN, but those are handled
       // above. See
@@ -156,10 +165,10 @@ size_t cbor_encode_half(float value, unsigned char* buffer,
 
     // Now we know that 2^exp <= 0 logically
     if (logical_exp < -24) {
-      /* No unambiguous representation exists, this float is not a half float
-         and is too small to be represented using a half, round off to zero.
-         Consistent with the reference implementation. */
-      res = 0;
+      /* Too small to represent even as a half-precision subnormal; round to
+       * zero. The sign bit is preserved so that small negative values round
+       * to negative zero rather than positive zero. */
+      res = (uint16_t)((val & 0x80000000u) >> 16u);
     } else if (logical_exp < -14) {
       /* Offset the remaining decimal places by shifting the significand, the
          value is lost. This is an implementation decision that works around the
diff --git a/src/cbor/internal/loaders.c b/src/cbor/internal/loaders.c
index b89cf722..2a5b4f07 100644
--- a/src/cbor/internal/loaders.c
+++ b/src/cbor/internal/loaders.c
@@ -56,13 +56,24 @@ float _cbor_decode_half(unsigned char* halfp) {
   int half = (halfp[0] << 8) + halfp[1];
   int exp = (half >> 10) & 0x1f;
   int mant = half & 0x3ff;
+  if (exp == 31 && mant != 0) {
+    /* NaN: reconstruct as a single-precision NaN, preserving the sign bit and
+     * the 10-bit half-precision payload by placing it in the top 10 bits of
+     * the 23-bit single-precision mantissa. This ensures round-trip fidelity
+     * with cbor_encode_half's NaN encoding. */
+    union _cbor_float_helper helper = {
+        .as_uint = ((uint32_t)(half & 0x8000) << 16) | /* sign */
+                   0x7F800000u |                       /* exponent (all 1s) */
+                   ((uint32_t)mant << 13)};            /* payload */
+    return helper.as_float;
+  }
   double val;
   if (exp == 0)
     val = ldexp(mant, -24);
   else if (exp != 31)
     val = ldexp(mant + 1024, exp - 25);
   else
-    val = mant == 0 ? INFINITY : NAN;
+    val = INFINITY; /* exp == 31, mant == 0: the NaN case is handled above */
   return (float)(half & 0x8000 ? -val : val);
 }
 
diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c
index f7d30247..923ef5bc 100644
--- a/test/float_ctrl_encoders_test.c
+++ b/test/float_ctrl_encoders_test.c
@@ -6,6 +6,7 @@
  */
 
 #include <math.h>
+#include <string.h>
 #include "assertions.h"
 #include "cbor.h"
 
@@ -99,11 +100,15 @@ static void test_half(void** _state _CBOR_UNUSED) {
   assert_half_float_codec_identity();
 
   /* Smaller than the smallest and even the magnitude cannot be represented,
-     round off to zero */
+     round off to zero. Sign bit is preserved. */
   assert_size_equal(3, cbor_encode_half(1e-25f, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x00}), 3);
   assert_half_float_codec_identity();
 
+  assert_size_equal(3, cbor_encode_half(-1e-25f, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x80, 0x00}), 3);
+  assert_half_float_codec_identity();
+
   assert_size_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x02}), 3);
   assert_half_float_codec_identity();
@@ -118,13 +123,34 @@ static void test_half(void** _state _CBOR_UNUSED) {
 }
 
 static void test_half_special(void** _state _CBOR_UNUSED) {
+  /* NAN is typically a quiet NaN with zero payload; encodes to the canonical
+   * half-precision quiet NaN. */
   assert_size_equal(3, cbor_encode_half(NAN, buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
   assert_half_float_codec_identity();
 
+  /* nanf("2") has payload 2 (in the low bits of the mantissa), which falls
+   * entirely in the bottom 13 bits that cannot be represented in half
+   * precision. The top 10 mantissa bits are 10_0000_0000 (just the quiet
+   * bit), so the encoded form is the same as NAN. */
   assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512));
   assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
   assert_half_float_codec_identity();
+
+  /* Negative NaN: sign bit must be preserved. */
+  assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xFE, 0x00}), 3);
+  assert_half_float_codec_identity();
+
+  /* NaN with payload bits in the top 10 of the mantissa: payload is
+   * preserved in half precision. Bit pattern 0x7FC02000 has the quiet bit
+   * (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. */
+  float nan_with_payload;
+  uint32_t nan_bits = 0x7FC02000u;
+  memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload));
+  assert_size_equal(3, cbor_encode_half(nan_with_payload, buffer, 512));
+  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x01}), 3);
+  assert_half_float_codec_identity();
 }
 
 static void test_half_infinity(void** _state _CBOR_UNUSED) {

From 5d961226cee2409b19c44b034dbf87c4a464e45e Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Sun, 22 Mar 2026 21:28:04 +0100
Subject: [PATCH 2/3] Add CHANGELOG entry for NaN half-encoding fix

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ca8bd1c..b497a825 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,11 @@ Next
   - Compares two items structurally: encoding width, definite-vs-indefinite length, chunk boundaries, and map entry order all count
   - Runs in O(n) time in the encoded byte size with no additional allocations
   - See also: #96
+- BREAKING: [Fix NaN encoding in `cbor_encode_half` to preserve sign and payload bits](https://github.com/PJK/libcbor/pull/412)
+  - Previously, all NaN values were encoded as `0x7E00` (positive quiet NaN, zero payload). Now the sign bit and the top 10 mantissa bits are preserved in the half-precision encoding.
+  - `_cbor_decode_half` now reconstructs the NaN bit pattern faithfully, enabling encode/decode round-trips. Previously it always returned the C `NAN` constant.
+  - Very small normal floats that previously rounded to `+0` now round to `±0` depending on their sign.
+  - Clients that relied on all NaNs normalising to `0x7E00` will see different output. See #215.
 
 0.13.0 (2025-08-30)
 ---------------------

From 184d5db75b4f21290a2929c5a171d94e6dfa0937 Mon Sep 17 00:00:00 2001
From: PJK <me@pavelkalvoda.com>
Date: Sun, 22 Mar 2026 22:03:42 +0100
Subject: [PATCH 3/3] Fix NaN tests for MIPS legacy NaN convention

MIPS legacy uses bit 22 = 0 for quiet NaN (opposite of IEEE 754-2008),
so the C NAN constant has a different bit pattern there. Tests that
asserted cbor_encode_half(NAN) == {0xF9, 0x7E, 0x00} were relying on
the old behavior of normalizing all NaN values to 0x7E00.

Replace specific-byte assertions on NAN, -NAN, and nanf("2") with
structural checks (valid CBOR half marker, exp=0x1F, non-zero mantissa)
plus the existing encode/decode round-trip. Keep a deterministic
specific-byte assertion for an explicit bit pattern (0x7FC02000)
constructed via memcpy, which is independent of the platform's NAN.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 test/cbor_serialize_test.c      |  7 ++++-
 test/float_ctrl_encoders_test.c | 45 +++++++++++++++++++++++----------
 2 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/test/cbor_serialize_test.c b/test/cbor_serialize_test.c
index f84c7241..4dfd58f4 100644
--- a/test/cbor_serialize_test.c
+++ b/test/cbor_serialize_test.c
@@ -487,7 +487,12 @@ static void test_serialize_half(void** _state _CBOR_UNUSED) {
   cbor_set_float2(item, NAN);
 
   assert_size_equal(3, cbor_serialize(item, buffer, 512));
-  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
+  /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN).
+   * Check structural validity: CBOR half marker, all exponent bits set,
+   * non-zero mantissa (distinguishes NaN from Infinity). */
+  assert_int_equal(buffer[0], 0xF9);
+  assert_true((buffer[1] & 0x7C) == 0x7C);
+  assert_true((buffer[1] & 0x03) || buffer[2]);
   assert_size_equal(cbor_serialized_size(item), 3);
   cbor_decref(&item);
 }
diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c
index 923ef5bc..c55d468d 100644
--- a/test/float_ctrl_encoders_test.c
+++ b/test/float_ctrl_encoders_test.c
@@ -122,29 +122,46 @@ static void test_half(void** _state _CBOR_UNUSED) {
   assert_half_float_codec_identity();
 }
 
+/* Check that buffer holds a valid half-precision CBOR NaN with the expected
+ * sign (0 = positive, non-zero = negative), then verify encode/decode
+ * round-trip. The exact payload bits are not checked because the C NAN
+ * constant has platform-specific bit patterns (e.g. MIPS legacy NaN). */
+static void assert_half_nan_and_roundtrip(int negative) {
+  /* CBOR additional info 0xF9 = half-precision float */
+  assert_int_equal(buffer[0], 0xF9);
+  /* Sign bit */
+  if (negative) {
+    assert_true(buffer[1] & 0x80);
+  } else {
+    assert_false(buffer[1] & 0x80);
+  }
+  /* All five exponent bits must be set (= 0x1F) for NaN/Inf */
+  assert_true((buffer[1] & 0x7C) == 0x7C);
+  /* Mantissa must be non-zero to distinguish NaN from Infinity */
+  assert_true((buffer[1] & 0x03) || buffer[2]);
+  assert_half_float_codec_identity();
+}
+
 static void test_half_special(void** _state _CBOR_UNUSED) {
-  /* NAN is typically a quiet NaN with zero payload; encodes to the canonical
-   * half-precision quiet NaN. */
+  /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN
+   * uses bit 22 = 0 for quiet NaN, opposite of IEEE 754-2008). We only
+   * check that the result is a structurally valid half-precision NaN and
+   * that it round-trips correctly. */
   assert_size_equal(3, cbor_encode_half(NAN, buffer, 512));
-  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
-  assert_half_float_codec_identity();
+  assert_half_nan_and_roundtrip(0);
 
-  /* nanf("2") has payload 2 (in the low bits of the mantissa), which falls
-   * entirely in the bottom 13 bits that cannot be represented in half
-   * precision. The top 10 mantissa bits are 10_0000_0000 (just the quiet
-   * bit), so the encoded form is the same as NAN. */
   assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512));
-  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3);
-  assert_half_float_codec_identity();
+  assert_half_nan_and_roundtrip(0);
 
   /* Negative NaN: sign bit must be preserved. */
   assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512));
-  assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xFE, 0x00}), 3);
-  assert_half_float_codec_identity();
+  assert_half_nan_and_roundtrip(1);
 
   /* NaN with payload bits in the top 10 of the mantissa: payload is
-   * preserved in half precision. Bit pattern 0x7FC02000 has the quiet bit
-   * (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. */
+   * preserved in half precision. Bit pattern 0x7FC02000 has the IEEE quiet
+   * bit (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201.
+   * This specific bit pattern is used (rather than a C NaN constant) to get
+   * deterministic results across platforms. */
   float nan_with_payload;
   uint32_t nan_bits = 0x7FC02000u;
   memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload));