From d28d4f254f717259e9b5f146d9091579d1ed097f Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 21:25:56 +0100 Subject: [PATCH 01/12] Preserve NaN sign+payload and tiny-float sign in cbor_encode_half cbor_encode_half previously encoded all NaN values as 0x7E00, losing the sign bit and up to 10 payload bits. Negative tiny floats (too small for half precision) were also rounded to +0 instead of -0. - cbor_encode_half: map the top 10 bits of the single-precision mantissa to the half-precision mantissa, preserving both the quiet/signaling bit and the payload. Fall back to a quiet NaN only when the payload fits entirely in the bottom 13 bits (not representable in half precision). Preserve the sign bit in the tiny-float-to-zero rounding path. - _cbor_decode_half: reconstruct the single-precision float bit pattern from the half-precision NaN bits, enabling round-trip fidelity. Fixes https://github.com/PJK/libcbor/issues/215 Co-Authored-By: Claude Sonnet 4.6 --- src/cbor/encoding.c | 21 +++++++++++++++------ src/cbor/internal/loaders.c | 13 ++++++++++++- test/float_ctrl_encoders_test.c | 28 +++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/cbor/encoding.c b/src/cbor/encoding.c index 7629a16f..53e27e41 100644 --- a/src/cbor/encoding.c +++ b/src/cbor/encoding.c @@ -139,8 +139,17 @@ size_t cbor_encode_half(float value, unsigned char* buffer, val & 0x7FFFFFu; /* 0b0000_0000_0111_1111_1111_1111_1111_1111 */ if (exp == 0xFF) { /* Infinity or NaNs */ if (isnan(value)) { - // Note: Values of signaling NaNs are discarded. See `cbor_encode_single`. - res = (uint16_t)0x007e00; + /* Preserve the sign bit and NaN payload. The top 10 bits of the 23-bit + * single-precision mantissa map directly onto the 10-bit half-precision + * mantissa. If the payload fits entirely in the bottom 13 bits (which + * cannot be represented in half precision), fall back to a quiet NaN + * to avoid accidentally producing an infinity encoding (mantissa == 0). + * Note: signaling NaN payloads are preserved on a best-effort basis; + * some CPUs canonicalize them to quiet NaNs when loaded into registers. + * See https://github.com/PJK/libcbor/issues/215 */ + uint16_t half_mant = (uint16_t)(mant >> 13u); + if (half_mant == 0) half_mant = 0x0200u; /* quiet NaN fallback */ + res = (uint16_t)((val & 0x80000000u) >> 16u | 0x7C00u | half_mant); } else { // If the mantissa is non-zero, we have a NaN, but those are handled // above. See @@ -156,10 +165,10 @@ size_t cbor_encode_half(float value, unsigned char* buffer, // Now we know that 2^exp <= 0 logically if (logical_exp < -24) { - /* No unambiguous representation exists, this float is not a half float - and is too small to be represented using a half, round off to zero. - Consistent with the reference implementation. */ - res = 0; + /* Too small to represent even as a half-precision subnormal; round to + * zero. The sign bit is preserved so that small negative values round + * to negative zero rather than positive zero. */ + res = (uint16_t)((val & 0x80000000u) >> 16u); } else if (logical_exp < -14) { /* Offset the remaining decimal places by shifting the significand, the value is lost. This is an implementation decision that works around the diff --git a/src/cbor/internal/loaders.c b/src/cbor/internal/loaders.c index b89cf722..2a5b4f07 100644 --- a/src/cbor/internal/loaders.c +++ b/src/cbor/internal/loaders.c @@ -56,13 +56,24 @@ float _cbor_decode_half(unsigned char* halfp) { int half = (halfp[0] << 8) + halfp[1]; int exp = (half >> 10) & 0x1f; int mant = half & 0x3ff; + if (exp == 31 && mant != 0) { + /* NaN: reconstruct as a single-precision NaN, preserving the sign bit and + * the 10-bit half-precision payload by placing it in the top 10 bits of + * the 23-bit single-precision mantissa. This ensures round-trip fidelity + * with cbor_encode_half's NaN encoding. */ + union _cbor_float_helper helper = { + .as_uint = ((uint32_t)(half & 0x8000) << 16) | /* sign */ + 0x7F800000u | /* exponent (all 1s) */ + ((uint32_t)mant << 13)}; /* payload */ + return helper.as_float; + } double val; if (exp == 0) val = ldexp(mant, -24); else if (exp != 31) val = ldexp(mant + 1024, exp - 25); else - val = mant == 0 ? INFINITY : NAN; + val = INFINITY; /* exp == 31, mant == 0: the NaN case is handled above */ return (float)(half & 0x8000 ? -val : val); } diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c index f7d30247..923ef5bc 100644 --- a/test/float_ctrl_encoders_test.c +++ b/test/float_ctrl_encoders_test.c @@ -6,6 +6,7 @@ */ #include +#include #include "assertions.h" #include "cbor.h" @@ -99,11 +100,15 @@ static void test_half(void** _state _CBOR_UNUSED) { assert_half_float_codec_identity(); /* Smaller than the smallest and even the magnitude cannot be represented, - round off to zero */ + round off to zero. Sign bit is preserved. */ assert_size_equal(3, cbor_encode_half(1e-25f, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x00}), 3); assert_half_float_codec_identity(); + assert_size_equal(3, cbor_encode_half(-1e-25f, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x80, 0x00}), 3); + assert_half_float_codec_identity(); + assert_size_equal(3, cbor_encode_half(1.1920928955078125e-7, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x00, 0x02}), 3); assert_half_float_codec_identity(); @@ -118,13 +123,34 @@ static void test_half(void** _state _CBOR_UNUSED) { } static void test_half_special(void** _state _CBOR_UNUSED) { + /* NAN is typically a quiet NaN with zero payload; encodes to the canonical + * half-precision quiet NaN. */ assert_size_equal(3, cbor_encode_half(NAN, buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); assert_half_float_codec_identity(); + /* nanf("2") has payload 2 (in the low bits of the mantissa), which falls + * entirely in the bottom 13 bits that cannot be represented in half + * precision. The top 10 mantissa bits are 10_0000_0000 (just the quiet + * bit), so the encoded form is the same as NAN. */ assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512)); assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); assert_half_float_codec_identity(); + + /* Negative NaN: sign bit must be preserved. */ + assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xFE, 0x00}), 3); + assert_half_float_codec_identity(); + + /* NaN with payload bits in the top 10 of the mantissa: payload is + * preserved in half precision. Bit pattern 0x7FC02000 has the quiet bit + * (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. */ + float nan_with_payload; + uint32_t nan_bits = 0x7FC02000u; + memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload)); + assert_size_equal(3, cbor_encode_half(nan_with_payload, buffer, 512)); + assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x01}), 3); + assert_half_float_codec_identity(); } static void test_half_infinity(void** _state _CBOR_UNUSED) { From 5d961226cee2409b19c44b034dbf87c4a464e45e Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 21:28:04 +0100 Subject: [PATCH 02/12] Add CHANGELOG entry for NaN half-encoding fix Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ca8bd1c..b497a825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,11 @@ Next - Compares two items structurally: encoding width, definite-vs-indefinite length, chunk boundaries, and map entry order all count - Runs in O(n) time in the encoded byte size with no additional allocations - See also: #96 +- BREAKING: [Fix NaN encoding in `cbor_encode_half` to preserve sign and payload bits](https://github.com/PJK/libcbor/pull/412) + - Previously, all NaN values were encoded as `0x7E00` (positive quiet NaN, zero payload). Now the sign bit and the top 10 mantissa bits are preserved in the half-precision encoding. + - `_cbor_decode_half` now reconstructs the NaN bit pattern faithfully, enabling encode/decode round-trips. Previously it always returned the C `NAN` constant. + - Very small normal floats that previously rounded to `+0` now round to `±0` depending on their sign. + - Clients that relied on all NaNs normalising to `0x7E00` will see different output. See #215. 0.13.0 (2025-08-30) --------------------- From 184d5db75b4f21290a2929c5a171d94e6dfa0937 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 22:03:42 +0100 Subject: [PATCH 03/12] Fix NaN tests for MIPS legacy NaN convention MIPS legacy uses bit 22 = 0 for quiet NaN (opposite of IEEE 754-2008), so the C NAN constant has a different bit pattern there. Tests that asserted cbor_encode_half(NAN) == {0xF9, 0x7E, 0x00} were relying on the old behavior of normalizing all NaN values to 0x7E00. Replace specific-byte assertions on NAN, -NAN, and nanf("2") with structural checks (valid CBOR half marker, exp=0x1F, non-zero mantissa) plus the existing encode/decode round-trip. Keep a deterministic specific-byte assertion for an explicit bit pattern (0x7FC02000) constructed via memcpy, which is independent of the platform's NAN. Co-Authored-By: Claude Sonnet 4.6 --- test/cbor_serialize_test.c | 7 ++++- test/float_ctrl_encoders_test.c | 45 +++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/test/cbor_serialize_test.c b/test/cbor_serialize_test.c index f84c7241..4dfd58f4 100644 --- a/test/cbor_serialize_test.c +++ b/test/cbor_serialize_test.c @@ -487,7 +487,12 @@ static void test_serialize_half(void** _state _CBOR_UNUSED) { cbor_set_float2(item, NAN); assert_size_equal(3, cbor_serialize(item, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); + /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN). + * Check structural validity: CBOR half marker, all exponent bits set, + * non-zero mantissa (distinguishes NaN from Infinity). */ + assert_int_equal(buffer[0], 0xF9); + assert_true((buffer[1] & 0x7C) == 0x7C); + assert_true((buffer[1] & 0x03) || buffer[2]); assert_size_equal(cbor_serialized_size(item), 3); cbor_decref(&item); } diff --git a/test/float_ctrl_encoders_test.c b/test/float_ctrl_encoders_test.c index 923ef5bc..c55d468d 100644 --- a/test/float_ctrl_encoders_test.c +++ b/test/float_ctrl_encoders_test.c @@ -122,29 +122,46 @@ static void test_half(void** _state _CBOR_UNUSED) { assert_half_float_codec_identity(); } +/* Check that buffer holds a valid half-precision CBOR NaN with the expected + * sign (0 = positive, non-zero = negative), then verify encode/decode + * round-trip. The exact payload bits are not checked because the C NAN + * constant has platform-specific bit patterns (e.g. MIPS legacy NaN). */ +static void assert_half_nan_and_roundtrip(int negative) { + /* CBOR additional info 0xF9 = half-precision float */ + assert_int_equal(buffer[0], 0xF9); + /* Sign bit */ + if (negative) { + assert_true(buffer[1] & 0x80); + } else { + assert_false(buffer[1] & 0x80); + } + /* All five exponent bits must be set (= 0x1F) for NaN/Inf */ + assert_true((buffer[1] & 0x7C) == 0x7C); + /* Mantissa must be non-zero to distinguish NaN from Infinity */ + assert_true((buffer[1] & 0x03) || buffer[2]); + assert_half_float_codec_identity(); +} + static void test_half_special(void** _state _CBOR_UNUSED) { - /* NAN is typically a quiet NaN with zero payload; encodes to the canonical - * half-precision quiet NaN. */ + /* The C NAN constant has platform-specific bits (e.g. MIPS legacy NaN + * uses bit 22 = 0 for quiet NaN, opposite of IEEE 754-2008). We only + * check that the result is a structurally valid half-precision NaN and + * that it round-trips correctly. */ assert_size_equal(3, cbor_encode_half(NAN, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(0); - /* nanf("2") has payload 2 (in the low bits of the mantissa), which falls - * entirely in the bottom 13 bits that cannot be represented in half - * precision. The top 10 mantissa bits are 10_0000_0000 (just the quiet - * bit), so the encoded form is the same as NAN. */ assert_size_equal(3, cbor_encode_half(nanf("2"), buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0x7E, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(0); /* Negative NaN: sign bit must be preserved. */ assert_size_equal(3, cbor_encode_half(-NAN, buffer, 512)); - assert_memory_equal(buffer, ((unsigned char[]){0xF9, 0xFE, 0x00}), 3); - assert_half_float_codec_identity(); + assert_half_nan_and_roundtrip(1); /* NaN with payload bits in the top 10 of the mantissa: payload is - * preserved in half precision. Bit pattern 0x7FC02000 has the quiet bit - * (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. */ + * preserved in half precision. Bit pattern 0x7FC02000 has the IEEE quiet + * bit (bit 22) and bit 13 set; after >> 13 the half mantissa is 0x201. + * This specific bit pattern is used (rather than a C NaN constant) to get + * deterministic results across platforms. */ float nan_with_payload; uint32_t nan_bits = 0x7FC02000u; memcpy(&nan_with_payload, &nan_bits, sizeof(nan_with_payload)); From 624222848b8c8a9e85ef8dd8f53973f80bfafd38 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 22:34:28 +0100 Subject: [PATCH 04/12] Add static and shared linkage test to prevent install regression Adds test/linkage/, a standalone CMake consumer project that builds against an installed libcbor via find_package, for both static (BUILD_SHARED_LIBS=OFF) and shared (BUILD_SHARED_LIBS=ON) builds. This catches the class of bug fixed in #148, where static and shared library artifacts had colliding names on Windows, breaking installs. Adds a run-linkage-tests CircleCI command that installs libcbor to a temp prefix and builds/runs the consumer for both variants. The command runs in build-and-test, build-and-test-clang, and a new dedicated linkage-test CI job. Closes #153 Co-Authored-By: Claude Sonnet 4.6 --- .circleci/config.yml | 35 ++++++++++++++++++++++++++++++++++ test/linkage/CMakeLists.txt | 10 ++++++++++ test/linkage/main.c | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 test/linkage/CMakeLists.txt create mode 100644 test/linkage/main.c diff --git a/.circleci/config.yml b/.circleci/config.yml index 211db3ca..de5d77d1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -42,6 +42,28 @@ commands: -DCMAKE_BUILD_TYPE=Release \ . - run: make -j 16 VERBOSE=1 + run-linkage-tests: + steps: + - run: + name: Linkage test (static) + command: | + BUILD=$(mktemp -d) INST=$(mktemp -d) LB=$(mktemp -d) + cmake -S . -B "$BUILD" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX="$INST" + cmake --build "$BUILD" --parallel + cmake --install "$BUILD" + cmake -S test/linkage -B "$LB" -DCMAKE_PREFIX_PATH="$INST" + cmake --build "$LB" + ctest --test-dir "$LB" -VV + - run: + name: Linkage test (shared) + command: | + BUILD=$(mktemp -d) INST=$(mktemp -d) LB=$(mktemp -d) + cmake -S . -B "$BUILD" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="$INST" + cmake --build "$BUILD" --parallel + cmake --install "$BUILD" + cmake -S test/linkage -B "$LB" -DCMAKE_PREFIX_PATH="$INST" + cmake --build "$LB" + LD_LIBRARY_PATH="$INST/lib" ctest --test-dir "$LB" -VV test: steps: - run: ctest -VV --output-junit ctest_out.xml @@ -100,6 +122,7 @@ jobs: cat Testing/Temporary/MemoryChecker* exit 1 fi; + - run-linkage-tests build-and-test-clang: machine: @@ -113,6 +136,7 @@ jobs: - linux-setup - build - test + - run-linkage-tests build-and-test-32b: machine: @@ -257,6 +281,16 @@ jobs: BEOF cmd //c _build_libcbor.bat + linkage-test: + machine: + <<: *default-machine + environment: + TOOLCHAIN_PACKAGES: g++ + steps: + - checkout + - linux-setup + - run-linkage-tests + build-and-test-mips: &dockcross-job docker: - image: dockcross/linux-mips-lts @@ -297,6 +331,7 @@ workflows: - build-and-test-mipsel - build-and-test-riscv64 - build-bazel + - linkage-test - llvm-coverage # OSX builds are expensive, run only on master - build-and-test-osx: diff --git a/test/linkage/CMakeLists.txt b/test/linkage/CMakeLists.txt new file mode 100644 index 00000000..696fd57a --- /dev/null +++ b/test/linkage/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.5) +project(libcbor_linkage_test C) + +find_package(libcbor REQUIRED) + +add_executable(linkage_test main.c) +target_link_libraries(linkage_test libcbor::libcbor) + +enable_testing() +add_test(NAME linkage_test COMMAND linkage_test) diff --git a/test/linkage/main.c b/test/linkage/main.c new file mode 100644 index 00000000..2fc12340 --- /dev/null +++ b/test/linkage/main.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2014-2020 Pavel Kalvoda + * + * libcbor is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +/* + * Standalone linkage test: verifies that libcbor can be found via + * find_package and linked against from an external project, for both + * static and shared library builds. + */ + +#include +#include +#include "cbor.h" + +int main(void) { + /* Encode an integer and decode it back */ + cbor_item_t* item = cbor_build_uint8(42); + assert(item != NULL); + + unsigned char buf[16]; + size_t len = cbor_serialize(item, buf, sizeof(buf)); + assert(len > 0); + + struct cbor_load_result result; + cbor_item_t* decoded = cbor_load(buf, len, &result); + assert(decoded != NULL); + assert(result.error.code == CBOR_ERR_NONE); + assert(cbor_get_uint8(decoded) == 42); + + cbor_decref(&item); + cbor_decref(&decoded); + + printf("libcbor linkage test passed (version %s)\n", CBOR_VERSION); + return 0; +} From cd330c8abb8cfa6b53a5bba763ac64bf3e741d09 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 22:40:52 +0100 Subject: [PATCH 05/12] Fix linkage test: use Debug build to avoid LTO bitcode in static archive Release builds enable LTO (INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON), which makes clang emit LLVM bitcode into libcbor.a. GNU ld cannot read that format when linking the consumer, causing the static linkage test to fail on the build-and-test-clang job. Switching to Debug avoids LTO entirely while still exercising the install and find_package paths. Co-Authored-By: Claude Sonnet 4.6 --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index de5d77d1..05281b66 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -48,7 +48,7 @@ commands: name: Linkage test (static) command: | BUILD=$(mktemp -d) INST=$(mktemp -d) LB=$(mktemp -d) - cmake -S . -B "$BUILD" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX="$INST" + cmake -S . -B "$BUILD" -DCMAKE_BUILD_TYPE=Debug -DSANITIZE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX="$INST" cmake --build "$BUILD" --parallel cmake --install "$BUILD" cmake -S test/linkage -B "$LB" -DCMAKE_PREFIX_PATH="$INST" @@ -58,7 +58,7 @@ commands: name: Linkage test (shared) command: | BUILD=$(mktemp -d) INST=$(mktemp -d) LB=$(mktemp -d) - cmake -S . -B "$BUILD" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="$INST" + cmake -S . -B "$BUILD" -DCMAKE_BUILD_TYPE=Debug -DSANITIZE=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="$INST" cmake --build "$BUILD" --parallel cmake --install "$BUILD" cmake -S test/linkage -B "$LB" -DCMAKE_PREFIX_PATH="$INST" From 6e46fe400a4b051dee4da08b9202fad7c4b90ef6 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 22:48:29 +0100 Subject: [PATCH 06/12] Remove standalone linkage-test CI job The linkage tests already run as part of build-and-test and build-and-test-clang; the dedicated job is redundant. Co-Authored-By: Claude Sonnet 4.6 --- .circleci/config.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6cc5dffe..53f7fb91 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -307,16 +307,6 @@ jobs: BEOF cmd //c _build_libcbor.bat - linkage-test: - machine: - <<: *default-machine - environment: - TOOLCHAIN_PACKAGES: g++ - steps: - - checkout - - linux-setup - - run-linkage-tests - build-and-test-mips: &dockcross-job docker: - image: dockcross/linux-mips-lts @@ -358,7 +348,6 @@ workflows: - build-and-test-mipsel - build-and-test-riscv64 - build-bazel - - linkage-test - llvm-coverage # OSX builds are expensive, run only on master - build-and-test-osx: From d1c1ec4e3158fa2f73353d6f1cbae52d8b5a47ea Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 23:13:52 +0100 Subject: [PATCH 07/12] Fix infinite loop in streaming_parser example on NEDATA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cbor_stream_decode returns CBOR_DECODER_NEDATA with read=0 (e.g. truncated multi-byte value), the example loop would spin forever. Since the file is fully loaded into memory, NEDATA means the input is truncated — treat it as an error and stop. Co-Authored-By: Claude Sonnet 4.6 --- examples/streaming_parser.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/examples/streaming_parser.c b/examples/streaming_parser.c index 0c7a0b59..4d70a3de 100644 --- a/examples/streaming_parser.c +++ b/examples/streaming_parser.c @@ -54,12 +54,21 @@ int main(int argc, char* argv[]) { while (bytes_read < length) { decode_result = cbor_stream_decode(buffer + bytes_read, length - bytes_read, &callbacks, NULL); - if (decode_result.status == CBOR_DECODER_ERROR) { - fprintf(stderr, "Error at byte %zu\n", bytes_read); - break; + switch (decode_result.status) { + case CBOR_DECODER_FINISHED: + bytes_read += decode_result.read; + break; + case CBOR_DECODER_NEDATA: + // The input was fully loaded into memory, so NEDATA means the data is + // truncated. + fprintf(stderr, "Truncated data at byte %zu\n", bytes_read); + goto done; + case CBOR_DECODER_ERROR: + fprintf(stderr, "Error at byte %zu\n", bytes_read); + goto done; } - bytes_read += decode_result.read; } +done: free(buffer); fclose(f); From b4108f6c1911dc0465d162fd4a9b49ba2093a1d3 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 23:16:05 +0100 Subject: [PATCH 08/12] Fix and clarify cbor_decoder_result.required documentation - Fix typo: the "no data at all" bullet said "#read will be set to 1" but meant "#required will be set to 1" - Clarify that required is the total byte count to present at the same buffer position (not an increment), and add the empty-buffer case Co-Authored-By: Claude Sonnet 4.6 --- src/cbor/data.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/cbor/data.h b/src/cbor/data.h index a12e92f2..2522fec3 100644 --- a/src/cbor/data.h +++ b/src/cbor/data.h @@ -240,19 +240,19 @@ struct cbor_decoder_result { /** The decoding status */ enum cbor_decoder_status status; - /** Number of bytes in the input buffer needed to resume parsing + /** Number of bytes needed in the input buffer to make progress * - * Set to 0 unless the result status is #CBOR_DECODER_NEDATA. If it is, then: - * - If at least one byte was passed, #required will be set to the minimum - * number of bytes needed to invoke a decoded callback on the current - * prefix. + * Set to 0 unless the result status is #CBOR_DECODER_NEDATA. When it is: + * - #required is the total number of bytes that must be present at the + * start of the buffer before calling #cbor_stream_decode again will + * invoke a callback. The caller should accumulate bytes until at least + * #required bytes are available, then retry from the same position. * - * For example: Attempting to decode a 1B buffer containing `0x19` will - * set #required to 3 as `0x19` signals a 2B integer item, so we need at - * least 3B to continue (the `0x19` MTB byte and two bytes of data needed - * to invoke #cbor_callbacks.uint16). + * For example: A 1B buffer containing `0x19` (a 2B uint16 item) will + * set #required to 3 — the MTB byte plus two bytes of data. * - * - If there was no data at all, #read will always be set to 1 + * - If the buffer was empty (source_size == 0), #required is set to 1 + * since at least the initial MTB byte is needed. */ size_t required; }; From 645b420137e43e39b249267214479e95525547c5 Mon Sep 17 00:00:00 2001 From: PJK Date: Sun, 22 Mar 2026 23:57:44 +0100 Subject: [PATCH 09/12] Use if/else + break instead of switch/goto in streaming_parser break inside a switch only exits the switch, not the enclosing loop, so the original fix used goto. Restructuring to if/else avoids both the goto and the ambiguity. Co-Authored-By: Claude Sonnet 4.6 --- examples/streaming_parser.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/examples/streaming_parser.c b/examples/streaming_parser.c index 4d70a3de..0edd3e1d 100644 --- a/examples/streaming_parser.c +++ b/examples/streaming_parser.c @@ -54,21 +54,18 @@ int main(int argc, char* argv[]) { while (bytes_read < length) { decode_result = cbor_stream_decode(buffer + bytes_read, length - bytes_read, &callbacks, NULL); - switch (decode_result.status) { - case CBOR_DECODER_FINISHED: - bytes_read += decode_result.read; - break; - case CBOR_DECODER_NEDATA: - // The input was fully loaded into memory, so NEDATA means the data is - // truncated. - fprintf(stderr, "Truncated data at byte %zu\n", bytes_read); - goto done; - case CBOR_DECODER_ERROR: - fprintf(stderr, "Error at byte %zu\n", bytes_read); - goto done; + if (decode_result.status == CBOR_DECODER_FINISHED) { + bytes_read += decode_result.read; + } else { + // The input was fully loaded into memory, so NEDATA means truncated data. + fprintf(stderr, + decode_result.status == CBOR_DECODER_NEDATA + ? "Truncated data at byte %zu\n" + : "Error at byte %zu\n", + bytes_read); + break; } } -done: free(buffer); fclose(f); From f22589e5e369d0b5ef1ccda7908be8ab12a7e5c4 Mon Sep 17 00:00:00 2001 From: PJK Date: Mon, 23 Mar 2026 00:12:51 +0100 Subject: [PATCH 10/12] Document OOM handling contract for custom streaming callbacks cbor_stream_decode callbacks have no return value, so there is no built-in channel to propagate allocation failures. Add a section to the streaming decoding docs explaining the context-flag pattern and noting that cbor_load handles this internally via CBOR_ERR_MEMERROR. Closes #226 Co-Authored-By: Claude Sonnet 4.6 --- doc/source/api/streaming_decoding.rst | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/doc/source/api/streaming_decoding.rst b/doc/source/api/streaming_decoding.rst index d7cea387..0d032ca2 100644 --- a/doc/source/api/streaming_decoding.rst +++ b/doc/source/api/streaming_decoding.rst @@ -18,6 +18,38 @@ When building custom sets of callbacks, feel free to start from .. doxygenvariable:: cbor_empty_callbacks +Handling allocation failures in callbacks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Callbacks receive no return value, so there is no built-in channel to signal +allocation failure back to :func:`cbor_stream_decode`. When a callback allocates +memory and the allocation fails, the callback must track the failure itself — +typically via a flag in the ``context`` struct passed to +:func:`cbor_stream_decode`: + +.. code-block:: c + + struct my_context { + bool allocation_failed; + /* ... */ + }; + + void my_string_callback(void *context, cbor_data data, uint64_t length) { + struct my_context *ctx = context; + char *copy = malloc(length); + if (copy == NULL) { + ctx->allocation_failed = true; + return; + } + /* ... */ + } + +After each call to :func:`cbor_stream_decode`, check the flag before +continuing. Note that :func:`cbor_load` handles this internally — the +``CBOR_ERR_MEMERROR`` result code is set when a builder callback fails to +allocate memory. + + Callback types definition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 050c5c0197e904ae0dc6d7aede202c8d37af62ab Mon Sep 17 00:00:00 2001 From: PJK Date: Mon, 23 Mar 2026 00:13:54 +0100 Subject: [PATCH 11/12] Generalize callback failure doc: OOM is one case, not the only one Co-Authored-By: Claude Sonnet 4.6 --- doc/source/api/streaming_decoding.rst | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/source/api/streaming_decoding.rst b/doc/source/api/streaming_decoding.rst index 0d032ca2..7a95b47c 100644 --- a/doc/source/api/streaming_decoding.rst +++ b/doc/source/api/streaming_decoding.rst @@ -18,35 +18,39 @@ When building custom sets of callbacks, feel free to start from .. doxygenvariable:: cbor_empty_callbacks -Handling allocation failures in callbacks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Handling failures in callbacks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Callbacks receive no return value, so there is no built-in channel to signal -allocation failure back to :func:`cbor_stream_decode`. When a callback allocates -memory and the allocation fails, the callback must track the failure itself — -typically via a flag in the ``context`` struct passed to -:func:`cbor_stream_decode`: +failure back to :func:`cbor_stream_decode`. Any error that occurs inside a +callback — including allocation failures, validation errors, or application-level +rejections — must be tracked by the callback itself, typically via a flag in the +``context`` struct passed to :func:`cbor_stream_decode`: .. code-block:: c struct my_context { - bool allocation_failed; + bool failed; /* ... */ }; void my_string_callback(void *context, cbor_data data, uint64_t length) { struct my_context *ctx = context; + if (length > MAX_ALLOWED) { + ctx->failed = true; + return; + } char *copy = malloc(length); if (copy == NULL) { - ctx->allocation_failed = true; + ctx->failed = true; return; } /* ... */ } After each call to :func:`cbor_stream_decode`, check the flag before -continuing. Note that :func:`cbor_load` handles this internally — the -``CBOR_ERR_MEMERROR`` result code is set when a builder callback fails to +continuing. Note that :func:`cbor_load` handles allocation failures internally — +the ``CBOR_ERR_MEMERROR`` result code is set when a builder callback fails to allocate memory. From d67945f1e0135e8176aad7140708e4e28091c774 Mon Sep 17 00:00:00 2001 From: PJK Date: Mon, 23 Mar 2026 00:15:30 +0100 Subject: [PATCH 12/12] Clarify CBOR_DECODER_NEDATA comment in cbor_load The old comment "Data length doesn't match MTB expectation" implied a specific mismatch condition; in fact NEDATA means the buffer ran out of bytes anywhere mid-item. Co-Authored-By: Claude Sonnet 4.6 --- src/cbor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cbor.c b/src/cbor.c index af99de1b..49926e33 100644 --- a/src/cbor.c +++ b/src/cbor.c @@ -81,7 +81,7 @@ cbor_item_t* cbor_load(cbor_data source, size_t source_size, break; } case CBOR_DECODER_NEDATA: - /* Data length doesn't match MTB expectation */ + /* Not enough data to complete the current item */ { result->error.code = CBOR_ERR_NOTENOUGHDATA; goto error;