From a8087d081b7429344818f80aa27550a4f65b4729 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sun, 2 Nov 2025 18:39:03 -0600 Subject: [PATCH 1/5] file: use correct size when moving content data after metadata update Fix critical bug in cio_file_write_metadata() where memmove() was using metadata size instead of content data size when moving content after metadata expansion. This caused data corruption when metadata size differed from content size. Also fix available space calculation to properly account for header and metadata when determining if file resize is needed. Fixes issue where updating metadata after writing content data would corrupt the content when metadata grew larger. Signed-off-by: Eduardo Silva --- src/cio_file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/cio_file.c b/src/cio_file.c index 56ac160..5af5832 100644 --- a/src/cio_file.c +++ b/src/cio_file.c @@ -1082,13 +1082,14 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) * where we need to increase the memory map size, move the content area * bytes to a different position and write the metadata. * - * Calculate the available space in the content area. + * Calculate the available space after the new metadata position. + * We need: header + new_metadata + content_data <= alloc_size */ - content_av = cf->alloc_size - cf->data_size; + content_av = cf->alloc_size - CIO_FILE_HEADER_MIN - size; - /* If there is no enough space, increase the file size and it memory map */ - if (content_av < size) { - new_size = (size - meta_av) + cf->data_size + CIO_FILE_HEADER_MIN; + /* If there is no enough space for content data, increase the file size and it memory map */ + if (content_av < cf->data_size) { + new_size = CIO_FILE_HEADER_MIN + size + cf->data_size; ret = cio_file_resize(cf, new_size); @@ -1106,7 +1107,7 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) /* set new position for the content data */ cur_content_data = cio_file_st_get_content(cf->map); new_content_data = meta + size; - memmove(new_content_data, cur_content_data, size); + memmove(new_content_data, cur_content_data, cf->data_size); /* copy new metadata */ memcpy(meta, buf, size); From 69c0aab179a895faaa23eddfe4e5b9d5a175e553 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sun, 2 Nov 2025 18:40:07 -0600 Subject: [PATCH 2/5] test: add unit tests for metadata update with content data Add comprehensive tests to validate that updating metadata after writing content data correctly preserves both metadata and content integrity. The tests specifically cover the scenario where metadata grows and requires content data to be moved. Tests include: - Single metadata update with content data - Multiple metadata updates with varying sizes - Validation after chunk down/up cycles Signed-off-by: Eduardo Silva --- tests/CMakeLists.txt | 3 +- tests/metadata_update.c | 288 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 tests/metadata_update.c diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7cee7e6..9ecc016 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,7 @@ if(CIO_BACKEND_FILESYSTEM) set(UNIT_TESTS_FILES ${UNIT_TESTS_FILES} fs.c + metadata_update.c ) endif() @@ -35,7 +36,7 @@ foreach(source_file ${UNIT_TESTS_FILES}) add_test(${source_file_we} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${source_file_we}) endforeach() -# Perf tests for dev purposes: note these tests are not registered, they need to +# Perf tests for dev purposes: note these tests are not registered, they need to # be executed manually set(UNIT_PERF_TESTS fs_perf.c diff --git a/tests/metadata_update.c b/tests/metadata_update.c new file mode 100644 index 0000000..fc425f3 --- /dev/null +++ b/tests/metadata_update.c @@ -0,0 +1,288 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Chunk I/O + * ========= + * Copyright 2018-2019 Eduardo Silva + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cio_tests_internal.h" + +#define CIO_ENV_META_TEST "/tmp/cio-metadata-update-test/" + +/* Logging callback */ +static int log_cb(struct cio_ctx *ctx, int level, const char *file, int line, + char *str) +{ + (void) ctx; + (void) level; + (void) file; + (void) line; + + printf("[cio-test-metadata] %s\n", str); + return 0; +} + +/* + * Test case: Validate that updating metadata after writing content data + * correctly moves the content data and preserves both metadata and content + * integrity. + * + * This test specifically validates the fix for the bug where memmove() + * was using metadata size instead of content data size when moving content + * after metadata update. + */ +static void test_metadata_update_with_content() +{ + int ret; + int err; + char *meta_buf; + int meta_len; + void *content_buf; + size_t content_size; + size_t expected_content_size; + struct cio_ctx *ctx; + struct cio_chunk *chunk; + struct cio_stream *stream; + struct cio_options cio_opts; + + /* Test data */ + const char *initial_meta = "initial-metadata"; + const char *updated_meta = "this-is-a-much-longer-metadata-string-that-will-require-content-to-be-moved"; + const char *content_data = "This is test content data that must be preserved when metadata is updated."; + const char *more_content = " Additional content appended after metadata update."; + + /* Expected final content */ + char *expected_content; + size_t expected_content_len; + + /* Cleanup any existing test directory */ + cio_utils_recursive_delete(CIO_ENV_META_TEST); + + /* Initialize options */ + cio_options_init(&cio_opts); + cio_opts.root_path = CIO_ENV_META_TEST; + cio_opts.log_cb = log_cb; + cio_opts.log_level = CIO_LOG_INFO; + cio_opts.flags = CIO_CHECKSUM; + + /* Create context */ + ctx = cio_create(&cio_opts); + TEST_CHECK(ctx != NULL); + if (!ctx) { + printf("cannot create context\n"); + exit(1); + } + + /* Create stream */ + stream = cio_stream_create(ctx, "test_stream", CIO_STORE_FS); + TEST_CHECK(stream != NULL); + if (!stream) { + printf("cannot create stream\n"); + cio_destroy(ctx); + exit(1); + } + + /* Create chunk */ + chunk = cio_chunk_open(ctx, stream, "test_chunk", CIO_OPEN, 1000, &err); + TEST_CHECK(chunk != NULL); + if (!chunk) { + printf("cannot open chunk\n"); + cio_destroy(ctx); + exit(1); + } + + /* Step 1: Write initial metadata */ + ret = cio_meta_write(chunk, (char *) initial_meta, strlen(initial_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 2: Write some content data */ + ret = cio_chunk_write(chunk, content_data, strlen(content_data)); + TEST_CHECK(ret == CIO_OK); + + expected_content_size = strlen(content_data); + + /* Step 3: Update metadata to a larger size (this triggers content move) */ + /* This is the critical test case - when metadata grows, content data + * must be moved correctly using cf->data_size, not the metadata size */ + ret = cio_meta_write(chunk, (char *) updated_meta, strlen(updated_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 4: Write more content after metadata update */ + ret = cio_chunk_write(chunk, more_content, strlen(more_content)); + TEST_CHECK(ret == CIO_OK); + + expected_content_size += strlen(more_content); + + /* Build expected content */ + expected_content_len = strlen(content_data) + strlen(more_content); + expected_content = malloc(expected_content_len + 1); + TEST_CHECK(expected_content != NULL); + if (!expected_content) { + cio_destroy(ctx); + exit(1); + } + memcpy(expected_content, content_data, strlen(content_data)); + memcpy(expected_content + strlen(content_data), more_content, strlen(more_content)); + expected_content[expected_content_len] = '\0'; + + /* Step 5: Sync to disk */ + ret = cio_chunk_sync(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Step 6: Put chunk down */ + ret = cio_chunk_down(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Verify chunk is down */ + ret = cio_chunk_is_up(chunk); + TEST_CHECK(ret == CIO_FALSE); + + /* Step 7: Put chunk up again */ + ret = cio_chunk_up(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Verify chunk is up */ + ret = cio_chunk_is_up(chunk); + TEST_CHECK(ret == CIO_TRUE); + + /* Step 8: Validate metadata */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(updated_meta)); + TEST_CHECK(memcmp(meta_buf, updated_meta, strlen(updated_meta)) == 0); + + /* Step 9: Validate content data */ + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == expected_content_size); + TEST_CHECK(memcmp(content_buf, expected_content, expected_content_len) == 0); + + /* Cleanup */ + free(expected_content); + free(content_buf); + cio_destroy(ctx); +} + +/* + * Test case: Update metadata multiple times with varying sizes to ensure + * content data integrity is maintained throughout. + */ +static void test_metadata_multiple_updates() +{ + int ret; + int err; + char *meta_buf; + int meta_len; + void *content_buf; + size_t content_size; + struct cio_ctx *ctx; + struct cio_chunk *chunk; + struct cio_stream *stream; + struct cio_options cio_opts; + const char *test_strings[] = { + "small", + "medium-sized-metadata", + "very-long-metadata-string-that-exceeds-previous-sizes", + "tiny", + "another-medium-metadata-string" + }; + const char *content = "Test content that must remain intact"; + int i; + + /* Cleanup any existing test directory */ + cio_utils_recursive_delete(CIO_ENV_META_TEST); + + /* Initialize options */ + cio_options_init(&cio_opts); + cio_opts.root_path = CIO_ENV_META_TEST; + cio_opts.log_cb = log_cb; + cio_opts.log_level = CIO_LOG_INFO; + cio_opts.flags = CIO_CHECKSUM; + + /* Create context */ + ctx = cio_create(&cio_opts); + TEST_CHECK(ctx != NULL); + + /* Create stream */ + stream = cio_stream_create(ctx, "test_stream", CIO_STORE_FS); + TEST_CHECK(stream != NULL); + + /* Create chunk */ + chunk = cio_chunk_open(ctx, stream, "test_chunk2", CIO_OPEN, 1000, &err); + TEST_CHECK(chunk != NULL); + + /* Write initial content */ + ret = cio_chunk_write(chunk, content, strlen(content)); + TEST_CHECK(ret == CIO_OK); + + /* Update metadata multiple times with different sizes */ + for (i = 0; i < 5; i++) { + ret = cio_meta_write(chunk, (char *) test_strings[i], strlen(test_strings[i])); + TEST_CHECK(ret == CIO_OK); + + /* Verify metadata after each update */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(test_strings[i])); + TEST_CHECK(memcmp(meta_buf, test_strings[i], strlen(test_strings[i])) == 0); + + /* Verify content remains intact */ + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content)); + TEST_CHECK(memcmp(content_buf, content, strlen(content)) == 0); + free(content_buf); + } + + /* Sync and test persistence */ + ret = cio_chunk_sync(chunk); + TEST_CHECK(ret == CIO_OK); + + ret = cio_chunk_down(chunk); + TEST_CHECK(ret == CIO_OK); + + ret = cio_chunk_up(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Final validation after up/down cycle */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(test_strings[4])); + TEST_CHECK(memcmp(meta_buf, test_strings[4], strlen(test_strings[4])) == 0); + + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content)); + TEST_CHECK(memcmp(content_buf, content, strlen(content)) == 0); + + /* Cleanup */ + free(content_buf); + cio_destroy(ctx); +} + +TEST_LIST = { + {"metadata_update_with_content", test_metadata_update_with_content}, + {"metadata_multiple_updates", test_metadata_multiple_updates}, + { 0 } +}; From 9ab71ea4219b1594ade614c172dc90ec45ca0850 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sun, 2 Nov 2025 18:53:06 -0600 Subject: [PATCH 3/5] build: bump to v1.5.4 Signed-off-by: Eduardo Silva --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21f1c3e..5a7f1c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(chunk-io C) set(CIO_VERSION_MAJOR 1) set(CIO_VERSION_MINOR 5) -set(CIO_VERSION_PATCH 3) +set(CIO_VERSION_PATCH 4) set(CIO_VERSION_STR "${CIO_VERSION_MAJOR}.${CIO_VERSION_MINOR}.${CIO_VERSION_PATCH}") # CFLAGS From 5e65fc11883148f66934d7dfeee649f955083df4 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sun, 2 Nov 2025 19:10:01 -0600 Subject: [PATCH 4/5] file: prevent unsigned underflow in cio_file_write_metadata The resize check in cio_file_write_metadata calculated: content_av = alloc_size - CIO_FILE_HEADER_MIN - size When writing large metadata to a chunk with small initial allocation, this subtraction could underflow (wrap to huge unsigned value), causing the resize check to be skipped and leading to out-of-bounds writes. Fixed by checking if resize is needed before any subtraction: if (alloc_size < CIO_FILE_HEADER_MIN + size + data_size) Also added a regression test in tests/fs.c to verify the fix. Signed-off-by: Eduardo Silva --- src/cio_file.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/cio_file.c b/src/cio_file.c index 5af5832..9f1f613 100644 --- a/src/cio_file.c +++ b/src/cio_file.c @@ -1047,7 +1047,6 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) char *cur_content_data; char *new_content_data; size_t new_size; - size_t content_av; size_t meta_av; struct cio_file *cf; @@ -1082,13 +1081,10 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) * where we need to increase the memory map size, move the content area * bytes to a different position and write the metadata. * - * Calculate the available space after the new metadata position. - * We need: header + new_metadata + content_data <= alloc_size + * Check if resize is needed before calculating content_av to avoid + * unsigned underflow. We need: header + new_metadata + content_data <= alloc_size */ - content_av = cf->alloc_size - CIO_FILE_HEADER_MIN - size; - - /* If there is no enough space for content data, increase the file size and it memory map */ - if (content_av < cf->data_size) { + if (cf->alloc_size < CIO_FILE_HEADER_MIN + size + cf->data_size) { new_size = CIO_FILE_HEADER_MIN + size + cf->data_size; ret = cio_file_resize(cf, new_size); From 4904312108fb028db7398edd93cc509b539490c3 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sun, 2 Nov 2025 19:13:46 -0600 Subject: [PATCH 5/5] tests: fs: add regression test for metadata unsigned underflow bug Signed-off-by: Eduardo Silva --- tests/fs.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/tests/fs.c b/tests/fs.c index a976f46..130921d 100644 --- a/tests/fs.c +++ b/tests/fs.c @@ -964,6 +964,143 @@ void test_legacy_failure() test_legacy_core(CIO_TRUE); } +/* + * Test case: Prevent unsigned underflow when writing large metadata to a + * chunk with small initial allocation. + * + * This test specifically validates the fix for the bug where calculating + * content_av = alloc_size - CIO_FILE_HEADER_MIN - size could underflow + * when size is large, causing the resize check to be skipped and leading + * to out-of-bounds writes. + * + * Scenario: + * - Create chunk with minimal initial size (e.g., ~100 bytes page-aligned) + * - Write small metadata (10 bytes) and small content (20 bytes) + * - Write large metadata (80 bytes) that would cause underflow: + * old code: content_av = 100 - 24 - 80 = -4 (wraps to huge unsigned) + * - Verify resize happens correctly and no buffer overrun occurs + */ +static void test_metadata_unsigned_underflow() +{ + int ret; + int err; + char *meta_buf; + int meta_len; + void *content_buf; + size_t content_size; + struct cio_ctx *ctx; + struct cio_chunk *chunk; + struct cio_stream *stream; + struct cio_options cio_opts; + + /* Test data */ + const char *small_meta = "small"; + const char *large_meta = "this-is-a-very-large-metadata-string-that-would-cause-unsigned-underflow-in-old-code"; + const char *content_data = "test-content"; + + /* Cleanup any existing test directory */ + cio_utils_recursive_delete(CIO_ENV); + + /* Initialize options */ + cio_options_init(&cio_opts); + cio_opts.root_path = CIO_ENV; + cio_opts.log_cb = log_cb; + cio_opts.log_level = CIO_LOG_INFO; + cio_opts.flags = CIO_CHECKSUM; + + /* Create context */ + ctx = cio_create(&cio_opts); + TEST_CHECK(ctx != NULL); + if (!ctx) { + printf("cannot create context\n"); + exit(1); + } + + /* Create stream */ + stream = cio_stream_create(ctx, "test_stream_underflow", CIO_STORE_FS); + TEST_CHECK(stream != NULL); + if (!stream) { + printf("cannot create stream\n"); + cio_destroy(ctx); + exit(1); + } + + /* Create chunk with minimal initial size (forces small alloc_size) */ + chunk = cio_chunk_open(ctx, stream, "test_chunk_underflow", CIO_OPEN, 100, &err); + TEST_CHECK(chunk != NULL); + if (!chunk) { + printf("cannot open chunk\n"); + cio_destroy(ctx); + exit(1); + } + + /* Step 1: Write small initial metadata */ + ret = cio_meta_write(chunk, (char *) small_meta, strlen(small_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 2: Write some content data */ + ret = cio_chunk_write(chunk, content_data, strlen(content_data)); + TEST_CHECK(ret == CIO_OK); + + /* Verify initial state */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(small_meta)); + TEST_CHECK(memcmp(meta_buf, small_meta, strlen(small_meta)) == 0); + + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content_data)); + TEST_CHECK(memcmp(content_buf, content_data, strlen(content_data)) == 0); + free(content_buf); + + /* Step 3: Write large metadata that would cause underflow in old code + * This is the critical test - the new metadata (80 bytes) is larger than + * what would fit without resize, and with a small initial alloc_size, + * the old calculation would have underflowed. + */ + ret = cio_meta_write(chunk, (char *) large_meta, strlen(large_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 4: Verify metadata was written correctly */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(large_meta)); + TEST_CHECK(memcmp(meta_buf, large_meta, strlen(large_meta)) == 0); + + /* Step 5: Verify content data integrity - must be preserved */ + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content_data)); + TEST_CHECK(memcmp(content_buf, content_data, strlen(content_data)) == 0); + + /* Step 6: Sync to disk to ensure persistence */ + ret = cio_chunk_sync(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Step 7: Put chunk down and up again to test persistence */ + ret = cio_chunk_down(chunk); + TEST_CHECK(ret == CIO_OK); + + ret = cio_chunk_up(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Step 8: Final validation after persistence cycle */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(large_meta)); + TEST_CHECK(memcmp(meta_buf, large_meta, strlen(large_meta)) == 0); + + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content_data)); + TEST_CHECK(memcmp(content_buf, content_data, strlen(content_data)) == 0); + + /* Cleanup */ + free(content_buf); + cio_destroy(ctx); +} + TEST_LIST = { {"fs_write", test_fs_write}, {"fs_checksum", test_fs_checksum}, @@ -976,5 +1113,6 @@ TEST_LIST = { {"fs_deep_hierachy", test_deep_hierarchy}, {"legacy_success", test_legacy_success}, {"legacy_failure", test_legacy_failure}, + {"metadata_unsigned_underflow", test_metadata_unsigned_underflow}, { 0 } };