diff --git a/CMakeLists.txt b/CMakeLists.txt index 21f1c3e..5a7f1c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(chunk-io C) set(CIO_VERSION_MAJOR 1) set(CIO_VERSION_MINOR 5) -set(CIO_VERSION_PATCH 3) +set(CIO_VERSION_PATCH 4) set(CIO_VERSION_STR "${CIO_VERSION_MAJOR}.${CIO_VERSION_MINOR}.${CIO_VERSION_PATCH}") # CFLAGS diff --git a/src/cio_file.c b/src/cio_file.c index 56ac160..9f1f613 100644 --- a/src/cio_file.c +++ b/src/cio_file.c @@ -1047,7 +1047,6 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) char *cur_content_data; char *new_content_data; size_t new_size; - size_t content_av; size_t meta_av; struct cio_file *cf; @@ -1082,13 +1081,11 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) * where we need to increase the memory map size, move the content area * bytes to a different position and write the metadata. * - * Calculate the available space in the content area. + * Check if resize is needed before calculating content_av to avoid + * unsigned underflow. We need: header + new_metadata + content_data <= alloc_size */ - content_av = cf->alloc_size - cf->data_size; - - /* If there is no enough space, increase the file size and it memory map */ - if (content_av < size) { - new_size = (size - meta_av) + cf->data_size + CIO_FILE_HEADER_MIN; + if (cf->alloc_size < CIO_FILE_HEADER_MIN + size + cf->data_size) { + new_size = CIO_FILE_HEADER_MIN + size + cf->data_size; ret = cio_file_resize(cf, new_size); @@ -1106,7 +1103,7 @@ int cio_file_write_metadata(struct cio_chunk *ch, char *buf, size_t size) /* set new position for the content data */ cur_content_data = cio_file_st_get_content(cf->map); new_content_data = meta + size; - memmove(new_content_data, cur_content_data, size); + memmove(new_content_data, cur_content_data, cf->data_size); /* copy new metadata */ memcpy(meta, buf, size); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7cee7e6..9ecc016 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,7 @@ if(CIO_BACKEND_FILESYSTEM) set(UNIT_TESTS_FILES ${UNIT_TESTS_FILES} fs.c + metadata_update.c ) endif() @@ -35,7 +36,7 @@ foreach(source_file ${UNIT_TESTS_FILES}) add_test(${source_file_we} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${source_file_we}) endforeach() -# Perf tests for dev purposes: note these tests are not registered, they need to +# Perf tests for dev purposes: note these tests are not registered, they need to # be executed manually set(UNIT_PERF_TESTS fs_perf.c diff --git a/tests/fs.c b/tests/fs.c index a976f46..130921d 100644 --- a/tests/fs.c +++ b/tests/fs.c @@ -964,6 +964,143 @@ void test_legacy_failure() test_legacy_core(CIO_TRUE); } +/* + * Test case: Prevent unsigned underflow when writing large metadata to a + * chunk with small initial allocation. + * + * This test specifically validates the fix for the bug where calculating + * content_av = alloc_size - CIO_FILE_HEADER_MIN - size could underflow + * when size is large, causing the resize check to be skipped and leading + * to out-of-bounds writes. + * + * Scenario: + * - Create chunk with minimal initial size (e.g., ~100 bytes page-aligned) + * - Write small metadata (10 bytes) and small content (20 bytes) + * - Write large metadata (80 bytes) that would cause underflow: + * old code: content_av = 100 - 24 - 80 = -4 (wraps to huge unsigned) + * - Verify resize happens correctly and no buffer overrun occurs + */ +static void test_metadata_unsigned_underflow() +{ + int ret; + int err; + char *meta_buf; + int meta_len; + void *content_buf; + size_t content_size; + struct cio_ctx *ctx; + struct cio_chunk *chunk; + struct cio_stream *stream; + struct cio_options cio_opts; + + /* Test data */ + const char *small_meta = "small"; + const char *large_meta = "this-is-a-very-large-metadata-string-that-would-cause-unsigned-underflow-in-old-code"; + const char *content_data = "test-content"; + + /* Cleanup any existing test directory */ + cio_utils_recursive_delete(CIO_ENV); + + /* Initialize options */ + cio_options_init(&cio_opts); + cio_opts.root_path = CIO_ENV; + cio_opts.log_cb = log_cb; + cio_opts.log_level = CIO_LOG_INFO; + cio_opts.flags = CIO_CHECKSUM; + + /* Create context */ + ctx = cio_create(&cio_opts); + TEST_CHECK(ctx != NULL); + if (!ctx) { + printf("cannot create context\n"); + exit(1); + } + + /* Create stream */ + stream = cio_stream_create(ctx, "test_stream_underflow", CIO_STORE_FS); + TEST_CHECK(stream != NULL); + if (!stream) { + printf("cannot create stream\n"); + cio_destroy(ctx); + exit(1); + } + + /* Create chunk with minimal initial size (forces small alloc_size) */ + chunk = cio_chunk_open(ctx, stream, "test_chunk_underflow", CIO_OPEN, 100, &err); + TEST_CHECK(chunk != NULL); + if (!chunk) { + printf("cannot open chunk\n"); + cio_destroy(ctx); + exit(1); + } + + /* Step 1: Write small initial metadata */ + ret = cio_meta_write(chunk, (char *) small_meta, strlen(small_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 2: Write some content data */ + ret = cio_chunk_write(chunk, content_data, strlen(content_data)); + TEST_CHECK(ret == CIO_OK); + + /* Verify initial state */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(small_meta)); + TEST_CHECK(memcmp(meta_buf, small_meta, strlen(small_meta)) == 0); + + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content_data)); + TEST_CHECK(memcmp(content_buf, content_data, strlen(content_data)) == 0); + free(content_buf); + + /* Step 3: Write large metadata that would cause underflow in old code + * This is the critical test - the new metadata (80 bytes) is larger than + * what would fit without resize, and with a small initial alloc_size, + * the old calculation would have underflowed. + */ + ret = cio_meta_write(chunk, (char *) large_meta, strlen(large_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 4: Verify metadata was written correctly */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(large_meta)); + TEST_CHECK(memcmp(meta_buf, large_meta, strlen(large_meta)) == 0); + + /* Step 5: Verify content data integrity - must be preserved */ + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content_data)); + TEST_CHECK(memcmp(content_buf, content_data, strlen(content_data)) == 0); + + /* Step 6: Sync to disk to ensure persistence */ + ret = cio_chunk_sync(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Step 7: Put chunk down and up again to test persistence */ + ret = cio_chunk_down(chunk); + TEST_CHECK(ret == CIO_OK); + + ret = cio_chunk_up(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Step 8: Final validation after persistence cycle */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(large_meta)); + TEST_CHECK(memcmp(meta_buf, large_meta, strlen(large_meta)) == 0); + + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content_data)); + TEST_CHECK(memcmp(content_buf, content_data, strlen(content_data)) == 0); + + /* Cleanup */ + free(content_buf); + cio_destroy(ctx); +} + TEST_LIST = { {"fs_write", test_fs_write}, {"fs_checksum", test_fs_checksum}, @@ -976,5 +1113,6 @@ TEST_LIST = { {"fs_deep_hierachy", test_deep_hierarchy}, {"legacy_success", test_legacy_success}, {"legacy_failure", test_legacy_failure}, + {"metadata_unsigned_underflow", test_metadata_unsigned_underflow}, { 0 } }; diff --git a/tests/metadata_update.c b/tests/metadata_update.c new file mode 100644 index 0000000..fc425f3 --- /dev/null +++ b/tests/metadata_update.c @@ -0,0 +1,288 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Chunk I/O + * ========= + * Copyright 2018-2019 Eduardo Silva + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cio_tests_internal.h" + +#define CIO_ENV_META_TEST "/tmp/cio-metadata-update-test/" + +/* Logging callback */ +static int log_cb(struct cio_ctx *ctx, int level, const char *file, int line, + char *str) +{ + (void) ctx; + (void) level; + (void) file; + (void) line; + + printf("[cio-test-metadata] %s\n", str); + return 0; +} + +/* + * Test case: Validate that updating metadata after writing content data + * correctly moves the content data and preserves both metadata and content + * integrity. + * + * This test specifically validates the fix for the bug where memmove() + * was using metadata size instead of content data size when moving content + * after metadata update. + */ +static void test_metadata_update_with_content() +{ + int ret; + int err; + char *meta_buf; + int meta_len; + void *content_buf; + size_t content_size; + size_t expected_content_size; + struct cio_ctx *ctx; + struct cio_chunk *chunk; + struct cio_stream *stream; + struct cio_options cio_opts; + + /* Test data */ + const char *initial_meta = "initial-metadata"; + const char *updated_meta = "this-is-a-much-longer-metadata-string-that-will-require-content-to-be-moved"; + const char *content_data = "This is test content data that must be preserved when metadata is updated."; + const char *more_content = " Additional content appended after metadata update."; + + /* Expected final content */ + char *expected_content; + size_t expected_content_len; + + /* Cleanup any existing test directory */ + cio_utils_recursive_delete(CIO_ENV_META_TEST); + + /* Initialize options */ + cio_options_init(&cio_opts); + cio_opts.root_path = CIO_ENV_META_TEST; + cio_opts.log_cb = log_cb; + cio_opts.log_level = CIO_LOG_INFO; + cio_opts.flags = CIO_CHECKSUM; + + /* Create context */ + ctx = cio_create(&cio_opts); + TEST_CHECK(ctx != NULL); + if (!ctx) { + printf("cannot create context\n"); + exit(1); + } + + /* Create stream */ + stream = cio_stream_create(ctx, "test_stream", CIO_STORE_FS); + TEST_CHECK(stream != NULL); + if (!stream) { + printf("cannot create stream\n"); + cio_destroy(ctx); + exit(1); + } + + /* Create chunk */ + chunk = cio_chunk_open(ctx, stream, "test_chunk", CIO_OPEN, 1000, &err); + TEST_CHECK(chunk != NULL); + if (!chunk) { + printf("cannot open chunk\n"); + cio_destroy(ctx); + exit(1); + } + + /* Step 1: Write initial metadata */ + ret = cio_meta_write(chunk, (char *) initial_meta, strlen(initial_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 2: Write some content data */ + ret = cio_chunk_write(chunk, content_data, strlen(content_data)); + TEST_CHECK(ret == CIO_OK); + + expected_content_size = strlen(content_data); + + /* Step 3: Update metadata to a larger size (this triggers content move) */ + /* This is the critical test case - when metadata grows, content data + * must be moved correctly using cf->data_size, not the metadata size */ + ret = cio_meta_write(chunk, (char *) updated_meta, strlen(updated_meta)); + TEST_CHECK(ret == CIO_OK); + + /* Step 4: Write more content after metadata update */ + ret = cio_chunk_write(chunk, more_content, strlen(more_content)); + TEST_CHECK(ret == CIO_OK); + + expected_content_size += strlen(more_content); + + /* Build expected content */ + expected_content_len = strlen(content_data) + strlen(more_content); + expected_content = malloc(expected_content_len + 1); + TEST_CHECK(expected_content != NULL); + if (!expected_content) { + cio_destroy(ctx); + exit(1); + } + memcpy(expected_content, content_data, strlen(content_data)); + memcpy(expected_content + strlen(content_data), more_content, strlen(more_content)); + expected_content[expected_content_len] = '\0'; + + /* Step 5: Sync to disk */ + ret = cio_chunk_sync(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Step 6: Put chunk down */ + ret = cio_chunk_down(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Verify chunk is down */ + ret = cio_chunk_is_up(chunk); + TEST_CHECK(ret == CIO_FALSE); + + /* Step 7: Put chunk up again */ + ret = cio_chunk_up(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Verify chunk is up */ + ret = cio_chunk_is_up(chunk); + TEST_CHECK(ret == CIO_TRUE); + + /* Step 8: Validate metadata */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(updated_meta)); + TEST_CHECK(memcmp(meta_buf, updated_meta, strlen(updated_meta)) == 0); + + /* Step 9: Validate content data */ + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == expected_content_size); + TEST_CHECK(memcmp(content_buf, expected_content, expected_content_len) == 0); + + /* Cleanup */ + free(expected_content); + free(content_buf); + cio_destroy(ctx); +} + +/* + * Test case: Update metadata multiple times with varying sizes to ensure + * content data integrity is maintained throughout. + */ +static void test_metadata_multiple_updates() +{ + int ret; + int err; + char *meta_buf; + int meta_len; + void *content_buf; + size_t content_size; + struct cio_ctx *ctx; + struct cio_chunk *chunk; + struct cio_stream *stream; + struct cio_options cio_opts; + const char *test_strings[] = { + "small", + "medium-sized-metadata", + "very-long-metadata-string-that-exceeds-previous-sizes", + "tiny", + "another-medium-metadata-string" + }; + const char *content = "Test content that must remain intact"; + int i; + + /* Cleanup any existing test directory */ + cio_utils_recursive_delete(CIO_ENV_META_TEST); + + /* Initialize options */ + cio_options_init(&cio_opts); + cio_opts.root_path = CIO_ENV_META_TEST; + cio_opts.log_cb = log_cb; + cio_opts.log_level = CIO_LOG_INFO; + cio_opts.flags = CIO_CHECKSUM; + + /* Create context */ + ctx = cio_create(&cio_opts); + TEST_CHECK(ctx != NULL); + + /* Create stream */ + stream = cio_stream_create(ctx, "test_stream", CIO_STORE_FS); + TEST_CHECK(stream != NULL); + + /* Create chunk */ + chunk = cio_chunk_open(ctx, stream, "test_chunk2", CIO_OPEN, 1000, &err); + TEST_CHECK(chunk != NULL); + + /* Write initial content */ + ret = cio_chunk_write(chunk, content, strlen(content)); + TEST_CHECK(ret == CIO_OK); + + /* Update metadata multiple times with different sizes */ + for (i = 0; i < 5; i++) { + ret = cio_meta_write(chunk, (char *) test_strings[i], strlen(test_strings[i])); + TEST_CHECK(ret == CIO_OK); + + /* Verify metadata after each update */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(test_strings[i])); + TEST_CHECK(memcmp(meta_buf, test_strings[i], strlen(test_strings[i])) == 0); + + /* Verify content remains intact */ + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content)); + TEST_CHECK(memcmp(content_buf, content, strlen(content)) == 0); + free(content_buf); + } + + /* Sync and test persistence */ + ret = cio_chunk_sync(chunk); + TEST_CHECK(ret == CIO_OK); + + ret = cio_chunk_down(chunk); + TEST_CHECK(ret == CIO_OK); + + ret = cio_chunk_up(chunk); + TEST_CHECK(ret == CIO_OK); + + /* Final validation after up/down cycle */ + ret = cio_meta_read(chunk, &meta_buf, &meta_len); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(meta_len == (int) strlen(test_strings[4])); + TEST_CHECK(memcmp(meta_buf, test_strings[4], strlen(test_strings[4])) == 0); + + ret = cio_chunk_get_content_copy(chunk, &content_buf, &content_size); + TEST_CHECK(ret == CIO_OK); + TEST_CHECK(content_size == strlen(content)); + TEST_CHECK(memcmp(content_buf, content, strlen(content)) == 0); + + /* Cleanup */ + free(content_buf); + cio_destroy(ctx); +} + +TEST_LIST = { + {"metadata_update_with_content", test_metadata_update_with_content}, + {"metadata_multiple_updates", test_metadata_multiple_updates}, + { 0 } +};