From 48bb991a8d34e478a1a567a9dd199dbf2060a8ec Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Thu, 29 Jan 2026 02:27:43 +0800 Subject: [PATCH] Replace first-fit memory allocator with TLSF This implements Two-Level Segregated Fit (TLSF) allocator optimized for real-time scenarios: - O(1) malloc/free via bitmap-based segregated free lists - Branchless clz/ctz using mask-based shifts for RV32I compatibility - Immediate coalescing on free() to minimize fragmentation - Configurable FL_INDEX_MAX via CONFIG_TLSF_FL_INDEX_MAX - Bounds validation in block_remove/block_insert with panic on corruption - Sentinel placement validation in mo_heap_init - Block header sanity checks in free() to detect double-free Test coverage: - Block coalescing verification - Realloc in-place growth - Block splitting logic - Heap exhaustion handling - FL/SL boundary allocations - Alignment verification - Bitmap stress tests --- app/test_libc.c | 511 ++++++++++++++++++++++++++- config.h | 26 ++ include/private/utils.h | 11 + lib/malloc.c | 758 +++++++++++++++++++++++++++++----------- 4 files changed, 1099 insertions(+), 207 deletions(-) diff --git a/app/test_libc.c b/app/test_libc.c index bf8e34ca..cf61e53c 100644 --- a/app/test_libc.c +++ b/app/test_libc.c @@ -5,10 +5,10 @@ * * C99 semantics, truncation behavior, ISR safety * * Format specifiers: %s, %d, %u, %x, %p, %c, %% * * Edge cases: size=0, size=1, truncation, null termination + * - Memory allocation: malloc, free, realloc * * Future Tests (Planned): * - String functions: strlen, strcmp, strcpy, strncpy, memcpy, memset - * - Memory allocation: malloc, free, realloc * - Character classification: isdigit, isalpha, isspace, etc. */ @@ -298,11 +298,500 @@ void test_mixed_formats(void) ASSERT_TEST(buf[test_strlen(buf)] == '\0', "Mixed format null termination"); } +/* Memory Allocator Tests (malloc/free/calloc/realloc) */ + +/* Alignment check helper */ +static inline bool is_aligned(void *ptr, size_t align) +{ + return ((uintptr_t) ptr & (align - 1)) == 0; +} + +/* Test: Basic allocation and deallocation */ +void test_malloc_basic(void) +{ + void *p1 = malloc(64); + ASSERT_TEST(p1, "malloc(64) returns non-NULL"); + + void *p2 = malloc(128); + ASSERT_TEST(p2, "malloc(128) returns non-NULL"); + + /* Pointers should be different */ + ASSERT_TEST(p1 != p2, "Different allocations return different pointers"); + + /* Check 4-byte alignment (RISC-V requirement) */ + ASSERT_TEST(is_aligned(p1, 4), "malloc(64) is 4-byte aligned"); + ASSERT_TEST(is_aligned(p2, 4), "malloc(128) is 4-byte aligned"); + + /* Write and read back to verify memory is usable */ + *(uint32_t *) p1 = 0xDEADBEEF; + *(uint32_t *) p2 = 0xCAFEBABE; + ASSERT_TEST(*(uint32_t *) p1 == 0xDEADBEEF, "Write/read verification (p1)"); + ASSERT_TEST(*(uint32_t *) p2 == 0xCAFEBABE, "Write/read verification (p2)"); + + free(p1); + free(p2); +} + +/* Test: Multiple allocations and frees */ +void test_malloc_multiple(void) +{ + void *ptrs[10] = {NULL}; + bool all_valid = true; + + /* Allocate 10 blocks */ + for (int i = 0; i < 10; i++) { + ptrs[i] = malloc(32); + if (!ptrs[i]) { + all_valid = false; + break; + } + *(int *) ptrs[i] = i * 100; + } + ASSERT_TEST(all_valid, "Allocate 10 blocks of 32 bytes"); + + /* Verify all values are preserved (only if all allocations succeeded) */ + bool values_ok = all_valid; + for (int i = 0; i < 10 && values_ok; i++) { + if (!ptrs[i] || *(int *) ptrs[i] != i * 100) + values_ok = false; + } + ASSERT_TEST(values_ok, "All block values preserved"); + + /* Free all blocks */ + for (int i = 0; 
i < 10; i++) + free(ptrs[i]); +} + +/* Test: Calloc overflow protection */ +void test_calloc_overflow(void) +{ + /* These should return NULL due to overflow */ + void *p1 = calloc(0x10000, 0x10000); + ASSERT_TEST(!p1, "calloc overflow (0x10000 * 0x10000)"); + + void *p2 = calloc(0x7FFFFFFF, 2); + ASSERT_TEST(!p2, "calloc overflow (0x7FFFFFFF * 2)"); + + /* Valid calloc should work */ + void *p3 = calloc(10, 10); + ASSERT_TEST(p3, "Valid calloc(10, 10)"); + + /* Verify zero-initialization */ + if (p3) { + uint8_t *bytes = (uint8_t *) p3; + int all_zero = 1; + for (int i = 0; i < 100; i++) { + if (bytes[i] != 0) { + all_zero = 0; + break; + } + } + ASSERT_TEST(all_zero, "calloc zero-initialization"); + free(p3); + } +} + +/* Test: Realloc edge cases */ +void test_realloc_cases(void) +{ + /* realloc(NULL, size) == malloc(size) */ + void *p1 = realloc(NULL, 64); + ASSERT_TEST(p1, "realloc(NULL, size) == malloc"); + if (!p1) + return; /* Cannot continue without initial allocation */ + + /* Write data for shrink/grow tests */ + uint32_t *data = (uint32_t *) p1; + for (int i = 0; i < 16; i++) + data[i] = i * 10; + + /* realloc shrink */ + void *p2 = realloc(p1, 32); + ASSERT_TEST(p2, "realloc shrink"); + if (!p2) + return; + + /* Verify data preserved in shrunk region */ + data = (uint32_t *) p2; + bool shrink_ok = true; + for (int i = 0; i < 8; i++) { + if (data[i] != (uint32_t) (i * 10)) { + shrink_ok = false; + break; + } + } + ASSERT_TEST(shrink_ok, "realloc shrink preserves data"); + + /* realloc grow */ + void *p3 = realloc(p2, 128); + ASSERT_TEST(p3, "realloc grow"); + if (!p3) + return; + + /* Verify data preserved after grow (first 8 uint32_t from shrunk region) */ + data = (uint32_t *) p3; + bool grow_ok = true; + for (int i = 0; i < 8; i++) { + if (data[i] != (uint32_t) (i * 10)) { + grow_ok = false; + break; + } + } + ASSERT_TEST(grow_ok, "realloc grow preserves data"); + + /* realloc(ptr, 0) == free(ptr), returns NULL */ + void *p4 = realloc(p3, 0); + ASSERT_TEST(!p4, "realloc(ptr, 0) returns NULL"); +} + +/* Test: Boundary conditions */ +void test_malloc_boundaries(void) +{ + /* Zero size should return NULL */ + void *p1 = malloc(0); + ASSERT_TEST(!p1, "malloc(0) returns NULL"); + + /* Minimum allocation size */ + void *p2 = malloc(1); + ASSERT_TEST(p2, "malloc(1) succeeds (minimum size)"); + ASSERT_TEST(is_aligned(p2, 4), "malloc(1) is properly aligned"); + free(p2); + + /* Power-of-2 sizes */ + void *p3 = malloc(16); + void *p4 = malloc(256); + void *p5 = malloc(1024); + ASSERT_TEST(p3 && p4 && p5, "Power-of-2 allocations"); + free(p3); + free(p4); + free(p5); +} + +/* Test: Free NULL (should be no-op) */ +void test_free_null(void) +{ + free(NULL); + free(NULL); + free(NULL); + ASSERT_TEST(1, "free(NULL) is no-op"); +} + +/* Test: Allocation/free cycles (stress test) */ +void test_malloc_stress(void) +{ + int success_count = 0; + const int iterations = 50; + + for (int i = 0; i < iterations; i++) { + void *p = malloc(32 + (i % 64)); + if (p) { + *(int *) p = i; + if (*(int *) p == i) + success_count++; + free(p); + } + } + ASSERT_TEST(success_count == iterations, "Alloc/free cycle stress test"); +} + +/* Test: Coalescing verification - verify adjacent free blocks merge */ +void test_malloc_coalesce(void) +{ + /* Allocate three adjacent blocks */ + void *a = malloc(64); + void *b = malloc(64); + void *c = malloc(64); + ASSERT_TEST(a && b && c, "Coalesce: allocate A, B, C"); + + /* Mark them for identification */ + *(uint32_t *) a = 0xAAAAAAAA; + *(uint32_t *) b = 0xBBBBBBBB; + 
*(uint32_t *) c = 0xCCCCCCCC; + + /* Free A and C, creating gaps (B remains allocated) */ + free(a); + free(c); + + /* Free B - should trigger coalescing of A+B+C into one large block */ + free(b); + + /* Now allocate a block that requires merged space. + * If coalescing works, this should succeed and potentially + * return the same address as A (start of merged region). + */ + void *large = malloc(64 * 3); /* Needs space of A+B+C combined */ + ASSERT_TEST(large, "Coalesce: large alloc after merge"); + free(large); +} + +/* Test: Realloc in-place growth - verify realloc expands without copy */ +void test_realloc_inplace(void) +{ + /* Allocate initial block */ + void *p1 = malloc(64); + ASSERT_TEST(p1, "Realloc in-place: initial alloc"); + + /* Write marker data */ + uint32_t *data = (uint32_t *) p1; + for (int i = 0; i < 16; i++) + data[i] = 0xDEAD0000 | i; + + /* Allocate adjacent block then free it to create expansion room */ + void *p2 = malloc(64); + ASSERT_TEST(p2, "Realloc in-place: adjacent alloc"); + free(p2); + + /* Realloc to grow - should expand in-place if TLSF coalesces correctly */ + void *p3 = realloc(p1, 128); + ASSERT_TEST(p3, "Realloc in-place: grow succeeds"); + + /* Verify original data preserved */ + data = (uint32_t *) p3; + int data_ok = 1; + for (int i = 0; i < 16; i++) { + if (data[i] != (0xDEAD0000 | i)) { + data_ok = 0; + break; + } + } + ASSERT_TEST(data_ok, "Realloc in-place: data preserved"); + + /* Check if realloc returned same pointer (in-place) or moved */ + ASSERT_TEST(p3, "Realloc in-place: pointer valid"); + + free(p3); +} + +/* Test: Block splitting - verify large blocks split correctly */ +void test_malloc_split(void) +{ + /* Allocate a large block */ + void *large = malloc(256); + ASSERT_TEST(large, "Split: large block alloc"); + + /* Free it to return to free list */ + free(large); + + /* Allocate a small block - should split the large block */ + void *small1 = malloc(32); + ASSERT_TEST(small1, "Split: small alloc from large"); + + /* Allocate another small block - should use remainder from split */ + void *small2 = malloc(32); + ASSERT_TEST(small2, "Split: second small alloc"); + + /* Both should be different */ + ASSERT_TEST(small1 != small2, "Split: different addresses"); + + /* Verify memory is usable */ + *(uint32_t *) small1 = 0x11111111; + *(uint32_t *) small2 = 0x22222222; + ASSERT_TEST(*(uint32_t *) small1 == 0x11111111, "Split: small1 usable"); + ASSERT_TEST(*(uint32_t *) small2 == 0x22222222, "Split: small2 usable"); + + free(small1); + free(small2); +} + +/* Test: Heap exhaustion - verify graceful failure when heap is full */ +void test_malloc_exhaustion(void) +{ + void *ptrs[1000]; + int count = 0; + + /* Allocate until failure or reasonable limit */ + for (int i = 0; i < 1000; i++) { + ptrs[i] = malloc(1024 * 64); /* 64KB blocks */ + if (!ptrs[i]) + break; + count++; + } + + /* Should have allocated at least some blocks */ + ASSERT_TEST(count > 0, "Exhaustion: some allocations succeeded"); + + /* Should eventually return NULL (not crash) */ + void *overflow = malloc(1024 * 1024 * 100); /* 100MB - should fail */ + ASSERT_TEST(!overflow, "Exhaustion: large alloc returns NULL"); + + /* Free all allocated blocks */ + for (int i = 0; i < count; i++) + free(ptrs[i]); + + /* After freeing, should be able to allocate again */ + void *recovered = malloc(1024); + ASSERT_TEST(recovered, "Exhaustion: recovery after free"); + free(recovered); +} + +/* Test: FL/SL boundary allocations - verify power-of-2 size transitions */ +void 
test_malloc_fl_boundaries(void) +{ + /* Test allocations at FL transition points (power-of-2 boundaries). + * TLSF uses first-level index based on log2(size), so these sizes + * exercise the FL/SL mapping logic at boundary conditions. + */ + void *ptrs[12] = {NULL}; /* Initialize to avoid UB on early exit */ + size_t sizes[] = { + 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, + }; + bool all_valid = true; + + /* Allocate at each FL boundary */ + for (int i = 0; i < 12; i++) { + ptrs[i] = malloc(sizes[i]); + if (!ptrs[i]) { + all_valid = false; + break; + } + /* Write pattern to verify memory is usable */ + *(uint32_t *) ptrs[i] = 0xF1000000 | sizes[i]; + } + ASSERT_TEST(all_valid, "FL boundary: all power-of-2 allocs succeed"); + + /* Verify all patterns */ + int patterns_ok = 1; + for (int i = 0; i < 12 && ptrs[i]; i++) { + if (*(uint32_t *) ptrs[i] != (0xF1000000 | sizes[i])) { + patterns_ok = 0; + break; + } + } + ASSERT_TEST(patterns_ok, "FL boundary: patterns preserved"); + + /* Test sizes just above power-of-2 (exercises SL subdivision) */ + void *above1 = malloc(65); /* Just above 64 */ + void *above2 = malloc(129); /* Just above 128 */ + void *above3 = malloc(257); /* Just above 256 */ + ASSERT_TEST(above1 && above2 && above3, + "FL boundary: above-power-of-2 allocs"); + + /* Test sizes just below power-of-2 */ + void *below1 = malloc(63); /* Just below 64 */ + void *below2 = malloc(127); /* Just below 128 */ + void *below3 = malloc(255); /* Just below 256 */ + ASSERT_TEST(below1 && below2 && below3, + "FL boundary: below-power-of-2 allocs"); + + /* Free all */ + for (int i = 0; i < 12; i++) { + if (ptrs[i]) + free(ptrs[i]); + } + free(above1); + free(above2); + free(above3); + free(below1); + free(below2); + free(below3); +} + +/* Test: Alignment verification for various sizes */ +void test_malloc_alignment(void) +{ + /* All allocations must be 4-byte aligned for RISC-V. + * Test various sizes to ensure alignment is maintained. + */ + size_t sizes[] = { + 1, 2, 3, 5, 7, 9, 15, 17, 31, 33, 63, 65, 100, 1000, + }; + int all_aligned = 1; + + for (int i = 0; i < 14; i++) { + void *p = malloc(sizes[i]); + if (p) { + if (!is_aligned(p, 4)) { + all_aligned = 0; + free(p); + break; + } + /* Also verify 4-byte write works */ + *(uint32_t *) p = 0xDEADBEEF; + if (*(uint32_t *) p != 0xDEADBEEF) { + all_aligned = 0; + free(p); + break; + } + free(p); + } else { + all_aligned = 0; + break; + } + } + ASSERT_TEST(all_aligned, "Alignment: all sizes 4-byte aligned"); +} + +/* Test: Rapid alloc/free patterns to stress bitmap updates */ +void test_malloc_bitmap_stress(void) +{ + /* Alternating allocation pattern that exercises bitmap set/clear. + * This pattern can expose race conditions in bitmap updates. 
+ */ + void *small, *large; + int success = 1; + + for (int i = 0; i < 100; i++) { + small = malloc(32); + large = malloc(1024); + if (!small || !large) { + if (small) + free(small); + if (large) + free(large); + success = 0; + break; + } + free(small); + free(large); + } + ASSERT_TEST(success, "Bitmap stress: alternating alloc/free"); + + /* Interleaved pattern: alloc small, alloc large, free large, free small */ + success = 1; /* Reset for next test */ + for (int i = 0; i < 100; i++) { + small = malloc(32); + large = malloc(1024); + if (!small || !large) { + free(small); + free(large); + success = 0; + break; + } + free(large); + free(small); + } + ASSERT_TEST(success, "Bitmap stress: interleaved free order"); + + /* Multiple size classes simultaneously */ + void *ptrs[8] = {NULL}; /* Initialize to avoid UB */ + size_t sizes[] = {16, 32, 64, 128, 256, 512, 1024, 2048}; + success = 1; /* Reset for next test */ + for (int iter = 0; iter < 50 && success; iter++) { + /* Clear pointers at start of each iteration */ + for (int i = 0; i < 8; i++) + ptrs[i] = NULL; + + for (int i = 0; i < 8; i++) { + ptrs[i] = malloc(sizes[i]); + if (!ptrs[i]) { + success = 0; + break; + } + } + /* Free all allocated (including partial on failure) */ + for (int i = 0; i < 8; i++) { + if (ptrs[i]) + free(ptrs[i]); + } + } + ASSERT_TEST(success, "Bitmap stress: multi-size simultaneous"); +} + void test_runner(void) { printf("\n=== LibC Test Suite ===\n"); - printf("Testing: vsnprintf/snprintf\n\n"); + printf("\n--- Testing: vsnprintf/snprintf ---\n"); test_basic_functionality(); test_size_zero(); test_size_one(); @@ -314,6 +803,24 @@ void test_runner(void) test_isr_safety(); test_mixed_formats(); + printf("\n--- Testing: malloc/free/calloc/realloc ---\n"); + test_malloc_basic(); + test_malloc_multiple(); + test_calloc_overflow(); + test_realloc_cases(); + test_malloc_boundaries(); + test_free_null(); + test_malloc_stress(); + + printf("\n--- Testing: TLSF allocator internals ---\n"); + test_malloc_coalesce(); + test_realloc_inplace(); + test_malloc_split(); + test_malloc_exhaustion(); + test_malloc_fl_boundaries(); + test_malloc_alignment(); + test_malloc_bitmap_stress(); + printf("\n=== Test Summary ===\n"); printf("Tests run: %d\n", tests_run); printf("Tests passed: %d\n", tests_passed); diff --git a/config.h b/config.h index 24547a3b..6667b8d1 100644 --- a/config.h +++ b/config.h @@ -4,3 +4,29 @@ #ifndef CONFIG_STACK_PROTECTION #define CONFIG_STACK_PROTECTION 1 /* Default: enabled for safety */ #endif + +/* TLSF Memory Allocator Configuration + * + * FL_INDEX_MAX controls the maximum allocation size supported by the TLSF + * allocator. The maximum allocatable size is approximately 2^FL_INDEX_MAX + * bytes. + * + * Memory overhead scales with FL_INDEX_MAX: + * - Each FL level adds: 4 bytes (sl_bitmap) + 16 bytes (4 block pointers) + * - Total control structure: ~20 * (FL_INDEX_MAX - 3) bytes + * + * Recommended values: + * FL_INDEX_MAX | Max Alloc Size | Use Case + * -------------|----------------|----------------------------- + * 24 | 16 MB | Small embedded (< 16MB heap) + * 28 | 256 MB | Medium embedded (default) + * 30 | 1 GB | Large systems (< 1GB heap) + * 31 | 2 GB | Full 32-bit address space + * + * WARNING: If your heap size exceeds 2^FL_INDEX_MAX, large allocations may + * fail unexpectedly. The allocator clamps oversized requests to the largest + * bucket, which may contain blocks smaller than requested. 
+ */ +#ifndef CONFIG_TLSF_FL_INDEX_MAX +#define CONFIG_TLSF_FL_INDEX_MAX 28 +#endif diff --git a/include/private/utils.h b/include/private/utils.h index 9d9edd2f..4286f3d0 100644 --- a/include/private/utils.h +++ b/include/private/utils.h @@ -30,6 +30,17 @@ */ #define ALIGN4(x) ((((uint32_t) (x) + 3u) >> 2) << 2) +/* Fast range check using bit operations. + * Checks if x is in range [minx, minx + size - 1]. + * Faster than (x >= minx && x < minx + size) due to fewer branches. + * For any variable range checking: + * if (x >= minx && x <= maxx) ... + * it is faster to use bit operation: + * if ((signed)((x - minx) | (maxx - x)) >= 0) ... + */ +#define RANGE_CHECK(x, minx, size) \ + ((int32_t) (((x) - (minx)) | ((minx) + (size) - 1 - (x))) >= 0) + /* Power-of-2 Utility Functions * * Efficient bit manipulation functions for power-of-2 operations, diff --git a/lib/malloc.c b/lib/malloc.c index 3e9a7494..c948214e 100644 --- a/lib/malloc.c +++ b/lib/malloc.c @@ -1,258 +1,533 @@ -/* libc: memory allocation. */ +/* libc: TLSF-inspired memory allocator with O(1) allocation/deallocation. + * + * Two-Level Segregated Fit (TLSF) allocator optimized for RTOS with: + * - O(1) malloc() and free() via bitmap-based segregated free lists + * - Bounded interrupt disable duration (no linear searches) + * - Immediate coalescing on free() to minimize fragmentation + * - Low memory overhead (~8 bytes per block) + * + * Performance characteristics: + * - malloc(): O(1) - bitmap lookup + list pop + * - free(): O(1) - coalesce + list insert + * - realloc(): O(1) best case (in-place), O(n) worst case (copy) + * + * Reference: + * M. Masmano, I. Ripoll, A. Crespo, and J. Real. + * TLSF: a new dynamic memory allocator for real-time systems. + * In Proc. ECRTS (2004), IEEE Computer Society, pp. 79-86. + */ #include +#include #include #include +#include "../config.h" #include "private/error.h" #include "private/utils.h" -/* Memory allocator using first-fit strategy with selective coalescing. +/* TLSF Configuration * - * Performance characteristics: - * - malloc(): O(n) worst case; searches linearly from heap start; coalesces - * free blocks when fragmentation threshold is reached. - * - free(): O(1) average case; marks memory areas as unused with immediate - * forward coalescing and optional backward coalescing. + * Memory is organized into segregated free lists indexed by two levels: + * - First Level (FL): log2(size) - power-of-2 size classes + * - Second Level (SL): 4 subdivisions within each FL class * - * This implementation prioritizes fast allocation/deallocation with proper - * fragmentation management to minimize memory waste. + * This provides O(1) block lookup via bitmap operations. + */ + +/* Alignment must be at least 4 bytes for RISC-V */ +#define ALIGN_SIZE 4 +#define ALIGN_SIZE_LOG2 2 + +/* Second level index: 4 subdivisions per first-level class */ +#define SL_INDEX_COUNT 4 +#define SL_INDEX_COUNT_LOG2 2 + +/* First level index shift: log2(SL_INDEX_COUNT * ALIGN_SIZE) = 4 */ +#define FL_INDEX_SHIFT (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2) + +/* First level index count: configurable via CONFIG_TLSF_FL_INDEX_MAX + * See config.h for configuration options and memory overhead tradeoffs. 
*/ +#define FL_INDEX_MAX CONFIG_TLSF_FL_INDEX_MAX +#define FL_INDEX_COUNT (FL_INDEX_MAX - FL_INDEX_SHIFT + 1) -typedef struct __memblock { - struct __memblock *next; /* pointer to the next block */ - size_t size; /* block size, LSB = used flag */ -} memblock_t; +/* Small block threshold: blocks smaller than this use direct indexing */ +#define SMALL_BLOCK_SIZE (1 << FL_INDEX_SHIFT) -static memblock_t *first_free; +/* Block header flags (stored in low bits of size field) */ +#define BLOCK_BIT_FREE (1 << 0) +#define BLOCK_BIT_PREV_FREE (1 << 1) + +/* Mask for extracting actual size from header */ +#define BLOCK_SIZE_MASK (~(BLOCK_BIT_FREE | BLOCK_BIT_PREV_FREE)) + +/* Overhead: prev_phys + header fields (8 bytes on RV32) */ +#define BLOCK_HEADER_OVERHEAD (sizeof(block_t *) + sizeof(size_t)) + +/* Minimum block size: space for free list pointers when block is freed */ +#define BLOCK_SIZE_MIN (sizeof(block_t *) + sizeof(block_t *)) + +/* Block structure - 16 bytes when free, 8 bytes overhead when allocated */ +typedef struct block { + /* Previous physical block (for backward coalescing) */ + struct block *prev_phys; + /* Size + flags: bit 0 = free, bit 1 = prev_free */ + size_t header; + /* Free list links - only valid when block is free */ + struct block *next_free, *prev_free; +} block_t; + +/* TLSF control structure */ +typedef struct { + /* Null block for list termination */ + block_t block_null; + + /* First-level bitmap: bit i set if fl_bitmap[i] has free blocks */ + uint32_t fl_bitmap; + + /* Second-level bitmaps: bit j set if blocks[i][j] has free blocks */ + uint32_t sl_bitmap[FL_INDEX_COUNT]; + + /* Free block lists indexed by [fl][sl] */ + block_t *blocks[FL_INDEX_COUNT][SL_INDEX_COUNT]; +} tlsf_t; + +/* Global TLSF control structure */ +static tlsf_t tlsf_control; static void *heap_start, *heap_end; -static uint32_t free_blocks_count; /* track fragmentation */ -/* Block manipulation macros */ -#define IS_USED(b) ((b)->size & 1L) -#define GET_SIZE(b) ((b)->size & ~1L) -#define MARK_USED(b) ((b)->size |= 1L) -#define MARK_FREE(b) ((b)->size &= ~1L) +/* Bit manipulation helpers - branchless binary search + * + * These use mask-based shifts to avoid conditional branches, compiling to + * sltu/andi/srl sequences on RV32I. 
This provides predictable timing for + * real-time systems without: + * - Multiplication (heavy on RV32I without M extension) + * - libgcc dependencies (__clzsi2, __ctzsi2) + * - Branch misprediction penalties + */ -/* Memory layout validation */ -#define IS_VALID_BLOCK(b) \ - ((void *) (b) >= heap_start && (void *) (b) < heap_end && \ - (size_t) (b) % sizeof(size_t) == 0) +/* Find last set bit (1-indexed), 0 if no bits set */ +static inline int fls(uint32_t x) +{ + if (!x) + return 0; + + int r = 1; + uint32_t m; + + m = !!(x & 0xFFFF0000u); + r += (int) (m << 4); + x >>= (m << 4); + m = !!(x & 0x0000FF00u); + r += (int) (m << 3); + x >>= (m << 3); + m = !!(x & 0x000000F0u); + r += (int) (m << 2); + x >>= (m << 2); + m = !!(x & 0x0000000Cu); + r += (int) (m << 1); + x >>= (m << 1); + r += (int) !!(x & 0x00000002u); + + return r; +} -/* Fragmentation threshold - coalesce when free blocks exceed this ratio */ -#define COALESCE_THRESHOLD 8 +/* Find first set bit (0-indexed), -1 if no bits set */ +static inline int ffs_bit(uint32_t x) +{ + if (!x) + return -1; + + int r = 0; + uint32_t m; + + m = ((x & 0x0000FFFFu) == 0); + r += (int) (m << 4); + x >>= (m << 4); + m = ((x & 0x000000FFu) == 0); + r += (int) (m << 3); + x >>= (m << 3); + m = ((x & 0x0000000Fu) == 0); + r += (int) (m << 2); + x >>= (m << 2); + m = ((x & 0x00000003u) == 0); + r += (int) (m << 1); + x >>= (m << 1); + r += (int) ((x & 0x00000001u) == 0); + + return r; +} -/* Validate block integrity */ -static inline bool validate_block(memblock_t *block) +/* Block accessor macros */ +#define block_size(b) ((b)->header & BLOCK_SIZE_MASK) +#define block_is_free(b) ((b)->header & BLOCK_BIT_FREE) +#define block_is_prev_free(b) ((b)->header & BLOCK_BIT_PREV_FREE) +#define block_set_free(b) ((b)->header |= BLOCK_BIT_FREE) +#define block_set_used(b) ((b)->header &= ~BLOCK_BIT_FREE) +#define block_set_prev_free(b) ((b)->header |= BLOCK_BIT_PREV_FREE) +#define block_set_prev_used(b) ((b)->header &= ~BLOCK_BIT_PREV_FREE) + +/* Get pointer to block from user pointer. 
+ * User pointer points to next_free field (offset = prev_phys + header = 8 + * bytes) + */ +static inline block_t *block_from_ptr(const void *ptr) { - if (unlikely(!IS_VALID_BLOCK(block))) - return false; + return (block_t *) ((char *) ptr - sizeof(block_t *) - sizeof(size_t)); +} - size_t size = GET_SIZE(block); - if (unlikely(!size || size > MALLOC_MAX_SIZE)) - return false; +/* Get user pointer from block */ +static inline void *block_to_ptr(const block_t *block) +{ + return (void *) &block->next_free; +} - /* Check if block extends beyond heap */ - if (unlikely((uint8_t *) block + sizeof(memblock_t) + size > - (uint8_t *) heap_end)) - return false; +/* Get next physical block */ +static inline block_t *block_next(const block_t *block) +{ + return (block_t *) ((char *) block_to_ptr(block) + block_size(block)); +} - if (unlikely(block->next && - (uint8_t *) block + sizeof(memblock_t) + GET_SIZE(block) != - (uint8_t *) block->next)) - return false; +/* Link next physical block's prev_phys pointer */ +static inline void block_link_next(block_t *block) +{ + block_t *next = block_next(block); + next->prev_phys = block; +} - return true; +/* Mark next physical block's prev_free flag */ +static inline void block_mark_as_free(block_t *block) +{ + block_t *next = block_next(block); + block_set_prev_free(next); } -/* O(1) with immediate forward coalescing, conditional backward coalescing */ -void free(void *ptr) +/* Clear next physical block's prev_free flag */ +static inline void block_mark_as_used(block_t *block) { - if (!ptr) - return; + block_t *next = block_next(block); + block_set_prev_used(next); +} - CRITICAL_ENTER(); +/* TLSF mapping functions - O(1) size to index conversion + * + * Maps a size to first-level and second-level indices: + * - fl = log2(size) - FL_INDEX_SHIFT + * - sl = (size >> (fl + FL_INDEX_SHIFT - SL_INDEX_COUNT_LOG2)) & (SL_COUNT-1) + */ - memblock_t *p = ((memblock_t *) ptr) - 1; +static inline void mapping_insert(size_t size, int *fl, int *sl) +{ + if (size < SMALL_BLOCK_SIZE) { + /* Small blocks: fl=0, sl=size/ALIGN_SIZE */ + *fl = 0; + *sl = (int) size / ALIGN_SIZE; + } else { + int t = fls((uint32_t) size) - 1; + *sl = (int) (size >> (t - SL_INDEX_COUNT_LOG2)) ^ SL_INDEX_COUNT; + *fl = t - FL_INDEX_SHIFT + 1; + + /* Clamp to valid index range for very large sizes. + * This handles sizes > 2^FL_INDEX_MAX which would overflow the + * segregated list arrays. All such sizes map to the largest bucket. 
+ */ + if (*fl >= FL_INDEX_COUNT) { + *fl = FL_INDEX_COUNT - 1; + *sl = SL_INDEX_COUNT - 1; + } + } +} - /* Validate the block being freed */ - if (unlikely(!validate_block(p) || !IS_USED(p))) { - CRITICAL_LEAVE(); - panic(ERR_HEAP_CORRUPT); - return; /* Invalid or double-free */ +/* Round up size for searching - ensures we find a block >= requested size */ +static inline void mapping_search(size_t size, int *fl, int *sl) +{ + if (size >= SMALL_BLOCK_SIZE) { + size_t round = + (1 << (fls((uint32_t) size) - 1 - SL_INDEX_COUNT_LOG2)) - 1; + size += round; } + mapping_insert(size, fl, sl); +} - MARK_FREE(p); - free_blocks_count++; +/* Free list management - O(1) insert and remove */ - /* Forward merge if the next block is free and physically adjacent */ - if (p->next && !IS_USED(p->next)) { - p->size = GET_SIZE(p) + sizeof(memblock_t) + GET_SIZE(p->next); - p->next = p->next->next; - free_blocks_count--; +/* Remove block from its free list */ +static inline void block_remove(tlsf_t *t, block_t *block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + + /* Bounds check: invalid indices indicate heap corruption or code bug. + * Fail-fast with panic rather than silent return to aid debugging. + */ + if (unlikely(!RANGE_CHECK(fl, 0, FL_INDEX_COUNT) || + !RANGE_CHECK(sl, 0, SL_INDEX_COUNT))) { + panic(ERR_HEAP_CORRUPT); + return; } - /* Backward merge: optimized single-pass search with early termination */ - memblock_t *prev = NULL; - memblock_t *current = first_free; - while (current && current != p) { - prev = current; - current = current->next; - } + block_t *prev = block->prev_free; + block_t *next = block->next_free; - if (prev && !IS_USED(prev)) { - if (unlikely(!validate_block(prev))) { - CRITICAL_LEAVE(); - panic(ERR_HEAP_CORRUPT); - return; + next->prev_free = prev; + prev->next_free = next; + + /* Update bitmaps if list is now empty */ + if (t->blocks[fl][sl] == block) { + t->blocks[fl][sl] = (next == &t->block_null) ? NULL : next; + if (!t->blocks[fl][sl]) { + t->sl_bitmap[fl] &= ~(1U << sl); + if (!t->sl_bitmap[fl]) + t->fl_bitmap &= ~(1U << fl); } - prev->size = GET_SIZE(prev) + sizeof(memblock_t) + GET_SIZE(p); - prev->next = p->next; - free_blocks_count--; } +} - CRITICAL_LEAVE(); +/* Insert block into appropriate free list */ +static inline void block_insert(tlsf_t *t, block_t *block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + + /* Bounds check: invalid indices indicate heap corruption or code bug. + * Fail-fast with panic rather than silent return to aid debugging. + */ + if (unlikely(!RANGE_CHECK(fl, 0, FL_INDEX_COUNT) || + !RANGE_CHECK(sl, 0, SL_INDEX_COUNT))) { + panic(ERR_HEAP_CORRUPT); + return; + } + + block_t *current = t->blocks[fl][sl]; + block->next_free = current ? 
current : &t->block_null; + block->prev_free = &t->block_null; + + if (current) + current->prev_free = block; + + t->blocks[fl][sl] = block; + t->fl_bitmap |= (1U << fl); + t->sl_bitmap[fl] |= (1U << sl); } -/* Selective coalescing: only when fragmentation becomes significant */ -static void selective_coalesce(void) +/* Block splitting and merging - O(1) operations */ + +/* Check if block can be split */ +static inline bool block_can_split(block_t *block, size_t size) { - memblock_t *p = first_free; + return block_size(block) >= size + BLOCK_SIZE_MIN + BLOCK_HEADER_OVERHEAD; +} - while (p && p->next) { - /* Merge only when blocks are FREE *and* adjacent in memory */ - if (unlikely(!validate_block(p))) { - panic(ERR_HEAP_CORRUPT); - return; - } - if (!IS_USED(p) && !IS_USED(p->next)) { - p->size = GET_SIZE(p) + sizeof(memblock_t) + GET_SIZE(p->next); - p->next = p->next->next; - free_blocks_count--; - } else { - p = p->next; - } - } +/* Split block, return remainder */ +static inline block_t *block_split(block_t *block, size_t size) +{ + block_t *remaining = (block_t *) ((char *) block_to_ptr(block) + size); + + size_t remain_size = block_size(block) - size - BLOCK_HEADER_OVERHEAD; + + remaining->header = remain_size; + remaining->prev_phys = block; + block->header = + size | (block->header & (BLOCK_BIT_FREE | BLOCK_BIT_PREV_FREE)); + + block_link_next(remaining); + + return remaining; } -static inline void split_block(memblock_t *block, size_t size) +/* Absorb next block into current block */ +static inline block_t *block_absorb(block_t *prev, block_t *block) { - size_t remaining; - memblock_t *new_block; + size_t new_size = + block_size(prev) + block_size(block) + BLOCK_HEADER_OVERHEAD; - if (unlikely(size > GET_SIZE(block))) { + /* Bounds validation: merged size must not exceed heap bounds. + * A corrupted block header could cause arbitrarily large sizes. 
+ */ + if (unlikely((char *) block_to_ptr(prev) + new_size > (char *) heap_end)) { panic(ERR_HEAP_CORRUPT); - return; + return prev; /* Unreachable after panic, but satisfies return type */ } - remaining = GET_SIZE(block) - size; - /* Split only when remaining memory is large enough */ - if (remaining < sizeof(memblock_t) + MALLOC_MIN_SIZE) - return; - new_block = (memblock_t *) ((size_t) block + sizeof(memblock_t) + size); - new_block->next = block->next; - new_block->size = remaining - sizeof(memblock_t); - MARK_FREE(new_block); - block->next = new_block; - block->size = size | IS_USED(block); - free_blocks_count++; /* New free block created */ + + prev->header = + new_size | (prev->header & (BLOCK_BIT_FREE | BLOCK_BIT_PREV_FREE)); + block_link_next(prev); + return prev; } -/* O(n) first-fit allocation with selective coalescing */ -void *malloc(uint32_t size) +/* Merge with previous physical block if free */ +static inline block_t *block_merge_prev(tlsf_t *t, block_t *block) { - /* Input validation */ - if (unlikely(!size || size > MALLOC_MAX_SIZE)) - return NULL; + if (block_is_prev_free(block)) { + block_t *prev = block->prev_phys; + block_remove(t, prev); + block = block_absorb(prev, block); + } + return block; +} - size = ALIGN4(size); +/* Merge with next physical block if free */ +static inline block_t *block_merge_next(tlsf_t *t, block_t *block) +{ + block_t *next = block_next(block); + if (block_is_free(next)) { + block_remove(t, next); + block = block_absorb(block, next); + } + return block; +} - /* Ensure minimum allocation size */ - if (size < MALLOC_MIN_SIZE) - size = MALLOC_MIN_SIZE; +/* Block location - O(1) via bitmap search */ - CRITICAL_ENTER(); +/* Find suitable free block for requested size */ +static inline block_t *block_find_free(tlsf_t *t, size_t size) +{ + int fl, sl; + mapping_search(size, &fl, &sl); - /* Trigger coalescing only when fragmentation is high */ - if (free_blocks_count > COALESCE_THRESHOLD) - selective_coalesce(); + /* Defensive bounds check using fast bit-op range check */ + if (!RANGE_CHECK(fl, 0, FL_INDEX_COUNT) || + !RANGE_CHECK(sl, 0, SL_INDEX_COUNT)) + return NULL; - memblock_t *p = first_free; - while (p) { - if (unlikely(!validate_block(p))) { - CRITICAL_LEAVE(); - panic(ERR_HEAP_CORRUPT); - return NULL; /* Heap corruption detected */ - } + /* Search for block in same or larger size class */ + uint32_t sl_map = t->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) { + /* No block in first level, search larger classes */ + uint32_t fl_map = t->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + return NULL; /* No suitable block found */ + + fl = ffs_bit(fl_map); + /* Bounds check on fl from bitmap lookup */ + if (!RANGE_CHECK(fl, 0, FL_INDEX_COUNT)) + return NULL; + sl_map = t->sl_bitmap[fl]; + } - if (!IS_USED(p) && GET_SIZE(p) >= size) { - /* Split block only if remainder is large enough to be useful */ - split_block(p, size); + sl = ffs_bit(sl_map); + /* Bounds check on sl from bitmap lookup */ + if (!RANGE_CHECK(sl, 0, SL_INDEX_COUNT)) + return NULL; - MARK_USED(p); - if (unlikely(free_blocks_count <= 0)) { - panic(ERR_HEAP_CORRUPT); - return NULL; - } - free_blocks_count--; + return t->blocks[fl][sl]; +} - CRITICAL_LEAVE(); - return (void *) (p + 1); - } - p = p->next; +/* Block preparation for allocation */ + +static inline void *block_prepare_used(tlsf_t *t, block_t *block, size_t size) +{ + if (!block) + return NULL; + + block_remove(t, block); + + /* Split if block is much larger than needed */ + if (block_can_split(block, size)) { + block_t 
*remaining = block_split(block, size); + block_set_free(remaining); + block_insert(t, remaining); + block_mark_as_free(remaining); + } else { + block_mark_as_used(block); } + block_set_used(block); + return block_to_ptr(block); +} + +/* Public API - Standard C allocation interface */ + +/* Adjust size for alignment and minimum requirements */ +static inline size_t adjust_size(size_t size) +{ + if (size < BLOCK_SIZE_MIN) + size = BLOCK_SIZE_MIN; + return ALIGN4(size); +} + +/* O(1) allocation */ +void *malloc(uint32_t size) +{ + if (unlikely(!size || size > MALLOC_MAX_SIZE)) + return NULL; + + size = adjust_size(size); + + CRITICAL_ENTER(); + block_t *block = block_find_free(&tlsf_control, size); + void *ptr = block_prepare_used(&tlsf_control, block, size); CRITICAL_LEAVE(); - return NULL; /* allocation failed */ + + return ptr; } -/* Initializes memory allocator with enhanced validation */ -void mo_heap_init(size_t *zone, uint32_t len) +/* O(1) deallocation with immediate coalescing */ +void free(void *ptr) { - memblock_t *start, *end; + if (!ptr) + return; + + CRITICAL_ENTER(); + + block_t *block = block_from_ptr(ptr); + + /* Validate block is within heap bounds */ + if (unlikely((void *) block < heap_start || (void *) block >= heap_end)) { + CRITICAL_LEAVE(); + panic(ERR_HEAP_CORRUPT); + return; + } + + /* Validate block header sanity - size must not exceed heap bounds */ + size_t bsize = block_size(block); + if (unlikely(bsize == 0 || + (void *) ((char *) block_to_ptr(block) + bsize) > heap_end)) { + CRITICAL_LEAVE(); + panic(ERR_HEAP_CORRUPT); + return; + } - if (unlikely(!zone || len < 2 * sizeof(memblock_t) + MALLOC_MIN_SIZE)) - return; /* Invalid parameters */ + /* Validate block is currently used (not double-free) */ + if (unlikely(block_is_free(block))) { + CRITICAL_LEAVE(); + panic(ERR_HEAP_CORRUPT); + return; + } - len = ALIGN4(len); - start = (memblock_t *) zone; - end = (memblock_t *) ((size_t) zone + len - sizeof(memblock_t)); + block_set_free(block); - start->next = end; - start->size = len - 2 * sizeof(memblock_t); - MARK_FREE(start); + /* Coalesce with adjacent free blocks */ + block = block_merge_prev(&tlsf_control, block); + block = block_merge_next(&tlsf_control, block); - end->next = NULL; - end->size = 0; - MARK_USED(end); /* end block marks heap boundary */ + /* Insert merged block into free list */ + block_insert(&tlsf_control, block); + block_mark_as_free(block); - first_free = start; - heap_start = (void *) zone; - heap_end = (void *) ((size_t) end + sizeof(memblock_t)); - free_blocks_count = 1; + CRITICAL_LEAVE(); } -/* Allocates zero-initialized memory with overflow protection */ +/* Zero-initialized allocation with overflow protection */ void *calloc(uint32_t nmemb, uint32_t size) { /* Check for multiplication overflow */ if (unlikely(nmemb && size > MALLOC_MAX_SIZE / nmemb)) return NULL; - uint32_t total_size = ALIGN4(nmemb * size); - void *buf = malloc(total_size); + uint32_t total = nmemb * size; + void *ptr = malloc(total); - if (buf) - memset(buf, 0, total_size); + if (ptr) + memset(ptr, 0, total); - return buf; + return ptr; } -/* Reallocates memory with improved efficiency */ +/* Reallocation with in-place optimization */ void *realloc(void *ptr, uint32_t size) { - if (unlikely(size > MALLOC_MAX_SIZE)) - return NULL; - if (!ptr) return malloc(size); @@ -261,55 +536,128 @@ void *realloc(void *ptr, uint32_t size) return NULL; } - size = ALIGN4(size); + if (unlikely(size > MALLOC_MAX_SIZE)) + return NULL; + + size = adjust_size(size); + + CRITICAL_ENTER(); 
- memblock_t *old_block = ((memblock_t *) ptr) - 1; + block_t *block = block_from_ptr(ptr); - /* Validate the existing block */ - if (unlikely(!validate_block(old_block) || !IS_USED(old_block))) { + /* Validate block */ + if (unlikely((void *) block < heap_start || (void *) block >= heap_end || + block_is_free(block))) { + CRITICAL_LEAVE(); panic(ERR_HEAP_CORRUPT); return NULL; } - size_t old_size = GET_SIZE(old_block); - - /* If shrinking or size is close, reuse existing block */ - if (size <= old_size && - old_size - size < sizeof(memblock_t) + MALLOC_MIN_SIZE) - return ptr; + size_t cur_size = block_size(block); - /* fast path for shrinking */ - if (size <= old_size) { - split_block(old_block, size); - /* Trigger coalescing only when fragmentation is high */ - if (free_blocks_count > COALESCE_THRESHOLD) - selective_coalesce(); + /* Shrinking: potentially split block */ + if (size <= cur_size) { + if (block_can_split(block, size)) { + block_t *remaining = block_split(block, size); + block_set_free(remaining); + remaining = block_merge_next(&tlsf_control, remaining); + block_insert(&tlsf_control, remaining); + block_mark_as_free(remaining); + } CRITICAL_LEAVE(); - return (void *) (old_block + 1); + return ptr; } - /* fast path for growing */ - if (old_block->next && !IS_USED(old_block->next) && - GET_SIZE(old_block) + sizeof(memblock_t) + GET_SIZE(old_block->next) >= - size) { - old_block->size = GET_SIZE(old_block) + sizeof(memblock_t) + - GET_SIZE(old_block->next); - old_block->next = old_block->next->next; - free_blocks_count--; - split_block(old_block, size); - /* Trigger coalescing only when fragmentation is high */ - if (free_blocks_count > COALESCE_THRESHOLD) - selective_coalesce(); + /* Growing: try to absorb next block */ + block_t *next = block_next(block); + if (block_is_free(next) && + cur_size + block_size(next) + BLOCK_HEADER_OVERHEAD >= size) { + block_remove(&tlsf_control, next); + block = block_absorb(block, next); + + if (block_can_split(block, size)) { + block_t *remaining = block_split(block, size); + block_set_free(remaining); + block_insert(&tlsf_control, remaining); + block_mark_as_free(remaining); + } else { + block_mark_as_used(block); + } CRITICAL_LEAVE(); - return (void *) (old_block + 1); + return ptr; } + CRITICAL_LEAVE(); - void *new_buf = malloc(size); - if (new_buf) { - memcpy(new_buf, ptr, min(old_size, size)); + /* Fall back to malloc + copy + free. + * + * Design note: We release the lock before the slow path to allow other + * allocations during the copy. This is safe because: + * 1. cur_size is a local copy captured while holding the lock + * 2. ptr remains marked as "used" in TLSF until our free(ptr) call + * 3. Other malloc calls cannot return this memory until after free(ptr) + * + * The caller must ensure ptr is not freed or modified by another thread + * during realloc (standard realloc semantics). 
+ */ + void *new_ptr = malloc(size); + if (new_ptr) { + memcpy(new_ptr, ptr, min(cur_size, size)); free(ptr); } - return new_buf; + return new_ptr; +} + +/* Initialize TLSF heap */ +void mo_heap_init(size_t *zone, uint32_t len) +{ + if (unlikely(!zone || len < sizeof(block_t) * 3)) + return; + + tlsf_t *t = &tlsf_control; + + /* Initialize control structure */ + memset(t, 0, sizeof(*t)); + t->block_null.next_free = &t->block_null; + t->block_null.prev_free = &t->block_null; + + /* Align pool start and end */ + uintptr_t pool_start = ALIGN4((uintptr_t) zone); + uintptr_t pool_end = ((uintptr_t) zone + len) & ~(ALIGN_SIZE - 1); + + if (pool_end <= pool_start + sizeof(block_t) * 2) + return; + + heap_start = (void *) pool_start; + heap_end = (void *) pool_end; + + /* Create initial free block spanning entire pool */ + block_t *block = (block_t *) pool_start; + size_t block_size_val = pool_end - pool_start - BLOCK_HEADER_OVERHEAD * 2; + + block->header = block_size_val | BLOCK_BIT_FREE; + block->prev_phys = NULL; + block_link_next(block); + + /* Create sentinel block at end */ + block_t *sentinel = block_next(block); + + /* Runtime validation: sentinel must be within heap bounds. + * This catches off-by-one errors in block_size_val calculation. + */ + if (unlikely((void *) sentinel >= heap_end || + (void *) sentinel < heap_start)) { + panic(ERR_HEAP_CORRUPT); + return; + } + + sentinel->header = 0; /* Size 0, used, prev_free */ + sentinel->prev_phys = block; + sentinel->next_free = NULL; /* Clear for debugging clarity */ + sentinel->prev_free = NULL; + block_set_prev_free(sentinel); + + /* Insert initial block into free list */ + block_insert(t, block); }
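
The RANGE_CHECK() macro added to include/private/utils.h folds two comparisons into one sign test: (x - minx) goes negative when x is below the range, and (minx + size - 1 - x) goes negative when x is above it, so OR-ing the two terms and testing the sign bit rejects both cases at once. A minimal host-side demonstration (illustration only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Same definition as include/private/utils.h */
    #define RANGE_CHECK(x, minx, size) \
        ((int32_t) (((x) - (minx)) | ((minx) + (size) - 1 - (x))) >= 0)

    int main(void)
    {
        /* Check indices against [0, 4), as block_insert() does with
         * SL_INDEX_COUNT = 4.
         */
        for (int i = -2; i <= 5; i++)
            printf("%2d in [0, 4): %s\n", i, RANGE_CHECK(i, 0, 4) ? "yes" : "no");
        return 0;
    }

Expected output: "no" for -2 and -1, "yes" for 0 through 3, "no" for 4 and 5.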
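
The branchless fls()/ffs_bit() helpers in lib/malloc.c avoid the libgcc __clzsi2/__ctzsi2 routines and keep timing predictable on RV32I. They are easy to cross-check on a host against naive loop versions; a possible verification sketch (hypothetical harness, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Copy of the branchless fls() from lib/malloc.c: 1-indexed, 0 when x == 0. */
    static int fls_branchless(uint32_t x)
    {
        if (!x)
            return 0;
        int r = 1;
        uint32_t m;
        m = !!(x & 0xFFFF0000u); r += (int) (m << 4); x >>= (m << 4);
        m = !!(x & 0x0000FF00u); r += (int) (m << 3); x >>= (m << 3);
        m = !!(x & 0x000000F0u); r += (int) (m << 2); x >>= (m << 2);
        m = !!(x & 0x0000000Cu); r += (int) (m << 1); x >>= (m << 1);
        r += (int) !!(x & 0x00000002u);
        return r;
    }

    /* Reference: position of highest set bit, 1-indexed, 0 for x == 0. */
    static int fls_reference(uint32_t x)
    {
        int r = 0;
        while (x) {
            r++;
            x >>= 1;
        }
        return r;
    }

    int main(void)
    {
        /* Exhaustive single-bit check plus a few mixed patterns. */
        for (int bit = 0; bit < 32; bit++) {
            uint32_t v = 1u << bit;
            if (fls_branchless(v) != fls_reference(v)) {
                printf("mismatch at bit %d\n", bit);
                return 1;
            }
        }
        uint32_t samples[] = {0u, 1u, 3u, 0x12345678u, 0x80000001u, 0xFFFFFFFFu};
        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            if (fls_branchless(samples[i]) != fls_reference(samples[i])) {
                printf("mismatch at 0x%08x\n", samples[i]);
                return 1;
            }
        }
        printf("fls() matches reference\n");
        return 0;
    }

The same pattern applies to ffs_bit() with a lowest-set-bit reference.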
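
For readers less familiar with TLSF, the size-to-bucket mapping in mapping_insert() computes fl = floor(log2(size)) - FL_INDEX_SHIFT + 1 and takes sl from the SL_INDEX_COUNT_LOG2 bits just below the leading bit, so a 200-byte request lands in fl = 4 (the [128, 256) class) and sl = 2 (the [192, 224) sub-range). A standalone sketch with the same constants (ALIGN_SIZE = 4, SL_INDEX_COUNT = 4, FL_INDEX_SHIFT = 4); the demo_* names are illustrative only and this code is not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN_SIZE 4
    #define SL_INDEX_COUNT 4
    #define SL_INDEX_COUNT_LOG2 2
    #define FL_INDEX_SHIFT 4
    #define SMALL_BLOCK_SIZE (1 << FL_INDEX_SHIFT)

    /* Loop-based fls() stand-in for the host (the patch uses a branchless one). */
    static int demo_fls(uint32_t x)
    {
        int r = 0;
        while (x) {
            r++;
            x >>= 1;
        }
        return r;
    }

    /* Same arithmetic as mapping_insert() in lib/malloc.c (index clamping omitted). */
    static void demo_mapping_insert(size_t size, int *fl, int *sl)
    {
        if (size < SMALL_BLOCK_SIZE) {
            *fl = 0;
            *sl = (int) size / ALIGN_SIZE;
        } else {
            int t = demo_fls((uint32_t) size) - 1; /* floor(log2(size)) */
            *sl = (int) (size >> (t - SL_INDEX_COUNT_LOG2)) ^ SL_INDEX_COUNT;
            *fl = t - FL_INDEX_SHIFT + 1;
        }
    }

    int main(void)
    {
        size_t sizes[] = {8, 16, 63, 64, 200, 256, 4096};
        for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
            int fl, sl;
            demo_mapping_insert(sizes[i], &fl, &sl);
            printf("size %5u -> fl %2d, sl %d\n", (unsigned) sizes[i], fl, sl);
        }
        return 0;
    }

Because mapping_search() rounds the request up before calling mapping_insert(), malloc() only ever draws from a bucket whose smallest member is at least the requested size, which is what makes the bitmap lookup an O(1) good-fit search.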