diff --git a/Makefile b/Makefile index 277cd40..edd5485 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,9 @@ TARGETS = \ wcet TARGETS := $(addprefix $(OUT)/,$(TARGETS)) -all: $(TARGETS) +THREAD_TARGETS = $(OUT)/test_thread + +all: $(TARGETS) $(THREAD_TARGETS) # Full benchmark with statistical rigor (50 iterations, 5 warmup) bench: all @@ -27,6 +29,9 @@ CFLAGS += \ OBJS = tlsf.o OBJS := $(addprefix $(OUT)/,$(OBJS)) + +THREAD_OBJS = $(OUT)/tlsf_thread.o + deps := $(OBJS:%.o=%.o.d) $(OUT)/test: $(OBJS) tests/test.c @@ -38,16 +43,25 @@ $(OUT)/bench: $(OBJS) tests/bench.c $(OUT)/wcet: $(OBJS) tests/wcet.c $(CC) $(CFLAGS) -o $@ -MMD -MF $@.d $^ $(LDFLAGS) -lm +# Thread-safe module (requires pthreads) +$(OUT)/tlsf_thread.o: src/tlsf_thread.c include/tlsf_thread.h + @mkdir -p $(OUT) + $(CC) $(CFLAGS) -pthread -c -o $@ -MMD -MF $@.d $< + +$(OUT)/test_thread: $(OBJS) $(THREAD_OBJS) tests/test_thread.c + $(CC) $(CFLAGS) -pthread -o $@ -MMD -MF $@.d $^ $(LDFLAGS) + $(OUT)/%.o: src/%.c @mkdir -p $(OUT) $(CC) $(CFLAGS) -c -o $@ -MMD -MF $@.d $< -check: $(TARGETS) +check: $(TARGETS) $(THREAD_TARGETS) MALLOC_CHECK_=3 ./build/test MALLOC_CHECK_=3 ./build/bench -l 10000 -i 3 -w 1 MALLOC_CHECK_=3 ./build/bench -s 32 -l 10000 -i 3 -w 1 MALLOC_CHECK_=3 ./build/bench -s 10:12345 -l 10000 -i 3 -w 1 ./build/wcet -i 100 -w 10 + ./build/test_thread # Full WCET measurement (10000 iterations, 1000 warmup) wcet: all @@ -64,7 +78,7 @@ wcet-plot: all python3 scripts/wcet_plot.py $(OUT)/wcet_raw.csv -o $(OUT)/wcet clean: - $(RM) $(TARGETS) $(OBJS) $(deps) + $(RM) $(TARGETS) $(THREAD_TARGETS) $(OBJS) $(THREAD_OBJS) $(deps) $(RM) $(OUT)/wcet_raw.csv $(OUT)/wcet_summary.csv $(RM) $(OUT)/wcet_boxplot.png $(OUT)/wcet_histogram.png diff --git a/README.md b/README.md index 457aa36..2846172 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,10 @@ therefore no GPL restrictions apply. 
* Heap statistics and 4-phase consistency checking * WCET measurement infrastructure with cycle-accurate timing * Branch-free size-to-bin mapping +* Optional thread-safe wrapper (`tlsf_thread.h`) + with per-arena fine-grained locking and configurable lock primitives for RTOS portability * ~500 lines of core allocator code * Minimal libc: only `stddef.h`, `stdbool.h`, `stdint.h`, `string.h` -* Not thread-safe by design; callers provide external synchronization ## Build and Test @@ -81,8 +82,10 @@ tlsf_free(&s, r); | `tlsf_pool_init(t, mem, bytes)` | Initialize a fixed-size pool. Returns usable bytes, 0 on failure. | | `tlsf_append_pool(t, mem, size)` | Extend pool with adjacent memory. Returns bytes used, 0 on failure. | | `tlsf_resize(t, size)` | Platform callback for dynamic pool growth (weak symbol). | +| `tlsf_usable_size(ptr)` | Return the usable size of an allocated block. | | `tlsf_check(t)` | Validate heap consistency (requires `TLSF_ENABLE_CHECK`). | | `tlsf_get_stats(t, stats)` | Collect heap statistics (free/used bytes, block counts, overhead). | +| `tlsf_pool_reset(t)` | Reset a static pool to its initial empty state (bounded time). | ### Compile Flags @@ -93,6 +96,47 @@ tlsf_free(&s, r); | `TLSF_MAX_POOL_BITS` | Clamp FL index to reduce `tlsf_t` size. Pool max becomes `2^N` bytes. E.g. `-DTLSF_MAX_POOL_BITS=20` for 1 MB | | `TLSF_SPLIT_THRESHOLD` | Minimum remainder size (bytes) to split off when trimming. 
Default: `BLOCK_SIZE_MIN` (16 on 64-bit) | +### Thread-Safe Wrapper + +For concurrent use, include the optional per-arena wrapper: + +```c +#include "tlsf_thread.h" + +static char pool[4 * 1024 * 1024]; +tlsf_thread_t ts; + +size_t usable = tlsf_thread_init(&ts, pool, sizeof(pool)); +void *p = tlsf_thread_malloc(&ts, 256); +void *q = tlsf_thread_aalloc(&ts, 64, 256); +p = tlsf_thread_realloc(&ts, p, 512); +tlsf_thread_free(&ts, p); +tlsf_thread_free(&ts, q); +tlsf_thread_destroy(&ts); +``` + +| Function | Description | +|----------|-------------| +| `tlsf_thread_init(ts, mem, bytes)` | Split memory into per-arena sub-pools. Returns total usable bytes. | +| `tlsf_thread_destroy(ts)` | Release lock resources. Does not free the memory region. | +| `tlsf_thread_malloc(ts, size)` | Thread-safe malloc with per-arena locking. | +| `tlsf_thread_aalloc(ts, align, size)` | Thread-safe aligned allocation. | +| `tlsf_thread_realloc(ts, ptr, size)` | Thread-safe realloc. In-place first, cross-arena fallback. | +| `tlsf_thread_free(ts, ptr)` | Thread-safe free. Finds owning arena automatically. | +| `tlsf_thread_check(ts)` | Heap consistency check across all arenas. | +| `tlsf_thread_stats(ts, stats)` | Aggregate statistics across all arenas. | +| `tlsf_thread_reset(ts)` | Reset all arenas to initial state (bounded time). | + +| Compile Flag | Effect | +|-------------|--------| +| `TLSF_ARENA_COUNT` | Number of independent arenas (default 4). Power of two recommended. | +| `TLSF_LOCK_T` | Lock type. Override all six lock macros for RTOS portability. | +| `TLSF_THREAD_HINT()` | Thread-specific hash input for arena selection. Default: `pthread_self()`. | + +The default lock primitive is `pthread_mutex_t`. To use a platform-specific +lock (FreeRTOS semaphore, Zephyr k_mutex, bare-metal spinlock), define +`TLSF_LOCK_T` and all associated macros before including `tlsf_thread.h`. + ## Design ### Segregated Free Lists @@ -248,6 +292,29 @@ without one, allocations silently return NULL. 
Multiple independent allocator instances are supported by initializing separate `tlsf_t` structures with their own memory regions. +### Thread Safety + +The core allocator (`tlsf.h`) is single-threaded by design. +The optional wrapper (`tlsf_thread.h`) adds thread safety through per-arena fine-grained locking, +following the same multi-arena pattern used by jemalloc and mimalloc. + +The pool is split into `TLSF_ARENA_COUNT` independent sub-pools, each with its own lock. +Threads are mapped to arenas by a hash of their thread identifier, +so concurrent allocations from different threads typically hit different locks with zero contention. + +Allocation follows a two-phase fallback: +1. Fast path: lock the thread's preferred arena, allocate, unlock. +2. Slow path (arena exhausted): try remaining arenas via non-blocking `trylock` first, then blocking `acquire`. + +Free identifies the owning arena via pointer-range lookup (O(N) where N is the arena count, +effectively O(1) for small N) and locks only that arena. + +Realloc attempts in-place growth within the owning arena. +When the arena lacks space, it falls back to cross-arena malloc + memcpy + free. + +Trade-offs: more arenas reduce contention but partition memory (one arena can exhaust while others have space). +Fewer arenas improve memory utilization at the cost of higher contention. + ### Constants | Constant | 64-bit | 32-bit | Notes | diff --git a/include/tlsf.h b/include/tlsf.h index ef18ef3..6d14721 100644 --- a/include/tlsf.h +++ b/include/tlsf.h @@ -163,6 +163,18 @@ void *tlsf_realloc(tlsf_t *, void *, size_t); */ void tlsf_free(tlsf_t *, void *); +/** + * Return the usable size of an existing allocation. + * The usable size may exceed the originally requested size due to + * alignment rounding and bin-class quantization. + * Equivalent to POSIX malloc_usable_size(). + * + * @param ptr Pointer previously returned by tlsf_malloc/aalloc/realloc. + * Behavior is undefined if ptr has been freed. 
+ * @return Usable payload bytes, or 0 if ptr is NULL + */ +size_t tlsf_usable_size(void *ptr); + #ifdef TLSF_ENABLE_CHECK void tlsf_check(tlsf_t *); #else diff --git a/include/tlsf_thread.h b/include/tlsf_thread.h new file mode 100644 index 0000000..3657eba --- /dev/null +++ b/include/tlsf_thread.h @@ -0,0 +1,192 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* + * tlsf-bsd is freely redistributable under the BSD License. See the file + * "LICENSE" for information on usage and redistribution of this file. + */ + +/* + * Thread-safe TLSF wrapper with fine-grained per-arena locking. + * + * Instead of a single coarse mutex around the entire allocator, the pool + * is split into TLSF_ARENA_COUNT independent sub-pools (arenas), each + * with its own lock. Threads are mapped to arenas by a hash of their + * thread identifier, so concurrent allocations from different threads + * typically hit different locks with zero contention. + * + * Thread-safety contract (same as POSIX malloc/free): + * - Different threads may call any API function concurrently. + * - Concurrent operations on the SAME pointer are undefined behavior. + * Each live pointer must be owned by exactly one thread at a time; + * the owner may free or realloc it, but no other thread may simultaneously + * free, realloc, or read/write that pointer. + * - init, destroy, and reset are not thread-safe with respect to other API + * calls on the same tlsf_thread_t instance. Callers must ensure + * quiescence (no concurrent alloc/free/realloc) before calling them. + * + * Lock primitives are configurable: define TLSF_LOCK_T and the associated + * macros BEFORE including this header to use a platform-specific primitive + * (FreeRTOS semaphore, Zephyr k_mutex, bare-metal spinlock, etc.). + * Default: POSIX pthread_mutex_t. 
*/
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "tlsf.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Lock abstraction
+ *
+ * Override ALL six lock macros together before including this header.
+ * When providing custom locks, also define TLSF_THREAD_HINT() to
+ * return a thread-specific unsigned integer for arena selection.
+ *
+ * Example (FreeRTOS):
+ * #define TLSF_LOCK_T SemaphoreHandle_t
+ * #define TLSF_LOCK_INIT(l) (*(l) = xSemaphoreCreateMutex())
+ * #define TLSF_LOCK_DESTROY(l) vSemaphoreDelete(*(l))
+ * #define TLSF_LOCK_ACQUIRE(l) xSemaphoreTake(*(l), portMAX_DELAY)
+ * #define TLSF_LOCK_RELEASE(l) xSemaphoreGive(*(l))
+ * #define TLSF_LOCK_TRY(l) (xSemaphoreTake(*(l),0)==pdTRUE)
+ * #define TLSF_THREAD_HINT() ((unsigned)uxTaskGetTaskNumber(NULL))
+ * #include "tlsf_thread.h"
+ */
+
+#ifndef TLSF_LOCK_T
+
+#include <pthread.h>
+
+#define TLSF_LOCK_T pthread_mutex_t
+#define TLSF_LOCK_INIT(l) pthread_mutex_init((l), NULL)
+#define TLSF_LOCK_DESTROY(l) pthread_mutex_destroy((l))
+#define TLSF_LOCK_ACQUIRE(l) pthread_mutex_lock((l))
+#define TLSF_LOCK_RELEASE(l) pthread_mutex_unlock((l))
+#define TLSF_LOCK_TRY(l) (pthread_mutex_trylock((l)) == 0)
+
+#ifndef TLSF_THREAD_HINT
+/* Fold upper bits into lower 32 to retain entropy on 64-bit systems. */
+#define TLSF_THREAD_HINT()                        \
+    ((unsigned) ((uintptr_t) pthread_self() ^     \
+                 ((uintptr_t) pthread_self() >> 16)))
+#endif
+
+#endif /* TLSF_LOCK_T */
+
+/* Fallback thread hint for custom locks without a custom hint. */
+#ifndef TLSF_THREAD_HINT
+#define TLSF_THREAD_HINT() 0U
+#endif
+
+/*
+ * Number of independent arenas. Each arena has its own lock and TLSF
+ * pool, so N arenas support up to N contention-free concurrent
+ * allocations.
+ *
+ * Trade-offs:
+ * More arenas -> lower contention, but memory is partitioned (one
+ * arena can exhaust while others have space).
+ * Fewer arenas -> better memory utilization, higher contention.
+ *
+ * Must be >= 1. Power of two recommended for efficient hash mapping.
+ */ +#ifndef TLSF_ARENA_COUNT +#define TLSF_ARENA_COUNT 4 +#endif + +_Static_assert(TLSF_ARENA_COUNT >= 1, "TLSF_ARENA_COUNT must be >= 1"); + +/* + * Align each arena to a cache line to prevent false sharing between + * arenas that would otherwise sit on the same line. 64 bytes is the + * common L1 cache line size on x86-64 and ARMv8. + */ +#ifndef TLSF_CACHELINE_SIZE +#define TLSF_CACHELINE_SIZE 64 +#endif + +_Static_assert((TLSF_CACHELINE_SIZE & (TLSF_CACHELINE_SIZE - 1)) == 0, + "TLSF_CACHELINE_SIZE must be a power of two"); + +typedef struct { + tlsf_t pool; + TLSF_LOCK_T lock; + void *base; /* Arena memory base (for pointer ownership) */ + size_t capacity; /* Arena memory size in bytes */ +} __attribute__((aligned(TLSF_CACHELINE_SIZE))) tlsf_arena_t; + +typedef struct { + tlsf_arena_t arenas[TLSF_ARENA_COUNT]; + int count; /* Initialized arena count (<= TLSF_ARENA_COUNT) */ +} tlsf_thread_t; + +/** + * Initialize from a contiguous memory region, splitting it into up to + * TLSF_ARENA_COUNT independent sub-pools. The arena count may be + * reduced if the region is too small to support all arenas. + * + * @param ts Thread-safe allocator instance + * @param mem Memory region + * @param bytes Size of the memory region + * @return Total usable bytes across all arenas, or 0 on failure + */ +size_t tlsf_thread_init(tlsf_thread_t *ts, void *mem, size_t bytes); + +/** + * Destroy: release lock resources. Does not free the memory region + * passed to tlsf_thread_init (caller retains ownership). + */ +void tlsf_thread_destroy(tlsf_thread_t *ts); + +/** + * Thread-safe malloc. Tries the calling thread's preferred arena + * first, then falls back to other arenas via non-blocking try-lock, + * then blocking acquire. + */ +void *tlsf_thread_malloc(tlsf_thread_t *ts, size_t size); + +/** + * Thread-safe aligned allocation. + */ +void *tlsf_thread_aalloc(tlsf_thread_t *ts, size_t align, size_t size); + +/** + * Thread-safe realloc. 
Attempts in-place realloc within the owning + * arena first; falls back to cross-arena malloc + memcpy + free. + */ +void *tlsf_thread_realloc(tlsf_thread_t *ts, void *ptr, size_t size); + +/** + * Thread-safe free. Finds the owning arena automatically via + * pointer-range lookup (O(TLSF_ARENA_COUNT), effectively O(1)). + */ +void tlsf_thread_free(tlsf_thread_t *ts, void *ptr); + +/** + * Heap consistency check across all arenas. + * Acquires each arena lock in order during the check. + */ +void tlsf_thread_check(tlsf_thread_t *ts); + +/** + * Aggregate statistics across all arenas. + * largest_free reports the single largest free block in any arena. + */ +int tlsf_thread_stats(tlsf_thread_t *ts, tlsf_stats_t *stats); + +/** + * Reset all arenas to initial state (bounded time). + * All outstanding pointers become invalid. + */ +void tlsf_thread_reset(tlsf_thread_t *ts); + +#ifdef __cplusplus +} +#endif diff --git a/src/tlsf.c b/src/tlsf.c index 998cfa6..f12bfb1 100644 --- a/src/tlsf.c +++ b/src/tlsf.c @@ -871,6 +871,15 @@ void tlsf_free(tlsf_t *t, void *mem) block_insert(t, block); } +size_t tlsf_usable_size(void *ptr) +{ + if (UNLIKELY(!ptr)) + return 0; + tlsf_block_t *block = block_from_payload(ptr); + ASSERT(!block_is_free(block), "block must be allocated"); + return block_size(block); +} + void *tlsf_realloc(tlsf_t *t, void *mem, size_t size) { /* Zero-size requests are treated as free. */ diff --git a/src/tlsf_thread.c b/src/tlsf_thread.c new file mode 100644 index 0000000..0ab5ba9 --- /dev/null +++ b/src/tlsf_thread.c @@ -0,0 +1,312 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +/* + * tlsf-bsd is freely redistributable under the BSD License. See the file + * "LICENSE" for information on usage and redistribution of this file. + */ + +/* + * Thread-safe TLSF wrapper: per-arena fine-grained locking. + * + * See include/tlsf_thread.h for the design rationale and API + * documentation. 
*/
+
+#include <string.h>
+
+#include "tlsf_thread.h"
+
+/*
+ * Hash the thread hint to select a preferred arena.
+ *
+ * The mixing function distributes thread IDs that may differ only in
+ * their low bits (sequential handles, page-aligned stacks) across all
+ * arenas.
+ */
+static inline int arena_select(const tlsf_thread_t *ts)
+{
+    unsigned h = TLSF_THREAD_HINT();
+    h ^= h >> 16;
+    h *= 0x45d9f3bU;
+    h ^= h >> 16;
+    return (int) (h % (unsigned) ts->count);
+}
+
+/*
+ * Find which arena owns a pointer by range check.
+ * O(TLSF_ARENA_COUNT) -- effectively O(1) for small N.
+ * Returns -1 if the pointer is not from any arena.
+ */
+static inline int arena_find(const tlsf_thread_t *ts, const void *ptr)
+{
+    uintptr_t p = (uintptr_t) ptr;
+    for (int i = 0; i < ts->count; i++) {
+        uintptr_t base = (uintptr_t) ts->arenas[i].base;
+        if (p >= base && (p - base) < ts->arenas[i].capacity)
+            return i;
+    }
+    return -1;
+}
+
+/*
+ * Try to allocate from arenas other than `skip`, using non-blocking
+ * try-lock first, then blocking acquire. Returns NULL if all arenas
+ * are exhausted.
+ */ +static void *arena_fallback_malloc(tlsf_thread_t *ts, int skip, size_t size) +{ + void *ptr; + + /* Phase 1: non-blocking scan */ + for (int i = 1; i < ts->count; i++) { + int idx = (skip + i) % ts->count; + if (TLSF_LOCK_TRY(&ts->arenas[idx].lock)) { + ptr = tlsf_malloc(&ts->arenas[idx].pool, size); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); + if (ptr) + return ptr; + } + } + + /* Phase 2: blocking scan */ + for (int i = 1; i < ts->count; i++) { + int idx = (skip + i) % ts->count; + TLSF_LOCK_ACQUIRE(&ts->arenas[idx].lock); + ptr = tlsf_malloc(&ts->arenas[idx].pool, size); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); + if (ptr) + return ptr; + } + + return NULL; +} + +static void *arena_fallback_aalloc(tlsf_thread_t *ts, + int skip, + size_t align, + size_t size) +{ + void *ptr; + + for (int i = 1; i < ts->count; i++) { + int idx = (skip + i) % ts->count; + if (TLSF_LOCK_TRY(&ts->arenas[idx].lock)) { + ptr = tlsf_aalloc(&ts->arenas[idx].pool, align, size); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); + if (ptr) + return ptr; + } + } + + for (int i = 1; i < ts->count; i++) { + int idx = (skip + i) % ts->count; + TLSF_LOCK_ACQUIRE(&ts->arenas[idx].lock); + ptr = tlsf_aalloc(&ts->arenas[idx].pool, align, size); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); + if (ptr) + return ptr; + } + + return NULL; +} + +size_t tlsf_thread_init(tlsf_thread_t *ts, void *mem, size_t bytes) +{ + if (!ts || !mem || !bytes) + return 0; + + memset(ts, 0, sizeof(*ts)); + + /* + * Determine how many arenas we can fit. Reduce the count if the + * per-arena share is too small for a viable TLSF pool. + */ + int count = TLSF_ARENA_COUNT; + size_t min_arena = 256; + while (count > 1 && bytes / (unsigned) count < min_arena) + count >>= 1; + + size_t per_arena = + (bytes / (unsigned) count) & (size_t) ~(TLSF_CACHELINE_SIZE - 1); + size_t total_usable = 0; + char *base = (char *) mem; + + for (int i = 0; i < count; i++) { + /* Last arena absorbs any remainder from integer division. 
*/ + size_t chunk = + (i == count - 1) ? bytes - (size_t) i * per_arena : per_arena; + + ts->arenas[i].base = base + (size_t) i * per_arena; + ts->arenas[i].capacity = chunk; + TLSF_LOCK_INIT(&ts->arenas[i].lock); + + size_t usable = + tlsf_pool_init(&ts->arenas[i].pool, ts->arenas[i].base, chunk); + if (!usable) { + /* Cleanup previously initialized arenas. */ + for (int j = 0; j <= i; j++) + TLSF_LOCK_DESTROY(&ts->arenas[j].lock); + memset(ts, 0, sizeof(*ts)); + return 0; + } + total_usable += usable; + } + + ts->count = count; + return total_usable; +} + +void tlsf_thread_destroy(tlsf_thread_t *ts) +{ + if (!ts) + return; + for (int i = 0; i < ts->count; i++) + TLSF_LOCK_DESTROY(&ts->arenas[i].lock); + ts->count = 0; +} + +void *tlsf_thread_malloc(tlsf_thread_t *ts, size_t size) +{ + if (!ts->count) + return NULL; + + int preferred = arena_select(ts); + void *ptr; + + /* Fast path: thread-preferred arena. */ + TLSF_LOCK_ACQUIRE(&ts->arenas[preferred].lock); + ptr = tlsf_malloc(&ts->arenas[preferred].pool, size); + TLSF_LOCK_RELEASE(&ts->arenas[preferred].lock); + if (ptr) + return ptr; + + /* Slow path: try remaining arenas. 
*/ + return arena_fallback_malloc(ts, preferred, size); +} + +void *tlsf_thread_aalloc(tlsf_thread_t *ts, size_t align, size_t size) +{ + if (!ts->count) + return NULL; + + int preferred = arena_select(ts); + void *ptr; + + TLSF_LOCK_ACQUIRE(&ts->arenas[preferred].lock); + ptr = tlsf_aalloc(&ts->arenas[preferred].pool, align, size); + TLSF_LOCK_RELEASE(&ts->arenas[preferred].lock); + if (ptr) + return ptr; + + return arena_fallback_aalloc(ts, preferred, align, size); +} + +void tlsf_thread_free(tlsf_thread_t *ts, void *ptr) +{ + if (!ptr) + return; + + int idx = arena_find(ts, ptr); + if (idx < 0) + return; + + TLSF_LOCK_ACQUIRE(&ts->arenas[idx].lock); + tlsf_free(&ts->arenas[idx].pool, ptr); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); +} + +void *tlsf_thread_realloc(tlsf_thread_t *ts, void *ptr, size_t size) +{ + if (!ptr) + return tlsf_thread_malloc(ts, size); + + if (!size) { + tlsf_thread_free(ts, ptr); + return NULL; + } + + int idx = arena_find(ts, ptr); + if (idx < 0) + return NULL; + + /* + * Try in-place realloc within the owning arena. We also grab + * the old usable size while we hold the lock, in case we need + * to do a cross-arena relocation afterwards. + */ + size_t old_size; + TLSF_LOCK_ACQUIRE(&ts->arenas[idx].lock); + old_size = tlsf_usable_size(ptr); + void *new_ptr = tlsf_realloc(&ts->arenas[idx].pool, ptr, size); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); + + if (new_ptr) + return new_ptr; + + /* + * In-arena realloc failed (arena exhausted for the new size). + * The old block is untouched. Allocate from any arena, copy, + * then free the original. + */ + new_ptr = tlsf_thread_malloc(ts, size); + if (!new_ptr) + return NULL; + + size_t copy_size = old_size < size ? 
old_size : size; + memcpy(new_ptr, ptr, copy_size); + + TLSF_LOCK_ACQUIRE(&ts->arenas[idx].lock); + tlsf_free(&ts->arenas[idx].pool, ptr); + TLSF_LOCK_RELEASE(&ts->arenas[idx].lock); + + return new_ptr; +} + +void tlsf_thread_check(tlsf_thread_t *ts) +{ + if (!ts) + return; + for (int i = 0; i < ts->count; i++) { + TLSF_LOCK_ACQUIRE(&ts->arenas[i].lock); + tlsf_check(&ts->arenas[i].pool); + TLSF_LOCK_RELEASE(&ts->arenas[i].lock); + } +} + +int tlsf_thread_stats(tlsf_thread_t *ts, tlsf_stats_t *stats) +{ + if (!ts || !stats) + return -1; + + memset(stats, 0, sizeof(*stats)); + + for (int i = 0; i < ts->count; i++) { + tlsf_stats_t arena_stats; + TLSF_LOCK_ACQUIRE(&ts->arenas[i].lock); + int rc = tlsf_get_stats(&ts->arenas[i].pool, &arena_stats); + TLSF_LOCK_RELEASE(&ts->arenas[i].lock); + if (rc < 0) + return rc; + + stats->total_free += arena_stats.total_free; + stats->total_used += arena_stats.total_used; + stats->block_count += arena_stats.block_count; + stats->free_count += arena_stats.free_count; + stats->overhead += arena_stats.overhead; + if (arena_stats.largest_free > stats->largest_free) + stats->largest_free = arena_stats.largest_free; + } + + return 0; +} + +void tlsf_thread_reset(tlsf_thread_t *ts) +{ + if (!ts) + return; + for (int i = 0; i < ts->count; i++) { + TLSF_LOCK_ACQUIRE(&ts->arenas[i].lock); + tlsf_pool_reset(&ts->arenas[i].pool); + TLSF_LOCK_RELEASE(&ts->arenas[i].lock); + } +} diff --git a/tests/test_thread.c b/tests/test_thread.c new file mode 100644 index 0000000..e815c02 --- /dev/null +++ b/tests/test_thread.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Thread-safety stress test for the per-arena TLSF wrapper. + * + * Spawns multiple threads that concurrently malloc/free/realloc from a + * shared tlsf_thread_t instance. 
Verifies:
+ * - No data corruption (fill-pattern integrity)
+ * - No double-free or use-after-free (ASan / TLSF_ENABLE_CHECK)
+ * - Arena distribution (multiple arenas actually used)
+ * - Aggregate statistics consistency after all threads join
+ */
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "tlsf_thread.h"
+
+/* ------------------------------------------------------------------ */
+/* Test parameters (tuned for < 2s on modern hardware)                */
+/* ------------------------------------------------------------------ */
+
+#define POOL_SIZE (4 * 1024 * 1024) /* 4 MB static pool */
+#define NUM_THREADS 8
+#define OPS_PER_THREAD 50000
+#define MAX_ALLOCS 128
+#define MAX_ALLOC_SIZE 2048
+
+static char pool[POOL_SIZE] __attribute__((aligned(16)));
+static tlsf_thread_t ts;
+
+/* ------------------------------------------------------------------ */
+/* Per-thread work                                                    */
+/* ------------------------------------------------------------------ */
+
+typedef struct {
+    int id;
+    int errors;
+    int alloc_count;   /* total successful allocations */
+    int free_count;    /* total frees */
+    int realloc_count; /* total reallocs */
+} thread_result_t;
+
+static void *thread_func(void *arg)
+{
+    thread_result_t *res = (thread_result_t *) arg;
+    void *ptrs[MAX_ALLOCS];
+    size_t sizes[MAX_ALLOCS];
+    int count = 0;
+    unsigned seed = (unsigned) res->id * 2654435761U + 42;
+
+    memset(ptrs, 0, sizeof(ptrs));
+
+    for (int op = 0; op < OPS_PER_THREAD; op++) {
+        int action = (int) (rand_r(&seed) % 4);
+
+        switch (action) {
+        case 0: /* malloc */
+        case 1:
+            if (count < MAX_ALLOCS) {
+                size_t sz = (size_t) (rand_r(&seed) % MAX_ALLOC_SIZE) + 1;
+                void *p = tlsf_thread_malloc(&ts, sz);
+                if (p) {
+                    /* Fill with per-thread pattern for integrity check */
+                    memset(p, res->id & 0xFF, sz);
+                    ptrs[count] = p;
+                    sizes[count] = sz;
+                    count++;
+                    res->alloc_count++;
+                }
+            }
+            break;
+
+        case 2: /* free */
+            if (count > 0) {
+                int idx = (int) ((unsigned) rand_r(&seed) % (unsigned)
count); + /* Verify fill pattern before freeing */ + uint8_t *data = (uint8_t *) ptrs[idx]; + for (size_t i = 0; i < sizes[idx]; i++) { + if (data[i] != (uint8_t) (res->id & 0xFF)) { + res->errors++; + break; + } + } + tlsf_thread_free(&ts, ptrs[idx]); + res->free_count++; + /* Swap-remove */ + ptrs[idx] = ptrs[count - 1]; + sizes[idx] = sizes[count - 1]; + count--; + } + break; + + case 3: /* realloc */ + if (count > 0) { + int idx = (int) ((unsigned) rand_r(&seed) % (unsigned) count); + size_t old_sz = sizes[idx]; + size_t new_sz = (size_t) (rand_r(&seed) % MAX_ALLOC_SIZE) + 1; + + void *p = tlsf_thread_realloc(&ts, ptrs[idx], new_sz); + if (p) { + /* Verify preserved portion */ + uint8_t *data = (uint8_t *) p; + size_t verify = old_sz < new_sz ? old_sz : new_sz; + for (size_t i = 0; i < verify; i++) { + if (data[i] != (uint8_t) (res->id & 0xFF)) { + res->errors++; + break; + } + } + /* Re-fill entirely with the pattern */ + memset(p, res->id & 0xFF, new_sz); + ptrs[idx] = p; + sizes[idx] = new_sz; + res->realloc_count++; + } + } + break; + } + } + + /* Free all remaining allocations */ + for (int i = 0; i < count; i++) { + uint8_t *data = (uint8_t *) ptrs[i]; + for (size_t j = 0; j < sizes[i]; j++) { + if (data[j] != (uint8_t) (res->id & 0xFF)) { + res->errors++; + break; + } + } + tlsf_thread_free(&ts, ptrs[i]); + } + + return NULL; +} + +/* ------------------------------------------------------------------ */ +/* Test: multi-threaded stress */ +/* ------------------------------------------------------------------ */ + +static void stress_test(void) +{ + printf("Thread stress test (%d threads, %d ops each): ", NUM_THREADS, + OPS_PER_THREAD); + fflush(stdout); + + size_t usable = tlsf_thread_init(&ts, pool, sizeof(pool)); + assert(usable > 0); + printf("(%d arenas, %zu usable) ", ts.count, usable); + fflush(stdout); + + pthread_t threads[NUM_THREADS]; + thread_result_t results[NUM_THREADS]; + + for (int i = 0; i < NUM_THREADS; i++) { + results[i].id = i; + 
results[i].errors = 0; + results[i].alloc_count = 0; + results[i].free_count = 0; + results[i].realloc_count = 0; + pthread_create(&threads[i], NULL, thread_func, &results[i]); + } + + int total_errors = 0; + int total_allocs = 0, total_frees = 0, total_reallocs = 0; + for (int i = 0; i < NUM_THREADS; i++) { + pthread_join(threads[i], NULL); + total_errors += results[i].errors; + total_allocs += results[i].alloc_count; + total_frees += results[i].free_count; + total_reallocs += results[i].realloc_count; + } + + /* Verify heap consistency after all threads complete. */ + tlsf_thread_check(&ts); + + /* All allocations should have been freed. */ + tlsf_stats_t stats; + int rc = tlsf_thread_stats(&ts, &stats); + assert(rc == 0); + assert(stats.total_used == 0); + + printf("done (%d allocs, %d frees, %d reallocs)\n", total_allocs, + total_frees, total_reallocs); + assert(total_errors == 0); + + tlsf_thread_destroy(&ts); +} + +/* ------------------------------------------------------------------ */ +/* Test: aligned allocation under contention */ +/* ------------------------------------------------------------------ */ + +static void *aligned_thread_func(void *arg) +{ + int id = *(int *) arg; + unsigned seed = (unsigned) id * 0xDEADBEEF + 7; + + for (int op = 0; op < 5000; op++) { + /* Alignment: power of two from 8 to 4096 */ + unsigned shift = (unsigned) (rand_r(&seed) % 10) + 3; /* 8 to 8192 */ + size_t align = (size_t) 1 << shift; + if (align > 4096) + align = 4096; + size_t sz = (size_t) (rand_r(&seed) % 512) + 1; + + void *p = tlsf_thread_aalloc(&ts, align, sz); + if (p) { + assert(((uintptr_t) p % align) == 0); + memset(p, id & 0xFF, sz); + tlsf_thread_free(&ts, p); + } + } + return NULL; +} + +static void aligned_test(void) +{ + printf("Thread aligned alloc test: "); + fflush(stdout); + + size_t usable = tlsf_thread_init(&ts, pool, sizeof(pool)); + assert(usable > 0); + + pthread_t threads[NUM_THREADS]; + int ids[NUM_THREADS]; + for (int i = 0; i < NUM_THREADS; 
i++) { + ids[i] = i; + pthread_create(&threads[i], NULL, aligned_thread_func, &ids[i]); + } + for (int i = 0; i < NUM_THREADS; i++) + pthread_join(threads[i], NULL); + + tlsf_thread_check(&ts); + + tlsf_stats_t stats; + tlsf_thread_stats(&ts, &stats); + assert(stats.total_used == 0); + + tlsf_thread_destroy(&ts); + printf("done\n"); +} + +/* ------------------------------------------------------------------ */ +/* Test: reset under quiescence */ +/* ------------------------------------------------------------------ */ + +static void reset_test(void) +{ + printf("Thread pool reset test: "); + fflush(stdout); + + size_t usable = tlsf_thread_init(&ts, pool, sizeof(pool)); + assert(usable > 0); + + /* Allocate from multiple threads, then reset. */ + void *ptrs[64]; + int count = 0; + for (int i = 0; i < 64; i++) { + ptrs[i] = tlsf_thread_malloc(&ts, 256); + if (ptrs[i]) + count++; + } + assert(count > 0); + + /* Reset discards everything. */ + tlsf_thread_reset(&ts); + tlsf_thread_check(&ts); + + /* All memory should be free after reset. */ + tlsf_stats_t stats; + tlsf_thread_stats(&ts, &stats); + assert(stats.total_used == 0); + assert(stats.total_free == usable); + + /* Pool should be usable after reset. 
*/ + void *p = tlsf_thread_malloc(&ts, 100); + assert(p); + tlsf_thread_free(&ts, p); + + tlsf_thread_destroy(&ts); + printf("done\n"); +} + +/* ------------------------------------------------------------------ */ +/* Test: single-threaded basic sanity */ +/* ------------------------------------------------------------------ */ + +static void basic_test(void) +{ + printf("Thread wrapper basic test: "); + fflush(stdout); + + size_t usable = tlsf_thread_init(&ts, pool, sizeof(pool)); + assert(usable > 0); + assert(ts.count >= 1); + + /* malloc / free */ + void *p = tlsf_thread_malloc(&ts, 100); + assert(p); + memset(p, 0xAA, 100); + tlsf_thread_free(&ts, p); + + /* aalloc */ + p = tlsf_thread_aalloc(&ts, 256, 100); + assert(p); + assert(((uintptr_t) p % 256) == 0); + tlsf_thread_free(&ts, p); + + /* realloc */ + p = tlsf_thread_malloc(&ts, 50); + assert(p); + memset(p, 0xBB, 50); + void *q = tlsf_thread_realloc(&ts, p, 200); + assert(q); + uint8_t *data = (uint8_t *) q; + for (int i = 0; i < 50; i++) + assert(data[i] == 0xBB); + tlsf_thread_free(&ts, q); + + /* realloc NULL -> malloc */ + p = tlsf_thread_realloc(&ts, NULL, 64); + assert(p); + tlsf_thread_free(&ts, p); + + /* realloc ptr, 0 -> free */ + p = tlsf_thread_malloc(&ts, 32); + assert(p); + q = tlsf_thread_realloc(&ts, p, 0); + assert(q == NULL); + + /* free NULL is a no-op */ + tlsf_thread_free(&ts, NULL); + + /* stats */ + tlsf_stats_t stats; + int rc = tlsf_thread_stats(&ts, &stats); + assert(rc == 0); + assert(stats.total_used == 0); + + /* usable_size */ + p = tlsf_thread_malloc(&ts, 100); + assert(p); + size_t us = tlsf_usable_size(p); + assert(us >= 100); + tlsf_thread_free(&ts, p); + + tlsf_thread_check(&ts); + tlsf_thread_destroy(&ts); + printf("done\n"); +} + +/* ------------------------------------------------------------------ */ +/* Main */ +/* ------------------------------------------------------------------ */ + +int main(void) +{ + printf("=== Thread-safe TLSF tests ===\n"); + printf("Arena 
count: %d\n", TLSF_ARENA_COUNT); + + basic_test(); + stress_test(); + aligned_test(); + reset_test(); + + puts("OK!"); + return 0; +}