Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ TARGETS = \
wcet
TARGETS := $(addprefix $(OUT)/,$(TARGETS))

all: $(TARGETS)
THREAD_TARGETS = $(OUT)/test_thread

all: $(TARGETS) $(THREAD_TARGETS)

# Full benchmark with statistical rigor (50 iterations, 5 warmup)
bench: all
Expand All @@ -27,6 +29,9 @@ CFLAGS += \

OBJS = tlsf.o
OBJS := $(addprefix $(OUT)/,$(OBJS))

THREAD_OBJS = $(OUT)/tlsf_thread.o

deps := $(OBJS:%.o=%.o.d)

$(OUT)/test: $(OBJS) tests/test.c
Expand All @@ -38,16 +43,25 @@ $(OUT)/bench: $(OBJS) tests/bench.c
$(OUT)/wcet: $(OBJS) tests/wcet.c
$(CC) $(CFLAGS) -o $@ -MMD -MF $@.d $^ $(LDFLAGS) -lm

# Thread-safe module (requires pthreads)
$(OUT)/tlsf_thread.o: src/tlsf_thread.c include/tlsf_thread.h
@mkdir -p $(OUT)
$(CC) $(CFLAGS) -pthread -c -o $@ -MMD -MF $@.d $<

$(OUT)/test_thread: $(OBJS) $(THREAD_OBJS) tests/test_thread.c
$(CC) $(CFLAGS) -pthread -o $@ -MMD -MF $@.d $^ $(LDFLAGS)

$(OUT)/%.o: src/%.c
@mkdir -p $(OUT)
$(CC) $(CFLAGS) -c -o $@ -MMD -MF $@.d $<

check: $(TARGETS)
check: $(TARGETS) $(THREAD_TARGETS)
MALLOC_CHECK_=3 ./build/test
MALLOC_CHECK_=3 ./build/bench -l 10000 -i 3 -w 1
MALLOC_CHECK_=3 ./build/bench -s 32 -l 10000 -i 3 -w 1
MALLOC_CHECK_=3 ./build/bench -s 10:12345 -l 10000 -i 3 -w 1
./build/wcet -i 100 -w 10
./build/test_thread

# Full WCET measurement (10000 iterations, 1000 warmup)
wcet: all
Expand All @@ -64,7 +78,7 @@ wcet-plot: all
python3 scripts/wcet_plot.py $(OUT)/wcet_raw.csv -o $(OUT)/wcet

clean:
$(RM) $(TARGETS) $(OBJS) $(deps)
$(RM) $(TARGETS) $(THREAD_TARGETS) $(OBJS) $(THREAD_OBJS) $(deps)
$(RM) $(OUT)/wcet_raw.csv $(OUT)/wcet_summary.csv
$(RM) $(OUT)/wcet_boxplot.png $(OUT)/wcet_histogram.png

Expand Down
69 changes: 68 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ therefore no GPL restrictions apply.
* Heap statistics and 4-phase consistency checking
* WCET measurement infrastructure with cycle-accurate timing
* Branch-free size-to-bin mapping
* Optional thread-safe wrapper (`tlsf_thread.h`)
with per-arena fine-grained locking and configurable lock primitives for RTOS portability
* ~500 lines of core allocator code
* Minimal libc: only `stddef.h`, `stdbool.h`, `stdint.h`, `string.h`
* Core allocator is not thread-safe by design; callers provide external synchronization or use the optional wrapper

## Build and Test

Expand Down Expand Up @@ -81,8 +82,10 @@ tlsf_free(&s, r);
| `tlsf_pool_init(t, mem, bytes)` | Initialize a fixed-size pool. Returns usable bytes, 0 on failure. |
| `tlsf_append_pool(t, mem, size)` | Extend pool with adjacent memory. Returns bytes used, 0 on failure. |
| `tlsf_resize(t, size)` | Platform callback for dynamic pool growth (weak symbol). |
| `tlsf_usable_size(ptr)` | Return the usable size of an allocated block. |
| `tlsf_check(t)` | Validate heap consistency (requires `TLSF_ENABLE_CHECK`). |
| `tlsf_get_stats(t, stats)` | Collect heap statistics (free/used bytes, block counts, overhead). |
| `tlsf_pool_reset(t)` | Reset a static pool to its initial empty state (bounded time). |

### Compile Flags

Expand All @@ -93,6 +96,47 @@ tlsf_free(&s, r);
| `TLSF_MAX_POOL_BITS` | Clamp FL index to reduce `tlsf_t` size. Pool max becomes `2^N` bytes. E.g. `-DTLSF_MAX_POOL_BITS=20` for 1 MB |
| `TLSF_SPLIT_THRESHOLD` | Minimum remainder size (bytes) to split off when trimming. Default: `BLOCK_SIZE_MIN` (16 on 64-bit) |

### Thread-Safe Wrapper

For concurrent use, include the optional per-arena wrapper:

```c
#include "tlsf_thread.h"

static char pool[4 * 1024 * 1024];
tlsf_thread_t ts;

size_t usable = tlsf_thread_init(&ts, pool, sizeof(pool));
void *p = tlsf_thread_malloc(&ts, 256);
void *q = tlsf_thread_aalloc(&ts, 64, 256);
p = tlsf_thread_realloc(&ts, p, 512);
tlsf_thread_free(&ts, p);
tlsf_thread_free(&ts, q);
tlsf_thread_destroy(&ts);
```

| Function | Description |
|----------|-------------|
| `tlsf_thread_init(ts, mem, bytes)` | Split memory into per-arena sub-pools. Returns total usable bytes. |
| `tlsf_thread_destroy(ts)` | Release lock resources. Does not free the memory region. |
| `tlsf_thread_malloc(ts, size)` | Thread-safe malloc with per-arena locking. |
| `tlsf_thread_aalloc(ts, align, size)` | Thread-safe aligned allocation. |
| `tlsf_thread_realloc(ts, ptr, size)` | Thread-safe realloc. In-place first, cross-arena fallback. |
| `tlsf_thread_free(ts, ptr)` | Thread-safe free. Finds owning arena automatically. |
| `tlsf_thread_check(ts)` | Heap consistency check across all arenas. |
| `tlsf_thread_stats(ts, stats)` | Aggregate statistics across all arenas. |
| `tlsf_thread_reset(ts)` | Reset all arenas to initial state (bounded time). |

| Compile Flag | Effect |
|-------------|--------|
| `TLSF_ARENA_COUNT` | Number of independent arenas (default 4). Power of two recommended. |
| `TLSF_LOCK_T` | Lock type. Override all six lock macros for RTOS portability. |
| `TLSF_THREAD_HINT()` | Thread-specific hash input for arena selection. Default: `pthread_self()`. |

The default lock primitive is `pthread_mutex_t`. To use a platform-specific
lock (FreeRTOS semaphore, Zephyr k_mutex, bare-metal spinlock), define
`TLSF_LOCK_T` and all associated macros before including `tlsf_thread.h`.

## Design

### Segregated Free Lists
Expand Down Expand Up @@ -248,6 +292,29 @@ without one, allocations silently return NULL.

Multiple independent allocator instances are supported by initializing separate `tlsf_t` structures with their own memory regions.

### Thread Safety

The core allocator (`tlsf.h`) is single-threaded by design.
The optional wrapper (`tlsf_thread.h`) adds thread safety through per-arena fine-grained locking,
following the same multi-arena pattern used by jemalloc and mimalloc.

The pool is split into `TLSF_ARENA_COUNT` independent sub-pools, each with its own lock.
Threads are mapped to arenas by a hash of their thread identifier,
so concurrent allocations from different threads typically hit different locks with zero contention.

Allocation follows a two-phase fallback:
1. Fast path: lock the thread's preferred arena, allocate, unlock.
2. Slow path (arena exhausted): try remaining arenas via non-blocking `trylock` first, then blocking `acquire`.

Free identifies the owning arena via pointer-range lookup (O(N) where N is the arena count,
effectively O(1) for small N) and locks only that arena.

Realloc attempts in-place growth within the owning arena.
When the arena lacks space, it falls back to cross-arena malloc + memcpy + free.

Trade-offs: more arenas reduce contention but partition memory (one arena can exhaust while others have space).
Fewer arenas improve memory utilization at the cost of higher contention.

### Constants

| Constant | 64-bit | 32-bit | Notes |
Expand Down
12 changes: 12 additions & 0 deletions include/tlsf.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,18 @@ void *tlsf_realloc(tlsf_t *, void *, size_t);
*/
void tlsf_free(tlsf_t *, void *);

/**
* Return the usable size of an existing allocation.
* The usable size may exceed the originally requested size due to
* alignment rounding and bin-class quantization.
* Equivalent to POSIX malloc_usable_size().
*
* @param ptr Pointer previously returned by tlsf_malloc/aalloc/realloc.
* Behavior is undefined if ptr has been freed.
* @return Usable payload bytes, or 0 if ptr is NULL
*/
size_t tlsf_usable_size(void *ptr);

#ifdef TLSF_ENABLE_CHECK
void tlsf_check(tlsf_t *);
#else
Expand Down
192 changes: 192 additions & 0 deletions include/tlsf_thread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/*
* SPDX-License-Identifier: BSD-3-Clause
*/

/*
* tlsf-bsd is freely redistributable under the BSD License. See the file
* "LICENSE" for information on usage and redistribution of this file.
*/

/*
* Thread-safe TLSF wrapper with fine-grained per-arena locking.
*
* Instead of a single coarse mutex around the entire allocator, the pool
* is split into TLSF_ARENA_COUNT independent sub-pools (arenas), each
* with its own lock. Threads are mapped to arenas by a hash of their
* thread identifier, so concurrent allocations from different threads
* typically hit different locks with zero contention.
*
* Thread-safety contract (same as POSIX malloc/free):
* - Different threads may call any API function concurrently.
* - Concurrent operations on the SAME pointer are undefined behavior.
* Each live pointer must be owned by exactly one thread at a time;
* the owner may free or realloc it, but no other thread may simultaneously
* free, realloc, or read/write that pointer.
* - init, destroy, and reset are not thread-safe with respect to other API
* calls on the same tlsf_thread_t instance. Callers must ensure
* quiescence (no concurrent alloc/free/realloc) before calling them.
*
* Lock primitives are configurable: define TLSF_LOCK_T and the associated
* macros BEFORE including this header to use a platform-specific primitive
* (FreeRTOS semaphore, Zephyr k_mutex, bare-metal spinlock, etc.).
* Default: POSIX pthread_mutex_t.
*/

#pragma once

#ifdef __cplusplus
extern "C" {
#endif

#include "tlsf.h"

#include <stddef.h>
#include <stdint.h>

/* Lock abstraction
*
* Override ALL six lock macros together before including this header.
* When providing custom locks, also define TLSF_THREAD_HINT() to
* return a thread-specific unsigned integer for arena selection.
*
* Example (FreeRTOS):
* #define TLSF_LOCK_T SemaphoreHandle_t
* #define TLSF_LOCK_INIT(l) (*(l) = xSemaphoreCreateMutex())
* #define TLSF_LOCK_DESTROY(l) vSemaphoreDelete(*(l))
* #define TLSF_LOCK_ACQUIRE(l) xSemaphoreTake(*(l), portMAX_DELAY)
* #define TLSF_LOCK_RELEASE(l) xSemaphoreGive(*(l))
* #define TLSF_LOCK_TRY(l) (xSemaphoreTake(*(l),0)==pdTRUE)
* #define TLSF_THREAD_HINT() ((unsigned)uxTaskGetTaskNumber(NULL))
* #include "tlsf_thread.h"
*/

#ifndef TLSF_LOCK_T

#include <pthread.h>

#define TLSF_LOCK_T pthread_mutex_t
#define TLSF_LOCK_INIT(l) pthread_mutex_init((l), NULL)
#define TLSF_LOCK_DESTROY(l) pthread_mutex_destroy((l))
#define TLSF_LOCK_ACQUIRE(l) pthread_mutex_lock((l))
#define TLSF_LOCK_RELEASE(l) pthread_mutex_unlock((l))
#define TLSF_LOCK_TRY(l) (pthread_mutex_trylock((l)) == 0)

#ifndef TLSF_THREAD_HINT
/* Fold upper bits into lower 32 to retain entropy on 64-bit systems. */
#define TLSF_THREAD_HINT() \
((unsigned) ((uintptr_t) pthread_self() ^ \
((uintptr_t) pthread_self() >> 16)))
#endif

#endif /* TLSF_LOCK_T */

/* Fallback thread hint for custom locks without a custom hint. */
#ifndef TLSF_THREAD_HINT
#define TLSF_THREAD_HINT() 0U
#endif

/*
* Number of independent arenas. Each arena has its own lock and TLSF
* pool, so N arenas support up to N contention-free concurrent
* allocations.
*
* Trade-offs:
* More arenas -> lower contention, but memory is partitioned (one
* arena can exhaust while others have space).
* Fewer arenas -> better memory utilization, higher contention.
*
* Must be >= 1. Power of two recommended for efficient hash mapping.
*/
#ifndef TLSF_ARENA_COUNT
#define TLSF_ARENA_COUNT 4
#endif

_Static_assert(TLSF_ARENA_COUNT >= 1, "TLSF_ARENA_COUNT must be >= 1");

/*
* Align each arena to a cache line to prevent false sharing between
* arenas that would otherwise sit on the same line. 64 bytes is the
* common L1 cache line size on x86-64 and ARMv8.
*/
#ifndef TLSF_CACHELINE_SIZE
#define TLSF_CACHELINE_SIZE 64
#endif

_Static_assert((TLSF_CACHELINE_SIZE & (TLSF_CACHELINE_SIZE - 1)) == 0,
"TLSF_CACHELINE_SIZE must be a power of two");

/* One independent TLSF sub-pool plus its lock. The whole struct is
 * cache-line aligned (see TLSF_CACHELINE_SIZE above) so adjacent
 * arenas never share a line, preventing false sharing on the lock. */
typedef struct {
    tlsf_t pool;      /* Underlying single-threaded TLSF allocator */
    TLSF_LOCK_T lock; /* Guards every operation on this arena's pool */
    void *base;       /* Arena memory base (for pointer ownership) */
    size_t capacity;  /* Arena memory size in bytes */
} __attribute__((aligned(TLSF_CACHELINE_SIZE))) tlsf_arena_t;

/* Thread-safe allocator instance: a fixed array of arenas. */
typedef struct {
    tlsf_arena_t arenas[TLSF_ARENA_COUNT];
    int count; /* Initialized arena count (<= TLSF_ARENA_COUNT) */
} tlsf_thread_t;

/**
* Initialize from a contiguous memory region, splitting it into up to
* TLSF_ARENA_COUNT independent sub-pools. The arena count may be
* reduced if the region is too small to support all arenas.
*
* @param ts Thread-safe allocator instance
* @param mem Memory region
* @param bytes Size of the memory region
* @return Total usable bytes across all arenas, or 0 on failure
*/
size_t tlsf_thread_init(tlsf_thread_t *ts, void *mem, size_t bytes);

/**
* Destroy: release lock resources. Does not free the memory region
* passed to tlsf_thread_init (caller retains ownership).
*/
void tlsf_thread_destroy(tlsf_thread_t *ts);

/**
* Thread-safe malloc. Tries the calling thread's preferred arena
* first, then falls back to other arenas via non-blocking try-lock,
* then blocking acquire.
*/
void *tlsf_thread_malloc(tlsf_thread_t *ts, size_t size);

/**
* Thread-safe aligned allocation.
*/
void *tlsf_thread_aalloc(tlsf_thread_t *ts, size_t align, size_t size);

/**
* Thread-safe realloc. Attempts in-place realloc within the owning
* arena first; falls back to cross-arena malloc + memcpy + free.
*/
void *tlsf_thread_realloc(tlsf_thread_t *ts, void *ptr, size_t size);

/**
* Thread-safe free. Finds the owning arena automatically via
* pointer-range lookup (O(TLSF_ARENA_COUNT), effectively O(1)).
*/
void tlsf_thread_free(tlsf_thread_t *ts, void *ptr);

/**
* Heap consistency check across all arenas.
* Acquires each arena lock in order during the check.
*/
void tlsf_thread_check(tlsf_thread_t *ts);

/**
* Aggregate statistics across all arenas.
* largest_free reports the single largest free block in any arena.
*/
int tlsf_thread_stats(tlsf_thread_t *ts, tlsf_stats_t *stats);

/**
* Reset all arenas to initial state (bounded time).
* All outstanding pointers become invalid.
*/
void tlsf_thread_reset(tlsf_thread_t *ts);

#ifdef __cplusplus
}
#endif
9 changes: 9 additions & 0 deletions src/tlsf.c
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,15 @@ void tlsf_free(tlsf_t *t, void *mem)
block_insert(t, block);
}

/* Report how many payload bytes an allocation can actually hold.
 * This may exceed the size originally requested because of alignment
 * rounding and bin-class quantization. Passing a freed pointer is
 * undefined; a NULL pointer yields 0. */
size_t tlsf_usable_size(void *ptr)
{
    if (!ptr)
        return 0;

    tlsf_block_t *blk = block_from_payload(ptr);
    ASSERT(!block_is_free(blk), "block must be allocated");
    return block_size(blk);
}

void *tlsf_realloc(tlsf_t *t, void *mem, size_t size)
{
/* Zero-size requests are treated as free. */
Expand Down
Loading