From 034690e7330213dcb1cf4d17d19263167ac6e71d Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Sat, 30 May 2026 22:01:40 +0100 Subject: [PATCH 1/4] fix(nrf52): run app loop on dedicated FreeRTOS task to avoid stack overflow The Adafruit_nRF52_Arduino framework runs Arduino loop() on a 4KB task stack (LOOP_STACK_SZ = 256*4, an unconditional #define that build flags cannot override). Heavy LittleFS work from loop() -- notably ClientACL::saveSessionKeys -> Adafruit_LittleFS::open() allocating large lfs_dir/lfs_info structs on the stack (measured peak ~4.7KB) -- overflows the 4KB chunk and corrupts the heap object directly below it (observed: the RadioLib Module*), causing a hardfault/lockup after admin/session activity on repeaters. Move the application loop onto a dedicated FreeRTOS task with an 8KB stack (via new header-only helpers/MeshWorkerTask.h). On nRF52 the Arduino loop() just vTaskDelays; the existing loop body runs unchanged on the worker task, keeping the board.sleep()/hasPendingWork() gating single-threaded. Other platforms are unaffected. Applied to the two examples that hit the saveSessionKeys path: simple_repeater and simple_room_server. 
Co-Authored-By: Claude Opus 4.8 --- examples/simple_repeater/main.cpp | 19 +++++++++++++++++++ examples/simple_room_server/main.cpp | 19 +++++++++++++++++++ src/helpers/MeshWorkerTask.h | 26 ++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 src/helpers/MeshWorkerTask.h diff --git a/examples/simple_repeater/main.cpp b/examples/simple_repeater/main.cpp index cd3b2e86e6..39dbe2cc14 100644 --- a/examples/simple_repeater/main.cpp +++ b/examples/simple_repeater/main.cpp @@ -3,6 +3,11 @@ #include "MyMesh.h" +#if defined(NRF52_PLATFORM) +#include +static void runMainLoop(); // forward decl: setup() references it before its definition +#endif + #ifdef DISPLAY_CLASS #include "UITask.h" static UITask ui_task(display); @@ -105,9 +110,17 @@ void setup() { #endif board.onBootComplete(); + +#if defined(NRF52_PLATFORM) + startMeshWorker(runMainLoop); +#endif } +#if defined(NRF52_PLATFORM) +static void runMainLoop() { +#else void loop() { +#endif board.loop(); int len = strlen(command); @@ -172,3 +185,9 @@ void loop() { #endif } } + +#if defined(NRF52_PLATFORM) +void loop() { + vTaskDelay(pdMS_TO_TICKS(1000)); // app loop runs on the mesh worker task +} +#endif diff --git a/examples/simple_room_server/main.cpp b/examples/simple_room_server/main.cpp index c413ff6102..1d78734ef8 100644 --- a/examples/simple_room_server/main.cpp +++ b/examples/simple_room_server/main.cpp @@ -3,6 +3,11 @@ #include "MyMesh.h" +#if defined(NRF52_PLATFORM) +#include +static void runMainLoop(); // forward decl: setup() references it before its definition +#endif + #ifdef DISPLAY_CLASS #include "UITask.h" static UITask ui_task(display); @@ -82,9 +87,17 @@ void setup() { #endif board.onBootComplete(); + +#if defined(NRF52_PLATFORM) + startMeshWorker(runMainLoop); +#endif } +#if defined(NRF52_PLATFORM) +static void runMainLoop() { +#else void loop() { +#endif board.loop(); int len = strlen(command); @@ -118,3 +131,9 @@ void loop() { #endif rtc_clock.tick(); } + +#if 
defined(NRF52_PLATFORM) +void loop() { + vTaskDelay(pdMS_TO_TICKS(1000)); // app loop runs on the mesh worker task +} +#endif diff --git a/src/helpers/MeshWorkerTask.h b/src/helpers/MeshWorkerTask.h new file mode 100644 index 0000000000..7c00a2e9f4 --- /dev/null +++ b/src/helpers/MeshWorkerTask.h @@ -0,0 +1,26 @@ +#pragma once + +#if defined(NRF52_PLATFORM) +#include +#include + +// The Adafruit_nRF52_Arduino framework runs Arduino loop() on a 4KB task stack +// (LOOP_STACK_SZ = 256*4, an unconditional #define that build flags can't override). +// Heavy LittleFS work from loop() (e.g. ClientACL::saveSessionKeys) overflows it and +// corrupts adjacent heap. Run the app loop on a dedicated task with an 8KB stack. +static TaskHandle_t _meshTaskHandle = nullptr; +static void (*_meshLoopBody)() = nullptr; + +static void _mesh_worker_task(void*) { + for (;;) { + _meshLoopBody(); + vTaskDelay(pdMS_TO_TICKS(1)); // yield + } +} + +// 2048 words = 8KB stack. TASK_PRIO_LOW matches the framework's own loop_task. +static inline void startMeshWorker(void (*body)()) { + _meshLoopBody = body; + xTaskCreate(_mesh_worker_task, "mesh", 2048, NULL, TASK_PRIO_LOW, &_meshTaskHandle); +} +#endif From 09cc949d8b84c6abfd5f052b88c6632e94ceb1b8 Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Sat, 30 May 2026 23:08:24 +0100 Subject: [PATCH 2/4] address review: move worker into .cpp, guard pointer, check task creation Copilot review feedback on the nRF52 mesh-worker fix: - Move the worker task + state out of the header into MeshWorkerTask.cpp so there is a single shared definition (the header was defining non-inline static globals/functions, fragile across translation units; inline-variable fix isn't viable since nRF52 builds aren't guaranteed C++17). Header now declares only startMeshWorker(). - Guard the loop-body pointer: the worker task does 'if (_meshLoopBody) _meshLoopBody();' so it can never deref a null pointer. 
- Check the xTaskCreate() return value and MESH_DEBUG_PRINTLN + return false on failure (8KB stack alloc out of FreeRTOS heap) instead of silently appearing to boot with no mesh loop running. - Reduce per-example boilerplate: the loop body is now a fixed-name meshAppLoop() on all platforms (no #if around the function signature); loop() either yields (nRF52) or calls meshAppLoop() directly (others). The shared .cpp is guarded by NRF52_PLATFORM, so it compiles empty on other platforms; verified it links cleanly into a non-modified nRF52 example (RAK_4631_companion_radio_usb) that neither defines meshAppLoop nor calls startMeshWorker. Co-Authored-By: Claude Opus 4.8 --- examples/simple_repeater/main.cpp | 18 +++++++--------- examples/simple_room_server/main.cpp | 18 +++++++--------- src/helpers/MeshWorkerTask.cpp | 29 ++++++++++++++++++++++++++ src/helpers/MeshWorkerTask.h | 31 ++++++++-------------------- 4 files changed, 54 insertions(+), 42 deletions(-) create mode 100644 src/helpers/MeshWorkerTask.cpp diff --git a/examples/simple_repeater/main.cpp b/examples/simple_repeater/main.cpp index 39dbe2cc14..edd5aaa400 100644 --- a/examples/simple_repeater/main.cpp +++ b/examples/simple_repeater/main.cpp @@ -5,7 +5,7 @@ #if defined(NRF52_PLATFORM) #include -static void runMainLoop(); // forward decl: setup() references it before its definition +static void meshAppLoop(); // forward decl: setup() passes it to startMeshWorker() #endif #ifdef DISPLAY_CLASS @@ -112,15 +112,11 @@ void setup() { board.onBootComplete(); #if defined(NRF52_PLATFORM) - startMeshWorker(runMainLoop); + startMeshWorker(meshAppLoop); #endif } -#if defined(NRF52_PLATFORM) -static void runMainLoop() { -#else -void loop() { -#endif +static void meshAppLoop() { board.loop(); int len = strlen(command); @@ -186,8 +182,10 @@ void loop() { } } -#if defined(NRF52_PLATFORM) void loop() { - vTaskDelay(pdMS_TO_TICKS(1000)); // app loop runs on the mesh worker task -} +#if defined(NRF52_PLATFORM) + 
vTaskDelay(pdMS_TO_TICKS(1000)); // app loop runs on the dedicated mesh worker task +#else + meshAppLoop(); #endif +} diff --git a/examples/simple_room_server/main.cpp b/examples/simple_room_server/main.cpp index 1d78734ef8..39f2cda43b 100644 --- a/examples/simple_room_server/main.cpp +++ b/examples/simple_room_server/main.cpp @@ -5,7 +5,7 @@ #if defined(NRF52_PLATFORM) #include -static void runMainLoop(); // forward decl: setup() references it before its definition +static void meshAppLoop(); // forward decl: setup() passes it to startMeshWorker() #endif #ifdef DISPLAY_CLASS @@ -89,15 +89,11 @@ void setup() { board.onBootComplete(); #if defined(NRF52_PLATFORM) - startMeshWorker(runMainLoop); + startMeshWorker(meshAppLoop); #endif } -#if defined(NRF52_PLATFORM) -static void runMainLoop() { -#else -void loop() { -#endif +static void meshAppLoop() { board.loop(); int len = strlen(command); @@ -132,8 +128,10 @@ void loop() { rtc_clock.tick(); } -#if defined(NRF52_PLATFORM) void loop() { - vTaskDelay(pdMS_TO_TICKS(1000)); // app loop runs on the mesh worker task -} +#if defined(NRF52_PLATFORM) + vTaskDelay(pdMS_TO_TICKS(1000)); // app loop runs on the dedicated mesh worker task +#else + meshAppLoop(); #endif +} diff --git a/src/helpers/MeshWorkerTask.cpp b/src/helpers/MeshWorkerTask.cpp new file mode 100644 index 0000000000..03204fb626 --- /dev/null +++ b/src/helpers/MeshWorkerTask.cpp @@ -0,0 +1,29 @@ +#include "MeshWorkerTask.h" + +#if defined(NRF52_PLATFORM) +#include +#include // MESH_DEBUG_PRINTLN +#include +#include + +static TaskHandle_t _meshTaskHandle = nullptr; +static void (*_meshLoopBody)() = nullptr; + +static void mesh_worker_task(void*) { + for (;;) { + if (_meshLoopBody) _meshLoopBody(); + vTaskDelay(pdMS_TO_TICKS(1)); // yield + } +} + +bool startMeshWorker(void (*loopBody)()) { + _meshLoopBody = loopBody; + // 2048 words = 8KB stack (measured peak ~4.7KB + headroom). + // TASK_PRIO_LOW (1) matches the framework's own loop_task priority. 
+ if (xTaskCreate(mesh_worker_task, "mesh", 2048, NULL, TASK_PRIO_LOW, &_meshTaskHandle) != pdPASS) { + MESH_DEBUG_PRINTLN("startMeshWorker: xTaskCreate failed (out of FreeRTOS heap?)"); + return false; + } + return true; +} +#endif diff --git a/src/helpers/MeshWorkerTask.h b/src/helpers/MeshWorkerTask.h index 7c00a2e9f4..aa4583b73f 100644 --- a/src/helpers/MeshWorkerTask.h +++ b/src/helpers/MeshWorkerTask.h @@ -1,26 +1,13 @@ #pragma once #if defined(NRF52_PLATFORM) -#include -#include - -// The Adafruit_nRF52_Arduino framework runs Arduino loop() on a 4KB task stack -// (LOOP_STACK_SZ = 256*4, an unconditional #define that build flags can't override). -// Heavy LittleFS work from loop() (e.g. ClientACL::saveSessionKeys) overflows it and -// corrupts adjacent heap. Run the app loop on a dedicated task with an 8KB stack. -static TaskHandle_t _meshTaskHandle = nullptr; -static void (*_meshLoopBody)() = nullptr; - -static void _mesh_worker_task(void*) { - for (;;) { - _meshLoopBody(); - vTaskDelay(pdMS_TO_TICKS(1)); // yield - } -} - -// 2048 words = 8KB stack. TASK_PRIO_LOW matches the framework's own loop_task. -static inline void startMeshWorker(void (*body)()) { - _meshLoopBody = body; - xTaskCreate(_mesh_worker_task, "mesh", 2048, NULL, TASK_PRIO_LOW, &_meshTaskHandle); -} +// Run an application loop body on a dedicated FreeRTOS task with an 8KB stack, +// instead of the Adafruit_nRF52_Arduino framework's Arduino loop_task (whose 4KB +// stack -- LOOP_STACK_SZ = 256*4, an unconditional #define that build flags can't +// override -- is too small for LittleFS file opens done from loop(), e.g. +// ClientACL::saveSessionKeys, which overflow it and corrupt adjacent heap). +// +// Definitions live in MeshWorkerTask.cpp so there is a single shared instance. +// Returns true if the task was created, false otherwise (e.g. FreeRTOS heap exhaustion).
+bool startMeshWorker(void (*loopBody)()); #endif From 8149af571f7cc99cf6c1b4b2e0e984e81773dbae Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Sat, 30 May 2026 23:17:06 +0100 Subject: [PATCH 3/4] check startMeshWorker() result at call site, halt on failure The previous commit made startMeshWorker() return bool but the call sites discarded it, so worker-task creation failure was still only visible under MESH_DEBUG. Act on the result: on failure, log and halt() -- mirroring the existing radio_init() failure handling in setup() -- so the node fails loudly instead of booting with no mesh loop running. Co-Authored-By: Claude Opus 4.8 --- examples/simple_repeater/main.cpp | 5 ++++- examples/simple_room_server/main.cpp | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/simple_repeater/main.cpp b/examples/simple_repeater/main.cpp index edd5aaa400..1ec86be526 100644 --- a/examples/simple_repeater/main.cpp +++ b/examples/simple_repeater/main.cpp @@ -112,7 +112,10 @@ void setup() { board.onBootComplete(); #if defined(NRF52_PLATFORM) - startMeshWorker(meshAppLoop); + if (!startMeshWorker(meshAppLoop)) { + MESH_DEBUG_PRINTLN("Failed to start mesh worker task!"); + halt(); + } #endif } diff --git a/examples/simple_room_server/main.cpp b/examples/simple_room_server/main.cpp index 39f2cda43b..fe41f38078 100644 --- a/examples/simple_room_server/main.cpp +++ b/examples/simple_room_server/main.cpp @@ -89,7 +89,10 @@ void setup() { board.onBootComplete(); #if defined(NRF52_PLATFORM) - startMeshWorker(meshAppLoop); + if (!startMeshWorker(meshAppLoop)) { + MESH_DEBUG_PRINTLN("Failed to start mesh worker task!"); + halt(); + } #endif } From 795e1e4abdf1be5b7ab18d96818e5bbe212f417d Mon Sep 17 00:00:00 2001 From: Kemal Hadimli Date: Sat, 30 May 2026 23:21:41 +0100 Subject: [PATCH 4/4] harden startMeshWorker: guaranteed yield, reject null/double start Address two further review points: - Worker yields with vTaskDelay(1) instead of 
vTaskDelay(pdMS_TO_TICKS(1)) so it always yields at least one tick regardless of configTICK_RATE_HZ (on this framework the tick rate is 1024 Hz so the old form was already 1 tick, but the explicit form is tick-rate independent). - startMeshWorker() now rejects a null loopBody and a second call (which would leak the previous task by overwriting _meshTaskHandle), returning false and logging instead of silently misbehaving. Co-Authored-By: Claude Opus 4.8 --- src/helpers/MeshWorkerTask.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/helpers/MeshWorkerTask.cpp b/src/helpers/MeshWorkerTask.cpp index 03204fb626..db67acd204 100644 --- a/src/helpers/MeshWorkerTask.cpp +++ b/src/helpers/MeshWorkerTask.cpp @@ -12,16 +12,25 @@ static void (*_meshLoopBody)() = nullptr; static void mesh_worker_task(void*) { for (;;) { if (_meshLoopBody) _meshLoopBody(); - vTaskDelay(pdMS_TO_TICKS(1)); // yield + vTaskDelay(1); // yield at least one tick (tick-rate independent) } } bool startMeshWorker(void (*loopBody)()) { + if (loopBody == nullptr) { + MESH_DEBUG_PRINTLN("startMeshWorker: null loopBody"); + return false; + } + if (_meshTaskHandle != nullptr) { + MESH_DEBUG_PRINTLN("startMeshWorker: already started"); + return false; + } _meshLoopBody = loopBody; // 2048 words = 8KB stack (measured peak ~4.7KB + headroom). // TASK_PRIO_LOW (1) matches the framework's own loop_task priority. if (xTaskCreate(mesh_worker_task, "mesh", 2048, NULL, TASK_PRIO_LOW, &_meshTaskHandle) != pdPASS) { MESH_DEBUG_PRINTLN("startMeshWorker: xTaskCreate failed (out of FreeRTOS heap?)"); + _meshTaskHandle = nullptr; return false; } return true;