From 8e19df3de58b6e97f17f52e57ad9b82c73e9fada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bo=20St=C3=A5le=20Kopperud?= Date: Mon, 8 Jun 2026 20:18:52 +0200 Subject: [PATCH] atomic_v7: add memory barriers to atomic bit RMW The bitwise ldrex/strex RMW had no barriers, so a later load could be reordered ahead of the atomic write (e.g. signal.c sets tc_SigRecvd then reads tc_State). Bracket the loop with dmb and add the memory clobber to give the operation sequential-consistency ordering. --- arch/arm-all/include/aros/atomic_v7.h | 31 ++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/arm-all/include/aros/atomic_v7.h b/arch/arm-all/include/aros/atomic_v7.h index eee257d2253..04a2e19c8b4 100644 --- a/arch/arm-all/include/aros/atomic_v7.h +++ b/arch/arm-all/include/aros/atomic_v7.h @@ -61,52 +61,59 @@ do { \ :"cc"); \ } while (0) +/* + * Bitwise atomic RMW with full memory barrier. The dmb pair gives + * sequential-consistency semantics: prior loads/stores are observable before + * the RMW, and the RMW is observable before later loads/stores. Without + * this, callers like signal.c (set tc_SigRecvd, then read tc_State) + * can have the trailing load hoisted above the atomic write. 
+ */ #define __AROS_ATOMIC_AND_B(var, mask) \ do { \ unsigned long temp; int result; \ - __asm__ __volatile__("\n1: ldrexb %0, [%3]; and %0, %0, %4; strexb %1, %0, [%3]; teq %1, #0; bne 1b" \ + __asm__ __volatile__("dmb\n1: ldrexb %0, [%3]; and %0, %0, %4; strexb %1, %0, [%3]; teq %1, #0; bne 1b; dmb" \ :"=&r"(result), "=&r"(temp), "+Qo"(var) \ :"r"(&var), "I"(mask) \ - :"cc"); \ + :"memory", "cc"); \ } while(0) #define __AROS_ATOMIC_AND_W(var, mask) \ do { \ unsigned long temp; int result; \ - __asm__ __volatile__("\n1: ldrexh %0, [%3]; and %0, %0, %4; strexh %1, %0, [%3]; teq %1, #0; bne 1b" \ + __asm__ __volatile__("dmb\n1: ldrexh %0, [%3]; and %0, %0, %4; strexh %1, %0, [%3]; teq %1, #0; bne 1b; dmb" \ :"=&r"(result), "=&r"(temp), "+Qo"(var) \ :"r"(&var), "Ir"(mask) \ - :"cc"); \ + :"memory", "cc"); \ } while(0) #define __AROS_ATOMIC_AND_L(var, mask) \ do { \ unsigned long temp; int result; \ - __asm__ __volatile__("\n1: ldrex %0, [%3]; and %0, %0, %4; strex %1, %0, [%3]; teq %1, #0; bne 1b" \ + __asm__ __volatile__("dmb\n1: ldrex %0, [%3]; and %0, %0, %4; strex %1, %0, [%3]; teq %1, #0; bne 1b; dmb" \ :"=&r"(result), "=&r"(temp), "+Qo"(var) \ :"r"(&var), "Ir"(mask) \ - :"cc"); \ + :"memory", "cc"); \ } while(0) #define __AROS_ATOMIC_OR_B(var, mask) \ do { \ unsigned long temp; int result; \ - __asm__ __volatile__("\n1: ldrexb %0, [%3]; orr %0, %0, %4; strexb %1, %0, [%3]; teq %1, #0; bne 1b" \ + __asm__ __volatile__("dmb\n1: ldrexb %0, [%3]; orr %0, %0, %4; strexb %1, %0, [%3]; teq %1, #0; bne 1b; dmb" \ :"=&r"(result), "=&r"(temp), "+Qo"(var) \ :"r"(&var), "I"(mask) \ - :"cc"); \ + :"memory", "cc"); \ } while(0) #define __AROS_ATOMIC_OR_W(var, mask) \ do { \ unsigned long temp; int result; \ - __asm__ __volatile__("\n1: ldrexh %0, [%3]; orr %0, %0, %4; strexh %1, %0, [%3]; teq %1, #0; bne 1b" \ + __asm__ __volatile__("dmb\n1: ldrexh %0, [%3]; orr %0, %0, %4; strexh %1, %0, [%3]; teq %1, #0; bne 1b; dmb" \ :"=&r"(result), "=&r"(temp), "+Qo"(var) \ :"r"(&var), 
"Ir"(mask) \ - :"cc"); \ + :"memory", "cc"); \ } while(0) #define __AROS_ATOMIC_OR_L(var, mask) \ do { \ unsigned long temp; int result; \ - __asm__ __volatile__("\n1: ldrex %0, [%3]; orr %0, %0, %4; strex %1, %0, [%3]; teq %1, #0; bne 1b" \ + __asm__ __volatile__("dmb\n1: ldrex %0, [%3]; orr %0, %0, %4; strex %1, %0, [%3]; teq %1, #0; bne 1b; dmb" \ :"=&r"(result), "=&r"(temp), "+Qo"(var) \ :"r"(&var), "Ir"(mask) \ - :"cc"); \ + :"memory", "cc"); \ } while(0)