From 0581a59a4c155cef67cee0b4fb66808f0a999f78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Andr=C3=A9=20Reuter?= Date: Wed, 25 Feb 2026 15:08:43 +0100 Subject: [PATCH 1/3] LLVM: Switch native flag from -march=native to -mtune=native -mcpu=native for aarch64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was observed that `-march=native` does not actually provide all the CPU features on e.g. GH200, missing bf16 when building OpenBLAS 0.3.31 with LLVM 21.1.8. This causes build failures like: ../kernel/arm64/sbgemv_n_neon.c:146:14: error: '__builtin_neon_vld1q_bf16' needs target feature bf16,neon 146 | a3 = vld1q_bf16(a_ptr3); | ^ Switching to -mtune=native -mcpu=native solves this issue. Therefore, apply this option generally. Signed-off-by: Jan André Reuter --- easybuild/toolchains/compiler/llvm_compilers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/easybuild/toolchains/compiler/llvm_compilers.py b/easybuild/toolchains/compiler/llvm_compilers.py index cee594f86b..c26d09b6ba 100644 --- a/easybuild/toolchains/compiler/llvm_compilers.py +++ b/easybuild/toolchains/compiler/llvm_compilers.py @@ -143,7 +143,8 @@ class LLVMCompilers(Compiler): # used when 'optarch' toolchain option is enabled (and --optarch is not specified) COMPILER_OPTIMAL_ARCHITECTURE_OPTION = { **(Compiler.COMPILER_OPTIMAL_ARCHITECTURE_OPTION or {}), - (systemtools.AARCH64, systemtools.ARM): '-march=native', + # -march=native may not include all CPU features on aarch64, e.g. 
missing bf16 on Grace for LLVM 21.1.8 + (systemtools.AARCH64, systemtools.ARM): '-mcpu=native -mtune=native', (systemtools.POWER, systemtools.POWER): '-mcpu=native', # no support for march=native on POWER (systemtools.POWER, systemtools.POWER_LE): '-mcpu=native', # no support for march=native on POWER (systemtools.X86_64, systemtools.AMD): '-march=native', From 185702ea475708f6b5fb21694a2fbcf7e826cf8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Andr=C3=A9=20Reuter?= Date: Wed, 25 Feb 2026 21:18:03 +0100 Subject: [PATCH 2/3] Switch to -mcpu=native only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan André Reuter --- easybuild/toolchains/compiler/llvm_compilers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/easybuild/toolchains/compiler/llvm_compilers.py b/easybuild/toolchains/compiler/llvm_compilers.py index c26d09b6ba..a4211061c6 100644 --- a/easybuild/toolchains/compiler/llvm_compilers.py +++ b/easybuild/toolchains/compiler/llvm_compilers.py @@ -143,8 +143,9 @@ class LLVMCompilers(Compiler): # used when 'optarch' toolchain option is enabled (and --optarch is not specified) COMPILER_OPTIMAL_ARCHITECTURE_OPTION = { **(Compiler.COMPILER_OPTIMAL_ARCHITECTURE_OPTION or {}), - # -march=native may not include all CPU features on aarch64, e.g. 
missing bf16 on Grace for LLVM 21.1.8 - (systemtools.AARCH64, systemtools.ARM): '-mcpu=native -mtune=native', + # -mcpu=native is recommended way to specify feature set for aarch64, see: + # https://github.com/easybuilders/easybuild-framework/pull/5139#issuecomment-3961654073 + (systemtools.AARCH64, systemtools.ARM): '-mcpu=native', (systemtools.POWER, systemtools.POWER): '-mcpu=native', # no support for march=native on POWER (systemtools.POWER, systemtools.POWER_LE): '-mcpu=native', # no support for march=native on POWER (systemtools.X86_64, systemtools.AMD): '-march=native', From 468c7fb80e6d1e14e1a3fbd35df30d0dced20cb2 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Wed, 25 Mar 2026 13:05:43 +0100 Subject: [PATCH 3/3] add extra reference for use of -mcpu=native rather than -march=native with LLVM on Arm --- easybuild/toolchains/compiler/llvm_compilers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/easybuild/toolchains/compiler/llvm_compilers.py b/easybuild/toolchains/compiler/llvm_compilers.py index a4211061c6..b640d2b130 100644 --- a/easybuild/toolchains/compiler/llvm_compilers.py +++ b/easybuild/toolchains/compiler/llvm_compilers.py @@ -144,7 +144,8 @@ class LLVMCompilers(Compiler): COMPILER_OPTIMAL_ARCHITECTURE_OPTION = { **(Compiler.COMPILER_OPTIMAL_ARCHITECTURE_OPTION or {}), # -mcpu=native is recommended way to specify feature set for aarch64, see: - # https://github.com/easybuilders/easybuild-framework/pull/5139#issuecomment-3961654073 + # https://github.com/easybuilders/easybuild-framework/pull/5139#issuecomment-3961654073 and + # https://developer.arm.com/community/arm-community-blogs/b/tools-software-ides-blog/posts/compiler-flags-across-architectures-march-mtune-and-mcpu (systemtools.AARCH64, systemtools.ARM): '-mcpu=native', (systemtools.POWER, systemtools.POWER): '-mcpu=native', # no support for march=native on POWER (systemtools.POWER, systemtools.POWER_LE): '-mcpu=native', # no support for march=native on POWER