diff --git a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj
index a41ff9ac49f..2429f2fe9bb 100644
--- a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj
+++ b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj
@@ -162,7 +162,13 @@
true
true
UseLinkTimeCodeGeneration
+
false
+ true
@@ -177,6 +183,10 @@
true
+
+ false
+ true
Console
ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
true
diff --git a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj
index 65bb39fffa2..390b38f0e92 100644
--- a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj
+++ b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj
@@ -314,6 +314,16 @@
+
+
+
+
+
+
+
+
+
diff --git a/configure.ac b/configure.ac
index c1d2dd089ef..82af885af65 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3777,13 +3777,22 @@ then
AC_MSG_NOTICE([32bit ARMv4 found])
;;
*)
- AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm"
+ # AArch32 ARMv8 crypto-extension asm (armv8-32-*-asm.S: sha256h,
+ # aese/aesmc, pmull) needs an explicit -march=armv8-a+crypto. The
+ # ARMv8-A crypto extension is OPTIONAL, so the SHA/AES instructions
+ # are gated by the "+crypto" arch feature -- NOT enabled by -mfpu
+ # alone, and NOT by a bare -march=armv8-a (nor -mcpu=cortex-a53 on
+ # some toolchains). Cross toolchains whose default -mcpu is ARMv7
+ # (e.g. Xilinx Vitis cortex-a9) otherwise reject them with
+ # "selected processor does not support sha256h.32 in ARM mode".
+ # Mirrors the in-kernel ARM armasm enablement (port/arm/*.S crypto).
+ AM_CPPFLAGS="$AM_CPPFLAGS -march=armv8-a+crypto -mfpu=crypto-neon-fp-armv8 -marm"
# Include options.h
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
ENABLED_ARMASM_CRYPTO=yes
ENABLED_ARMASM_NEON=yes
ENABLED_ARM_32=yes
- AC_MSG_NOTICE([32bit ARMv8 found, setting mfpu to crypto-neon-fp-armv8])
+ AC_MSG_NOTICE([32bit ARMv8 found, setting -march=armv8-a+crypto + mfpu=crypto-neon-fp-armv8])
;;
esac
esac
@@ -6315,13 +6324,7 @@ AS_CASE([$FIPS_VERSION],
-DWC_RSA_NO_PADDING \
-DECC_USER_CURVES \
-DHAVE_ECC384 \
- -DHAVE_ECC521 \
- -DWOLFSSL_VALIDATE_FFC_IMPORT \
- -DHAVE_FFDHE_Q \
- -DHAVE_FFDHE_3072 \
- -DHAVE_FFDHE_4096 \
- -DHAVE_FFDHE_6144 \
- -DHAVE_FFDHE_8192"
+ -DHAVE_ECC521"
# KCAPI API does not support custom k for sign, don't force enable ECC key sizes and don't use seed callback
AS_IF([test "x$ENABLED_KCAPI_ECC" = "xno"],
@@ -6335,6 +6338,20 @@ AS_CASE([$FIPS_VERSION],
-DHAVE_ECC256"])
DEFAULT_MAX_CLASSIC_ASYM_KEY_BITS=8192
+
+# Classic DH and DSA are OUT OF SCOPE for the FIPS 140-3 v7 PQ module.
+# (FIPS 186-5 retires DSA; v7 boundary keeps only ECDH/ECDSA + PQ KEM/DSA.)
+# Hard-error if explicitly enabled; otherwise force off and add NO_DH/NO_DSA.
+ AS_IF([test "$enable_dh" = "yes"],
+ [AC_MSG_ERROR([--enable-dh is not supported with --enable-fips=$FIPS_VERSION. Classic finite-field DH is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DH support.])],
+ [test "$ENABLED_DH" != "no"],
+ [ENABLED_DH="no"; enable_dh="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DH"])
+
+ AS_IF([test "$enable_dsa" = "yes"],
+ [AC_MSG_ERROR([--enable-dsa is not supported with --enable-fips=$FIPS_VERSION. DSA is retired by FIPS 186-5 and is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DSA support.])],
+ [test "$ENABLED_DSA" != "no"],
+ [ENABLED_DSA="no"; enable_dsa="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DSA"])
+
# optimizations section
# protocol section
@@ -8889,8 +8906,17 @@ then
fi
if test "x$ENABLED_DH" = "xno"
then
- ENABLED_DH="yes"
- AM_CFLAGS="$AM_CFLAGS -DHAVE_DH"
+ # Classic DH is out of scope for the FIPS 140-3 v7 PQ module.
+ # JNI normally auto-enables DH for legacy TLS suites; with FIPS v7+
+ # we report and skip the auto-enable rather than silently turning DH
+ # back on (which would conflict with the boundary).
+ if test "$FIPS_VERSION" = "v7" || test "$FIPS_VERSION" = "ready" || test "$FIPS_VERSION" = "dev"
+ then
+ AC_MSG_NOTICE([JNI enabled but FIPS is $FIPS_VERSION, NOT turning on DH with this module])
+ else
+ ENABLED_DH="yes"
+ AM_CFLAGS="$AM_CFLAGS -DHAVE_DH"
+ fi
fi
if test "x$ENABLED_PSK" = "xno"
then
diff --git a/fips-hash.sh b/fips-hash.sh
index 36f320c0bbd..8f8a1a86317 100755
--- a/fips-hash.sh
+++ b/fips-hash.sh
@@ -13,7 +13,11 @@ then
fi
OUT=$(./wolfcrypt/test/testwolfcrypt | sed -n 's/hash = \(.*\)/\1/p')
-NEWHASH=$(echo "$OUT" | cut -c1-64)
+# FIPS v7.0.0+ uses HMAC-SHA-512 (128 hex chars); older FIPS versions
+# use HMAC-SHA-256 (64 hex chars). Take the whole captured hash; the
+# static_assert on sizeof(verifyCore) guards against wrong length at
+# compile time after this script runs.
+NEWHASH=$(echo "$OUT" | head -n1 | tr -d '[:space:]')
if test -n "$NEWHASH"
then
cp wolfcrypt/src/fips_test.c wolfcrypt/src/fips_test.c.bak
diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild
index fe3f823942f..eb14106f5e8 100644
--- a/linuxkm/Kbuild
+++ b/linuxkm/Kbuild
@@ -99,6 +99,18 @@ $(LIBWOLFSSL_NAME)-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/modu
ifeq "$(FIPS_OPTEST)" "1"
$(LIBWOLFSSL_NAME)-y += linuxkm/optest-140-3/linuxkm_optest_wrapper.o
+ # The optest TEST wrapper (#includes test.c / invalid_tests.c) aggregates
+ # several AES contexts per invalid-input test function. Under WOLFSSL_AESNI
+ # the Aes struct carries an inline ALIGN16 streamData[5*WC_AES_BLOCK_SIZE]
+ # (wolfssl/wolfcrypt/aes.h) plus use_aesni, so a handful of those functions
+ # (aes_{,mac_,ofb_,cfb_,kw_}invalid_data_tests) exceed the conservative i386
+ # THREAD_SIZE/4 = 2048 frame *warning*. They build cleanly on x86_64 (whose
+ # THREAD_SIZE/4 = 4096 already accommodates them) and run in a kernel thread
+ # well within THREAD_SIZE. Relax the cap to 4096 for the wrapper ONLY: it is
+ # test/evidence tooling OUTSIDE the FIPS module boundary, so the FIPS module
+ # objects keep the strict MAX_STACK_FRAME_SIZE. No effect on x86_64 (its
+ # default is already 4096).
+ $(obj)/linuxkm/optest-140-3/linuxkm_optest_wrapper.o: ccflags-y += -Wframe-larger-than=4096
endif
WOLFSSL_CFLAGS_NO_VECTOR_INSNS := $(CFLAGS_SIMD_DISABLE) $(CFLAGS_FPU_DISABLE)
@@ -127,6 +139,10 @@ ifeq "$(ENABLED_LINUXKM_PIE)" "yes"
endif
endif
endif
+ ifeq ($(KERNEL_ARCH),i386)
+ NO_PIE_FLAG := 1
+ $(info Note: disabling -fPIE on 32-bit x86 -- i386 -fPIE routes every local symbol through the GOT (R_386_GOTOFF), which the wolfCrypt PIE containerization forbids.)
+ endif
endif
ifdef NO_PIE_FLAG
@@ -217,6 +233,25 @@ $(obj)/wolfcrypt/src/wc_mlkem_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/wc_mldsa_asm.o: asflags-y := $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/wc_mldsa_asm.o: OBJECT_FILES_NON_STANDARD := y
+# ARM/ARM64 crypto+NEON asm (wolfcrypt/src/port/arm/*.S) needs the crypto/NEON
+# -march enabled at assembly time. The wolfSSL ARM asm carries no .arch/.fpu
+# directives, and configure leaves ASFLAGS_*_SIMD_ENABLE empty on ARM: the
+# userspace build inherits +crypto from the toolchain's default -mcpu (e.g.
+# cortex-a72), but the kernel build forces its own baseline -march without it,
+# so the AES/SHA/PMULL instructions are rejected ("selected processor does not
+# support `aesd ...'"). Supply the right -march here per kernel arch. (The
+# wrong-arch port/arm files are #ifdef'd to empty objects, so the flag is a
+# no-op for them; OBJECT_FILES_NON_STANDARD silences objtool on the hand asm.)
+ifeq ($(CONFIG_ARM64),y)
+ WOLFSSL_ARM_ASM_MARCH := -march=armv8-a+crypto
+else ifeq ($(CONFIG_ARM),y)
+ WOLFSSL_ARM_ASM_MARCH := -march=armv8-a -mfpu=crypto-neon-fp-armv8
+endif
+ifdef WOLFSSL_ARM_ASM_MARCH
+$(obj)/wolfcrypt/src/port/arm/%.o: asflags-y := $(WOLFSSL_ASFLAGS) $(WOLFSSL_ARM_ASM_MARCH)
+$(obj)/wolfcrypt/src/port/arm/%.o: OBJECT_FILES_NON_STANDARD := y
+endif
+
ifndef READELF
READELF := readelf
endif
@@ -325,7 +360,7 @@ RENAME_PIE_TEXT_AND_DATA_SECTIONS := \
next; \
} \
else if ($$4 == "OBJECT") { \
- if (! ($$7 in wolfcrypt_data_sections)) { \
+ if (! ($$7 in wolfcrypt_data_sections) && ! ($$7 in wolfcrypt_text_sections)) { \
if ((other_sections[$$7] == ".printk_index") || \
(($$8 ~ /^_entry\.[0-9]+$$|^kernel_read_file_str$$/) && \
(other_sections[$$7] == ".data.rel.ro.local"))) \
diff --git a/linuxkm/Makefile b/linuxkm/Makefile
index 24a867b9356..1edb65bfa91 100644
--- a/linuxkm/Makefile
+++ b/linuxkm/Makefile
@@ -45,7 +45,17 @@ ifndef SRC_TOP
SRC_TOP=$(shell dirname $(MODULE_TOP))
endif
-WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\""
+# -Wno-nested-externs: the Linux kernel's compile-time-assert machinery
+# ( _compiletime_assert, reached via the atomic / per-CPU /
+# printk-once macros used in linuxkm/x86_vector_register_glue.c) expands to an
+# "extern void __compiletime_assert_N(void)" declaration *inside* a function body.
+# That is by-design kernel code, but trips wolfSSL's -Wnested-externs -> with
+# -Werror it breaks the i386 + AES-NI kernel build (the glue is only compiled when
+# WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set, i.e. with a PAA enabled). Suppress it
+# for the whole linuxkm build, same as the two kernel-incompatible warnings already
+# stripped here. This only silences a diagnostic -> emitted object code (and the
+# FIPS in-core hash) is byte-identical on every arch.
+WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -Wno-nested-externs -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\""
ifdef KERNEL_EXTRA_CFLAGS
WOLFSSL_CFLAGS += $(KERNEL_EXTRA_CFLAGS)
endif
@@ -55,7 +65,13 @@ endif
WOLFSSL_ASFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CCASFLAGS) $(CCASFLAGS)
-WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(src_libwolfssl_la_OBJECTS)))))
+# Strip libtool's per-target object prefix (src_libwolfssl_la-) so Kbuild sees the
+# real source-derived object names. The innermost patsubst handles the ARM asm
+# under wolfcrypt/src/port/arm/ (armv8-*/armv8-32-*/thumb2-*), which the
+# directory-specific src/ and wolfcrypt/src/ patsubsts below do NOT match --
+# without it, --enable-armasm kernel builds fail with "No rule to make target
+# .../src_libwolfssl_la-armv8-aes-asm.o".
+WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(patsubst wolfcrypt/src/port/arm/src_libwolfssl_la-%, wolfcrypt/src/port/arm/%, $(src_libwolfssl_la_OBJECTS))))))
ifeq "$(ENABLED_CRYPT_TESTS)" "yes"
WOLFSSL_OBJ_FILES+=wolfcrypt/test/test.o
@@ -197,7 +213,7 @@ GENERATE_RELOC_TAB := $(AWK) ' \
next; \
} \
/^0/ { \
- if ($$3 !~ "^(R_X86_.*|R_AARCH64_.*|R_ARM.*)$$") { \
+ if ($$3 !~ "^(R_X86_.*|R_386_.*|R_AARCH64_.*|R_ARM.*)$$") { \
print "Unexpected relocation type in " cur_seg ":\n" $$0 >"/dev/stderr"; \
++bad_relocs; \
} \
@@ -361,12 +377,12 @@ module-update-fips-hash: $(LIBWOLFSSL_NAME).ko
readarray -t verifyCore_attrs < <($(READELF) --wide --symbols "$<" | \
sed -E -n 's/^[[:space:]]*[0-9]+: ([0-9a-fA-F]+)[[:space:]]+([0-9]+)[[:space:]]+OBJECT[[:space:]]+[A-Z]+[[:space:]]+[A-Z]+[[:space:]]+'"$${rodata_segment[0]}"'[[:space:]]+verifyCore$$/\1\n\2/p'); \
if [[ $${#verifyCore_attrs[@]} != 2 ]]; then echo ' unexpected verifyCore_attrs.' >&2; exit 1; fi; \
- if [[ "$${verifyCore_attrs[1]}" != "65" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \
+ if [[ "$${verifyCore_attrs[1]}" != "129" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \
verifyCore_offset=$$((0x$${rodata_segment[1]} + 0x$${verifyCore_attrs[0]})); \
- current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=64 status=none); \
+ current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=128 status=none); \
if [[ ! "$$current_verifyCore" =~ [0-9a-fA-F]{64} ]]; then echo " verifyCore at offset $$verifyCore_offset has unexpected value." >&2; exit 1; fi; \
if [[ '$(FIPS_HASH)' == "$$current_verifyCore" ]]; then echo ' Supplied FIPS_HASH matches existing verifyCore -- no update needed.'; exit 0; fi; \
- echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=64 status=none && \
+ echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=128 status=none && \
echo " FIPS verifyCore updated successfully." && \
if [[ -f '$(LIBWOLFSSL_NAME).ko.signed' ]]; then $(MAKE) $(QFLAG) --no-print-directory --no-silent -C . '$(LIBWOLFSSL_NAME).ko.signed'; fi
diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c
index 32aa241404a..15cb32ad918 100644
--- a/linuxkm/linuxkm_memory.c
+++ b/linuxkm/linuxkm_memory.c
@@ -52,6 +52,8 @@ static const struct reloc_layout_ent {
[WC_R_X86_64_64] = { "R_X86_64_64", ~0UL, 64, .is_signed = 0, .is_relative = 0 },
[WC_R_X86_64_PC32] = { "R_X86_64_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 },
[WC_R_X86_64_PLT32] = { "R_X86_64_PLT32", ~0UL, 32, .is_signed = 1, .is_relative = 1 },
+ [WC_R_386_32] = { "R_386_32", ~0UL, 32, .is_signed = 0, .is_relative = 0 },
+ [WC_R_386_PC32] = { "R_386_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 },
[WC_R_AARCH64_ABS32] = { "R_AARCH64_ABS32", ~0UL, 32, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_AARCH64_ABS64] = { "R_AARCH64_ABS64", ~0UL, 64, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_AARCH64_ADD_ABS_LO12_NC] = { "R_AARCH64_ADD_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 },
@@ -64,6 +66,10 @@ static const struct reloc_layout_ent {
[WC_R_AARCH64_LDST64_ABS_LO12_NC] = { "R_AARCH64_LDST64_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 },
[WC_R_AARCH64_PREL32] = { "R_AARCH64_PREL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_ABS32] = { "R_ARM_ABS32", ~0UL, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
+ /* ARM-mode BL/B: signed 24-bit word offset in bits [23:0] (cf. AARCH64_CALL26's
+ * 26-bit field). Emitted by the arm32 ARM-mode (non-Thumb) kernel module build. */
+ [WC_R_ARM_CALL] = { "R_ARM_CALL", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
+ [WC_R_ARM_JUMP24] = { "R_ARM_JUMP24", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_PREL31] = { "R_ARM_PREL31", 0b01111111111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_REL32] = { "R_ARM_REL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
[WC_R_ARM_THM_CALL] = { "R_ARM_THM_CALL", 0b00000111111111110010111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 },
@@ -363,6 +369,12 @@ ssize_t wc_reloc_normalize_segment(
case WC_R_X86_64_32:
case WC_R_X86_64_32S:
case WC_R_X86_64_64:
+ /* i386 shares the x86_64 normalization: R_386_32 is absolute
+ * (is_relative=0), R_386_PC32 is PC-relative (is_relative=1). The
+ * math below is driven by layout->is_relative/is_signed and is
+ * width-correct via uintptr_t (32-bit on i386). */
+ case WC_R_386_32:
+ case WC_R_386_PC32:
if (dest_seg != WC_R_SEG_OTHER) {
#ifdef DEBUG_LINUXKM_PIE_SUPPORT
@@ -410,6 +422,8 @@ ssize_t wc_reloc_normalize_segment(
break;
case WC_R_ARM_ABS32:
+ case WC_R_ARM_CALL:
+ case WC_R_ARM_JUMP24:
case WC_R_ARM_PREL31:
case WC_R_ARM_REL32:
case WC_R_ARM_THM_CALL:
diff --git a/linuxkm/linuxkm_memory.h b/linuxkm/linuxkm_memory.h
index 76e681da805..d5111613d28 100644
--- a/linuxkm/linuxkm_memory.h
+++ b/linuxkm/linuxkm_memory.h
@@ -40,6 +40,12 @@ enum wc_reloc_type {
WC_R_X86_64_64,
WC_R_X86_64_PC32,
WC_R_X86_64_PLT32,
+ /* 32-bit x86 (i386). With NO_PIE_FLAG the wolfCrypt container emits only
+ * R_386_32 (absolute) and R_386_PC32 (PC-relative); these are semantically
+ * identical to R_X86_64_32 / R_X86_64_PC32 and share their canonicalization
+ * case below. */
+ WC_R_386_32,
+ WC_R_386_PC32,
WC_R_AARCH64_ABS32,
WC_R_AARCH64_ABS64,
WC_R_AARCH64_ADD_ABS_LO12_NC,
@@ -52,6 +58,8 @@ enum wc_reloc_type {
WC_R_AARCH64_LDST64_ABS_LO12_NC,
WC_R_AARCH64_PREL32,
WC_R_ARM_ABS32,
+ WC_R_ARM_CALL,
+ WC_R_ARM_JUMP24,
WC_R_ARM_PREL31,
WC_R_ARM_REL32,
WC_R_ARM_THM_CALL,
diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h
index 2a47722ad02..6f1f1881fef 100644
--- a/linuxkm/linuxkm_wc_port.h
+++ b/linuxkm/linuxkm_wc_port.h
@@ -682,8 +682,12 @@
#define WOLFSSL_USE_SAVE_VECTOR_REGISTERS
#endif
+ /* x86 (kernel_fpu_*) and ARM/ARM64 (kernel_neon_*) share the same
+ * arch-neutral save/restore tracker in x86_vector_register_glue.c; the glue
+ * functions keep their historical wc_*_x86 names on all three arches (they
+ * are outside-boundary glue reached via the PIE redirect table). */
#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
- defined(CONFIG_X86)
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void);
extern void free_wolfcrypt_linuxkm_fpu_states(void);
@@ -691,18 +695,23 @@
WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags);
WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags);
- #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
- #include
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
- /* added by a62b01cd6c */
- #include
- #endif
- #else
- #include
- #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
- /* added by 266d051601 */
- #include
+ #ifdef CONFIG_X86
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+ #include
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+ /* added by a62b01cd6c */
+ #include
+ #endif
+ #else
+ #include
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+ /* added by 266d051601 */
+ #include
+ #endif
#endif
+ #else /* CONFIG_ARM || CONFIG_ARM64 */
+ #include /* may_use_simd() */
+ #include /* kernel_neon_begin() / kernel_neon_end() */
#endif
#ifndef CAN_SAVE_VECTOR_REGISTERS
#ifdef DEBUG_VECTOR_REGISTER_ACCESS_FUZZING
@@ -742,42 +751,6 @@
#define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT)
#endif
- #elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
-
- #error kernel module ARM SIMD is not yet tested or usable.
-
- #include
-
- static WARN_UNUSED_RESULT inline int save_vector_registers_arm(void)
- {
- preempt_disable();
- if (! may_use_simd()) {
- preempt_enable();
- return BAD_STATE_E;
- } else {
- fpsimd_preserve_current_state();
- return 0;
- }
- }
- static inline void restore_vector_registers_arm(void)
- {
- fpsimd_restore_current_state();
- preempt_enable();
- }
-
- #ifndef SAVE_VECTOR_REGISTERS
- #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_arm(); if (_svr_ret != 0) { fail_clause } }
- #endif
- #ifndef SAVE_VECTOR_REGISTERS2
- #define SAVE_VECTOR_REGISTERS2() save_vector_registers_arm()
- #endif
- #ifndef CAN_SAVE_VECTOR_REGISTERS
- #define CAN_SAVE_VECTOR_REGISTERS() can_save_vector_registers_arm()
- #endif
- #ifndef RESTORE_VECTOR_REGISTERS
- #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_arm()
- #endif
-
#elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS)
#error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture.
#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */
@@ -958,6 +931,22 @@
extern int memcmp(const void *s1, const void *s2, size_t n);
#endif
+#ifdef CONFIG_X86_32
+ /* arch/x86/include/asm/string_32.h #defines memcpy/memcmp/memset as
+ * __builtin_* object-like macros (x86_64's string_64.h declares them as
+ * plain functions, so this does not arise on K2). Left active, those
+ * macros expand inside the PIE redirect-table member declarations below --
+ * "typeof(memcmp) *memcmp;" becomes "... *__builtin_memcmp;" -- so the
+ * table loses its memcmp/memcpy/memset members and the downstream
+ * WC_PIE_INDIRECT_SYM(memcmp) lookups fail to compile. #undef the macros
+ * here, before the struct; string_32.h still declares the underlying
+ * functions, so typeof() resolves and the members and redirects use the
+ * canonical names. Mirrors the CONFIG_MIPS handling just above. */
+ #undef memcpy
+ #undef memcmp
+ #undef memset
+#endif
+
struct wolfssl_linuxkm_pie_redirect_table {
#ifdef HAVE_FIPS
typeof(wc_linuxkm_normalize_relocations) *wc_linuxkm_normalize_relocations;
@@ -1091,13 +1080,13 @@
#ifdef WOLFSSL_USE_SAVE_VECTOR_REGISTERS
- #ifdef CONFIG_X86
+ #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
typeof(allocate_wolfcrypt_linuxkm_fpu_states) *allocate_wolfcrypt_linuxkm_fpu_states;
typeof(wc_can_save_vector_registers_x86) *wc_can_save_vector_registers_x86;
typeof(free_wolfcrypt_linuxkm_fpu_states) *free_wolfcrypt_linuxkm_fpu_states;
typeof(wc_restore_vector_registers_x86) *wc_restore_vector_registers_x86;
typeof(wc_save_vector_registers_x86) *wc_save_vector_registers_x86;
- #else /* !CONFIG_X86 */
+ #else
#error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture.
#endif /* arch */
@@ -1442,7 +1431,8 @@
#undef get_current
#define get_current WC_PIE_INDIRECT_SYM(get_current)
- #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+ #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
#define allocate_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(allocate_wolfcrypt_linuxkm_fpu_states)
#define wc_can_save_vector_registers_x86 WC_PIE_INDIRECT_SYM(wc_can_save_vector_registers_x86)
#define free_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(free_wolfcrypt_linuxkm_fpu_states)
@@ -1751,7 +1741,7 @@
#if !defined(BUILDING_WOLFSSL)
/* some caller code needs these. */
#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS)
- #if defined(CONFIG_X86)
+ #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
WOLFSSL_API __must_check int wc_can_save_vector_registers_x86(void);
WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags);
WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags);
@@ -1761,9 +1751,9 @@
#ifndef REENABLE_VECTOR_REGISTERS
#define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT)
#endif
- #else /* !CONFIG_X86 */
+ #else
#error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture.
- #endif /* !CONFIG_X86 */
+ #endif
#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */
#ifdef WC_LINUXKM_USE_HEAP_WRAPPERS
WOLFSSL_API extern void *wc_linuxkm_malloc(size_t size);
diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c
index 66b953048e4..9a11b7007a4 100644
--- a/linuxkm/module_hooks.c
+++ b/linuxkm/module_hooks.c
@@ -527,7 +527,9 @@ int wc_linuxkm_GenerateSeed_IntelRD(struct OS_Seed* os, byte* output, word32 sz)
#endif /* WC_LINUXKM_RDSEED_IN_GLUE_LAYER */
-#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+ /* arch-generic save/restore tracker (kernel_fpu_* on x86, kernel_neon_* on ARM) */
#include "linuxkm/x86_vector_register_glue.c"
#endif
@@ -1516,7 +1518,8 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) {
wolfssl_linuxkm_pie_redirect_table.get_current = my_get_current_thread;
-#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86)
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \
+ (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
wolfssl_linuxkm_pie_redirect_table.allocate_wolfcrypt_linuxkm_fpu_states = allocate_wolfcrypt_linuxkm_fpu_states;
wolfssl_linuxkm_pie_redirect_table.wc_can_save_vector_registers_x86 = wc_can_save_vector_registers_x86;
wolfssl_linuxkm_pie_redirect_table.free_wolfcrypt_linuxkm_fpu_states = free_wolfcrypt_linuxkm_fpu_states;
@@ -2041,7 +2044,12 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib
int ret;
int argc;
const char *argv[3];
- char code_buf[5];
+ /* Holds the textual error code written to the sysfs node, plus a NUL.
+ * Must accommodate the v7.0.0 module's 5-character codes (e.g. "-1015"
+ * ML_KEM_PCT_E, "-1016" ML_DSA_PCT_E, "-1017" DRBG_SHA512_KAT_FIPS_E);
+ * the earlier [5] sizing silently rejected them via the length guard
+ * below. Sized with headroom for any future wider code. */
+ char code_buf[8];
size_t corrected_count;
int i;
@@ -2057,7 +2065,7 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib
corrected_count = count - 1;
else
corrected_count = count;
- if ((corrected_count < 1) || (corrected_count > 4))
+ if ((corrected_count < 1) || (corrected_count > (sizeof(code_buf) - 1)))
return -EINVAL;
XMEMCPY(code_buf, buf, corrected_count);
code_buf[corrected_count] = 0;
diff --git a/linuxkm/pie_redirect_table.c b/linuxkm/pie_redirect_table.c
index 03be2e04fa0..657aa4c96b6 100644
--- a/linuxkm/pie_redirect_table.c
+++ b/linuxkm/pie_redirect_table.c
@@ -53,8 +53,18 @@ const struct wolfssl_linuxkm_pie_redirect_table
return &wolfssl_linuxkm_pie_redirect_table;
}
-/* placeholder implementations for missing functions. */
-#if defined(CONFIG_MIPS)
+/* placeholder implementations for missing functions.
+ *
+ * ARM/ARM64 need these for the same reason MIPS does: with vector codegen
+ * enabled (the --enable-armasm build does not pass -mgeneral-regs-only to the
+ * wolfCrypt C files), gcc auto-generates raw memcpy/memset libcalls for
+ * aggregate copies inside the position-independent FIPS container (e.g. in
+ * asn.c / fips_test.c). Source-level XMEMCPY/XMEMSET are redirected to the
+ * kernel's fast implementations via WC_PIE_INDIRECT_SYM, but compiler-emitted
+ * libcalls bypass that #define, so the container must define its own to stay
+ * self-contained (the in-core integrity check forbids ANY undefined symbol).
+ * (The pure-C C1 build does not auto-vectorize and so never references these.) */
+#if defined(CONFIG_MIPS) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#undef memcpy
void *memcpy(void *dest, const void *src, size_t n) {
char *dest_i = (char *)dest;
@@ -74,3 +84,69 @@ const struct wolfssl_linuxkm_pie_redirect_table
return dest;
}
#endif
+
+#if defined(CONFIG_ARM)
+ /* 32-bit ARM has no integer-divide instruction in the baseline ISA, so gcc
+ * emits calls to these EABI runtime helpers for '/' and '%'. The kernel
+ * exports them (arch/arm/lib/lib1funcs.S), but the self-contained PIE FIPS
+ * container may not reference external symbols (the in-core integrity check
+ * forbids ANY undefined symbol), so provide them here. Restoring (bit-at-a-
+ * time) division -- correctness over speed; crypto-path divisions are on
+ * small sizes/indices. Per the EABI, __aeabi_*idivmod return the quotient
+ * in r0 and the remainder in r1, i.e. a little-endian 64-bit value with the
+ * quotient in the low word and the remainder in the high word. */
+ unsigned int __aeabi_uidiv(unsigned int n, unsigned int d);
+ unsigned int __aeabi_uidiv(unsigned int n, unsigned int d) {
+ unsigned int q = 0, r = 0;
+ int i;
+ if (d == 0)
+ return ~0u;
+ for (i = 31; i >= 0; i--) {
+ r = (r << 1) | ((n >> i) & 1u);
+ if (r >= d) {
+ r -= d;
+ q |= (1u << i);
+ }
+ }
+ return q;
+ }
+
+ unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d);
+ unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d) {
+ unsigned int q = 0, r = 0;
+ int i;
+ if (d == 0)
+ return (unsigned long long)n << 32; /* quot=0, rem=n */
+ for (i = 31; i >= 0; i--) {
+ r = (r << 1) | ((n >> i) & 1u);
+ if (r >= d) {
+ r -= d;
+ q |= (1u << i);
+ }
+ }
+ return ((unsigned long long)r << 32) | q;
+ }
+
+ int __aeabi_idiv(int n, int d);
+ int __aeabi_idiv(int n, int d) {
+ int neg = (n < 0) ^ (d < 0);
+ unsigned int un = (n < 0) ? (unsigned int)(-(long)n) : (unsigned int)n;
+ unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d;
+ unsigned int uq = __aeabi_uidiv(un, ud);
+ return neg ? -(int)uq : (int)uq;
+ }
+
+ unsigned long long __aeabi_idivmod(int n, int d);
+ unsigned long long __aeabi_idivmod(int n, int d) {
+ int nneg = (n < 0);
+ int qneg = (n < 0) ^ (d < 0);
+ unsigned int un = nneg ? (unsigned int)(-(long)n) : (unsigned int)n;
+ unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d;
+ unsigned long long um = __aeabi_uidivmod(un, ud);
+ unsigned int uq = (unsigned int)um;
+ unsigned int ur = (unsigned int)(um >> 32);
+ int q = qneg ? -(int)uq : (int)uq;
+ int r = nneg ? -(int)ur : (int)ur;
+ return ((unsigned long long)(unsigned int)r << 32) | (unsigned int)q;
+ }
+#endif /* CONFIG_ARM */
diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c
index 107c7e11274..af4b9b6d598 100644
--- a/linuxkm/x86_vector_register_glue.c
+++ b/linuxkm/x86_vector_register_glue.c
@@ -23,8 +23,29 @@
/* included by linuxkm/module_hooks.c */
#ifndef WC_SKIP_INCLUDED_C_FILES
-#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || !defined(CONFIG_X86)
- #error x86_vector_register_glue.c included in non-vectorized/non-x86 project.
+#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || \
+ !(defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+ #error vector register glue included in non-vectorized or unsupported-arch project.
+#endif
+
+/* The per-CPU vector-register save/restore tracker below is architecture-neutral
+ * except for the single kernel call that claims/releases the SIMD/FP unit:
+ * x86 -> kernel_fpu_begin() / kernel_fpu_end() (, via the
+ * include in
+ * linuxkm_wc_port.h)
+ * ARM/ARM64 -> kernel_neon_begin() / kernel_neon_end() ()
+ * Both APIs obey the same context rules the tracker already enforces (may_use_simd(),
+ * hard-IRQ/NMI rejection, preempt/bh/migration disable). The functions retain their
+ * historical wc_*_x86 names: they are internal glue OUTSIDE the FIPS module boundary
+ * (reached from boundary code only through the PIE redirect table), so keeping the
+ * names leaves the validated x86 symbol set byte-for-byte unchanged. */
+#if defined(CONFIG_X86)
+ #define WC_LINUXKM_FPU_BEGIN() kernel_fpu_begin()
+ #define WC_LINUXKM_FPU_END() kernel_fpu_end()
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+ #include
+ #define WC_LINUXKM_FPU_BEGIN() kernel_neon_begin()
+ #define WC_LINUXKM_FPU_END() kernel_neon_end()
#endif
#ifdef WOLFSSL_LINUXKM_VERBOSE_DEBUG
@@ -70,9 +91,12 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void)
wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0]));
if (! wc_linuxkm_fpu_states) {
+ /* cast to unsigned long to match %lu: size_t is 32-bit on arm32 but
+ * 64-bit (== unsigned long) on x86_64/arm64, so the product type differs
+ * by arch. Cast keeps the format portable across the ARM-generalized glue. */
pr_err("ERROR: allocation of %lu bytes for "
"wc_linuxkm_fpu_states failed.\n",
- nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0]));
+ (unsigned long)(nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0])));
return MEMORY_E;
}
@@ -441,10 +465,10 @@ WARN_UNUSED_RESULT int wc_save_vector_registers_x86(enum wc_svr_flags flags)
#if IS_ENABLED(CONFIG_PREEMPT_RT)
preempt_disable();
#endif
- kernel_fpu_begin();
+ WC_LINUXKM_FPU_BEGIN();
pstate = wc_linuxkm_fpu_state_assoc(1, 1);
if (pstate == NULL) {
- kernel_fpu_end();
+ WC_LINUXKM_FPU_END();
#if IS_ENABLED(CONFIG_PREEMPT_RT)
preempt_enable();
#endif
@@ -508,7 +532,7 @@ void wc_restore_vector_registers_x86(enum wc_svr_flags flags)
if (pstate->fpu_state == 0U) {
wc_linuxkm_fpu_state_release(pstate);
- kernel_fpu_end();
+ WC_LINUXKM_FPU_END();
#if IS_ENABLED(CONFIG_PREEMPT_RT)
preempt_enable();
#endif
diff --git a/src/include.am b/src/include.am
index 4b80e149bac..bd4d2586a2c 100644
--- a/src/include.am
+++ b/src/include.am
@@ -109,17 +109,22 @@ endif
if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
-if BUILD_X86_ASM
-src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
-else
+# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by
+# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to
+# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM
+# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations
+# break the FIPS in-core integrity in a shared object -- so it is not compiled.
+# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement.
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
endif
+# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and
+# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM
+# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17.
if BUILD_AESXTS
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
-endif
if BUILD_DES3
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/des3.c
@@ -259,17 +264,22 @@ endif BUILD_PPC64_ASM
if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
-if BUILD_X86_ASM
-src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
-else
+# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by
+# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to
+# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM
+# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations
+# break the FIPS in-core integrity in a shared object -- so it is not compiled.
+# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement.
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
endif
+# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and
+# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM
+# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17.
if BUILD_AESXTS
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
-endif
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-aes.c
@@ -532,17 +542,22 @@ endif BUILD_PPC64_ASM
if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
-if BUILD_X86_ASM
-src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
-else
+# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by
+# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to
+# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM
+# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations
+# break the FIPS in-core integrity in a shared object -- so it is not compiled.
+# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement.
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
endif
+# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and
+# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM
+# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17.
if BUILD_AESXTS
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
-endif
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-aes.c
@@ -867,17 +882,22 @@ endif BUILD_AES
if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
-if BUILD_X86_ASM
-src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
-else
+# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by
+# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to
+# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM
+# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations
+# break the FIPS in-core integrity in a shared object -- so it is not compiled.
+# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement.
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
endif
+# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and
+# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM
+# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17.
if BUILD_AESXTS
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
-endif
if BUILD_SHA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha.c
@@ -1708,18 +1728,23 @@ endif
if !BUILD_FIPS_V2_PLUS
if BUILD_AESNI
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S
-if BUILD_X86_ASM
-src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
-else
+# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by
+# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to
+# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM
+# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations
+# break the FIPS in-core integrity in a shared object -- so it is not compiled.
+# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement.
if BUILD_AESGCM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
endif
+# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and
+# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM
+# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17.
if BUILD_AESXTS
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
endif
-endif
if BUILD_CAMELLIA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/camellia.c
diff --git a/tests/api/test_aes.c b/tests/api/test_aes.c
index 72221cd04ad..ec767c1319c 100644
--- a/tests/api/test_aes.c
+++ b/tests/api/test_aes.c
@@ -693,7 +693,14 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key,
ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len),
0);
ExpectBufEQ(cipher, vector_enc, vector_len);
-#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
+ /* The BAD_LENGTH_E enforcement is in the non-FIPS aes.c implementation
+ * (see WOLFSSL_AES_CBC_LENGTH_CHECKS guard there). FIPSv2 (cert3389)
+ * routes through its own historical wc_AesCbcEncrypt_fips wrapper that
+ * predates this check and silently returns 0 on unaligned input. Only
+ * v5.x and newer FIPS modules carry the wrapper-level check. Skip the
+ * assertion for FIPSv2 builds. */
+#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \
+ (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0))
ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len - 1),
WC_NO_ERR_TRACE(BAD_LENGTH_E));
#endif
@@ -703,7 +710,9 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key,
ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher,
WC_AES_BLOCK_SIZE * 2), 0);
ExpectBufEQ(decrypted, vector, vector_len);
-#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
+#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \
+ (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0))
+ /* Same FIPSv2 vs v5+ rationale as the encrypt assertion above. */
ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher,
WC_AES_BLOCK_SIZE * 2 - 1), WC_NO_ERR_TRACE(BAD_LENGTH_E));
#else
diff --git a/tests/api/test_evp_pkey.c b/tests/api/test_evp_pkey.c
index 9bdd5b9339d..2e106d16d6a 100644
--- a/tests/api/test_evp_pkey.c
+++ b/tests/api/test_evp_pkey.c
@@ -1526,7 +1526,7 @@ static int test_wolfSSL_EVP_PKEY_sign_verify(int keyType)
!defined(HAVE_SELFTEST)
#if !defined(HAVE_FIPS) || (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION>2))
{
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS);
}
#endif
@@ -2159,7 +2159,7 @@ int test_wolfSSL_EVP_PKEY_encrypt(void)
XMEMSET(outDec, 0, rsaKeySz);
}
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectNotNull(pkey = wolfSSL_EVP_PKEY_new());
ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS);
if (EXPECT_FAIL()) {
diff --git a/tests/api/test_ossl_rsa.c b/tests/api/test_ossl_rsa.c
index dc0cee665ba..250d1df0070 100644
--- a/tests/api/test_ossl_rsa.c
+++ b/tests/api/test_ossl_rsa.c
@@ -65,7 +65,7 @@ int test_wolfSSL_RSA(void)
RSA_free(rsa);
rsa = NULL;
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectIntEQ(RSA_size(rsa), 256);
#if (!defined(HAVE_FIPS) || FIPS_VERSION3_GT(6,0,0)) && !defined(HAVE_SELFTEST)
@@ -306,7 +306,7 @@ int test_wolfSSL_RSA(void)
rsa = NULL;
#if !defined(USE_FAST_MATH) || (FP_MAX_BITS >= (3072*2))
- ExpectNotNull(rsa = RSA_generate_key(3072, 17, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(3072, 65537, NULL, NULL));
ExpectIntEQ(RSA_size(rsa), 384);
ExpectIntEQ(RSA_bits(rsa), 3072);
RSA_free(rsa);
@@ -461,7 +461,7 @@ int test_wolfSSL_RSA_print(void)
RSA_free(rsa);
rsa = NULL;
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
ExpectIntEQ(RSA_print(bio, rsa, 0), 1);
ExpectIntEQ(RSA_print(bio, rsa, 4), 1);
@@ -644,11 +644,11 @@ int test_wolfSSL_RSA_meth(void)
RSA_METHOD *rsa_meth = NULL;
#ifdef WOLFSSL_KEY_GEN
- ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
RSA_free(rsa);
rsa = NULL;
#else
- ExpectNull(rsa = RSA_generate_key(2048, 3, NULL, NULL));
+ ExpectNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL));
#endif
ExpectNotNull(RSA_get_default_method());
diff --git a/tests/api/test_slhdsa.c b/tests/api/test_slhdsa.c
index 988bbc579e0..4510319925e 100644
--- a/tests/api/test_slhdsa.c
+++ b/tests/api/test_slhdsa.c
@@ -1081,12 +1081,14 @@ int test_wc_slhdsa_sign_hash(void)
WC_HASH_TYPE_SHA256, sig, sigLen),
WC_NO_ERR_TRACE(BAD_LENGTH_E));
- /* Unsupported hashType (FIPS 205 doesn't list WC_HASH_TYPE_NONE) hits
- * the default branch of slhdsakey_validate_prehash. */
+ /* WC_HASH_TYPE_NONE is the "pure SLH-DSA" sentinel and is never a valid
+ * pre-hash algorithm (FIPS 205 Section 10.2.2 / Table 9). HashSLH-DSA
+ * signing rejects it with an explicit early check (BAD_FUNC_ARG), not via
+ * the slhdsa_check_hash_for_n() switch default. */
sigLen = WC_SLHDSA_MAX_SIG_LEN;
ExpectIntEQ(wc_SlhDsaKey_SignHash(&key, ctx, sizeof(ctx), hash, 32,
WC_HASH_TYPE_NONE, sig, &sigLen, &rng),
- WC_NO_ERR_TRACE(NOT_COMPILED_IN));
+ WC_NO_ERR_TRACE(BAD_FUNC_ARG));
/* Test SignHash with SHA-256. */
sigLen = WC_SLHDSA_MAX_SIG_LEN;
diff --git a/wolfcrypt/benchmark/fips_cast_bench.c b/wolfcrypt/benchmark/fips_cast_bench.c
new file mode 100644
index 00000000000..bd7c0e9dbc6
--- /dev/null
+++ b/wolfcrypt/benchmark/fips_cast_bench.c
@@ -0,0 +1,363 @@
+/* fips_cast_bench.c
+ *
+ * Copyright (C) 2006-2026 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* FIPS CAST benchmark.
+ *
+ * Measures the wall-clock cost of each Conditional Algorithm Self-Test (CAST)
+ * defined by the wolfCrypt v7.0.0 FIPS module so operators can budget module
+ * power-on latency on resource-constrained operational environments (DSP,
+ * MCU) where every additional CAST is directly observable as boot-time delay.
+ *
+ * Compiled only when HAVE_FIPS is defined (see wolfcrypt/benchmark/include.am
+ * BUILD_FIPS gate). Calls wc_RunCast_fips(id) repeatedly per CAST and reports
+ * mean / stddev / min / max for each, plus total time for one pass over all
+ * enabled CASTs (the cost paid by callers that invoke wc_RunAllCast_fips() at
+ * application start).
+ *
+ * Citations:
+ * FIPS 140-3 sec 7.10 (Self-Tests) - CAST framework
+ * FIPS 140-3 IG 10.3.A - Algorithm-by-algorithm CAST coverage
+ * ISO/IEC 19790:2012 sec 7.10.2 - Conditional self-test execution
+ */
+
+#ifdef HAVE_CONFIG_H
+ #include
+#endif
+
+#if !defined(WOLFSSL_USER_SETTINGS) && !defined(WOLFSSL_NO_OPTIONS_H)
+ #include
+#endif
+#include /* also picks up user_settings.h */
+
+/* fips_cast_bench drives wc_RunCast_fips() / wc_RunAllCast_fips() which were
+ * introduced in the v7.0.0 module's CAST framework. Older 140-3 modules
+ * (v5.x, v6.0.0) and the FIPSv2 module do not export these symbols, so when
+ * fips-check.sh swaps in an older-flavor fips/ tree this file would otherwise
+ * fail to link. Gate the entire benchmark on FIPS_VERSION3_GE(7,0,0); for
+ * older flavors we fall through to the empty-main stub at the bottom of the
+ * file so the build still produces an executable. */
+#if defined(HAVE_FIPS) && FIPS_VERSION3_GE(7,0,0)
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+ #define WIN32_LEAN_AND_MEAN
+ #include
+#else
+ #include
+#endif
+
+
+#define BENCH_DEFAULT_ITERS 10
+
+/* Map FIPS_CAST_* enum value to a printable name. Kept in sync with
+ * wolfssl/wolfcrypt/fips_test.h FipsCastId enum. */
+static const char* cast_name(int id)
+{
+ switch (id) {
+ case FIPS_CAST_AES_CBC: return "AES-CBC";
+ case FIPS_CAST_AES_GCM: return "AES-GCM";
+ case FIPS_CAST_HMAC_SHA1: return "HMAC-SHA-1";
+ case FIPS_CAST_HMAC_SHA2_256: return "HMAC-SHA2-256";
+ case FIPS_CAST_HMAC_SHA2_512: return "HMAC-SHA2-512";
+ case FIPS_CAST_HMAC_SHA3_256: return "HMAC-SHA3-256";
+ case FIPS_CAST_DRBG: return "DRBG (SHA-256)";
+ case FIPS_CAST_RSA_SIGN_PKCS1v15: return "RSA-SIGN-PKCS1v15";
+ case FIPS_CAST_ECC_CDH: return "ECC-CDH";
+ case FIPS_CAST_ECC_PRIMITIVE_Z: return "ECC-Primitive-Z";
+ case FIPS_CAST_DH_PRIMITIVE_Z: return "DH-Primitive-Z";
+ case FIPS_CAST_ECDSA: return "ECDSA";
+ case FIPS_CAST_KDF_TLS12: return "KDF-TLS12";
+ case FIPS_CAST_KDF_TLS13: return "KDF-TLS13";
+ case FIPS_CAST_KDF_SSH: return "KDF-SSH";
+#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(6,0)
+ case FIPS_CAST_KDF_SRTP: return "KDF-SRTP";
+ case FIPS_CAST_ED25519: return "Ed25519";
+ case FIPS_CAST_ED448: return "Ed448";
+ case FIPS_CAST_PBKDF2: return "PBKDF2";
+#endif
+#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0)
+ case FIPS_CAST_AES_ECB: return "AES-ECB";
+ case FIPS_CAST_ML_KEM: return "ML-KEM";
+ case FIPS_CAST_ML_DSA: return "ML-DSA";
+ case FIPS_CAST_LMS: return "LMS";
+ case FIPS_CAST_XMSS: return "XMSS";
+ case FIPS_CAST_DRBG_SHA512: return "DRBG (SHA-512)";
+ case FIPS_CAST_SLH_DSA: return "SLH-DSA";
+ case FIPS_CAST_AES_CMAC: return "AES-CMAC";
+ case FIPS_CAST_SHAKE: return "SHAKE";
+ case FIPS_CAST_AES_KW: return "AES-KW";
+#endif
+ default: return "(unknown)";
+ }
+}
+
+
+/* Monotonic clock in nanoseconds. POSIX clock_gettime(CLOCK_MONOTONIC) on
+ * Unix-like systems; QueryPerformanceCounter on Windows. */
+static long long now_ns(void)
+{
+#ifdef _WIN32
+ static LARGE_INTEGER freq = { 0 };
+ LARGE_INTEGER count;
+ if (freq.QuadPart == 0)
+ QueryPerformanceFrequency(&freq);
+ QueryPerformanceCounter(&count);
+ /* Multiply before divide to keep precision; freq is typically 10MHz. */
+ return (long long)((count.QuadPart * 1000000000LL) / freq.QuadPart);
+#else
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
+ return 0;
+ return (long long)ts.tv_sec * 1000000000LL + (long long)ts.tv_nsec;
+#endif
+}
+
+
+/* Run a single CAST iters times, populate stats (in milliseconds).
+ * Returns 0 on success, non-zero on first CAST failure. */
+static int run_one_cast(int id, int iters,
+ double* out_mean_ms, double* out_stddev_ms,
+ double* out_min_ms, double* out_max_ms)
+{
+ int i;
+ long long total = 0;
+ long long mn = LLONG_MAX;
+ long long mx = 0;
+ long long* samples;
+ double mean_ns;
+ double variance_acc = 0.0;
+
+ if (iters <= 0)
+ return BAD_FUNC_ARG;
+
+ samples = (long long*)XMALLOC((size_t)iters * sizeof(long long), NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if (samples == NULL)
+ return MEMORY_E;
+
+ for (i = 0; i < iters; i++) {
+ long long t0, t1, dt;
+ int rc;
+
+ t0 = now_ns();
+ rc = wc_RunCast_fips(id);
+ t1 = now_ns();
+ if (rc != 0) {
+ XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ return rc;
+ }
+ dt = t1 - t0;
+ if (dt < 0)
+ dt = 0;
+ samples[i] = dt;
+ total += dt;
+ if (dt < mn)
+ mn = dt;
+ if (dt > mx)
+ mx = dt;
+ }
+
+ mean_ns = (double)total / (double)iters;
+ for (i = 0; i < iters; i++) {
+ double d = (double)samples[i] - mean_ns;
+ variance_acc += d * d;
+ }
+ XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+ *out_mean_ms = mean_ns / 1.0e6;
+ *out_stddev_ms = sqrt(variance_acc / (double)iters) / 1.0e6;
+ *out_min_ms = (double)mn / 1.0e6;
+ *out_max_ms = (double)mx / 1.0e6;
+ return 0;
+}
+
+
+static void usage(const char* prog)
+{
+ printf("usage: %s [-i ITERS] [-c CAST_ID] [-l]\n", prog);
+ printf(" -i ITERS iterations per CAST (default %d)\n",
+ BENCH_DEFAULT_ITERS);
+ printf(" -c CAST_ID benchmark only the named CAST id\n");
+ printf(" -l list CAST ids and names; do not run\n");
+ printf(" -h show this help\n");
+}
+
+
+int main(int argc, char** argv)
+{
+ int iters = BENCH_DEFAULT_ITERS;
+ int single = -1;
+ int list_only = 0;
+ int i;
+ int first, last;
+ int failures = 0;
+ int run_count = 0;
+ double total_mean_ms = 0.0;
+
+ for (i = 1; i < argc; i++) {
+ if (XSTRCMP(argv[i], "-i") == 0 && i + 1 < argc) {
+ iters = atoi(argv[++i]);
+ if (iters <= 0) {
+ fprintf(stderr, "-i requires a positive iteration count\n");
+ return 2;
+ }
+ } else if (XSTRCMP(argv[i], "-c") == 0 && i + 1 < argc) {
+ single = atoi(argv[++i]);
+ } else if (XSTRCMP(argv[i], "-l") == 0) {
+ list_only = 1;
+ } else if (XSTRCMP(argv[i], "-h") == 0
+ || XSTRCMP(argv[i], "--help") == 0) {
+ usage(argv[0]);
+ return 0;
+ } else {
+ fprintf(stderr, "unknown argument: %s\n", argv[i]);
+ usage(argv[0]);
+ return 2;
+ }
+ }
+
+ if (list_only) {
+ printf("FIPS CAST IDs (FIPS_CAST_COUNT = %d):\n", FIPS_CAST_COUNT);
+ for (i = 0; i < FIPS_CAST_COUNT; i++)
+ printf(" %2d %s\n", i, cast_name(i));
+ return 0;
+ }
+
+ if (single >= 0 && single >= FIPS_CAST_COUNT) {
+ fprintf(stderr, "CAST id %d out of range (0..%d)\n",
+ single, FIPS_CAST_COUNT - 1);
+ return 2;
+ }
+
+ printf("wolfCrypt FIPS CAST benchmark\n");
+ printf("Library version: %s\n", LIBWOLFSSL_VERSION_STRING);
+ printf("FIPS_CAST_COUNT: %d\n", FIPS_CAST_COUNT);
+ printf("Iterations per CAST: %d\n", iters);
+ printf("Clock: %s\n",
+#ifdef _WIN32
+ "QueryPerformanceCounter"
+#else
+ "clock_gettime(CLOCK_MONOTONIC)"
+#endif
+ );
+ printf("\n");
+
+ /* Register the default DRBG seed callback (mirrors benchmark.c and
+ * wolfcrypt/test/test.c). Builds with WC_RNG_SEED_CB - which include
+ * the FIPS optest CFLAGS - require every application that initializes
+ * the RNG to register a seed generator before _InitRng can produce a
+ * working DRBG; without it, wc_InitRng inside the ECC_PRIMITIVE_Z and
+ * ECDSA CASTs returns -199 (RNG_FAILURE_E) and the dependent CASTs
+ * cascade-fail. */
+#ifdef WC_RNG_SEED_CB
+ {
+ int seed_cb_rc = wc_SetSeed_Cb(WC_GENERATE_SEED_DEFAULT);
+ if (seed_cb_rc != 0) {
+ fprintf(stderr,
+ "wc_SetSeed_Cb returned %d - DRBG-using CASTs will fail.\n",
+ seed_cb_rc);
+ }
+ }
+#endif
+
+ /* Prime: run every CAST once via wc_RunAllCast_fips() so each CAST
+ * reaches FIPS_CAST_STATE_SUCCESS before we begin measuring. This
+ * isolates the per-CAST KAT runtime cost from the cascading
+ * recursive-CAST init chain that fires on the first invocation of a
+ * cold CAST whose KAT internally calls FIPS-wrapped primitives whose
+ * own CASTs are still in INIT state. Customers calling
+ * wc_RunAllCast_fips() at boot pay this one-time cost up front, so
+ * priming here matches that real-world workflow. */
+ {
+ int prime_rc = wc_RunAllCast_fips();
+ if (prime_rc != 0) {
+ fprintf(stderr,
+ "wc_RunAllCast_fips() prime returned %d - some CASTs may have failed.\n"
+ "Per-CAST measurements continue but failed CASTs will report errors.\n\n",
+ prime_rc);
+ }
+ }
+
+ printf("ID | Name | Mean(ms) | StdDev(ms) | Min(ms) "
+ "| Max(ms)\n");
+ printf("---+---------------------+----------+------------+---------"
+ "+---------\n");
+
+ first = (single >= 0) ? single : 0;
+ last = (single >= 0) ? single + 1 : FIPS_CAST_COUNT;
+
+ for (i = first; i < last; i++) {
+ double mean_ms = 0, sd_ms = 0, mn_ms = 0, mx_ms = 0;
+ int rc = run_one_cast(i, iters, &mean_ms, &sd_ms, &mn_ms, &mx_ms);
+ if (rc != 0) {
+ printf("%2d | %-19s | FAILED rc=%d (%s)\n",
+ i, cast_name(i), rc, wc_GetErrorString(rc));
+ failures++;
+ continue;
+ }
+ printf("%2d | %-19s | %8.3f | %10.3f | %7.3f | %7.3f\n",
+ i, cast_name(i), mean_ms, sd_ms, mn_ms, mx_ms);
+ total_mean_ms += mean_ms;
+ run_count++;
+ }
+
+ printf("\n");
+ if (run_count > 0) {
+ printf("Sum of mean CAST times (one wc_RunAllCast_fips() pass): "
+ "%.3f ms\n", total_mean_ms);
+ }
+ if (failures > 0) {
+ printf("WARN: %d CAST(s) failed.\n", failures);
+ return 1;
+ }
+ return 0;
+}
+
+#else /* !(HAVE_FIPS && FIPS_VERSION3_GE(7,0,0)) */
+
+#include
+
+int main(void)
+{
+#ifndef HAVE_FIPS
+ fprintf(stderr,
+ "fips_cast_bench: built without HAVE_FIPS - nothing to measure\n");
+#else
+ fprintf(stderr,
+ "fips_cast_bench: requires v7.0.0+ FIPS module "
+ "(wc_RunCast_fips / wc_RunAllCast_fips were added in v7) - "
+ "nothing to measure on this older module flavor\n");
+#endif
+ return 0;
+}
+
+#endif /* HAVE_FIPS && FIPS_VERSION3_GE(7,0,0) */
diff --git a/wolfcrypt/benchmark/include.am b/wolfcrypt/benchmark/include.am
index 22cecbdaefe..130343a14e1 100644
--- a/wolfcrypt/benchmark/include.am
+++ b/wolfcrypt/benchmark/include.am
@@ -10,6 +10,16 @@ wolfcrypt_benchmark_benchmark_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_
wolfcrypt_benchmark_benchmark_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la
noinst_HEADERS += wolfcrypt/benchmark/benchmark.h
+# FIPS CAST benchmark - measures wc_RunCast_fips() execution time per CAST.
+# Helps operators of resource-constrained operational environments budget
+# module power-on latency. Compiled only when FIPS is enabled.
+if BUILD_FIPS
+noinst_PROGRAMS += wolfcrypt/benchmark/fips_cast_bench
+wolfcrypt_benchmark_fips_cast_bench_SOURCES = wolfcrypt/benchmark/fips_cast_bench.c
+wolfcrypt_benchmark_fips_cast_bench_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_STATIC_ADD) -lm
+wolfcrypt_benchmark_fips_cast_bench_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la
+endif
+
endif
endif
diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c
index 6806acbc965..95386f5f3d0 100644
--- a/wolfcrypt/src/aes.c
+++ b/wolfcrypt/src/aes.c
@@ -136,6 +136,16 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#include
+/* The dedicated GCM (PCLMUL/GHASH) assembly is x86_64-only: the 32-bit GCM asm
+ * (aes_gcm_x86_asm.S) is not position-independent and its .text relocations break
+ * the FIPS module in-core integrity in a shared object. On 32-bit x86, GCM uses
+ * the portable-C GHASH with AES-NI block encryption (AES-NI still engaged for the
+ * counter-mode blocks) -- mirrors the existing x86_64-gating of the GCM-AVX path.
+ * Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. */
+#if defined(WOLFSSL_AESNI) && defined(WOLFSSL_X86_64_BUILD)
+ #define WC_AESNI_GCM
+#endif
+
#ifdef WOLF_CRYPTO_CB
#include
#endif
@@ -913,6 +923,124 @@ static void Check_CPU_support_HwCrypto(Aes* aes)
}
#endif /* __aarch64__ && !WOLFSSL_ARMASM_NO_HW_CRYPTO */
+/* In a Linux kernel module the 32-bit ARM AES asm (ARMv8 AArch32 AES/PMULL crypto
+ * extension + NEON) MUST run between kernel_neon_begin()/end() or the first SIMD
+ * instruction faults "undefined instruction". wolfSSL never bracketed the
+ * in-kernel ARM crypto (it only happened to work on aarch64 because that kernel
+ * tolerates it). We wrap every AES_*_AARCH32 entry with SAVE/RESTORE_VECTOR_
+ * REGISTERS, then #define-redirect the call sites below. Defined before the
+ * #defines so the wrappers reference the real asm (no recursion). Scoped to
+ * !__aarch64__ so the aarch64 path is byte-identical. (FIPS 197 AES, SP 800-38D
+ * AES-GCM.) On a (process-context-only here) save failure the op is skipped
+ * rather than crashing -- never reached by POST/optest/harness. */
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \
+ !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ static WC_INLINE void wc_svr_AES_set_key_AARCH32(const byte* userKey,
+ int keylen, byte* key, int dir) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_set_key_AARCH32(userKey, keylen, key, dir);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_encrypt_AARCH32(const byte* inBlock,
+ byte* outBlock, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_encrypt_AARCH32(inBlock, outBlock, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_decrypt_AARCH32(const byte* inBlock,
+ byte* outBlock, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_decrypt_AARCH32(inBlock, outBlock, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_encrypt_blocks_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_encrypt_blocks_AARCH32(in, out, sz, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_decrypt_blocks_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* key, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_decrypt_blocks_AARCH32(in, out, sz, key, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_CBC_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* reg, byte* key, int rounds) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_CBC_encrypt_AARCH32(in, out, sz, reg, key, rounds);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_CBC_decrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* reg, byte* key, int rounds) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_CBC_decrypt_AARCH32(in, out, sz, reg, key, rounds);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_CTR_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left,
+ word32 rounds) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_CTR_encrypt_AARCH32(in, out, sz, reg, key, tmp, left, rounds);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_GCM_set_key_AARCH32(const byte* nonce,
+ const byte* key, byte* gcm_h, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_GCM_set_key_AARCH32(nonce, key, gcm_h, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_GCM_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* nonce, word32 nonceSz, byte* tag,
+ word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h,
+ byte* tmp, byte* reg, int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_GCM_encrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz, aad,
+ aadSz, key, gcm_h, tmp, reg, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE int wc_svr_AES_GCM_decrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag,
+ word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h,
+ byte* tmp, byte* reg, int nr) {
+ int _ret, _svr = SAVE_VECTOR_REGISTERS2();
+ if (_svr != 0) return _svr;
+ _ret = AES_GCM_decrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz,
+ aad, aadSz, key, gcm_h, tmp, reg, nr);
+ RESTORE_VECTOR_REGISTERS();
+ return _ret;
+ }
+ #define AES_set_key_AARCH32 wc_svr_AES_set_key_AARCH32
+ #define AES_encrypt_AARCH32 wc_svr_AES_encrypt_AARCH32
+ #define AES_decrypt_AARCH32 wc_svr_AES_decrypt_AARCH32
+ #define AES_encrypt_blocks_AARCH32 wc_svr_AES_encrypt_blocks_AARCH32
+ #define AES_decrypt_blocks_AARCH32 wc_svr_AES_decrypt_blocks_AARCH32
+ #define AES_CBC_encrypt_AARCH32 wc_svr_AES_CBC_encrypt_AARCH32
+ #define AES_CBC_decrypt_AARCH32 wc_svr_AES_CBC_decrypt_AARCH32
+ #define AES_CTR_encrypt_AARCH32 wc_svr_AES_CTR_encrypt_AARCH32
+ #define AES_GCM_set_key_AARCH32 wc_svr_AES_GCM_set_key_AARCH32
+ #define AES_GCM_encrypt_AARCH32 wc_svr_AES_GCM_encrypt_AARCH32
+ #define AES_GCM_decrypt_AARCH32 wc_svr_AES_GCM_decrypt_AARCH32
+ #ifdef WOLFSSL_AES_XTS
+ static WC_INLINE void wc_svr_AES_XTS_encrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp,
+ int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_XTS_encrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ static WC_INLINE void wc_svr_AES_XTS_decrypt_AARCH32(const byte* in,
+ byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp,
+ int nr) {
+ if (SAVE_VECTOR_REGISTERS2() != 0) return;
+ AES_XTS_decrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr);
+ RESTORE_VECTOR_REGISTERS();
+ }
+ #define AES_XTS_encrypt_AARCH32 wc_svr_AES_XTS_encrypt_AARCH32
+ #define AES_XTS_decrypt_AARCH32 wc_svr_AES_XTS_decrypt_AARCH32
+ #endif /* WOLFSSL_AES_XTS */
+#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS && !__aarch64__ && !NO_HW_CRYPTO */
+
#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) || \
defined(WOLFSSL_AESGCM_STREAM)
static WARN_UNUSED_RESULT int wc_AesEncrypt(Aes* aes, const byte* inBlock,
@@ -4571,6 +4699,14 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(Aes* aes, const byte* inBlock,
static int AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
const byte* iv, int dir)
{
+ /* Reject invalid AES key lengths early (FIPS 197: 128/192/256 only).
+ * The lenient wc_AesSetKeyDirect entry only bounds-checks keylen, so
+ * without this a zero/invalid keylen would be accepted here on 32-bit ARM
+ * armasm -- the C software path rejects it in wc_AesSetKeyLocal, so match
+ * that ("check early and BAD_FUNC_ARG out"). */
+ if (userKey == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) {
+ return BAD_FUNC_ARG;
+ }
#if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \
defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_XTS) || \
defined(WOLFSSL_AES_CTS)
@@ -7751,8 +7887,16 @@ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
#endif
#endif /* !FREESCALE_LTC_AES_GCM */
+/* SP 800-38D AES-GCM software GHASH (FlattenSzInBits length block + RIGHTSHIFTX +
+ * the GCM table GMULT/GHASH below). On 32-bit ARM with --enable-armasm the HW
+ * GCM only accelerates the one-shot path; the STREAMING GHASH (GHASH_INIT/UPDATE/
+ * FINAL) has no 32-bit asm implementation (only __aarch64__ + PMULL does) and so
+ * uses this software path. Therefore this block must also be compiled when
+ * WOLFSSL_AESGCM_STREAM is enabled, even for arm32 armasm HW-crypto -- otherwise
+ * GHASH_FINAL's fallback GHASH_LEN_BLOCK references an undefined FlattenSzInBits
+ * (arm64 already gets it via __aarch64__). */
#if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \
- defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM)
#if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
@@ -7913,7 +8057,7 @@ void GenerateM0(Gcm* gcm)
#define HAVE_INTEL_AVX2
#endif
-#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \
+#if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT) && \
defined(WC_C_DYNAMIC_FALLBACK)
void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_aesni");
@@ -8001,6 +8145,27 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
#if !defined(__aarch64__)
AES_GCM_set_key_AARCH32(iv, (byte*)aes->key, aes->gcm.H, aes->rounds);
+ #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
+ {
+ /* The 32-bit ARM asm GCM keeps gcm->H in the PMULL (bit-reflected)
+ * field representation for its one-shot GHASH (vmull.p64). The C
+ * STREAMING GHASH (wc_AesGcmEncrypt/DecryptUpdate/Final) instead uses
+ * the gcm->M0 table, which GenerateM0() builds from the *standard*
+ * H = E_K(0) byte order -- not derivable from the PMULL H. Recompute
+ * the standard H with the standard AES block encrypt into gcm->H,
+ * build M0 from it, then restore the PMULL H for the one-shot path.
+ * Without this, streaming AES-GCM mis-authenticates (AES_GCM_AUTH_E).
+ * SP 800-38D AES-GCM GHASH. */
+ ALIGN16 byte gcmStdH[WC_AES_BLOCK_SIZE];
+ ALIGN16 byte gcmZero[WC_AES_BLOCK_SIZE];
+ XMEMSET(gcmZero, 0, WC_AES_BLOCK_SIZE);
+ XMEMCPY(gcmStdH, aes->gcm.H, WC_AES_BLOCK_SIZE);
+ AES_encrypt_AARCH32(gcmZero, aes->gcm.H, (byte*)aes->key,
+ (int)aes->rounds);
+ GenerateM0(&aes->gcm);
+ XMEMCPY(aes->gcm.H, gcmStdH, WC_AES_BLOCK_SIZE);
+ }
+ #endif
#else
if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) {
AES_GCM_set_key_AARCH64(iv, (byte*)aes->key, aes->gcm.H,
@@ -8043,7 +8208,7 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
if (ret == 0) {
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
- #if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT)
+ #if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT)
if (aes->use_aesni) {
#if defined(WC_C_DYNAMIC_FALLBACK)
#ifdef HAVE_INTEL_AVX2
@@ -8104,7 +8269,8 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
}
-#ifdef WOLFSSL_AESNI
+
+#ifdef WC_AESNI_GCM
void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out,
const unsigned char* addt, const unsigned char* ivec,
@@ -8158,8 +8324,14 @@ void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out,
#endif /* WOLFSSL_AESNI */
+/* SP 800-38D software GHASH (GMULT / GHASH / GHASH_ONE_BLOCK_SW per GCM table mode).
+ * As with FlattenSzInBits above, 32-bit ARM --enable-armasm has no assembly STREAMING
+ * GHASH (only __aarch64__ + PMULL does), so when WOLFSSL_AESGCM_STREAM is enabled the
+ * streaming GHASH_INIT/UPDATE/FINAL use this software path and need these symbols
+ * compiled even for arm32 armasm HW-crypto. Widen the guard accordingly (no effect on
+ * x86 / arm64, which already satisfy it -> their in-core hash is unchanged). */
#if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \
- defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM)
#if defined(GCM_SMALL)
static void GMULT(byte* X, byte* Y)
{
@@ -10533,7 +10705,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
ret = AES_GCM_encrypt_ASM(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
authIn, authInSz);
#else
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
if (aes->use_aesni) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags)) {
@@ -11145,7 +11317,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* authIn, word32 authInSz)
{
int ret;
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
int res = WC_NO_ERR_TRACE(AES_GCM_AUTH_E);
#endif
@@ -11291,7 +11463,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
authTagSz, authIn, authInSz);
}
#else
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
if (aes->use_aesni) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags)) {
@@ -11334,6 +11506,16 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
VECTOR_REGISTERS_POP;
+ /* FIPS 140-3 / SP 800-38D: on authentication failure, the decrypted-but-
+ * unauthenticated plaintext in `out` must not be released to the caller.
+ * Wipe it here so a caller that ignores the return value cannot observe
+ * plaintext derived from forged ciphertext. All software paths (AES-NI,
+ * AVX1/2, ARM HW/NEON, C fallback) funnel through `ret` here, so this
+ * single guard covers every sub-implementation. */
+ if (ret == WC_NO_ERR_TRACE(AES_GCM_AUTH_E) && out != NULL && sz > 0) {
+ ForceZero(out, sz);
+ }
+
return ret;
}
#endif
@@ -11502,7 +11684,7 @@ static WARN_UNUSED_RESULT int AesGcmFinal_C(
return 0;
}
-#ifdef WOLFSSL_AESNI
+#ifdef WC_AESNI_GCM
#ifdef __cplusplus
extern "C" {
@@ -12705,7 +12887,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv,
if (iv != NULL) {
/* Initialize with the IV. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmInit_aesni(aes, iv, ivSz);
@@ -12832,7 +13014,7 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
if (ret == 0) {
/* Encrypt with AAD and/or plaintext. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmEncryptUpdate_aesni(aes, out, in, sz, authIn, authInSz);
@@ -12892,7 +13074,7 @@ int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz)
if (ret == 0) {
/* Calculate authentication tag. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmEncryptFinal_aesni(aes, authTag, authTagSz);
@@ -12976,7 +13158,7 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
if (ret == 0) {
/* Decrypt with AAD and/or cipher text. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmDecryptUpdate_aesni(aes, out, in, sz, authIn, authInSz);
@@ -13034,7 +13216,7 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
if (ret == 0) {
/* Calculate authentication tag and compare with one passed in.. */
- #ifdef WOLFSSL_AESNI
+ #ifdef WC_AESNI_GCM
if (aes->use_aesni) {
SAVE_VECTOR_REGISTERS(return _svr_ret;);
ret = AesGcmDecryptFinal_aesni(aes, authTag, authTagSz);
@@ -13061,6 +13243,10 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
}
}
+ /* Streaming decrypt cannot zeroize prior Update output buffers from here
+ * (Final does not see them). On AES_GCM_AUTH_E, the caller is responsible
+ * for treating all Update-produced plaintext as invalid and wiping it.
+ * See PL-R34 Security Policy section 8 (Operational Rules). */
return ret;
}
#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
diff --git a/wolfcrypt/src/aes_asm.S b/wolfcrypt/src/aes_asm.S
index 0371ca8cb22..3ee9983b20b 100644
--- a/wolfcrypt/src/aes_asm.S
+++ b/wolfcrypt/src/aes_asm.S
@@ -1831,11 +1831,16 @@ _AES_ECB_decrypt_AESNI:
push %edi
push %esi
push %ebx
- movl 20(%esp), %edi
- movl 24(%esp), %esi
- movl 28(%esp), %edx
- movl 32(%esp), %ecx
- movl 36(%esp), %eax
+ # 3 pushes above add 12 bytes; args (return addr at 0) are therefore at
+ # 16/20/24/28/32(%esp), matching AES_ECB_encrypt_AESNI. The original
+ # 32-bit decrypt used 20/24/28/32/36 (off by 4 -- offsets for 4 pushes),
+ # so it read the args shifted by one and dereferenced garbage -> segfault.
+ # This path was never exercised until 32-bit AES-NI was enabled 2026-06-17.
+ movl 16(%esp), %edi
+ movl 20(%esp), %esi
+ movl 24(%esp), %edx
+ movl 28(%esp), %ecx
+ movl 32(%esp), %eax
movl %edx, %ebx
diff --git a/wolfcrypt/src/aes_xts_asm.S b/wolfcrypt/src/aes_xts_asm.S
index 09045c6d8f7..ded430c211d 100644
--- a/wolfcrypt/src/aes_xts_asm.S
+++ b/wolfcrypt/src/aes_xts_asm.S
@@ -2786,6 +2786,518 @@ L_AES_XTS_decrypt_update_avx1_done_dec:
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#endif /* WOLFSSL_X86_64_BUILD */
+#ifdef WOLFSSL_X86_BUILD
+/* =========================================================================
+ * 32-bit Intel (i386) AES-NI AES-XTS. Single-block ports of the x86_64
+ * AES_XTS_*_aesni routines above: SAME algorithm and SAME KAT output, but using
+ * only xmm0-xmm7 and the i386 cdecl stack ABI. The x86_64 versions use xmm8-15
+ * and r8-r15 (none available in 32-bit) and a 4-block pipeline; this port drops
+ * the pipeline and processes one block at a time, which is correct and still
+ * AES-NI-accelerated. Added 2026-06-17 so 32-bit Intel AES-NI builds
+ * (host_cpu=x86 -> WOLFSSL_X86_BUILD) link + run AES-XTS, matching x86_64. The
+ * x86_64 section above is byte-for-byte unchanged.
+ *
+ * The GF(2^128) constant {0x87,1,1,1} is materialized on the stack (PIC-safe; a
+ * 32-bit shared object would otherwise need a GOT relocation to reach .data).
+ *
+ * Conventions: %ebp = frame pointer, %edi = running byte offset.
+ * xmm0 = tweak T xmm1 = data block xmm2,xmm3 = round-key scratch
+ * xmm4 = GF scratch xmm5 = tweak' (decrypt CTS) xmm6 = GF const xmm7 = CTS buf
+ * ========================================================================= */
+
+/* Encrypt the block in \blk with expanded key whose base is GP reg \k; the AES
+ * round count is read from memory operand \nr. Clobbers xmm2,xmm3. */
+.macro AESENC_BLK k, nr, blk
+ pxor (\k), \blk
+ movdqu 16(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 32(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 48(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 64(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 80(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 96(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 112(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 128(\k), %xmm2
+ aesenc %xmm2, \blk
+ movdqu 144(\k), %xmm2
+ aesenc %xmm2, \blk
+ cmpl $11, \nr
+ movdqu 160(\k), %xmm2
+ jl .L_encblk_last\@
+ aesenc %xmm2, \blk
+ movdqu 176(\k), %xmm3
+ aesenc %xmm3, \blk
+ cmpl $13, \nr
+ movdqu 192(\k), %xmm2
+ jl .L_encblk_last\@
+ aesenc %xmm2, \blk
+ movdqu 208(\k), %xmm3
+ aesenc %xmm3, \blk
+ movdqu 224(\k), %xmm2
+.L_encblk_last\@:
+ aesenclast %xmm2, \blk
+.endm
+
+/* Decrypt the block in \blk with inverse-cipher key base \k, rounds at \nr. */
+.macro AESDEC_BLK k, nr, blk
+ pxor (\k), \blk
+ movdqu 16(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 32(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 48(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 64(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 80(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 96(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 112(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 128(\k), %xmm2
+ aesdec %xmm2, \blk
+ movdqu 144(\k), %xmm2
+ aesdec %xmm2, \blk
+ cmpl $11, \nr
+ movdqu 160(\k), %xmm2
+ jl .L_decblk_last\@
+ aesdec %xmm2, \blk
+ movdqu 176(\k), %xmm3
+ aesdec %xmm3, \blk
+ cmpl $13, \nr
+ movdqu 192(\k), %xmm2
+ jl .L_decblk_last\@
+ aesdec %xmm2, \blk
+ movdqu 208(\k), %xmm3
+ aesdec %xmm3, \blk
+ movdqu 224(\k), %xmm2
+.L_decblk_last\@:
+ aesdeclast %xmm2, \blk
+.endm
+
+/* \dst = \src doubled in GF(2^128) (XTS tweak * alpha). xmm6 = {0x87,1,1,1}. */
+.macro TWEAKDBL dst, src
+ movdqa \src, %xmm4
+.ifnc \dst,\src
+ movdqa \src, \dst
+.endif
+ psrad $31, %xmm4
+ pslld $1, \dst
+ pshufd $0x93, %xmm4, %xmm4
+ pand %xmm6, %xmm4
+ pxor %xmm4, \dst
+.endm
+
+/* Build {0x87,1,1,1} into xmm6 using the 16-byte stack scratch at (%esp). */
+.macro GF_CONST
+ movl $0x87, (%esp)
+ movl $1, 4(%esp)
+ movl $1, 8(%esp)
+ movl $1, 12(%esp)
+ movdqu (%esp), %xmm6
+.endm
+
+/* void AES_XTS_init_aesni(unsigned char* i, const unsigned char* tweak_key,
+ * int tweak_nr); */
+#ifndef __APPLE__
+.text
+.globl AES_XTS_init_aesni
+.type AES_XTS_init_aesni,@function
+.align 16
+AES_XTS_init_aesni:
+#else
+.section __TEXT,__text
+.globl _AES_XTS_init_aesni
+.p2align 4
+_AES_XTS_init_aesni:
+#endif /* __APPLE__ */
+ push %ebp
+ movl %esp, %ebp
+ movl 8(%ebp), %eax /* i */
+ movdqu (%eax), %xmm1
+ movl 12(%ebp), %ecx /* tweak_key */
+ AESENC_BLK %ecx, 16(%ebp), %xmm1 /* nr = tweak_nr */
+ movl 8(%ebp), %eax
+ movdqu %xmm1, (%eax)
+ pop %ebp
+ ret
+#ifndef __APPLE__
+.size AES_XTS_init_aesni,.-AES_XTS_init_aesni
+#endif /* __APPLE__ */
+
+/* void AES_XTS_encrypt_aesni(const unsigned char* in, unsigned char* out,
+ * word32 sz, const unsigned char* i, const unsigned char* key,
+ * const unsigned char* key2, int nr);
+ * args: 8=in 12=out 16=sz 20=i 24=key 28=key2 32=nr */
+#ifndef __APPLE__
+.text
+.globl AES_XTS_encrypt_aesni
+.type AES_XTS_encrypt_aesni,@function
+.align 16
+AES_XTS_encrypt_aesni:
+#else
+.section __TEXT,__text
+.globl _AES_XTS_encrypt_aesni
+.p2align 4
+_AES_XTS_encrypt_aesni:
+#endif /* __APPLE__ */
+ push %ebp
+ movl %esp, %ebp
+ push %ebx
+ push %esi
+ push %edi
+ subl $32, %esp
+ GF_CONST
+ /* T = AES_key2(i) */
+ movl 20(%ebp), %eax
+ movdqu (%eax), %xmm0
+ movl 28(%ebp), %ecx
+ AESENC_BLK %ecx, 32(%ebp), %xmm0
+ xorl %edi, %edi /* offset */
+ movl 16(%ebp), %edx
+ andl $0xfffffff0, %edx /* full = sz & ~15 (encrypt all full blocks) */
+.L_xe_loop:
+ cmpl %edx, %edi
+ jge .L_xe_loop_done
+ movl 8(%ebp), %eax
+ movdqu (%eax,%edi), %xmm1
+ pxor %xmm0, %xmm1
+ movl 24(%ebp), %ecx
+ AESENC_BLK %ecx, 32(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+ TWEAKDBL %xmm0, %xmm0
+ addl $16, %edi
+ jmp .L_xe_loop
+.L_xe_loop_done:
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ je .L_xe_done /* exact multiple of 16 -> no CTS */
+ /* ciphertext stealing: steal from the last full ciphertext block */
+ subl $16, %edi
+ movl 12(%ebp), %eax
+ movdqu (%eax,%edi), %xmm7
+ addl $16, %edi
+ movdqu %xmm7, (%esp) /* buf = C_{m-1} */
+ xorl %edx, %edx /* j */
+.L_xe_cts:
+ movzbl (%esp,%edx), %ecx /* tmp1 = buf[j] */
+ movl 8(%ebp), %esi
+ movzbl (%esi,%edi), %ebx /* tmp2 = in[off] */
+ movl 12(%ebp), %esi
+ movb %cl, (%esi,%edi) /* out[off] = tmp1 */
+ movb %bl, (%esp,%edx) /* buf[j] = tmp2 */
+ incl %edi
+ incl %edx
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ jl .L_xe_cts
+ subl %edx, %edi /* off = m*16 */
+ movdqu (%esp), %xmm1
+ subl $16, %edi /* off = (m-1)*16 */
+ pxor %xmm0, %xmm1
+ movl 24(%ebp), %ecx
+ AESENC_BLK %ecx, 32(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+.L_xe_done:
+ addl $32, %esp
+ pop %edi
+ pop %esi
+ pop %ebx
+ pop %ebp
+ ret
+#ifndef __APPLE__
+.size AES_XTS_encrypt_aesni,.-AES_XTS_encrypt_aesni
+#endif /* __APPLE__ */
+
+/* void AES_XTS_encrypt_update_aesni(const unsigned char* in, unsigned char* out,
+ * word32 sz, const unsigned char* key, unsigned char* i, int nr);
+ * args: 8=in 12=out 16=sz 20=key 24=i 28=nr. Tweak is read from *i (already
+ * encrypted) and the advanced tweak written back to *i. */
+#ifndef __APPLE__
+.text
+.globl AES_XTS_encrypt_update_aesni
+.type AES_XTS_encrypt_update_aesni,@function
+.align 16
+AES_XTS_encrypt_update_aesni:
+#else
+.section __TEXT,__text
+.globl _AES_XTS_encrypt_update_aesni
+.p2align 4
+_AES_XTS_encrypt_update_aesni:
+#endif /* __APPLE__ */
+ push %ebp
+ movl %esp, %ebp
+ push %ebx
+ push %esi
+ push %edi
+ subl $32, %esp
+ GF_CONST
+ movl 24(%ebp), %eax /* i (tweak, already encrypted) */
+ movdqu (%eax), %xmm0
+ xorl %edi, %edi
+ movl 16(%ebp), %edx
+ andl $0xfffffff0, %edx
+.L_xeu_loop:
+ cmpl %edx, %edi
+ jge .L_xeu_loop_done
+ movl 8(%ebp), %eax
+ movdqu (%eax,%edi), %xmm1
+ pxor %xmm0, %xmm1
+ movl 20(%ebp), %ecx
+ AESENC_BLK %ecx, 28(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+ TWEAKDBL %xmm0, %xmm0
+ addl $16, %edi
+ jmp .L_xeu_loop
+.L_xeu_loop_done:
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ je .L_xeu_done
+ subl $16, %edi
+ movl 12(%ebp), %eax
+ movdqu (%eax,%edi), %xmm7
+ addl $16, %edi
+ movdqu %xmm7, (%esp)
+ xorl %edx, %edx
+.L_xeu_cts:
+ movzbl (%esp,%edx), %ecx
+ movl 8(%ebp), %esi
+ movzbl (%esi,%edi), %ebx
+ movl 12(%ebp), %esi
+ movb %cl, (%esi,%edi)
+ movb %bl, (%esp,%edx)
+ incl %edi
+ incl %edx
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ jl .L_xeu_cts
+ subl %edx, %edi
+ movdqu (%esp), %xmm1
+ subl $16, %edi
+ pxor %xmm0, %xmm1
+ movl 20(%ebp), %ecx
+ AESENC_BLK %ecx, 28(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+.L_xeu_done:
+ movl 24(%ebp), %eax /* write advanced tweak back to *i */
+ movdqu %xmm0, (%eax)
+ addl $32, %esp
+ pop %edi
+ pop %esi
+ pop %ebx
+ pop %ebp
+ ret
+#ifndef __APPLE__
+.size AES_XTS_encrypt_update_aesni,.-AES_XTS_encrypt_update_aesni
+#endif /* __APPLE__ */
+
+/* void AES_XTS_decrypt_aesni(const unsigned char* in, unsigned char* out,
+ * word32 sz, const unsigned char* i, const unsigned char* key,
+ * const unsigned char* key2, int nr);
+ * args: 8=in 12=out 16=sz 20=i 24=key 28=key2 32=nr */
+#ifndef __APPLE__
+.text
+.globl AES_XTS_decrypt_aesni
+.type AES_XTS_decrypt_aesni,@function
+.align 16
+AES_XTS_decrypt_aesni:
+#else
+.section __TEXT,__text
+.globl _AES_XTS_decrypt_aesni
+.p2align 4
+_AES_XTS_decrypt_aesni:
+#endif /* __APPLE__ */
+ push %ebp
+ movl %esp, %ebp
+ push %ebx
+ push %esi
+ push %edi
+ subl $32, %esp
+ GF_CONST
+ /* T = AES_key2(i) (tweak is ENCRYPTED, even for decrypt) */
+ movl 20(%ebp), %eax
+ movdqu (%eax), %xmm0
+ movl 28(%ebp), %ecx
+ AESENC_BLK %ecx, 32(%ebp), %xmm0
+ xorl %edi, %edi
+ movl 16(%ebp), %eax
+ movl %eax, %edx
+ andl $0xfffffff0, %edx /* full */
+ cmpl %eax, %edx
+ je .L_xd_bound /* exact multiple -> bound = full */
+ subl $16, %edx /* partial -> reserve last full block */
+.L_xd_bound:
+.L_xd_loop:
+ cmpl %edx, %edi
+ jge .L_xd_loop_done
+ movl 8(%ebp), %eax
+ movdqu (%eax,%edi), %xmm1
+ pxor %xmm0, %xmm1
+ movl 24(%ebp), %ecx
+ AESDEC_BLK %ecx, 32(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+ TWEAKDBL %xmm0, %xmm0
+ addl $16, %edi
+ jmp .L_xd_loop
+.L_xd_loop_done:
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ je .L_xd_done /* no partial */
+ /* last_31 CTS: T' = 2T (xmm5); decrypt last full block with T' */
+ TWEAKDBL %xmm5, %xmm0
+ movl 8(%ebp), %eax
+ movdqu (%eax,%edi), %xmm1
+ pxor %xmm5, %xmm1
+ movl 24(%ebp), %ecx
+ AESDEC_BLK %ecx, 32(%ebp), %xmm1
+ pxor %xmm5, %xmm1
+ movdqu %xmm1, (%esp) /* buf */
+ addl $16, %edi
+ xorl %edx, %edx
+.L_xd_cts:
+ movzbl (%esp,%edx), %ecx
+ movl 8(%ebp), %esi
+ movzbl (%esi,%edi), %ebx
+ movl 12(%ebp), %esi
+ movb %cl, (%esi,%edi)
+ movb %bl, (%esp,%edx)
+ incl %edi
+ incl %edx
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ jl .L_xd_cts
+ subl %edx, %edi /* off = m*16 */
+ movdqu (%esp), %xmm1
+ pxor %xmm0, %xmm1 /* ^ T (current) */
+ movl 24(%ebp), %ecx
+ AESDEC_BLK %ecx, 32(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ subl $16, %edi /* off = (m-1)*16 */
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+.L_xd_done:
+ addl $32, %esp
+ pop %edi
+ pop %esi
+ pop %ebx
+ pop %ebp
+ ret
+#ifndef __APPLE__
+.size AES_XTS_decrypt_aesni,.-AES_XTS_decrypt_aesni
+#endif /* __APPLE__ */
+
+/* void AES_XTS_decrypt_update_aesni(const unsigned char* in, unsigned char* out,
+ * word32 sz, const unsigned char* key, unsigned char* i, int nr);
+ * args: 8=in 12=out 16=sz 20=key 24=i 28=nr. Tweak from *i, advanced back to *i. */
+#ifndef __APPLE__
+.text
+.globl AES_XTS_decrypt_update_aesni
+.type AES_XTS_decrypt_update_aesni,@function
+.align 16
+AES_XTS_decrypt_update_aesni:
+#else
+.section __TEXT,__text
+.globl _AES_XTS_decrypt_update_aesni
+.p2align 4
+_AES_XTS_decrypt_update_aesni:
+#endif /* __APPLE__ */
+ push %ebp
+ movl %esp, %ebp
+ push %ebx
+ push %esi
+ push %edi
+ subl $32, %esp
+ GF_CONST
+ movl 24(%ebp), %eax /* i (tweak) */
+ movdqu (%eax), %xmm0
+ xorl %edi, %edi
+ movl 16(%ebp), %eax
+ movl %eax, %edx
+ andl $0xfffffff0, %edx
+ cmpl %eax, %edx
+ je .L_xdu_bound
+ subl $16, %edx
+.L_xdu_bound:
+.L_xdu_loop:
+ cmpl %edx, %edi
+ jge .L_xdu_loop_done
+ movl 8(%ebp), %eax
+ movdqu (%eax,%edi), %xmm1
+ pxor %xmm0, %xmm1
+ movl 20(%ebp), %ecx
+ AESDEC_BLK %ecx, 28(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+ TWEAKDBL %xmm0, %xmm0
+ addl $16, %edi
+ jmp .L_xdu_loop
+.L_xdu_loop_done:
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ je .L_xdu_done
+ TWEAKDBL %xmm5, %xmm0
+ movl 8(%ebp), %eax
+ movdqu (%eax,%edi), %xmm1
+ pxor %xmm5, %xmm1
+ movl 20(%ebp), %ecx
+ AESDEC_BLK %ecx, 28(%ebp), %xmm1
+ pxor %xmm5, %xmm1
+ movdqu %xmm1, (%esp)
+ addl $16, %edi
+ xorl %edx, %edx
+.L_xdu_cts:
+ movzbl (%esp,%edx), %ecx
+ movl 8(%ebp), %esi
+ movzbl (%esi,%edi), %ebx
+ movl 12(%ebp), %esi
+ movb %cl, (%esi,%edi)
+ movb %bl, (%esp,%edx)
+ incl %edi
+ incl %edx
+ movl 16(%ebp), %eax
+ cmpl %eax, %edi
+ jl .L_xdu_cts
+ subl %edx, %edi
+ movdqu (%esp), %xmm1
+ pxor %xmm0, %xmm1
+ movl 20(%ebp), %ecx
+ AESDEC_BLK %ecx, 28(%ebp), %xmm1
+ pxor %xmm0, %xmm1
+ subl $16, %edi
+ movl 12(%ebp), %eax
+ movdqu %xmm1, (%eax,%edi)
+.L_xdu_done:
+ movl 24(%ebp), %eax
+ movdqu %xmm0, (%eax)
+ addl $32, %esp
+ pop %edi
+ pop %esi
+ pop %ebx
+ pop %ebp
+ ret
+#ifndef __APPLE__
+.size AES_XTS_decrypt_update_aesni,.-AES_XTS_decrypt_update_aesni
+#endif /* __APPLE__ */
+
+#endif /* WOLFSSL_X86_BUILD */
#endif /* WOLFSSL_AES_XTS */
#if defined(__linux__) && defined(__ELF__)
diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c
index 2c3670234a6..6b5335ad47d 100644
--- a/wolfcrypt/src/cpuid.c
+++ b/wolfcrypt/src/cpuid.c
@@ -75,6 +75,22 @@
#define cpuid(a,b,c) __cpuidex((int*)a,b,c)
#endif /* _MSC_VER */
+ /* On the 32-bit x86 Linux kernel (WOLFSSL_LINUXKM + WOLFSSL_X86_BUILD), the
+ * UAPI header -- pulled in transitively via
+ * processor.h -> math_emu.h -> ptrace.h on i386 only -- already #defines
+ * EAX/EBX/ECX/EDX as ptrace register *indices* with DIFFERENT values
+ * (EAX=6, EBX=0, ECX=1, EDX=2). We use these names as cpuid()-result
+ * array indices (0..3), so the collision is a real bug, not a cosmetic
+ * -Werror redefinition: leaving the kernel's values in place would index
+ * reg[6] (past "unsigned int reg[5]") and mis-compare the vendor string.
+ * #undef first so our indices win. This is a no-op where the names are
+ * not predefined (x86_64 kernel uses a processor.h chain without
+ * math_emu.h, and all user-space builds), so generated code off
+ * i386-kernel is byte-identical. */
+ #undef EAX
+ #undef EBX
+ #undef ECX
+ #undef EDX
#define EAX 0
#define EBX 1
#define ECX 2
diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c
index 3e9abe3d869..378c5544b72 100644
--- a/wolfcrypt/src/dh.c
+++ b/wolfcrypt/src/dh.c
@@ -1422,8 +1422,20 @@ int wc_DhGeneratePublic(DhKey* key, byte* priv, word32 privSz,
#if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN)
if (ret == 0)
ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0);
- if (ret == 0)
- ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, privSz);
+ if (ret == 0) {
+ /* Pairwise Consistency Test per SP 800-56A r3 sec 5.6.2.1.4
+ * (FFC key pair). FIPS 140-3 IG 10.3.B requires a PCT after
+ * KeyGen for key-establishment algorithms; on failure under a
+ * FIPS build the error is remapped to DH_PCT_E so the FIPS
+ * module's DEGRADE_STATE handler transitions FIPS_CAST_DH_
+ * PRIMITIVE_Z to the error state. */
+ ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv,
+ privSz);
+ #ifdef HAVE_FIPS
+ if (ret != 0)
+ ret = DH_PCT_E;
+ #endif
+ }
#endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */
return ret;
@@ -1446,8 +1458,20 @@ static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng,
#if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN)
if (ret == 0)
ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0);
- if (ret == 0)
- ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, *privSz);
+ if (ret == 0) {
+ /* Pairwise Consistency Test per SP 800-56A r3 sec 5.6.2.1.4
+ * (FFC key pair). FIPS 140-3 IG 10.3.B requires a PCT after
+ * KeyGen for key-establishment algorithms; on failure under a
+ * FIPS build the error is remapped to DH_PCT_E so the FIPS
+ * module's DEGRADE_STATE handler transitions FIPS_CAST_DH_
+ * PRIMITIVE_Z to the error state. */
+ ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv,
+ *privSz);
+ #ifdef HAVE_FIPS
+ if (ret != 0)
+ ret = DH_PCT_E;
+ #endif
+ }
#endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */
return ret;
diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c
index 0f70a84cc8b..be0acd033d3 100644
--- a/wolfcrypt/src/error.c
+++ b/wolfcrypt/src/error.c
@@ -692,6 +692,21 @@ const char* wc_GetErrorString(int error)
case SLH_DSA_KAT_FIPS_E:
return "SLH-DSA Known Answer Test check FIPS error";
+ case SLH_DSA_PCT_E:
+ return "wolfcrypt SLH-DSA Pairwise Consistency Test Failure";
+
+ case CMAC_KAT_FIPS_E:
+ return "AES-CMAC Known Answer Test FIPS error";
+
+ case SHAKE_KAT_FIPS_E:
+ return "SHAKE Known Answer Test FIPS error";
+
+ case DH_PCT_E:
+ return "wolfcrypt DH (FFC) Pairwise Consistency Test Failure";
+
+ case AES_KW_KAT_FIPS_E:
+ return "AES-KW Known Answer Test FIPS error";
+
case SEQ_OVERFLOW_E:
return "Sequence counter would overflow";
diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c
index 5d3157628d2..2fbe29241bd 100644
--- a/wolfcrypt/src/ge_operations.c
+++ b/wolfcrypt/src/ge_operations.c
@@ -10196,9 +10196,14 @@ void ge_tobytes_nct(unsigned char *s,const ge_p2 *h)
/* if HAVE_ED25519 but not HAVE_CURVE25519, and an asm implementation is built,
* then curve25519() won't get its WOLFSSL_LOCAL attribute unless we dummy-call
* it here.
- */
+ * This assumes the asm port still emits curve25519() when X25519 is disabled --
+ * true for the x86 and 64-bit ARM (armv8-curve25519) ports, but NOT for the
+ * 32-bit ARM port (armv8-32-curve25519 gates curve25519() on HAVE_CURVE25519),
+ * so the dummy-call would reference an undefined symbol there. Exclude
+ * arm32 armasm (RFC 7748 / SP 800-186 X25519). */
#if defined(CURVED25519_ASM) && defined(WOLFSSL_API_PREFIX_MAP) && \
- !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC)
+ !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC) && \
+ (!defined(WOLFSSL_ARMASM) || defined(__aarch64__))
WOLFSSL_LOCAL void _wc_curve25519_dummy(void);
WOLFSSL_LOCAL void _wc_curve25519_dummy(void) {
(void)curve25519((byte *)0, (byte *)0, (const byte *)0);
diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
index 6d2f0172994..128e2e91997 100644
--- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
+++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
@@ -27,6 +27,16 @@
#include
+/* Honor WC_SHA3_NO_ASM exactly as sha3.c does: when the build forces the C SHA-3
+ * (e.g. every Linux kernel-module build sets -DWC_SHA3_NO_ASM via KERNEL_MODE_DEFAULTS),
+ * this NEON BlockSha3 must NOT be emitted -- otherwise on 32-bit ARM it provides a
+ * second BlockSha3 that multiply-defines against sha3.c's C BlockSha3 at link time.
+ * (arm64's sha3 asm is gated on WOLFSSL_ARMASM_CRYPTO_SHA3 (default off) so it never
+ * hit this; the arm32 NEON path is only gated on WOLFSSL_ARMASM_NO_NEON.) FIPS 202. */
+#ifdef WC_SHA3_NO_ASM
+ #undef WOLFSSL_ARMASM
+#endif
+
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2)
#ifndef WOLFSSL_ARMASM_INLINE
diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c
index 609e7fb6b45..f51d376fab6 100644
--- a/wolfcrypt/src/random.c
+++ b/wolfcrypt/src/random.c
@@ -416,6 +416,24 @@ int wc_DrbgState_MutexFree(void)
static int LockDrbgState(void)
{
#ifndef SINGLE_THREADED
+#ifndef WOLFSSL_MUTEX_INITIALIZER
+ /* Platforms lacking a static mutex initializer (e.g. Windows
+ * CRITICAL_SECTION) must initialize drbgStateMutex at run time.
+ * wolfCrypt_Init() does so, but the FIPS 140-3 module's pre-operational
+ * self test exercises the DRBG -- and therefore this lock -- from a
+ * load-time constructor that runs before any wolfCrypt_Init() call.
+ * Locking an uninitialized CRITICAL_SECTION is undefined behavior: it
+ * tolerates the single-threaded POST but faults (0xC0000005) on the
+ * degraded-mode CAST re-run. Initialize on demand here
+ * (wc_DrbgState_MutexInit() is idempotent); the first lock is the
+ * single-threaded POST, so this is race-free. The mutex guards the
+ * SP 800-90A DRBG enable/disable state, which must stay consistent. */
+ {
+ int initRet = wc_DrbgState_MutexInit();
+ if (initRet != 0)
+ return initRet;
+ }
+#endif
return wc_LockMutex(&drbgStateMutex);
#else
return 0;
@@ -3617,23 +3635,90 @@ int wc_FreeNetRandom(void)
#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \
defined(HAVE_AMD_RDSEED)
-#ifdef WOLFSSL_ASYNC_CRYPT
- /* need more retries if multiple cores */
- #define INTELRD_RETRY (32 * 8)
-#else
- #define INTELRD_RETRY 32
+/* INTELRD_RETRY bounds the RDRAND/RDSEED retry loop (IntelRDseed64_r /
+ * IntelRDrand* below). RDSEED legitimately sets CF=0 (failure) when the on-chip
+ * conditioned entropy has not been replenished since the last read; per Intel's
+ * DRNG implementation guidance software must retry. Overridable via -D for OEs
+ * that need a different budget. */
+#ifndef INTELRD_RETRY
+ #if defined(WOLFSSL_LINUXKM)
+ /* Linux-kernel-module: the boot-time FIPS CASTs poll RDSEED during
+ * module_init (wc_RunAllCast_fips -> ECDSA CAST -> wc_InitRng ->
+ * wc_GenerateSeed) while the platform RNG is warming up and the hardware
+ * RDSEED source is heavily contended -- under virtualization especially,
+ * where the guest's RDSEED is funneled to the (busy) host CPU. RDSEED
+ * then returns CF=0 far more than the 32-retry userspace default
+ * tolerates, which made --enable-amdrdseed / --enable-intelrdseed kernel
+ * modules fail the ECDSA CAST and refuse to load. Use a large retry
+ * budget: it is a ceiling, not a fixed cost -- RDSEED succeeds in ~1 read
+ * once entropy is available, so post-boot use is unaffected. */
+ #define INTELRD_RETRY 100000
+ #elif defined(WOLFSSL_ASYNC_CRYPT)
+ /* need more retries if multiple cores */
+ #define INTELRD_RETRY (32 * 8)
+ #else
+ #define INTELRD_RETRY 32
+ #endif
#endif
#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED)
+/* Vendor tag used by the optional FIPS_CODE_REVIEW evidence prints below.
+ * Both the Intel RDSEED and AMD RDSEED entropy sources funnel through the same
+ * primitive (the x86 RDSEED instruction is implemented by both vendors); only
+ * the underlying CPU vendor differs. At compile time exactly one of
+ * HAVE_INTEL_RDSEED / HAVE_AMD_RDSEED is expected to be set for a given OE, so
+ * this string resolves unambiguously per build. */
+#if defined(HAVE_AMD_RDSEED)
+#define WC_RDSEED_VENDOR "AMD"
+#else
+#define WC_RDSEED_VENDOR "Intel"
+#endif
+
#ifndef USE_INTEL_INTRINSICS
- /* return 0 on success */
+ /* return 0 on success.
+ *
+ * E27 Public Use Document (NIST CMVP entropy source disclosure) explicitly
+ * documents that wolfSSL polls the x86 CF (Carry Flag) to determine
+ * whether a given RDSEED invocation delivered a usable conditioned entropy
+ * sample. The rdseed instruction semantics are:
+ * CF = 1 (set) -> destination register holds 64 bits of conditioned
+ * entropy on this cycle; sample is fresh and usable.
+ * CF = 0 (clear) -> hardware seed pool was empty this cycle; the
+ * destination register MUST NOT be used; caller is
+ * required to retry (handled by IntelRDseed64_r
+ * below, which loops up to INTELRD_RETRY times).
+ * The "setc %1" mnemonic materialises CF into the byte-sized output bound
+ * to (ok); the "=qm" output constraint constrains that byte to a q-class
+ * register (one that has an addressable low byte) so setc can target it. */
static WC_INLINE int IntelRDseed64(word64* seed)
{
unsigned char ok;
__asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok));
+#ifdef FIPS_CODE_REVIEW
+ /* One-shot tracer: confirm this path is alive on the first call.
+ * After that go silent so the per-chunk RDSEED prints don't drown
+ * the optest sanity-log (RDSEED is invoked per 64-bit chunk for
+ * every DRBG instantiate/reseed; flooding the log obscures real
+ * signal). Per-request entropy volume is still surfaced by the
+ * outer wc_GenerateSeed_IntelRD print below. */
+ {
+ static int printed_asm = 0;
+ if (!printed_asm) {
+ printed_asm = 1;
+ printf("FIPS_CODE_REVIEW IntelRDseed64 [asm path, %s] "
+ "(one-shot): delivered %u bits, CF=%u\n",
+ WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u),
+ (unsigned)ok);
+ }
+ }
+#endif
+ /* Ternary: CF set (ok != 0) -> 64 bits of conditioned entropy
+ * captured in *seed, return 0 (success). CF clear (ok == 0) -> sample
+ * invalid, return -1 so the retry wrapper IntelRDseed64_r() tries
+ * again. */
return (ok) ? 0 : -1;
}
@@ -3641,7 +3726,14 @@ int wc_FreeNetRandom(void)
/* The compiler Visual Studio uses does not allow inline assembly.
* It does allow for Intel intrinsic functions. */
- /* return 0 on success */
+ /* return 0 on success.
+ *
+ * E27 PUD (NIST CMVP) cited path: _rdseed64_step is the compiler intrinsic
+ * front-end for the same RDSEED instruction documented in the asm path
+ * above. The intrinsic returns 1 when CF was set by the underlying RDSEED
+ * (i.e. the 64-bit conditioned entropy sample in *seed is valid this
+ * cycle) and 0 when CF was clear (caller MUST retry; *seed MUST NOT be
+ * consumed). */
# ifdef __GNUC__
__attribute__((target("rdseed")))
# endif
@@ -3650,6 +3742,23 @@ int wc_FreeNetRandom(void)
int ok;
ok = _rdseed64_step((unsigned long long*) seed);
+#ifdef FIPS_CODE_REVIEW
+ /* One-shot tracer; see asm-path comment above for rationale. */
+ {
+ static int printed_intrinsic = 0;
+ if (!printed_intrinsic) {
+ printed_intrinsic = 1;
+ printf("FIPS_CODE_REVIEW IntelRDseed64 [intrinsic path, %s] "
+ "(one-shot): delivered %u bits, "
+ "intrinsic_ret=%d (== CF)\n",
+ WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u), ok);
+ }
+ }
+#endif
+ /* intrinsic_ret == 1 -> CF was set, 64 bits of conditioned entropy
+ * captured in *seed; return 0 to signal success to the retry wrapper.
+ * intrinsic_ret == 0 -> CF was clear; return -1 so the retry wrapper
+ * re-attempts. */
return (ok) ? 0 : -1;
}
@@ -3662,6 +3771,12 @@ static WC_INLINE int IntelRDseed64_r(word64* rnd)
for (i = 0; i < INTELRD_RETRY; i++) {
if (IntelRDseed64(rnd) == 0)
return 0;
+ /* Give the hardware entropy source a chance to replenish between
+ * attempts (Intel DRNG guidance) and yield the CPU when it is safe to
+ * block. WC_RELAX_LONG_LOOP() is a no-op where blocking is unsafe, so
+ * this only ever helps -- e.g. it lets other work (and the entropy
+ * conditioner) run during a long boot-time RDSEED starvation. */
+ WC_RELAX_LONG_LOOP();
}
return -1;
}
@@ -3675,6 +3790,19 @@ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
(void)os;
+#ifdef FIPS_CODE_REVIEW
+ /* Each conditioned entropy sample produced by IntelRDseed64() is 64 bits
+ * wide. This entry-level trace makes the per-request entropy volume
+ * obvious in evidence logs: sz bytes requested -> ceil(sz/8) RDSEED
+ * invocations expected (plus the two-or-three sanity-status reads on the
+ * first ever call into this function). */
+ printf("FIPS_CODE_REVIEW wc_GenerateSeed_IntelRD [%s]: "
+ "requested %u bytes = %u bits "
+ "(expect %u RDSEED 64-bit samples)\n",
+ WC_RDSEED_VENDOR, (unsigned)sz, (unsigned)(sz * 8u),
+ (unsigned)((sz + sizeof(word64) - 1u) / sizeof(word64)));
+#endif
+
if (!IS_INTEL_RDSEED(intel_flags))
return -1;
diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c
index 0a6b6143a7a..d313f81ac95 100644
--- a/wolfcrypt/src/rsa.c
+++ b/wolfcrypt/src/rsa.c
@@ -5153,9 +5153,15 @@ static WC_INLINE int RsaSizeCheck(int size)
}
#ifdef HAVE_FIPS
- /* Key size requirements for CAVP */
+ /* Approved RSA key sizes per FIPS 186-5 sec 5.1 and NIST SP 800-131Ar2
+ * sec 4 Table 2 (Asymmetric Key Establishment) - 2048, 3072, 4096 only.
+ * 1024-bit RSA was deprecated for FIPS-Approved key generation by
+ * SP 800-131Ar2 effective 2014-01-01 and is disallowed thereafter. The
+ * outer wc_MakeRsaKey_fips wrapper already gates on WC_RSA_FIPS_GEN_MIN,
+ * but RsaSizeCheck itself is reached by library-internal paths that do
+ * not pass through that wrapper - defense-in-depth removal here closes
+ * the gap. */
switch (size) {
- case 1024:
case 2048:
case 3072:
case 4096:
@@ -5415,6 +5421,20 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
goto out;
}
+#ifdef HAVE_FIPS
+ /* FIPS 186-5 sec 5.2 (Public Verification Exponent e): 2^16 + 1 <= e <
+ * 2^256 and e odd. The general non-FIPS check above accepts e >= 3 odd;
+ * the FIPS Approved range is narrower. e is a long here so the upper
+ * bound 2^256 is structurally satisfied on any LP64 / LLP64 platform
+ * (long is at most 64 bits), but the lower bound 65537 must be enforced
+ * explicitly. Defense-in-depth even though FIPS application code
+ * conventionally passes e = 65537 (RSA_F4). */
+ if (e < 65537L) {
+ err = BAD_FUNC_ARG;
+ goto out;
+ }
+#endif
+
#if defined(WOLFSSL_CRYPTOCELL)
err = cc310_RSA_GenerateKeyPair(key, size, e);
goto out;
diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c
index 047c57dade8..ddb6b304fa8 100644
--- a/wolfcrypt/src/sha256.c
+++ b/wolfcrypt/src/sha256.c
@@ -1179,14 +1179,32 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
return ret;
}
+/* The 32-bit ARM SHA-256 NEON/crypto-extension transforms use vector registers,
+ * so in a Linux kernel module they MUST be bracketed with SAVE/RESTORE_VECTOR_
+ * REGISTERS (kernel_neon_begin/end) or the first SIMD instruction faults with
+ * "undefined instruction". Scoped to !__aarch64__ so the aarch64 path is
+ * unchanged. (FIPS 180-4 SHA-256.) */
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \
+ !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON)
+ #define WC_SHA256_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail)
+ #define WC_SHA256_ARM_SVR_END() RESTORE_VECTOR_REGISTERS()
+#else
+ #define WC_SHA256_ARM_SVR_BEGIN(fail) WC_DO_NOTHING
+ #define WC_SHA256_ARM_SVR_END() WC_DO_NOTHING
+#endif
+
static WC_INLINE int Transform_Sha256(wc_Sha256* sha256, const byte* data)
{
#if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON)
Transform_Sha256_Len_base(sha256, data, WC_SHA256_BLOCK_SIZE);
-#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
- Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE);
#else
+ WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;);
+ #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE);
+ #else
Transform_Sha256_Len_crypto(sha256, data, WC_SHA256_BLOCK_SIZE);
+ #endif
+ WC_SHA256_ARM_SVR_END();
#endif
return 0;
}
@@ -1196,10 +1214,14 @@ static WC_INLINE int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data,
{
#if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON)
Transform_Sha256_Len_base(sha256, data, len);
-#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
- Transform_Sha256_Len_neon(sha256, data, len);
#else
+ WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;);
+ #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ Transform_Sha256_Len_neon(sha256, data, len);
+ #else
Transform_Sha256_Len_crypto(sha256, data, len);
+ #endif
+ WC_SHA256_ARM_SVR_END();
#endif
return 0;
}
diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c
index b2f57b13b86..009f538c44b 100644
--- a/wolfcrypt/src/sha512.c
+++ b/wolfcrypt/src/sha512.c
@@ -1455,15 +1455,32 @@ static void (*Transform_Sha512_p)(wc_Sha512* sha512, const byte* data) = NULL;
static void (*Transform_Sha512_Len_p)(wc_Sha512* sha512, const byte* data,
word32 len) = NULL;
+/* The 32-bit ARM SHA-512 NEON transform uses vector registers, so in a Linux
+ * kernel module the asm calls below MUST be bracketed by SAVE/RESTORE_VECTOR_
+ * REGISTERS (kernel_neon_begin/end) or the first NEON instruction faults with
+ * "undefined instruction". Scoped to !__aarch64__ so the aarch64 path (and the
+ * THUMB2/NO_NEON base path) is unchanged. (FIPS 180-4 SHA-512.) */
+#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \
+ !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON)
+ #define WC_SHA512_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail)
+ #define WC_SHA512_ARM_SVR_END() RESTORE_VECTOR_REGISTERS()
+#else
+ #define WC_SHA512_ARM_SVR_BEGIN(fail) WC_DO_NOTHING
+ #define WC_SHA512_ARM_SVR_END() WC_DO_NOTHING
+#endif
static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512, const byte* data)
{
+ WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;);
(*Transform_Sha512_p)(sha512, data);
+ WC_SHA512_ARM_SVR_END();
return 0;
}
static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, const byte* data,
word32 len)
{
+ WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;);
(*Transform_Sha512_Len_p)(sha512, data, len);
+ WC_SHA512_ARM_SVR_END();
return 0;
}
diff --git a/wolfcrypt/src/wc_lms.c b/wolfcrypt/src/wc_lms.c
index f2b62ea5901..d223632ec19 100644
--- a/wolfcrypt/src/wc_lms.c
+++ b/wolfcrypt/src/wc_lms.c
@@ -26,6 +26,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep LMS (SP 800-208) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nc")
+ #pragma const_seg(".fipsB$nc")
+ #endif
#endif
#include
#include
diff --git a/wolfcrypt/src/wc_lms_impl.c b/wolfcrypt/src/wc_lms_impl.c
index 25e89901a9b..ba449062ca8 100644
--- a/wolfcrypt/src/wc_lms_impl.c
+++ b/wolfcrypt/src/wc_lms_impl.c
@@ -39,6 +39,15 @@
#include
+#if FIPS_VERSION3_GE(2,0,0)
+ /* Keep this LMS (SP 800-208) implementation's code/const inside the FIPS
+ * in-core integrity boundary (Windows orders it by named sections). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$ne")
+ #pragma const_seg(".fipsB$ne")
+ #endif
+#endif
+
#include
#ifdef NO_INLINE
@@ -2317,7 +2326,10 @@ static int wc_lms_treehash_update(LmsState* state, LmsPrivState* privState,
byte* left = dp + LMS_D_LEN;
byte* temp = left + params->hash_len;
WC_DECLARE_VAR(stack, byte, (LMS_MAX_HEIGHT + 1) * LMS_MAX_NODE_LEN, 0);
- byte* sp;
+ /* Init to NULL: sp is set and used only on the ret==0 path, but 32-bit ARM
+ * gcc cannot correlate the two separate `if (ret == 0)` guards and reports a
+ * false-positive -Wmaybe-uninitialized (x86_64/aarch64 gcc do not). */
+ byte* sp = NULL;
word32 max_cb = (word32)1 << params->cacheBits;
word32 i;
diff --git a/wolfcrypt/src/wc_mldsa.c b/wolfcrypt/src/wc_mldsa.c
index 45e247e889b..1c028be496b 100644
--- a/wolfcrypt/src/wc_mldsa.c
+++ b/wolfcrypt/src/wc_mldsa.c
@@ -140,6 +140,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep ML-DSA (FIPS 204) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nc")
+ #pragma const_seg(".fipsB$nc")
+ #endif
#endif
#ifndef WOLFSSL_MLDSA_NO_ASN1
@@ -760,8 +768,103 @@ static int mldsa_hash256_ctx_msg(wc_Shake* shake256, const byte* tr,
* @return 0 on success.
* @return BAD_FUNC_ARG if hash algorithm not known.
*/
-static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen)
+/* HashML-DSA PH-vs-paramSet enforcement.
+ *
+ * FIPS 204 sec. 5.4 (Table 4) restricts the pre-hash PH for HashML-DSA to
+ * algorithms whose collision-resistance strength meets or exceeds the
+ * parameter set's claimed security level. Any other PH must be rejected
+ * for BOTH sigGen and sigVer:
+ * ML-DSA-44 (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256,
+ * SHA3-256, SHA3-384, SHA3-512,
+ * SHAKE-128, SHAKE-256
+ * ML-DSA-65 (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, SHAKE-256
+ * ML-DSA-87 (256-bit): SHA2-512, SHA3-512, SHAKE-256
+ *
+ * Returns 0 if (hashAlg, level) is an approved combination. Returns
+ * BAD_FUNC_ARG otherwise -- including for any hash not on the approved
+ * list (SHA-224, SHA-512/224, SHA3-224, etc.).
+ */
+static int mldsa_check_hash_for_level(int hashAlg, byte level)
{
+ int strengthBits; /* collision-resistance strength of the chosen hash */
+ int requiredBits; /* security level required by the paramSet */
+
+ switch (hashAlg) {
+ #ifndef NO_SHA256
+ case WC_HASH_TYPE_SHA256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case WC_HASH_TYPE_SHA384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case WC_HASH_TYPE_SHA512:
+ strengthBits = 256;
+ break;
+ #ifndef WOLFSSL_NOSHA512_256
+ case WC_HASH_TYPE_SHA512_256:
+ /* SHA-512/256 has 128-bit collision resistance (truncated). */
+ strengthBits = 128;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_HASH_TYPE_SHA3_256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_HASH_TYPE_SHA3_384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_HASH_TYPE_SHA3_512:
+ strengthBits = 256;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHAKE128
+ case WC_HASH_TYPE_SHAKE128:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHAKE256
+ case WC_HASH_TYPE_SHAKE256:
+ strengthBits = 256;
+ break;
+ #endif
+ default:
+ /* Hash not on the FIPS 204 Table 4 approved list (e.g. SHA-224,
+ * SHA-512/224, SHA3-224, MD5). Reject regardless of level. */
+ return BAD_FUNC_ARG;
+ }
+
+ switch (level) {
+ case WC_ML_DSA_44:
+ requiredBits = 128;
+ break;
+ case WC_ML_DSA_65:
+ requiredBits = 192;
+ break;
+ case WC_ML_DSA_87:
+ requiredBits = 256;
+ break;
+ default:
+ return BAD_FUNC_ARG;
+ }
+
+ if (strengthBits < requiredBits) {
+ return BAD_FUNC_ARG;
+ }
+ return 0;
+}
+
+static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen){
int ret = 0;
const byte* oid;
@@ -9453,11 +9556,17 @@ static int mldsa_sign_ctx_hash_with_seed(wc_MlDsaKey* key,
byte oidMsgHash[MLDSA_HASH_OID_LEN + WC_MAX_DIGEST_SIZE];
word32 oidMsgHashLen = 0;
- /* Check that the input hash length is valid. */
+ /* Check that the input hash length is valid (guards against caller-side
+ * buffer overruns before we touch hash). */
if ((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg)) {
ret = BAD_LENGTH_E;
}
+ /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */
+ if (ret == 0) {
+ ret = mldsa_check_hash_for_level(hashAlg, key->level);
+ }
+
if (ret == 0) {
XMEMCPY(seedMu, seed, MLDSA_RND_SZ);
@@ -10126,12 +10235,17 @@ static int mldsa_verify_ctx_hash(wc_MlDsaKey* key, const byte* ctx,
if ((key == NULL) || (key->params == NULL)) {
ret = BAD_FUNC_ARG;
}
- /* Check that the input hash length is valid. */
+ /* Check that the input hash length is valid (guards against caller-side
+ * buffer overruns before we touch hash). */
if ((ret == 0) &&
((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg)))
{
ret = BAD_LENGTH_E;
}
+ /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */
+ if (ret == 0) {
+ ret = mldsa_check_hash_for_level(hashAlg, key->level);
+ }
if (ret == 0) {
/* Step 6: Hash public key. */
diff --git a/wolfcrypt/src/wc_mlkem.c b/wolfcrypt/src/wc_mlkem.c
index d03539f7053..dac31afa6e4 100644
--- a/wolfcrypt/src/wc_mlkem.c
+++ b/wolfcrypt/src/wc_mlkem.c
@@ -81,6 +81,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep ML-KEM (FIPS 203) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$na")
+ #pragma const_seg(".fipsB$na")
+ #endif
#endif
#include
@@ -694,49 +702,12 @@ int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng)
ret = wc_MlKemKey_MakeKeyWithRandom(key, rand, sizeof(rand));
}
-#ifdef HAVE_FIPS
- /* Pairwise Consistency Test (PCT) per FIPS 140-3 / ISO 19790:2012
- * Section 7.10.3.3: encapsulate with ek, decapsulate with dk,
- * verify shared secrets match. */
- if (ret == 0) {
- WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
- key->heap);
- byte pct_ss1[WC_ML_KEM_SS_SZ];
- byte pct_ss2[WC_ML_KEM_SS_SZ];
- word32 ctSz = 0;
-
- WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
- key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E);
-
- if (ret == 0)
- ret = wc_MlKemKey_CipherTextSize(key, &ctSz);
-
- if (ret == 0)
- ret = wc_MlKemKey_Encapsulate(key, pct_ct, pct_ss1, rng);
-
- if (ret == 0)
- ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, ctSz);
-
- if (ret == 0) {
- if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0)
- ret = ML_KEM_PCT_E;
- }
-
- ForceZero(pct_ss1, sizeof(pct_ss1));
- ForceZero(pct_ss2, sizeof(pct_ss2));
- if (WC_VAR_OK(pct_ct))
- ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE);
-
- WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
-
- /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT
- * must be rendered unusable. Zeroize the generated key material so
- * a caller that ignores the return value cannot use it. */
- if (ret != 0) {
- wc_MlKemKey_Free(key);
- }
- }
-#endif /* HAVE_FIPS */
+ /* PCT now lives in wc_MlKemKey_MakeKeyWithRandom() (called above) so
+ * that BOTH the random-seeded path (this function) and the
+ * caller-supplied-seed path (direct invocation of MakeKeyWithRandom)
+ * exercise the FIPS 140-3 IG 10.3.A 1.B Pairwise Consistency Test.
+ * Audit A16-1: previously the PCT lived only here, leaving the
+ * deterministic-seed entry uncovered. */
/* Ensure seeds are zeroized. */
ForceZero((void*)rand, (word32)sizeof(rand));
@@ -985,8 +956,70 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand,
ForceZero(e, (size_t)(k * MLKEM_N) * sizeof(sword16));
#endif
- /* Note: PCT is performed in wc_MlKemKey_MakeKey() which calls this
- * function and has the RNG parameter needed for encapsulation. */
+#ifdef HAVE_FIPS
+ /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A 1.B and
+ * ISO/IEC 19790:2012 Section 7.10.3.3: encapsulate with the freshly
+ * generated encapsulation key (ek), decapsulate the ciphertext with
+ * the matching decapsulation key (dk), and verify the recovered
+ * shared secret matches. This entry point (MakeKeyWithRandom) is
+ * a deterministic key-gen path with no caller-supplied RNG; the PCT
+ * uses wc_MlKemKey_EncapsulateWithRandom() with a fixed 32-byte test
+ * value for `m` (FIPS 203 Algorithm 17 input). The encapsulation
+ * `m` does not need to be unpredictable for the PCT - it only needs
+ * the encap/decap roundtrip to recover the same shared secret.
+ *
+ * Audit A16-1: previously the PCT lived only in wc_MlKemKey_MakeKey
+ * which generates `rand` from the DRBG, leaving callers of this
+ * deterministic-seed entry without PCT coverage. */
+ if (ret == 0) {
+ WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
+ key->heap);
+ byte pct_ss1[WC_ML_KEM_SS_SZ];
+ byte pct_ss2[WC_ML_KEM_SS_SZ];
+ word32 pct_ctSz = 0;
+ /* Fixed 32-byte test pattern for FIPS 203 Alg 17 `m` parameter.
+ * Value is arbitrary - PCT only requires encap/decap roundtrip,
+ * not encap unpredictability. */
+ static const byte pct_m[WC_ML_KEM_ENC_RAND_SZ] = {
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB,
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB,
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB,
+ 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB
+ };
+
+ WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE,
+ key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E);
+
+ if (ret == 0)
+ ret = wc_MlKemKey_CipherTextSize(key, &pct_ctSz);
+
+ if (ret == 0)
+ ret = wc_MlKemKey_EncapsulateWithRandom(key, pct_ct, pct_ss1,
+ pct_m, (int)sizeof(pct_m));
+
+ if (ret == 0)
+ ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, pct_ctSz);
+
+ if (ret == 0) {
+ if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0)
+ ret = ML_KEM_PCT_E;
+ }
+
+ ForceZero(pct_ss1, sizeof(pct_ss1));
+ ForceZero(pct_ss2, sizeof(pct_ss2));
+ if (WC_VAR_OK(pct_ct))
+ ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE);
+
+ WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+ /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT
+ * must be rendered unusable. Zeroize the generated key material so
+ * a caller that ignores the return value cannot use it. */
+ if (ret != 0) {
+ wc_MlKemKey_Free(key);
+ }
+ }
+#endif /* HAVE_FIPS */
return ret;
}
diff --git a/wolfcrypt/src/wc_mlkem_poly.c b/wolfcrypt/src/wc_mlkem_poly.c
index aa3d7835d5d..533a3198e02 100644
--- a/wolfcrypt/src/wc_mlkem_poly.c
+++ b/wolfcrypt/src/wc_mlkem_poly.c
@@ -69,6 +69,15 @@
#include
+#if FIPS_VERSION3_GE(2,0,0)
+ /* Keep this ML-KEM (FIPS 203) implementation's code/const inside the FIPS
+ * in-core integrity boundary (Windows orders it by named sections). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nb")
+ #pragma const_seg(".fipsB$nb")
+ #endif
+#endif
+
#ifdef WC_MLKEM_NO_ASM
#undef USE_INTEL_SPEEDUP
#undef WOLFSSL_ARMASM
diff --git a/wolfcrypt/src/wc_slhdsa.c b/wolfcrypt/src/wc_slhdsa.c
index b3cfb56349b..0b776553610 100644
--- a/wolfcrypt/src/wc_slhdsa.c
+++ b/wolfcrypt/src/wc_slhdsa.c
@@ -24,6 +24,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep SLH-DSA (FIPS 205) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nh")
+ #pragma const_seg(".fipsB$nh")
+ #endif
#endif
#include
@@ -6999,6 +7007,49 @@ int wc_SlhDsaKey_MakeKey(SlhDsaKey* key, WC_RNG* rng)
key->sk + 2 * n, n);
}
+#ifdef HAVE_FIPS
+ /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A (TE10.35.02):
+ * sign with the new sk, verify with the matching pk. SLH-DSA is a
+ * stateless hash-based signature scheme (FIPS 205), so the relaxed PCT
+ * rule for stateful HBS (LMS/XMSS) does not apply -- PCT runs on every
+ * KeyGen. SignDeterministic avoids consuming RNG state; heap allocation
+ * is used because SLH-DSA signatures can reach ~50 KB. The paramSet is
+ * known by this point, so allocate the exact signature length for this
+ * variant rather than the family-wide worst case -- the difference
+ * across SLH-DSA variants is roughly 8 KB to 50 KB. */
+ if (ret == 0) {
+ static const byte pct_msg[] = "wolfSSL SLH-DSA PCT";
+ word32 pct_sigLen = key->params->sigLen;
+ byte* pct_sig = (byte*)XMALLOC(pct_sigLen, NULL,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ word32 pct_sigSz = pct_sigLen;
+
+ if (pct_sig == NULL) {
+ ret = MEMORY_E;
+ }
+ if (ret == 0) {
+ ret = wc_SlhDsaKey_SignDeterministic(key, NULL, 0,
+ pct_msg, sizeof(pct_msg), pct_sig, &pct_sigSz);
+ }
+ if (ret == 0) {
+ ret = wc_SlhDsaKey_Verify(key, NULL, 0,
+ pct_msg, sizeof(pct_msg), pct_sig, pct_sigSz);
+ if (ret != 0) {
+ ret = SLH_DSA_PCT_E;
+ }
+ }
+ if (pct_sig != NULL) {
+ ForceZero(pct_sig, pct_sigLen);
+ XFREE(pct_sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+ }
+ /* IG 10.3.A (TE10.35.02): a key pair that fails the PCT must be
+ * rendered unusable. */
+ if (ret != 0) {
+ wc_SlhDsaKey_Free(key);
+ }
+ }
+#endif /* HAVE_FIPS */
+
return ret;
}
@@ -7945,6 +7996,99 @@ static const byte slhdsakey_oid_sha3_512[] = {
#endif
#endif
+/* HashSLH-DSA PH-vs-paramSet enforcement.
+ *
+ * FIPS 205 sec. 10.2.2 (Table 9) restricts the pre-hash PH for HashSLH-DSA
+ * to algorithms whose collision-resistance strength meets or exceeds the
+ * parameter set's security level (encoded as key->params->n in bytes):
+ * n = 16 (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256,
+ * SHA3-256, SHA3-384, SHA3-512,
+ * SHAKE-128, SHAKE-256
+ * n = 24 (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, SHAKE-256
+ * n = 32 (256-bit): SHA2-512, SHA3-512, SHAKE-256
+ *
+ * Returns 0 if (hashType, n) is an approved combination. Returns
+ * BAD_FUNC_ARG otherwise -- including for any hash not on the approved
+ * list (SHA-224, SHA-512/224, SHA3-224, etc.).
+ */
+static int slhdsa_check_hash_for_n(enum wc_HashType hashType, byte n)
+{
+ int strengthBits;
+ int requiredBits;
+
+ switch ((int)hashType) {
+ #ifndef NO_SHA256
+ case WC_HASH_TYPE_SHA256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA384
+ case WC_HASH_TYPE_SHA384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHA512
+ case WC_HASH_TYPE_SHA512:
+ strengthBits = 256;
+ break;
+ #ifndef WOLFSSL_NOSHA512_256
+ case WC_HASH_TYPE_SHA512_256:
+ /* SHA-512/256 has 128-bit collision resistance (truncated). */
+ strengthBits = 128;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHA3
+ #ifndef WOLFSSL_NOSHA3_256
+ case WC_HASH_TYPE_SHA3_256:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_384
+ case WC_HASH_TYPE_SHA3_384:
+ strengthBits = 192;
+ break;
+ #endif
+ #ifndef WOLFSSL_NOSHA3_512
+ case WC_HASH_TYPE_SHA3_512:
+ strengthBits = 256;
+ break;
+ #endif
+ #endif
+ #ifdef WOLFSSL_SHAKE128
+ case WC_HASH_TYPE_SHAKE128:
+ strengthBits = 128;
+ break;
+ #endif
+ #ifdef WOLFSSL_SHAKE256
+ case WC_HASH_TYPE_SHAKE256:
+ strengthBits = 256;
+ break;
+ #endif
+ default:
+ /* Hash not on the FIPS 205 Table 9 approved list. */
+ return BAD_FUNC_ARG;
+ }
+
+ if (n == WC_SLHDSA_N_128) {
+ requiredBits = 128;
+ }
+ else if (n == WC_SLHDSA_N_192) {
+ requiredBits = 192;
+ }
+ else if (n == WC_SLHDSA_N_256) {
+ requiredBits = 256;
+ }
+ else {
+ return BAD_FUNC_ARG;
+ }
+
+ if (strengthBits < requiredBits) {
+ return BAD_FUNC_ARG;
+ }
+ return 0;
+}
+
/* Validate the caller-supplied pre-hashed digest length and look up the
* corresponding OID for the chosen hash algorithm.
*
@@ -8162,6 +8306,16 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx,
(sigSz == NULL)) {
ret = BAD_FUNC_ARG;
}
+ /* HashSLH-DSA requires an explicit, approved pre-hash algorithm.
+ * WC_HASH_TYPE_NONE is the "pure SLH-DSA" sentinel used by the non
+ * pre-hash Sign/Verify paths and is never valid here. Reject it
+ * explicitly (FIPS 205 Section 10.2.2 / Table 9) rather than relying on
+ * the slhdsa_check_hash_for_n() switch default below, so the rejection
+ * survives any future reordering of the validators or the addition of a
+ * WC_HASH_TYPE_NONE case to that switch. */
+ else if (hashType == WC_HASH_TYPE_NONE) {
+ ret = BAD_FUNC_ARG;
+ }
/* Check sig buffer is large enough to hold generated signature. */
else if (*sigSz < key->params->sigLen) {
ret = BAD_LENGTH_E;
@@ -8171,6 +8325,12 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx,
/* Alg 23, Step 6: Return error. */
ret = BAD_FUNC_ARG;
}
+ /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching before
+ * pre-hashing the message. Rejects PHs whose collision-resistance
+ * strength is below the paramSet's security level (n). */
+ if (ret == 0) {
+ ret = slhdsa_check_hash_for_n(hashType, key->params->n);
+ }
if (ret == 0) {
/* Alg 23, Steps 8-23: Validate caller-supplied pre-hashed digest length
* and select OID for the chosen hash algorithm. */
@@ -8405,8 +8565,11 @@ int wc_SlhDsaKey_SignHash(SlhDsaKey* key, const byte* ctx, byte ctxSz,
ret = MISSING_KEY;
}
/* First sanity check on hashType; the downstream prehash validator does
- * the detailed check for the actual type. */
- else if ((word32)hashType > (word32)WC_HASH_TYPE_MAX) {
+ * the detailed check for the actual type. Reject WC_HASH_TYPE_NONE here
+ * too -- it is never a valid pre-hash (FIPS 205 Section 10.2.2 / Table 9),
+ * so a known-invalid call fails before consuming DRBG output below. */
+ else if ((hashType == WC_HASH_TYPE_NONE) ||
+ ((word32)hashType > (word32)WC_HASH_TYPE_MAX)) {
ret = BAD_FUNC_ARG;
}
@@ -8535,6 +8698,12 @@ int wc_SlhDsaKey_VerifyHash(SlhDsaKey* key, const byte* ctx, byte ctxSz,
}
#ifdef WOLF_CRYPTO_CB
+ /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching.
+ * Rejects PHs whose collision-resistance strength is below the
+ * paramSet's security level (n). */
+ if (ret == 0) {
+ ret = slhdsa_check_hash_for_n(hashType, key->params->n);
+ }
if (ret == 0) {
#ifndef WOLF_CRYPTO_CB_FIND
if (key->devId != INVALID_DEVID)
diff --git a/wolfcrypt/src/wc_xmss.c b/wolfcrypt/src/wc_xmss.c
index 9ad311d0bab..1e723333f2f 100644
--- a/wolfcrypt/src/wc_xmss.c
+++ b/wolfcrypt/src/wc_xmss.c
@@ -26,6 +26,14 @@
#if FIPS_VERSION3_GE(2,0,0)
/* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
#define FIPS_NO_WRAPPERS
+
+ /* Windows orders the FIPS in-core integrity boundary by named linker
+ * sections. Keep XMSS (SP 800-208) code/const inside the boundary,
+ * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$nf")
+ #pragma const_seg(".fipsB$nf")
+ #endif
#endif
#include
#include
diff --git a/wolfcrypt/src/wc_xmss_impl.c b/wolfcrypt/src/wc_xmss_impl.c
index d1598c4d372..814495130fe 100644
--- a/wolfcrypt/src/wc_xmss_impl.c
+++ b/wolfcrypt/src/wc_xmss_impl.c
@@ -31,6 +31,15 @@
#include
+#if FIPS_VERSION3_GE(2,0,0)
+ /* Keep this XMSS (SP 800-208) implementation's code/const inside the FIPS
+ * in-core integrity boundary (Windows orders it by named sections). */
+ #ifdef USE_WINDOWS_API
+ #pragma code_seg(".fipsA$ng")
+ #pragma const_seg(".fipsB$ng")
+ #endif
+#endif
+
#include
#include
diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c
index e95bba5ae5e..75d42847e4c 100644
--- a/wolfcrypt/test/test.c
+++ b/wolfcrypt/test/test.c
@@ -55884,6 +55884,142 @@ static wc_test_ret_t mldsa_param_test(int param, WC_RNG* rng)
#endif
return ret;
}
+
+#if !defined(WOLFSSL_DILITHIUM_NO_SIGN) && \
+ !defined(WOLFSSL_DILITHIUM_NO_VERIFY)
+/* Negative test: HashML-DSA must reject pre-hash algorithms whose collision
+ * resistance is below the parameter set's claimed security strength.
+ *
+ * Per FIPS 204 sec. 5.4, Table 4 (Approved PH algorithms for HashML-DSA):
+ * ML-DSA-44 (128-bit security): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256,
+ * SHA3-256, SHA3-384, SHA3-512,
+ * SHAKE-128, SHAKE-256
+ * ML-DSA-65 (192-bit security): SHA2-384, SHA2-512, SHA3-384, SHA3-512,
+ * SHAKE-256
+ * ML-DSA-87 (256-bit security): SHA2-512, SHA3-512, SHAKE-256
+ *
+ * This test attempts sigGen and sigVer with disallowed (paramSet, hash) pairs
+ * and asserts both reject the call (non-zero return). Before the in-module
+ * hash-vs-paramSet check exists, wc_dilithium_sign_ctx_hash and
+ * wc_dilithium_verify_ctx_hash happily proceed with any compiled-in hash,
+ * so this test is expected to FAIL until the check is added. */
+static wc_test_ret_t mldsa_hash_paramset_rejection_test(WC_RNG* rng)
+{
+ wc_test_ret_t ret = 0;
+ int i;
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
+ dilithium_key* key = NULL;
+ byte* sig = NULL;
+#else
+ dilithium_key key[1];
+ byte sig[DILITHIUM_MAX_SIG_SIZE];
+#endif
+ word32 sigLen;
+ int verified;
+
+ /* Fixed-content digests; for a rejection test the bytes don't matter,
+ * only the (paramSet, hashAlg, hashLen) tuple. Sizes match each hash's
+ * digest length so the existing length sanity check inside
+ * wc_dilithium_*_ctx_hash() does NOT short-circuit before reaching the
+ * hash-vs-paramSet gate we are validating here. */
+ static const byte hash32[32] = { /* SHA-256 digest size */
+ 0xBA,0x78,0x16,0xBF,0x8F,0x01,0xCF,0xEA,
+ 0x41,0x41,0x40,0xDE,0x5D,0xAE,0x22,0x23,
+ 0xB0,0x03,0x61,0xA3,0x96,0x17,0x7A,0x9C,
+ 0xB4,0x10,0xFF,0x61,0xF2,0x00,0x15,0xAD
+ };
+ static const byte hash48[48] = { /* SHA-384 digest size */
+ 0xCB,0x00,0x75,0x3F,0x45,0xA3,0x5E,0x8B,
+ 0xB5,0xA0,0x3D,0x69,0x9A,0xC6,0x50,0x07,
+ 0x27,0x2C,0x32,0xAB,0x0E,0xDE,0xD1,0x63,
+ 0x1A,0x8B,0x60,0x5A,0x43,0xFF,0x5B,0xED,
+ 0x80,0x86,0x07,0x2B,0xA1,0xE7,0xCC,0x23,
+ 0x58,0xBA,0xEC,0xA1,0x34,0xC8,0x25,0xA7
+ };
+
+ struct {
+ int level;
+ int hashAlg;
+ const byte* hash;
+ word32 hashLen;
+ } forbidden[] = {
+ /* ML-DSA-65 needs >=192-bit collision strength; SHA-256 = 128-bit. */
+ { WC_ML_DSA_65, WC_HASH_TYPE_SHA256, hash32, 32 },
+ /* ML-DSA-87 needs >=256-bit collision strength; SHA-384 = 192-bit. */
+ { WC_ML_DSA_87, WC_HASH_TYPE_SHA384, hash48, 48 }
+ };
+
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
+ key = (dilithium_key*)XMALLOC(sizeof(*key), HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ sig = (byte*)XMALLOC(DILITHIUM_MAX_SIG_SIZE, HEAP_HINT,
+ DYNAMIC_TYPE_TMP_BUFFER);
+ if ((key == NULL) || (sig == NULL)) {
+ ERROR_OUT(WC_TEST_RET_ENC_ERRNO, neg_out);
+ }
+#endif
+ XMEMSET(sig, 0, DILITHIUM_MAX_SIG_SIZE);
+
+ for (i = 0; i < (int)(sizeof(forbidden) / sizeof(forbidden[0])); i++) {
+ #ifdef WOLFSSL_NO_ML_DSA_65
+ if (forbidden[i].level == WC_ML_DSA_65) continue;
+ #endif
+ #ifdef WOLFSSL_NO_ML_DSA_87
+ if (forbidden[i].level == WC_ML_DSA_87) continue;
+ #endif
+
+ ret = wc_dilithium_init_ex(key, NULL, devId);
+ if (ret != 0) {
+ ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out);
+ }
+ ret = wc_dilithium_set_level(key, (byte)forbidden[i].level);
+ if (ret != 0) {
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out);
+ }
+ ret = wc_dilithium_make_key(key, rng);
+ if (ret != 0) {
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out);
+ }
+
+ sigLen = (word32)wc_dilithium_sig_size(key);
+
+ /* sigGen with disallowed PH must be REJECTED. */
+ PRIVATE_KEY_UNLOCK();
+ ret = wc_dilithium_sign_ctx_hash(NULL, 0, forbidden[i].hashAlg,
+ forbidden[i].hash, forbidden[i].hashLen, sig, &sigLen, key, rng);
+ PRIVATE_KEY_LOCK();
+ if (ret == 0) {
+ /* Module did NOT reject -- this is the missing-enforcement bug. */
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out);
+ }
+
+ /* sigVer with disallowed PH must ALSO be REJECTED. */
+ verified = -1;
+ sigLen = (word32)wc_dilithium_sig_size(key);
+ ret = wc_dilithium_verify_ctx_hash(sig, sigLen, NULL, 0,
+ forbidden[i].hashAlg, forbidden[i].hash, forbidden[i].hashLen,
+ &verified, key);
+ if (ret == 0) {
+ wc_dilithium_free(key);
+ ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out);
+ }
+
+ wc_dilithium_free(key);
+ ret = 0;
+ }
+
+neg_out:
+#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
+ if (sig != NULL) XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+ if (key != NULL) XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+ return ret;
+}
+#endif /* !WOLFSSL_DILITHIUM_NO_SIGN && !WOLFSSL_DILITHIUM_NO_VERIFY */
+
#endif
#if defined(WC_MLDSA_CACHE_MATRIX_A) && \
@@ -56327,6 +56463,18 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t mldsa_test(void)
#endif /* (WOLFSSL_MLDSA_PUBLIC_KEY && !WOLFSSL_MLDSA_NO_VERIFY) ||
* (WOLFSSL_MLDSA_PRIVATE_KEY && !WOLFSSL_MLDSA_NO_SIGN) */
+#if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) && \
+ !defined(WOLFSSL_MLDSA_NO_SIGN) && \
+ !defined(WOLFSSL_MLDSA_NO_VERIFY) && \
+ (!defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87))
+ /* FIPS 204 sec. 5.4 -- HashML-DSA must reject pre-hashes weaker than
+ * the parameter set's security level. */
+ ret = mldsa_hash_paramset_rejection_test(&rng);
+ if (ret != 0) {
+ ERROR_OUT(ret, out);
+ }
+#endif
+
#if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) || \
!defined(WOLFSSL_MLDSA_NO_VERIFY) || \
defined(WOLFSSL_MLDSA_PRIVATE_KEY) || \
@@ -57772,29 +57920,18 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
}
- /* HashSLH-DSA takes the caller's pre-hashed digest as input. */
+ /* HashSLH-DSA takes the caller's pre-hashed digest as input. SHAKE-256
+ * is universally approved by FIPS 205 sec. 10.2.2 Table 9 across all
+ * SLH-DSA-{128,192,256} variants, so use it unconditionally for the
+ * positive round-trip path -- avoids tripping the in-module
+ * hash-vs-paramSet validation gate for higher-security paramSets. */
{
-#ifdef WOLFSSL_SLHDSA_SHA2
- enum wc_HashType phType = SLHDSA_IS_SHA2(param) ?
- WC_HASH_TYPE_SHA256 : WC_HASH_TYPE_SHAKE256;
-#else
enum wc_HashType phType = WC_HASH_TYPE_SHAKE256;
-#endif
byte digest[WC_SHA3_512_DIGEST_SIZE];
- word32 digestLen;
+ word32 digestLen = WC_SHA3_512_DIGEST_SIZE;
-#ifdef WOLFSSL_SLHDSA_SHA2
- if (phType == WC_HASH_TYPE_SHA256) {
- ret = wc_Sha256Hash(msg, (word32)sizeof(msg), digest);
- digestLen = WC_SHA256_DIGEST_SIZE;
- }
- else
-#endif
- {
- ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest,
- WC_SHA3_512_DIGEST_SIZE);
- digestLen = WC_SHA3_512_DIGEST_SIZE;
- }
+ ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest,
+ WC_SHA3_512_DIGEST_SIZE);
if (ret != 0) {
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
}
@@ -57813,9 +57950,13 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
}
- /* Additional pre-hash test: SHA-384 exercises a different OID path */
+ /* Additional pre-hash test: SHA-384 exercises a different OID path.
+ * Skip for SLH-DSA-256 because SHA-384 (192-bit collision) is below the
+ * 256-bit security level required by FIPS 205 sec. 10.2.2 Table 9. */
#ifdef WOLFSSL_SHA384
- {
+ /* Skip SHA-384 for SLH-DSA-256: 192-bit collision strength below the
+ * 256-bit security level (FIPS 205 sec. 10.2.2 Table 9). */
+ if (key->params->n != WC_SLHDSA_N_256) {
byte digest384[WC_SHA384_DIGEST_SIZE];
ret = wc_Sha384Hash(msg, (word32)sizeof(msg), digest384);
@@ -57875,6 +58016,98 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param)
return ret;
}
+
+/* Negative test: HashSLH-DSA must reject pre-hash algorithms whose collision
+ * resistance is below the parameter set's claimed security strength.
+ *
+ * Per FIPS 205 sec. 10.2.2, Table 9 (Approved PH for HashSLH-DSA):
+ * SLH-DSA-*-128* (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256,
+ * SHA3-256, SHA3-384, SHA3-512,
+ * SHAKE-128, SHAKE-256
+ * SLH-DSA-*-192* (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512,
+ * SHAKE-256
+ * SLH-DSA-*-256* (256-bit): SHA2-512, SHA3-512, SHAKE-256
+ *
+ * This test attempts sigGen / sigVer with a disallowed (paramSet, hash) pair
+ * and asserts both reject the call. Before the in-module hash-vs-paramSet
+ * check exists, wc_SlhDsaKey_SignHash / wc_SlhDsaKey_VerifyHash happily
+ * proceed with any compiled-in hash, so this test is expected to FAIL until
+ * the check is added. */
+static wc_test_ret_t slhdsa_hash_paramset_rejection_test(enum SlhDsaParam param)
+{
+ int ret = 0;
+ WC_RNG rng;
+ SlhDsaKey key[1];
+ byte sig[WC_SLHDSA_MAX_SIG_LEN];
+ word32 sigLen;
+ static const byte msg[] = {
+ 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f,
+ 0x72,0x6c,0x64,0x21
+ };
+ byte ctx[1];
+ /* Hash that is BELOW the security level of every 192/256-bit paramSet
+ * tested below. SHA-256 (128-bit collision) is approved only for the
+ * 128-bit SLH-DSA paramSets, so any 192/256-bit paramSet must reject it. */
+ enum wc_HashType badHash = WC_HASH_TYPE_SHA256;
+
+ XMEMSET(&key, 0, sizeof(key));
+
+#ifndef HAVE_FIPS
+ ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+ ret = wc_InitRng(&rng);
+#endif
+ if (ret != 0) return WC_TEST_RET_ENC_EC(ret);
+
+ ret = wc_SlhDsaKey_Init(key, param, NULL, INVALID_DEVID);
+ if (ret != 0) {
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_EC(ret);
+ }
+
+ ret = wc_SlhDsaKey_MakeKey(key, &rng);
+ if (ret != 0) {
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_EC(ret);
+ }
+
+ /* Only enforce on paramSets above 128-bit security; SHA-256 is approved
+ * for 128-bit so wouldn't be a rejection target there. */
+ if (key->params->n == WC_SLHDSA_N_128) {
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return 0;
+ }
+
+ /* sigGen with too-weak PH must be REJECTED. */
+ sigLen = WC_SLHDSA_MAX_SIG_LEN;
+ PRIVATE_KEY_UNLOCK();
+ ret = wc_SlhDsaKey_SignHash(key, ctx, 0, msg, (word32)sizeof(msg),
+ badHash, sig, &sigLen, &rng);
+ PRIVATE_KEY_LOCK();
+ if (ret == 0) {
+ /* Module did NOT reject -- this is the missing-enforcement bug. */
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_NC;
+ }
+
+ /* sigVer with too-weak PH must ALSO be REJECTED. */
+ sigLen = WC_SLHDSA_MAX_SIG_LEN;
+ XMEMSET(sig, 0, sigLen);
+ ret = wc_SlhDsaKey_VerifyHash(key, ctx, 0, msg, (word32)sizeof(msg),
+ badHash, sig, sigLen);
+ if (ret == 0) {
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return WC_TEST_RET_ENC_NC;
+ }
+
+ wc_SlhDsaKey_Free(key);
+ wc_FreeRng(&rng);
+ return 0;
+}
#endif
/* True iff slhdsa_test() actually emits at least one `goto out;` /
@@ -59868,6 +60101,41 @@ wc_test_ret_t slhdsa_test(void)
}
#endif
+ /* FIPS 205 sec. 10.2.2 -- HashSLH-DSA must reject pre-hashes whose
+ * collision strength is below the paramSet's security level. Use any
+ * available 192- or 256-bit paramSet to exercise the rejection. The
+ * 128-bit paramSets allow SHA-256, so they are not useful as targets
+ * here. */
+#ifdef WOLFSSL_SLHDSA_PARAM_192S
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE192S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHAKE192S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#elif defined(WOLFSSL_SLHDSA_PARAM_256S)
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE256S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHAKE256S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_192S)
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_192S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHA2_192S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_256S)
+ ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_256S);
+ if (ret != 0) {
+ wc_test_render_error_message("SLHDSA_SHA2_256S (hash-paramset reject)",
+ 0);
+ goto out;
+ }
+#endif
+
#endif /* !WOLFSSL_SLHDSA_VERIFY_ONLY */
#if defined(WOLF_PRIVATE_KEY_ID) && \
diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h
index e3d7637470d..e5b8b2ded9d 100644
--- a/wolfssl/wolfcrypt/aes.h
+++ b/wolfssl/wolfcrypt/aes.h
@@ -66,8 +66,14 @@ typedef struct Gcm {
#endif
WOLFSSL_LOCAL void GenerateM0(Gcm* gcm);
+/* The two-byte-pointer GMULT signature is the GCM_SMALL form only. Other GCM
+ * table modes (GCM_TABLE / GCM_TABLE_4BIT) use a static GMULT taking a table
+ * argument (byte m[N][16]), so this prototype must be scoped to GCM_SMALL --
+ * otherwise on 32-bit ARM armasm with WOLFSSL_AESGCM_STREAM (which now compiles
+ * the software table GHASH for the streaming path) it conflicts with the
+ * table-mode GMULT. See SP 800-38D AES-GCM GHASH. */
#if !defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \
- !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO)
+ !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) && defined(GCM_SMALL)
WOLFSSL_LOCAL void GMULT(byte* X, byte* Y);
#endif
WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h
index 5b089f118b4..286a8739f44 100644
--- a/wolfssl/wolfcrypt/error-crypt.h
+++ b/wolfssl/wolfcrypt/error-crypt.h
@@ -327,9 +327,17 @@ enum wolfCrypt_ErrorCodes {
ML_DSA_PCT_E = -1016, /* ML-DSA Pairwise Consistency Test failure */
DRBG_SHA512_KAT_FIPS_E = -1017, /* SHA-512 DRBG KAT failure */
SLH_DSA_KAT_FIPS_E = -1018, /* SLH-DSA CAST KAT failure */
-
- WC_SPAN2_LAST_E = -1018, /* Update to indicate last used error code */
- WC_LAST_E = -1018, /* the last code used either here or in
+ SLH_DSA_PCT_E = -1019, /* SLH-DSA Pairwise Consistency Test failure */
+ CMAC_KAT_FIPS_E = -1020, /* AES-CMAC KAT failure (vendor-elected) */
+ SHAKE_KAT_FIPS_E = -1021, /* SHAKE KAT failure (vendor-elected) */
+ DH_PCT_E = -1022, /* DH (FFC) Pairwise Consistency Test
+ * failure (SP 800-56A r3 sec 5.6.2.1.4,
+ * FIPS 140-3 IG 10.3.B) */
+ AES_KW_KAT_FIPS_E = -1023, /* AES-KW KAT failure (vendor-elected,
+ * SP 800-38F sec 6.2 / RFC 3394) */
+
+ WC_SPAN2_LAST_E = -1023, /* Update to indicate last used error code */
+ WC_LAST_E = -1023, /* the last code used either here or in
* error-ssl.h */
WC_SPAN2_MIN_CODE_E = -1999, /* Last usable code in span 2 */
diff --git a/wolfssl/wolfcrypt/fips_test.h b/wolfssl/wolfcrypt/fips_test.h
index de2b506df2c..41467b0ee2a 100644
--- a/wolfssl/wolfcrypt/fips_test.h
+++ b/wolfssl/wolfcrypt/fips_test.h
@@ -31,8 +31,23 @@
extern "C" {
#endif
-/* Added for FIPS v5.3 or later */
-#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3)
+/* Added for FIPS v5.3 or later.
+ *
+ * v7.0.0 and later upgrade the in-core integrity HMAC to SHA-512 (with a
+ * 512-bit key) for NSA 2.0 compliance. Customers that must avoid SHA-256
+ * anywhere in the validated module can therefore use the v7 module without
+ * residual SHA-256 integrity material. v5.3 and v6.x retain HMAC-SHA-256.
+ */
+#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0)
+ #ifdef WOLFSSL_SHA512
+ #define FIPS_IN_CORE_DIGEST_SIZE 64
+ #define FIPS_IN_CORE_HASH_TYPE WC_SHA512
+ #define FIPS_IN_CORE_KEY_SZ 64
+ #define FIPS_IN_CORE_VERIFY_SZ FIPS_IN_CORE_KEY_SZ
+ #else
+ #error FIPS v7+ integrity test requires WOLFSSL_SHA512
+ #endif
+#elif defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3)
/* Determine FIPS in core hash type and size */
#ifndef NO_SHA256
#define FIPS_IN_CORE_DIGEST_SIZE 32
@@ -62,7 +77,11 @@ enum FipsCastId {
FIPS_CAST_RSA_SIGN_PKCS1v15 = 7,
FIPS_CAST_ECC_CDH = 8,
FIPS_CAST_ECC_PRIMITIVE_Z = 9,
- FIPS_CAST_DH_PRIMITIVE_Z = 10,
+ FIPS_CAST_DH_PRIMITIVE_Z = 10, /* RETIRED (v7+): classic DH dropped
+ * from the FIPS 140-3 v7 PQ module
+ * boundary. Preserved for ABI --
+ * do not reuse this id, no longer
+ * triggered. */
FIPS_CAST_ECDSA = 11,
FIPS_CAST_KDF_TLS12 = 12,
FIPS_CAST_KDF_TLS13 = 13,
@@ -80,7 +99,10 @@ enum FipsCastId {
FIPS_CAST_XMSS = 23,
FIPS_CAST_DRBG_SHA512 = 24,
FIPS_CAST_SLH_DSA = 25,
- FIPS_CAST_COUNT = 26
+ FIPS_CAST_AES_CMAC = 26,
+ FIPS_CAST_SHAKE = 27,
+ FIPS_CAST_AES_KW = 28,
+ FIPS_CAST_COUNT = 29
};
enum FipsCastStateId {
diff --git a/wolfssl/wolfcrypt/random.h b/wolfssl/wolfcrypt/random.h
index 102f05d6b55..3747ea268f3 100644
--- a/wolfssl/wolfcrypt/random.h
+++ b/wolfssl/wolfcrypt/random.h
@@ -57,8 +57,12 @@
#define DRBG_SEED_LEN (440/8)
#endif
+/* Size of the DRBG seed (SHA-512) */
#ifdef WOLFSSL_DRBG_SHA512
- #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A Table 2 */
+ #ifndef DRBG_SHA512_SEED_LEN
+ #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A
+ * Table 2 */
+ #endif
#endif
@@ -212,12 +216,20 @@ struct OS_Seed {
*/
#define ENTROPY_SCALE_FACTOR (512)
#elif defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
- /* The value of 2 applies to Intel's RDSEED which provides about
- * 0.5 bits minimum of entropy per bit. The value of 4 gives a
- * conservative margin for FIPS. */
+ /* Intel RDSEED nominally provides about 0.5 bits min entropy per
+ * bit (NIST CMVP cert3389 PUD). In FIPS mode we previously used
+ * ENTROPY_SCALE_FACTOR=8 (256-byte seed) on Intel and 512 (16384-
+ * byte seed) on AMD, asymmetric per-vendor. As of v7 we adopt the
+ * AMD worst-case scale of 512 on Intel too: the AMD "Tyzen V1xxxx"
+ * PUD Table 3 documents 0.656040 bits per 128-bit block as the
+ * absolute floor across the entire CMVP-validated AMD family, and
+ * we use that same worst-case oversampling on Intel rather than
+ * trusting the higher Intel PUD claim, so a single seeding budget
+ * covers any x86 OE we deploy on. Non-FIPS Intel builds keep the
+ * lighter scale=2 (Intel-PUD-derived) for performance. */
#if defined(HAVE_FIPS) && defined(HAVE_FIPS_VERSION) && \
(HAVE_FIPS_VERSION >= 2)
- #define ENTROPY_SCALE_FACTOR (2*4)
+ #define ENTROPY_SCALE_FACTOR (512)
#else
/* Not FIPS, but Intel RDSEED, only double. */
#define ENTROPY_SCALE_FACTOR (2)
diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h
index 9f699145847..adf6dd75338 100644
--- a/wolfssl/wolfcrypt/settings.h
+++ b/wolfssl/wolfcrypt/settings.h
@@ -557,6 +557,17 @@
#endif
/* blinding adds API not available yet in FIPS mode */
#undef WC_RSA_BLINDING
+
+ /* NIST SP 800-38A sec 6.2 specifies CBC operates on plaintext that is
+ * a multiple of the block size; the cipher does not implement padding
+ * (project_aes_no_padding_policy). Force the wc_AesCbcEncrypt /
+ * wc_AesCbcDecrypt block-alignment check on for FIPS builds so a
+ * length not a multiple of WC_AES_BLOCK_SIZE returns BAD_LENGTH_E
+ * rather than silently truncating to the largest aligned prefix in
+ * the underlying implementation. */
+ #ifndef WOLFSSL_AES_CBC_LENGTH_CHECKS
+ #define WOLFSSL_AES_CBC_LENGTH_CHECKS
+ #endif
#endif
/* old FIPS has only AES_BLOCK_SIZE. */
@@ -3998,8 +4009,18 @@
#undef HAVE_PUBLIC_FFDHE
#endif
+ /* LinuxKM lkcapi previously needed a 4-byte minimum AES-GCM
+ * authentication tag for certain kernel-side test vectors. Per
+ * NIST SP 800-38D sec 5.2.1.2 / sec 8.2 a minimum tag length of 96 bits
+ * (12 bytes) provides robust integrity for general-purpose use; FIPS
+ * 140-3 IG C.H reaffirms this 96-bit minimum for Approved-mode AES-GCM.
+ * Gate the 32-bit-tag relaxation on non-FIPS builds only so the
+ * v7.0.0 module's Approved configuration retains the full 96-bit
+ * minimum in all linuxkm and non-linuxkm scenarios. */
+#ifndef HAVE_FIPS
#undef WOLFSSL_MIN_AUTH_TAG_SZ
#define WOLFSSL_MIN_AUTH_TAG_SZ 4
+#endif
#if defined(LINUXKM_LKCAPI_REGISTER) && !defined(WOLFSSL_ASN_INT_LEAD_0_ANY)
/* kernel 5.10 crypto manager tests key(s) that fail unless leading