From 064812cc1521e21f43badda4b98c977a1aca3a9a Mon Sep 17 00:00:00 2001 From: kaleb-himes Date: Mon, 22 Jun 2026 13:41:30 -0600 Subject: [PATCH] Phase 3: Security and FIPS Compliance Audit --- IDE/WIN-SRTP-KDF-140-3/test.vcxproj | 10 + IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj | 10 + configure.ac | 48 +- fips-hash.sh | 6 +- linuxkm/Kbuild | 37 +- linuxkm/Makefile | 28 +- linuxkm/linuxkm_memory.c | 14 + linuxkm/linuxkm_memory.h | 8 + linuxkm/linuxkm_wc_port.h | 98 ++-- linuxkm/module_hooks.c | 16 +- linuxkm/pie_redirect_table.c | 80 ++- linuxkm/x86_vector_register_glue.c | 36 +- src/include.am | 65 ++- tests/api/test_aes.c | 13 +- tests/api/test_evp_pkey.c | 4 +- tests/api/test_ossl_rsa.c | 10 +- tests/api/test_slhdsa.c | 8 +- wolfcrypt/benchmark/fips_cast_bench.c | 363 ++++++++++++++ wolfcrypt/benchmark/include.am | 10 + wolfcrypt/src/aes.c | 214 +++++++- wolfcrypt/src/aes_asm.S | 15 +- wolfcrypt/src/aes_xts_asm.S | 512 ++++++++++++++++++++ wolfcrypt/src/cpuid.c | 16 + wolfcrypt/src/dh.c | 32 +- wolfcrypt/src/error.c | 15 + wolfcrypt/src/ge_operations.c | 9 +- wolfcrypt/src/port/arm/armv8-32-sha3-asm.S | 10 + wolfcrypt/src/random.c | 142 +++++- wolfcrypt/src/rsa.c | 24 +- wolfcrypt/src/sha256.c | 30 +- wolfcrypt/src/sha512.c | 17 + wolfcrypt/src/wc_lms.c | 8 + wolfcrypt/src/wc_lms_impl.c | 14 +- wolfcrypt/src/wc_mldsa.c | 120 ++++- wolfcrypt/src/wc_mlkem.c | 123 +++-- wolfcrypt/src/wc_mlkem_poly.c | 9 + wolfcrypt/src/wc_slhdsa.c | 173 ++++++- wolfcrypt/src/wc_xmss.c | 8 + wolfcrypt/src/wc_xmss_impl.c | 9 + wolfcrypt/test/test.c | 310 +++++++++++- wolfssl/wolfcrypt/aes.h | 8 +- wolfssl/wolfcrypt/error-crypt.h | 14 +- wolfssl/wolfcrypt/fips_test.h | 30 +- wolfssl/wolfcrypt/random.h | 22 +- wolfssl/wolfcrypt/settings.h | 21 + 45 files changed, 2529 insertions(+), 240 deletions(-) create mode 100644 wolfcrypt/benchmark/fips_cast_bench.c diff --git a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj index a41ff9ac49f..2429f2fe9bb 100644 --- a/IDE/WIN-SRTP-KDF-140-3/test.vcxproj +++ b/IDE/WIN-SRTP-KDF-140-3/test.vcxproj @@ -162,7 +162,13 @@ true true UseLinkTimeCodeGeneration + false + true @@ -177,6 +183,10 @@ true + + false + true Console ws2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) true diff --git a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj index 65bb39fffa2..390b38f0e92 100644 --- a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj +++ b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj @@ -314,6 +314,16 @@ + + + + + + + + + diff --git a/configure.ac b/configure.ac index c1d2dd089ef..82af885af65 100644 --- a/configure.ac +++ b/configure.ac @@ -3777,13 +3777,22 @@ then AC_MSG_NOTICE([32bit ARMv4 found]) ;; *) - AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm" + # AArch32 ARMv8 crypto-extension asm (armv8-32-*-asm.S: sha256h, + # aese/aesmc, pmull) needs an explicit -march=armv8-a+crypto. The + # ARMv8-A crypto extension is OPTIONAL, so the SHA/AES instructions + # are gated by the "+crypto" arch feature -- NOT enabled by -mfpu + # alone, and NOT by a bare -march=armv8-a (nor -mcpu=cortex-a53 on + # some toolchains). Cross toolchains whose default -mcpu is ARMv7 + # (e.g. Xilinx Vitis cortex-a9) otherwise reject them with + # "selected processor does not support sha256h.32 in ARM mode". + # Mirrors the in-kernel ARM armasm enablement (port/arm/*.S crypto). + AM_CPPFLAGS="$AM_CPPFLAGS -march=armv8-a+crypto -mfpu=crypto-neon-fp-armv8 -marm" # Include options.h AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN" ENABLED_ARMASM_CRYPTO=yes ENABLED_ARMASM_NEON=yes ENABLED_ARM_32=yes - AC_MSG_NOTICE([32bit ARMv8 found, setting mfpu to crypto-neon-fp-armv8]) + AC_MSG_NOTICE([32bit ARMv8 found, setting -march=armv8-a+crypto + mfpu=crypto-neon-fp-armv8]) ;; esac esac @@ -6315,13 +6324,7 @@ AS_CASE([$FIPS_VERSION], -DWC_RSA_NO_PADDING \ -DECC_USER_CURVES \ -DHAVE_ECC384 \ - -DHAVE_ECC521 \ - -DWOLFSSL_VALIDATE_FFC_IMPORT \ - -DHAVE_FFDHE_Q \ - -DHAVE_FFDHE_3072 \ - -DHAVE_FFDHE_4096 \ - -DHAVE_FFDHE_6144 \ - -DHAVE_FFDHE_8192" + -DHAVE_ECC521" # KCAPI API does not support custom k for sign, don't force enable ECC key sizes and don't use seed callback AS_IF([test "x$ENABLED_KCAPI_ECC" = "xno"], @@ -6335,6 +6338,20 @@ AS_CASE([$FIPS_VERSION], -DHAVE_ECC256"]) DEFAULT_MAX_CLASSIC_ASYM_KEY_BITS=8192 + +# Classic DH and DSA are OUT OF SCOPE for the FIPS 140-3 v7 PQ module. +# (FIPS 186-5 retires DSA; v7 boundary keeps only ECDH/ECDSA + PQ KEM/DSA.) +# Hard-error if explicitly enabled; otherwise force off and add NO_DH/NO_DSA. + AS_IF([test "$enable_dh" = "yes"], + [AC_MSG_ERROR([--enable-dh is not supported with --enable-fips=$FIPS_VERSION. Classic finite-field DH is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DH support.])], + [test "$ENABLED_DH" != "no"], + [ENABLED_DH="no"; enable_dh="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DH"]) + + AS_IF([test "$enable_dsa" = "yes"], + [AC_MSG_ERROR([--enable-dsa is not supported with --enable-fips=$FIPS_VERSION. DSA is retired by FIPS 186-5 and is out of scope for the FIPS 140-3 v7 PQ module. Use --enable-fips=v6 if you need DSA support.])], + [test "$ENABLED_DSA" != "no"], + [ENABLED_DSA="no"; enable_dsa="no"; AM_CFLAGS="$AM_CFLAGS -DNO_DSA"]) + # optimizations section # protocol section @@ -8889,8 +8906,17 @@ then fi if test "x$ENABLED_DH" = "xno" then - ENABLED_DH="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_DH" + # Classic DH is out of scope for the FIPS 140-3 v7 PQ module. + # JNI normally auto-enables DH for legacy TLS suites; with FIPS v7+ + # we report and skip the auto-enable rather than silently turning DH + # back on (which would conflict with the boundary). + if test "$FIPS_VERSION" = "v7" || test "$FIPS_VERSION" = "ready" || test "$FIPS_VERSION" = "dev" + then + AC_MSG_NOTICE([JNI enabled but FIPS is $FIPS_VERSION, NOT turning on DH with this module]) + else + ENABLED_DH="yes" + AM_CFLAGS="$AM_CFLAGS -DHAVE_DH" + fi fi if test "x$ENABLED_PSK" = "xno" then diff --git a/fips-hash.sh b/fips-hash.sh index 36f320c0bbd..8f8a1a86317 100755 --- a/fips-hash.sh +++ b/fips-hash.sh @@ -13,7 +13,11 @@ then fi OUT=$(./wolfcrypt/test/testwolfcrypt | sed -n 's/hash = \(.*\)/\1/p') -NEWHASH=$(echo "$OUT" | cut -c1-64) +# FIPS v7.0.0+ uses HMAC-SHA-512 (128 hex chars); older FIPS versions +# use HMAC-SHA-256 (64 hex chars). Take the whole captured hash; the +# static_assert on sizeof(verifyCore) guards against wrong length at +# compile time after this script runs. +NEWHASH=$(echo "$OUT" | head -n1 | tr -d '[:space:]') if test -n "$NEWHASH" then cp wolfcrypt/src/fips_test.c wolfcrypt/src/fips_test.c.bak diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild index fe3f823942f..eb14106f5e8 100644 --- a/linuxkm/Kbuild +++ b/linuxkm/Kbuild @@ -99,6 +99,18 @@ $(LIBWOLFSSL_NAME)-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/modu ifeq "$(FIPS_OPTEST)" "1" $(LIBWOLFSSL_NAME)-y += linuxkm/optest-140-3/linuxkm_optest_wrapper.o + # The optest TEST wrapper (#includes test.c / invalid_tests.c) aggregates + # several AES contexts per invalid-input test function. Under WOLFSSL_AESNI + # the Aes struct carries an inline ALIGN16 streamData[5*WC_AES_BLOCK_SIZE] + # (wolfssl/wolfcrypt/aes.h) plus use_aesni, so a handful of those functions + # (aes_{,mac_,ofb_,cfb_,kw_}invalid_data_tests) exceed the conservative i386 + # THREAD_SIZE/4 = 2048 frame *warning*. They build cleanly on x86_64 (whose + # THREAD_SIZE/4 = 4096 already accommodates them) and run in a kernel thread + # well within THREAD_SIZE. Relax the cap to 4096 for the wrapper ONLY: it is + # test/evidence tooling OUTSIDE the FIPS module boundary, so the FIPS module + # objects keep the strict MAX_STACK_FRAME_SIZE. No effect on x86_64 (its + # default is already 4096). + $(obj)/linuxkm/optest-140-3/linuxkm_optest_wrapper.o: ccflags-y += -Wframe-larger-than=4096 endif WOLFSSL_CFLAGS_NO_VECTOR_INSNS := $(CFLAGS_SIMD_DISABLE) $(CFLAGS_FPU_DISABLE) @@ -127,6 +139,10 @@ ifeq "$(ENABLED_LINUXKM_PIE)" "yes" endif endif endif + ifeq ($(KERNEL_ARCH),i386) + NO_PIE_FLAG := 1 + $(info Note: disabling -fPIE on 32-bit x86 -- i386 -fPIE routes every local symbol through the GOT (R_386_GOTOFF), which the wolfCrypt PIE containerization forbids.) + endif endif ifdef NO_PIE_FLAG @@ -217,6 +233,25 @@ $(obj)/wolfcrypt/src/wc_mlkem_asm.o: OBJECT_FILES_NON_STANDARD := y $(obj)/wolfcrypt/src/wc_mldsa_asm.o: asflags-y := $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) $(obj)/wolfcrypt/src/wc_mldsa_asm.o: OBJECT_FILES_NON_STANDARD := y +# ARM/ARM64 crypto+NEON asm (wolfcrypt/src/port/arm/*.S) needs the crypto/NEON +# -march enabled at assembly time. The wolfSSL ARM asm carries no .arch/.fpu +# directives, and configure leaves ASFLAGS_*_SIMD_ENABLE empty on ARM: the +# userspace build inherits +crypto from the toolchain's default -mcpu (e.g. +# cortex-a72), but the kernel build forces its own baseline -march without it, +# so the AES/SHA/PMULL instructions are rejected ("selected processor does not +# support `aesd ...'"). Supply the right -march here per kernel arch. (The +# wrong-arch port/arm files are #ifdef'd to empty objects, so the flag is a +# no-op for them; OBJECT_FILES_NON_STANDARD silences objtool on the hand asm.) +ifeq ($(CONFIG_ARM64),y) + WOLFSSL_ARM_ASM_MARCH := -march=armv8-a+crypto +else ifeq ($(CONFIG_ARM),y) + WOLFSSL_ARM_ASM_MARCH := -march=armv8-a -mfpu=crypto-neon-fp-armv8 +endif +ifdef WOLFSSL_ARM_ASM_MARCH +$(obj)/wolfcrypt/src/port/arm/%.o: asflags-y := $(WOLFSSL_ASFLAGS) $(WOLFSSL_ARM_ASM_MARCH) +$(obj)/wolfcrypt/src/port/arm/%.o: OBJECT_FILES_NON_STANDARD := y +endif + ifndef READELF READELF := readelf endif @@ -325,7 +360,7 @@ RENAME_PIE_TEXT_AND_DATA_SECTIONS := \ next; \ } \ else if ($$4 == "OBJECT") { \ - if (! ($$7 in wolfcrypt_data_sections)) { \ + if (! ($$7 in wolfcrypt_data_sections) && ! ($$7 in wolfcrypt_text_sections)) { \ if ((other_sections[$$7] == ".printk_index") || \ (($$8 ~ /^_entry\.[0-9]+$$|^kernel_read_file_str$$/) && \ (other_sections[$$7] == ".data.rel.ro.local"))) \ diff --git a/linuxkm/Makefile b/linuxkm/Makefile index 24a867b9356..1edb65bfa91 100644 --- a/linuxkm/Makefile +++ b/linuxkm/Makefile @@ -45,7 +45,17 @@ ifndef SRC_TOP SRC_TOP=$(shell dirname $(MODULE_TOP)) endif -WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\"" +# -Wno-nested-externs: the Linux kernel's compile-time-assert machinery +# ( _compiletime_assert, reached via the atomic / per-CPU / +# printk-once macros used in linuxkm/x86_vector_register_glue.c) expands to an +# "extern void __compiletime_assert_N(void)" declaration *inside* a function body. +# That is by-design kernel code, but trips wolfSSL's -Wnested-externs -> with +# -Werror it breaks the i386 + AES-NI kernel build (the glue is only compiled when +# WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set, i.e. with a PAA enabled). Suppress it +# for the whole linuxkm build, same as the two kernel-incompatible warnings already +# stripped here. This only silences a diagnostic -> emitted object code (and the +# FIPS in-core hash) is byte-identical on every arch. +WOLFSSL_CFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -Wno-declaration-after-statement -Wno-redundant-decls -Wno-nested-externs -DLIBWOLFSSL_GLOBAL_EXTRA_CFLAGS="\" $(KERNEL_EXTRA_CFLAGS)\"" ifdef KERNEL_EXTRA_CFLAGS WOLFSSL_CFLAGS += $(KERNEL_EXTRA_CFLAGS) endif @@ -55,7 +65,13 @@ endif WOLFSSL_ASFLAGS=-DHAVE_CONFIG_H -I$(SRC_TOP) -DBUILDING_WOLFSSL $(AM_CCASFLAGS) $(CCASFLAGS) -WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(src_libwolfssl_la_OBJECTS))))) +# Strip libtool's per-target object prefix (src_libwolfssl_la-) so Kbuild sees the +# real source-derived object names. The innermost patsubst handles the ARM asm +# under wolfcrypt/src/port/arm/ (armv8-*/armv8-32-*/thumb2-*), which the +# directory-specific src/ and wolfcrypt/src/ patsubsts below do NOT match -- +# without it, --enable-armasm kernel builds fail with "No rule to make target +# .../src_libwolfssl_la-armv8-aes-asm.o". +WOLFSSL_OBJ_FILES=$(patsubst %.lo, %.o, $(patsubst src/src_libwolfssl_la-%, src/%, $(patsubst src/libwolfssl_la-%, src/%, $(patsubst wolfcrypt/src/src_libwolfssl_la-%, wolfcrypt/src/%, $(patsubst wolfcrypt/src/port/arm/src_libwolfssl_la-%, wolfcrypt/src/port/arm/%, $(src_libwolfssl_la_OBJECTS)))))) ifeq "$(ENABLED_CRYPT_TESTS)" "yes" WOLFSSL_OBJ_FILES+=wolfcrypt/test/test.o @@ -197,7 +213,7 @@ GENERATE_RELOC_TAB := $(AWK) ' \ next; \ } \ /^0/ { \ - if ($$3 !~ "^(R_X86_.*|R_AARCH64_.*|R_ARM.*)$$") { \ + if ($$3 !~ "^(R_X86_.*|R_386_.*|R_AARCH64_.*|R_ARM.*)$$") { \ print "Unexpected relocation type in " cur_seg ":\n" $$0 >"/dev/stderr"; \ ++bad_relocs; \ } \ @@ -361,12 +377,12 @@ module-update-fips-hash: $(LIBWOLFSSL_NAME).ko readarray -t verifyCore_attrs < <($(READELF) --wide --symbols "$<" | \ sed -E -n 's/^[[:space:]]*[0-9]+: ([0-9a-fA-F]+)[[:space:]]+([0-9]+)[[:space:]]+OBJECT[[:space:]]+[A-Z]+[[:space:]]+[A-Z]+[[:space:]]+'"$${rodata_segment[0]}"'[[:space:]]+verifyCore$$/\1\n\2/p'); \ if [[ $${#verifyCore_attrs[@]} != 2 ]]; then echo ' unexpected verifyCore_attrs.' >&2; exit 1; fi; \ - if [[ "$${verifyCore_attrs[1]}" != "65" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \ + if [[ "$${verifyCore_attrs[1]}" != "129" ]]; then echo " verifyCore has unexpected length $${verifyCore_attrs[1]}." >&2; exit 1; fi; \ verifyCore_offset=$$((0x$${rodata_segment[1]} + 0x$${verifyCore_attrs[0]})); \ - current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=64 status=none); \ + current_verifyCore=$$(dd bs=1 if="$<" skip=$$verifyCore_offset count=128 status=none); \ if [[ ! "$$current_verifyCore" =~ [0-9a-fA-F]{64} ]]; then echo " verifyCore at offset $$verifyCore_offset has unexpected value." >&2; exit 1; fi; \ if [[ '$(FIPS_HASH)' == "$$current_verifyCore" ]]; then echo ' Supplied FIPS_HASH matches existing verifyCore -- no update needed.'; exit 0; fi; \ - echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=64 status=none && \ + echo -n '$(FIPS_HASH)' | dd bs=1 conv=notrunc of="$<" seek=$$verifyCore_offset count=128 status=none && \ echo " FIPS verifyCore updated successfully." && \ if [[ -f '$(LIBWOLFSSL_NAME).ko.signed' ]]; then $(MAKE) $(QFLAG) --no-print-directory --no-silent -C . '$(LIBWOLFSSL_NAME).ko.signed'; fi diff --git a/linuxkm/linuxkm_memory.c b/linuxkm/linuxkm_memory.c index 32aa241404a..15cb32ad918 100644 --- a/linuxkm/linuxkm_memory.c +++ b/linuxkm/linuxkm_memory.c @@ -52,6 +52,8 @@ static const struct reloc_layout_ent { [WC_R_X86_64_64] = { "R_X86_64_64", ~0UL, 64, .is_signed = 0, .is_relative = 0 }, [WC_R_X86_64_PC32] = { "R_X86_64_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 }, [WC_R_X86_64_PLT32] = { "R_X86_64_PLT32", ~0UL, 32, .is_signed = 1, .is_relative = 1 }, + [WC_R_386_32] = { "R_386_32", ~0UL, 32, .is_signed = 0, .is_relative = 0 }, + [WC_R_386_PC32] = { "R_386_PC32", ~0UL, 32, .is_signed = 1, .is_relative = 1 }, [WC_R_AARCH64_ABS32] = { "R_AARCH64_ABS32", ~0UL, 32, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_AARCH64_ABS64] = { "R_AARCH64_ABS64", ~0UL, 64, .is_signed = 1, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_AARCH64_ADD_ABS_LO12_NC] = { "R_AARCH64_ADD_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 }, @@ -64,6 +66,10 @@ static const struct reloc_layout_ent { [WC_R_AARCH64_LDST64_ABS_LO12_NC] = { "R_AARCH64_LDST64_ABS_LO12_NC", 0b00000000001111111111110000000000, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 1, .is_pair_hi = 0 }, [WC_R_AARCH64_PREL32] = { "R_AARCH64_PREL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_ABS32] = { "R_ARM_ABS32", ~0UL, 32, .is_signed = 0, .is_relative = 0, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, + /* ARM-mode BL/B: signed 24-bit word offset in bits [23:0] (cf. AARCH64_CALL26's + * 26-bit field). Emitted by the arm32 ARM-mode (non-Thumb) kernel module build. */ + [WC_R_ARM_CALL] = { "R_ARM_CALL", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, + [WC_R_ARM_JUMP24] = { "R_ARM_JUMP24", 0b00000000111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_PREL31] = { "R_ARM_PREL31", 0b01111111111111111111111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_REL32] = { "R_ARM_REL32", ~0UL, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, [WC_R_ARM_THM_CALL] = { "R_ARM_THM_CALL", 0b00000111111111110010111111111111, 32, .is_signed = 1, .is_relative = 1, .is_pages = 0, .is_pair_lo = 0, .is_pair_hi = 0 }, @@ -363,6 +369,12 @@ ssize_t wc_reloc_normalize_segment( case WC_R_X86_64_32: case WC_R_X86_64_32S: case WC_R_X86_64_64: + /* i386 shares the x86_64 normalization: R_386_32 is absolute + * (is_relative=0), R_386_PC32 is PC-relative (is_relative=1). The + * math below is driven by layout->is_relative/is_signed and is + * width-correct via uintptr_t (32-bit on i386). */ + case WC_R_386_32: + case WC_R_386_PC32: if (dest_seg != WC_R_SEG_OTHER) { #ifdef DEBUG_LINUXKM_PIE_SUPPORT @@ -410,6 +422,8 @@ ssize_t wc_reloc_normalize_segment( break; case WC_R_ARM_ABS32: + case WC_R_ARM_CALL: + case WC_R_ARM_JUMP24: case WC_R_ARM_PREL31: case WC_R_ARM_REL32: case WC_R_ARM_THM_CALL: diff --git a/linuxkm/linuxkm_memory.h b/linuxkm/linuxkm_memory.h index 76e681da805..d5111613d28 100644 --- a/linuxkm/linuxkm_memory.h +++ b/linuxkm/linuxkm_memory.h @@ -40,6 +40,12 @@ enum wc_reloc_type { WC_R_X86_64_64, WC_R_X86_64_PC32, WC_R_X86_64_PLT32, + /* 32-bit x86 (i386). With NO_PIE_FLAG the wolfCrypt container emits only + * R_386_32 (absolute) and R_386_PC32 (PC-relative); these are semantically + * identical to R_X86_64_32 / R_X86_64_PC32 and share their canonicalization + * case below. */ + WC_R_386_32, + WC_R_386_PC32, WC_R_AARCH64_ABS32, WC_R_AARCH64_ABS64, WC_R_AARCH64_ADD_ABS_LO12_NC, @@ -52,6 +58,8 @@ enum wc_reloc_type { WC_R_AARCH64_LDST64_ABS_LO12_NC, WC_R_AARCH64_PREL32, WC_R_ARM_ABS32, + WC_R_ARM_CALL, + WC_R_ARM_JUMP24, WC_R_ARM_PREL31, WC_R_ARM_REL32, WC_R_ARM_THM_CALL, diff --git a/linuxkm/linuxkm_wc_port.h b/linuxkm/linuxkm_wc_port.h index 2a47722ad02..6f1f1881fef 100644 --- a/linuxkm/linuxkm_wc_port.h +++ b/linuxkm/linuxkm_wc_port.h @@ -682,8 +682,12 @@ #define WOLFSSL_USE_SAVE_VECTOR_REGISTERS #endif + /* x86 (kernel_fpu_*) and ARM/ARM64 (kernel_neon_*) share the same + * arch-neutral save/restore tracker in x86_vector_register_glue.c; the glue + * functions keep their historical wc_*_x86 names on all three arches (they + * are outside-boundary glue reached via the PIE redirect table). */ #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ - defined(CONFIG_X86) + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) extern __must_check int allocate_wolfcrypt_linuxkm_fpu_states(void); extern void free_wolfcrypt_linuxkm_fpu_states(void); @@ -691,18 +695,23 @@ WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags); WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags); - #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) - #include - #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) - /* added by a62b01cd6c */ - #include - #endif - #else - #include - #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) - /* added by 266d051601 */ - #include + #ifdef CONFIG_X86 + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) + #include + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) + /* added by a62b01cd6c */ + #include + #endif + #else + #include + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + /* added by 266d051601 */ + #include + #endif #endif + #else /* CONFIG_ARM || CONFIG_ARM64 */ + #include /* may_use_simd() */ + #include /* kernel_neon_begin() / kernel_neon_end() */ #endif #ifndef CAN_SAVE_VECTOR_REGISTERS #ifdef DEBUG_VECTOR_REGISTER_ACCESS_FUZZING @@ -742,42 +751,6 @@ #define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT) #endif - #elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && (defined(CONFIG_ARM) || defined(CONFIG_ARM64)) - - #error kernel module ARM SIMD is not yet tested or usable. - - #include - - static WARN_UNUSED_RESULT inline int save_vector_registers_arm(void) - { - preempt_disable(); - if (! may_use_simd()) { - preempt_enable(); - return BAD_STATE_E; - } else { - fpsimd_preserve_current_state(); - return 0; - } - } - static inline void restore_vector_registers_arm(void) - { - fpsimd_restore_current_state(); - preempt_enable(); - } - - #ifndef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) { int _svr_ret = save_vector_registers_arm(); if (_svr_ret != 0) { fail_clause } } - #endif - #ifndef SAVE_VECTOR_REGISTERS2 - #define SAVE_VECTOR_REGISTERS2() save_vector_registers_arm() - #endif - #ifndef CAN_SAVE_VECTOR_REGISTERS - #define CAN_SAVE_VECTOR_REGISTERS() can_save_vector_registers_arm() - #endif - #ifndef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() restore_vector_registers_arm() - #endif - #elif defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) #error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture. #endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */ @@ -958,6 +931,22 @@ extern int memcmp(const void *s1, const void *s2, size_t n); #endif +#ifdef CONFIG_X86_32 + /* arch/x86/include/asm/string_32.h #defines memcpy/memcmp/memset as + * __builtin_* object-like macros (x86_64's string_64.h declares them as + * plain functions, so this does not arise on K2). Left active, those + * macros expand inside the PIE redirect-table member declarations below -- + * "typeof(memcmp) *memcmp;" becomes "... *__builtin_memcmp;" -- so the + * table loses its memcmp/memcpy/memset members and the downstream + * WC_PIE_INDIRECT_SYM(memcmp) lookups fail to compile. #undef the macros + * here, before the struct; string_32.h still declares the underlying + * functions, so typeof() resolves and the members and redirects use the + * canonical names. Mirrors the CONFIG_MIPS handling just above. */ + #undef memcpy + #undef memcmp + #undef memset +#endif + struct wolfssl_linuxkm_pie_redirect_table { #ifdef HAVE_FIPS typeof(wc_linuxkm_normalize_relocations) *wc_linuxkm_normalize_relocations; @@ -1091,13 +1080,13 @@ #ifdef WOLFSSL_USE_SAVE_VECTOR_REGISTERS - #ifdef CONFIG_X86 + #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) typeof(allocate_wolfcrypt_linuxkm_fpu_states) *allocate_wolfcrypt_linuxkm_fpu_states; typeof(wc_can_save_vector_registers_x86) *wc_can_save_vector_registers_x86; typeof(free_wolfcrypt_linuxkm_fpu_states) *free_wolfcrypt_linuxkm_fpu_states; typeof(wc_restore_vector_registers_x86) *wc_restore_vector_registers_x86; typeof(wc_save_vector_registers_x86) *wc_save_vector_registers_x86; - #else /* !CONFIG_X86 */ + #else #error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture. #endif /* arch */ @@ -1442,7 +1431,8 @@ #undef get_current #define get_current WC_PIE_INDIRECT_SYM(get_current) - #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) + #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) #define allocate_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(allocate_wolfcrypt_linuxkm_fpu_states) #define wc_can_save_vector_registers_x86 WC_PIE_INDIRECT_SYM(wc_can_save_vector_registers_x86) #define free_wolfcrypt_linuxkm_fpu_states WC_PIE_INDIRECT_SYM(free_wolfcrypt_linuxkm_fpu_states) @@ -1751,7 +1741,7 @@ #if !defined(BUILDING_WOLFSSL) /* some caller code needs these. */ #if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) - #if defined(CONFIG_X86) + #if defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) WOLFSSL_API __must_check int wc_can_save_vector_registers_x86(void); WOLFSSL_API __must_check int wc_save_vector_registers_x86(enum wc_svr_flags flags); WOLFSSL_API void wc_restore_vector_registers_x86(enum wc_svr_flags flags); @@ -1761,9 +1751,9 @@ #ifndef REENABLE_VECTOR_REGISTERS #define REENABLE_VECTOR_REGISTERS() wc_restore_vector_registers_x86(WC_SVR_FLAG_INHIBIT) #endif - #else /* !CONFIG_X86 */ + #else #error WOLFSSL_USE_SAVE_VECTOR_REGISTERS is set for an unimplemented architecture. - #endif /* !CONFIG_X86 */ + #endif #endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS */ #ifdef WC_LINUXKM_USE_HEAP_WRAPPERS WOLFSSL_API extern void *wc_linuxkm_malloc(size_t size); diff --git a/linuxkm/module_hooks.c b/linuxkm/module_hooks.c index 66b953048e4..9a11b7007a4 100644 --- a/linuxkm/module_hooks.c +++ b/linuxkm/module_hooks.c @@ -527,7 +527,9 @@ int wc_linuxkm_GenerateSeed_IntelRD(struct OS_Seed* os, byte* output, word32 sz) #endif /* WC_LINUXKM_RDSEED_IN_GLUE_LAYER */ -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) + /* arch-generic save/restore tracker (kernel_fpu_* on x86, kernel_neon_* on ARM) */ #include "linuxkm/x86_vector_register_glue.c" #endif @@ -1516,7 +1518,8 @@ static int set_up_wolfssl_linuxkm_pie_redirect_table(void) { wolfssl_linuxkm_pie_redirect_table.get_current = my_get_current_thread; -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && defined(CONFIG_X86) +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && \ + (defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) wolfssl_linuxkm_pie_redirect_table.allocate_wolfcrypt_linuxkm_fpu_states = allocate_wolfcrypt_linuxkm_fpu_states; wolfssl_linuxkm_pie_redirect_table.wc_can_save_vector_registers_x86 = wc_can_save_vector_registers_x86; wolfssl_linuxkm_pie_redirect_table.free_wolfcrypt_linuxkm_fpu_states = free_wolfcrypt_linuxkm_fpu_states; @@ -2041,7 +2044,12 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib int ret; int argc; const char *argv[3]; - char code_buf[5]; + /* Holds the textual error code written to the sysfs node, plus a NUL. + * Must accommodate the v7.0.0 module's 5-character codes (e.g. "-1015" + * ML_KEM_PCT_E, "-1016" ML_DSA_PCT_E, "-1017" DRBG_SHA512_KAT_FIPS_E); + * the earlier [5] sizing silently rejected them via the length guard + * below. Sized with headroom for any future wider code. */ + char code_buf[8]; size_t corrected_count; int i; @@ -2057,7 +2065,7 @@ static ssize_t FIPS_optest_trig_handler(struct kobject *kobj, struct kobj_attrib corrected_count = count - 1; else corrected_count = count; - if ((corrected_count < 1) || (corrected_count > 4)) + if ((corrected_count < 1) || (corrected_count > (sizeof(code_buf) - 1))) return -EINVAL; XMEMCPY(code_buf, buf, corrected_count); code_buf[corrected_count] = 0; diff --git a/linuxkm/pie_redirect_table.c b/linuxkm/pie_redirect_table.c index 03be2e04fa0..657aa4c96b6 100644 --- a/linuxkm/pie_redirect_table.c +++ b/linuxkm/pie_redirect_table.c @@ -53,8 +53,18 @@ const struct wolfssl_linuxkm_pie_redirect_table return &wolfssl_linuxkm_pie_redirect_table; } -/* placeholder implementations for missing functions. */ -#if defined(CONFIG_MIPS) +/* placeholder implementations for missing functions. + * + * ARM/ARM64 need these for the same reason MIPS does: with vector codegen + * enabled (the --enable-armasm build does not pass -mgeneral-regs-only to the + * wolfCrypt C files), gcc auto-generates raw memcpy/memset libcalls for + * aggregate copies inside the position-independent FIPS container (e.g. in + * asn.c / fips_test.c). Source-level XMEMCPY/XMEMSET are redirected to the + * kernel's fast implementations via WC_PIE_INDIRECT_SYM, but compiler-emitted + * libcalls bypass that #define, so the container must define its own to stay + * self-contained (the in-core integrity check forbids ANY undefined symbol). + * (The pure-C C1 build does not auto-vectorize and so never references these.) */ +#if defined(CONFIG_MIPS) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) #undef memcpy void *memcpy(void *dest, const void *src, size_t n) { char *dest_i = (char *)dest; @@ -74,3 +84,69 @@ const struct wolfssl_linuxkm_pie_redirect_table return dest; } #endif + +#if defined(CONFIG_ARM) + /* 32-bit ARM has no integer-divide instruction in the baseline ISA, so gcc + * emits calls to these EABI runtime helpers for '/' and '%'. The kernel + * exports them (arch/arm/lib/lib1funcs.S), but the self-contained PIE FIPS + * container may not reference external symbols (the in-core integrity check + * forbids ANY undefined symbol), so provide them here. Restoring (bit-at-a- + * time) division -- correctness over speed; crypto-path divisions are on + * small sizes/indices. Per the EABI, __aeabi_*idivmod return the quotient + * in r0 and the remainder in r1, i.e. a little-endian 64-bit value with the + * quotient in the low word and the remainder in the high word. */ + unsigned int __aeabi_uidiv(unsigned int n, unsigned int d); + unsigned int __aeabi_uidiv(unsigned int n, unsigned int d) { + unsigned int q = 0, r = 0; + int i; + if (d == 0) + return ~0u; + for (i = 31; i >= 0; i--) { + r = (r << 1) | ((n >> i) & 1u); + if (r >= d) { + r -= d; + q |= (1u << i); + } + } + return q; + } + + unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d); + unsigned long long __aeabi_uidivmod(unsigned int n, unsigned int d) { + unsigned int q = 0, r = 0; + int i; + if (d == 0) + return (unsigned long long)n << 32; /* quot=0, rem=n */ + for (i = 31; i >= 0; i--) { + r = (r << 1) | ((n >> i) & 1u); + if (r >= d) { + r -= d; + q |= (1u << i); + } + } + return ((unsigned long long)r << 32) | q; + } + + int __aeabi_idiv(int n, int d); + int __aeabi_idiv(int n, int d) { + int neg = (n < 0) ^ (d < 0); + unsigned int un = (n < 0) ? (unsigned int)(-(long)n) : (unsigned int)n; + unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d; + unsigned int uq = __aeabi_uidiv(un, ud); + return neg ? -(int)uq : (int)uq; + } + + unsigned long long __aeabi_idivmod(int n, int d); + unsigned long long __aeabi_idivmod(int n, int d) { + int nneg = (n < 0); + int qneg = (n < 0) ^ (d < 0); + unsigned int un = nneg ? (unsigned int)(-(long)n) : (unsigned int)n; + unsigned int ud = (d < 0) ? (unsigned int)(-(long)d) : (unsigned int)d; + unsigned long long um = __aeabi_uidivmod(un, ud); + unsigned int uq = (unsigned int)um; + unsigned int ur = (unsigned int)(um >> 32); + int q = qneg ? -(int)uq : (int)uq; + int r = nneg ? -(int)ur : (int)ur; + return ((unsigned long long)(unsigned int)r << 32) | (unsigned int)q; + } +#endif /* CONFIG_ARM */ diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c index 107c7e11274..af4b9b6d598 100644 --- a/linuxkm/x86_vector_register_glue.c +++ b/linuxkm/x86_vector_register_glue.c @@ -23,8 +23,29 @@ /* included by linuxkm/module_hooks.c */ #ifndef WC_SKIP_INCLUDED_C_FILES -#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || !defined(CONFIG_X86) - #error x86_vector_register_glue.c included in non-vectorized/non-x86 project. +#if !defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) || \ + !(defined(CONFIG_X86) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) + #error vector register glue included in non-vectorized or unsupported-arch project. +#endif + +/* The per-CPU vector-register save/restore tracker below is architecture-neutral + * except for the single kernel call that claims/releases the SIMD/FP unit: + * x86 -> kernel_fpu_begin() / kernel_fpu_end() (, via the + * include in + * linuxkm_wc_port.h) + * ARM/ARM64 -> kernel_neon_begin() / kernel_neon_end() () + * Both APIs obey the same context rules the tracker already enforces (may_use_simd(), + * hard-IRQ/NMI rejection, preempt/bh/migration disable). The functions retain their + * historical wc_*_x86 names: they are internal glue OUTSIDE the FIPS module boundary + * (reached from boundary code only through the PIE redirect table), so keeping the + * names leaves the validated x86 symbol set byte-for-byte unchanged. */ +#if defined(CONFIG_X86) + #define WC_LINUXKM_FPU_BEGIN() kernel_fpu_begin() + #define WC_LINUXKM_FPU_END() kernel_fpu_end() +#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) + #include + #define WC_LINUXKM_FPU_BEGIN() kernel_neon_begin() + #define WC_LINUXKM_FPU_END() kernel_neon_end() #endif #ifdef WOLFSSL_LINUXKM_VERBOSE_DEBUG @@ -70,9 +91,12 @@ WARN_UNUSED_RESULT int allocate_wolfcrypt_linuxkm_fpu_states(void) wc_linuxkm_fpu_states_n_tracked * sizeof(wc_linuxkm_fpu_states[0])); if (! wc_linuxkm_fpu_states) { + /* cast to unsigned long to match %lu: size_t is 32-bit on arm32 but + * 64-bit (== unsigned long) on x86_64/arm64, so the product type differs + * by arch. Cast keeps the format portable across the ARM-generalized glue. */ pr_err("ERROR: allocation of %lu bytes for " "wc_linuxkm_fpu_states failed.\n", - nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0])); + (unsigned long)(nr_cpu_ids * sizeof(wc_linuxkm_fpu_states[0]))); return MEMORY_E; } @@ -441,10 +465,10 @@ WARN_UNUSED_RESULT int wc_save_vector_registers_x86(enum wc_svr_flags flags) #if IS_ENABLED(CONFIG_PREEMPT_RT) preempt_disable(); #endif - kernel_fpu_begin(); + WC_LINUXKM_FPU_BEGIN(); pstate = wc_linuxkm_fpu_state_assoc(1, 1); if (pstate == NULL) { - kernel_fpu_end(); + WC_LINUXKM_FPU_END(); #if IS_ENABLED(CONFIG_PREEMPT_RT) preempt_enable(); #endif @@ -508,7 +532,7 @@ void wc_restore_vector_registers_x86(enum wc_svr_flags flags) if (pstate->fpu_state == 0U) { wc_linuxkm_fpu_state_release(pstate); - kernel_fpu_end(); + WC_LINUXKM_FPU_END(); #if IS_ENABLED(CONFIG_PREEMPT_RT) preempt_enable(); #endif diff --git a/src/include.am b/src/include.am index 4b80e149bac..bd4d2586a2c 100644 --- a/src/include.am +++ b/src/include.am @@ -109,17 +109,22 @@ endif if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S -if BUILD_X86_ASM -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S -else +# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by +# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to +# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM +# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations +# break the FIPS in-core integrity in a shared object -- so it is not compiled. +# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and +# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM +# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17. if BUILD_AESXTS src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S endif endif -endif if BUILD_DES3 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/des3.c @@ -259,17 +264,22 @@ endif BUILD_PPC64_ASM if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S -if BUILD_X86_ASM -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S -else +# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by +# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to +# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM +# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations +# break the FIPS in-core integrity in a shared object -- so it is not compiled. +# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and +# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM +# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17. if BUILD_AESXTS src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S endif endif -endif if BUILD_RISCV_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-aes.c @@ -532,17 +542,22 @@ endif BUILD_PPC64_ASM if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S -if BUILD_X86_ASM -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S -else +# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by +# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to +# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM +# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations +# break the FIPS in-core integrity in a shared object -- so it is not compiled. +# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and +# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM +# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17. if BUILD_AESXTS src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S endif endif -endif if BUILD_RISCV_ASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-aes.c @@ -867,17 +882,22 @@ endif BUILD_AES if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S -if BUILD_X86_ASM -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S -else +# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by +# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to +# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM +# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations +# break the FIPS in-core integrity in a shared object -- so it is not compiled. +# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and +# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM +# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17. if BUILD_AESXTS src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S endif endif -endif if BUILD_SHA src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha.c @@ -1708,18 +1728,23 @@ endif if !BUILD_FIPS_V2_PLUS if BUILD_AESNI src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_asm.S -if BUILD_X86_ASM -src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S -else +# GCM PCLMUL asm is x86_64-only. aes_gcm_asm.S is internally guarded by +# WOLFSSL_X86_64_BUILD, so it is empty on 32-bit x86 and GCM there falls back to +# the portable-C GHASH (still AES-NI for the counter-mode blocks). The 32-bit GCM +# asm (aes_gcm_x86_asm.S) is NOT position-independent -- its .text relocations +# break the FIPS in-core integrity in a shared object -- so it is not compiled. +# Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. if BUILD_AESGCM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +# aes_xts_asm.S provides AES_XTS_*_aesni for BOTH x86_64 (WOLFSSL_X86_64_BUILD) and +# 32-bit x86 (WOLFSSL_X86_BUILD); compile it for either, not only the non-X86_ASM +# (x86_64) path -- the 32-bit AES-NI XTS section was added 2026-06-17. if BUILD_AESXTS src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S endif endif endif -endif if BUILD_CAMELLIA src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/camellia.c diff --git a/tests/api/test_aes.c b/tests/api/test_aes.c index 72221cd04ad..ec767c1319c 100644 --- a/tests/api/test_aes.c +++ b/tests/api/test_aes.c @@ -693,7 +693,14 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key, ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len), 0); ExpectBufEQ(cipher, vector_enc, vector_len); -#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + /* The BAD_LENGTH_E enforcement is in the non-FIPS aes.c implementation + * (see WOLFSSL_AES_CBC_LENGTH_CHECKS guard there). FIPSv2 (cert3389) + * routes through its own historical wc_AesCbcEncrypt_fips wrapper that + * predates this check and silently returns 0 on unaligned input. Only + * v5.x and newer FIPS modules carry the wrapper-level check. Skip the + * assertion for FIPSv2 builds. */ +#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \ + (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0)) ExpectIntEQ(wc_AesCbcEncrypt(aes, cipher, vector, vector_len - 1), WC_NO_ERR_TRACE(BAD_LENGTH_E)); #endif @@ -703,7 +710,9 @@ static int test_wc_AesCbcEncryptDecrypt_WithKey(Aes* aes, byte* key, ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher, WC_AES_BLOCK_SIZE * 2), 0); ExpectBufEQ(decrypted, vector, vector_len); -#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS +#if defined(WOLFSSL_AES_CBC_LENGTH_CHECKS) && \ + (!defined(HAVE_FIPS) || FIPS_VERSION_GE(5,0)) + /* Same FIPSv2 vs v5+ rationale as the encrypt assertion above. */ ExpectIntEQ(wc_AesCbcDecrypt(aes, decrypted, cipher, WC_AES_BLOCK_SIZE * 2 - 1), WC_NO_ERR_TRACE(BAD_LENGTH_E)); #else diff --git a/tests/api/test_evp_pkey.c b/tests/api/test_evp_pkey.c index 9bdd5b9339d..2e106d16d6a 100644 --- a/tests/api/test_evp_pkey.c +++ b/tests/api/test_evp_pkey.c @@ -1526,7 +1526,7 @@ static int test_wolfSSL_EVP_PKEY_sign_verify(int keyType) !defined(HAVE_SELFTEST) #if !defined(HAVE_FIPS) || (defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION>2)) { - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS); } #endif @@ -2159,7 +2159,7 @@ int test_wolfSSL_EVP_PKEY_encrypt(void) XMEMSET(outDec, 0, rsaKeySz); } - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectNotNull(pkey = wolfSSL_EVP_PKEY_new()); ExpectIntEQ(EVP_PKEY_assign_RSA(pkey, rsa), WOLFSSL_SUCCESS); if (EXPECT_FAIL()) { diff --git a/tests/api/test_ossl_rsa.c b/tests/api/test_ossl_rsa.c index dc0cee665ba..250d1df0070 100644 --- a/tests/api/test_ossl_rsa.c +++ b/tests/api/test_ossl_rsa.c @@ -65,7 +65,7 @@ int test_wolfSSL_RSA(void) RSA_free(rsa); rsa = NULL; - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectIntEQ(RSA_size(rsa), 256); #if (!defined(HAVE_FIPS) || FIPS_VERSION3_GT(6,0,0)) && !defined(HAVE_SELFTEST) @@ -306,7 +306,7 @@ int test_wolfSSL_RSA(void) rsa = NULL; #if !defined(USE_FAST_MATH) || (FP_MAX_BITS >= (3072*2)) - ExpectNotNull(rsa = RSA_generate_key(3072, 17, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(3072, 65537, NULL, NULL)); ExpectIntEQ(RSA_size(rsa), 384); ExpectIntEQ(RSA_bits(rsa), 3072); RSA_free(rsa); @@ -461,7 +461,7 @@ int test_wolfSSL_RSA_print(void) RSA_free(rsa); rsa = NULL; - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); ExpectIntEQ(RSA_print(bio, rsa, 0), 1); ExpectIntEQ(RSA_print(bio, rsa, 4), 1); @@ -644,11 +644,11 @@ int test_wolfSSL_RSA_meth(void) RSA_METHOD *rsa_meth = NULL; #ifdef WOLFSSL_KEY_GEN - ExpectNotNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNotNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); RSA_free(rsa); rsa = NULL; #else - ExpectNull(rsa = RSA_generate_key(2048, 3, NULL, NULL)); + ExpectNull(rsa = RSA_generate_key(2048, 65537, NULL, NULL)); #endif ExpectNotNull(RSA_get_default_method()); diff --git a/tests/api/test_slhdsa.c b/tests/api/test_slhdsa.c index 988bbc579e0..4510319925e 100644 --- a/tests/api/test_slhdsa.c +++ b/tests/api/test_slhdsa.c @@ -1081,12 +1081,14 @@ int test_wc_slhdsa_sign_hash(void) WC_HASH_TYPE_SHA256, sig, sigLen), WC_NO_ERR_TRACE(BAD_LENGTH_E)); - /* Unsupported hashType (FIPS 205 doesn't list WC_HASH_TYPE_NONE) hits - * the default branch of slhdsakey_validate_prehash. */ + /* WC_HASH_TYPE_NONE is the "pure SLH-DSA" sentinel and is never a valid + * pre-hash algorithm (FIPS 205 Section 10.2.2 / Table 9). HashSLH-DSA + * signing rejects it with an explicit early check (BAD_FUNC_ARG), not via + * the slhdsa_check_hash_for_n() switch default. */ sigLen = WC_SLHDSA_MAX_SIG_LEN; ExpectIntEQ(wc_SlhDsaKey_SignHash(&key, ctx, sizeof(ctx), hash, 32, WC_HASH_TYPE_NONE, sig, &sigLen, &rng), - WC_NO_ERR_TRACE(NOT_COMPILED_IN)); + WC_NO_ERR_TRACE(BAD_FUNC_ARG)); /* Test SignHash with SHA-256. */ sigLen = WC_SLHDSA_MAX_SIG_LEN; diff --git a/wolfcrypt/benchmark/fips_cast_bench.c b/wolfcrypt/benchmark/fips_cast_bench.c new file mode 100644 index 00000000000..bd7c0e9dbc6 --- /dev/null +++ b/wolfcrypt/benchmark/fips_cast_bench.c @@ -0,0 +1,363 @@ +/* fips_cast_bench.c + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* FIPS CAST benchmark. + * + * Measures the wall-clock cost of each Conditional Algorithm Self-Test (CAST) + * defined by the wolfCrypt v7.0.0 FIPS module so operators can budget module + * power-on latency on resource-constrained operational environments (DSP, + * MCU) where every additional CAST is directly observable as boot-time delay. + * + * Compiled only when HAVE_FIPS is defined (see wolfcrypt/benchmark/include.am + * BUILD_FIPS gate). Calls wc_RunCast_fips(id) repeatedly per CAST and reports + * mean / stddev / min / max for each, plus total time for one pass over all + * enabled CASTs (the cost paid by callers that invoke wc_RunAllCast_fips() at + * application start). + * + * Citations: + * FIPS 140-3 sec 7.10 (Self-Tests) - CAST framework + * FIPS 140-3 IG 10.3.A - Algorithm-by-algorithm CAST coverage + * ISO/IEC 19790:2012 sec 7.10.2 - Conditional self-test execution + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#if !defined(WOLFSSL_USER_SETTINGS) && !defined(WOLFSSL_NO_OPTIONS_H) + #include +#endif +#include /* also picks up user_settings.h */ + +/* fips_cast_bench drives wc_RunCast_fips() / wc_RunAllCast_fips() which were + * introduced in the v7.0.0 module's CAST framework. Older 140-3 modules + * (v5.x, v6.0.0) and the FIPSv2 module do not export these symbols, so when + * fips-check.sh swaps in an older-flavor fips/ tree this file would otherwise + * fail to link. Gate the entire benchmark on FIPS_VERSION3_GE(7,0,0); for + * older flavors we fall through to the empty-main stub at the bottom of the + * file so the build still produces an executable. */ +#if defined(HAVE_FIPS) && FIPS_VERSION3_GE(7,0,0) + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef _WIN32 + #define WIN32_LEAN_AND_MEAN + #include +#else + #include +#endif + + +#define BENCH_DEFAULT_ITERS 10 + +/* Map FIPS_CAST_* enum value to a printable name. Kept in sync with + * wolfssl/wolfcrypt/fips_test.h FipsCastId enum. */ +static const char* cast_name(int id) +{ + switch (id) { + case FIPS_CAST_AES_CBC: return "AES-CBC"; + case FIPS_CAST_AES_GCM: return "AES-GCM"; + case FIPS_CAST_HMAC_SHA1: return "HMAC-SHA-1"; + case FIPS_CAST_HMAC_SHA2_256: return "HMAC-SHA2-256"; + case FIPS_CAST_HMAC_SHA2_512: return "HMAC-SHA2-512"; + case FIPS_CAST_HMAC_SHA3_256: return "HMAC-SHA3-256"; + case FIPS_CAST_DRBG: return "DRBG (SHA-256)"; + case FIPS_CAST_RSA_SIGN_PKCS1v15: return "RSA-SIGN-PKCS1v15"; + case FIPS_CAST_ECC_CDH: return "ECC-CDH"; + case FIPS_CAST_ECC_PRIMITIVE_Z: return "ECC-Primitive-Z"; + case FIPS_CAST_DH_PRIMITIVE_Z: return "DH-Primitive-Z"; + case FIPS_CAST_ECDSA: return "ECDSA"; + case FIPS_CAST_KDF_TLS12: return "KDF-TLS12"; + case FIPS_CAST_KDF_TLS13: return "KDF-TLS13"; + case FIPS_CAST_KDF_SSH: return "KDF-SSH"; +#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(6,0) + case FIPS_CAST_KDF_SRTP: return "KDF-SRTP"; + case FIPS_CAST_ED25519: return "Ed25519"; + case FIPS_CAST_ED448: return "Ed448"; + case FIPS_CAST_PBKDF2: return "PBKDF2"; +#endif +#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0) + case FIPS_CAST_AES_ECB: return "AES-ECB"; + case FIPS_CAST_ML_KEM: return "ML-KEM"; + case FIPS_CAST_ML_DSA: return "ML-DSA"; + case FIPS_CAST_LMS: return "LMS"; + case FIPS_CAST_XMSS: return "XMSS"; + case FIPS_CAST_DRBG_SHA512: return "DRBG (SHA-512)"; + case FIPS_CAST_SLH_DSA: return "SLH-DSA"; + case FIPS_CAST_AES_CMAC: return "AES-CMAC"; + case FIPS_CAST_SHAKE: return "SHAKE"; + case FIPS_CAST_AES_KW: return "AES-KW"; +#endif + default: return "(unknown)"; + } +} + + +/* Monotonic clock in nanoseconds. POSIX clock_gettime(CLOCK_MONOTONIC) on + * Unix-like systems; QueryPerformanceCounter on Windows. */ +static long long now_ns(void) +{ +#ifdef _WIN32 + static LARGE_INTEGER freq = { 0 }; + LARGE_INTEGER count; + if (freq.QuadPart == 0) + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&count); + /* Multiply before divide to keep precision; freq is typically 10MHz. */ + return (long long)((count.QuadPart * 1000000000LL) / freq.QuadPart); +#else + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) + return 0; + return (long long)ts.tv_sec * 1000000000LL + (long long)ts.tv_nsec; +#endif +} + + +/* Run a single CAST iters times, populate stats (in milliseconds). + * Returns 0 on success, non-zero on first CAST failure. */ +static int run_one_cast(int id, int iters, + double* out_mean_ms, double* out_stddev_ms, + double* out_min_ms, double* out_max_ms) +{ + int i; + long long total = 0; + long long mn = LLONG_MAX; + long long mx = 0; + long long* samples; + double mean_ns; + double variance_acc = 0.0; + + if (iters <= 0) + return BAD_FUNC_ARG; + + samples = (long long*)XMALLOC((size_t)iters * sizeof(long long), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (samples == NULL) + return MEMORY_E; + + for (i = 0; i < iters; i++) { + long long t0, t1, dt; + int rc; + + t0 = now_ns(); + rc = wc_RunCast_fips(id); + t1 = now_ns(); + if (rc != 0) { + XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER); + return rc; + } + dt = t1 - t0; + if (dt < 0) + dt = 0; + samples[i] = dt; + total += dt; + if (dt < mn) + mn = dt; + if (dt > mx) + mx = dt; + } + + mean_ns = (double)total / (double)iters; + for (i = 0; i < iters; i++) { + double d = (double)samples[i] - mean_ns; + variance_acc += d * d; + } + XFREE(samples, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + *out_mean_ms = mean_ns / 1.0e6; + *out_stddev_ms = sqrt(variance_acc / (double)iters) / 1.0e6; + *out_min_ms = (double)mn / 1.0e6; + *out_max_ms = (double)mx / 1.0e6; + return 0; +} + + +static void usage(const char* prog) +{ + printf("usage: %s [-i ITERS] [-c CAST_ID] [-l]\n", prog); + printf(" -i ITERS iterations per CAST (default %d)\n", + BENCH_DEFAULT_ITERS); + printf(" -c CAST_ID benchmark only the named CAST id\n"); + printf(" -l list CAST ids and names; do not run\n"); + printf(" -h show this help\n"); +} + + +int main(int argc, char** argv) +{ + int iters = BENCH_DEFAULT_ITERS; + int single = -1; + int list_only = 0; + int i; + int first, last; + int failures = 0; + int run_count = 0; + double total_mean_ms = 0.0; + + for (i = 1; i < argc; i++) { + if (XSTRCMP(argv[i], "-i") == 0 && i + 1 < argc) { + iters = atoi(argv[++i]); + if (iters <= 0) { + fprintf(stderr, "-i requires a positive iteration count\n"); + return 2; + } + } else if (XSTRCMP(argv[i], "-c") == 0 && i + 1 < argc) { + single = atoi(argv[++i]); + } else if (XSTRCMP(argv[i], "-l") == 0) { + list_only = 1; + } else if (XSTRCMP(argv[i], "-h") == 0 + || XSTRCMP(argv[i], "--help") == 0) { + usage(argv[0]); + return 0; + } else { + fprintf(stderr, "unknown argument: %s\n", argv[i]); + usage(argv[0]); + return 2; + } + } + + if (list_only) { + printf("FIPS CAST IDs (FIPS_CAST_COUNT = %d):\n", FIPS_CAST_COUNT); + for (i = 0; i < FIPS_CAST_COUNT; i++) + printf(" %2d %s\n", i, cast_name(i)); + return 0; + } + + if (single >= 0 && single >= FIPS_CAST_COUNT) { + fprintf(stderr, "CAST id %d out of range (0..%d)\n", + single, FIPS_CAST_COUNT - 1); + return 2; + } + + printf("wolfCrypt FIPS CAST benchmark\n"); + printf("Library version: %s\n", LIBWOLFSSL_VERSION_STRING); + printf("FIPS_CAST_COUNT: %d\n", FIPS_CAST_COUNT); + printf("Iterations per CAST: %d\n", iters); + printf("Clock: %s\n", +#ifdef _WIN32 + "QueryPerformanceCounter" +#else + "clock_gettime(CLOCK_MONOTONIC)" +#endif + ); + printf("\n"); + + /* Register the default DRBG seed callback (mirrors benchmark.c and + * wolfcrypt/test/test.c). Builds with WC_RNG_SEED_CB - which include + * the FIPS optest CFLAGS - require every application that initializes + * the RNG to register a seed generator before _InitRng can produce a + * working DRBG; without it, wc_InitRng inside the ECC_PRIMITIVE_Z and + * ECDSA CASTs returns -199 (RNG_FAILURE_E) and the dependent CASTs + * cascade-fail. */ +#ifdef WC_RNG_SEED_CB + { + int seed_cb_rc = wc_SetSeed_Cb(WC_GENERATE_SEED_DEFAULT); + if (seed_cb_rc != 0) { + fprintf(stderr, + "wc_SetSeed_Cb returned %d - DRBG-using CASTs will fail.\n", + seed_cb_rc); + } + } +#endif + + /* Prime: run every CAST once via wc_RunAllCast_fips() so each CAST + * reaches FIPS_CAST_STATE_SUCCESS before we begin measuring. This + * isolates the per-CAST KAT runtime cost from the cascading + * recursive-CAST init chain that fires on the first invocation of a + * cold CAST whose KAT internally calls FIPS-wrapped primitives whose + * own CASTs are still in INIT state. Customers calling + * wc_RunAllCast_fips() at boot pay this one-time cost up front, so + * priming here matches that real-world workflow. */ + { + int prime_rc = wc_RunAllCast_fips(); + if (prime_rc != 0) { + fprintf(stderr, + "wc_RunAllCast_fips() prime returned %d - some CASTs may have failed.\n" + "Per-CAST measurements continue but failed CASTs will report errors.\n\n", + prime_rc); + } + } + + printf("ID | Name | Mean(ms) | StdDev(ms) | Min(ms) " + "| Max(ms)\n"); + printf("---+---------------------+----------+------------+---------" + "+---------\n"); + + first = (single >= 0) ? single : 0; + last = (single >= 0) ? single + 1 : FIPS_CAST_COUNT; + + for (i = first; i < last; i++) { + double mean_ms = 0, sd_ms = 0, mn_ms = 0, mx_ms = 0; + int rc = run_one_cast(i, iters, &mean_ms, &sd_ms, &mn_ms, &mx_ms); + if (rc != 0) { + printf("%2d | %-19s | FAILED rc=%d (%s)\n", + i, cast_name(i), rc, wc_GetErrorString(rc)); + failures++; + continue; + } + printf("%2d | %-19s | %8.3f | %10.3f | %7.3f | %7.3f\n", + i, cast_name(i), mean_ms, sd_ms, mn_ms, mx_ms); + total_mean_ms += mean_ms; + run_count++; + } + + printf("\n"); + if (run_count > 0) { + printf("Sum of mean CAST times (one wc_RunAllCast_fips() pass): " + "%.3f ms\n", total_mean_ms); + } + if (failures > 0) { + printf("WARN: %d CAST(s) failed.\n", failures); + return 1; + } + return 0; +} + +#else /* !(HAVE_FIPS && FIPS_VERSION3_GE(7,0,0)) */ + +#include + +int main(void) +{ +#ifndef HAVE_FIPS + fprintf(stderr, + "fips_cast_bench: built without HAVE_FIPS - nothing to measure\n"); +#else + fprintf(stderr, + "fips_cast_bench: requires v7.0.0+ FIPS module " + "(wc_RunCast_fips / wc_RunAllCast_fips were added in v7) - " + "nothing to measure on this older module flavor\n"); +#endif + return 0; +} + +#endif /* HAVE_FIPS && FIPS_VERSION3_GE(7,0,0) */ diff --git a/wolfcrypt/benchmark/include.am b/wolfcrypt/benchmark/include.am index 22cecbdaefe..130343a14e1 100644 --- a/wolfcrypt/benchmark/include.am +++ b/wolfcrypt/benchmark/include.am @@ -10,6 +10,16 @@ wolfcrypt_benchmark_benchmark_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_ wolfcrypt_benchmark_benchmark_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la noinst_HEADERS += wolfcrypt/benchmark/benchmark.h +# FIPS CAST benchmark - measures wc_RunCast_fips() execution time per CAST. +# Helps operators of resource-constrained operational environments budget +# module power-on latency. Compiled only when FIPS is enabled. +if BUILD_FIPS +noinst_PROGRAMS += wolfcrypt/benchmark/fips_cast_bench +wolfcrypt_benchmark_fips_cast_bench_SOURCES = wolfcrypt/benchmark/fips_cast_bench.c +wolfcrypt_benchmark_fips_cast_bench_LDADD = src/libwolfssl@LIBSUFFIX@.la $(LIB_STATIC_ADD) -lm +wolfcrypt_benchmark_fips_cast_bench_DEPENDENCIES = src/libwolfssl@LIBSUFFIX@.la +endif + endif endif diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 6806acbc965..95386f5f3d0 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -136,6 +136,16 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #include +/* The dedicated GCM (PCLMUL/GHASH) assembly is x86_64-only: the 32-bit GCM asm + * (aes_gcm_x86_asm.S) is not position-independent and its .text relocations break + * the FIPS module in-core integrity in a shared object. On 32-bit x86, GCM uses + * the portable-C GHASH with AES-NI block encryption (AES-NI still engaged for the + * counter-mode blocks) -- mirrors the existing x86_64-gating of the GCM-AVX path. + * Kaleb 2026-06-17; see memory 32bit-x86-aesni-enablement. */ +#if defined(WOLFSSL_AESNI) && defined(WOLFSSL_X86_64_BUILD) + #define WC_AESNI_GCM +#endif + #ifdef WOLF_CRYPTO_CB #include #endif @@ -913,6 +923,124 @@ static void Check_CPU_support_HwCrypto(Aes* aes) } #endif /* __aarch64__ && !WOLFSSL_ARMASM_NO_HW_CRYPTO */ +/* In a Linux kernel module the 32-bit ARM AES asm (ARMv8 AArch32 AES/PMULL crypto + * extension + NEON) MUST run between kernel_neon_begin()/end() or the first SIMD + * instruction faults "undefined instruction". wolfSSL never bracketed the + * in-kernel ARM crypto (it only happened to work on aarch64 because that kernel + * tolerates it). We wrap every AES_*_AARCH32 entry with SAVE/RESTORE_VECTOR_ + * REGISTERS, then #define-redirect the call sites below. Defined before the + * #defines so the wrappers reference the real asm (no recursion). Scoped to + * !__aarch64__ so the aarch64 path is byte-identical. (FIPS 197 AES, SP 800-38D + * AES-GCM.) On a (process-context-only here) save failure the op is skipped + * rather than crashing -- never reached by POST/optest/harness. */ +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \ + !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + static WC_INLINE void wc_svr_AES_set_key_AARCH32(const byte* userKey, + int keylen, byte* key, int dir) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_set_key_AARCH32(userKey, keylen, key, dir); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_encrypt_AARCH32(const byte* inBlock, + byte* outBlock, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_encrypt_AARCH32(inBlock, outBlock, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_decrypt_AARCH32(const byte* inBlock, + byte* outBlock, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_decrypt_AARCH32(inBlock, outBlock, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_encrypt_blocks_AARCH32(const byte* in, + byte* out, word32 sz, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_encrypt_blocks_AARCH32(in, out, sz, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_decrypt_blocks_AARCH32(const byte* in, + byte* out, word32 sz, byte* key, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_decrypt_blocks_AARCH32(in, out, sz, key, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_CBC_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, byte* reg, byte* key, int rounds) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_CBC_encrypt_AARCH32(in, out, sz, reg, key, rounds); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_CBC_decrypt_AARCH32(const byte* in, + byte* out, word32 sz, byte* reg, byte* key, int rounds) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_CBC_decrypt_AARCH32(in, out, sz, reg, key, rounds); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_CTR_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left, + word32 rounds) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_CTR_encrypt_AARCH32(in, out, sz, reg, key, tmp, left, rounds); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_GCM_set_key_AARCH32(const byte* nonce, + const byte* key, byte* gcm_h, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_GCM_set_key_AARCH32(nonce, key, gcm_h, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_GCM_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* nonce, word32 nonceSz, byte* tag, + word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, + byte* tmp, byte* reg, int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_GCM_encrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz, aad, + aadSz, key, gcm_h, tmp, reg, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE int wc_svr_AES_GCM_decrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag, + word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, + byte* tmp, byte* reg, int nr) { + int _ret, _svr = SAVE_VECTOR_REGISTERS2(); + if (_svr != 0) return _svr; + _ret = AES_GCM_decrypt_AARCH32(in, out, sz, nonce, nonceSz, tag, tagSz, + aad, aadSz, key, gcm_h, tmp, reg, nr); + RESTORE_VECTOR_REGISTERS(); + return _ret; + } + #define AES_set_key_AARCH32 wc_svr_AES_set_key_AARCH32 + #define AES_encrypt_AARCH32 wc_svr_AES_encrypt_AARCH32 + #define AES_decrypt_AARCH32 wc_svr_AES_decrypt_AARCH32 + #define AES_encrypt_blocks_AARCH32 wc_svr_AES_encrypt_blocks_AARCH32 + #define AES_decrypt_blocks_AARCH32 wc_svr_AES_decrypt_blocks_AARCH32 + #define AES_CBC_encrypt_AARCH32 wc_svr_AES_CBC_encrypt_AARCH32 + #define AES_CBC_decrypt_AARCH32 wc_svr_AES_CBC_decrypt_AARCH32 + #define AES_CTR_encrypt_AARCH32 wc_svr_AES_CTR_encrypt_AARCH32 + #define AES_GCM_set_key_AARCH32 wc_svr_AES_GCM_set_key_AARCH32 + #define AES_GCM_encrypt_AARCH32 wc_svr_AES_GCM_encrypt_AARCH32 + #define AES_GCM_decrypt_AARCH32 wc_svr_AES_GCM_decrypt_AARCH32 + #ifdef WOLFSSL_AES_XTS + static WC_INLINE void wc_svr_AES_XTS_encrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, + int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_XTS_encrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr); + RESTORE_VECTOR_REGISTERS(); + } + static WC_INLINE void wc_svr_AES_XTS_decrypt_AARCH32(const byte* in, + byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, + int nr) { + if (SAVE_VECTOR_REGISTERS2() != 0) return; + AES_XTS_decrypt_AARCH32(in, out, sz, i, key, key2, tmp, nr); + RESTORE_VECTOR_REGISTERS(); + } + #define AES_XTS_encrypt_AARCH32 wc_svr_AES_XTS_encrypt_AARCH32 + #define AES_XTS_decrypt_AARCH32 wc_svr_AES_XTS_decrypt_AARCH32 + #endif /* WOLFSSL_AES_XTS */ +#endif /* WOLFSSL_USE_SAVE_VECTOR_REGISTERS && !__aarch64__ && !NO_HW_CRYPTO */ + #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) || \ defined(WOLFSSL_AESGCM_STREAM) static WARN_UNUSED_RESULT int wc_AesEncrypt(Aes* aes, const byte* inBlock, @@ -4571,6 +4699,14 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt(Aes* aes, const byte* inBlock, static int AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { + /* Reject invalid AES key lengths early (FIPS 197: 128/192/256 only). + * The lenient wc_AesSetKeyDirect entry only bounds-checks keylen, so + * without this a zero/invalid keylen would be accepted here on 32-bit ARM + * armasm -- the C software path rejects it in wc_AesSetKeyLocal, so match + * that ("check early and BAD_FUNC_ARG out"). */ + if (userKey == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) { + return BAD_FUNC_ARG; + } #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \ defined(WOLFSSL_AES_OFB) || defined(WOLFSSL_AES_XTS) || \ defined(WOLFSSL_AES_CTS) @@ -7751,8 +7887,16 @@ static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) #endif #endif /* !FREESCALE_LTC_AES_GCM */ +/* SP 800-38D AES-GCM software GHASH (FlattenSzInBits length block + RIGHTSHIFTX + + * the GCM table GMULT/GHASH below). On 32-bit ARM with --enable-armasm the HW + * GCM only accelerates the one-shot path; the STREAMING GHASH (GHASH_INIT/UPDATE/ + * FINAL) has no 32-bit asm implementation (only __aarch64__ + PMULL does) and so + * uses this software path. Therefore this block must also be compiled when + * WOLFSSL_AESGCM_STREAM is enabled, even for arm32 armasm HW-crypto -- otherwise + * GHASH_FINAL's fallback GHASH_LEN_BLOCK references an undefined FlattenSzInBits + * (arm64 already gets it via __aarch64__). */ #if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \ - defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM) #if defined(GCM_SMALL) || defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) @@ -7913,7 +8057,7 @@ void GenerateM0(Gcm* gcm) #define HAVE_INTEL_AVX2 #endif -#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \ +#if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT) && \ defined(WC_C_DYNAMIC_FALLBACK) void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m) XASM_LINK("GCM_generate_m0_aesni"); @@ -8001,6 +8145,27 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO #if !defined(__aarch64__) AES_GCM_set_key_AARCH32(iv, (byte*)aes->key, aes->gcm.H, aes->rounds); + #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) + { + /* The 32-bit ARM asm GCM keeps gcm->H in the PMULL (bit-reflected) + * field representation for its one-shot GHASH (vmull.p64). The C + * STREAMING GHASH (wc_AesGcmEncrypt/DecryptUpdate/Final) instead uses + * the gcm->M0 table, which GenerateM0() builds from the *standard* + * H = E_K(0) byte order -- not derivable from the PMULL H. Recompute + * the standard H with the standard AES block encrypt into gcm->H, + * build M0 from it, then restore the PMULL H for the one-shot path. + * Without this, streaming AES-GCM mis-authenticates (AES_GCM_AUTH_E). + * SP 800-38D AES-GCM GHASH. */ + ALIGN16 byte gcmStdH[WC_AES_BLOCK_SIZE]; + ALIGN16 byte gcmZero[WC_AES_BLOCK_SIZE]; + XMEMSET(gcmZero, 0, WC_AES_BLOCK_SIZE); + XMEMCPY(gcmStdH, aes->gcm.H, WC_AES_BLOCK_SIZE); + AES_encrypt_AARCH32(gcmZero, aes->gcm.H, (byte*)aes->key, + (int)aes->rounds); + GenerateM0(&aes->gcm); + XMEMCPY(aes->gcm.H, gcmStdH, WC_AES_BLOCK_SIZE); + } + #endif #else if (aes->use_aes_hw_crypto && aes->use_pmull_hw_crypto) { AES_GCM_set_key_AARCH64(iv, (byte*)aes->key, aes->gcm.H, @@ -8043,7 +8208,7 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) if (ret == 0) { #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) - #if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) + #if defined(WC_AESNI_GCM) && defined(GCM_TABLE_4BIT) if (aes->use_aesni) { #if defined(WC_C_DYNAMIC_FALLBACK) #ifdef HAVE_INTEL_AVX2 @@ -8104,7 +8269,8 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) } -#ifdef WOLFSSL_AESNI + +#ifdef WC_AESNI_GCM void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out, const unsigned char* addt, const unsigned char* ivec, @@ -8158,8 +8324,14 @@ void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, #endif /* WOLFSSL_AESNI */ +/* SP 800-38D software GHASH (GMULT / GHASH / GHASH_ONE_BLOCK_SW per GCM table mode). + * As with FlattenSzInBits above, 32-bit ARM --enable-armasm has no assembly STREAMING + * GHASH (only __aarch64__ + PMULL does), so when WOLFSSL_AESGCM_STREAM is enabled the + * streaming GHASH_INIT/UPDATE/FINAL use this software path and need these symbols + * compiled even for arm32 armasm HW-crypto. Widen the guard accordingly (no effect on + * x86 / arm64, which already satisfy it -> their in-core hash is unchanged). */ #if !defined(WOLFSSL_ARMASM) || defined(__aarch64__) || \ - defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) || defined(WOLFSSL_AESGCM_STREAM) #if defined(GCM_SMALL) static void GMULT(byte* X, byte* Y) { @@ -10533,7 +10705,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, ret = AES_GCM_encrypt_ASM(aes, out, in, sz, iv, ivSz, authTag, authTagSz, authIn, authInSz); #else -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM if (aes->use_aesni) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_AVX2(intel_flags)) { @@ -11145,7 +11317,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* authIn, word32 authInSz) { int ret; -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM int res = WC_NO_ERR_TRACE(AES_GCM_AUTH_E); #endif @@ -11291,7 +11463,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, authTagSz, authIn, authInSz); } #else -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM if (aes->use_aesni) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_AVX2(intel_flags)) { @@ -11334,6 +11506,16 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, VECTOR_REGISTERS_POP; + /* FIPS 140-3 / SP 800-38D: on authentication failure, the decrypted-but- + * unauthenticated plaintext in `out` must not be released to the caller. + * Wipe it here so a caller that ignores the return value cannot observe + * plaintext derived from forged ciphertext. All software paths (AES-NI, + * AVX1/2, ARM HW/NEON, C fallback) funnel through `ret` here, so this + * single guard covers every sub-implementation. */ + if (ret == WC_NO_ERR_TRACE(AES_GCM_AUTH_E) && out != NULL && sz > 0) { + ForceZero(out, sz); + } + return ret; } #endif @@ -11502,7 +11684,7 @@ static WARN_UNUSED_RESULT int AesGcmFinal_C( return 0; } -#ifdef WOLFSSL_AESNI +#ifdef WC_AESNI_GCM #ifdef __cplusplus extern "C" { @@ -12705,7 +12887,7 @@ int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv, if (iv != NULL) { /* Initialize with the IV. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmInit_aesni(aes, iv, ivSz); @@ -12832,7 +13014,7 @@ int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, if (ret == 0) { /* Encrypt with AAD and/or plaintext. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmEncryptUpdate_aesni(aes, out, in, sz, authIn, authInSz); @@ -12892,7 +13074,7 @@ int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz) if (ret == 0) { /* Calculate authentication tag. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmEncryptFinal_aesni(aes, authTag, authTagSz); @@ -12976,7 +13158,7 @@ int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, if (ret == 0) { /* Decrypt with AAD and/or cipher text. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmDecryptUpdate_aesni(aes, out, in, sz, authIn, authInSz); @@ -13034,7 +13216,7 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz) if (ret == 0) { /* Calculate authentication tag and compare with one passed in.. */ - #ifdef WOLFSSL_AESNI + #ifdef WC_AESNI_GCM if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = AesGcmDecryptFinal_aesni(aes, authTag, authTagSz); @@ -13061,6 +13243,10 @@ int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz) } } + /* Streaming decrypt cannot zeroize prior Update output buffers from here + * (Final does not see them). On AES_GCM_AUTH_E, the caller is responsible + * for treating all Update-produced plaintext as invalid and wiping it. + * See PL-R34 Security Policy section 8 (Operational Rules). */ return ret; } #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ diff --git a/wolfcrypt/src/aes_asm.S b/wolfcrypt/src/aes_asm.S index 0371ca8cb22..3ee9983b20b 100644 --- a/wolfcrypt/src/aes_asm.S +++ b/wolfcrypt/src/aes_asm.S @@ -1831,11 +1831,16 @@ _AES_ECB_decrypt_AESNI: push %edi push %esi push %ebx - movl 20(%esp), %edi - movl 24(%esp), %esi - movl 28(%esp), %edx - movl 32(%esp), %ecx - movl 36(%esp), %eax + # 3 pushes above add 12 bytes; args (return addr at 0) are therefore at + # 16/20/24/28/32(%esp), matching AES_ECB_encrypt_AESNI. The original + # 32-bit decrypt used 20/24/28/32/36 (off by 4 -- offsets for 4 pushes), + # so it read the args shifted by one and dereferenced garbage -> segfault. + # This path was never exercised until 32-bit AES-NI was enabled 2026-06-17. + movl 16(%esp), %edi + movl 20(%esp), %esi + movl 24(%esp), %edx + movl 28(%esp), %ecx + movl 32(%esp), %eax movl %edx, %ebx diff --git a/wolfcrypt/src/aes_xts_asm.S b/wolfcrypt/src/aes_xts_asm.S index 09045c6d8f7..ded430c211d 100644 --- a/wolfcrypt/src/aes_xts_asm.S +++ b/wolfcrypt/src/aes_xts_asm.S @@ -2786,6 +2786,518 @@ L_AES_XTS_decrypt_update_avx1_done_dec: #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #endif /* WOLFSSL_X86_64_BUILD */ +#ifdef WOLFSSL_X86_BUILD +/* ========================================================================= + * 32-bit Intel (i386) AES-NI AES-XTS. Single-block ports of the x86_64 + * AES_XTS_*_aesni routines above: SAME algorithm and SAME KAT output, but using + * only xmm0-xmm7 and the i386 cdecl stack ABI. The x86_64 versions use xmm8-15 + * and r8-r15 (none available in 32-bit) and a 4-block pipeline; this port drops + * the pipeline and processes one block at a time, which is correct and still + * AES-NI-accelerated. Added 2026-06-17 so 32-bit Intel AES-NI builds + * (host_cpu=x86 -> WOLFSSL_X86_BUILD) link + run AES-XTS, matching x86_64. The + * x86_64 section above is byte-for-byte unchanged. + * + * The GF(2^128) constant {0x87,1,1,1} is materialized on the stack (PIC-safe; a + * 32-bit shared object would otherwise need a GOT relocation to reach .data). + * + * Conventions: %ebp = frame pointer, %edi = running byte offset. + * xmm0 = tweak T xmm1 = data block xmm2,xmm3 = round-key scratch + * xmm4 = GF scratch xmm5 = tweak' (decrypt CTS) xmm6 = GF const xmm7 = CTS buf + * ========================================================================= */ + +/* Encrypt the block in \blk with expanded key whose base is GP reg \k; the AES + * round count is read from memory operand \nr. Clobbers xmm2,xmm3. */ +.macro AESENC_BLK k, nr, blk + pxor (\k), \blk + movdqu 16(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 32(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 48(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 64(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 80(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 96(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 112(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 128(\k), %xmm2 + aesenc %xmm2, \blk + movdqu 144(\k), %xmm2 + aesenc %xmm2, \blk + cmpl $11, \nr + movdqu 160(\k), %xmm2 + jl .L_encblk_last\@ + aesenc %xmm2, \blk + movdqu 176(\k), %xmm3 + aesenc %xmm3, \blk + cmpl $13, \nr + movdqu 192(\k), %xmm2 + jl .L_encblk_last\@ + aesenc %xmm2, \blk + movdqu 208(\k), %xmm3 + aesenc %xmm3, \blk + movdqu 224(\k), %xmm2 +.L_encblk_last\@: + aesenclast %xmm2, \blk +.endm + +/* Decrypt the block in \blk with inverse-cipher key base \k, rounds at \nr. */ +.macro AESDEC_BLK k, nr, blk + pxor (\k), \blk + movdqu 16(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 32(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 48(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 64(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 80(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 96(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 112(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 128(\k), %xmm2 + aesdec %xmm2, \blk + movdqu 144(\k), %xmm2 + aesdec %xmm2, \blk + cmpl $11, \nr + movdqu 160(\k), %xmm2 + jl .L_decblk_last\@ + aesdec %xmm2, \blk + movdqu 176(\k), %xmm3 + aesdec %xmm3, \blk + cmpl $13, \nr + movdqu 192(\k), %xmm2 + jl .L_decblk_last\@ + aesdec %xmm2, \blk + movdqu 208(\k), %xmm3 + aesdec %xmm3, \blk + movdqu 224(\k), %xmm2 +.L_decblk_last\@: + aesdeclast %xmm2, \blk +.endm + +/* \dst = \src doubled in GF(2^128) (XTS tweak * alpha). xmm6 = {0x87,1,1,1}. */ +.macro TWEAKDBL dst, src + movdqa \src, %xmm4 +.ifnc \dst,\src + movdqa \src, \dst +.endif + psrad $31, %xmm4 + pslld $1, \dst + pshufd $0x93, %xmm4, %xmm4 + pand %xmm6, %xmm4 + pxor %xmm4, \dst +.endm + +/* Build {0x87,1,1,1} into xmm6 using the 16-byte stack scratch at (%esp). */ +.macro GF_CONST + movl $0x87, (%esp) + movl $1, 4(%esp) + movl $1, 8(%esp) + movl $1, 12(%esp) + movdqu (%esp), %xmm6 +.endm + +/* void AES_XTS_init_aesni(unsigned char* i, const unsigned char* tweak_key, + * int tweak_nr); */ +#ifndef __APPLE__ +.text +.globl AES_XTS_init_aesni +.type AES_XTS_init_aesni,@function +.align 16 +AES_XTS_init_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_init_aesni +.p2align 4 +_AES_XTS_init_aesni: +#endif /* __APPLE__ */ + push %ebp + movl %esp, %ebp + movl 8(%ebp), %eax /* i */ + movdqu (%eax), %xmm1 + movl 12(%ebp), %ecx /* tweak_key */ + AESENC_BLK %ecx, 16(%ebp), %xmm1 /* nr = tweak_nr */ + movl 8(%ebp), %eax + movdqu %xmm1, (%eax) + pop %ebp + ret +#ifndef __APPLE__ +.size AES_XTS_init_aesni,.-AES_XTS_init_aesni +#endif /* __APPLE__ */ + +/* void AES_XTS_encrypt_aesni(const unsigned char* in, unsigned char* out, + * word32 sz, const unsigned char* i, const unsigned char* key, + * const unsigned char* key2, int nr); + * args: 8=in 12=out 16=sz 20=i 24=key 28=key2 32=nr */ +#ifndef __APPLE__ +.text +.globl AES_XTS_encrypt_aesni +.type AES_XTS_encrypt_aesni,@function +.align 16 +AES_XTS_encrypt_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_encrypt_aesni +.p2align 4 +_AES_XTS_encrypt_aesni: +#endif /* __APPLE__ */ + push %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + subl $32, %esp + GF_CONST + /* T = AES_key2(i) */ + movl 20(%ebp), %eax + movdqu (%eax), %xmm0 + movl 28(%ebp), %ecx + AESENC_BLK %ecx, 32(%ebp), %xmm0 + xorl %edi, %edi /* offset */ + movl 16(%ebp), %edx + andl $0xfffffff0, %edx /* full = sz & ~15 (encrypt all full blocks) */ +.L_xe_loop: + cmpl %edx, %edi + jge .L_xe_loop_done + movl 8(%ebp), %eax + movdqu (%eax,%edi), %xmm1 + pxor %xmm0, %xmm1 + movl 24(%ebp), %ecx + AESENC_BLK %ecx, 32(%ebp), %xmm1 + pxor %xmm0, %xmm1 + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) + TWEAKDBL %xmm0, %xmm0 + addl $16, %edi + jmp .L_xe_loop +.L_xe_loop_done: + movl 16(%ebp), %eax + cmpl %eax, %edi + je .L_xe_done /* exact multiple of 16 -> no CTS */ + /* ciphertext stealing: steal from the last full ciphertext block */ + subl $16, %edi + movl 12(%ebp), %eax + movdqu (%eax,%edi), %xmm7 + addl $16, %edi + movdqu %xmm7, (%esp) /* buf = C_{m-1} */ + xorl %edx, %edx /* j */ +.L_xe_cts: + movzbl (%esp,%edx), %ecx /* tmp1 = buf[j] */ + movl 8(%ebp), %esi + movzbl (%esi,%edi), %ebx /* tmp2 = in[off] */ + movl 12(%ebp), %esi + movb %cl, (%esi,%edi) /* out[off] = tmp1 */ + movb %bl, (%esp,%edx) /* buf[j] = tmp2 */ + incl %edi + incl %edx + movl 16(%ebp), %eax + cmpl %eax, %edi + jl .L_xe_cts + subl %edx, %edi /* off = m*16 */ + movdqu (%esp), %xmm1 + subl $16, %edi /* off = (m-1)*16 */ + pxor %xmm0, %xmm1 + movl 24(%ebp), %ecx + AESENC_BLK %ecx, 32(%ebp), %xmm1 + pxor %xmm0, %xmm1 + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) +.L_xe_done: + addl $32, %esp + pop %edi + pop %esi + pop %ebx + pop %ebp + ret +#ifndef __APPLE__ +.size AES_XTS_encrypt_aesni,.-AES_XTS_encrypt_aesni +#endif /* __APPLE__ */ + +/* void AES_XTS_encrypt_update_aesni(const unsigned char* in, unsigned char* out, + * word32 sz, const unsigned char* key, unsigned char* i, int nr); + * args: 8=in 12=out 16=sz 20=key 24=i 28=nr. Tweak is read from *i (already + * encrypted) and the advanced tweak written back to *i. */ +#ifndef __APPLE__ +.text +.globl AES_XTS_encrypt_update_aesni +.type AES_XTS_encrypt_update_aesni,@function +.align 16 +AES_XTS_encrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_encrypt_update_aesni +.p2align 4 +_AES_XTS_encrypt_update_aesni: +#endif /* __APPLE__ */ + push %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + subl $32, %esp + GF_CONST + movl 24(%ebp), %eax /* i (tweak, already encrypted) */ + movdqu (%eax), %xmm0 + xorl %edi, %edi + movl 16(%ebp), %edx + andl $0xfffffff0, %edx +.L_xeu_loop: + cmpl %edx, %edi + jge .L_xeu_loop_done + movl 8(%ebp), %eax + movdqu (%eax,%edi), %xmm1 + pxor %xmm0, %xmm1 + movl 20(%ebp), %ecx + AESENC_BLK %ecx, 28(%ebp), %xmm1 + pxor %xmm0, %xmm1 + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) + TWEAKDBL %xmm0, %xmm0 + addl $16, %edi + jmp .L_xeu_loop +.L_xeu_loop_done: + movl 16(%ebp), %eax + cmpl %eax, %edi + je .L_xeu_done + subl $16, %edi + movl 12(%ebp), %eax + movdqu (%eax,%edi), %xmm7 + addl $16, %edi + movdqu %xmm7, (%esp) + xorl %edx, %edx +.L_xeu_cts: + movzbl (%esp,%edx), %ecx + movl 8(%ebp), %esi + movzbl (%esi,%edi), %ebx + movl 12(%ebp), %esi + movb %cl, (%esi,%edi) + movb %bl, (%esp,%edx) + incl %edi + incl %edx + movl 16(%ebp), %eax + cmpl %eax, %edi + jl .L_xeu_cts + subl %edx, %edi + movdqu (%esp), %xmm1 + subl $16, %edi + pxor %xmm0, %xmm1 + movl 20(%ebp), %ecx + AESENC_BLK %ecx, 28(%ebp), %xmm1 + pxor %xmm0, %xmm1 + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) +.L_xeu_done: + movl 24(%ebp), %eax /* write advanced tweak back to *i */ + movdqu %xmm0, (%eax) + addl $32, %esp + pop %edi + pop %esi + pop %ebx + pop %ebp + ret +#ifndef __APPLE__ +.size AES_XTS_encrypt_update_aesni,.-AES_XTS_encrypt_update_aesni +#endif /* __APPLE__ */ + +/* void AES_XTS_decrypt_aesni(const unsigned char* in, unsigned char* out, + * word32 sz, const unsigned char* i, const unsigned char* key, + * const unsigned char* key2, int nr); + * args: 8=in 12=out 16=sz 20=i 24=key 28=key2 32=nr */ +#ifndef __APPLE__ +.text +.globl AES_XTS_decrypt_aesni +.type AES_XTS_decrypt_aesni,@function +.align 16 +AES_XTS_decrypt_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_decrypt_aesni +.p2align 4 +_AES_XTS_decrypt_aesni: +#endif /* __APPLE__ */ + push %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + subl $32, %esp + GF_CONST + /* T = AES_key2(i) (tweak is ENCRYPTED, even for decrypt) */ + movl 20(%ebp), %eax + movdqu (%eax), %xmm0 + movl 28(%ebp), %ecx + AESENC_BLK %ecx, 32(%ebp), %xmm0 + xorl %edi, %edi + movl 16(%ebp), %eax + movl %eax, %edx + andl $0xfffffff0, %edx /* full */ + cmpl %eax, %edx + je .L_xd_bound /* exact multiple -> bound = full */ + subl $16, %edx /* partial -> reserve last full block */ +.L_xd_bound: +.L_xd_loop: + cmpl %edx, %edi + jge .L_xd_loop_done + movl 8(%ebp), %eax + movdqu (%eax,%edi), %xmm1 + pxor %xmm0, %xmm1 + movl 24(%ebp), %ecx + AESDEC_BLK %ecx, 32(%ebp), %xmm1 + pxor %xmm0, %xmm1 + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) + TWEAKDBL %xmm0, %xmm0 + addl $16, %edi + jmp .L_xd_loop +.L_xd_loop_done: + movl 16(%ebp), %eax + cmpl %eax, %edi + je .L_xd_done /* no partial */ + /* last_31 CTS: T' = 2T (xmm5); decrypt last full block with T' */ + TWEAKDBL %xmm5, %xmm0 + movl 8(%ebp), %eax + movdqu (%eax,%edi), %xmm1 + pxor %xmm5, %xmm1 + movl 24(%ebp), %ecx + AESDEC_BLK %ecx, 32(%ebp), %xmm1 + pxor %xmm5, %xmm1 + movdqu %xmm1, (%esp) /* buf */ + addl $16, %edi + xorl %edx, %edx +.L_xd_cts: + movzbl (%esp,%edx), %ecx + movl 8(%ebp), %esi + movzbl (%esi,%edi), %ebx + movl 12(%ebp), %esi + movb %cl, (%esi,%edi) + movb %bl, (%esp,%edx) + incl %edi + incl %edx + movl 16(%ebp), %eax + cmpl %eax, %edi + jl .L_xd_cts + subl %edx, %edi /* off = m*16 */ + movdqu (%esp), %xmm1 + pxor %xmm0, %xmm1 /* ^ T (current) */ + movl 24(%ebp), %ecx + AESDEC_BLK %ecx, 32(%ebp), %xmm1 + pxor %xmm0, %xmm1 + subl $16, %edi /* off = (m-1)*16 */ + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) +.L_xd_done: + addl $32, %esp + pop %edi + pop %esi + pop %ebx + pop %ebp + ret +#ifndef __APPLE__ +.size AES_XTS_decrypt_aesni,.-AES_XTS_decrypt_aesni +#endif /* __APPLE__ */ + +/* void AES_XTS_decrypt_update_aesni(const unsigned char* in, unsigned char* out, + * word32 sz, const unsigned char* key, unsigned char* i, int nr); + * args: 8=in 12=out 16=sz 20=key 24=i 28=nr. Tweak from *i, advanced back to *i. */ +#ifndef __APPLE__ +.text +.globl AES_XTS_decrypt_update_aesni +.type AES_XTS_decrypt_update_aesni,@function +.align 16 +AES_XTS_decrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_decrypt_update_aesni +.p2align 4 +_AES_XTS_decrypt_update_aesni: +#endif /* __APPLE__ */ + push %ebp + movl %esp, %ebp + push %ebx + push %esi + push %edi + subl $32, %esp + GF_CONST + movl 24(%ebp), %eax /* i (tweak) */ + movdqu (%eax), %xmm0 + xorl %edi, %edi + movl 16(%ebp), %eax + movl %eax, %edx + andl $0xfffffff0, %edx + cmpl %eax, %edx + je .L_xdu_bound + subl $16, %edx +.L_xdu_bound: +.L_xdu_loop: + cmpl %edx, %edi + jge .L_xdu_loop_done + movl 8(%ebp), %eax + movdqu (%eax,%edi), %xmm1 + pxor %xmm0, %xmm1 + movl 20(%ebp), %ecx + AESDEC_BLK %ecx, 28(%ebp), %xmm1 + pxor %xmm0, %xmm1 + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) + TWEAKDBL %xmm0, %xmm0 + addl $16, %edi + jmp .L_xdu_loop +.L_xdu_loop_done: + movl 16(%ebp), %eax + cmpl %eax, %edi + je .L_xdu_done + TWEAKDBL %xmm5, %xmm0 + movl 8(%ebp), %eax + movdqu (%eax,%edi), %xmm1 + pxor %xmm5, %xmm1 + movl 20(%ebp), %ecx + AESDEC_BLK %ecx, 28(%ebp), %xmm1 + pxor %xmm5, %xmm1 + movdqu %xmm1, (%esp) + addl $16, %edi + xorl %edx, %edx +.L_xdu_cts: + movzbl (%esp,%edx), %ecx + movl 8(%ebp), %esi + movzbl (%esi,%edi), %ebx + movl 12(%ebp), %esi + movb %cl, (%esi,%edi) + movb %bl, (%esp,%edx) + incl %edi + incl %edx + movl 16(%ebp), %eax + cmpl %eax, %edi + jl .L_xdu_cts + subl %edx, %edi + movdqu (%esp), %xmm1 + pxor %xmm0, %xmm1 + movl 20(%ebp), %ecx + AESDEC_BLK %ecx, 28(%ebp), %xmm1 + pxor %xmm0, %xmm1 + subl $16, %edi + movl 12(%ebp), %eax + movdqu %xmm1, (%eax,%edi) +.L_xdu_done: + movl 24(%ebp), %eax + movdqu %xmm0, (%eax) + addl $32, %esp + pop %edi + pop %esi + pop %ebx + pop %ebp + ret +#ifndef __APPLE__ +.size AES_XTS_decrypt_update_aesni,.-AES_XTS_decrypt_update_aesni +#endif /* __APPLE__ */ + +#endif /* WOLFSSL_X86_BUILD */ #endif /* WOLFSSL_AES_XTS */ #if defined(__linux__) && defined(__ELF__) diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index 2c3670234a6..6b5335ad47d 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -75,6 +75,22 @@ #define cpuid(a,b,c) __cpuidex((int*)a,b,c) #endif /* _MSC_VER */ + /* On the 32-bit x86 Linux kernel (WOLFSSL_LINUXKM + WOLFSSL_X86_BUILD), the + * UAPI header -- pulled in transitively via + * processor.h -> math_emu.h -> ptrace.h on i386 only -- already #defines + * EAX/EBX/ECX/EDX as ptrace register *indices* with DIFFERENT values + * (EAX=6, EBX=0, ECX=1, EDX=2). We use these names as cpuid()-result + * array indices (0..3), so the collision is a real bug, not a cosmetic + * -Werror redefinition: leaving the kernel's values in place would index + * reg[6] (past "unsigned int reg[5]") and mis-compare the vendor string. + * #undef first so our indices win. This is a no-op where the names are + * not predefined (x86_64 kernel uses a processor.h chain without + * math_emu.h, and all user-space builds), so generated code off + * i386-kernel is byte-identical. */ + #undef EAX + #undef EBX + #undef ECX + #undef EDX #define EAX 0 #define EBX 1 #define ECX 2 diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c index 3e9abe3d869..378c5544b72 100644 --- a/wolfcrypt/src/dh.c +++ b/wolfcrypt/src/dh.c @@ -1422,8 +1422,20 @@ int wc_DhGeneratePublic(DhKey* key, byte* priv, word32 privSz, #if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN) if (ret == 0) ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0); - if (ret == 0) - ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, privSz); + if (ret == 0) { + /* Pairwise Consistency Test per SP 800-56A r3 sec 5.6.2.1.4 + * (FFC key pair). FIPS 140-3 IG 10.3.B requires a PCT after + * KeyGen for key-establishment algorithms; on failure under a + * FIPS build the error is remapped to DH_PCT_E so the FIPS + * module's DEGRADE_STATE handler transitions FIPS_CAST_DH_ + * PRIMITIVE_Z to the error state. */ + ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, + privSz); + #ifdef HAVE_FIPS + if (ret != 0) + ret = DH_PCT_E; + #endif + } #endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */ return ret; @@ -1446,8 +1458,20 @@ static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng, #if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN) if (ret == 0) ret = _ffc_validate_public_key(key, pub, *pubSz, NULL, 0, 0); - if (ret == 0) - ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, *privSz); + if (ret == 0) { + /* Pairwise Consistency Test per SP 800-56A r3 sec 5.6.2.1.4 + * (FFC key pair). FIPS 140-3 IG 10.3.B requires a PCT after + * KeyGen for key-establishment algorithms; on failure under a + * FIPS build the error is remapped to DH_PCT_E so the FIPS + * module's DEGRADE_STATE handler transitions FIPS_CAST_DH_ + * PRIMITIVE_Z to the error state. */ + ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, + *privSz); + #ifdef HAVE_FIPS + if (ret != 0) + ret = DH_PCT_E; + #endif + } #endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */ return ret; diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c index 0f70a84cc8b..be0acd033d3 100644 --- a/wolfcrypt/src/error.c +++ b/wolfcrypt/src/error.c @@ -692,6 +692,21 @@ const char* wc_GetErrorString(int error) case SLH_DSA_KAT_FIPS_E: return "SLH-DSA Known Answer Test check FIPS error"; + case SLH_DSA_PCT_E: + return "wolfcrypt SLH-DSA Pairwise Consistency Test Failure"; + + case CMAC_KAT_FIPS_E: + return "AES-CMAC Known Answer Test FIPS error"; + + case SHAKE_KAT_FIPS_E: + return "SHAKE Known Answer Test FIPS error"; + + case DH_PCT_E: + return "wolfcrypt DH (FFC) Pairwise Consistency Test Failure"; + + case AES_KW_KAT_FIPS_E: + return "AES-KW Known Answer Test FIPS error"; + case SEQ_OVERFLOW_E: return "Sequence counter would overflow"; diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c index 5d3157628d2..2fbe29241bd 100644 --- a/wolfcrypt/src/ge_operations.c +++ b/wolfcrypt/src/ge_operations.c @@ -10196,9 +10196,14 @@ void ge_tobytes_nct(unsigned char *s,const ge_p2 *h) /* if HAVE_ED25519 but not HAVE_CURVE25519, and an asm implementation is built, * then curve25519() won't get its WOLFSSL_LOCAL attribute unless we dummy-call * it here. - */ + * This assumes the asm port still emits curve25519() when X25519 is disabled -- + * true for the x86 and 64-bit ARM (armv8-curve25519) ports, but NOT for the + * 32-bit ARM port (armv8-32-curve25519 gates curve25519() on HAVE_CURVE25519), + * so the dummy-call would reference an undefined symbol there. Exclude + * arm32 armasm (RFC 7748 / SP 800-186 X25519). */ #if defined(CURVED25519_ASM) && defined(WOLFSSL_API_PREFIX_MAP) && \ - !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC) + !defined(HAVE_CURVE25519) && !defined(FREESCALE_LTC_ECC) && \ + (!defined(WOLFSSL_ARMASM) || defined(__aarch64__)) WOLFSSL_LOCAL void _wc_curve25519_dummy(void); WOLFSSL_LOCAL void _wc_curve25519_dummy(void) { (void)curve25519((byte *)0, (byte *)0, (const byte *)0); diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S index 6d2f0172994..128e2e91997 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S @@ -27,6 +27,16 @@ #include +/* Honor WC_SHA3_NO_ASM exactly as sha3.c does: when the build forces the C SHA-3 + * (e.g. every Linux kernel-module build sets -DWC_SHA3_NO_ASM via KERNEL_MODE_DEFAULTS), + * this NEON BlockSha3 must NOT be emitted -- otherwise on 32-bit ARM it provides a + * second BlockSha3 that multiply-defines against sha3.c's C BlockSha3 at link time. + * (arm64's sha3 asm is gated on WOLFSSL_ARMASM_CRYPTO_SHA3 (default off) so it never + * hit this; the arm32 NEON path is only gated on WOLFSSL_ARMASM_NO_NEON.) FIPS 202. */ +#ifdef WC_SHA3_NO_ASM + #undef WOLFSSL_ARMASM +#endif + #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) #ifndef WOLFSSL_ARMASM_INLINE diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index 609e7fb6b45..f51d376fab6 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -416,6 +416,24 @@ int wc_DrbgState_MutexFree(void) static int LockDrbgState(void) { #ifndef SINGLE_THREADED +#ifndef WOLFSSL_MUTEX_INITIALIZER + /* Platforms lacking a static mutex initializer (e.g. Windows + * CRITICAL_SECTION) must initialize drbgStateMutex at run time. + * wolfCrypt_Init() does so, but the FIPS 140-3 module's pre-operational + * self test exercises the DRBG -- and therefore this lock -- from a + * load-time constructor that runs before any wolfCrypt_Init() call. + * Locking an uninitialized CRITICAL_SECTION is undefined behavior: it + * tolerates the single-threaded POST but faults (0xC0000005) on the + * degraded-mode CAST re-run. Initialize on demand here + * (wc_DrbgState_MutexInit() is idempotent); the first lock is the + * single-threaded POST, so this is race-free. The mutex guards the + * SP 800-90A DRBG enable/disable state, which must stay consistent. */ + { + int initRet = wc_DrbgState_MutexInit(); + if (initRet != 0) + return initRet; + } +#endif return wc_LockMutex(&drbgStateMutex); #else return 0; @@ -3617,23 +3635,90 @@ int wc_FreeNetRandom(void) #if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED) || \ defined(HAVE_AMD_RDSEED) -#ifdef WOLFSSL_ASYNC_CRYPT - /* need more retries if multiple cores */ - #define INTELRD_RETRY (32 * 8) -#else - #define INTELRD_RETRY 32 +/* INTELRD_RETRY bounds the RDRAND/RDSEED retry loop (IntelRDseed64_r / + * IntelRDrand* below). RDSEED legitimately sets CF=0 (failure) when the on-chip + * conditioned entropy has not been replenished since the last read; per Intel's + * DRNG implementation guidance software must retry. Overridable via -D for OEs + * that need a different budget. */ +#ifndef INTELRD_RETRY + #if defined(WOLFSSL_LINUXKM) + /* Linux-kernel-module: the boot-time FIPS CASTs poll RDSEED during + * module_init (wc_RunAllCast_fips -> ECDSA CAST -> wc_InitRng -> + * wc_GenerateSeed) while the platform RNG is warming up and the hardware + * RDSEED source is heavily contended -- under virtualization especially, + * where the guest's RDSEED is funneled to the (busy) host CPU. RDSEED + * then returns CF=0 far more than the 32-retry userspace default + * tolerates, which made --enable-amdrdseed / --enable-intelrdseed kernel + * modules fail the ECDSA CAST and refuse to load. Use a large retry + * budget: it is a ceiling, not a fixed cost -- RDSEED succeeds in ~1 read + * once entropy is available, so post-boot use is unaffected. */ + #define INTELRD_RETRY 100000 + #elif defined(WOLFSSL_ASYNC_CRYPT) + /* need more retries if multiple cores */ + #define INTELRD_RETRY (32 * 8) + #else + #define INTELRD_RETRY 32 + #endif #endif #if defined(HAVE_INTEL_RDSEED) || defined(HAVE_AMD_RDSEED) +/* Vendor tag used by the optional FIPS_CODE_REVIEW evidence prints below. + * Both the Intel RDSEED and AMD RDSEED entropy sources funnel through the same + * primitive (the x86 RDSEED instruction is implemented by both vendors); only + * the underlying CPU vendor differs. At compile time exactly one of + * HAVE_INTEL_RDSEED / HAVE_AMD_RDSEED is expected to be set for a given OE, so + * this string resolves unambiguously per build. */ +#if defined(HAVE_AMD_RDSEED) +#define WC_RDSEED_VENDOR "AMD" +#else +#define WC_RDSEED_VENDOR "Intel" +#endif + #ifndef USE_INTEL_INTRINSICS - /* return 0 on success */ + /* return 0 on success. + * + * E27 Public Use Document (NIST CMVP entropy source disclosure) explicitly + * documents that wolfSSL polls the x86 CF (Carry Flag) to determine + * whether a given RDSEED invocation delivered a usable conditioned entropy + * sample. The rdseed instruction semantics are: + * CF = 1 (set) -> destination register holds 64 bits of conditioned + * entropy on this cycle; sample is fresh and usable. + * CF = 0 (clear) -> hardware seed pool was empty this cycle; the + * destination register MUST NOT be used; caller is + * required to retry (handled by IntelRDseed64_r + * below, which loops up to INTELRD_RETRY times). + * The "setc %1" mnemonic materialises CF into the byte-sized output bound + * to (ok); the "=qm" output constraint constrains that byte to a q-class + * register (one that has an addressable low byte) so setc can target it. */ static WC_INLINE int IntelRDseed64(word64* seed) { unsigned char ok; __asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok)); +#ifdef FIPS_CODE_REVIEW + /* One-shot tracer: confirm this path is alive on the first call. + * After that go silent so the per-chunk RDSEED prints don't drown + * the optest sanity-log (RDSEED is invoked per 64-bit chunk for + * every DRBG instantiate/reseed; flooding the log obscures real + * signal). Per-request entropy volume is still surfaced by the + * outer wc_GenerateSeed_IntelRD print below. */ + { + static int printed_asm = 0; + if (!printed_asm) { + printed_asm = 1; + printf("FIPS_CODE_REVIEW IntelRDseed64 [asm path, %s] " + "(one-shot): delivered %u bits, CF=%u\n", + WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u), + (unsigned)ok); + } + } +#endif + /* Ternary: CF set (ok != 0) -> 64 bits of conditioned entropy + * captured in *seed, return 0 (success). CF clear (ok == 0) -> sample + * invalid, return -1 so the retry wrapper IntelRDseed64_r() tries + * again. */ return (ok) ? 0 : -1; } @@ -3641,7 +3726,14 @@ int wc_FreeNetRandom(void) /* The compiler Visual Studio uses does not allow inline assembly. * It does allow for Intel intrinsic functions. */ - /* return 0 on success */ + /* return 0 on success. + * + * E27 PUD (NIST CMVP) cited path: _rdseed64_step is the compiler intrinsic + * front-end for the same RDSEED instruction documented in the asm path + * above. The intrinsic returns 1 when CF was set by the underlying RDSEED + * (i.e. the 64-bit conditioned entropy sample in *seed is valid this + * cycle) and 0 when CF was clear (caller MUST retry; *seed MUST NOT be + * consumed). */ # ifdef __GNUC__ __attribute__((target("rdseed"))) # endif @@ -3650,6 +3742,23 @@ int wc_FreeNetRandom(void) int ok; ok = _rdseed64_step((unsigned long long*) seed); +#ifdef FIPS_CODE_REVIEW + /* One-shot tracer; see asm-path comment above for rationale. */ + { + static int printed_intrinsic = 0; + if (!printed_intrinsic) { + printed_intrinsic = 1; + printf("FIPS_CODE_REVIEW IntelRDseed64 [intrinsic path, %s] " + "(one-shot): delivered %u bits, " + "intrinsic_ret=%d (== CF)\n", + WC_RDSEED_VENDOR, (unsigned)(sizeof(word64) * 8u), ok); + } + } +#endif + /* intrinsic_ret == 1 -> CF was set, 64 bits of conditioned entropy + * captured in *seed; return 0 to signal success to the retry wrapper. + * intrinsic_ret == 0 -> CF was clear; return -1 so the retry wrapper + * re-attempts. */ return (ok) ? 0 : -1; } @@ -3662,6 +3771,12 @@ static WC_INLINE int IntelRDseed64_r(word64* rnd) for (i = 0; i < INTELRD_RETRY; i++) { if (IntelRDseed64(rnd) == 0) return 0; + /* Give the hardware entropy source a chance to replenish between + * attempts (Intel DRNG guidance) and yield the CPU when it is safe to + * block. WC_RELAX_LONG_LOOP() is a no-op where blocking is unsafe, so + * this only ever helps -- e.g. it lets other work (and the entropy + * conditioner) run during a long boot-time RDSEED starvation. */ + WC_RELAX_LONG_LOOP(); } return -1; } @@ -3675,6 +3790,19 @@ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz) (void)os; +#ifdef FIPS_CODE_REVIEW + /* Each conditioned entropy sample produced by IntelRDseed64() is 64 bits + * wide. This entry-level trace makes the per-request entropy volume + * obvious in evidence logs: sz bytes requested -> ceil(sz/8) RDSEED + * invocations expected (plus the two-or-three sanity-status reads on the + * first ever call into this function). */ + printf("FIPS_CODE_REVIEW wc_GenerateSeed_IntelRD [%s]: " + "requested %u bytes = %u bits " + "(expect %u RDSEED 64-bit samples)\n", + WC_RDSEED_VENDOR, (unsigned)sz, (unsigned)(sz * 8u), + (unsigned)((sz + sizeof(word64) - 1u) / sizeof(word64))); +#endif + if (!IS_INTEL_RDSEED(intel_flags)) return -1; diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 0a6b6143a7a..d313f81ac95 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -5153,9 +5153,15 @@ static WC_INLINE int RsaSizeCheck(int size) } #ifdef HAVE_FIPS - /* Key size requirements for CAVP */ + /* Approved RSA key sizes per FIPS 186-5 sec 5.1 and NIST SP 800-131Ar2 + * sec 4 Table 2 (Asymmetric Key Establishment) - 2048, 3072, 4096 only. + * 1024-bit RSA was deprecated for FIPS-Approved key generation by + * SP 800-131Ar2 effective 2014-01-01 and is disallowed thereafter. The + * outer wc_MakeRsaKey_fips wrapper already gates on WC_RSA_FIPS_GEN_MIN, + * but RsaSizeCheck itself is reached by library-internal paths that do + * not pass through that wrapper - defense-in-depth removal here closes + * the gap. */ switch (size) { - case 1024: case 2048: case 3072: case 4096: @@ -5415,6 +5421,20 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) goto out; } +#ifdef HAVE_FIPS + /* FIPS 186-5 sec 5.2 (Public Verification Exponent e): 2^16 + 1 <= e < + * 2^256 and e odd. The general non-FIPS check above accepts e >= 3 odd; + * the FIPS Approved range is narrower. e is a long here so the upper + * bound 2^256 is structurally satisfied on any LP64 / LLP64 platform + * (long is at most 64 bits), but the lower bound 65537 must be enforced + * explicitly. Defense-in-depth even though FIPS application code + * conventionally passes e = 65537 (RSA_F4). */ + if (e < 65537L) { + err = BAD_FUNC_ARG; + goto out; + } +#endif + #if defined(WOLFSSL_CRYPTOCELL) err = cc310_RSA_GenerateKeyPair(key, size, e); goto out; diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 047c57dade8..ddb6b304fa8 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -1179,14 +1179,32 @@ int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) return ret; } +/* The 32-bit ARM SHA-256 NEON/crypto-extension transforms use vector registers, + * so in a Linux kernel module they MUST be bracketed with SAVE/RESTORE_VECTOR_ + * REGISTERS (kernel_neon_begin/end) or the first SIMD instruction faults with + * "undefined instruction". Scoped to !__aarch64__ so the aarch64 path is + * unchanged. (FIPS 180-4 SHA-256.) */ +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \ + !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON) + #define WC_SHA256_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail) + #define WC_SHA256_ARM_SVR_END() RESTORE_VECTOR_REGISTERS() +#else + #define WC_SHA256_ARM_SVR_BEGIN(fail) WC_DO_NOTHING + #define WC_SHA256_ARM_SVR_END() WC_DO_NOTHING +#endif + static WC_INLINE int Transform_Sha256(wc_Sha256* sha256, const byte* data) { #if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON) Transform_Sha256_Len_base(sha256, data, WC_SHA256_BLOCK_SIZE); -#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE); #else + WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;); + #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + Transform_Sha256_Len_neon(sha256, data, WC_SHA256_BLOCK_SIZE); + #else Transform_Sha256_Len_crypto(sha256, data, WC_SHA256_BLOCK_SIZE); + #endif + WC_SHA256_ARM_SVR_END(); #endif return 0; } @@ -1196,10 +1214,14 @@ static WC_INLINE int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, { #if defined(WOLFSSL_ARMASM_THUMB2) || defined(WOLFSSL_ARMASM_NO_NEON) Transform_Sha256_Len_base(sha256, data, len); -#elif defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) - Transform_Sha256_Len_neon(sha256, data, len); #else + WC_SHA256_ARM_SVR_BEGIN(return _svr_ret;); + #if defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + Transform_Sha256_Len_neon(sha256, data, len); + #else Transform_Sha256_Len_crypto(sha256, data, len); + #endif + WC_SHA256_ARM_SVR_END(); #endif return 0; } diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index b2f57b13b86..009f538c44b 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -1455,15 +1455,32 @@ static void (*Transform_Sha512_p)(wc_Sha512* sha512, const byte* data) = NULL; static void (*Transform_Sha512_Len_p)(wc_Sha512* sha512, const byte* data, word32 len) = NULL; +/* The 32-bit ARM SHA-512 NEON transform uses vector registers, so in a Linux + * kernel module the asm calls below MUST be bracketed by SAVE/RESTORE_VECTOR_ + * REGISTERS (kernel_neon_begin/end) or the first NEON instruction faults with + * "undefined instruction". Scoped to !__aarch64__ so the aarch64 path (and the + * THUMB2/NO_NEON base path) is unchanged. (FIPS 180-4 SHA-512.) */ +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(__aarch64__) && \ + !defined(WOLFSSL_ARMASM_THUMB2) && !defined(WOLFSSL_ARMASM_NO_NEON) + #define WC_SHA512_ARM_SVR_BEGIN(fail) SAVE_VECTOR_REGISTERS(fail) + #define WC_SHA512_ARM_SVR_END() RESTORE_VECTOR_REGISTERS() +#else + #define WC_SHA512_ARM_SVR_BEGIN(fail) WC_DO_NOTHING + #define WC_SHA512_ARM_SVR_END() WC_DO_NOTHING +#endif static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512, const byte* data) { + WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;); (*Transform_Sha512_p)(sha512, data); + WC_SHA512_ARM_SVR_END(); return 0; } static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, const byte* data, word32 len) { + WC_SHA512_ARM_SVR_BEGIN(return _svr_ret;); (*Transform_Sha512_Len_p)(sha512, data, len); + WC_SHA512_ARM_SVR_END(); return 0; } diff --git a/wolfcrypt/src/wc_lms.c b/wolfcrypt/src/wc_lms.c index f2b62ea5901..d223632ec19 100644 --- a/wolfcrypt/src/wc_lms.c +++ b/wolfcrypt/src/wc_lms.c @@ -26,6 +26,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep LMS (SP 800-208) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nc") + #pragma const_seg(".fipsB$nc") + #endif #endif #include #include diff --git a/wolfcrypt/src/wc_lms_impl.c b/wolfcrypt/src/wc_lms_impl.c index 25e89901a9b..ba449062ca8 100644 --- a/wolfcrypt/src/wc_lms_impl.c +++ b/wolfcrypt/src/wc_lms_impl.c @@ -39,6 +39,15 @@ #include +#if FIPS_VERSION3_GE(2,0,0) + /* Keep this LMS (SP 800-208) implementation's code/const inside the FIPS + * in-core integrity boundary (Windows orders it by named sections). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$ne") + #pragma const_seg(".fipsB$ne") + #endif +#endif + #include #ifdef NO_INLINE @@ -2317,7 +2326,10 @@ static int wc_lms_treehash_update(LmsState* state, LmsPrivState* privState, byte* left = dp + LMS_D_LEN; byte* temp = left + params->hash_len; WC_DECLARE_VAR(stack, byte, (LMS_MAX_HEIGHT + 1) * LMS_MAX_NODE_LEN, 0); - byte* sp; + /* Init to NULL: sp is set and used only on the ret==0 path, but 32-bit ARM + * gcc cannot correlate the two separate `if (ret == 0)` guards and reports a + * false-positive -Wmaybe-uninitialized (x86_64/aarch64 gcc do not). */ + byte* sp = NULL; word32 max_cb = (word32)1 << params->cacheBits; word32 i; diff --git a/wolfcrypt/src/wc_mldsa.c b/wolfcrypt/src/wc_mldsa.c index 45e247e889b..1c028be496b 100644 --- a/wolfcrypt/src/wc_mldsa.c +++ b/wolfcrypt/src/wc_mldsa.c @@ -140,6 +140,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep ML-DSA (FIPS 204) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nc") + #pragma const_seg(".fipsB$nc") + #endif #endif #ifndef WOLFSSL_MLDSA_NO_ASN1 @@ -760,8 +768,103 @@ static int mldsa_hash256_ctx_msg(wc_Shake* shake256, const byte* tr, * @return 0 on success. * @return BAD_FUNC_ARG if hash algorithm not known. */ -static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen) +/* HashML-DSA PH-vs-paramSet enforcement. + * + * FIPS 204 sec. 5.4 (Table 4) restricts the pre-hash PH for HashML-DSA to + * algorithms whose collision-resistance strength meets or exceeds the + * parameter set's claimed security level. Any other PH must be rejected + * for BOTH sigGen and sigVer: + * ML-DSA-44 (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256, + * SHA3-256, SHA3-384, SHA3-512, + * SHAKE-128, SHAKE-256 + * ML-DSA-65 (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, SHAKE-256 + * ML-DSA-87 (256-bit): SHA2-512, SHA3-512, SHAKE-256 + * + * Returns 0 if (hashAlg, level) is an approved combination. Returns + * BAD_FUNC_ARG otherwise -- including for any hash not on the approved + * list (SHA-224, SHA-512/224, SHA3-224, etc.). + */ +static int mldsa_check_hash_for_level(int hashAlg, byte level) { + int strengthBits; /* collision-resistance strength of the chosen hash */ + int requiredBits; /* security level required by the paramSet */ + + switch (hashAlg) { + #ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + strengthBits = 192; + break; + #endif + #ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + strengthBits = 256; + break; + #ifndef WOLFSSL_NOSHA512_256 + case WC_HASH_TYPE_SHA512_256: + /* SHA-512/256 has 128-bit collision resistance (truncated). */ + strengthBits = 128; + break; + #endif + #endif + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + strengthBits = 128; + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_HASH_TYPE_SHA3_384: + strengthBits = 192; + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + strengthBits = 256; + break; + #endif + #endif + #ifdef WOLFSSL_SHAKE128 + case WC_HASH_TYPE_SHAKE128: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHAKE256 + case WC_HASH_TYPE_SHAKE256: + strengthBits = 256; + break; + #endif + default: + /* Hash not on the FIPS 204 Table 4 approved list (e.g. SHA-224, + * SHA-512/224, SHA3-224, MD5). Reject regardless of level. */ + return BAD_FUNC_ARG; + } + + switch (level) { + case WC_ML_DSA_44: + requiredBits = 128; + break; + case WC_ML_DSA_65: + requiredBits = 192; + break; + case WC_ML_DSA_87: + requiredBits = 256; + break; + default: + return BAD_FUNC_ARG; + } + + if (strengthBits < requiredBits) { + return BAD_FUNC_ARG; + } + return 0; +} + +static int mldsa_get_hash_oid(int hash, byte* oidBuffer, word32* oidLen){ int ret = 0; const byte* oid; @@ -9453,11 +9556,17 @@ static int mldsa_sign_ctx_hash_with_seed(wc_MlDsaKey* key, byte oidMsgHash[MLDSA_HASH_OID_LEN + WC_MAX_DIGEST_SIZE]; word32 oidMsgHashLen = 0; - /* Check that the input hash length is valid. */ + /* Check that the input hash length is valid (guards against caller-side + * buffer overruns before we touch hash). */ if ((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg)) { ret = BAD_LENGTH_E; } + /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */ + if (ret == 0) { + ret = mldsa_check_hash_for_level(hashAlg, key->level); + } + if (ret == 0) { XMEMCPY(seedMu, seed, MLDSA_RND_SZ); @@ -10126,12 +10235,17 @@ static int mldsa_verify_ctx_hash(wc_MlDsaKey* key, const byte* ctx, if ((key == NULL) || (key->params == NULL)) { ret = BAD_FUNC_ARG; } - /* Check that the input hash length is valid. */ + /* Check that the input hash length is valid (guards against caller-side + * buffer overruns before we touch hash). */ if ((ret == 0) && ((int)hashLen != wc_HashGetDigestSize((enum wc_HashType)hashAlg))) { ret = BAD_LENGTH_E; } + /* FIPS 204 sec. 5.4 Table 4: enforce hash <-> paramSet matching. */ + if (ret == 0) { + ret = mldsa_check_hash_for_level(hashAlg, key->level); + } if (ret == 0) { /* Step 6: Hash public key. */ diff --git a/wolfcrypt/src/wc_mlkem.c b/wolfcrypt/src/wc_mlkem.c index d03539f7053..dac31afa6e4 100644 --- a/wolfcrypt/src/wc_mlkem.c +++ b/wolfcrypt/src/wc_mlkem.c @@ -81,6 +81,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep ML-KEM (FIPS 203) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$na") + #pragma const_seg(".fipsB$na") + #endif #endif #include @@ -694,49 +702,12 @@ int wc_MlKemKey_MakeKey(MlKemKey* key, WC_RNG* rng) ret = wc_MlKemKey_MakeKeyWithRandom(key, rand, sizeof(rand)); } -#ifdef HAVE_FIPS - /* Pairwise Consistency Test (PCT) per FIPS 140-3 / ISO 19790:2012 - * Section 7.10.3.3: encapsulate with ek, decapsulate with dk, - * verify shared secrets match. */ - if (ret == 0) { - WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, - key->heap); - byte pct_ss1[WC_ML_KEM_SS_SZ]; - byte pct_ss2[WC_ML_KEM_SS_SZ]; - word32 ctSz = 0; - - WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, - key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E); - - if (ret == 0) - ret = wc_MlKemKey_CipherTextSize(key, &ctSz); - - if (ret == 0) - ret = wc_MlKemKey_Encapsulate(key, pct_ct, pct_ss1, rng); - - if (ret == 0) - ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, ctSz); - - if (ret == 0) { - if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0) - ret = ML_KEM_PCT_E; - } - - ForceZero(pct_ss1, sizeof(pct_ss1)); - ForceZero(pct_ss2, sizeof(pct_ss2)); - if (WC_VAR_OK(pct_ct)) - ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE); - - WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER); - - /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT - * must be rendered unusable. Zeroize the generated key material so - * a caller that ignores the return value cannot use it. */ - if (ret != 0) { - wc_MlKemKey_Free(key); - } - } -#endif /* HAVE_FIPS */ + /* PCT now lives in wc_MlKemKey_MakeKeyWithRandom() (called above) so + * that BOTH the random-seeded path (this function) and the + * caller-supplied-seed path (direct invocation of MakeKeyWithRandom) + * exercise the FIPS 140-3 IG 10.3.A 1.B Pairwise Consistency Test. + * Audit A16-1: previously the PCT lived only here, leaving the + * deterministic-seed entry uncovered. */ /* Ensure seeds are zeroized. */ ForceZero((void*)rand, (word32)sizeof(rand)); @@ -985,8 +956,70 @@ int wc_MlKemKey_MakeKeyWithRandom(MlKemKey* key, const unsigned char* rand, ForceZero(e, (size_t)(k * MLKEM_N) * sizeof(sword16)); #endif - /* Note: PCT is performed in wc_MlKemKey_MakeKey() which calls this - * function and has the RNG parameter needed for encapsulation. */ +#ifdef HAVE_FIPS + /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A 1.B and + * ISO/IEC 19790:2012 Section 7.10.3.3: encapsulate with the freshly + * generated encapsulation key (ek), decapsulate the ciphertext with + * the matching decapsulation key (dk), and verify the recovered + * shared secret matches. This entry point (MakeKeyWithRandom) is + * a deterministic key-gen path with no caller-supplied RNG; the PCT + * uses wc_MlKemKey_EncapsulateWithRandom() with a fixed 32-byte test + * value for `m` (FIPS 203 Algorithm 17 input). The encapsulation + * `m` does not need to be unpredictable for the PCT - it only needs + * the encap/decap roundtrip to recover the same shared secret. + * + * Audit A16-1: previously the PCT lived only in wc_MlKemKey_MakeKey + * which generates `rand` from the DRBG, leaving callers of this + * deterministic-seed entry without PCT coverage. */ + if (ret == 0) { + WC_DECLARE_VAR(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, + key->heap); + byte pct_ss1[WC_ML_KEM_SS_SZ]; + byte pct_ss2[WC_ML_KEM_SS_SZ]; + word32 pct_ctSz = 0; + /* Fixed 32-byte test pattern for FIPS 203 Alg 17 `m` parameter. + * Value is arbitrary - PCT only requires encap/decap roundtrip, + * not encap unpredictability. */ + static const byte pct_m[WC_ML_KEM_ENC_RAND_SZ] = { + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, + 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB, 0xAB + }; + + WC_ALLOC_VAR_EX(pct_ct, byte, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE, + key->heap, DYNAMIC_TYPE_TMP_BUFFER, ret = MEMORY_E); + + if (ret == 0) + ret = wc_MlKemKey_CipherTextSize(key, &pct_ctSz); + + if (ret == 0) + ret = wc_MlKemKey_EncapsulateWithRandom(key, pct_ct, pct_ss1, + pct_m, (int)sizeof(pct_m)); + + if (ret == 0) + ret = wc_MlKemKey_Decapsulate(key, pct_ss2, pct_ct, pct_ctSz); + + if (ret == 0) { + if (XMEMCMP(pct_ss1, pct_ss2, WC_ML_KEM_SS_SZ) != 0) + ret = ML_KEM_PCT_E; + } + + ForceZero(pct_ss1, sizeof(pct_ss1)); + ForceZero(pct_ss2, sizeof(pct_ss2)); + if (WC_VAR_OK(pct_ct)) + ForceZero(pct_ct, WC_ML_KEM_MAX_CIPHER_TEXT_SIZE); + + WC_FREE_VAR_EX(pct_ct, key->heap, DYNAMIC_TYPE_TMP_BUFFER); + + /* FIPS 140-3 IG 10.3.A (TE10.35.02): a key pair that fails the PCT + * must be rendered unusable. Zeroize the generated key material so + * a caller that ignores the return value cannot use it. */ + if (ret != 0) { + wc_MlKemKey_Free(key); + } + } +#endif /* HAVE_FIPS */ return ret; } diff --git a/wolfcrypt/src/wc_mlkem_poly.c b/wolfcrypt/src/wc_mlkem_poly.c index aa3d7835d5d..533a3198e02 100644 --- a/wolfcrypt/src/wc_mlkem_poly.c +++ b/wolfcrypt/src/wc_mlkem_poly.c @@ -69,6 +69,15 @@ #include +#if FIPS_VERSION3_GE(2,0,0) + /* Keep this ML-KEM (FIPS 203) implementation's code/const inside the FIPS + * in-core integrity boundary (Windows orders it by named sections). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nb") + #pragma const_seg(".fipsB$nb") + #endif +#endif + #ifdef WC_MLKEM_NO_ASM #undef USE_INTEL_SPEEDUP #undef WOLFSSL_ARMASM diff --git a/wolfcrypt/src/wc_slhdsa.c b/wolfcrypt/src/wc_slhdsa.c index b3cfb56349b..0b776553610 100644 --- a/wolfcrypt/src/wc_slhdsa.c +++ b/wolfcrypt/src/wc_slhdsa.c @@ -24,6 +24,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep SLH-DSA (FIPS 205) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nh") + #pragma const_seg(".fipsB$nh") + #endif #endif #include @@ -6999,6 +7007,49 @@ int wc_SlhDsaKey_MakeKey(SlhDsaKey* key, WC_RNG* rng) key->sk + 2 * n, n); } +#ifdef HAVE_FIPS + /* Pairwise Consistency Test (PCT) per FIPS 140-3 IG 10.3.A (TE10.35.02): + * sign with the new sk, verify with the matching pk. SLH-DSA is a + * stateless hash-based signature scheme (FIPS 205), so the relaxed PCT + * rule for stateful HBS (LMS/XMSS) does not apply -- PCT runs on every + * KeyGen. SignDeterministic avoids consuming RNG state; heap allocation + * is used because SLH-DSA signatures can reach ~50 KB. The paramSet is + * known by this point, so allocate the exact signature length for this + * variant rather than the family-wide worst case -- the difference + * across SLH-DSA variants is roughly 8 KB to 50 KB. */ + if (ret == 0) { + static const byte pct_msg[] = "wolfSSL SLH-DSA PCT"; + word32 pct_sigLen = key->params->sigLen; + byte* pct_sig = (byte*)XMALLOC(pct_sigLen, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + word32 pct_sigSz = pct_sigLen; + + if (pct_sig == NULL) { + ret = MEMORY_E; + } + if (ret == 0) { + ret = wc_SlhDsaKey_SignDeterministic(key, NULL, 0, + pct_msg, sizeof(pct_msg), pct_sig, &pct_sigSz); + } + if (ret == 0) { + ret = wc_SlhDsaKey_Verify(key, NULL, 0, + pct_msg, sizeof(pct_msg), pct_sig, pct_sigSz); + if (ret != 0) { + ret = SLH_DSA_PCT_E; + } + } + if (pct_sig != NULL) { + ForceZero(pct_sig, pct_sigLen); + XFREE(pct_sig, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + /* IG 10.3.A (TE10.35.02): a key pair that fails the PCT must be + * rendered unusable. */ + if (ret != 0) { + wc_SlhDsaKey_Free(key); + } + } +#endif /* HAVE_FIPS */ + return ret; } @@ -7945,6 +7996,99 @@ static const byte slhdsakey_oid_sha3_512[] = { #endif #endif +/* HashSLH-DSA PH-vs-paramSet enforcement. + * + * FIPS 205 sec. 10.2.2 (Table 9) restricts the pre-hash PH for HashSLH-DSA + * to algorithms whose collision-resistance strength meets or exceeds the + * parameter set's security level (encoded as key->params->n in bytes): + * n = 16 (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256, + * SHA3-256, SHA3-384, SHA3-512, + * SHAKE-128, SHAKE-256 + * n = 24 (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, SHAKE-256 + * n = 32 (256-bit): SHA2-512, SHA3-512, SHAKE-256 + * + * Returns 0 if (hashType, n) is an approved combination. Returns + * BAD_FUNC_ARG otherwise -- including for any hash not on the approved + * list (SHA-224, SHA-512/224, SHA3-224, etc.). + */ +static int slhdsa_check_hash_for_n(enum wc_HashType hashType, byte n) +{ + int strengthBits; + int requiredBits; + + switch ((int)hashType) { + #ifndef NO_SHA256 + case WC_HASH_TYPE_SHA256: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHA384 + case WC_HASH_TYPE_SHA384: + strengthBits = 192; + break; + #endif + #ifdef WOLFSSL_SHA512 + case WC_HASH_TYPE_SHA512: + strengthBits = 256; + break; + #ifndef WOLFSSL_NOSHA512_256 + case WC_HASH_TYPE_SHA512_256: + /* SHA-512/256 has 128-bit collision resistance (truncated). */ + strengthBits = 128; + break; + #endif + #endif + #ifdef WOLFSSL_SHA3 + #ifndef WOLFSSL_NOSHA3_256 + case WC_HASH_TYPE_SHA3_256: + strengthBits = 128; + break; + #endif + #ifndef WOLFSSL_NOSHA3_384 + case WC_HASH_TYPE_SHA3_384: + strengthBits = 192; + break; + #endif + #ifndef WOLFSSL_NOSHA3_512 + case WC_HASH_TYPE_SHA3_512: + strengthBits = 256; + break; + #endif + #endif + #ifdef WOLFSSL_SHAKE128 + case WC_HASH_TYPE_SHAKE128: + strengthBits = 128; + break; + #endif + #ifdef WOLFSSL_SHAKE256 + case WC_HASH_TYPE_SHAKE256: + strengthBits = 256; + break; + #endif + default: + /* Hash not on the FIPS 205 Table 9 approved list. */ + return BAD_FUNC_ARG; + } + + if (n == WC_SLHDSA_N_128) { + requiredBits = 128; + } + else if (n == WC_SLHDSA_N_192) { + requiredBits = 192; + } + else if (n == WC_SLHDSA_N_256) { + requiredBits = 256; + } + else { + return BAD_FUNC_ARG; + } + + if (strengthBits < requiredBits) { + return BAD_FUNC_ARG; + } + return 0; +} + /* Validate the caller-supplied pre-hashed digest length and look up the * corresponding OID for the chosen hash algorithm. * @@ -8162,6 +8306,16 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx, (sigSz == NULL)) { ret = BAD_FUNC_ARG; } + /* HashSLH-DSA requires an explicit, approved pre-hash algorithm. + * WC_HASH_TYPE_NONE is the "pure SLH-DSA" sentinel used by the non + * pre-hash Sign/Verify paths and is never valid here. Reject it + * explicitly (FIPS 205 Section 10.2.2 / Table 9) rather than relying on + * the slhdsa_check_hash_for_n() switch default below, so the rejection + * survives any future reordering of the validators or the addition of a + * WC_HASH_TYPE_NONE case to that switch. */ + else if (hashType == WC_HASH_TYPE_NONE) { + ret = BAD_FUNC_ARG; + } /* Check sig buffer is large enough to hold generated signature. */ else if (*sigSz < key->params->sigLen) { ret = BAD_LENGTH_E; @@ -8171,6 +8325,12 @@ static int slhdsakey_signhash_external(SlhDsaKey* key, const byte* ctx, /* Alg 23, Step 6: Return error. */ ret = BAD_FUNC_ARG; } + /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching before + * pre-hashing the message. Rejects PHs whose collision-resistance + * strength is below the paramSet's security level (n). */ + if (ret == 0) { + ret = slhdsa_check_hash_for_n(hashType, key->params->n); + } if (ret == 0) { /* Alg 23, Steps 8-23: Validate caller-supplied pre-hashed digest length * and select OID for the chosen hash algorithm. */ @@ -8405,8 +8565,11 @@ int wc_SlhDsaKey_SignHash(SlhDsaKey* key, const byte* ctx, byte ctxSz, ret = MISSING_KEY; } /* First sanity check on hashType; the downstream prehash validator does - * the detailed check for the actual type. */ - else if ((word32)hashType > (word32)WC_HASH_TYPE_MAX) { + * the detailed check for the actual type. Reject WC_HASH_TYPE_NONE here + * too -- it is never a valid pre-hash (FIPS 205 Section 10.2.2 / Table 9), + * so a known-invalid call fails before consuming DRBG output below. */ + else if ((hashType == WC_HASH_TYPE_NONE) || + ((word32)hashType > (word32)WC_HASH_TYPE_MAX)) { ret = BAD_FUNC_ARG; } @@ -8535,6 +8698,12 @@ int wc_SlhDsaKey_VerifyHash(SlhDsaKey* key, const byte* ctx, byte ctxSz, } #ifdef WOLF_CRYPTO_CB + /* FIPS 205 sec. 10.2.2 Table 9: enforce PH <-> paramSet matching. + * Rejects PHs whose collision-resistance strength is below the + * paramSet's security level (n). */ + if (ret == 0) { + ret = slhdsa_check_hash_for_n(hashType, key->params->n); + } if (ret == 0) { #ifndef WOLF_CRYPTO_CB_FIND if (key->devId != INVALID_DEVID) diff --git a/wolfcrypt/src/wc_xmss.c b/wolfcrypt/src/wc_xmss.c index 9ad311d0bab..1e723333f2f 100644 --- a/wolfcrypt/src/wc_xmss.c +++ b/wolfcrypt/src/wc_xmss.c @@ -26,6 +26,14 @@ #if FIPS_VERSION3_GE(2,0,0) /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ #define FIPS_NO_WRAPPERS + + /* Windows orders the FIPS in-core integrity boundary by named linker + * sections. Keep XMSS (SP 800-208) code/const inside the boundary, + * sorted between sha3 (.fipsA$n) and fips.c (.fipsA$o). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$nf") + #pragma const_seg(".fipsB$nf") + #endif #endif #include #include diff --git a/wolfcrypt/src/wc_xmss_impl.c b/wolfcrypt/src/wc_xmss_impl.c index d1598c4d372..814495130fe 100644 --- a/wolfcrypt/src/wc_xmss_impl.c +++ b/wolfcrypt/src/wc_xmss_impl.c @@ -31,6 +31,15 @@ #include +#if FIPS_VERSION3_GE(2,0,0) + /* Keep this XMSS (SP 800-208) implementation's code/const inside the FIPS + * in-core integrity boundary (Windows orders it by named sections). */ + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$ng") + #pragma const_seg(".fipsB$ng") + #endif +#endif + #include #include diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index e95bba5ae5e..75d42847e4c 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -55884,6 +55884,142 @@ static wc_test_ret_t mldsa_param_test(int param, WC_RNG* rng) #endif return ret; } + +#if !defined(WOLFSSL_DILITHIUM_NO_SIGN) && \ + !defined(WOLFSSL_DILITHIUM_NO_VERIFY) +/* Negative test: HashML-DSA must reject pre-hash algorithms whose collision + * resistance is below the parameter set's claimed security strength. + * + * Per FIPS 204 sec. 5.4, Table 4 (Approved PH algorithms for HashML-DSA): + * ML-DSA-44 (128-bit security): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256, + * SHA3-256, SHA3-384, SHA3-512, + * SHAKE-128, SHAKE-256 + * ML-DSA-65 (192-bit security): SHA2-384, SHA2-512, SHA3-384, SHA3-512, + * SHAKE-256 + * ML-DSA-87 (256-bit security): SHA2-512, SHA3-512, SHAKE-256 + * + * This test attempts sigGen and sigVer with disallowed (paramSet, hash) pairs + * and asserts both reject the call (non-zero return). Before the in-module + * hash-vs-paramSet check exists, wc_dilithium_sign_ctx_hash and + * wc_dilithium_verify_ctx_hash happily proceed with any compiled-in hash, + * so this test is expected to FAIL until the check is added. */ +static wc_test_ret_t mldsa_hash_paramset_rejection_test(WC_RNG* rng) +{ + wc_test_ret_t ret = 0; + int i; +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + dilithium_key* key = NULL; + byte* sig = NULL; +#else + dilithium_key key[1]; + byte sig[DILITHIUM_MAX_SIG_SIZE]; +#endif + word32 sigLen; + int verified; + + /* Fixed-content digests; for a rejection test the bytes don't matter, + * only the (paramSet, hashAlg, hashLen) tuple. Sizes match each hash's + * digest length so the existing length sanity check inside + * wc_dilithium_*_ctx_hash() does NOT short-circuit before reaching the + * hash-vs-paramSet gate we are validating here. */ + static const byte hash32[32] = { /* SHA-256 digest size */ + 0xBA,0x78,0x16,0xBF,0x8F,0x01,0xCF,0xEA, + 0x41,0x41,0x40,0xDE,0x5D,0xAE,0x22,0x23, + 0xB0,0x03,0x61,0xA3,0x96,0x17,0x7A,0x9C, + 0xB4,0x10,0xFF,0x61,0xF2,0x00,0x15,0xAD + }; + static const byte hash48[48] = { /* SHA-384 digest size */ + 0xCB,0x00,0x75,0x3F,0x45,0xA3,0x5E,0x8B, + 0xB5,0xA0,0x3D,0x69,0x9A,0xC6,0x50,0x07, + 0x27,0x2C,0x32,0xAB,0x0E,0xDE,0xD1,0x63, + 0x1A,0x8B,0x60,0x5A,0x43,0xFF,0x5B,0xED, + 0x80,0x86,0x07,0x2B,0xA1,0xE7,0xCC,0x23, + 0x58,0xBA,0xEC,0xA1,0x34,0xC8,0x25,0xA7 + }; + + struct { + int level; + int hashAlg; + const byte* hash; + word32 hashLen; + } forbidden[] = { + /* ML-DSA-65 needs >=192-bit collision strength; SHA-256 = 128-bit. */ + { WC_ML_DSA_65, WC_HASH_TYPE_SHA256, hash32, 32 }, + /* ML-DSA-87 needs >=256-bit collision strength; SHA-384 = 192-bit. */ + { WC_ML_DSA_87, WC_HASH_TYPE_SHA384, hash48, 48 } + }; + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + key = (dilithium_key*)XMALLOC(sizeof(*key), HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + sig = (byte*)XMALLOC(DILITHIUM_MAX_SIG_SIZE, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if ((key == NULL) || (sig == NULL)) { + ERROR_OUT(WC_TEST_RET_ENC_ERRNO, neg_out); + } +#endif + XMEMSET(sig, 0, DILITHIUM_MAX_SIG_SIZE); + + for (i = 0; i < (int)(sizeof(forbidden) / sizeof(forbidden[0])); i++) { + #ifdef WOLFSSL_NO_ML_DSA_65 + if (forbidden[i].level == WC_ML_DSA_65) continue; + #endif + #ifdef WOLFSSL_NO_ML_DSA_87 + if (forbidden[i].level == WC_ML_DSA_87) continue; + #endif + + ret = wc_dilithium_init_ex(key, NULL, devId); + if (ret != 0) { + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out); + } + ret = wc_dilithium_set_level(key, (byte)forbidden[i].level); + if (ret != 0) { + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out); + } + ret = wc_dilithium_make_key(key, rng); + if (ret != 0) { + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), neg_out); + } + + sigLen = (word32)wc_dilithium_sig_size(key); + + /* sigGen with disallowed PH must be REJECTED. */ + PRIVATE_KEY_UNLOCK(); + ret = wc_dilithium_sign_ctx_hash(NULL, 0, forbidden[i].hashAlg, + forbidden[i].hash, forbidden[i].hashLen, sig, &sigLen, key, rng); + PRIVATE_KEY_LOCK(); + if (ret == 0) { + /* Module did NOT reject -- this is the missing-enforcement bug. */ + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out); + } + + /* sigVer with disallowed PH must ALSO be REJECTED. */ + verified = -1; + sigLen = (word32)wc_dilithium_sig_size(key); + ret = wc_dilithium_verify_ctx_hash(sig, sigLen, NULL, 0, + forbidden[i].hashAlg, forbidden[i].hash, forbidden[i].hashLen, + &verified, key); + if (ret == 0) { + wc_dilithium_free(key); + ERROR_OUT(WC_TEST_RET_ENC_NC, neg_out); + } + + wc_dilithium_free(key); + ret = 0; + } + +neg_out: +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + if (sig != NULL) XFREE(sig, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + if (key != NULL) XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; +} +#endif /* !WOLFSSL_DILITHIUM_NO_SIGN && !WOLFSSL_DILITHIUM_NO_VERIFY */ + #endif #if defined(WC_MLDSA_CACHE_MATRIX_A) && \ @@ -56327,6 +56463,18 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t mldsa_test(void) #endif /* (WOLFSSL_MLDSA_PUBLIC_KEY && !WOLFSSL_MLDSA_NO_VERIFY) || * (WOLFSSL_MLDSA_PRIVATE_KEY && !WOLFSSL_MLDSA_NO_SIGN) */ +#if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) && \ + !defined(WOLFSSL_MLDSA_NO_SIGN) && \ + !defined(WOLFSSL_MLDSA_NO_VERIFY) && \ + (!defined(WOLFSSL_NO_ML_DSA_65) || !defined(WOLFSSL_NO_ML_DSA_87)) + /* FIPS 204 sec. 5.4 -- HashML-DSA must reject pre-hashes weaker than + * the parameter set's security level. */ + ret = mldsa_hash_paramset_rejection_test(&rng); + if (ret != 0) { + ERROR_OUT(ret, out); + } +#endif + #if !defined(WOLFSSL_MLDSA_NO_MAKE_KEY) || \ !defined(WOLFSSL_MLDSA_NO_VERIFY) || \ defined(WOLFSSL_MLDSA_PRIVATE_KEY) || \ @@ -57772,29 +57920,18 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); } - /* HashSLH-DSA takes the caller's pre-hashed digest as input. */ + /* HashSLH-DSA takes the caller's pre-hashed digest as input. SHAKE-256 + * is universally approved by FIPS 205 sec. 10.2.2 Table 9 across all + * SLH-DSA-{128,192,256} variants, so use it unconditionally for the + * positive round-trip path -- avoids tripping the in-module + * hash-vs-paramSet validation gate for higher-security paramSets. */ { -#ifdef WOLFSSL_SLHDSA_SHA2 - enum wc_HashType phType = SLHDSA_IS_SHA2(param) ? - WC_HASH_TYPE_SHA256 : WC_HASH_TYPE_SHAKE256; -#else enum wc_HashType phType = WC_HASH_TYPE_SHAKE256; -#endif byte digest[WC_SHA3_512_DIGEST_SIZE]; - word32 digestLen; + word32 digestLen = WC_SHA3_512_DIGEST_SIZE; -#ifdef WOLFSSL_SLHDSA_SHA2 - if (phType == WC_HASH_TYPE_SHA256) { - ret = wc_Sha256Hash(msg, (word32)sizeof(msg), digest); - digestLen = WC_SHA256_DIGEST_SIZE; - } - else -#endif - { - ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest, - WC_SHA3_512_DIGEST_SIZE); - digestLen = WC_SHA3_512_DIGEST_SIZE; - } + ret = wc_Shake256Hash(msg, (word32)sizeof(msg), digest, + WC_SHA3_512_DIGEST_SIZE); if (ret != 0) { ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); } @@ -57813,9 +57950,13 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); } - /* Additional pre-hash test: SHA-384 exercises a different OID path */ + /* Additional pre-hash test: SHA-384 exercises a different OID path. + * Skip for SLH-DSA-256 because SHA-384 (192-bit collision) is below the + * 256-bit security level required by FIPS 205 sec. 10.2.2 Table 9. */ #ifdef WOLFSSL_SHA384 - { + /* Skip SHA-384 for SLH-DSA-256: 192-bit collision strength below the + * 256-bit security level (FIPS 205 sec. 10.2.2 Table 9). */ + if (key->params->n != WC_SLHDSA_N_256) { byte digest384[WC_SHA384_DIGEST_SIZE]; ret = wc_Sha384Hash(msg, (word32)sizeof(msg), digest384); @@ -57875,6 +58016,98 @@ static wc_test_ret_t slhdsa_test_param(enum SlhDsaParam param) return ret; } + +/* Negative test: HashSLH-DSA must reject pre-hash algorithms whose collision + * resistance is below the parameter set's claimed security strength. + * + * Per FIPS 205 sec. 10.2.2, Table 9 (Approved PH for HashSLH-DSA): + * SLH-DSA-*-128* (128-bit): SHA2-256, SHA2-384, SHA2-512, SHA2-512/256, + * SHA3-256, SHA3-384, SHA3-512, + * SHAKE-128, SHAKE-256 + * SLH-DSA-*-192* (192-bit): SHA2-384, SHA2-512, SHA3-384, SHA3-512, + * SHAKE-256 + * SLH-DSA-*-256* (256-bit): SHA2-512, SHA3-512, SHAKE-256 + * + * This test attempts sigGen / sigVer with a disallowed (paramSet, hash) pair + * and asserts both reject the call. Before the in-module hash-vs-paramSet + * check exists, wc_SlhDsaKey_SignHash / wc_SlhDsaKey_VerifyHash happily + * proceed with any compiled-in hash, so this test is expected to FAIL until + * the check is added. */ +static wc_test_ret_t slhdsa_hash_paramset_rejection_test(enum SlhDsaParam param) +{ + int ret = 0; + WC_RNG rng; + SlhDsaKey key[1]; + byte sig[WC_SLHDSA_MAX_SIG_LEN]; + word32 sigLen; + static const byte msg[] = { + 0x48,0x65,0x6c,0x6c,0x6f,0x20,0x57,0x6f, + 0x72,0x6c,0x64,0x21 + }; + byte ctx[1]; + /* Hash that is BELOW the security level of every 192/256-bit paramSet + * tested below. SHA-256 (128-bit collision) is approved only for the + * 128-bit SLH-DSA paramSets, so any 192/256-bit paramSet must reject it. */ + enum wc_HashType badHash = WC_HASH_TYPE_SHA256; + + XMEMSET(&key, 0, sizeof(key)); + +#ifndef HAVE_FIPS + ret = wc_InitRng_ex(&rng, HEAP_HINT, devId); +#else + ret = wc_InitRng(&rng); +#endif + if (ret != 0) return WC_TEST_RET_ENC_EC(ret); + + ret = wc_SlhDsaKey_Init(key, param, NULL, INVALID_DEVID); + if (ret != 0) { + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_EC(ret); + } + + ret = wc_SlhDsaKey_MakeKey(key, &rng); + if (ret != 0) { + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_EC(ret); + } + + /* Only enforce on paramSets above 128-bit security; SHA-256 is approved + * for 128-bit so wouldn't be a rejection target there. */ + if (key->params->n == WC_SLHDSA_N_128) { + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return 0; + } + + /* sigGen with too-weak PH must be REJECTED. */ + sigLen = WC_SLHDSA_MAX_SIG_LEN; + PRIVATE_KEY_UNLOCK(); + ret = wc_SlhDsaKey_SignHash(key, ctx, 0, msg, (word32)sizeof(msg), + badHash, sig, &sigLen, &rng); + PRIVATE_KEY_LOCK(); + if (ret == 0) { + /* Module did NOT reject -- this is the missing-enforcement bug. */ + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_NC; + } + + /* sigVer with too-weak PH must ALSO be REJECTED. */ + sigLen = WC_SLHDSA_MAX_SIG_LEN; + XMEMSET(sig, 0, sigLen); + ret = wc_SlhDsaKey_VerifyHash(key, ctx, 0, msg, (word32)sizeof(msg), + badHash, sig, sigLen); + if (ret == 0) { + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return WC_TEST_RET_ENC_NC; + } + + wc_SlhDsaKey_Free(key); + wc_FreeRng(&rng); + return 0; +} #endif /* True iff slhdsa_test() actually emits at least one `goto out;` / @@ -59868,6 +60101,41 @@ wc_test_ret_t slhdsa_test(void) } #endif + /* FIPS 205 sec. 10.2.2 -- HashSLH-DSA must reject pre-hashes whose + * collision strength is below the paramSet's security level. Use any + * available 192- or 256-bit paramSet to exercise the rejection. The + * 128-bit paramSets allow SHA-256, so they are not useful as targets + * here. */ +#ifdef WOLFSSL_SLHDSA_PARAM_192S + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE192S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHAKE192S (hash-paramset reject)", + 0); + goto out; + } +#elif defined(WOLFSSL_SLHDSA_PARAM_256S) + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHAKE256S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHAKE256S (hash-paramset reject)", + 0); + goto out; + } +#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_192S) + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_192S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHA2_192S (hash-paramset reject)", + 0); + goto out; + } +#elif defined(WOLFSSL_SLHDSA_PARAM_SHA2_256S) + ret = slhdsa_hash_paramset_rejection_test(SLHDSA_SHA2_256S); + if (ret != 0) { + wc_test_render_error_message("SLHDSA_SHA2_256S (hash-paramset reject)", + 0); + goto out; + } +#endif + #endif /* !WOLFSSL_SLHDSA_VERIFY_ONLY */ #if defined(WOLF_PRIVATE_KEY_ID) && \ diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index e3d7637470d..e5b8b2ded9d 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -66,8 +66,14 @@ typedef struct Gcm { #endif WOLFSSL_LOCAL void GenerateM0(Gcm* gcm); +/* The two-byte-pointer GMULT signature is the GCM_SMALL form only. Other GCM + * table modes (GCM_TABLE / GCM_TABLE_4BIT) use a static GMULT taking a table + * argument (byte m[N][16]), so this prototype must be scoped to GCM_SMALL -- + * otherwise on 32-bit ARM armasm with WOLFSSL_AESGCM_STREAM (which now compiles + * the software table GHASH for the streaming path) it conflicts with the + * table-mode GMULT. See SP 800-38D AES-GCM GHASH. */ #if !defined(__aarch64__) && defined(WOLFSSL_ARMASM) && \ - !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) + !defined(WOLFSSL_ARMASM_NO_HW_CRYPTO) && defined(GCM_SMALL) WOLFSSL_LOCAL void GMULT(byte* X, byte* Y); #endif WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h index 5b089f118b4..286a8739f44 100644 --- a/wolfssl/wolfcrypt/error-crypt.h +++ b/wolfssl/wolfcrypt/error-crypt.h @@ -327,9 +327,17 @@ enum wolfCrypt_ErrorCodes { ML_DSA_PCT_E = -1016, /* ML-DSA Pairwise Consistency Test failure */ DRBG_SHA512_KAT_FIPS_E = -1017, /* SHA-512 DRBG KAT failure */ SLH_DSA_KAT_FIPS_E = -1018, /* SLH-DSA CAST KAT failure */ - - WC_SPAN2_LAST_E = -1018, /* Update to indicate last used error code */ - WC_LAST_E = -1018, /* the last code used either here or in + SLH_DSA_PCT_E = -1019, /* SLH-DSA Pairwise Consistency Test failure */ + CMAC_KAT_FIPS_E = -1020, /* AES-CMAC KAT failure (vendor-elected) */ + SHAKE_KAT_FIPS_E = -1021, /* SHAKE KAT failure (vendor-elected) */ + DH_PCT_E = -1022, /* DH (FFC) Pairwise Consistency Test + * failure (SP 800-56A r3 sec 5.6.2.1.4, + * FIPS 140-3 IG 10.3.B) */ + AES_KW_KAT_FIPS_E = -1023, /* AES-KW KAT failure (vendor-elected, + * SP 800-38F sec 6.2 / RFC 3394) */ + + WC_SPAN2_LAST_E = -1023, /* Update to indicate last used error code */ + WC_LAST_E = -1023, /* the last code used either here or in * error-ssl.h */ WC_SPAN2_MIN_CODE_E = -1999, /* Last usable code in span 2 */ diff --git a/wolfssl/wolfcrypt/fips_test.h b/wolfssl/wolfcrypt/fips_test.h index de2b506df2c..41467b0ee2a 100644 --- a/wolfssl/wolfcrypt/fips_test.h +++ b/wolfssl/wolfcrypt/fips_test.h @@ -31,8 +31,23 @@ extern "C" { #endif -/* Added for FIPS v5.3 or later */ -#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3) +/* Added for FIPS v5.3 or later. + * + * v7.0.0 and later upgrade the in-core integrity HMAC to SHA-512 (with a + * 512-bit key) for NSA 2.0 compliance. Customers that must avoid SHA-256 + * anywhere in the validated module can therefore use the v7 module without + * residual SHA-256 integrity material. v5.3 and v6.x retain HMAC-SHA-256. + */ +#if defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(7,0) + #ifdef WOLFSSL_SHA512 + #define FIPS_IN_CORE_DIGEST_SIZE 64 + #define FIPS_IN_CORE_HASH_TYPE WC_SHA512 + #define FIPS_IN_CORE_KEY_SZ 64 + #define FIPS_IN_CORE_VERIFY_SZ FIPS_IN_CORE_KEY_SZ + #else + #error FIPS v7+ integrity test requires WOLFSSL_SHA512 + #endif +#elif defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3) /* Determine FIPS in core hash type and size */ #ifndef NO_SHA256 #define FIPS_IN_CORE_DIGEST_SIZE 32 @@ -62,7 +77,11 @@ enum FipsCastId { FIPS_CAST_RSA_SIGN_PKCS1v15 = 7, FIPS_CAST_ECC_CDH = 8, FIPS_CAST_ECC_PRIMITIVE_Z = 9, - FIPS_CAST_DH_PRIMITIVE_Z = 10, + FIPS_CAST_DH_PRIMITIVE_Z = 10, /* RETIRED (v7+): classic DH dropped + * from the FIPS 140-3 v7 PQ module + * boundary. Preserved for ABI -- + * do not reuse this id, no longer + * triggered. */ FIPS_CAST_ECDSA = 11, FIPS_CAST_KDF_TLS12 = 12, FIPS_CAST_KDF_TLS13 = 13, @@ -80,7 +99,10 @@ enum FipsCastId { FIPS_CAST_XMSS = 23, FIPS_CAST_DRBG_SHA512 = 24, FIPS_CAST_SLH_DSA = 25, - FIPS_CAST_COUNT = 26 + FIPS_CAST_AES_CMAC = 26, + FIPS_CAST_SHAKE = 27, + FIPS_CAST_AES_KW = 28, + FIPS_CAST_COUNT = 29 }; enum FipsCastStateId { diff --git a/wolfssl/wolfcrypt/random.h b/wolfssl/wolfcrypt/random.h index 102f05d6b55..3747ea268f3 100644 --- a/wolfssl/wolfcrypt/random.h +++ b/wolfssl/wolfcrypt/random.h @@ -57,8 +57,12 @@ #define DRBG_SEED_LEN (440/8) #endif +/* Size of the DRBG seed (SHA-512) */ #ifdef WOLFSSL_DRBG_SHA512 - #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A Table 2 */ + #ifndef DRBG_SHA512_SEED_LEN + #define DRBG_SHA512_SEED_LEN (888/8) /* 111 bytes per SP 800-90A + * Table 2 */ + #endif #endif @@ -212,12 +216,20 @@ struct OS_Seed { */ #define ENTROPY_SCALE_FACTOR (512) #elif defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND) - /* The value of 2 applies to Intel's RDSEED which provides about - * 0.5 bits minimum of entropy per bit. The value of 4 gives a - * conservative margin for FIPS. */ + /* Intel RDSEED nominally provides about 0.5 bits min entropy per + * bit (NIST CMVP cert3389 PUD). In FIPS mode we previously used + * ENTROPY_SCALE_FACTOR=8 (256-byte seed) on Intel and 512 (16384- + * byte seed) on AMD, asymmetric per-vendor. As of v7 we adopt the + * AMD worst-case scale of 512 on Intel too: the AMD "Tyzen V1xxxx" + * PUD Table 3 documents 0.656040 bits per 128-bit block as the + * absolute floor across the entire CMVP-validated AMD family, and + * we use that same worst-case oversampling on Intel rather than + * trusting the higher Intel PUD claim, so a single seeding budget + * covers any x86 OE we deploy on. Non-FIPS Intel builds keep the + * lighter scale=2 (Intel-PUD-derived) for performance. */ #if defined(HAVE_FIPS) && defined(HAVE_FIPS_VERSION) && \ (HAVE_FIPS_VERSION >= 2) - #define ENTROPY_SCALE_FACTOR (2*4) + #define ENTROPY_SCALE_FACTOR (512) #else /* Not FIPS, but Intel RDSEED, only double. */ #define ENTROPY_SCALE_FACTOR (2) diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 9f699145847..adf6dd75338 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -557,6 +557,17 @@ #endif /* blinding adds API not available yet in FIPS mode */ #undef WC_RSA_BLINDING + + /* NIST SP 800-38A sec 6.2 specifies CBC operates on plaintext that is + * a multiple of the block size; the cipher does not implement padding + * (project_aes_no_padding_policy). Force the wc_AesCbcEncrypt / + * wc_AesCbcDecrypt block-alignment check on for FIPS builds so a + * length not a multiple of WC_AES_BLOCK_SIZE returns BAD_LENGTH_E + * rather than silently truncating to the largest aligned prefix in + * the underlying implementation. */ + #ifndef WOLFSSL_AES_CBC_LENGTH_CHECKS + #define WOLFSSL_AES_CBC_LENGTH_CHECKS + #endif #endif /* old FIPS has only AES_BLOCK_SIZE. */ @@ -3998,8 +4009,18 @@ #undef HAVE_PUBLIC_FFDHE #endif + /* LinuxKM lkcapi previously needed a 4-byte minimum AES-GCM + * authentication tag for certain kernel-side test vectors. Per + * NIST SP 800-38D sec 5.2.1.2 / sec 8.2 a minimum tag length of 96 bits + * (12 bytes) provides robust integrity for general-purpose use; FIPS + * 140-3 IG C.H reaffirms this 96-bit minimum for Approved-mode AES-GCM. + * Gate the 32-bit-tag relaxation on non-FIPS builds only so the + * v7.0.0 module's Approved configuration retains the full 96-bit + * minimum in all linuxkm and non-linuxkm scenarios. */ +#ifndef HAVE_FIPS #undef WOLFSSL_MIN_AUTH_TAG_SZ #define WOLFSSL_MIN_AUTH_TAG_SZ 4 +#endif #if defined(LINUXKM_LKCAPI_REGISTER) && !defined(WOLFSSL_ASN_INT_LEAD_0_ANY) /* kernel 5.10 crypto manager tests key(s) that fail unless leading