diff --git a/.github/mlc_config.json b/.github/mlc_config.json index 14c45a8fb..7a4a2f652 100644 --- a/.github/mlc_config.json +++ b/.github/mlc_config.json @@ -21,6 +21,10 @@ }, { "pattern": "^https://eprint\\.iacr\\.org" + }, + { + "_comment": "Armv8-M Architecture Reference Manual; the Arm developer site rate-limits / 403s automated link-checks.", + "pattern": "^https://developer\\.arm\\.com" } ] } diff --git a/BIBLIOGRAPHY.md b/BIBLIOGRAPHY.md index 2ed637232..96430656d 100644 --- a/BIBLIOGRAPHY.md +++ b/BIBLIOGRAPHY.md @@ -7,6 +7,26 @@ This file lists the citations made throughout the mldsa-native source code and documentation. +### `AAPCS32` + +* Procedure Call Standard for the Arm Architecture (AAPCS32) +* Author(s): + - Arm Limited +* URL: https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst +* Referenced from: + - [test/abicheck/README.md](test/abicheck/README.md) + - [test/abicheck/armv81m/abicheck_armv81m.c](test/abicheck/armv81m/abicheck_armv81m.c) + +### `AAPCS64` + +* Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64) +* Author(s): + - Arm Limited +* URL: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst +* Referenced from: + - [test/abicheck/README.md](test/abicheck/README.md) + - [test/abicheck/aarch64/abicheck_aarch64.c](test/abicheck/aarch64/abicheck_aarch64.c) + ### `ACVP` * Automated Cryptographic Validation Protocol (ACVP) Server @@ -16,6 +36,16 @@ source code and documentation. * Referenced from: - [README.md](README.md) +### `ArmARMv8M` + +* Armv8-M Architecture Reference Manual (DDI 0553) +* Author(s): + - Arm Limited +* URL: https://developer.arm.com/documentation/ddi0553/latest/ +* Referenced from: + - [test/abicheck/README.md](test/abicheck/README.md) + - [test/abicheck/armv81m/abicheck_armv81m.c](test/abicheck/armv81m/abicheck_armv81m.c) + ### `AutoCorrode` * AutoCorrode software verification framework for Isabelle/HOL @@ -347,6 +377,19 @@ source code and documentation. 
- [proofs/hol_light/aarch64/mldsa/intt_aarch64_asm.S](proofs/hol_light/aarch64/mldsa/intt_aarch64_asm.S) - [proofs/hol_light/aarch64/mldsa/ntt_aarch64_asm.S](proofs/hol_light/aarch64/mldsa/ntt_aarch64_asm.S) +### `SysVAMD64` + +* System V Application Binary Interface — AMD64 Architecture Processor Supplement +* Author(s): + - Michael Matz + - Jan Hubička + - Andreas Jaeger + - Mark Mitchell +* URL: https://gitlab.com/x86-psABIs/x86-64-ABI +* Referenced from: + - [test/abicheck/README.md](test/abicheck/README.md) + - [test/abicheck/x86_64/abicheck_x86_64.c](test/abicheck/x86_64/abicheck_x86_64.c) + ### `libmceliece` * libmceliece implementation of Classic McEliece diff --git a/BIBLIOGRAPHY.yml b/BIBLIOGRAPHY.yml index bffe7ee76..2ac52f513 100644 --- a/BIBLIOGRAPHY.yml +++ b/BIBLIOGRAPHY.yml @@ -204,3 +204,31 @@ name: Project Wycheproof author: Community Cryptography Specification Project url: https://github.com/C2SP/wycheproof + +- id: AAPCS64 + short: AAPCS64 + name: "Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64)" + author: Arm Limited + url: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst + +- id: AAPCS32 + short: AAPCS32 + name: "Procedure Call Standard for the Arm Architecture (AAPCS32)" + author: Arm Limited + url: https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst + +- id: SysVAMD64 + short: System V AMD64 psABI + name: "System V Application Binary Interface — AMD64 Architecture Processor Supplement" + author: + - Matz, Michael + - Hubička, Jan + - Jaeger, Andreas + - Mitchell, Mark + url: https://gitlab.com/x86-psABIs/x86-64-ABI + +- id: ArmARMv8M + short: Armv8-M ARM + name: "Armv8-M Architecture Reference Manual (DDI 0553)" + author: Arm Limited + url: https://developer.arm.com/documentation/ddi0553/latest/ diff --git a/Makefile b/Makefile index 86fe704a7..cdfc6db7c 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ clean quickcheck check-defined-CYCLES \ size_44 size_65 size_87 size \ 
run_size_44 run_size_65 run_size_87 run_size \ - host_info + host_info abicheck run_abicheck SHELL := /usr/bin/env bash .DEFAULT_GOAL := build @@ -47,7 +47,7 @@ quickcheck: test build: func kat acvp wycheproof $(Q)echo " Everything builds fine!" -test: run_kat run_func run_acvp run_wycheproof run_unit run_alloc run_rng_fail +test: run_kat run_func run_acvp run_wycheproof run_unit run_alloc run_rng_fail run_abicheck $(Q)echo " Everything checks fine!" run_kat_44: kat_44 @@ -247,6 +247,22 @@ run_size: \ run_size_65 \ run_size_87 +# ABI checker: verifies each assembly kernel preserves the callee-saved +# registers its platform calling convention requires. Needs OPT=1 (the native +# .S kernels are only assembled then), and on x86_64 also relies on +# MLD_SYSV_ABI_SUPPORTED because the call stub is hand-written SysV asm. +# Unsupported targets get an empty registry and exit success, so this builds +# and runs cleanly on every arch (e.g. riscv64) with no explicit allowlist. +ifeq ($(OPT),1) +abicheck: $(ABICHECK_DIR)/bin/abicheck + +run_abicheck: abicheck + $(W) $(ABICHECK_DIR)/bin/abicheck +else +abicheck: +run_abicheck: +endif + # Display host and compiler feature detection information # Shows which architectural features are supported by both the compiler and host CPU # Usage: make host_info [AUTO=0|1] [CROSS_PREFIX=...] 
diff --git a/dev/aarch64_clean/src/intt_aarch64_asm.S b/dev/aarch64_clean/src/intt_aarch64_asm.S index 06e6ff302..241aabf0c 100644 --- a/dev/aarch64_clean/src/intt_aarch64_asm.S +++ b/dev/aarch64_clean/src/intt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA inverse NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: intt_aarch64_asm + Description: AArch64 ML-DSA inverse NTT + Signature: void mld_intt_aarch64_asm(int32_t *r, const int32_t *zetas_l78, const int32_t *zetas_l123456) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for layers 7-8 (384 x int32_t) + x2: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (160 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S b/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S index 540692ff2..a4df0f6bc 100644 --- a/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +++ b/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l4_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm(int32_t *r, const int32_t a[4][256], const int32_t b[4][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + 
size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + x2: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4) diff --git a/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S b/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S index a68a66f5b..8d02c19fe 100644 --- a/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +++ b/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l5_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm(int32_t *r, const int32_t a[5][256], const int32_t b[5][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + x2: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && 
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5) diff --git a/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S b/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S index 8260d5606..9f9f78c3c 100644 --- a/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +++ b/dev/aarch64_clean/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l7_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm(int32_t *r, const int32_t a[7][256], const int32_t b[7][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + x2: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7) diff --git a/dev/aarch64_clean/src/ntt_aarch64_asm.S b/dev/aarch64_clean/src/ntt_aarch64_asm.S index 2e4777879..e90b46b1e 100644 --- a/dev/aarch64_clean/src/ntt_aarch64_asm.S +++ b/dev/aarch64_clean/src/ntt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA forward NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: ntt_aarch64_asm + Description: AArch64 ML-DSA forward NTT + Signature: void 
mld_ntt_aarch64_asm(int32_t *r, const int32_t *zetas_l123456, const int32_t *zetas_l78) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (144 x int32_t) + x2: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for layers 7-8 (384 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_clean/src/pointwise_montgomery_aarch64_asm.S b/dev/aarch64_clean/src/pointwise_montgomery_aarch64_asm.S index 53cf9537c..dcce1fec8 100644 --- a/dev/aarch64_clean/src/pointwise_montgomery_aarch64_asm.S +++ b/dev/aarch64_clean/src/pointwise_montgomery_aarch64_asm.S @@ -2,6 +2,27 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_pointwise_montgomery_aarch64_asm + Description: AArch64 pointwise Montgomery multiplication of two polynomials + Signature: void mld_poly_pointwise_montgomery_aarch64_asm(int32_t *a, const int32_t *b) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API) || \ diff --git a/dev/aarch64_clean/src/poly_caddq_aarch64_asm.S b/dev/aarch64_clean/src/poly_caddq_aarch64_asm.S index 108475378..e28bab108 100644 --- 
a/dev/aarch64_clean/src/poly_caddq_aarch64_asm.S +++ b/dev/aarch64_clean/src/poly_caddq_aarch64_asm.S @@ -2,6 +2,21 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_caddq_aarch64_asm + Description: AArch64 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_aarch64_asm(int32_t *a) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_clean/src/poly_chknorm_aarch64_asm.S b/dev/aarch64_clean/src/poly_chknorm_aarch64_asm.S index d4bf8c22a..2c6d0aa12 100644 --- a/dev/aarch64_clean/src/poly_chknorm_aarch64_asm.S +++ b/dev/aarch64_clean/src/poly_chknorm_aarch64_asm.S @@ -2,6 +2,26 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_chknorm_aarch64_asm + Description: AArch64 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_aarch64_asm(const int32_t *a, int32_t B) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + x1: + type: scalar + c_parameter: int32_t B + description: Norm bound + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_clean/src/poly_decompose_32_aarch64_asm.S b/dev/aarch64_clean/src/poly_decompose_32_aarch64_asm.S index a311e6786..3488a4afd 100644 --- a/dev/aarch64_clean/src/poly_decompose_32_aarch64_asm.S +++ 
b/dev/aarch64_clean/src/poly_decompose_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_32_aarch64_asm + Description: AArch64 coefficient decomposition (alpha = (Q-1)/32) + Signature: void mld_poly_decompose_32_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_clean/src/poly_decompose_88_aarch64_asm.S b/dev/aarch64_clean/src/poly_decompose_88_aarch64_asm.S index 8821bf2ad..ecde76d1c 100644 --- a/dev/aarch64_clean/src/poly_decompose_88_aarch64_asm.S +++ b/dev/aarch64_clean/src/poly_decompose_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_88_aarch64_asm + Description: AArch64 coefficient decomposition (alpha = (Q-1)/88) + Signature: void mld_poly_decompose_88_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && 
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_clean/src/poly_use_hint_32_aarch64_asm.S b/dev/aarch64_clean/src/poly_use_hint_32_aarch64_asm.S index d772d60cb..b630bfbf7 100644 --- a/dev/aarch64_clean/src/poly_use_hint_32_aarch64_asm.S +++ b/dev/aarch64_clean/src/poly_use_hint_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_32_aarch64_asm + Description: AArch64 hint application (alpha = (Q-1)/32) + Signature: void mld_poly_use_hint_32_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_clean/src/poly_use_hint_88_aarch64_asm.S b/dev/aarch64_clean/src/poly_use_hint_88_aarch64_asm.S index 28470e5c5..31252699b 100644 --- a/dev/aarch64_clean/src/poly_use_hint_88_aarch64_asm.S +++ b/dev/aarch64_clean/src/poly_use_hint_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_88_aarch64_asm + Description: AArch64 hint application (alpha = (Q-1)/88) + Signature: void mld_poly_use_hint_88_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + 
c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_clean/src/polyz_unpack_17_aarch64_asm.S b/dev/aarch64_clean/src/polyz_unpack_17_aarch64_asm.S index 7856fd6bd..8e15263b1 100644 --- a/dev/aarch64_clean/src/polyz_unpack_17_aarch64_asm.S +++ b/dev/aarch64_clean/src/polyz_unpack_17_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_17_aarch64_asm + Description: AArch64 unpacking of 18-bit packed coefficients (GAMMA1 = 2^17) + Signature: void mld_polyz_unpack_17_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \ diff --git a/dev/aarch64_clean/src/polyz_unpack_19_aarch64_asm.S b/dev/aarch64_clean/src/polyz_unpack_19_aarch64_asm.S index a82c991f9..3a7eee6da 100644 --- a/dev/aarch64_clean/src/polyz_unpack_19_aarch64_asm.S +++ b/dev/aarch64_clean/src/polyz_unpack_19_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_19_aarch64_asm + Description: AArch64 unpacking of 20-bit packed coefficients (GAMMA1 = 2^19) + Signature: void mld_polyz_unpack_19_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI:
+ Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \ diff --git a/dev/aarch64_clean/src/rej_uniform_aarch64_asm.S b/dev/aarch64_clean/src/rej_uniform_aarch64_asm.S index 3c49998dd..752b1f4ca 100644 --- a/dev/aarch64_clean/src/rej_uniform_aarch64_asm.S +++ b/dev/aarch64_clean/src/rej_uniform_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_aarch64_asm + Description: AArch64 rejection sampling of uniform coefficients mod q + Signature: uint64_t mld_rej_uniform_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 24) + test_with: 840 # MLD_POLY_UNIFORM_NBLOCKS * SHAKE128_RATE = 5 * 168 + x3: + type: buffer + size_bytes: 256 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (256 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git 
a/dev/aarch64_clean/src/rej_uniform_eta2_aarch64_asm.S b/dev/aarch64_clean/src/rej_uniform_eta2_aarch64_asm.S index f48b8383e..00e63a8fb 100644 --- a/dev/aarch64_clean/src/rej_uniform_eta2_aarch64_asm.S +++ b/dev/aarch64_clean/src/rej_uniform_eta2_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta2_aarch64_asm + Description: AArch64 rejection sampling of eta=2 secret coefficients + Signature: uint64_t mld_rej_uniform_eta2_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 136 # MLD_AARCH64_REJ_UNIFORM_ETA2_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_NO_KEYPAIR_API) && \ diff --git a/dev/aarch64_clean/src/rej_uniform_eta4_aarch64_asm.S b/dev/aarch64_clean/src/rej_uniform_eta4_aarch64_asm.S index ae04425c4..bb053c5ee 100644 --- a/dev/aarch64_clean/src/rej_uniform_eta4_aarch64_asm.S +++ b/dev/aarch64_clean/src/rej_uniform_eta4_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta4_aarch64_asm + Description: AArch64 rejection sampling of eta=4 secret coefficients + Signature: uint64_t mld_rej_uniform_eta4_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: 
buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 272 # MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_NO_KEYPAIR_API) && \ diff --git a/dev/aarch64_opt/src/intt_aarch64_asm.S b/dev/aarch64_opt/src/intt_aarch64_asm.S index 1efdc6dd0..657866653 100644 --- a/dev/aarch64_opt/src/intt_aarch64_asm.S +++ b/dev/aarch64_opt/src/intt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA inverse NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: intt_aarch64_asm + Description: AArch64 ML-DSA inverse NTT + Signature: void mld_intt_aarch64_asm(int32_t *r, const int32_t *zetas_l78, const int32_t *zetas_l123456) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for layers 7-8 (384 x int32_t) + x2: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (160 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) /* simpasm: header-end */ diff --git a/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S 
b/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S index 540692ff2..a4df0f6bc 100644 --- a/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +++ b/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l4_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm(int32_t *r, const int32_t a[4][256], const int32_t b[4][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + x2: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4) diff --git a/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S b/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S index a68a66f5b..8d02c19fe 100644 --- a/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +++ b/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l5_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void 
mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm(int32_t *r, const int32_t a[5][256], const int32_t b[5][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + x2: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5) diff --git a/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S b/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S index 8260d5606..9f9f78c3c 100644 --- a/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +++ b/dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l7_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm(int32_t *r, const int32_t a[7][256], const int32_t b[7][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + x2: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const 
int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7) diff --git a/dev/aarch64_opt/src/ntt_aarch64_asm.S b/dev/aarch64_opt/src/ntt_aarch64_asm.S index 2e7019b79..3c9dab725 100644 --- a/dev/aarch64_opt/src/ntt_aarch64_asm.S +++ b/dev/aarch64_opt/src/ntt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA forward NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: ntt_aarch64_asm + Description: AArch64 ML-DSA forward NTT + Signature: void mld_ntt_aarch64_asm(int32_t *r, const int32_t *zetas_l123456, const int32_t *zetas_l78) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (144 x int32_t) + x2: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for layers 7-8 (384 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) /* simpasm: header-end */ diff --git a/dev/aarch64_opt/src/pointwise_montgomery_aarch64_asm.S b/dev/aarch64_opt/src/pointwise_montgomery_aarch64_asm.S index 53cf9537c..dcce1fec8 100644 --- a/dev/aarch64_opt/src/pointwise_montgomery_aarch64_asm.S +++ b/dev/aarch64_opt/src/pointwise_montgomery_aarch64_asm.S @@ -2,6 +2,27 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_pointwise_montgomery_aarch64_asm + Description: AArch64 pointwise Montgomery multiplication of two polynomials + Signature: void 
mld_poly_pointwise_montgomery_aarch64_asm(int32_t *a, const int32_t *b) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API) || \ diff --git a/dev/aarch64_opt/src/poly_caddq_aarch64_asm.S b/dev/aarch64_opt/src/poly_caddq_aarch64_asm.S index 108475378..e28bab108 100644 --- a/dev/aarch64_opt/src/poly_caddq_aarch64_asm.S +++ b/dev/aarch64_opt/src/poly_caddq_aarch64_asm.S @@ -2,6 +2,21 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_caddq_aarch64_asm + Description: AArch64 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_aarch64_asm(int32_t *a) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_opt/src/poly_chknorm_aarch64_asm.S b/dev/aarch64_opt/src/poly_chknorm_aarch64_asm.S index a1491b2ca..1d4b7c249 100644 --- a/dev/aarch64_opt/src/poly_chknorm_aarch64_asm.S +++ b/dev/aarch64_opt/src/poly_chknorm_aarch64_asm.S @@ -2,6 +2,26 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_chknorm_aarch64_asm + Description: AArch64 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_aarch64_asm(const int32_t *a, int32_t B) + 
ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + x1: + type: scalar + c_parameter: int32_t B + description: Norm bound + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_opt/src/poly_decompose_32_aarch64_asm.S b/dev/aarch64_opt/src/poly_decompose_32_aarch64_asm.S index 1096e3eae..13d8b93aa 100644 --- a/dev/aarch64_opt/src/poly_decompose_32_aarch64_asm.S +++ b/dev/aarch64_opt/src/poly_decompose_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_32_aarch64_asm + Description: AArch64 coefficient decomposition (alpha = (Q-1)/32) + Signature: void mld_poly_decompose_32_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_opt/src/poly_decompose_88_aarch64_asm.S b/dev/aarch64_opt/src/poly_decompose_88_aarch64_asm.S index 94568eaaa..bfbbd0c78 100644 --- a/dev/aarch64_opt/src/poly_decompose_88_aarch64_asm.S +++ b/dev/aarch64_opt/src/poly_decompose_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: 
poly_decompose_88_aarch64_asm + Description: AArch64 coefficient decomposition (alpha = (Q-1)/88) + Signature: void mld_poly_decompose_88_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_opt/src/poly_use_hint_32_aarch64_asm.S b/dev/aarch64_opt/src/poly_use_hint_32_aarch64_asm.S index d772d60cb..b630bfbf7 100644 --- a/dev/aarch64_opt/src/poly_use_hint_32_aarch64_asm.S +++ b/dev/aarch64_opt/src/poly_use_hint_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_32_aarch64_asm + Description: AArch64 hint application (alpha = (Q-1)/32) + Signature: void mld_poly_use_hint_32_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_opt/src/poly_use_hint_88_aarch64_asm.S b/dev/aarch64_opt/src/poly_use_hint_88_aarch64_asm.S index 28470e5c5..31252699b 100644 --- a/dev/aarch64_opt/src/poly_use_hint_88_aarch64_asm.S 
+++ b/dev/aarch64_opt/src/poly_use_hint_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_88_aarch64_asm + Description: AArch64 hint application (alpha = (Q-1)/88) + Signature: void mld_poly_use_hint_88_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/aarch64_opt/src/polyz_unpack_17_aarch64_asm.S b/dev/aarch64_opt/src/polyz_unpack_17_aarch64_asm.S index 7856fd6bd..8e15263b1 100644 --- a/dev/aarch64_opt/src/polyz_unpack_17_aarch64_asm.S +++ b/dev/aarch64_opt/src/polyz_unpack_17_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_17_aarch64_asm + Description: AArch64 unpacking of 17-bit packed coefficients + Signature: void mld_polyz_unpack_17_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ 
(!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \ diff --git a/dev/aarch64_opt/src/polyz_unpack_19_aarch64_asm.S b/dev/aarch64_opt/src/polyz_unpack_19_aarch64_asm.S index a82c991f9..3a7eee6da 100644 --- a/dev/aarch64_opt/src/polyz_unpack_19_aarch64_asm.S +++ b/dev/aarch64_opt/src/polyz_unpack_19_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_19_aarch64_asm + Description: AArch64 unpacking of 19-bit packed coefficients + Signature: void mld_polyz_unpack_19_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \ diff --git a/dev/aarch64_opt/src/rej_uniform_aarch64_asm.S b/dev/aarch64_opt/src/rej_uniform_aarch64_asm.S index 3c49998dd..752b1f4ca 100644 --- a/dev/aarch64_opt/src/rej_uniform_aarch64_asm.S +++ b/dev/aarch64_opt/src/rej_uniform_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_aarch64_asm + Description: AArch64 rejection sampling of uniform coefficients mod q + Signature: uint64_t mld_rej_uniform_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer 
(256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 24) + test_with: 840 # MLD_POLY_UNIFORM_NBLOCKS * SHAKE128_RATE = 5 * 168 + x3: + type: buffer + size_bytes: 256 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (256 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/aarch64_opt/src/rej_uniform_eta2_aarch64_asm.S b/dev/aarch64_opt/src/rej_uniform_eta2_aarch64_asm.S index f48b8383e..00e63a8fb 100644 --- a/dev/aarch64_opt/src/rej_uniform_eta2_aarch64_asm.S +++ b/dev/aarch64_opt/src/rej_uniform_eta2_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta2_aarch64_asm + Description: AArch64 rejection sampling of eta=2 secret coefficients + Signature: uint64_t mld_rej_uniform_eta2_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 136 # MLD_AARCH64_REJ_UNIFORM_ETA2_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_NO_KEYPAIR_API) && \ diff --git a/dev/aarch64_opt/src/rej_uniform_eta4_aarch64_asm.S 
b/dev/aarch64_opt/src/rej_uniform_eta4_aarch64_asm.S index ae04425c4..bb053c5ee 100644 --- a/dev/aarch64_opt/src/rej_uniform_eta4_aarch64_asm.S +++ b/dev/aarch64_opt/src/rej_uniform_eta4_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta4_aarch64_asm + Description: AArch64 rejection sampling of eta=4 secret coefficients + Signature: uint64_t mld_rej_uniform_eta4_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 272 # MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_NO_KEYPAIR_API) && \ diff --git a/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S index 3525803b3..ed8d9220e 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S @@ -10,10 +10,12 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x1_scalar_asm + Name: keccak_f1600_x1_scalar_aarch64_asm Description: AArch64 scalar implementation of Keccak-f[1600] permutation for single state Signature: void mld_keccak_f1600_x1_scalar_aarch64_asm(uint64_t state[25], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 x0: type: buffer size_bytes: 200 diff 
--git a/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S index eb1958841..8244f165e 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S @@ -16,10 +16,13 @@ */ /*yaml - Name: keccak_f1600_x1_v84a_asm + Name: keccak_f1600_x1_v84a_aarch64_asm Description: AArch64 ARMv8.4-A implementation of Keccak-f[1600] permutation for single state Signature: void mld_keccak_f1600_x1_v84a_aarch64_asm(uint64_t state[25], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 200 @@ -30,7 +33,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 64 diff --git a/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S index 4dadd927d..36db82d3e 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S @@ -16,10 +16,13 @@ */ /*yaml - Name: keccak_f1600_x2_v84a_asm + Name: keccak_f1600_x2_v84a_aarch64_asm Description: AArch64 ARMv8.4-A implementation of Keccak-f[1600] permutation for two sequential states Signature: void mld_keccak_f1600_x2_v84a_aarch64_asm(uint64_t state[50], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 400 @@ -30,7 +33,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 64 diff --git a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S index 5d20665e3..b5b105c27 
100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S @@ -10,10 +10,12 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x4_v8a_scalar_hybrid_asm + Name: keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm Description: AArch64 hybrid scalar/vector implementation of Keccak-f[1600] permutation for four sequential states Signature: void mld_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm(uint64_t state[100], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 x0: type: buffer size_bytes: 800 @@ -24,7 +26,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 224 diff --git a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S index 191a997fe..943940f2c 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S @@ -10,10 +10,13 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm + Name: keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm Description: AArch64 hybrid scalar/vector implementation of Keccak-f[1600] permutation for four sequential states with ARMv8.4-A optimizations Signature: void mld_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm(uint64_t state[100], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 800 @@ -24,7 +27,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 224 diff --git 
a/dev/fips202/armv81m/src/keccak_f1600_x4_mve.S b/dev/fips202/armv81m/src/keccak_f1600_x4_mve.S index 95b6e7a75..c5109e7db 100644 --- a/dev/fips202/armv81m/src/keccak_f1600_x4_mve.S +++ b/dev/fips202/armv81m/src/keccak_f1600_x4_mve.S @@ -10,6 +10,9 @@ Description: Armv8.1-M MVE implementation of batched (x4) Keccak-f[1600] permutation using bit-interleaved state Signature: void mld_keccak_f1600_x4_mve_asm(void *state, void *tmpstate, const uint32_t *rc) ABI: + Architecture: armv81m + CallingConvention: AAPCS32 + Features: [MVE] r0: type: buffer size_bytes: 800 diff --git a/dev/fips202/x86_64/src/keccak_f1600_x4_avx2_asm.S b/dev/fips202/x86_64/src/keccak_f1600_x4_avx2_asm.S index 06b066f6b..443c1e8f0 100644 --- a/dev/fips202/x86_64/src/keccak_f1600_x4_avx2_asm.S +++ b/dev/fips202/x86_64/src/keccak_f1600_x4_avx2_asm.S @@ -3,6 +3,40 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: keccak_f1600_x4_avx2_asm + Description: x86_64 AVX2 Keccak-f[1600] permutation for four sequential states + Signature: void mld_keccak_f1600_x4_avx2_asm(uint64_t states[100], const uint64_t rc[24], const uint64_t rho8[4], const uint64_t rho56[4]) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 800 + permissions: read/write + c_parameter: uint64_t states[100] + description: Four sequential Keccak states (4 x 25 x uint64_t) + rsi: + type: buffer + size_bytes: 192 + permissions: read-only + c_parameter: const uint64_t rc[24] + description: Round constants (24 x uint64_t) + rdx: + type: buffer + size_bytes: 32 + permissions: read-only + c_parameter: const uint64_t rho8[4] + description: Rotation constant rho8 (4 x uint64_t) + rcx: + type: buffer + size_bytes: 32 + permissions: read-only + c_parameter: const uint64_t rho56[4] + description: Rotation constant rho56 (4 x uint64_t) +*/ + #include "../../../../common.h" #if defined(MLD_FIPS202_X86_64_NEED_X4_AVX2) 
&& \ diff --git a/dev/x86_64/src/intt_avx2_asm.S b/dev/x86_64/src/intt_avx2_asm.S index 942ed57aa..115f33acc 100644 --- a/dev/x86_64/src/intt_avx2_asm.S +++ b/dev/x86_64/src/intt_avx2_asm.S @@ -18,6 +18,28 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: invntt_avx2_asm + Description: x86_64 AVX2 inverse NTT + Signature: void mld_invntt_avx2_asm(int32_t *r, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/x86_64/src/ntt_avx2_asm.S b/dev/x86_64/src/ntt_avx2_asm.S index 5ce680c00..07eabf42f 100644 --- a/dev/x86_64/src/ntt_avx2_asm.S +++ b/dev/x86_64/src/ntt_avx2_asm.S @@ -18,6 +18,28 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: ntt_avx2_asm + Description: x86_64 AVX2 forward NTT + Signature: void mld_ntt_avx2_asm(int32_t *r, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/x86_64/src/nttunpack_avx2_asm.S b/dev/x86_64/src/nttunpack_avx2_asm.S index 4019f8bd3..5389280a3 100644 --- a/dev/x86_64/src/nttunpack_avx2_asm.S +++ b/dev/x86_64/src/nttunpack_avx2_asm.S @@ -18,6 +18,22 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: nttunpack_avx2_asm + Description: x86_64 AVX2 NTT coefficient unpacking/permutation + Signature: void mld_nttunpack_avx2_asm(int32_t *r) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/x86_64/src/pointwise_acc_l4_avx2_asm.S b/dev/x86_64/src/pointwise_acc_l4_avx2_asm.S index 85249dced..18e8b96e8 100644 --- a/dev/x86_64/src/pointwise_acc_l4_avx2_asm.S +++ b/dev/x86_64/src/pointwise_acc_l4_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l4_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_pointwise_acc_l4_avx2_asm(int32_t *c, const int32_t a[4][256], const int32_t b[4][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/x86_64/src/pointwise_acc_l5_avx2_asm.S b/dev/x86_64/src/pointwise_acc_l5_avx2_asm.S index 181d6fea8..3f0af5471 100644 --- a/dev/x86_64/src/pointwise_acc_l5_avx2_asm.S +++ b/dev/x86_64/src/pointwise_acc_l5_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l5_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void mld_pointwise_acc_l5_avx2_asm(int32_t *c, const int32_t a[5][256], const int32_t b[5][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/x86_64/src/pointwise_acc_l7_avx2_asm.S b/dev/x86_64/src/pointwise_acc_l7_avx2_asm.S index 904dd8dd2..afb860485 100644 --- a/dev/x86_64/src/pointwise_acc_l7_avx2_asm.S +++ b/dev/x86_64/src/pointwise_acc_l7_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l7_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_pointwise_acc_l7_avx2_asm(int32_t *c, const int32_t a[7][256], const int32_t b[7][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/dev/x86_64/src/pointwise_avx2_asm.S b/dev/x86_64/src/pointwise_avx2_asm.S index 057a4fb57..99257ecad 100644 --- a/dev/x86_64/src/pointwise_avx2_asm.S +++ b/dev/x86_64/src/pointwise_avx2_asm.S @@ -17,6 +17,34 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_avx2_asm + Description: x86_64 AVX2 pointwise Montgomery multiplication + Signature: void mld_pointwise_avx2_asm(int32_t *a, const int32_t *b, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) + rdx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/dev/x86_64/src/poly_caddq_avx2_asm.S b/dev/x86_64/src/poly_caddq_avx2_asm.S index cc61d792c..acef16e9a 100644 --- a/dev/x86_64/src/poly_caddq_avx2_asm.S +++ b/dev/x86_64/src/poly_caddq_avx2_asm.S @@ -27,6 +27,22 @@ * Arguments: - int32_t *r: pointer to input/output polynomial **************************************************/ +/*yaml + Name: poly_caddq_avx2_asm + Description: x86_64 AVX2 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_avx2_asm(int32_t *r) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/dev/x86_64/src/poly_chknorm_avx2_asm.S b/dev/x86_64/src/poly_chknorm_avx2_asm.S index 412c89e79..4a5ce9f57 100644 --- a/dev/x86_64/src/poly_chknorm_avx2_asm.S +++ b/dev/x86_64/src/poly_chknorm_avx2_asm.S @@ -31,6 +31,27 @@ * Returns: - 1 if any |coefficient| >= B, 0 otherwise. 
**************************************************/ +/*yaml + Name: poly_chknorm_avx2_asm + Description: x86_64 AVX2 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_avx2_asm(const int32_t *a, int32_t B) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + rsi: + type: scalar + c_parameter: int32_t B + description: Norm bound (must be non-negative) + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/dev/x86_64/src/polyz_unpack_17_avx2_asm.S b/dev/x86_64/src/polyz_unpack_17_avx2_asm.S index dd32c1bac..aa0176689 100644 --- a/dev/x86_64/src/polyz_unpack_17_avx2_asm.S +++ b/dev/x86_64/src/polyz_unpack_17_avx2_asm.S @@ -29,6 +29,28 @@ * - const uint8_t *a: pointer to packed input (576 bytes) **************************************************/ +/*yaml + Name: polyz_unpack_17_avx2_asm + Description: x86_64 AVX2 unpacking of 17-bit packed coefficients + Signature: void mld_polyz_unpack_17_avx2_asm(int32_t *r, const uint8_t *a) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *a + description: Packed input bytes +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/dev/x86_64/src/polyz_unpack_19_avx2_asm.S b/dev/x86_64/src/polyz_unpack_19_avx2_asm.S index 2d8d8f529..f6a758320 100644 --- a/dev/x86_64/src/polyz_unpack_19_avx2_asm.S +++ b/dev/x86_64/src/polyz_unpack_19_avx2_asm.S @@ -29,6 +29,28 @@ * - const uint8_t *a: pointer to packed input (640 bytes) 
**************************************************/ +/*yaml + Name: polyz_unpack_19_avx2_asm + Description: x86_64 AVX2 unpacking of 19-bit packed coefficients + Signature: void mld_polyz_unpack_19_avx2_asm(int32_t *r, const uint8_t *a) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *a + description: Packed input bytes +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/mldsa/mldsa_native.h b/mldsa/mldsa_native.h index 4310f2aae..1cb25ca45 100644 --- a/mldsa/mldsa_native.h +++ b/mldsa/mldsa_native.h @@ -444,6 +444,9 @@ int MLD_API_NAMESPACE(signature_extmu)( * MLD_CONFIG_CONTEXT_PARAMETER is defined; type set by * MLD_CONFIG_CONTEXT_PARAMETER_TYPE. * + * On failure, *smlen is set to 0. No partial signed-message output is + * preserved, but if sm == m, the input message bytes remain unchanged. + * * @retval 0 Success. 
* @retval MLD_ERR_OUT_OF_MEMORY MLD_CONFIG_CUSTOM_ALLOC_FREE was * used and an allocation via diff --git a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S index 219205d55..427e47dd6 100644 --- a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +++ b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S @@ -10,10 +10,12 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x1_scalar_asm + Name: keccak_f1600_x1_scalar_aarch64_asm Description: AArch64 scalar implementation of Keccak-f[1600] permutation for single state Signature: void mld_keccak_f1600_x1_scalar_aarch64_asm(uint64_t state[25], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 x0: type: buffer size_bytes: 200 diff --git a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S index aadeec0b7..665e46319 100644 --- a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +++ b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S @@ -16,10 +16,13 @@ */ /*yaml - Name: keccak_f1600_x1_v84a_asm + Name: keccak_f1600_x1_v84a_aarch64_asm Description: AArch64 ARMv8.4-A implementation of Keccak-f[1600] permutation for single state Signature: void mld_keccak_f1600_x1_v84a_aarch64_asm(uint64_t state[25], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 200 @@ -30,7 +33,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 64 diff --git a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S index 
b132f5740..9b306dbb9 100644 --- a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +++ b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S @@ -16,10 +16,13 @@ */ /*yaml - Name: keccak_f1600_x2_v84a_asm + Name: keccak_f1600_x2_v84a_aarch64_asm Description: AArch64 ARMv8.4-A implementation of Keccak-f[1600] permutation for two sequential states Signature: void mld_keccak_f1600_x2_v84a_aarch64_asm(uint64_t state[50], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 400 @@ -30,7 +33,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 64 diff --git a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S index 4443e230f..28b2d9732 100644 --- a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +++ b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S @@ -10,10 +10,12 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x4_v8a_scalar_hybrid_asm + Name: keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm Description: AArch64 hybrid scalar/vector implementation of Keccak-f[1600] permutation for four sequential states Signature: void mld_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm(uint64_t state[100], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 x0: type: buffer size_bytes: 800 @@ -24,7 +26,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 224 diff --git a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S 
b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S index ff24b6850..96a5e6587 100644 --- a/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +++ b/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S @@ -10,10 +10,13 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm + Name: keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm Description: AArch64 hybrid scalar/vector implementation of Keccak-f[1600] permutation for four sequential states with ARMv8.4-A optimizations Signature: void mld_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm(uint64_t state[100], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 800 @@ -24,7 +27,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 224 diff --git a/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S b/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S index 4e0967c7d..f57dd51a1 100644 --- a/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +++ b/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S @@ -10,6 +10,9 @@ Description: Armv8.1-M MVE implementation of batched (x4) Keccak-f[1600] permutation using bit-interleaved state Signature: void mld_keccak_f1600_x4_mve_asm(void *state, void *tmpstate, const uint32_t *rc) ABI: + Architecture: armv81m + CallingConvention: AAPCS32 + Features: [MVE] r0: type: buffer size_bytes: 800 diff --git a/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S b/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S index fd74227e1..79a577d2c 100644 --- a/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +++ 
b/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S @@ -3,6 +3,40 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: keccak_f1600_x4_avx2_asm + Description: x86_64 AVX2 Keccak-f[1600] permutation for four sequential states + Signature: void mld_keccak_f1600_x4_avx2_asm(uint64_t states[100], const uint64_t rc[24], const uint64_t rho8[4], const uint64_t rho56[4]) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 800 + permissions: read/write + c_parameter: uint64_t states[100] + description: Four sequential Keccak states (4 x 25 x uint64_t) + rsi: + type: buffer + size_bytes: 192 + permissions: read-only + c_parameter: const uint64_t rc[24] + description: Round constants (24 x uint64_t) + rdx: + type: buffer + size_bytes: 32 + permissions: read-only + c_parameter: const uint64_t rho8[4] + description: Rotation constant rho8 (4 x uint64_t) + rcx: + type: buffer + size_bytes: 32 + permissions: read-only + c_parameter: const uint64_t rho56[4] + description: Rotation constant rho56 (4 x uint64_t) +*/ + #include "../../../../common.h" #if defined(MLD_FIPS202_X86_64_NEED_X4_AVX2) && \ diff --git a/mldsa/src/native/aarch64/src/intt_aarch64_asm.S b/mldsa/src/native/aarch64/src/intt_aarch64_asm.S index 2cfee205c..e01a0d553 100644 --- a/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/intt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA inverse NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: intt_aarch64_asm + Description: AArch64 ML-DSA inverse NTT + Signature: void mld_intt_aarch64_asm(int32_t *r, const int32_t *zetas_l78, const int32_t *zetas_l123456) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x 
int32_t) + x1: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for layers 7-8 (384 x int32_t) + x2: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (160 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S b/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S index eaa73631a..cc17d0043 100644 --- a/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l4_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm(int32_t *r, const int32_t a[4][256], const int32_t b[4][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + x2: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4) diff --git 
a/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S b/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S index 6a86b168e..223c6563e 100644 --- a/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l5_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm(int32_t *r, const int32_t a[5][256], const int32_t b[5][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + x2: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5) diff --git a/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S b/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S index 4ea1a392d..8b52e0c54 100644 --- a/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l7_aarch64_asm + 
Description: AArch64 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm(int32_t *r, const int32_t a[7][256], const int32_t b[7][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + x2: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7) diff --git a/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S b/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S index d7f39e56d..819ae459e 100644 --- a/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA forward NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: ntt_aarch64_asm + Description: AArch64 ML-DSA forward NTT + Signature: void mld_ntt_aarch64_asm(int32_t *r, const int32_t *zetas_l123456, const int32_t *zetas_l78) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (144 x int32_t) + x2: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for 
layers 7-8 (384 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S b/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S index d6138ef3a..cf60f1015 100644 --- a/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S @@ -2,6 +2,27 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_pointwise_montgomery_aarch64_asm + Description: AArch64 pointwise Montgomery multiplication of two polynomials + Signature: void mld_poly_pointwise_montgomery_aarch64_asm(int32_t *a, const int32_t *b) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API) || \ diff --git a/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S b/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S index e99cd44e1..a5c197836 100644 --- a/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S @@ -2,6 +2,21 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_caddq_aarch64_asm + Description: AArch64 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_aarch64_asm(int32_t *a) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: 
Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S b/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S index 29d3f57e0..7d4d629aa 100644 --- a/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S @@ -2,6 +2,26 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_chknorm_aarch64_asm + Description: AArch64 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_aarch64_asm(const int32_t *a, int32_t B) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + x1: + type: scalar + c_parameter: int32_t B + description: Norm bound + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S b/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S index f2bdc5069..78f6418cb 100644 --- a/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_32_aarch64_asm + Description: AArch64 coefficient decomposition (GAMMA2 = (Q-1)/32) + Signature: void mld_poly_decompose_32_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + 
description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S b/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S index 100f27780..00aa1a55d 100644 --- a/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_88_aarch64_asm + Description: AArch64 coefficient decomposition (GAMMA2 = (Q-1)/88) + Signature: void mld_poly_decompose_88_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S b/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S index c7b1c3d96..ce730e5f8 100644 --- a/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_32_aarch64_asm + 
Description: AArch64 hint application (GAMMA2 = (Q-1)/32) + Signature: void mld_poly_use_hint_32_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S b/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S index 7961e71da..94a7b2d73 100644 --- a/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_88_aarch64_asm + Description: AArch64 hint application (GAMMA2 = (Q-1)/88) + Signature: void mld_poly_use_hint_88_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S b/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S index c0ea64560..bc65aafd9 100644 --- 
a/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_17_aarch64_asm + Description: AArch64 unpacking of 18-bit packed coefficients (GAMMA1 = 2^17) + Signature: void mld_polyz_unpack_17_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \ diff --git a/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S b/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S index 622249bdd..e09c45fca 100644 --- a/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_19_aarch64_asm + Description: AArch64 unpacking of 20-bit packed coefficients (GAMMA1 = 2^19) + Signature: void mld_polyz_unpack_19_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + 
size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \ diff --git a/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S b/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S index f10f2facc..ee4cd423d 100644 --- a/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_aarch64_asm + Description: AArch64 rejection sampling of uniform coefficients mod q + Signature: uint64_t mld_rej_uniform_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 24) + test_with: 840 # MLD_POLY_UNIFORM_NBLOCKS * SHAKE128_RATE = 5 * 168 + x3: + type: buffer + size_bytes: 256 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (256 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S b/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S index 50c110300..ac4e51db9 100644 --- a/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: 
Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta2_aarch64_asm + Description: AArch64 rejection sampling of eta=2 secret coefficients + Signature: uint64_t mld_rej_uniform_eta2_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 136 # MLD_AARCH64_REJ_UNIFORM_ETA2_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_NO_KEYPAIR_API) && \ diff --git a/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S b/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S index 9e1354665..5be0e3c23 100644 --- a/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +++ b/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta4_aarch64_asm + Description: AArch64 rejection sampling of eta=4 secret coefficients + Signature: uint64_t mld_rej_uniform_eta4_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + 
c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 272 # MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_AARCH64) && \ !defined(MLD_CONFIG_NO_KEYPAIR_API) && \ diff --git a/mldsa/src/native/x86_64/src/intt_avx2_asm.S b/mldsa/src/native/x86_64/src/intt_avx2_asm.S index 252679f8a..ef1b1765c 100644 --- a/mldsa/src/native/x86_64/src/intt_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/intt_avx2_asm.S @@ -18,6 +18,28 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: invntt_avx2_asm + Description: x86_64 AVX2 inverse NTT + Signature: void mld_invntt_avx2_asm(int32_t *r, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/x86_64/src/ntt_avx2_asm.S b/mldsa/src/native/x86_64/src/ntt_avx2_asm.S index da51829a6..bfc3ce8e2 100644 --- a/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/ntt_avx2_asm.S @@ -18,6 +18,28 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: ntt_avx2_asm + Description: x86_64 AVX2 forward NTT + Signature: void mld_ntt_avx2_asm(int32_t *r, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S b/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S index 0a2417b58..00eb98532 100644 --- a/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S @@ -18,6 +18,22 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: nttunpack_avx2_asm + Description: x86_64 AVX2 NTT coefficient unpacking/permutation + Signature: void mld_nttunpack_avx2_asm(int32_t *r) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S b/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S index 83f18620b..f82436e73 100644 --- a/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l4_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_pointwise_acc_l4_avx2_asm(int32_t *c, const int32_t a[4][256], const int32_t b[4][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S b/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S index dd419b689..4e96936f8 100644 --- a/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l5_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void mld_pointwise_acc_l5_avx2_asm(int32_t *c, const int32_t a[5][256], const int32_t b[5][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S b/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S index 99bce46d9..9544518fa 100644 --- a/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l7_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_pointwise_acc_l7_avx2_asm(int32_t *c, const int32_t a[7][256], const int32_t b[7][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \ diff --git a/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S b/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S index af0959012..0e28bc482 100644 --- a/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S @@ -17,6 +17,34 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_avx2_asm + Description: x86_64 AVX2 pointwise Montgomery multiplication + Signature: void mld_pointwise_avx2_asm(int32_t *a, const int32_t *b, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) + rdx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) diff --git a/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S b/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S index 3b9c8d953..82d2533ee 100644 --- a/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S @@ -27,6 +27,22 @@ * Arguments: - int32_t *r: pointer to input/output polynomial **************************************************/ +/*yaml + Name: poly_caddq_avx2_asm + Description: x86_64 AVX2 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_avx2_asm(int32_t *r) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/mldsa/src/native/x86_64/src/poly_chknorm_avx2_asm.S b/mldsa/src/native/x86_64/src/poly_chknorm_avx2_asm.S index 2c6f84011..3dfe98399 100644 --- a/mldsa/src/native/x86_64/src/poly_chknorm_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/poly_chknorm_avx2_asm.S @@ -31,6 +31,27 @@ 
* Returns: - 1 if any |coefficient| >= B, 0 otherwise. **************************************************/ +/*yaml + Name: poly_chknorm_avx2_asm + Description: x86_64 AVX2 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_avx2_asm(const int32_t *a, int32_t B) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + rsi: + type: scalar + c_parameter: int32_t B + description: Norm bound (must be non-negative) + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2_asm.S b/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2_asm.S index ceb763ab3..243eec68b 100644 --- a/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2_asm.S @@ -29,6 +29,28 @@ * - const uint8_t *a: pointer to packed input (576 bytes) **************************************************/ +/*yaml + Name: polyz_unpack_17_avx2_asm + Description: x86_64 AVX2 unpacking of 17-bit packed coefficients + Signature: void mld_polyz_unpack_17_avx2_asm(int32_t *r, const uint8_t *a) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *a + description: Packed input bytes +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2_asm.S b/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2_asm.S index f247ced81..769d38dec 100644 --- 
a/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2_asm.S @@ -29,6 +29,28 @@ * - const uint8_t *a: pointer to packed input (640 bytes) **************************************************/ +/*yaml + Name: polyz_unpack_19_avx2_asm + Description: x86_64 AVX2 unpacking of 19-bit packed coefficients + Signature: void mld_polyz_unpack_19_avx2_asm(int32_t *r, const uint8_t *a) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *a + description: Packed input bytes +*/ + #include "../../../common.h" #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \ diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index 276c5a5f5..9eb8879fb 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -1151,8 +1151,17 @@ int mld_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, MLD_CONFIG_CONTEXT_PARAMETER_TYPE context) { int ret; + uint8_t sig[MLDSA_CRYPTO_BYTES]; + size_t siglen; size_t i; + ret = mld_sign_signature(sig, &siglen, m, mlen, ctx, ctxlen, sk, context); + if (ret != 0) + { + *smlen = 0; + goto cleanup; + } + for (i = 0; i < mlen; ++i) __loop__( assigns(i, object_whole(sm)) @@ -1162,12 +1171,12 @@ int mld_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, { sm[MLDSA_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; } - ret = mld_sign_signature(sm, smlen, sm + MLDSA_CRYPTO_BYTES, mlen, ctx, - ctxlen, sk, context); - if (ret == 0) - { - *smlen += mlen; - } + + mld_memcpy(sm, sig, MLDSA_CRYPTO_BYTES); + *smlen = siglen + mlen; + +cleanup: + mld_zeroize(sig, sizeof(sig)); return ret; } #endif /* !MLD_CONFIG_NO_RANDOMIZED_API */ diff --git a/mldsa/src/sign.h b/mldsa/src/sign.h index cc2ddc6dc..48e667e33 100644 --- a/mldsa/src/sign.h +++ 
b/mldsa/src/sign.h @@ -259,6 +259,9 @@ __contract__( * MLD_CONFIG_CONTEXT_PARAMETER is defined; type set by * MLD_CONFIG_CONTEXT_PARAMETER_TYPE. * + * On failure, *smlen is set to 0 and sm holds no signed-message output, not + * even a partial one; if sm == m, the input message bytes remain unchanged. + * * @retval 0 Success. * @retval MLD_ERR_OUT_OF_MEMORY MLD_CONFIG_CUSTOM_ALLOC_FREE was * used and an allocation via diff --git a/mldsa/src/sys.h b/mldsa/src/sys.h index d89bf0979..051b0fe6d 100644 --- a/mldsa/src/sys.h +++ b/mldsa/src/sys.h @@ -266,7 +266,9 @@ typedef enum /* x86_64 */ MLD_SYS_CAP_AVX2, /* AArch64 */ - MLD_SYS_CAP_SHA3 + MLD_SYS_CAP_SHA3, + /* Armv8.1-M */ + MLD_SYS_CAP_MVE } mld_sys_cap; #if !defined(MLD_CONFIG_CUSTOM_CAPABILITY_FUNC) diff --git a/proofs/hol_light/aarch64/mldsa/intt_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/intt_aarch64_asm.S index fc75da63c..bfaf66e1f 100644 --- a/proofs/hol_light/aarch64/mldsa/intt_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/intt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA inverse NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: intt_aarch64_asm + Description: AArch64 ML-DSA inverse NTT + Signature: void mld_intt_aarch64_asm(int32_t *r, const int32_t *zetas_l78, const int32_t *zetas_l123456) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + description: Twiddle factors for layers 7-8 (384 x int32_t) + x2: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (160 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git
a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_scalar_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_scalar_aarch64_asm.S index 3c980d0b8..fdf7bb9ae 100644 --- a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_scalar_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_scalar_aarch64_asm.S @@ -10,10 +10,12 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x1_scalar_asm + Name: keccak_f1600_x1_scalar_aarch64_asm Description: AArch64 scalar implementation of Keccak-f[1600] permutation for single state Signature: void mld_keccak_f1600_x1_scalar_aarch64_asm(uint64_t state[25], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 x0: type: buffer size_bytes: 200 diff --git a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_v84a_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_v84a_aarch64_asm.S index bb59d51d6..b92db2bb8 100644 --- a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_v84a_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x1_v84a_aarch64_asm.S @@ -16,10 +16,13 @@ */ /*yaml - Name: keccak_f1600_x1_v84a_asm + Name: keccak_f1600_x1_v84a_aarch64_asm Description: AArch64 ARMv8.4-A implementation of Keccak-f[1600] permutation for single state Signature: void mld_keccak_f1600_x1_v84a_aarch64_asm(uint64_t state[25], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 200 @@ -30,7 +33,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 64 diff --git a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x2_v84a_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x2_v84a_aarch64_asm.S index 85fb100bd..58ddd93c6 100644 --- a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x2_v84a_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x2_v84a_aarch64_asm.S @@ 
-16,10 +16,13 @@ */ /*yaml - Name: keccak_f1600_x2_v84a_asm + Name: keccak_f1600_x2_v84a_aarch64_asm Description: AArch64 ARMv8.4-A implementation of Keccak-f[1600] permutation for two sequential states Signature: void mld_keccak_f1600_x2_v84a_aarch64_asm(uint64_t state[50], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 400 @@ -30,7 +33,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 64 diff --git a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S index 0dcf6d705..868f9ef06 100644 --- a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S @@ -10,10 +10,12 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x4_v8a_scalar_hybrid_asm + Name: keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm Description: AArch64 hybrid scalar/vector implementation of Keccak-f[1600] permutation for four sequential states Signature: void mld_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm(uint64_t state[100], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 x0: type: buffer size_bytes: 800 @@ -24,7 +26,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 224 diff --git a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S index 2e9988a81..139ea5d8e 100644 --- a/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +++ 
b/proofs/hol_light/aarch64/mldsa/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S @@ -10,10 +10,13 @@ // Author: Matthias Kannwischer /*yaml - Name: keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm + Name: keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm Description: AArch64 hybrid scalar/vector implementation of Keccak-f[1600] permutation for four sequential states with ARMv8.4-A optimizations Signature: void mld_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm(uint64_t state[100], const uint64_t rc[24]) ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + Features: [SHA3] x0: type: buffer size_bytes: 800 @@ -24,7 +27,7 @@ type: buffer size_bytes: 192 permissions: read-only - c_parameter: const uint64_t rc[24] + c_parameter: uint64_t const *rc description: Round constants (24 x uint64_t) Stack: bytes: 224 diff --git a/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S index 4bf1aa010..88a6a8a8a 100644 --- a/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l4_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm(int32_t *r, const int32_t a[4][256], const int32_t b[4][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + x2: + type: buffer + size_bytes: 4096 + 
permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S index e906fba83..6216fc20d 100644 --- a/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S @@ -2,6 +2,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l5_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm(int32_t *r, const int32_t a[5][256], const int32_t b[5][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + x2: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S index 1c5bed001..c9ea4675a 100644 --- a/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S @@ -2,6 +2,33 @@ * 
SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyvecl_pointwise_acc_montgomery_l7_aarch64_asm + Description: AArch64 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm(int32_t *r, const int32_t a[7][256], const int32_t b[7][256]) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + x2: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/ntt_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/ntt_aarch64_asm.S index 2a2709350..8d4ad9b87 100644 --- a/proofs/hol_light/aarch64/mldsa/ntt_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/ntt_aarch64_asm.S @@ -27,6 +27,33 @@ /* AArch64 ML-DSA forward NTT following @[NeonNTT], @[SLOTHY_Paper], and @[NeonNTT_Autoformalised] */ +/*yaml + Name: ntt_aarch64_asm + Description: AArch64 ML-DSA forward NTT + Signature: void mld_ntt_aarch64_asm(int32_t *r, const int32_t *zetas_l123456, const int32_t *zetas_l78) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const int32_t *zetas_l123456 + description: Twiddle factors for layers 1-6 (144 x int32_t) + x2: + type: buffer + size_bytes: 1536 + permissions: read-only + c_parameter: const int32_t *zetas_l78 + 
description: Twiddle factors for layers 7-8 (384 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/pointwise_montgomery_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/pointwise_montgomery_aarch64_asm.S index 17574e96a..28e5757c2 100644 --- a/proofs/hol_light/aarch64/mldsa/pointwise_montgomery_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/pointwise_montgomery_aarch64_asm.S @@ -2,6 +2,27 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_pointwise_montgomery_aarch64_asm + Description: AArch64 pointwise Montgomery multiplication of two polynomials + Signature: void mld_poly_pointwise_montgomery_aarch64_asm(int32_t *a, const int32_t *b) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/poly_caddq_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/poly_caddq_aarch64_asm.S index 691ed5535..c88f7d9af 100644 --- a/proofs/hol_light/aarch64/mldsa/poly_caddq_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/poly_caddq_aarch64_asm.S @@ -2,6 +2,21 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_caddq_aarch64_asm + Description: AArch64 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_aarch64_asm(int32_t *a) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) +*/ + /* diff --git 
a/proofs/hol_light/aarch64/mldsa/poly_chknorm_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/poly_chknorm_aarch64_asm.S index a2ae5206b..72df8bcf3 100644 --- a/proofs/hol_light/aarch64/mldsa/poly_chknorm_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/poly_chknorm_aarch64_asm.S @@ -2,6 +2,26 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_chknorm_aarch64_asm + Description: AArch64 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_aarch64_asm(const int32_t *a, int32_t B) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + x1: + type: scalar + c_parameter: int32_t B + description: Norm bound + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + /* diff --git a/proofs/hol_light/aarch64/mldsa/poly_decompose_32_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/poly_decompose_32_aarch64_asm.S index b0ad0492e..b0767690d 100644 --- a/proofs/hol_light/aarch64/mldsa/poly_decompose_32_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/poly_decompose_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_32_aarch64_asm + Description: AArch64 coefficient decomposition (alpha = (Q-1)/32) + Signature: void mld_poly_decompose_32_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + /* diff --git 
a/proofs/hol_light/aarch64/mldsa/poly_decompose_88_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/poly_decompose_88_aarch64_asm.S index 58a494c8e..e72f6f919 100644 --- a/proofs/hol_light/aarch64/mldsa/poly_decompose_88_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/poly_decompose_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_decompose_88_aarch64_asm + Description: AArch64 coefficient decomposition (alpha = (Q-1)/88) + Signature: void mld_poly_decompose_88_aarch64_asm(int32_t *a1, int32_t *a0) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *a1 + description: Output high-part polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a0 + description: Input polynomial / output low-part (256 x int32_t) +*/ + /* diff --git a/proofs/hol_light/aarch64/mldsa/poly_use_hint_32_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/poly_use_hint_32_aarch64_asm.S index fc255bcd0..33fc31867 100644 --- a/proofs/hol_light/aarch64/mldsa/poly_use_hint_32_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/poly_use_hint_32_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_32_aarch64_asm + Description: AArch64 hint application (alpha = (Q-1)/32) + Signature: void mld_poly_use_hint_32_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + /* diff --git 
a/proofs/hol_light/aarch64/mldsa/poly_use_hint_88_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/poly_use_hint_88_aarch64_asm.S index 4db2f374a..396673da6 100644 --- a/proofs/hol_light/aarch64/mldsa/poly_use_hint_88_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/poly_use_hint_88_aarch64_asm.S @@ -2,6 +2,27 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: poly_use_hint_88_aarch64_asm + Description: AArch64 hint application (alpha = (Q-1)/88) + Signature: void mld_poly_use_hint_88_aarch64_asm(int32_t *a, const int32_t *h) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *h + description: Hint polynomial (256 x int32_t) +*/ + /* diff --git a/proofs/hol_light/aarch64/mldsa/polyz_unpack_17_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/polyz_unpack_17_aarch64_asm.S index 87cb5d8ec..c68ebd591 100644 --- a/proofs/hol_light/aarch64/mldsa/polyz_unpack_17_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/polyz_unpack_17_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_17_aarch64_asm + Description: AArch64 unpacking of 17-bit packed coefficients + Signature: void mld_polyz_unpack_17_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + 
description: Permutation index table (64 x uint8_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/polyz_unpack_19_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/polyz_unpack_19_aarch64_asm.S index d76dee7aa..bd54127f1 100644 --- a/proofs/hol_light/aarch64/mldsa/polyz_unpack_19_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/polyz_unpack_19_aarch64_asm.S @@ -4,6 +4,33 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: polyz_unpack_19_aarch64_asm + Description: AArch64 unpacking of 19-bit packed coefficients + Signature: void mld_polyz_unpack_19_aarch64_asm(int32_t *r, const uint8_t *buf, const uint8_t *indices) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + x1: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *buf + description: Packed input bytes + x2: + type: buffer + size_bytes: 64 + permissions: read-only + c_parameter: const uint8_t *indices + description: Permutation index table (64 x uint8_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/rej_uniform_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/rej_uniform_aarch64_asm.S index 93e621402..78a93a298 100644 --- a/proofs/hol_light/aarch64/mldsa/rej_uniform_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/rej_uniform_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_aarch64_asm + Description: AArch64 rejection sampling of uniform coefficients mod q + Signature: uint64_t mld_rej_uniform_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: 
write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 24) + test_with: 840 # MLD_POLY_UNIFORM_NBLOCKS * SHAKE128_RATE = 5 * 168 + x3: + type: buffer + size_bytes: 256 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (256 x uint8_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/aarch64/mldsa/rej_uniform_eta2_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/rej_uniform_eta2_aarch64_asm.S index b26d89276..a14f63bed 100644 --- a/proofs/hol_light/aarch64/mldsa/rej_uniform_eta2_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/rej_uniform_eta2_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta2_aarch64_asm + Description: AArch64 rejection sampling of eta=2 secret coefficients + Signature: uint64_t mld_rej_uniform_eta2_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 136 # MLD_AARCH64_REJ_UNIFORM_ETA2_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git 
a/proofs/hol_light/aarch64/mldsa/rej_uniform_eta4_aarch64_asm.S b/proofs/hol_light/aarch64/mldsa/rej_uniform_eta4_aarch64_asm.S index c87af4b65..bfedbe262 100644 --- a/proofs/hol_light/aarch64/mldsa/rej_uniform_eta4_aarch64_asm.S +++ b/proofs/hol_light/aarch64/mldsa/rej_uniform_eta4_aarch64_asm.S @@ -4,6 +4,38 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: rej_uniform_eta4_aarch64_asm + Description: AArch64 rejection sampling of eta=4 secret coefficients + Signature: uint64_t mld_rej_uniform_eta4_aarch64_asm(int32_t *r, const uint8_t *buf, unsigned buflen, const uint8_t *table) + ABI: + Architecture: aarch64 + CallingConvention: AAPCS64 + x0: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output buffer (256 x int32_t) + x1: + type: buffer + size_bytes: x2 + permissions: read-only + c_parameter: const uint8_t *buf + description: Input buffer + x2: + type: scalar + c_parameter: unsigned buflen + description: Length of input buffer (must be a multiple of 8) + test_with: 272 # MLD_AARCH64_REJ_UNIFORM_ETA4_BUFLEN + x3: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const uint8_t *table + description: Lookup table (4096 x uint8_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/intt_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/intt_avx2_asm.S index 3544da16f..d3172ba85 100644 --- a/proofs/hol_light/x86_64/mldsa/intt_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/intt_avx2_asm.S @@ -18,6 +18,28 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: invntt_avx2_asm + Description: x86_64 AVX2 inverse NTT + Signature: void mld_invntt_avx2_asm(int32_t *r, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/keccak_f1600_x4_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/keccak_f1600_x4_avx2_asm.S index 49b6f58ae..4087d9a7a 100644 --- a/proofs/hol_light/x86_64/mldsa/keccak_f1600_x4_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/keccak_f1600_x4_avx2_asm.S @@ -3,6 +3,40 @@ * Copyright (c) The mldsa-native project authors * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT */ +/*yaml + Name: keccak_f1600_x4_avx2_asm + Description: x86_64 AVX2 Keccak-f[1600] permutation for four sequential states + Signature: void mld_keccak_f1600_x4_avx2_asm(uint64_t states[100], const uint64_t rc[24], const uint64_t rho8[4], const uint64_t rho56[4]) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 800 + permissions: read/write + c_parameter: uint64_t states[100] + description: Four sequential Keccak states (4 x 25 x uint64_t) + rsi: + type: buffer + size_bytes: 192 + permissions: read-only + c_parameter: const uint64_t rc[24] + description: Round constants (24 x uint64_t) + rdx: + type: buffer + size_bytes: 32 + permissions: read-only + c_parameter: const uint64_t rho8[4] + description: Rotation constant rho8 (4 x uint64_t) + rcx: + type: buffer + size_bytes: 32 + permissions: read-only + c_parameter: const uint64_t rho56[4] + description: Rotation constant rho56 (4 x uint64_t) +*/ + /* diff --git 
a/proofs/hol_light/x86_64/mldsa/ntt_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/ntt_avx2_asm.S index b46565f07..dfc91107f 100644 --- a/proofs/hol_light/x86_64/mldsa/ntt_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/ntt_avx2_asm.S @@ -18,6 +18,28 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: ntt_avx2_asm + Description: x86_64 AVX2 forward NTT + Signature: void mld_ntt_avx2_asm(int32_t *r, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/nttunpack_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/nttunpack_avx2_asm.S index 25603baa8..5fe391e04 100644 --- a/proofs/hol_light/x86_64/mldsa/nttunpack_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/nttunpack_avx2_asm.S @@ -18,6 +18,22 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: nttunpack_avx2_asm + Description: x86_64 AVX2 NTT coefficient unpacking/permutation + Signature: void mld_nttunpack_avx2_asm(int32_t *r) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/pointwise_acc_l4_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/pointwise_acc_l4_avx2_asm.S index 29102bd5a..5e06fed37 100644 --- a/proofs/hol_light/x86_64/mldsa/pointwise_acc_l4_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/pointwise_acc_l4_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: pointwise_acc_l4_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-4 polynomial vectors + Signature: void mld_pointwise_acc_l4_avx2_asm(int32_t *c, const int32_t a[4][256], const int32_t b[4][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t a[4][256] + description: Input polynomial vector a (4 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 4096 + permissions: read-only + c_parameter: const int32_t b[4][256] + description: Input polynomial vector b (4 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/pointwise_acc_l5_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/pointwise_acc_l5_avx2_asm.S 
index a398504f4..262a918e7 100644 --- a/proofs/hol_light/x86_64/mldsa/pointwise_acc_l5_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/pointwise_acc_l5_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. */ +/*yaml + Name: pointwise_acc_l5_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-5 polynomial vectors + Signature: void mld_pointwise_acc_l5_avx2_asm(int32_t *c, const int32_t a[5][256], const int32_t b[5][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t a[5][256] + description: Input polynomial vector a (5 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 5120 + permissions: read-only + c_parameter: const int32_t b[5][256] + description: Input polynomial vector b (5 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/pointwise_acc_l7_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/pointwise_acc_l7_avx2_asm.S index 7d4784766..01b645fc4 100644 --- a/proofs/hol_light/x86_64/mldsa/pointwise_acc_l7_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/pointwise_acc_l7_avx2_asm.S @@ -17,6 +17,40 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_acc_l7_avx2_asm + Description: x86_64 AVX2 pointwise multiply-accumulate of length-7 polynomial vectors + Signature: void mld_pointwise_acc_l7_avx2_asm(int32_t *c, const int32_t a[7][256], const int32_t b[7][256], const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *c + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t a[7][256] + description: Input polynomial vector a (7 x 256 x int32_t) + rdx: + type: buffer + size_bytes: 7168 + permissions: read-only + c_parameter: const int32_t b[7][256] + description: Input polynomial vector b (7 x 256 x int32_t) + rcx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/pointwise_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/pointwise_avx2_asm.S index 3b92a9f58..728f21422 100644 --- a/proofs/hol_light/x86_64/mldsa/pointwise_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/pointwise_avx2_asm.S @@ -17,6 +17,34 @@ * AVX2 Dilithium implementation @[REF_AVX2]. 
*/ +/*yaml + Name: pointwise_avx2_asm + Description: x86_64 AVX2 pointwise Montgomery multiplication + Signature: void mld_pointwise_avx2_asm(int32_t *a, const int32_t *b, const int32_t *qdata) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *a + description: Input/output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *b + description: Input polynomial (256 x int32_t) + rdx: + type: buffer + size_bytes: 2496 + permissions: read-only + c_parameter: const int32_t *qdata + description: Precomputed constants (624 x int32_t) +*/ + /* * WARNING: This file is auto-derived from the mldsa-native source file diff --git a/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S index 79d762074..f546e1046 100644 --- a/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S @@ -27,6 +27,22 @@ * Arguments: - int32_t *r: pointer to input/output polynomial **************************************************/ +/*yaml + Name: poly_caddq_avx2_asm + Description: x86_64 AVX2 conditional addition of q to each coefficient + Signature: void mld_poly_caddq_avx2_asm(int32_t *r) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read/write + c_parameter: int32_t *r + description: Input/output polynomial (256 x int32_t) +*/ + diff --git a/proofs/hol_light/x86_64/mldsa/poly_chknorm_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/poly_chknorm_avx2_asm.S index 274ff7c22..639b63a33 100644 --- a/proofs/hol_light/x86_64/mldsa/poly_chknorm_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/poly_chknorm_avx2_asm.S @@ -31,6 +31,27 @@ * Returns: - 1 if any |coefficient| >= B, 0 otherwise. 
**************************************************/ +/*yaml + Name: poly_chknorm_avx2_asm + Description: x86_64 AVX2 infinity-norm bound check on polynomial coefficients + Signature: int mld_poly_chknorm_avx2_asm(const int32_t *a, int32_t B) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: read-only + c_parameter: const int32_t *a + description: Input polynomial (256 x int32_t) + rsi: + type: scalar + c_parameter: int32_t B + description: Norm bound (must be non-negative) + test_with: 131072 # representative non-negative bound (1 << 17) +*/ + diff --git a/proofs/hol_light/x86_64/mldsa/polyz_unpack_17_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/polyz_unpack_17_avx2_asm.S index 2bb921f9e..2844cc28c 100644 --- a/proofs/hol_light/x86_64/mldsa/polyz_unpack_17_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/polyz_unpack_17_avx2_asm.S @@ -29,6 +29,28 @@ * - const uint8_t *a: pointer to packed input (576 bytes) **************************************************/ +/*yaml + Name: polyz_unpack_17_avx2_asm + Description: x86_64 AVX2 unpacking of 17-bit packed coefficients + Signature: void mld_polyz_unpack_17_avx2_asm(int32_t *r, const uint8_t *a) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 576 + permissions: read-only + c_parameter: const uint8_t *a + description: Packed input bytes +*/ + diff --git a/proofs/hol_light/x86_64/mldsa/polyz_unpack_19_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/polyz_unpack_19_avx2_asm.S index 999977f1c..09c14f3d7 100644 --- a/proofs/hol_light/x86_64/mldsa/polyz_unpack_19_avx2_asm.S +++ b/proofs/hol_light/x86_64/mldsa/polyz_unpack_19_avx2_asm.S @@ -29,6 +29,28 @@ * - const uint8_t *a: pointer to packed input (640 bytes) 
**************************************************/ +/*yaml + Name: polyz_unpack_19_avx2_asm + Description: x86_64 AVX2 unpacking of 19-bit packed coefficients + Signature: void mld_polyz_unpack_19_avx2_asm(int32_t *r, const uint8_t *a) + ABI: + Architecture: x86_64 + CallingConvention: SysV + Features: [AVX2] + rdi: + type: buffer + size_bytes: 1024 + permissions: write-only + c_parameter: int32_t *r + description: Output polynomial (256 x int32_t) + rsi: + type: buffer + size_bytes: 640 + permissions: read-only + c_parameter: const uint8_t *a + description: Packed input bytes +*/ + diff --git a/scripts/autogen b/scripts/autogen index 3ce73146f..68a1e9b26 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -330,6 +330,12 @@ def gen_yaml_header(): yield "" +def gen_yaml_autogen_warning(): + yield "# WARNING: This file is auto-generated from scripts/autogen" + yield "# in the mldsa-native repository." + yield "# Do not modify it directly." + + def format_content(content): clang_format_file = os.path.join(os.path.dirname(__file__), "..", ".clang-format") p = subprocess.run( @@ -2302,7 +2308,12 @@ def check_macro_typos_in_file(filename, macro_check): def get_syscaps(): - return ["MLD_SYS_CAP_AVX2", "MLD_SYS_CAP_SHA3", "MLD_SYS_CAP_DUMMY"] + return [ + "MLD_SYS_CAP_AVX2", + "MLD_SYS_CAP_SHA3", + "MLD_SYS_CAP_MVE", + "MLD_SYS_CAP_DUMMY", + ] def check_macro_typos(): @@ -2669,6 +2680,35 @@ def gen_hol_light_asm_file(job): ) +def extract_yaml_from_assembly(assembly_file): + """Extract YAML metadata from assembly file.""" + with open(assembly_file, "r") as f: + content = f.read() + + yaml_match = re.search(r"/\*yaml\s*\n(.*?)\n\*/", content, re.DOTALL) + if not yaml_match: + raise ValueError(f"No YAML metadata found in {assembly_file}") + + return yaml.safe_load(yaml_match.group(1)) + + +# Map dev/ source directory to the corresponding upstream under mldsa/. 
+def upstream_src_dir_for_dev_dir(dev_dir): + if dev_dir.startswith("dev/fips202/"): + # dev/fips202/<arch>/src -> mldsa/src/fips202/native/<arch>/src + arch = dev_dir.split("/")[2] + return f"mldsa/src/fips202/native/{arch}/src" + if dev_dir.startswith("dev/aarch64_"): + # dev/aarch64_opt/src and dev/aarch64_clean/src map to the same tree + return "mldsa/src/native/aarch64/src" + if dev_dir.startswith("dev/x86_64/"): + return "mldsa/src/native/x86_64/src" + raise ValueError( + f"Unrecognised dev/ source directory {dev_dir!r}; " + "extend upstream_src_dir_for_dev_dir() to map it to mldsa/src/..." + ) + + def hol_light_asm_joblist(): aarch64_flags = "-march=armv8.4-a+sha3" joblist_aarch64 = [ @@ -4106,6 +4146,588 @@ def gen_test_vectors(msg, ctx): ) +# ABI checker capability table. +# +# Maps each `ABI.Features:` tag from the kernel YAMLs to: +# - 'asm_subdir': the arch this cap belongs to; selects the per-arch +# test/abicheck/<arch>/abicheck_<arch>.mk it is +# emitted into. +# - 'guard': if non-None, wrap the generated check in #if defined(...). +# - 'syscap': runtime capability; check skips if the host lacks it. +# - 'cflags': for the autogeneration of the per-arch abicheck_<arch>.mk. +# - 'description': shown when a check is skipped. +# - 'aux_files': extra .S files needing these CFLAGS though not generated +# kernel exports (hand-written abicheck stubs/selftests, and +# sources whose YAML doesn't list them as ABI-tested kernels +# but still need the per-cap toolchain flags). 
+ABI_CAPS = { + "AVX2": { + "asm_subdir": "x86_64", + "guard": "__AVX2__", + "syscap": "MLD_SYS_CAP_AVX2", + "description": "AVX2", + "cflags": ["-mavx2", "-mbmi2"], + "aux_files": [], + }, + "SHA3": { + "asm_subdir": "aarch64", + "guard": "__ARM_FEATURE_SHA3", + "syscap": "MLD_SYS_CAP_SHA3", + "description": "Armv8.4-A SHA3 (eor3, rax1, xar, bcax)", + "cflags": ["-march=armv8.4-a+sha3"], + "aux_files": [], + }, + "MVE": { + "asm_subdir": "armv81m", + "guard": "__ARM_FEATURE_MVE", + "syscap": "MLD_SYS_CAP_MVE", + "description": "Armv8.1-M MVE", + "cflags": ["-march=armv8.1-m.main+mve", "-mthumb"], + "aux_files": [ + "test/abicheck/armv81m/callstub_armv81m.S", + "test/abicheck/armv81m/selftest_armv81m.S", + # Shipped MVE helpers pulled in by the abicheck wildcard but + # not registered as ABI-tested kernels - they still need MVE + # CFLAGS to assemble. + "mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S", + "mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S", + ], + }, +} + + +# ABI checker config, keyed by the (ABI.Architecture, ABI.CallingConvention) +# pair each kernel YAML declares. +ARCH_CALLINGS = { + ("aarch64", "AAPCS64"): { + "arch_guard": "MLD_SYS_AARCH64", + "extra_guards": [], + "asm_subdir": "aarch64", + "state_type": "struct aarch64_register_state", + # GPR-slot integer type. Used to cast pointer-arg buffers into the + # input state without introducing an intermediate type (avoiding + # uintptr_t / size_t which we don't use elsewhere in mldsa-native). + "gpr_int_type": "uint64_t", + "fn_qualifier": "", + "init_func": "init_aarch64_register_state", + "stub_func": "asm_call_stub_aarch64", + "check_func": "check_aarch64_aapcs_compliance", + # YAML register xN maps to gpr[N]. 
+ "reg_to_field": lambda reg: f"gpr[{int(reg[1:])}]", + }, + ("x86_64", "SysV"): { + "arch_guard": "MLD_SYS_X86_64", + "extra_guards": ["MLD_SYSV_ABI_SUPPORTED"], + "asm_subdir": "x86_64", + "state_type": "struct x86_64_register_state", + "gpr_int_type": "uint64_t", + # MLD_SYSV_ABI is `__attribute__((sysv_abi))` on Windows-MinGW + # (where Microsoft x64 is the default convention) and empty on + # ELF/Mach-O x86_64. Matches the library's own kernel decls in + # mldsa/src/native/x86_64/src/arith_native_x86_64.h. + "fn_qualifier": "MLD_SYSV_ABI", + "init_func": "init_x86_64_register_state", + "stub_func": "asm_call_stub_x86_64_sysv", + "check_func": "check_x86_64_sysv_compliance", + "reg_to_field": lambda reg: reg, + }, + ("armv81m", "AAPCS32"): { + "arch_guard": "MLD_SYS_ARMV81M_MVE", + "extra_guards": [], + "asm_subdir": "armv81m", + "state_type": "struct armv81m_register_state", + "gpr_int_type": "uint32_t", + "fn_qualifier": "", + "init_func": "init_armv81m_register_state", + "stub_func": "asm_call_stub_armv81m", + "check_func": "check_armv81m_aapcs32_compliance", + # AAPCS32 GPRs r0-r12 map directly to gpr[0..12]. 
+ "reg_to_field": lambda reg: f"gpr[{int(reg[1:])}]", + }, +} + + +def _abi_block(yaml_data, dev_source): + """Return the YAML 'ABI:' block, raising on missing/wrong type.""" + abi = yaml_data.get("ABI") + if abi is None: + raise ValueError(f"{dev_source}: YAML missing 'ABI:' block") + if not isinstance(abi, dict): + raise ValueError( + f"{dev_source}: YAML 'ABI:' must be a mapping, got {type(abi).__name__}" + ) + return abi + + +def kernel_arch_calling(yaml_data, dev_source): + """Return the (arch, calling) pair from a kernel's YAML; both are required + and must form a key in ARCH_CALLINGS.""" + abi = _abi_block(yaml_data, dev_source) + arch = abi.get("Architecture") + calling = abi.get("CallingConvention") + if arch is None or calling is None: + raise ValueError( + f"{dev_source}: YAML 'ABI:' must set both 'Architecture:' and " + f"'CallingConvention:' (got Architecture={arch!r}, " + f"CallingConvention={calling!r})" + ) + if (arch, calling) not in ARCH_CALLINGS: + known = ", ".join(f"({a},{c})" for (a, c) in sorted(ARCH_CALLINGS)) + raise ValueError( + f"{dev_source}: unknown (Architecture, CallingConvention) pair " + f"({arch!r}, {calling!r}); expected one of: {known}. " + f"Add it to ARCH_CALLINGS in scripts/autogen." + ) + return (arch, calling) + + +def kernel_features(yaml_data, dev_source): + """Return the 'ABI.Features:' list (capability tags, keys of ABI_CAPS); + missing/empty means no requirement, unknown tags raise.""" + abi = _abi_block(yaml_data, dev_source) + feats = abi.get("Features") or [] + if not isinstance(feats, list): + raise ValueError( + f"{dev_source}: YAML 'ABI.Features:' must be a list, " + f"got {type(feats).__name__}" + ) + for r in feats: + if r not in ABI_CAPS: + known = ", ".join(sorted(ABI_CAPS.keys())) + raise ValueError( + f"{dev_source}: unknown feature {r!r} in 'ABI.Features:'; " + f"expected one of: {known}. Add it to ABI_CAPS in scripts/autogen." 
+ ) + return feats + + +def kernel_registers(yaml_data, dev_source): + """Return the per-register entries in the kernel's 'ABI:' block: the direct + children carrying a `type:` field (e.g. `x0:`, `rdi:`), skipping the scalar + metadata keys.""" + abi = _abi_block(yaml_data, dev_source) + abi_metadata_keys = {"Architecture", "CallingConvention", "Features"} + regs = {} + for key, value in abi.items(): + if key in abi_metadata_keys: + continue + if not isinstance(value, dict) or "type" not in value: + raise ValueError( + f"{dev_source}: unexpected key {key!r} in 'ABI:' block; " + f"expected one of {sorted(abi_metadata_keys)} or a register " + f"entry with a 'type:' field" + ) + regs[key] = value + return regs + + +def resolve_buffer_size(reg_info, abi_data): + """Resolve buffer size from register info, handling cross-references.""" + size = reg_info.get("size_bytes") + if isinstance(size, str): + if size in abi_data: + ref_reg = abi_data[size] + if ref_reg.get("type") == "scalar" and "test_with" in ref_reg: + return ref_reg["test_with"] + raise ValueError(f"Buffer {reg_info} references non-scalar register {size}") + return int(size) + elif isinstance(size, int): + return size + raise ValueError(f"Cannot resolve buffer size from {reg_info}") + + +def gen_abicheck(): + """Generate ABI checker tests""" + + # Sources the ABI checker covers in addition to those exported for + # HOL-Light proofs (armv81m). Shape matches hol_light_asm_joblist + # (basename, dev_dir, _cflags, arch); the cflags slot is unused here. + joblist_abicheck_extra = [ + ("keccak_f1600_x4_mve.S", "dev/fips202/armv81m/src", "", "armv81m"), + ] + joblist = hol_light_asm_joblist() + joblist_abicheck_extra + + # Bucket joblist entries by their YAML-declared (arch, calling) pair so a + # single dev source file is fully self-describing. + jobs_by_arch_calling = {key: [] for key in ARCH_CALLINGS} + # Map cap -> sorted list of .S files that require it. 
Drives the + # generated per-arch abicheck_.mk at the end of this function. + cap_to_files = {cap: [] for cap in ABI_CAPS} + + for entry in sorted(joblist, key=lambda j: j[0]): + assembly_basename, dev_src_dir, _cflags, joblist_arch = entry + dev_source = f"{dev_src_dir}/{assembly_basename}" + yaml_data = extract_yaml_from_assembly(dev_source) + arch_calling = kernel_arch_calling(yaml_data, dev_source) + # Sanity: the joblist directory architecture should match the YAML. + if arch_calling[0] != joblist_arch: + raise ValueError( + f"{dev_source}: YAML 'ABI.Architecture: {arch_calling[0]}' " + f"disagrees with joblist arch {joblist_arch!r}" + ) + upstream_src_dir = upstream_src_dir_for_dev_dir(dev_src_dir) + for cap in kernel_features(yaml_data, dev_source): + cap_to_files[cap].append(f"{upstream_src_dir}/{assembly_basename}") + jobs_by_arch_calling[arch_calling].append((entry, yaml_data)) + + # Per-(arch, calling) generated metadata used to assemble checks_all.h: + # list of (check_name, arch_flags, extra_guards) tuples plus the + # arch_guard / extra_guards keying. + all_groups = [] # list of (arch_calling, arch_guard, extra_guards, [(name, arch_flags)]) + + # Per-arch-subdir set of check_*.c basenames just generated. Used after the + # main loop to delete stale check_*.c left over from a renamed/removed + # kernel (mirrors synchronize_backend's delete= path). + generated_basenames_by_subdir = {} + + for arch_calling, jobs in jobs_by_arch_calling.items(): + config = ARCH_CALLINGS[arch_calling] + arch_guard = config["arch_guard"] + extra_guards = config["extra_guards"] + generated_functions = [] + generated_basenames_by_subdir.setdefault(config["asm_subdir"], set()) + + for (assembly_basename, dev_src_dir, _cflags, _joblist_arch), yaml_data in jobs: + dev_source = f"{dev_src_dir}/{assembly_basename}" + features = kernel_features(yaml_data, dev_source) + registers = kernel_registers(yaml_data, dev_source) + # Compile-time guards derived from Features. 
Each capability with a + # 'guard' (e.g. __AVX2__ for AVX2, __ARM_FEATURE_SHA3 for SHA3) + # contributes an #if term mirroring the backend's own compile-time + # gate; runtime gating via mld_sys_check_capability is + # additional, not a replacement. + feature_guards = [ + ABI_CAPS[f]["guard"] for f in features if ABI_CAPS[f]["guard"] + ] + + function_name = yaml_data.get("Name") + c_function_name = "mld_" + function_name + # The YAML Name already encodes the architecture (e.g. + # ntt_aarch64_asm, ntt_avx2_asm), so use it directly rather than + # appending the architecture suffix again. + check_name = function_name + + def gen_c_test( + function_name=function_name, + check_name=check_name, + c_function_name=c_function_name, + yaml_data=yaml_data, + features=features, + registers=registers, + feature_guards=feature_guards, + config=config, + arch_guard=arch_guard, + extra_guards=extra_guards, + ): + yield from gen_header() + yield "#include <stdio.h>" + yield "" + yield f'#include "../abicheck_{config["asm_subdir"]}.h"' + yield f'#include "../checks_{config["asm_subdir"]}_all.h"' + yield "" + + # Full guard set: architecture macro, then any calling- + # convention-specific guards (e.g. MLD_SYSV_ABI_SUPPORTED for + # x86_64 SysV), then any feature-derived guards (e.g. + # __ARM_FEATURE_SHA3 for SHA3 kernels). 
+ conds = ( + [f"defined({arch_guard})"] + + [f"defined({g})" for g in extra_guards] + + [f"defined({g})" for g in feature_guards] + ) + cond = " && ".join(conds) + + yield f"#if {cond}" + yield "" + yield '#include "../../../notrandombytes/notrandombytes.h"' + yield "" + yield f"typedef {config['state_type']} reg_state;" + yield "" + qualifier = config["fn_qualifier"] + yield f"{qualifier} {yaml_data.get('Signature')};" + yield "" + yield f"int check_{check_name}(void)" + yield "{" + + yield " int test_iter;" + yield " reg_state input_state, output_state;" + yield " int violations;" + + sorted_registers = sorted(registers.items()) + + buffer_info = [] + for reg_name, reg_info in sorted_registers: + if reg_info.get("type") == "buffer": + size_bytes = resolve_buffer_size(reg_info, registers) + buffer_name = f"buf_{reg_name}" + description = reg_info.get("description", "") + yield f" MLD_ALIGN uint8_t {buffer_name}[{size_bytes}]; /* {description} */" + buffer_info.append((reg_name, buffer_name, size_bytes)) + + # Emit a runtime mld_sys_check_capability() skip (returning + # MLD_ABICHECK_SKIPPED) per Features cap that has a syscap; caps + # without a syscap (build-time-only) are no-ops here. + # Emitted after the buffer decls to keep C90 mixed-decl-and-code happy. 
+ for cap in features: + cap_enum = ABI_CAPS[cap]["syscap"] + if cap_enum is None: + continue + cap_desc = ABI_CAPS[cap]["description"] + yield "" + yield f" if (!mld_sys_check_capability({cap_enum}))" + yield " {" + yield ( + f' fprintf(stderr, "ABI check {check_name}: ' + f'host lacks {cap_desc}, skipping\\n");' + ) + yield " return MLD_ABICHECK_SKIPPED;" + yield " }" + + yield "" + yield " for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++)" + yield " {" + yield " /* Initialize random register state */" + yield f" {config['init_func']}(&input_state);" + yield "" + + for reg_name, buffer_name, size_bytes in buffer_info: + yield f" randombytes({buffer_name}, {size_bytes});" + + yield "" + yield " /* Set up register state for function arguments */" + + reg_to_field = config["reg_to_field"] + for reg_name, reg_info in sorted_registers: + field = reg_to_field(reg_name) + if reg_info.get("type") == "buffer": + # Cast pointer to gpr_int_type (uint64_t / uint32_t on + # armv81m) so the conversion lands at the slot width. 
+ yield f" input_state.{field} = ({config['gpr_int_type']}){f'buf_{reg_name}'};" + elif reg_info.get("type") == "scalar": + test_with = reg_info.get("test_with") + if test_with is not None: + yield f" input_state.{field} = {test_with};" + + yield "" + yield " /* Call function through ABI test stub */" + yield ( + f" {config['stub_func']}(&input_state, &output_state, " + f"({qualifier} void (*)(void)){c_function_name});" + ) + yield "" + yield " /* Check ABI compliance */" + yield ( + f" violations = {config['check_func']}(" + f"&input_state, &output_state, MLD_ABICHECK_VERBOSE);" + ) + yield " if (violations > 0) {" + yield f' fprintf(stderr, "ABI test FAILED for {function_name} (iteration %d): %d violations\\n",' + yield " test_iter + 1, violations);" + yield " return MLD_ABICHECK_FAILED;" + yield " }" + yield " }" + yield "" + yield " return MLD_ABICHECK_PASSED;" + yield "}" + yield "" + # No #else: components.mk only globs check_*.c from the + # active arch's checks/ directory, and the forward decl in + # checks_<arch>_all.h is gated on the same `cond`, so an + # inactive-arch fallback would be unreachable. + yield f"#endif /* {cond} */" + yield "" + + output_file = ( + f"test/abicheck/{config['asm_subdir']}/checks/check_{check_name}.c" + ) + update_file(output_file, "\n".join(gen_c_test()), force_format=True) + generated_functions.append((check_name, feature_guards)) + generated_basenames_by_subdir[config["asm_subdir"]].add( + f"check_{check_name}.c" + ) + + all_groups.append((arch_calling, arch_guard, extra_guards, generated_functions)) + + # Prune stale check_*.c left from a renamed/removed kernel. components.mk + # globs check_*.c into the abicheck build, so a stale file would silently + # compile and link against an out-of-date YAML signature. Mirrors the + # delete= path in synchronize_backend. 
+ for asm_subdir, generated_basenames in generated_basenames_by_subdir.items(): + subdir = f"test/abicheck/{asm_subdir}/checks" + for f in get_files(f"{subdir}/check_*.c"): + if os.path.basename(f) not in generated_basenames: + update_via_remove(f) + + # Generate per-arch checks_<arch>_all.h headers, one per arch_guard. Each + # header lives at test/abicheck/<arch>/checks_<arch>_all.h (one + # level above that arch's check_*.c sources in checks/) and is selected + # from abicheck.c via a single #if-chain on the arch guards. + # + # Group the per-(arch, calling) rows by arch_guard, so kernels for the + # same architecture but different calling conventions share one + # all_checks[] array on that arch (gated per-entry on the calling + # convention's extra_guards). Today each arch has only one calling + # convention, but this keeps the door open to e.g. x86_64 + SysV plus + # x86_64 + MSx64 coexisting in the same build. + groups_by_arch = {} + asm_subdir_by_arch = {} + for arch_calling, arch_guard, extra_guards, functions in all_groups: + groups_by_arch.setdefault(arch_guard, []).append( + (arch_calling, extra_guards, functions) + ) + asm_subdir_by_arch[arch_guard] = ARCH_CALLINGS[arch_calling]["asm_subdir"] + + def gen_checks_arch_header(arch_guard, groups, asm_subdir): + guard_macro = f"MLD_TEST_ABICHECK_CHECKS_{asm_subdir.upper()}_ALL_H" + yield from gen_header() + yield "" + yield f"#ifndef {guard_macro}" + yield f"#define {guard_macro}" + yield "" + yield "#include <stddef.h>" + yield '#include "../abicheck_common.h"' + yield "" + + # Wrap the whole body in the arch guard so accidental inclusion on a + # foreign architecture is harmless (this header is selected by an + # #if-chain in abicheck.c, but defense in depth). + yield f"#if defined({arch_guard})" + yield "" + + # Emit one row per function, consolidating consecutive functions that + # share the same feature_guards into a single #if/#endif block (rather + # than one block per function). This collapses a uniform-feature group + # (e.g. 
x86_64, where every kernel is gated on __AVX2__) into a single + # block, while still splitting where guards differ (e.g. aarch64, where + # SHA3 and non-SHA3 kernels are interleaved). + def emit_feature_grouped(functions, render): + i = 0 + while i < len(functions): + guards = functions[i][1] + j = i + while j < len(functions) and functions[j][1] == guards: + j += 1 + for g in guards: + yield f"#if defined({g})" + for func_name, _ in functions[i:j]: + yield render(func_name) + for _ in guards: + yield "#endif" + i = j + + # Forward-declare each check_(void) for kernels in THIS arch. + # Gate on the same extra+feature guards as the registry entry below; + # this matches the #if guarding the function definition in + # check_.c, so the inactive-arch fallback can be omitted there. + for _arch_calling, extra_guards, functions in groups: + if extra_guards: + inner = " && ".join(f"defined({g})" for g in extra_guards) + yield f"#if {inner}" + yield from emit_feature_grouped( + functions, lambda fn: f"int check_{fn}(void);" + ) + if extra_guards: + yield f"#endif /* {inner} */" + + if any(functions for _, _, functions in groups): + yield "" + yield "static const abicheck_entry_t all_checks[] = {" + + for _arch_calling, extra_guards, functions in groups: + if not functions: + continue + if extra_guards: + inner = " && ".join(f"defined({g})" for g in extra_guards) + yield f"#if {inner}" + yield from emit_feature_grouped( + functions, lambda fn: f' {{"{fn}", check_{fn}}},' + ) + if extra_guards: + yield f"#endif /* {inner} */" + + yield " {NULL, NULL} /* Sentinel */" + yield "};" + + yield "" + yield f"#endif /* defined({arch_guard}) */" + yield "" + yield f"#endif /* !{guard_macro} */" + yield "" + + for arch_guard, groups in groups_by_arch.items(): + asm_subdir = asm_subdir_by_arch[arch_guard] + update_file( + f"test/abicheck/{asm_subdir}/checks_{asm_subdir}_all.h", + "\n".join(gen_checks_arch_header(arch_guard, groups, asm_subdir)), + force_format=True, + ) + + # Emit 
def gen_features_mk(asm_subdir, caps):
    """Yield the lines of one per-architecture ``abicheck_<arch>.mk`` file.

    ``caps`` is the list of capability names for this architecture. For every
    capability an empty ``ABICHECK_REQ_<cap>_FILES`` default is emitted first;
    capabilities that have at least one file then get the real file list, the
    derived object list and a target-specific ``CFLAGS +=`` line.

    ``asm_subdir`` is accepted for symmetry with the caller but is not used in
    the body.
    """
    yield from gen_yaml_header()
    yield from gen_yaml_autogen_warning()
    # NOTE(review): "dev//src/.S" and "mldsa/src/." below look like paths whose
    # placeholders were lost; they are reproduced exactly as found - confirm.
    preamble = (
        "#",
        "# Edit the YAML 'ABI.Features:' list in dev//src/.S",
        "# and re-run scripts/autogen instead.",
        "#",
        "# For each capability declared by a kernel's ABI.Features list, this",
        "# file appends the capability's CFLAGS to that kernel's .S object",
        "# under mldsa/src/.",
        "",
        "# Default each cap's file list to empty so the unconditional appends",
        "# below are safe even when a cap has no kernels on this arch.",
    )
    yield from preamble
    yield from (f"ABICHECK_REQ_{cap}_FILES :=" for cap in caps)

    for cap in caps:
        # Kernel sources that declared this capability in their YAML, plus
        # any always-on auxiliary files listed under 'aux_files'.
        declared = set(cap_to_files[cap])
        auxiliary = set(ABI_CAPS[cap].get("aux_files", []))
        paths = sorted(declared | auxiliary)
        if not paths:
            continue

        flag_string = " ".join(ABI_CAPS[cap]["cflags"])
        yield ""
        yield f"# {cap}: {ABI_CAPS[cap]['description']}"
        yield f"ABICHECK_REQ_{cap}_FILES := \\"
        # Every entry but the last carries a line continuation.
        *leading, final = paths
        for path in leading:
            yield f" {path} \\"
        yield f" {final}"
        objs_rhs = f"$(call MAKE_OBJS,$(ABICHECK_DIR),$(ABICHECK_REQ_{cap}_FILES))"
        yield f"ABICHECK_REQ_{cap}_OBJS := " + objs_rhs
        yield f"$(ABICHECK_REQ_{cap}_OBJS): CFLAGS += {flag_string}"
    yield ""
def abicheck(self):
    """Build and, when requested, run the ABI compliance tests.

    Does nothing unless optimized builds are enabled (``self.do_opt()``).
    The tests are compiled with ``opt=True``; they are executed only when
    ``self.args.run`` is set. ``self.check_fail()`` is called afterwards.
    """
    if self.do_opt():
        kind = TEST_TYPES.ABICHECK
        self._compile_schemes(kind, True)
        if self.args.run:
            self._run_scheme(kind, True, None)

        self.check_fail()
A per-arch assembly call stub (`<arch>/callstub_<arch>.S`) loads a random
register state, calls the kernel, and captures the resulting register state. It
does *not* assume that the kernel obeys the calling convention — detecting
violations is the point. The generated `<arch>/checks/check_<kernel>.c` then
seeds the state, backs pointer arguments with correctly-sized buffers, calls
through the stub, and reports any callee-saved register the kernel failed to
preserve.

Before trusting any verdict, `abicheck.c` runs a self-test: hand-written
corrupters (`<arch>/selftest_<arch>.S`) that each clobber one callee-saved
register, confirming that the checker fires.
Each kernel's buffer sizes, calling convention, and required CPU features come
from a `/*yaml ... */` block in its `dev/` assembly source. `scripts/autogen`
turns that into the per-kernel `check_<kernel>.c`, the per-arch
`checks_<arch>_all.h` registry, and the per-arch `abicheck_<arch>.mk` (CFLAGS
per feature, e.g. `-mavx2`). The per-arch `.mk` files are pulled in through
`abicheck.mk`, which `test/mk/components.mk` includes. Edit the YAML, not the
generated files.
#if defined(MLD_SYS_AARCH64)

/*
 * Count the registers that differ between two register snapshots and that
 * this checker treats as callee-saved.
 * Callee-saved set per @[AAPCS64, Section "Machine Registers"].
 *
 * before: snapshot handed to the call stub as input state
 * after:  snapshot captured by the call stub after the call
 * quiet:  forwarded unchanged to MLD_ABI_VIOLATION for every mismatch
 *
 * Returns the number of mismatching registers (0 if none).
 */
int check_aarch64_aapcs_compliance(struct aarch64_register_state *before,
                                   struct aarch64_register_state *after,
                                   int quiet)
{
  int mismatches = 0;
  int reg;

  /* x18: AAPCS64 leaves it unspecified, but Darwin reserves it and
   * Linux/ELF leaves it unused, so it is treated as callee-saved here. */
  if (after->gpr[18] != before->gpr[18])
  {
    MLD_ABI_VIOLATION(quiet, "%s modified\n", "x18");
    mismatches += 1;
  }

  /* Callee-saved GPRs x19-x29. */
  for (reg = 19; reg < 30; reg++)
  {
    if (after->gpr[reg] == before->gpr[reg])
    {
      continue;
    }
    MLD_ABI_VIOLATION(quiet, "x%d modified\n", reg);
    mismatches += 1;
  }

  /* Callee-saved NEON registers d8-d15: only element [0] of each q-register
   * slot (the lower 64 bits) is compared. */
  for (reg = 8; reg < 16; reg++)
  {
    if (after->neon[reg][0] == before->neon[reg][0])
    {
      continue;
    }
    MLD_ABI_VIOLATION(quiet, "d%d modified\n", reg);
    mismatches += 1;
  }

  return mismatches;
}

/* Overwrite every byte of *state with output from randombytes(). */
void init_aarch64_register_state(struct aarch64_register_state *state)
{
  uint8_t *raw = (uint8_t *)state;

  randombytes(raw, sizeof(struct aarch64_register_state));
}

#endif /* MLD_SYS_AARCH64 */
/*
 * Register snapshot exchanged with the assembly call stub
 * (callstub_aarch64.S).
 *
 * MLD_ALIGN-aligned so the stub's 128-bit ldp/stp q on neon[] are aligned.
 *
 * Layout contract with the stub: gpr[] occupies the first 32 * 8 = 256
 * bytes, and the stub reaches neon[] by adding the literal 256 to the
 * struct's address. The stub transfers x0-x29 through gpr[0..29] and does
 * not read or write gpr[30] or gpr[31].
 */
struct MLD_ALIGN aarch64_register_state
{
  uint64_t gpr[32];     /* indexed by x-register number; see layout note */
  uint64_t neon[32][2]; /* q0-q31 (full 128-bit NEON registers as two 64-bit
                           values) */
};

/*
 * Returns the number of callee-saved registers that differ between `before`
 * and `after`.
 * quiet: MLD_ABICHECK_VERBOSE or MLD_ABICHECK_QUIET (see abicheck_common.h).
 */
int check_aarch64_aapcs_compliance(struct aarch64_register_state *before,
                                   struct aarch64_register_state *after,
                                   int quiet);

/* Fills *state with random bytes. */
void init_aarch64_register_state(struct aarch64_register_state *state);

/*
 * Assembly call stub: loads registers from *input, calls function_ptr, and
 * stores the resulting registers into *output. On Apple targets the stub
 * also writes the live x18 value into input->gpr[18] instead of loading it.
 */
extern void asm_call_stub_aarch64(struct aarch64_register_state *input,
                                  struct aarch64_register_state *output,
                                  void (*function_ptr)(void));
/*
 * Function: asm_call_stub_aarch64
 * Description: AArch64 ABI compliance testing stub that captures register state
 *              before and after function calls to verify AAPCS compliance
 *
 * C Signature: void asm_call_stub_aarch64(struct aarch64_register_state *input,
 *                                         struct aarch64_register_state *output,
 *                                         void (*function_ptr)(void))
 *
 * Stack frame (lowest address first, STACK_SIZE bytes in total):
 *   - STACK_SIZE_GPRS   bytes: saved GPRs x18-x30 (7 pairs * 16 bytes; the
 *                              x18 slot is padded with xzr)
 *   - STACK_SIZE_VREGS  bytes: saved NEON registers d8-d15 (8 regs * 8 bytes)
 *   - STACK_SIZE_LOCALS bytes: locals (output state ptr, function ptr)
 */

#define STACK_SIZE_GPRS 112
#define STACK_SIZE_VREGS 64
#define STACK_SIZE_LOCALS 16

/* Derived from the component sizes so the total cannot drift out of sync
 * with the region sizes and bases used by the save/restore macros. */
#define STACK_SIZE (STACK_SIZE_GPRS + STACK_SIZE_VREGS + STACK_SIZE_LOCALS)
#define STACK_BASE_GPRS 0
#define STACK_BASE_VREGS STACK_SIZE_GPRS
#define STACK_BASE_LOCALS (STACK_SIZE_GPRS + STACK_SIZE_VREGS)

/* The stub adjusts sp by STACK_SIZE and then uses sp as a base address, which
 * requires sp to stay 16-byte aligned on AArch64. */
#if (STACK_SIZE % 16) != 0
#error "STACK_SIZE must be a multiple of 16 bytes"
#endif
/* Store \lo,\hi into the (\base, \idx) slot and emit CFI offsets
 * (CFA = sp + STACK_SIZE inside the prologue block).
 *
 * A slot is 16 bytes wide: \lo lands at sp + \base + 16*\idx and \hi 8 bytes
 * above it. The CFI offsets are those same addresses expressed relative to
 * the CFA, hence negative. */
.macro save_pair lo, hi, base, idx
    stp \lo, \hi, [sp, #(\base + 16*\idx)]
    .cfi_offset \lo, -(STACK_SIZE - \base - 16*\idx)
    .cfi_offset \hi, -(STACK_SIZE - \base - 16*\idx - 8)
.endm

/* Variant for the (xzr, x18) pair: xzr is filler (no CFI), x18 lives in the
 * upper half of the slot. Storing xzr writes zero into the lower half. */
.macro save_pair_padlo hi, base, idx
    stp xzr, \hi, [sp, #(\base + 16*\idx)]
    .cfi_offset \hi, -(STACK_SIZE - \base - 16*\idx - 8)
.endm

/* Reload \lo,\hi from the (\base, \idx) slot and emit CFI restores; the
 * inverse of save_pair. */
.macro restore_pair lo, hi, base, idx
    ldp \lo, \hi, [sp, #(\base + 16*\idx)]
    .cfi_restore \lo
    .cfi_restore \hi
.endm

/* Inverse of save_pair_padlo: reload \hi (xzr discards the filler half), only
 * \hi has CFI. */
.macro restore_pair_padlo hi, base, idx
    ldp xzr, \hi, [sp, #(\base + 16*\idx)]
    .cfi_restore \hi
.endm

/* Callee-saved GPRs x18-x30; xzr pads the x18 slot for 16-byte stp/ldp
 * alignment (see save_pair_padlo). Uses slots 0-6 of the GPR region, i.e.
 * 7 * 16 = 112 bytes. */
.macro save_gprs
    save_pair_padlo x18, STACK_BASE_GPRS, 0
    save_pair x19, x20, STACK_BASE_GPRS, 1
    save_pair x21, x22, STACK_BASE_GPRS, 2
    save_pair x23, x24, STACK_BASE_GPRS, 3
    save_pair x25, x26, STACK_BASE_GPRS, 4
    save_pair x27, x28, STACK_BASE_GPRS, 5
    save_pair x29, x30, STACK_BASE_GPRS, 6
.endm

/* Inverse of save_gprs: reloads x18-x30 from the same slots. */
.macro restore_gprs
    restore_pair_padlo x18, STACK_BASE_GPRS, 0
    restore_pair x19, x20, STACK_BASE_GPRS, 1
    restore_pair x21, x22, STACK_BASE_GPRS, 2
    restore_pair x23, x24, STACK_BASE_GPRS, 3
    restore_pair x25, x26, STACK_BASE_GPRS, 4
    restore_pair x27, x28, STACK_BASE_GPRS, 5
    restore_pair x29, x30, STACK_BASE_GPRS, 6
.endm
/* Callee-saved NEON registers d8-d15. Uses slots 0-3 of the vector region
 * (two 8-byte d registers per 16-byte slot). */
.macro save_vregs
    save_pair d8, d9, STACK_BASE_VREGS, 0
    save_pair d10, d11, STACK_BASE_VREGS, 1
    save_pair d12, d13, STACK_BASE_VREGS, 2
    save_pair d14, d15, STACK_BASE_VREGS, 3
.endm

/* Inverse of save_vregs. */
.macro restore_vregs
    restore_pair d8, d9, STACK_BASE_VREGS, 0
    restore_pair d10, d11, STACK_BASE_VREGS, 1
    restore_pair d12, d13, STACK_BASE_VREGS, 2
    restore_pair d14, d15, STACK_BASE_VREGS, 3
.endm

/*
 * asm_call_stub_aarch64(input, output, function_ptr)
 *
 * On entry: x0 = input state, x1 = output state, x2 = function to call.
 *
 * Sequence:
 *   1. Save the stub's own x18-x30 and d8-d15 on the stack, so the stub
 *      itself returns to its C caller with those registers intact regardless
 *      of what the callee does to them.
 *   2. Spill x1/x2 to the locals area, then load q0-q31 and x0-x29 from the
 *      input state (x18 is handled specially on Apple targets).
 *   3. Call the target via x30.
 *   4. Store x0-x29 and q0-q31 into the output state.
 *   5. Restore the saved registers and return.
 *
 * x30 serves as the scratch/base register throughout, so gpr[30] and gpr[31]
 * of both states are never accessed. After the call the stub addresses its
 * locals relative to sp, i.e. it relies on the callee returning with sp
 * unchanged.
 */
.text
.balign 4
#ifdef __APPLE__
.global _asm_call_stub_aarch64
_asm_call_stub_aarch64:
#else
.global asm_call_stub_aarch64
asm_call_stub_aarch64:
#endif
    .cfi_startproc
    sub sp, sp, #(STACK_SIZE)
    .cfi_def_cfa_offset STACK_SIZE

    save_gprs
    save_vregs

    /* Spill output ptr (x1) and fn ptr (x2); the next block loads
     * x0-x29 from input state and clobbers them. */
    stp x1, x2, [sp, #(STACK_BASE_LOCALS)]

    /* Load NEON registers from input state.
     * 256 = size of the gpr[] array that precedes neon[] in the state. */
    add x30, x0, #256
    ldp q0, q1, [x30, #(16*0)]
    ldp q2, q3, [x30, #(16*2)]
    ldp q4, q5, [x30, #(16*4)]
    ldp q6, q7, [x30, #(16*6)]
    ldp q8, q9, [x30, #(16*8)]
    ldp q10, q11, [x30, #(16*10)]
    ldp q12, q13, [x30, #(16*12)]
    ldp q14, q15, [x30, #(16*14)]
    ldp q16, q17, [x30, #(16*16)]
    ldp q18, q19, [x30, #(16*18)]
    ldp q20, q21, [x30, #(16*20)]
    ldp q22, q23, [x30, #(16*22)]
    ldp q24, q25, [x30, #(16*24)]
    ldp q26, q27, [x30, #(16*26)]
    ldp q28, q29, [x30, #(16*28)]
    ldp q30, q31, [x30, #(16*30)]
    /* Load GPRs from input state (x30 goes back to the start of the state;
     * x0 is overwritten by the first load, so x30 is the only base left). */
    sub x30, x30, #256
    ldp x0, x1, [x30, #(8*0)]
    ldp x2, x3, [x30, #(8*2)]
    ldp x4, x5, [x30, #(8*4)]
    ldp x6, x7, [x30, #(8*6)]
    ldp x8, x9, [x30, #(8*8)]
    ldp x10, x11, [x30, #(8*10)]
    ldp x12, x13, [x30, #(8*12)]
    ldp x14, x15, [x30, #(8*14)]
    ldp x16, x17, [x30, #(8*16)]
#ifdef __APPLE__
    /* x18 is the platform register on Darwin and reserved by the OS;
     * user code must not clobber it. Skip seeding x18 from the random
     * input state, and instead overwrite input->gpr[18] with the live
     * x18 value so the post-call comparison still has a meaningful
     * pre-call baseline (the kernel under test must leave x18
     * unchanged either way). */
    str x18, [x30, #(8*18)]
    ldr x19, [x30, #(8*19)]
#else /* __APPLE__ */
    ldp x18, x19, [x30, #(8*18)]
#endif /* !__APPLE__ */
    ldp x20, x21, [x30, #(8*20)]
    ldp x22, x23, [x30, #(8*22)]
    ldp x24, x25, [x30, #(8*24)]
    ldp x26, x27, [x30, #(8*26)]
    ldp x28, x29, [x30, #(8*28)]

    /* Reload target function pointer (overwriting x30/LR is fine - blr will set it) */
    ldr x30, [sp, #(STACK_BASE_LOCALS + 8)]
    /* Call target */
    blr x30
    /* Load output state address (overwrite x30/LR again) */
    ldr x30, [sp, #(STACK_BASE_LOCALS + 0)]

    /* Store final GPR state to output state */
    stp x0, x1, [x30, #(8*0)]
    stp x2, x3, [x30, #(8*2)]
    stp x4, x5, [x30, #(8*4)]
    stp x6, x7, [x30, #(8*6)]
    stp x8, x9, [x30, #(8*8)]
    stp x10, x11, [x30, #(8*10)]
    stp x12, x13, [x30, #(8*12)]
    stp x14, x15, [x30, #(8*14)]
    stp x16, x17, [x30, #(8*16)]
    stp x18, x19, [x30, #(8*18)]
    stp x20, x21, [x30, #(8*20)]
    stp x22, x23, [x30, #(8*22)]
    stp x24, x25, [x30, #(8*24)]
    stp x26, x27, [x30, #(8*26)]
    stp x28, x29, [x30, #(8*28)]

    /* Store final NEON state to output state */
    add x30, x30, #256
    stp q0, q1, [x30, #(16*0)]
    stp q2, q3, [x30, #(16*2)]
    stp q4, q5, [x30, #(16*4)]
    stp q6, q7, [x30, #(16*6)]
    stp q8, q9, [x30, #(16*8)]
    stp q10, q11, [x30, #(16*10)]
    stp q12, q13, [x30, #(16*12)]
    stp q14, q15, [x30, #(16*14)]
    stp q16, q17, [x30, #(16*16)]
    stp q18, q19, [x30, #(16*18)]
    stp q20, q21, [x30, #(16*20)]
    stp q22, q23, [x30, #(16*22)]
    stp q24, q25, [x30, #(16*24)]
    stp q26, q27, [x30, #(16*26)]
    stp q28, q29, [x30, #(16*28)]
    stp q30, q31, [x30, #(16*30)]

    /* Undo the prologue: the callee's register state has been captured, so
     * bring back the values the stub's own caller expects. */
    restore_vregs
    restore_gprs
    add sp, sp, #(STACK_SIZE)
    .cfi_def_cfa_offset 0
    ret
    .cfi_endproc
a/test/abicheck/aarch64/checks/check_intt_aarch64_asm.c b/test/abicheck/aarch64/checks/check_intt_aarch64_asm.c new file mode 100644 index 000000000..a73a7bb7b --- /dev/null +++ b/test/abicheck/aarch64/checks/check_intt_aarch64_asm.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_intt_aarch64_asm(int32_t *r, const int32_t *zetas_l78, + const int32_t *zetas_l123456); + +int check_intt_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[1536]; /* Twiddle factors for layers 7-8 (384 x int32_t) */ + MLD_ALIGN uint8_t + buf_x2[640]; /* Twiddle factors for layers 1-6 (160 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 1536); + randombytes(buf_x2, 640); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_intt_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for intt_aarch64_asm (iteration %d): 
%d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_keccak_f1600_x1_scalar_aarch64_asm.c b/test/abicheck/aarch64/checks/check_keccak_f1600_x1_scalar_aarch64_asm.c new file mode 100644 index 000000000..37d83fded --- /dev/null +++ b/test/abicheck/aarch64/checks/check_keccak_f1600_x1_scalar_aarch64_asm.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_keccak_f1600_x1_scalar_aarch64_asm(uint64_t state[25], + const uint64_t rc[24]); + +int check_keccak_f1600_x1_scalar_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[200]; /* Keccak state (25 x uint64_t) */ + MLD_ALIGN uint8_t buf_x1[192]; /* Round constants (24 x uint64_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 200); + randombytes(buf_x1, 192); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_keccak_f1600_x1_scalar_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + 
fprintf(stderr, + "ABI test FAILED for keccak_f1600_x1_scalar_aarch64_asm " + "(iteration %d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_keccak_f1600_x1_v84a_aarch64_asm.c b/test/abicheck/aarch64/checks/check_keccak_f1600_x1_v84a_aarch64_asm.c new file mode 100644 index 000000000..23bdcf793 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_keccak_f1600_x1_v84a_aarch64_asm.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) && defined(__ARM_FEATURE_SHA3) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_keccak_f1600_x1_v84a_aarch64_asm(uint64_t state[25], + const uint64_t rc[24]); + +int check_keccak_f1600_x1_v84a_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[200]; /* Keccak state (25 x uint64_t) */ + MLD_ALIGN uint8_t buf_x1[192]; /* Round constants (24 x uint64_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_SHA3)) + { + fprintf(stderr, + "ABI check keccak_f1600_x1_v84a_aarch64_asm: host lacks Armv8.4-A " + "SHA3 (eor3, rax1, xar, bcax), skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 200); + randombytes(buf_x1, 192); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = 
(uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_keccak_f1600_x1_v84a_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for keccak_f1600_x1_v84a_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 && __ARM_FEATURE_SHA3 */ diff --git a/test/abicheck/aarch64/checks/check_keccak_f1600_x2_v84a_aarch64_asm.c b/test/abicheck/aarch64/checks/check_keccak_f1600_x2_v84a_aarch64_asm.c new file mode 100644 index 000000000..1643bea57 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_keccak_f1600_x2_v84a_aarch64_asm.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) && defined(__ARM_FEATURE_SHA3) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_keccak_f1600_x2_v84a_aarch64_asm(uint64_t state[50], + const uint64_t rc[24]); + +int check_keccak_f1600_x2_v84a_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t + buf_x0[400]; /* Two sequential Keccak states (state0[25], state1[25]) */ + MLD_ALIGN uint8_t buf_x1[192]; /* Round constants (24 x uint64_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_SHA3)) + { + fprintf(stderr, + "ABI check keccak_f1600_x2_v84a_aarch64_asm: host lacks Armv8.4-A " + "SHA3 (eor3, rax1, xar, bcax), skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 400); + randombytes(buf_x1, 192); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_keccak_f1600_x2_v84a_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for keccak_f1600_x2_v84a_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 && __ARM_FEATURE_SHA3 */ diff --git a/test/abicheck/aarch64/checks/check_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.c b/test/abicheck/aarch64/checks/check_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.c new 
file mode 100644 index 000000000..953877ec3 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm(uint64_t state[100], + const uint64_t rc[24]); + +int check_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[800]; /* Four sequential Keccak states (state0[25], + state1[25], state2[25], state3[25]) */ + MLD_ALIGN uint8_t buf_x1[192]; /* Round constants (24 x uint64_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 800); + randombytes(buf_x1, 192); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf( + stderr, + "ABI test FAILED for keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm " + "(iteration %d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + 
+#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.c b/test/abicheck/aarch64/checks/check_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.c new file mode 100644 index 000000000..0660e1921 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) && defined(__ARM_FEATURE_SHA3) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm( + uint64_t state[100], const uint64_t rc[24]); + +int check_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[800]; /* Four sequential Keccak states (state0[25], + state1[25], state2[25], state3[25]) */ + MLD_ALIGN uint8_t buf_x1[192]; /* Round constants (24 x uint64_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_SHA3)) + { + fprintf(stderr, + "ABI check keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm: " + "host lacks Armv8.4-A SHA3 (eor3, rax1, xar, bcax), skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 800); + randombytes(buf_x1, 192); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ 
+ asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for " + "keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 && __ARM_FEATURE_SHA3 */ diff --git a/test/abicheck/aarch64/checks/check_ntt_aarch64_asm.c b/test/abicheck/aarch64/checks/check_ntt_aarch64_asm.c new file mode 100644 index 000000000..836966b32 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_ntt_aarch64_asm.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_ntt_aarch64_asm(int32_t *r, const int32_t *zetas_l123456, + const int32_t *zetas_l78); + +int check_ntt_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[576]; /* Twiddle factors for layers 1-6 (144 x int32_t) */ + MLD_ALIGN uint8_t + buf_x2[1536]; /* Twiddle factors for layers 7-8 (384 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 576); + randombytes(buf_x2, 1536); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_ntt_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf( + stderr, + "ABI test FAILED for ntt_aarch64_asm (iteration %d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_caddq_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_caddq_aarch64_asm.c new file mode 100644 index 000000000..2bc5f6474 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_poly_caddq_aarch64_asm.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) The mldsa-native project authors + * 
SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_poly_caddq_aarch64_asm(int32_t *a); + +int check_poly_caddq_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input/output polynomial (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_poly_caddq_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_caddq_aarch64_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_chknorm_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_chknorm_aarch64_asm.c new file mode 100644 index 000000000..355d5cd3b --- /dev/null +++ b/test/abicheck/aarch64/checks/check_poly_chknorm_aarch64_asm.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. 
+ * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +int mld_poly_chknorm_aarch64_asm(const int32_t *a, int32_t B); + +int check_poly_chknorm_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input polynomial (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = 131072; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_poly_chknorm_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_chknorm_aarch64_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_decompose_32_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_decompose_32_aarch64_asm.c new file mode 100644 index 000000000..67d16117b --- /dev/null +++ b/test/abicheck/aarch64/checks/check_poly_decompose_32_aarch64_asm.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_poly_decompose_32_aarch64_asm(int32_t *a1, int32_t *a0); + +int check_poly_decompose_32_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t + buf_x0[1024]; /* Output high-part polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[1024]; /* Input polynomial / output low-part (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_poly_decompose_32_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_decompose_32_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_decompose_88_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_decompose_88_aarch64_asm.c new file mode 100644 index 000000000..096539f46 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_poly_decompose_88_aarch64_asm.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen 
+ * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_poly_decompose_88_aarch64_asm(int32_t *a1, int32_t *a0); + +int check_poly_decompose_88_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t + buf_x0[1024]; /* Output high-part polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[1024]; /* Input polynomial / output low-part (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_poly_decompose_88_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_decompose_88_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_pointwise_montgomery_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_pointwise_montgomery_aarch64_asm.c new file mode 100644 index 000000000..1d3b45f7f --- /dev/null +++ b/test/abicheck/aarch64/checks/check_poly_pointwise_montgomery_aarch64_asm.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: 
Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_poly_pointwise_montgomery_aarch64_asm(int32_t *a, const int32_t *b); + +int check_poly_pointwise_montgomery_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[1024]; /* Input polynomial (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_poly_pointwise_montgomery_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_pointwise_montgomery_aarch64_asm " + "(iteration %d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_use_hint_32_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_use_hint_32_aarch64_asm.c new file mode 100644 index 000000000..0323dbbb0 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_poly_use_hint_32_aarch64_asm.c @@ -0,0 +1,65 @@ +/* + 
* Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_poly_use_hint_32_aarch64_asm(int32_t *a, const int32_t *h); + +int check_poly_use_hint_32_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[1024]; /* Hint polynomial (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_poly_use_hint_32_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_use_hint_32_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_poly_use_hint_88_aarch64_asm.c b/test/abicheck/aarch64/checks/check_poly_use_hint_88_aarch64_asm.c new file mode 100644 index 000000000..56f32177d --- /dev/null +++ 
b/test/abicheck/aarch64/checks/check_poly_use_hint_88_aarch64_asm.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_poly_use_hint_88_aarch64_asm(int32_t *a, const int32_t *h); + +int check_poly_use_hint_88_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[1024]; /* Hint polynomial (256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 1024); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_poly_use_hint_88_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_use_hint_88_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.c 
b/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.c new file mode 100644 index 000000000..6739cd34f --- /dev/null +++ b/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm( + int32_t *r, const int32_t a[4][256], const int32_t b[4][256]); + +int check_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[4096]; /* Input polynomial vector a (4 x 256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x2[4096]; /* Input polynomial vector b (4 x 256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 4096); + randombytes(buf_x2, 4096); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if 
(violations > 0) + { + fprintf(stderr, + "ABI test FAILED for " + "polyvecl_pointwise_acc_montgomery_l4_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.c b/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.c new file mode 100644 index 000000000..9e5142316 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm( + int32_t *r, const int32_t a[5][256], const int32_t b[5][256]); + +int check_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[5120]; /* Input polynomial vector a (5 x 256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x2[5120]; /* Input polynomial vector b (5 x 256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 5120); + randombytes(buf_x2, 5120); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + 
input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for " + "polyvecl_pointwise_acc_montgomery_l5_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.c b/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.c new file mode 100644 index 000000000..93b1b76b0 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm( + int32_t *r, const int32_t a[7][256], const int32_t b[7][256]); + +int check_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x1[7168]; /* Input polynomial vector a (7 x 256 x int32_t) */ + MLD_ALIGN uint8_t + buf_x2[7168]; /* Input polynomial vector b (7 x 256 x int32_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 7168); + randombytes(buf_x2, 7168); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64( + &input_state, &output_state, + (void (*)(void))mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for " + "polyvecl_pointwise_acc_montgomery_l7_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_polyz_unpack_17_aarch64_asm.c b/test/abicheck/aarch64/checks/check_polyz_unpack_17_aarch64_asm.c new file mode 100644 index 000000000..d86d17495 --- /dev/null +++ 
b/test/abicheck/aarch64/checks/check_polyz_unpack_17_aarch64_asm.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_polyz_unpack_17_aarch64_asm(int32_t *r, const uint8_t *buf, + const uint8_t *indices); + +int check_polyz_unpack_17_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[576]; /* Packed input bytes */ + MLD_ALIGN uint8_t buf_x2[64]; /* Permutation index table (64 x uint8_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 576); + randombytes(buf_x2, 64); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_polyz_unpack_17_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for polyz_unpack_17_aarch64_asm (iteration %d): " + "%d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git 
a/test/abicheck/aarch64/checks/check_polyz_unpack_19_aarch64_asm.c b/test/abicheck/aarch64/checks/check_polyz_unpack_19_aarch64_asm.c new file mode 100644 index 000000000..4b6ecc65c --- /dev/null +++ b/test/abicheck/aarch64/checks/check_polyz_unpack_19_aarch64_asm.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +void mld_polyz_unpack_19_aarch64_asm(int32_t *r, const uint8_t *buf, + const uint8_t *indices); + +int check_polyz_unpack_19_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[640]; /* Packed input bytes */ + MLD_ALIGN uint8_t buf_x2[64]; /* Permutation index table (64 x uint8_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 640); + randombytes(buf_x2, 64); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = (uint64_t)buf_x2; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_polyz_unpack_19_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for polyz_unpack_19_aarch64_asm 
(iteration %d): " + "%d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_rej_uniform_aarch64_asm.c b/test/abicheck/aarch64/checks/check_rej_uniform_aarch64_asm.c new file mode 100644 index 000000000..2d4375dae --- /dev/null +++ b/test/abicheck/aarch64/checks/check_rej_uniform_aarch64_asm.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +uint64_t mld_rej_uniform_aarch64_asm(int32_t *r, const uint8_t *buf, + unsigned buflen, const uint8_t *table); + +int check_rej_uniform_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output buffer (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[840]; /* Input buffer */ + MLD_ALIGN uint8_t buf_x3[256]; /* Lookup table (256 x uint8_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 840); + randombytes(buf_x3, 256); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = 840; + input_state.gpr[3] = (uint64_t)buf_x3; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_rej_uniform_aarch64_asm); + + /* Check ABI compliance */ + 
violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for rej_uniform_aarch64_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_rej_uniform_eta2_aarch64_asm.c b/test/abicheck/aarch64/checks/check_rej_uniform_eta2_aarch64_asm.c new file mode 100644 index 000000000..fdca38dec --- /dev/null +++ b/test/abicheck/aarch64/checks/check_rej_uniform_eta2_aarch64_asm.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +uint64_t mld_rej_uniform_eta2_aarch64_asm(int32_t *r, const uint8_t *buf, + unsigned buflen, + const uint8_t *table); + +int check_rej_uniform_eta2_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output buffer (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[136]; /* Input buffer */ + MLD_ALIGN uint8_t buf_x3[4096]; /* Lookup table (4096 x uint8_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 136); + randombytes(buf_x3, 4096); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = 136; + 
input_state.gpr[3] = (uint64_t)buf_x3; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_rej_uniform_eta2_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for rej_uniform_eta2_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks/check_rej_uniform_eta4_aarch64_asm.c b/test/abicheck/aarch64/checks/check_rej_uniform_eta4_aarch64_asm.c new file mode 100644 index 000000000..7c38db632 --- /dev/null +++ b/test/abicheck/aarch64/checks/check_rej_uniform_eta4_aarch64_asm.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_aarch64.h" +#include "../checks_aarch64_all.h" + +#if defined(MLD_SYS_AARCH64) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct aarch64_register_state reg_state; + +uint64_t mld_rej_uniform_eta4_aarch64_asm(int32_t *r, const uint8_t *buf, + unsigned buflen, + const uint8_t *table); + +int check_rej_uniform_eta4_aarch64_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_x0[1024]; /* Output buffer (256 x int32_t) */ + MLD_ALIGN uint8_t buf_x1[272]; /* Input buffer */ + MLD_ALIGN uint8_t buf_x3[4096]; /* Lookup table (4096 x uint8_t) */ + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_aarch64_register_state(&input_state); + + randombytes(buf_x0, 1024); + randombytes(buf_x1, 272); + randombytes(buf_x3, 4096); + + /* Set up register state for function arguments */ + input_state.gpr[0] = (uint64_t)buf_x0; + input_state.gpr[1] = (uint64_t)buf_x1; + input_state.gpr[2] = 272; + input_state.gpr[3] = (uint64_t)buf_x3; + + /* Call function through ABI test stub */ + asm_call_stub_aarch64(&input_state, &output_state, + (void (*)(void))mld_rej_uniform_eta4_aarch64_asm); + + /* Check ABI compliance */ + violations = check_aarch64_aapcs_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for rej_uniform_eta4_aarch64_asm (iteration " + "%d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_AARCH64 */ diff --git a/test/abicheck/aarch64/checks_aarch64_all.h b/test/abicheck/aarch64/checks_aarch64_all.h new file mode 100644 index 000000000..729a53ca4 --- /dev/null +++ b/test/abicheck/aarch64/checks_aarch64_all.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 
OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + + +#ifndef MLD_TEST_ABICHECK_CHECKS_AARCH64_ALL_H +#define MLD_TEST_ABICHECK_CHECKS_AARCH64_ALL_H + +#include +#include "../abicheck_common.h" + +#if defined(MLD_SYS_AARCH64) + +int check_intt_aarch64_asm(void); +int check_keccak_f1600_x1_scalar_aarch64_asm(void); +#if defined(__ARM_FEATURE_SHA3) +int check_keccak_f1600_x1_v84a_aarch64_asm(void); +int check_keccak_f1600_x2_v84a_aarch64_asm(void); +#endif +int check_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm(void); +#if defined(__ARM_FEATURE_SHA3) +int check_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm(void); +#endif +int check_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm(void); +int check_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm(void); +int check_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm(void); +int check_ntt_aarch64_asm(void); +int check_poly_pointwise_montgomery_aarch64_asm(void); +int check_poly_caddq_aarch64_asm(void); +int check_poly_chknorm_aarch64_asm(void); +int check_poly_decompose_32_aarch64_asm(void); +int check_poly_decompose_88_aarch64_asm(void); +int check_poly_use_hint_32_aarch64_asm(void); +int check_poly_use_hint_88_aarch64_asm(void); +int check_polyz_unpack_17_aarch64_asm(void); +int check_polyz_unpack_19_aarch64_asm(void); +int check_rej_uniform_aarch64_asm(void); +int check_rej_uniform_eta2_aarch64_asm(void); +int check_rej_uniform_eta4_aarch64_asm(void); + +static const abicheck_entry_t all_checks[] = { + {"intt_aarch64_asm", check_intt_aarch64_asm}, + {"keccak_f1600_x1_scalar_aarch64_asm", + check_keccak_f1600_x1_scalar_aarch64_asm}, +#if defined(__ARM_FEATURE_SHA3) + {"keccak_f1600_x1_v84a_aarch64_asm", + check_keccak_f1600_x1_v84a_aarch64_asm}, + {"keccak_f1600_x2_v84a_aarch64_asm", + check_keccak_f1600_x2_v84a_aarch64_asm}, +#endif + {"keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm", + 
check_keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm}, +#if defined(__ARM_FEATURE_SHA3) + {"keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm", + check_keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm}, +#endif + {"polyvecl_pointwise_acc_montgomery_l4_aarch64_asm", + check_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm}, + {"polyvecl_pointwise_acc_montgomery_l5_aarch64_asm", + check_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm}, + {"polyvecl_pointwise_acc_montgomery_l7_aarch64_asm", + check_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm}, + {"ntt_aarch64_asm", check_ntt_aarch64_asm}, + {"poly_pointwise_montgomery_aarch64_asm", + check_poly_pointwise_montgomery_aarch64_asm}, + {"poly_caddq_aarch64_asm", check_poly_caddq_aarch64_asm}, + {"poly_chknorm_aarch64_asm", check_poly_chknorm_aarch64_asm}, + {"poly_decompose_32_aarch64_asm", check_poly_decompose_32_aarch64_asm}, + {"poly_decompose_88_aarch64_asm", check_poly_decompose_88_aarch64_asm}, + {"poly_use_hint_32_aarch64_asm", check_poly_use_hint_32_aarch64_asm}, + {"poly_use_hint_88_aarch64_asm", check_poly_use_hint_88_aarch64_asm}, + {"polyz_unpack_17_aarch64_asm", check_polyz_unpack_17_aarch64_asm}, + {"polyz_unpack_19_aarch64_asm", check_polyz_unpack_19_aarch64_asm}, + {"rej_uniform_aarch64_asm", check_rej_uniform_aarch64_asm}, + {"rej_uniform_eta2_aarch64_asm", check_rej_uniform_eta2_aarch64_asm}, + {"rej_uniform_eta4_aarch64_asm", check_rej_uniform_eta4_aarch64_asm}, + {NULL, NULL} /* Sentinel */ +}; + +#endif /* MLD_SYS_AARCH64 */ + +#endif /* !MLD_TEST_ABICHECK_CHECKS_AARCH64_ALL_H */ diff --git a/test/abicheck/aarch64/selftest_aarch64.S b/test/abicheck/aarch64/selftest_aarch64.S new file mode 100644 index 000000000..a66a28385 --- /dev/null +++ b/test/abicheck/aarch64/selftest_aarch64.S @@ -0,0 +1,73 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * AArch64 selftest stubs. 
+ * + * Each function clobbers exactly one callee-saved register and returns: + * `mvn x,x` for GPRs and `fneg d,d` for D-regs. Both unconditionally flip + * a bit (mvn inverts all bits; fneg inverts bit 63 even for zero/NaN), so + * the integer-compare in check_aarch64_aapcs_compliance always fires. The + * noop produces zero violations. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text +.balign 4 + +/* Emit a global label (Apple wants a leading underscore) and a one-line + * body. Mirrors the .macro use in callstub_aarch64.S and the corrupter + * macros in selftest_armv81m.S. */ +.macro define_corrupter name, body +#ifdef __APPLE__ +.global _\name +_\name: +#else +.global \name +\name: +#endif + \body + ret +.endm + +define_corrupter selftest_aarch64_noop + +/* x18 is the platform register on Darwin (reserved by the OS, must not be + * touched) and unused on ELF/Linux. We still expect every kernel-under-test + * to leave it alone, so the compliance check verifies preservation; this + * selftest ensures the checker actually fires when x18 is clobbered. + * Skipped on Apple (the consumer in selftest.c also guards out the extern + * decl and the registry slot under !__APPLE__) since we can't safely mvn it. */ +#if !defined(__APPLE__) +define_corrupter selftest_aarch64_corrupt_x18, "mvn x18, x18" +#endif + +/* GPRs: callee-saved set is x19-x29. 
*/ +define_corrupter selftest_aarch64_corrupt_x19, "mvn x19, x19" +define_corrupter selftest_aarch64_corrupt_x20, "mvn x20, x20" +define_corrupter selftest_aarch64_corrupt_x21, "mvn x21, x21" +define_corrupter selftest_aarch64_corrupt_x22, "mvn x22, x22" +define_corrupter selftest_aarch64_corrupt_x23, "mvn x23, x23" +define_corrupter selftest_aarch64_corrupt_x24, "mvn x24, x24" +define_corrupter selftest_aarch64_corrupt_x25, "mvn x25, x25" +define_corrupter selftest_aarch64_corrupt_x26, "mvn x26, x26" +define_corrupter selftest_aarch64_corrupt_x27, "mvn x27, x27" +define_corrupter selftest_aarch64_corrupt_x28, "mvn x28, x28" +define_corrupter selftest_aarch64_corrupt_x29, "mvn x29, x29" + +/* AAPCS64 preserves only the low 64 bits of v8-v15; fneg d touches just the + * sign bit of that range. */ +define_corrupter selftest_aarch64_corrupt_d8, "fneg d8, d8" +define_corrupter selftest_aarch64_corrupt_d9, "fneg d9, d9" +define_corrupter selftest_aarch64_corrupt_d10, "fneg d10, d10" +define_corrupter selftest_aarch64_corrupt_d11, "fneg d11, d11" +define_corrupter selftest_aarch64_corrupt_d12, "fneg d12, d12" +define_corrupter selftest_aarch64_corrupt_d13, "fneg d13, d13" +define_corrupter selftest_aarch64_corrupt_d14, "fneg d14, d14" +define_corrupter selftest_aarch64_corrupt_d15, "fneg d15, d15" diff --git a/test/abicheck/abicheck.c b/test/abicheck/abicheck.c new file mode 100644 index 000000000..8b0350193 --- /dev/null +++ b/test/abicheck/abicheck.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#include +#include "abicheck_common.h" +#if defined(MLD_SYS_AARCH64) +#include "aarch64/checks_aarch64_all.h" +#elif defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) +#include "x86_64/checks_x86_64_all.h" +#elif defined(MLD_SYS_ARMV81M_MVE) +#include "armv81m/checks_armv81m_all.h" +#else +/* No abicheck support on this architecture 
- empty registry, driver runs zero + * kernel checks (selftest still runs). */ +static const abicheck_entry_t all_checks[] = {{NULL, NULL}}; +#endif +#include "selftest.h" + +/* Return-code convention: see abicheck_common.h. SKIPPED means the kernel + * built but the host lacks the runtime capability; the generated check + * decides this via mld_sys_check_capability. */ +int main(void) +{ + int failed_tests = 0; + int selftest_failures; + const abicheck_entry_t *entry; + + /* Meta-test the ABI checker before trusting kernel verdicts (see selftest.h). + */ + selftest_failures = abicheck_selftest(); + if (selftest_failures > 0) + { + fprintf(stderr, "abicheck selftest FAILED (%d failure(s)); aborting.\n", + selftest_failures); + return 1; + } + printf("ABI checker selftest... PASSED\n"); + + /* all_checks comes from checks_<arch>_all.h (sentinel-only on unsupported + * archs, in which case this loop is a no-op). */ + for (entry = all_checks; entry->name != NULL; entry++) + { + int rc = entry->check_func(); + if (rc == MLD_ABICHECK_PASSED) + { + printf("ABI check for %s... PASSED\n", entry->name); + } + else if (rc == MLD_ABICHECK_SKIPPED) + { + printf("ABI check for %s... SKIPPED\n", entry->name); + } + else + { + printf("ABI check for %s... FAILED\n", entry->name); + failed_tests++; + } + } + + if (failed_tests > 0) + { + return 1; + } + return 0; +} diff --git a/test/abicheck/abicheck.mk b/test/abicheck/abicheck.mk new file mode 100644 index 000000000..4bb98c6b3 --- /dev/null +++ b/test/abicheck/abicheck.mk @@ -0,0 +1,13 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +# WARNING: This file is auto-generated from scripts/autogen +# in the mldsa-native repository. +# Do not modify it directly. +# +# Includes the per-arch abicheck_<arch>.mk, each of which appends +# its capabilities' CFLAGS to the matching .S objects. 
+ +include test/abicheck/aarch64/abicheck_aarch64.mk +include test/abicheck/armv81m/abicheck_armv81m.mk +include test/abicheck/x86_64/abicheck_x86_64.mk diff --git a/test/abicheck/abicheck_common.h b/test/abicheck/abicheck_common.h new file mode 100644 index 000000000..67a8052c2 --- /dev/null +++ b/test/abicheck/abicheck_common.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#ifndef MLD_TEST_ABICHECK_ABICHECK_COMMON_H +#define MLD_TEST_ABICHECK_ABICHECK_COMMON_H + +#include + +/* + * Resolve the build config and sys.h. Defines MLD_BUILD_INTERNAL (so + * MLD_CONFIG_FILE's MLD_CONFIG_CUSTOM_CAPABILITY_FUNC body, gated on it, is + * visible) and pulls in the resolved config + sys.h. We don't include + * common.h: it would drag in backend constant tables that older compilers do + * not drop, forcing them into the abicheck link. + * + * Paths use the project's -Imldsa search path (set in test/mk/components.mk) + * so this header is includable from any depth under test/abicheck/. + */ +#define MLD_BUILD_INTERNAL +#if defined(MLD_CONFIG_FILE) +#include MLD_CONFIG_FILE +#else +#include "mldsa_native_config.h" +#endif +#include "src/sys.h" + +/* Return codes for check_<name>(), shared with abicheck.c. */ +#define MLD_ABICHECK_PASSED 0 /* No violation in any iteration. */ +#define MLD_ABICHECK_SKIPPED 1 /* Host lacks the required ISA capability. */ +#define MLD_ABICHECK_FAILED \ + (-1) /* Violation observed, or arch unsupported. \ + */ + +/* Randomized trials per kernel; each trial reseeds the register state and + * pointer-arg buffers. */ +#define MLD_ABICHECK_NUM_TESTS 100 + +/* Quiet suppresses the per-violation diagnostic (used by the selftest, whose + * corrupters always fire). */ +#define MLD_ABICHECK_VERBOSE 0 +#define MLD_ABICHECK_QUIET 1 + +/* If quiet, suppress the per-register diagnostic. 
Non-variadic to stay + * C90-clean under -pedantic; fixed names are passed via "%s". */ +#define MLD_ABI_VIOLATION(quiet, fmt, arg) \ + do \ + { \ + if (!(quiet)) \ + { \ + fprintf(stderr, "ABI violation: " fmt, (arg)); \ + } \ + } while (0) + +/* Registry entry shared by all per-arch checks_{arch}_all.h headers and + * consumed by abicheck.c. */ +typedef struct +{ + const char *name; + int (*check_func)(void); +} abicheck_entry_t; + +/* The per-arch register-state struct and its call-stub / compliance / init + * declarations live in test/abicheck/<arch>/abicheck_<arch>.h, with the + * compliance/init implementations in the matching abicheck_<arch>.c. The + * generated check_*.c include their own arch's header; selftest.c, which + * dispatches across all arches, includes them all directly. The driver + * abicheck.c needs only the shared definitions above, so it pulls in none. */ + +#endif /* !MLD_TEST_ABICHECK_ABICHECK_COMMON_H */ diff --git a/test/abicheck/armv81m/abicheck_armv81m.c b/test/abicheck/armv81m/abicheck_armv81m.c new file mode 100644 index 000000000..6023a5dd6 --- /dev/null +++ b/test/abicheck/armv81m/abicheck_armv81m.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [AAPCS32] + * Procedure Call Standard for the Arm Architecture (AAPCS32) + * Arm Limited + * https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst + * + * - [ArmARMv8M] + * Armv8-M Architecture Reference Manual (DDI 0553) + * Arm Limited + * https://developer.arm.com/documentation/ddi0553/latest/ + */ + +#include + +#include "../../notrandombytes/notrandombytes.h" +#include "abicheck_armv81m.h" + +#if defined(MLD_SYS_ARMV81M_MVE) + +/* Callee-saved set per @[AAPCS32, Section 5.1 "Machine Registers"]; MVE + * Q-register file per @[ArmARMv8M, Section B7]. 
*/ +int check_armv81m_aapcs32_compliance(struct armv81m_register_state *before, + struct armv81m_register_state *after, + int quiet) +{ + int violations = 0; + int i; + + /* Callee-saved GPRs r4-r11 (AAPCS32). r12=IP and r0-r3 are caller-saved. */ + for (i = 4; i <= 11; i++) + { + if (before->gpr[i] != after->gpr[i]) + { + MLD_ABI_VIOLATION(quiet, "r%d modified\n", i); + violations++; + } + } + + /* Callee-saved MVE Q-registers q4-q7 (= d8-d15). Compare full 128 bits. */ + for (i = 4; i <= 7; i++) + { + if (before->mve[i][0] != after->mve[i][0] || + before->mve[i][1] != after->mve[i][1] || + before->mve[i][2] != after->mve[i][2] || + before->mve[i][3] != after->mve[i][3]) + { + MLD_ABI_VIOLATION(quiet, "q%d modified\n", i); + violations++; + } + } + + return violations; +} + +void init_armv81m_register_state(struct armv81m_register_state *state) +{ + randombytes((uint8_t *)state, sizeof(*state)); +} + +#endif /* MLD_SYS_ARMV81M_MVE */ diff --git a/test/abicheck/armv81m/abicheck_armv81m.h b/test/abicheck/armv81m/abicheck_armv81m.h new file mode 100644 index 000000000..cc8c64b35 --- /dev/null +++ b/test/abicheck/armv81m/abicheck_armv81m.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#ifndef MLD_TEST_ABICHECK_ABICHECK_ARMV81M_H +#define MLD_TEST_ABICHECK_ABICHECK_ARMV81M_H + +#include "../abicheck_common.h" + +#if defined(MLD_SYS_ARMV81M_MVE) + +/* Armv8.1-M (Thumb-2, AAPCS32) register state. + * + * Layout must match callstub_armv81m.S. Covers the full GPR set r0-r12 (so + * the stub can seed every available register with random data) plus all + * eight 128-bit MVE Q-registers q0-q7 (= d0-d15 on the floating-point side). + * AAPCS32 callee-saved set: r4-r11 plus q4-q7 (= d8-d15). + * + * gpr is sized to 16 entries so mve[] (which follows) is 16-byte aligned; + * slots 13..15 are unused. 
The struct is MLD_ALIGN-aligned so the stub's + * 128-bit vldrw.u32 / vstrw.32 are aligned. */ +struct MLD_ALIGN armv81m_register_state +{ + uint32_t gpr[16]; /* r0-r12 in slots 0..12; slots 13..15 are padding */ + uint32_t mve[8][4]; /* q0-q7 (full 128-bit MVE registers as four 32-bit + lanes each) */ +}; + +int check_armv81m_aapcs32_compliance(struct armv81m_register_state *before, + struct armv81m_register_state *after, + int quiet); +void init_armv81m_register_state(struct armv81m_register_state *state); + +extern void asm_call_stub_armv81m(struct armv81m_register_state *input, + struct armv81m_register_state *output, + void (*function_ptr)(void)); + +#endif /* MLD_SYS_ARMV81M_MVE */ + +#endif /* !MLD_TEST_ABICHECK_ABICHECK_ARMV81M_H */ diff --git a/test/abicheck/armv81m/abicheck_armv81m.mk b/test/abicheck/armv81m/abicheck_armv81m.mk new file mode 100644 index 000000000..0029b6457 --- /dev/null +++ b/test/abicheck/armv81m/abicheck_armv81m.mk @@ -0,0 +1,27 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +# WARNING: This file is auto-generated from scripts/autogen +# in the mldsa-native repository. +# Do not modify it directly. +# +# Edit the YAML 'ABI.Features:' list in dev/<arch>/src/<kernel>.S +# and re-run scripts/autogen instead. +# +# For each capability declared by a kernel's ABI.Features list, this +# file appends the capability's CFLAGS to that kernel's .S object +# under mldsa/src/. + +# Default each cap's file list to empty so the unconditional appends +# below are safe even when a cap has no kernels on this arch. 
+ABICHECK_REQ_MVE_FILES := + +# MVE: Armv8.1-M MVE +ABICHECK_REQ_MVE_FILES := \ + mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S \ + mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S \ + mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S \ + test/abicheck/armv81m/callstub_armv81m.S \ + test/abicheck/armv81m/selftest_armv81m.S +ABICHECK_REQ_MVE_OBJS := $(call MAKE_OBJS,$(ABICHECK_DIR),$(ABICHECK_REQ_MVE_FILES)) +$(ABICHECK_REQ_MVE_OBJS): CFLAGS += -march=armv8.1-m.main+mve -mthumb diff --git a/test/abicheck/armv81m/callstub_armv81m.S b/test/abicheck/armv81m/callstub_armv81m.S new file mode 100644 index 000000000..cdc85bb3c --- /dev/null +++ b/test/abicheck/armv81m/callstub_armv81m.S @@ -0,0 +1,184 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +/* + * Function: asm_call_stub_armv81m + * Description: Armv8.1-M (Thumb-2, AAPCS32) ABI compliance testing stub. + * Captures the full GPR / MVE register state before and after a + * call to verify the callee preserves the AAPCS32 callee-saved + * set. + * + * C signature: + * void asm_call_stub_armv81m(struct armv81m_register_state *input, + * struct armv81m_register_state *output, + * void (*function_ptr)(void)); + * + * AAPCS32 callee-saved registers we preserve for our own caller: + * - r4-r11 + * - lr (we use it ourselves, save/restore via push/pop) + * - Q4-Q7 (= D8-D15) + * (r12 is caller-saved (IP); r13 is sp; we never touch r13 except for stack + * management.) + * + * All of r0-r12 are seeded from input->gpr[]: the kernel itself decides + * which of r0-r3 are arguments and which of r4-r11 it must preserve, so + * the stub seeds every register and lets the C side check what AAPCS32 + * actually required. 
+ * + * Stack layout once the prologue is complete: + * + * sp + 0: output_ptr (push {r1, r2}) + * sp + 4: function_ptr (push {r1, r2}) + * sp + 8..71 : d8..d15 (vpush {d8-d15}) + * sp + 72..111 : r4..r12, lr (push {r4-r12, lr}) + * + * After the inner blx, before we restore our own state, we additionally + * spill r0-r12 with stmdb to free a scratch register for the output stores. + */ + +#define LOC_OUTPUT_PTR 0 +#define LOC_FUNCTION_PTR 4 +#define LOCALS_SIZE 8 + +/* Offset of mve[] inside struct armv81m_register_state. Must stay in sync + * with abicheck_armv81m.h. */ +#define STATE_MVE_OFFSET 64 + +/* Width in bytes of the post-blx `stmdb sp!, {r0-r12}` spill (13 GPRs * 4). + * After that spill the original locals at sp+LOC_* are this much further down + * the stack, so we re-read output_ptr at sp+(52 + LOC_OUTPUT_PTR). */ +#define LOCALS_OFFSET_AFTER_GPR_SPILL (52 + LOC_OUTPUT_PTR) + +.text +.balign 4 +.thumb +.syntax unified +.global asm_call_stub_armv81m +.type asm_call_stub_armv81m, %function +asm_call_stub_armv81m: + .cfi_startproc + /* --- Prologue: save our own callee-saved registers and stash args --- */ + push {r4-r12, lr} + .cfi_adjust_cfa_offset 40 + .cfi_offset r4, -40 + .cfi_offset r5, -36 + .cfi_offset r6, -32 + .cfi_offset r7, -28 + .cfi_offset r8, -24 + .cfi_offset r9, -20 + .cfi_offset r10, -16 + .cfi_offset r11, -12 + .cfi_offset r12, -8 + .cfi_offset lr, -4 + vpush {d8-d15} + .cfi_adjust_cfa_offset 64 + push {r1, r2} /* output_ptr at sp+0, function_ptr at sp+4 */ + .cfi_adjust_cfa_offset 8 + + /* --- Load Q0-Q7 from input->mve[] --- */ + /* r1 already saved on the stack, free to clobber. 
*/ + add r1, r0, #STATE_MVE_OFFSET + vldrw.u32 q0, [r1, #(0 * 16)] + vldrw.u32 q1, [r1, #(1 * 16)] + vldrw.u32 q2, [r1, #(2 * 16)] + vldrw.u32 q3, [r1, #(3 * 16)] + vldrw.u32 q4, [r1, #(4 * 16)] + vldrw.u32 q5, [r1, #(5 * 16)] + vldrw.u32 q6, [r1, #(6 * 16)] + vldrw.u32 q7, [r1, #(7 * 16)] + + /* --- Load r0-r12 from input->gpr[] --- */ + /* r0 is currently the input-state pointer; load r1..r12 via offset + * addressing relative to r0, then overwrite r0 itself last. We avoid + * `ldm rN, {rN, ...}` because Thumb-2 deprecates having the base in + * the register list without writeback. */ + ldr r1, [r0, #( 1 * 4)] + ldr r2, [r0, #( 2 * 4)] + ldr r3, [r0, #( 3 * 4)] + ldr r4, [r0, #( 4 * 4)] + ldr r5, [r0, #( 5 * 4)] + ldr r6, [r0, #( 6 * 4)] + ldr r7, [r0, #( 7 * 4)] + ldr r8, [r0, #( 8 * 4)] + ldr r9, [r0, #( 9 * 4)] + ldr r10, [r0, #(10 * 4)] + ldr r11, [r0, #(11 * 4)] + ldr r12, [r0, #(12 * 4)] + ldr r0, [r0, #( 0 * 4)] /* r0 last (overwrites the base) */ + + /* --- Recover function_ptr and call. r12 (IP, caller-saved) is OK to + * clobber here even though we just loaded it: the AAPCS32 contract + * does not require r12 to be preserved. --- */ + ldr r12, [sp, #LOC_FUNCTION_PTR] + blx r12 + + /* --- Capture post-call state: dump r0-r12 and Q0-Q7. The C side + * compares the AAPCS32 callee-saved subset (r4-r11, Q4-Q7). --- */ + + /* Spill r0-r12 to free a scratch register. After this stmdb the + * captured values live at [sp+0..48], in slot order r0..r12. */ + stmdb sp!, {r0-r12} + .cfi_adjust_cfa_offset 52 + + /* Output pointer location was at sp + LOC_OUTPUT_PTR; the spill + * above shifted everything by 52 bytes. */ + ldr r0, [sp, #LOCALS_OFFSET_AFTER_GPR_SPILL] + + /* Copy r0..r12 from the spill area to output->gpr[0..12]. r1 is the + * only scratch we have; r0 holds output_ptr. Unrolled ldr/str pairs. 
*/ + ldr r1, [sp, #(0 * 4)] /* captured r0 */ + str r1, [r0, #(0 * 4)] + ldr r1, [sp, #(1 * 4)] /* captured r1 */ + str r1, [r0, #(1 * 4)] + ldr r1, [sp, #(2 * 4)] /* captured r2 */ + str r1, [r0, #(2 * 4)] + ldr r1, [sp, #(3 * 4)] /* captured r3 */ + str r1, [r0, #(3 * 4)] + ldr r1, [sp, #(4 * 4)] /* captured r4 */ + str r1, [r0, #(4 * 4)] + ldr r1, [sp, #(5 * 4)] /* captured r5 */ + str r1, [r0, #(5 * 4)] + ldr r1, [sp, #(6 * 4)] /* captured r6 */ + str r1, [r0, #(6 * 4)] + ldr r1, [sp, #(7 * 4)] /* captured r7 */ + str r1, [r0, #(7 * 4)] + ldr r1, [sp, #(8 * 4)] /* captured r8 */ + str r1, [r0, #(8 * 4)] + ldr r1, [sp, #(9 * 4)] /* captured r9 */ + str r1, [r0, #(9 * 4)] + ldr r1, [sp, #(10 * 4)] /* captured r10 */ + str r1, [r0, #(10 * 4)] + ldr r1, [sp, #(11 * 4)] /* captured r11 */ + str r1, [r0, #(11 * 4)] + ldr r1, [sp, #(12 * 4)] /* captured r12 */ + str r1, [r0, #(12 * 4)] + + /* Drop the GPR spill area. */ + add sp, sp, #52 + .cfi_adjust_cfa_offset -52 + + /* --- Capture Q0-Q7 into output->mve[]. --- */ + add r1, r0, #STATE_MVE_OFFSET + vstrw.32 q0, [r1, #(0 * 16)] + vstrw.32 q1, [r1, #(1 * 16)] + vstrw.32 q2, [r1, #(2 * 16)] + vstrw.32 q3, [r1, #(3 * 16)] + vstrw.32 q4, [r1, #(4 * 16)] + vstrw.32 q5, [r1, #(5 * 16)] + vstrw.32 q6, [r1, #(6 * 16)] + vstrw.32 q7, [r1, #(7 * 16)] + + /* --- Epilogue: restore our own callee-saves and return. 
--- */ + add sp, sp, #LOCALS_SIZE /* drop output_ptr/function_ptr locals */ + .cfi_adjust_cfa_offset -8 + vpop {d8-d15} + .cfi_adjust_cfa_offset -64 + pop {r4-r12, pc} + .cfi_endproc diff --git a/test/abicheck/armv81m/checks/check_keccak_f1600_x4_mve_asm.c b/test/abicheck/armv81m/checks/check_keccak_f1600_x4_mve_asm.c new file mode 100644 index 000000000..2c8379374 --- /dev/null +++ b/test/abicheck/armv81m/checks/check_keccak_f1600_x4_mve_asm.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_armv81m.h" +#include "../checks_armv81m_all.h" + +#if defined(MLD_SYS_ARMV81M_MVE) && defined(__ARM_FEATURE_MVE) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct armv81m_register_state reg_state; + +void mld_keccak_f1600_x4_mve_asm(void *state, void *tmpstate, + const uint32_t *rc); + +int check_keccak_f1600_x4_mve_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_r0[800]; /* Bit-interleaved state for 4 Keccak instances + (even halves followed by odd halves) */ + MLD_ALIGN uint8_t buf_r1[800]; /* Temporary storage for intermediate state */ + MLD_ALIGN uint8_t buf_r2[192]; /* Keccak round constants in bit-interleaved + form (24 pairs of 32-bit words) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_MVE)) + { + fprintf(stderr, + "ABI check keccak_f1600_x4_mve_asm: host lacks Armv8.1-M MVE, " + "skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_armv81m_register_state(&input_state); + + randombytes(buf_r0, 800); + randombytes(buf_r1, 800); + randombytes(buf_r2, 192); + + /* Set up register state for function arguments */ 
+ input_state.gpr[0] = (uint32_t)buf_r0; + input_state.gpr[1] = (uint32_t)buf_r1; + input_state.gpr[2] = (uint32_t)buf_r2; + + /* Call function through ABI test stub */ + asm_call_stub_armv81m(&input_state, &output_state, + (void (*)(void))mld_keccak_f1600_x4_mve_asm); + + /* Check ABI compliance */ + violations = check_armv81m_aapcs32_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for keccak_f1600_x4_mve_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_ARMV81M_MVE && __ARM_FEATURE_MVE */ diff --git a/test/abicheck/armv81m/checks_armv81m_all.h b/test/abicheck/armv81m/checks_armv81m_all.h new file mode 100644 index 000000000..8711a085c --- /dev/null +++ b/test/abicheck/armv81m/checks_armv81m_all.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + + +#ifndef MLD_TEST_ABICHECK_CHECKS_ARMV81M_ALL_H +#define MLD_TEST_ABICHECK_CHECKS_ARMV81M_ALL_H + +#include +#include "../abicheck_common.h" + +#if defined(MLD_SYS_ARMV81M_MVE) + +#if defined(__ARM_FEATURE_MVE) +int check_keccak_f1600_x4_mve_asm(void); +#endif + +static const abicheck_entry_t all_checks[] = { +#if defined(__ARM_FEATURE_MVE) + {"keccak_f1600_x4_mve_asm", check_keccak_f1600_x4_mve_asm}, +#endif + {NULL, NULL} /* Sentinel */ +}; + +#endif /* MLD_SYS_ARMV81M_MVE */ + +#endif /* !MLD_TEST_ABICHECK_CHECKS_ARMV81M_ALL_H */ diff --git a/test/abicheck/armv81m/selftest_armv81m.S b/test/abicheck/armv81m/selftest_armv81m.S new file mode 100644 index 000000000..7aa13bde3 --- /dev/null +++ b/test/abicheck/armv81m/selftest_armv81m.S @@ -0,0 +1,64 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * Armv8.1-M (Thumb-2, AAPCS32+MVE) selftest stubs. + * + * Each function clobbers exactly one callee-saved register and returns. + * GPR corrupters use `mvn rN, rN` and Q-register corrupters use + * `vmvn qN, qN`: both are bitwise NOT, always differ from input, + * and need no scratch register. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +.text +.balign 4 +.thumb +.syntax unified + +/* The ELF Thumb linker needs `.type sym, %function` for every external + * Thumb symbol so it can set the LSB of branch relocations from C + * callers. Apple has no Armv8.1-M platform, so we emit the ELF form + * unconditionally. 
*/ + +.global selftest_armv81m_noop +.type selftest_armv81m_noop, %function +selftest_armv81m_noop: + bx lr + +.macro define_corrupter name, body +.global \name +.type \name, %function +\name: + \body + bx lr +.endm + +define_corrupter selftest_armv81m_corrupt_r4, "mvn r4, r4" +define_corrupter selftest_armv81m_corrupt_r5, "mvn r5, r5" +define_corrupter selftest_armv81m_corrupt_r6, "mvn r6, r6" +define_corrupter selftest_armv81m_corrupt_r7, "mvn r7, r7" +define_corrupter selftest_armv81m_corrupt_r8, "mvn r8, r8" +define_corrupter selftest_armv81m_corrupt_r9, "mvn r9, r9" +define_corrupter selftest_armv81m_corrupt_r10, "mvn r10, r10" +define_corrupter selftest_armv81m_corrupt_r11, "mvn r11, r11" + +/* Q-register corrupters. */ +.macro corrupt_q name, qreg +.global \name +.type \name, %function +\name: + vmvn \qreg, \qreg + bx lr +.endm + +corrupt_q selftest_armv81m_corrupt_q4, q4 +corrupt_q selftest_armv81m_corrupt_q5, q5 +corrupt_q selftest_armv81m_corrupt_q6, q6 +corrupt_q selftest_armv81m_corrupt_q7, q7 diff --git a/test/abicheck/selftest.c b/test/abicheck/selftest.c new file mode 100644 index 000000000..6cf163bc9 --- /dev/null +++ b/test/abicheck/selftest.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * Meta-test for the ABI checker. + * + * For each supported architecture, this file iterates over a registry of + * "corrupter" functions. Each corrupter is a tiny hand-written assembly + * stub that violates the platform calling convention by clobbering exactly + * one callee-saved register (without restoring it). We call each through + * the architecture's call stub, run the matching check_*_compliance, and + * assert the checker reports the expected violation count - that proves + * the checker actually fires. 
+ * + * The corrupter sources live in selftest_<arch>.S; the registry below maps + * each one to a human-readable name. We do not assert *which* register was + * flagged (that would require parsing stderr); strict-equal-count plus a + * known-good no-op is sufficient to validate the checker's polarity, basic + * plumbing, and that it doesn't over-count. + * + * If any selftest fails, abicheck.c bails before running the kernel + * registry, on the principle that a broken checker's verdicts cannot be + * trusted. + */ + +#include <stdio.h> + +#include "abicheck_common.h" +#include "selftest.h" + +/* Per-arch register-state structs + declarations; each is guarded on its arch + * macro, so only the active arch's definitions materialize. selftest.c + * dispatches across all arches, so it pulls in every arch's header. */ +#include "aarch64/abicheck_aarch64.h" +#include "armv81m/abicheck_armv81m.h" +#include "x86_64/abicheck_x86_64.h" + +/* Shared registry shape: per-arch tables of (name, fn-ptr, expected count). + * On x86_64 Windows-MinGW the corrupter symbols are MLD_SYSV_ABI; we store + * them as plain void(*)(void) here and re-qualify with a cast at the call + * site below, matching the per-kernel check_*.c pattern. */ +typedef struct +{ + const char *name; + void (*fn)(void); + int expected_violations; /* 0 for noop, >=1 for corrupters */ +} selftest_entry_t; + +/* Run a per-arch selftest pass: iterate `entries`, for each call the stub + * with a freshly-initialised input state, run the compliance check, and + * count cases where the violation count doesn't match expectations + * (catches both polarity flips and over-counting). 
*/ +#define SELFTEST_RUN_ARCH(arch_label, state_t, init_fn, stub_fn, check_fn, \ + entries, fn_cast) \ + do \ + { \ + state_t input_state, output_state; \ + const selftest_entry_t *e; \ + for (e = (entries); e->name != NULL; e++) \ + { \ + int violations; \ + init_fn(&input_state); \ + stub_fn(&input_state, &output_state, fn_cast e->fn); \ + violations = check_fn(&input_state, &output_state, MLD_ABICHECK_QUIET); \ + if (violations != e->expected_violations) \ + { \ + fprintf(stderr, \ + "selftest FAIL: " arch_label \ + " %s: expected %d violations, got %d\n", \ + e->name, e->expected_violations, violations); \ + failures++; \ + } \ + } \ + } while (0) + +#if defined(MLD_SYS_AARCH64) + +/* Corrupter declarations. Defined in selftest_aarch64.S. */ +extern void selftest_aarch64_noop(void); +/* x18 is the AArch64 platform register (Darwin-reserved, ELF-unused); + * the call stub does not seed it on Apple, but we still verify that + * kernels leave it alone. The corrupter is registered only on + * non-Apple builds because on Darwin user code must not touch x18. */ +#if !defined(__APPLE__) +extern void selftest_aarch64_corrupt_x18(void); +#endif +/* GPRs: callee-saved set is x19-x29. */ +extern void selftest_aarch64_corrupt_x19(void); +extern void selftest_aarch64_corrupt_x20(void); +extern void selftest_aarch64_corrupt_x21(void); +extern void selftest_aarch64_corrupt_x22(void); +extern void selftest_aarch64_corrupt_x23(void); +extern void selftest_aarch64_corrupt_x24(void); +extern void selftest_aarch64_corrupt_x25(void); +extern void selftest_aarch64_corrupt_x26(void); +extern void selftest_aarch64_corrupt_x27(void); +extern void selftest_aarch64_corrupt_x28(void); +extern void selftest_aarch64_corrupt_x29(void); +/* SIMD: lower 64 bits of d8-d15 are callee-saved. 
*/ +extern void selftest_aarch64_corrupt_d8(void); +extern void selftest_aarch64_corrupt_d9(void); +extern void selftest_aarch64_corrupt_d10(void); +extern void selftest_aarch64_corrupt_d11(void); +extern void selftest_aarch64_corrupt_d12(void); +extern void selftest_aarch64_corrupt_d13(void); +extern void selftest_aarch64_corrupt_d14(void); +extern void selftest_aarch64_corrupt_d15(void); + +static const selftest_entry_t aarch64_entries[] = { + {"noop", selftest_aarch64_noop, 0}, +#if !defined(__APPLE__) + {"corrupt_x18", selftest_aarch64_corrupt_x18, 1}, +#endif + {"corrupt_x19", selftest_aarch64_corrupt_x19, 1}, + {"corrupt_x20", selftest_aarch64_corrupt_x20, 1}, + {"corrupt_x21", selftest_aarch64_corrupt_x21, 1}, + {"corrupt_x22", selftest_aarch64_corrupt_x22, 1}, + {"corrupt_x23", selftest_aarch64_corrupt_x23, 1}, + {"corrupt_x24", selftest_aarch64_corrupt_x24, 1}, + {"corrupt_x25", selftest_aarch64_corrupt_x25, 1}, + {"corrupt_x26", selftest_aarch64_corrupt_x26, 1}, + {"corrupt_x27", selftest_aarch64_corrupt_x27, 1}, + {"corrupt_x28", selftest_aarch64_corrupt_x28, 1}, + {"corrupt_x29", selftest_aarch64_corrupt_x29, 1}, + {"corrupt_d8", selftest_aarch64_corrupt_d8, 1}, + {"corrupt_d9", selftest_aarch64_corrupt_d9, 1}, + {"corrupt_d10", selftest_aarch64_corrupt_d10, 1}, + {"corrupt_d11", selftest_aarch64_corrupt_d11, 1}, + {"corrupt_d12", selftest_aarch64_corrupt_d12, 1}, + {"corrupt_d13", selftest_aarch64_corrupt_d13, 1}, + {"corrupt_d14", selftest_aarch64_corrupt_d14, 1}, + {"corrupt_d15", selftest_aarch64_corrupt_d15, 1}, + {NULL, NULL, 0}, +}; + +#elif defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) + +/* Defined in selftest_x86_64.S. The .S symbols are MLD_SYSV_ABI-qualified; + * we store them as plain void(*)(void) and re-qualify the cast at the call + * site (see SELFTEST_RUN_ARCH below). 
*/ +extern MLD_SYSV_ABI +void selftest_x86_64_noop(void); +extern MLD_SYSV_ABI +void selftest_x86_64_corrupt_rbx(void); +extern MLD_SYSV_ABI +void selftest_x86_64_corrupt_rbp(void); +extern MLD_SYSV_ABI +void selftest_x86_64_corrupt_r12(void); +extern MLD_SYSV_ABI +void selftest_x86_64_corrupt_r13(void); +extern MLD_SYSV_ABI +void selftest_x86_64_corrupt_r14(void); +extern MLD_SYSV_ABI +void selftest_x86_64_corrupt_r15(void); + +static const selftest_entry_t x86_64_entries[] = { + {"noop", (void (*)(void))selftest_x86_64_noop, 0}, + {"corrupt_rbx", (void (*)(void))selftest_x86_64_corrupt_rbx, 1}, + {"corrupt_rbp", (void (*)(void))selftest_x86_64_corrupt_rbp, 1}, + {"corrupt_r12", (void (*)(void))selftest_x86_64_corrupt_r12, 1}, + {"corrupt_r13", (void (*)(void))selftest_x86_64_corrupt_r13, 1}, + {"corrupt_r14", (void (*)(void))selftest_x86_64_corrupt_r14, 1}, + {"corrupt_r15", (void (*)(void))selftest_x86_64_corrupt_r15, 1}, + {NULL, NULL, 0}, +}; + +#elif defined(MLD_SYS_ARMV81M_MVE) + +extern void selftest_armv81m_noop(void); +extern void selftest_armv81m_corrupt_r4(void); +extern void selftest_armv81m_corrupt_r5(void); +extern void selftest_armv81m_corrupt_r6(void); +extern void selftest_armv81m_corrupt_r7(void); +extern void selftest_armv81m_corrupt_r8(void); +extern void selftest_armv81m_corrupt_r9(void); +extern void selftest_armv81m_corrupt_r10(void); +extern void selftest_armv81m_corrupt_r11(void); +extern void selftest_armv81m_corrupt_q4(void); +extern void selftest_armv81m_corrupt_q5(void); +extern void selftest_armv81m_corrupt_q6(void); +extern void selftest_armv81m_corrupt_q7(void); + +static const selftest_entry_t armv81m_entries[] = { + {"noop", selftest_armv81m_noop, 0}, + {"corrupt_r4", selftest_armv81m_corrupt_r4, 1}, + {"corrupt_r5", selftest_armv81m_corrupt_r5, 1}, + {"corrupt_r6", selftest_armv81m_corrupt_r6, 1}, + {"corrupt_r7", selftest_armv81m_corrupt_r7, 1}, + {"corrupt_r8", selftest_armv81m_corrupt_r8, 1}, + {"corrupt_r9", 
selftest_armv81m_corrupt_r9, 1}, + {"corrupt_r10", selftest_armv81m_corrupt_r10, 1}, + {"corrupt_r11", selftest_armv81m_corrupt_r11, 1}, + {"corrupt_q4", selftest_armv81m_corrupt_q4, 1}, + {"corrupt_q5", selftest_armv81m_corrupt_q5, 1}, + {"corrupt_q6", selftest_armv81m_corrupt_q6, 1}, + {"corrupt_q7", selftest_armv81m_corrupt_q7, 1}, + {NULL, NULL, 0}, +}; + +#endif /* !MLD_SYS_AARCH64 && !(MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED) && \ + MLD_SYS_ARMV81M_MVE */ + +int abicheck_selftest(void) +{ + int failures = 0; + +#if defined(MLD_SYS_AARCH64) + SELFTEST_RUN_ARCH("aarch64", struct aarch64_register_state, + init_aarch64_register_state, asm_call_stub_aarch64, + check_aarch64_aapcs_compliance, aarch64_entries, + (void (*)(void))); +#elif defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) + SELFTEST_RUN_ARCH( + "x86_64", struct x86_64_register_state, init_x86_64_register_state, + asm_call_stub_x86_64_sysv, check_x86_64_sysv_compliance, x86_64_entries, + (MLD_SYSV_ABI + void (*)(void))); +#elif defined(MLD_SYS_ARMV81M_MVE) + SELFTEST_RUN_ARCH("armv81m", struct armv81m_register_state, + init_armv81m_register_state, asm_call_stub_armv81m, + check_armv81m_aapcs32_compliance, armv81m_entries, + (void (*)(void))); +#else /* !MLD_SYS_AARCH64 && !(MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED) && \ + MLD_SYS_ARMV81M_MVE */ + /* No abicheck support on this architecture. 
*/ +#endif /* !MLD_SYS_AARCH64 && !(MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED) && \ + !MLD_SYS_ARMV81M_MVE */ + + return failures; +} diff --git a/test/abicheck/selftest.h b/test/abicheck/selftest.h new file mode 100644 index 000000000..ddae9b5a4 --- /dev/null +++ b/test/abicheck/selftest.h @@ -0,0 +1,14 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#ifndef MLD_TEST_ABICHECK_SELFTEST_H +#define MLD_TEST_ABICHECK_SELFTEST_H + +/* Run the ABI checker meta-test for the active architecture. Returns the + * number of selftest failures (0 = all good). */ +int abicheck_selftest(void); + +#endif /* !MLD_TEST_ABICHECK_SELFTEST_H */ diff --git a/test/abicheck/x86_64/abicheck_x86_64.c b/test/abicheck/x86_64/abicheck_x86_64.c new file mode 100644 index 000000000..f23506a47 --- /dev/null +++ b/test/abicheck/x86_64/abicheck_x86_64.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [SysVAMD64] + * System V Application Binary Interface — AMD64 Architecture Processor + * Supplement + * Matz, Hubička, Jaeger, Mitchell + * https://gitlab.com/x86-psABIs/x86-64-ABI + */ + +#include + +#include "../../notrandombytes/notrandombytes.h" +#include "abicheck_x86_64.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) + +/* Callee-saved set per @[SysVAMD64, Section 3.2 "Function Calling Sequence"]. 
+ */ +int check_x86_64_sysv_compliance(struct x86_64_register_state *before, + struct x86_64_register_state *after, int quiet) +{ + int violations = 0; + + if (before->rbx != after->rbx) + { + MLD_ABI_VIOLATION(quiet, "%s modified\n", "rbx"); + violations++; + } + if (before->rbp != after->rbp) + { + MLD_ABI_VIOLATION(quiet, "%s modified\n", "rbp"); + violations++; + } + if (before->r12 != after->r12) + { + MLD_ABI_VIOLATION(quiet, "%s modified\n", "r12"); + violations++; + } + if (before->r13 != after->r13) + { + MLD_ABI_VIOLATION(quiet, "%s modified\n", "r13"); + violations++; + } + if (before->r14 != after->r14) + { + MLD_ABI_VIOLATION(quiet, "%s modified\n", "r14"); + violations++; + } + if (before->r15 != after->r15) + { + MLD_ABI_VIOLATION(quiet, "%s modified\n", "r15"); + violations++; + } + + return violations; +} + +void init_x86_64_register_state(struct x86_64_register_state *state) +{ + randombytes((uint8_t *)state, sizeof(*state)); +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED */ diff --git a/test/abicheck/x86_64/abicheck_x86_64.h b/test/abicheck/x86_64/abicheck_x86_64.h new file mode 100644 index 000000000..2ee731439 --- /dev/null +++ b/test/abicheck/x86_64/abicheck_x86_64.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#ifndef MLD_TEST_ABICHECK_ABICHECK_X86_64_H +#define MLD_TEST_ABICHECK_ABICHECK_X86_64_H + +#include "../abicheck_common.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) + +/* x86_64 System V ABI register state + * Layout must match callstub_x86_64.S */ +struct x86_64_register_state +{ + uint64_t rdi; /* 0 */ + uint64_t rsi; /* 8 */ + uint64_t rdx; /* 16 */ + uint64_t rcx; /* 24 */ + uint64_t r8; /* 32 */ + uint64_t r9; /* 40 */ + uint64_t rax; /* 48 */ + uint64_t rbx; /* 56 */ + uint64_t rbp; /* 64 */ + uint64_t r12; /* 72 */ + uint64_t r13; /* 80 */ + uint64_t r14; /* 
88 */ + uint64_t r15; /* 96 */ + uint64_t r10; /* 104 */ +}; + +int check_x86_64_sysv_compliance(struct x86_64_register_state *before, + struct x86_64_register_state *after, + int quiet); +void init_x86_64_register_state(struct x86_64_register_state *state); + +extern MLD_SYSV_ABI +void asm_call_stub_x86_64_sysv( + struct x86_64_register_state *input, struct x86_64_register_state *output, + MLD_SYSV_ABI + void (*function_ptr)(void)); + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED */ + +#endif /* !MLD_TEST_ABICHECK_ABICHECK_X86_64_H */ diff --git a/test/abicheck/x86_64/abicheck_x86_64.mk b/test/abicheck/x86_64/abicheck_x86_64.mk new file mode 100644 index 000000000..7ec707254 --- /dev/null +++ b/test/abicheck/x86_64/abicheck_x86_64.mk @@ -0,0 +1,34 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +# WARNING: This file is auto-generated from scripts/autogen +# in the mldsa-native repository. +# Do not modify it directly. +# +# Edit the YAML 'ABI.Features:' list in dev/<arch>/src/<kernel>.S +# and re-run scripts/autogen instead. +# +# For each capability declared by a kernel's ABI.Features list, this +# file appends the capability's CFLAGS to that kernel's .S object +# under mldsa/src/. + +# Default each cap's file list to empty so the unconditional appends +# below are safe even when a cap has no kernels on this arch. 
+ABICHECK_REQ_AVX2_FILES := + +# AVX2: AVX2 +ABICHECK_REQ_AVX2_FILES := \ + mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S \ + mldsa/src/native/x86_64/src/intt_avx2_asm.S \ + mldsa/src/native/x86_64/src/ntt_avx2_asm.S \ + mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S \ + mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S \ + mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S \ + mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S \ + mldsa/src/native/x86_64/src/pointwise_avx2_asm.S \ + mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S \ + mldsa/src/native/x86_64/src/poly_chknorm_avx2_asm.S \ + mldsa/src/native/x86_64/src/polyz_unpack_17_avx2_asm.S \ + mldsa/src/native/x86_64/src/polyz_unpack_19_avx2_asm.S +ABICHECK_REQ_AVX2_OBJS := $(call MAKE_OBJS,$(ABICHECK_DIR),$(ABICHECK_REQ_AVX2_FILES)) +$(ABICHECK_REQ_AVX2_OBJS): CFLAGS += -mavx2 -mbmi2 diff --git a/test/abicheck/x86_64/callstub_x86_64.S b/test/abicheck/x86_64/callstub_x86_64.S new file mode 100644 index 000000000..5095cdaa2 --- /dev/null +++ b/test/abicheck/x86_64/callstub_x86_64.S @@ -0,0 +1,150 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * Function: asm_call_stub_x86_64_sysv + * Description: x86_64 System V ABI compliance testing stub + * + * C Signature: void asm_call_stub_x86_64_sysv( + * struct x86_64_register_state *input, // rdi + * struct x86_64_register_state *output, // rsi + * void (*function_ptr)(void)) // rdx + * + * struct x86_64_register_state layout (14 * 8 = 112 bytes): + * offset 0: rdi offset 48: rax + * offset 8: rsi offset 56: rbx + * offset 16: rdx offset 64: rbp + * offset 24: rcx offset 72: r12 + * offset 32: r8 offset 80: r13 + * offset 40: r9 offset 88: r14 + * offset 96: r15 + * offset 104: r10 + * + * Callee-saved in System V: rbx, rbp, r12-r15 + * + * Stack layout after prologue (16-byte aligned; alignment math at the 
+ * subq below): + * [rsp+ 0]: alignment padding + * [rsp+ 8]: output state ptr + * [rsp+16]: function ptr + * [rsp+24]: saved rbx + * [rsp+32]: saved rbp + * [rsp+40]: saved r12 + * [rsp+48]: saved r13 + * [rsp+56]: saved r14 + * [rsp+64]: saved r15 + * [rsp+72]: return address + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text +.balign 16 +#ifdef __APPLE__ +.global _asm_call_stub_x86_64_sysv +_asm_call_stub_x86_64_sysv: +#else +.global asm_call_stub_x86_64_sysv +asm_call_stub_x86_64_sysv: +#endif + .cfi_startproc + /* Save callee-saved registers */ + pushq %r15 + .cfi_adjust_cfa_offset 8 + .cfi_offset %r15, -16 + pushq %r14 + .cfi_adjust_cfa_offset 8 + .cfi_offset %r14, -24 + pushq %r13 + .cfi_adjust_cfa_offset 8 + .cfi_offset %r13, -32 + pushq %r12 + .cfi_adjust_cfa_offset 8 + .cfi_offset %r12, -40 + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -48 + pushq %rbx + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbx, -56 + + /* 16 bytes for the two pointers + 8 padding; keeps %rsp 16-byte + * aligned at the inner call (entry %rsp%16==8, +6 pushes +24 = 80, + * 80%16==0; SysV). */ + subq $24, %rsp + .cfi_adjust_cfa_offset 24 + + /* Save output state ptr and function ptr to stack. */ + movq %rsi, 8(%rsp) /* output state ptr */ + movq %rdx, 16(%rsp) /* function ptr */ + + /* rdi still points to input state - load all registers from it */ + /* Load non-argument registers first */ + movq 48(%rdi), %rax + movq 56(%rdi), %rbx + movq 64(%rdi), %rbp + movq 72(%rdi), %r12 + movq 80(%rdi), %r13 + movq 88(%rdi), %r14 + movq 96(%rdi), %r15 + movq 104(%rdi), %r10 + + /* Load argument registers (rdi last since it's our base) */ + movq 8(%rdi), %rsi + movq 16(%rdi), %rdx + movq 24(%rdi), %rcx + movq 32(%rdi), %r8 + movq 40(%rdi), %r9 + movq 0(%rdi), %rdi + + /* Indirect call via stack slot - no free GPR to hold the fn ptr. */ + call *16(%rsp) + + /* Reload output state ptr. 
%r11 is caller-saved and not tracked + * by the abicheck, so we use it as scratch. */ + movq 8(%rsp), %r11 + + /* Store all GPRs to output state */ + movq %rdi, 0(%r11) + movq %rsi, 8(%r11) + movq %rdx, 16(%r11) + movq %rcx, 24(%r11) + movq %r8, 32(%r11) + movq %r9, 40(%r11) + movq %rax, 48(%r11) + movq %rbx, 56(%r11) + movq %rbp, 64(%r11) + movq %r12, 72(%r11) + movq %r13, 80(%r11) + movq %r14, 88(%r11) + movq %r15, 96(%r11) + movq %r10, 104(%r11) + + /* Epilogue */ + addq $24, %rsp + .cfi_adjust_cfa_offset -24 + popq %rbx + .cfi_adjust_cfa_offset -8 + .cfi_restore %rbx + popq %rbp + .cfi_adjust_cfa_offset -8 + .cfi_restore %rbp + popq %r12 + .cfi_adjust_cfa_offset -8 + .cfi_restore %r12 + popq %r13 + .cfi_adjust_cfa_offset -8 + .cfi_restore %r13 + popq %r14 + .cfi_adjust_cfa_offset -8 + .cfi_restore %r14 + popq %r15 + .cfi_adjust_cfa_offset -8 + .cfi_restore %r15 + ret + .cfi_endproc diff --git a/test/abicheck/x86_64/checks/check_invntt_avx2_asm.c b/test/abicheck/x86_64/checks/check_invntt_avx2_asm.c new file mode 100644 index 000000000..5e33065ee --- /dev/null +++ b/test/abicheck/x86_64/checks/check_invntt_avx2_asm.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include <stdio.h> + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_invntt_avx2_asm(int32_t *r, const int32_t *qdata); + +int check_invntt_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_rsi[2496]; /* Precomputed constants (624 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, "ABI check invntt_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + randombytes(buf_rsi, 2496); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_invntt_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf( + stderr, + "ABI test FAILED for invntt_avx2_asm (iteration %d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_keccak_f1600_x4_avx2_asm.c b/test/abicheck/x86_64/checks/check_keccak_f1600_x4_avx2_asm.c new file mode 100644 index 000000000..43429e44a --- /dev/null +++ 
b/test/abicheck/x86_64/checks/check_keccak_f1600_x4_avx2_asm.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include <stdio.h> + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_keccak_f1600_x4_avx2_asm(uint64_t states[100], const uint64_t rc[24], + const uint64_t rho8[4], + const uint64_t rho56[4]); + +int check_keccak_f1600_x4_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rcx[32]; /* Rotation constant rho56 (4 x uint64_t) */ + MLD_ALIGN uint8_t + buf_rdi[800]; /* Four sequential Keccak states (4 x 25 x uint64_t) */ + MLD_ALIGN uint8_t buf_rdx[32]; /* Rotation constant rho8 (4 x uint64_t) */ + MLD_ALIGN uint8_t buf_rsi[192]; /* Round constants (24 x uint64_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check keccak_f1600_x4_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rcx, 32); + randombytes(buf_rdi, 800); + randombytes(buf_rdx, 32); + randombytes(buf_rsi, 192); + + /* Set up register state for function arguments */ + input_state.rcx = (uint64_t)buf_rcx; + input_state.rdi = (uint64_t)buf_rdi; + input_state.rdx = (uint64_t)buf_rdx; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + 
void (*)(void))mld_keccak_f1600_x4_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for keccak_f1600_x4_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_ntt_avx2_asm.c b/test/abicheck/x86_64/checks/check_ntt_avx2_asm.c new file mode 100644 index 000000000..28be87cc7 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_ntt_avx2_asm.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include <stdio.h> + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_ntt_avx2_asm(int32_t *r, const int32_t *qdata); + +int check_ntt_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_rsi[2496]; /* Precomputed constants (624 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, "ABI check ntt_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + randombytes(buf_rsi, 2496); + + /* 
Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_ntt_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf( + stderr, + "ABI test FAILED for ntt_avx2_asm (iteration %d): %d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_nttunpack_avx2_asm.c b/test/abicheck/x86_64/checks/check_nttunpack_avx2_asm.c new file mode 100644 index 000000000..fa33f9c3f --- /dev/null +++ b/test/abicheck/x86_64/checks/check_nttunpack_avx2_asm.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include <stdio.h> + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_nttunpack_avx2_asm(int32_t *r); + +int check_nttunpack_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Input/output polynomial (256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check nttunpack_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_nttunpack_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for nttunpack_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_pointwise_acc_l4_avx2_asm.c b/test/abicheck/x86_64/checks/check_pointwise_acc_l4_avx2_asm.c new file mode 100644 index 000000000..f15894138 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_pointwise_acc_l4_avx2_asm.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * 
WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include <stdio.h> + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_pointwise_acc_l4_avx2_asm(int32_t *c, const int32_t a[4][256], + const int32_t b[4][256], + const int32_t *qdata); + +int check_pointwise_acc_l4_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rcx[2496]; /* Precomputed constants (624 x int32_t) */ + MLD_ALIGN uint8_t buf_rdi[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_rdx[4096]; /* Input polynomial vector b (4 x 256 x int32_t) */ + MLD_ALIGN uint8_t + buf_rsi[4096]; /* Input polynomial vector a (4 x 256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check pointwise_acc_l4_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rcx, 2496); + randombytes(buf_rdi, 1024); + randombytes(buf_rdx, 4096); + randombytes(buf_rsi, 4096); + + /* Set up register state for function arguments */ + input_state.rcx = (uint64_t)buf_rcx; + input_state.rdi = (uint64_t)buf_rdi; + input_state.rdx = (uint64_t)buf_rdx; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_pointwise_acc_l4_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 
0) + { + fprintf(stderr, + "ABI test FAILED for pointwise_acc_l4_avx2_asm (iteration %d): " + "%d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_pointwise_acc_l5_avx2_asm.c b/test/abicheck/x86_64/checks/check_pointwise_acc_l5_avx2_asm.c new file mode 100644 index 000000000..16b08d2f9 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_pointwise_acc_l5_avx2_asm.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_pointwise_acc_l5_avx2_asm(int32_t *c, const int32_t a[5][256], + const int32_t b[5][256], + const int32_t *qdata); + +int check_pointwise_acc_l5_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rcx[2496]; /* Precomputed constants (624 x int32_t) */ + MLD_ALIGN uint8_t buf_rdi[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_rdx[5120]; /* Input polynomial vector b (5 x 256 x int32_t) */ + MLD_ALIGN uint8_t + buf_rsi[5120]; /* Input polynomial vector a (5 x 256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check pointwise_acc_l5_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + 
init_x86_64_register_state(&input_state); + + randombytes(buf_rcx, 2496); + randombytes(buf_rdi, 1024); + randombytes(buf_rdx, 5120); + randombytes(buf_rsi, 5120); + + /* Set up register state for function arguments */ + input_state.rcx = (uint64_t)buf_rcx; + input_state.rdi = (uint64_t)buf_rdi; + input_state.rdx = (uint64_t)buf_rdx; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_pointwise_acc_l5_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for pointwise_acc_l5_avx2_asm (iteration %d): " + "%d violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_pointwise_acc_l7_avx2_asm.c b/test/abicheck/x86_64/checks/check_pointwise_acc_l7_avx2_asm.c new file mode 100644 index 000000000..227f60113 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_pointwise_acc_l7_avx2_asm.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_pointwise_acc_l7_avx2_asm(int32_t *c, const int32_t a[7][256], + const int32_t b[7][256], + const int32_t *qdata); + +int check_pointwise_acc_l7_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rcx[2496]; /* Precomputed constants (624 x int32_t) */ + MLD_ALIGN uint8_t buf_rdi[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t + buf_rdx[7168]; /* Input polynomial vector b (7 x 256 x int32_t) */ + MLD_ALIGN uint8_t + buf_rsi[7168]; /* Input polynomial vector a (7 x 256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check pointwise_acc_l7_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rcx, 2496); + randombytes(buf_rdi, 1024); + randombytes(buf_rdx, 7168); + randombytes(buf_rsi, 7168); + + /* Set up register state for function arguments */ + input_state.rcx = (uint64_t)buf_rcx; + input_state.rdi = (uint64_t)buf_rdi; + input_state.rdx = (uint64_t)buf_rdx; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_pointwise_acc_l7_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for pointwise_acc_l7_avx2_asm (iteration %d): " + "%d violations\n", + test_iter 
+ 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_pointwise_avx2_asm.c b/test/abicheck/x86_64/checks/check_pointwise_avx2_asm.c new file mode 100644 index 000000000..fe9124cf2 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_pointwise_avx2_asm.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_pointwise_avx2_asm(int32_t *a, const int32_t *b, const int32_t *qdata); + +int check_pointwise_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Input/output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_rdx[2496]; /* Precomputed constants (624 x int32_t) */ + MLD_ALIGN uint8_t buf_rsi[1024]; /* Input polynomial (256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check pointwise_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + randombytes(buf_rdx, 2496); + randombytes(buf_rsi, 1024); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + input_state.rdx = (uint64_t)buf_rdx; + input_state.rsi = 
(uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_pointwise_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for pointwise_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_poly_caddq_avx2_asm.c b/test/abicheck/x86_64/checks/check_poly_caddq_avx2_asm.c new file mode 100644 index 000000000..1bb52a741 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_poly_caddq_avx2_asm.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_poly_caddq_avx2_asm(int32_t *r); + +int check_poly_caddq_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Input/output polynomial (256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check poly_caddq_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_poly_caddq_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_caddq_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_poly_chknorm_avx2_asm.c b/test/abicheck/x86_64/checks/check_poly_chknorm_avx2_asm.c new file mode 100644 index 000000000..8da720f31 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_poly_chknorm_avx2_asm.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * 
WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +int mld_poly_chknorm_avx2_asm(const int32_t *a, int32_t B); + +int check_poly_chknorm_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Input polynomial (256 x int32_t) */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check poly_chknorm_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + input_state.rsi = 131072; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_poly_chknorm_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for poly_chknorm_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_polyz_unpack_17_avx2_asm.c b/test/abicheck/x86_64/checks/check_polyz_unpack_17_avx2_asm.c new file mode 100644 index 000000000..3050b1b1c --- /dev/null +++ 
b/test/abicheck/x86_64/checks/check_polyz_unpack_17_avx2_asm.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_polyz_unpack_17_avx2_asm(int32_t *r, const uint8_t *a); + +int check_polyz_unpack_17_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_rsi[576]; /* Packed input bytes */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check polyz_unpack_17_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + randombytes(buf_rsi, 576); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_polyz_unpack_17_avx2_asm); + + /* Check ABI compliance */ + violations = check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for polyz_unpack_17_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return 
MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks/check_polyz_unpack_19_avx2_asm.c b/test/abicheck/x86_64/checks/check_polyz_unpack_19_avx2_asm.c new file mode 100644 index 000000000..8e866fb25 --- /dev/null +++ b/test/abicheck/x86_64/checks/check_polyz_unpack_19_avx2_asm.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + +#include + +#include "../abicheck_x86_64.h" +#include "../checks_x86_64_all.h" + +#if defined(MLD_SYS_X86_64) && defined(MLD_SYSV_ABI_SUPPORTED) && \ + defined(__AVX2__) + +#include "../../../notrandombytes/notrandombytes.h" + +typedef struct x86_64_register_state reg_state; + +MLD_SYSV_ABI +void mld_polyz_unpack_19_avx2_asm(int32_t *r, const uint8_t *a); + +int check_polyz_unpack_19_avx2_asm(void) +{ + int test_iter; + reg_state input_state, output_state; + int violations; + MLD_ALIGN uint8_t buf_rdi[1024]; /* Output polynomial (256 x int32_t) */ + MLD_ALIGN uint8_t buf_rsi[640]; /* Packed input bytes */ + + if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2)) + { + fprintf(stderr, + "ABI check polyz_unpack_19_avx2_asm: host lacks AVX2, skipping\n"); + return MLD_ABICHECK_SKIPPED; + } + + for (test_iter = 0; test_iter < MLD_ABICHECK_NUM_TESTS; test_iter++) + { + /* Initialize random register state */ + init_x86_64_register_state(&input_state); + + randombytes(buf_rdi, 1024); + randombytes(buf_rsi, 640); + + /* Set up register state for function arguments */ + input_state.rdi = (uint64_t)buf_rdi; + input_state.rsi = (uint64_t)buf_rsi; + + /* Call function through ABI test stub */ + asm_call_stub_x86_64_sysv( + &input_state, &output_state, + (MLD_SYSV_ABI + void (*)(void))mld_polyz_unpack_19_avx2_asm); + + /* Check ABI compliance */ + violations = 
check_x86_64_sysv_compliance(&input_state, &output_state, + MLD_ABICHECK_VERBOSE); + if (violations > 0) + { + fprintf(stderr, + "ABI test FAILED for polyz_unpack_19_avx2_asm (iteration %d): %d " + "violations\n", + test_iter + 1, violations); + return MLD_ABICHECK_FAILED; + } + } + + return MLD_ABICHECK_PASSED; +} + +#endif /* MLD_SYS_X86_64 && MLD_SYSV_ABI_SUPPORTED && __AVX2__ */ diff --git a/test/abicheck/x86_64/checks_x86_64_all.h b/test/abicheck/x86_64/checks_x86_64_all.h new file mode 100644 index 000000000..8995f02f9 --- /dev/null +++ b/test/abicheck/x86_64/checks_x86_64_all.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + */ + + +#ifndef MLD_TEST_ABICHECK_CHECKS_X86_64_ALL_H +#define MLD_TEST_ABICHECK_CHECKS_X86_64_ALL_H + +#include +#include "../abicheck_common.h" + +#if defined(MLD_SYS_X86_64) + +#if defined(MLD_SYSV_ABI_SUPPORTED) +#if defined(__AVX2__) +int check_invntt_avx2_asm(void); +int check_keccak_f1600_x4_avx2_asm(void); +int check_ntt_avx2_asm(void); +int check_nttunpack_avx2_asm(void); +int check_pointwise_acc_l4_avx2_asm(void); +int check_pointwise_acc_l5_avx2_asm(void); +int check_pointwise_acc_l7_avx2_asm(void); +int check_pointwise_avx2_asm(void); +int check_poly_caddq_avx2_asm(void); +int check_poly_chknorm_avx2_asm(void); +int check_polyz_unpack_17_avx2_asm(void); +int check_polyz_unpack_19_avx2_asm(void); +#endif /* __AVX2__ */ +#endif /* MLD_SYSV_ABI_SUPPORTED */ + +static const abicheck_entry_t all_checks[] = { +#if defined(MLD_SYSV_ABI_SUPPORTED) +#if defined(__AVX2__) + {"invntt_avx2_asm", check_invntt_avx2_asm}, + {"keccak_f1600_x4_avx2_asm", check_keccak_f1600_x4_avx2_asm}, + {"ntt_avx2_asm", check_ntt_avx2_asm}, + {"nttunpack_avx2_asm", check_nttunpack_avx2_asm}, + {"pointwise_acc_l4_avx2_asm", 
check_pointwise_acc_l4_avx2_asm}, + {"pointwise_acc_l5_avx2_asm", check_pointwise_acc_l5_avx2_asm}, + {"pointwise_acc_l7_avx2_asm", check_pointwise_acc_l7_avx2_asm}, + {"pointwise_avx2_asm", check_pointwise_avx2_asm}, + {"poly_caddq_avx2_asm", check_poly_caddq_avx2_asm}, + {"poly_chknorm_avx2_asm", check_poly_chknorm_avx2_asm}, + {"polyz_unpack_17_avx2_asm", check_polyz_unpack_17_avx2_asm}, + {"polyz_unpack_19_avx2_asm", check_polyz_unpack_19_avx2_asm}, +#endif /* __AVX2__ */ +#endif /* MLD_SYSV_ABI_SUPPORTED */ + {NULL, NULL} /* Sentinel */ +}; + +#endif /* MLD_SYS_X86_64 */ + +#endif /* !MLD_TEST_ABICHECK_CHECKS_X86_64_ALL_H */ diff --git a/test/abicheck/x86_64/selftest_x86_64.S b/test/abicheck/x86_64/selftest_x86_64.S new file mode 100644 index 000000000..92b99b2f0 --- /dev/null +++ b/test/abicheck/x86_64/selftest_x86_64.S @@ -0,0 +1,42 @@ +/* + * Copyright (c) The mlkem-native project authors + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * x86_64 (System V) selftest stubs. + * + * Each corrupter flips every bit of one callee-saved GPR via `notq %rN`, so + * the result is guaranteed to differ from the random input value. + * selftest_x86_64_noop has an empty body and just returns. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text + +/* Emit a global label (Apple wants a leading underscore) and a one-line + * body. Mirrors the corrupter macros in selftest_aarch64.S / + * selftest_armv81m.S.
*/ +.macro define_corrupter name, body +#ifdef __APPLE__ +.global _\name +_\name: +#else +.global \name +\name: +#endif + \body + ret +.endm + +define_corrupter selftest_x86_64_noop +define_corrupter selftest_x86_64_corrupt_rbx, "notq %rbx" +define_corrupter selftest_x86_64_corrupt_rbp, "notq %rbp" +define_corrupter selftest_x86_64_corrupt_r12, "notq %r12" +define_corrupter selftest_x86_64_corrupt_r13, "notq %r13" +define_corrupter selftest_x86_64_corrupt_r14, "notq %r14" +define_corrupter selftest_x86_64_corrupt_r15, "notq %r15" diff --git a/test/mk/components.mk b/test/mk/components.mk index 67698aabe..a93056b8f 100644 --- a/test/mk/components.mk +++ b/test/mk/components.mk @@ -163,3 +163,99 @@ $(call MAKE_OBJS, $(MLDSA44_DIR), $(EXTRA_SOURCES)): CFLAGS += $(EXTRA_SOURCES_C $(call MAKE_OBJS, $(MLDSA65_DIR), $(EXTRA_SOURCES)): CFLAGS += $(EXTRA_SOURCES_CFLAGS) $(call MAKE_OBJS, $(MLDSA87_DIR), $(EXTRA_SOURCES)): CFLAGS += $(EXTRA_SOURCES_CFLAGS) endif + +# ABI checker +ABICHECK_DIR = $(BUILD_DIR)/abicheck + +# Map $(ARCH) to the abicheck per-arch subdir name. For most architectures +# the subdir matches $(ARCH); one exception: +# - arm-none-eabi- targets: $(ARCH) = arm (a generic label for the +# bare-metal Cortex-M family). The abicheck subdir is the more specific +# armv81m. +ifeq ($(ARCH),arm) +ABICHECK_ARCH := armv81m +else +ABICHECK_ARCH := $(ARCH) +endif + +ABICHECK_SOURCES = test/abicheck/abicheck.c test/abicheck/selftest.c +ABICHECK_SOURCES += $(wildcard test/abicheck/$(ABICHECK_ARCH)/abicheck_$(ABICHECK_ARCH).c) +ABICHECK_SOURCES += $(wildcard test/abicheck/$(ABICHECK_ARCH)/callstub_$(ABICHECK_ARCH).S) +ABICHECK_SOURCES += $(wildcard test/abicheck/$(ABICHECK_ARCH)/selftest_$(ABICHECK_ARCH).S) +ABICHECK_SOURCES += $(wildcard test/abicheck/$(ABICHECK_ARCH)/checks/check_*.c) +ABICHECK_SOURCES += $(wildcard test/notrandombytes/*.c) + +# Per-arch shipped assembly (mldsa/src/.../*.S), assembled directly with +# ABICHECK_ASM_CFLAGS (defined below). 
+ifeq ($(ABICHECK_ARCH),aarch64) +ABICHECK_ASM_SOURCES := $(wildcard mldsa/src/native/aarch64/src/*.S) \ + $(wildcard mldsa/src/fips202/native/aarch64/src/*.S) +else ifeq ($(ABICHECK_ARCH),x86_64) +ABICHECK_ASM_SOURCES := $(wildcard mldsa/src/native/x86_64/src/*.S) \ + $(wildcard mldsa/src/fips202/native/x86_64/src/*.S) +else ifeq ($(ABICHECK_ARCH),armv81m) +ABICHECK_ASM_SOURCES := $(wildcard mldsa/src/fips202/native/armv81m/src/*.S) +else +ABICHECK_ASM_SOURCES := +endif + +# Per-capability CFLAGS injection (e.g. -march=armv8.4-a+sha3 for SHA3, +# -mavx2 -mbmi2 for AVX2), generated by scripts/autogen from each kernel's +# YAML 'ABI.Features:' list. abicheck.mk includes the per-arch abicheck_.mk. +include test/abicheck/abicheck.mk + +# SHA3-not-assemblable case: some aarch64 compilers do not support +# `-march=armv8.4-a+sha3`, in which case we cannot even assemble the SHA3 +# Keccak kernels. +ifeq ($(ARCH),aarch64) +ifneq ($(MK_COMPILER_SUPPORTS_SHA3),1) +ABICHECK_ASM_SOURCES := $(filter-out $(ABICHECK_REQ_SHA3_FILES),$(ABICHECK_ASM_SOURCES)) +endif +endif + +ABICHECK_ALL_SOURCES = $(ABICHECK_SOURCES) $(ABICHECK_ASM_SOURCES) +ABICHECK_OBJS = $(call MAKE_OBJS,$(ABICHECK_DIR),$(ABICHECK_ALL_SOURCES)) + +# Predefine the kernel-gating macros (arith backend, fips202 NEED_*) so the +# shipped #ifs evaluate true. Undefine the two USE_NATIVE_BACKEND switches so +# common.h does not pull in the per-arch backend headers and the constant-table +# C declarations the abicheck does not link against. 
+ABICHECK_ASM_CFLAGS := \ + -UMLD_CONFIG_USE_NATIVE_BACKEND_ARITH \ + -UMLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 \ + -DMLD_CONFIG_MULTILEVEL_WITH_SHARED \ + -DMLD_CONFIG_PARAMETER_SET=65 \ + -DMLD_CONFIG_NAMESPACE_PREFIX=mld \ + -DMLD_ARITH_BACKEND_AARCH64 \ + -DMLD_ARITH_BACKEND_X86_64_DEFAULT \ + -DMLD_FIPS202_AARCH64_NEED_X1_SCALAR \ + -DMLD_FIPS202_AARCH64_NEED_X1_V84A \ + -DMLD_FIPS202_AARCH64_NEED_X2_V84A \ + -DMLD_FIPS202_AARCH64_NEED_X4_V8A_SCALAR_HYBRID \ + -DMLD_FIPS202_AARCH64_NEED_X4_V8A_V84A_SCALAR_HYBRID \ + -DMLD_FIPS202_ARMV81M_NEED_X4 \ + -DMLD_FIPS202_X86_64_NEED_X4_AVX2 + +ABICHECK_ASM_OBJS = $(call MAKE_OBJS,$(ABICHECK_DIR),$(ABICHECK_ASM_SOURCES)) +$(ABICHECK_ASM_OBJS): CFLAGS += $(ABICHECK_ASM_CFLAGS) + +# Force the full ML-DSA API surface for the ABI check, regardless of any +# reduced-API config the caller passes in. +ABICHECK_FULL_API_CFLAGS := \ + -UMLD_CONFIG_NO_KEYPAIR_API \ + -UMLD_CONFIG_NO_SIGN_API \ + -UMLD_CONFIG_NO_VERIFY_API +$(ABICHECK_OBJS): CFLAGS += $(ABICHECK_FULL_API_CFLAGS) + +# Platform support objects (e.g. the bare-metal startup providing _start and the +# semihosting runtime). EXTRA_SOURCES is set by a platform makefile (see +# test/baremetal/platform/*/platform.mk via EXTRA_MAKEFILE); empty for native +# builds. Like the other test binaries, the ABI checker must link these or it has +# no entry point on bare metal. The platform's LDSCRIPT is already applied via +# LDFLAGS in the link rule. 
+ABICHECK_EXTRA_OBJS = $(call MAKE_OBJS,$(ABICHECK_DIR),$(EXTRA_SOURCES)) +ifneq ($(EXTRA_SOURCES),) +$(ABICHECK_EXTRA_OBJS): CFLAGS += $(EXTRA_SOURCES_CFLAGS) +endif + +$(ABICHECK_DIR)/bin/abicheck: $(ABICHECK_OBJS) $(ABICHECK_EXTRA_OBJS) diff --git a/test/mk/rules.mk b/test/mk/rules.mk index 2c59268e6..ba8d8e8e1 100644 --- a/test/mk/rules.mk +++ b/test/mk/rules.mk @@ -114,3 +114,18 @@ $(BUILD_DIR)/mldsa87/alloc/%.S.o: %.S $(CONFIG) $(Q)[ -d $(@D) ] || mkdir -p $(@D) $(Q)$(CC) -c -o $@ $(CFLAGS) $< +$(BUILD_DIR)/abicheck/bin/%: $(CONFIG) + $(Q)echo " LD $@" + $(Q)[ -d $(@D) ] || mkdir -p $(@D) + $(Q)$(LD) $(LDFLAGS) -o $@ $(filter %.o,$^) $(LDLIBS) + +$(BUILD_DIR)/abicheck/%.c.o: %.c $(CONFIG) + $(Q)echo " CC $@" + $(Q)[ -d $(@D) ] || mkdir -p $(@D) + $(Q)$(CC) -c -o $@ $(CFLAGS) $< + +$(BUILD_DIR)/abicheck/%.S.o: %.S $(CONFIG) + $(Q)echo " AS $@" + $(Q)[ -d $(@D) ] || mkdir -p $(@D) + $(Q)$(CC) -c -o $@ $(CFLAGS) $< + diff --git a/test/src/test_mldsa.c b/test/src/test_mldsa.c index 63a18d62a..13c6798d6 100644 --- a/test/src/test_mldsa.c +++ b/test/src/test_mldsa.c @@ -134,6 +134,73 @@ static int test_sign_unaligned(void) return test_sign_core(pk + 1, sk + 1, sm + 1, m + 1, m2 + 1, ctx + 1); } +static int test_sign_inplace_success(void) +{ + uint8_t pk[CRYPTO_PUBLICKEYBYTES]; + uint8_t sk[CRYPTO_SECRETKEYBYTES]; + uint8_t sm[MLEN + CRYPTO_BYTES]; + uint8_t expected[MLEN]; + uint8_t m2[MLEN + CRYPTO_BYTES]; + uint8_t ctx[CTXLEN]; + size_t smlen; + size_t mlen; + int rc; + + CHECK(crypto_sign_keypair(pk, sk) == 0); + CHECK(randombytes(ctx, sizeof(ctx)) == 0); + MLD_CT_TESTING_SECRET(ctx, sizeof(ctx)); + CHECK(randombytes(sm, MLEN) == 0); + MLD_CT_TESTING_SECRET(sm, MLEN); + memcpy(expected, sm, MLEN); + MLD_CT_TESTING_SECRET(expected, sizeof(expected)); + + CHECK_SIGN_RC(crypto_sign(sm, &smlen, sm, MLEN, ctx, sizeof(ctx), sk)); + rc = crypto_sign_open(m2, &mlen, sm, smlen, ctx, sizeof(ctx), pk); + + MLD_CT_TESTING_DECLASSIFY(&rc, sizeof(rc)); + 
MLD_CT_TESTING_DECLASSIFY(expected, sizeof(expected)); + MLD_CT_TESTING_DECLASSIFY(m2, sizeof(m2)); + + CHECK(rc == 0); + CHECK(smlen == MLEN + CRYPTO_BYTES); + CHECK(mlen == MLEN); + CHECK(memcmp(expected, m2, MLEN) == 0); + + return 0; +} + +static int test_sign_inplace_context_failure_preserves_message(void) +{ + uint8_t pk[CRYPTO_PUBLICKEYBYTES]; + uint8_t sk[CRYPTO_SECRETKEYBYTES]; + uint8_t sm[MLEN + CRYPTO_BYTES]; + uint8_t expected[MLEN]; + uint8_t ctx[256]; + size_t smlen = MLEN + CRYPTO_BYTES; + int rc; + + CHECK(crypto_sign_keypair(pk, sk) == 0); + CHECK(randombytes(ctx, sizeof(ctx)) == 0); + MLD_CT_TESTING_SECRET(ctx, sizeof(ctx)); + CHECK(randombytes(sm, MLEN) == 0); + MLD_CT_TESTING_SECRET(sm, MLEN); + memcpy(expected, sm, MLEN); + MLD_CT_TESTING_SECRET(expected, sizeof(expected)); + + rc = crypto_sign(sm, &smlen, sm, MLEN, ctx, sizeof(ctx), sk); + + MLD_CT_TESTING_DECLASSIFY(&rc, sizeof(rc)); + MLD_CT_TESTING_DECLASSIFY(&smlen, sizeof(smlen)); + MLD_CT_TESTING_DECLASSIFY(sm, MLEN); + MLD_CT_TESTING_DECLASSIFY(expected, sizeof(expected)); + + CHECK(rc == MLD_ERR_FAIL); + CHECK(smlen == 0); + CHECK(memcmp(sm, expected, MLEN) == 0); + + return 0; +} + static int test_sign_extmu(void) { uint8_t pk[CRYPTO_PUBLICKEYBYTES]; @@ -509,6 +576,8 @@ int main(void) !defined(MLD_CONFIG_NO_VERIFY_API) r |= test_sign(); r |= test_sign_unaligned(); + r |= test_sign_inplace_success(); + r |= test_sign_inplace_context_failure_preserves_message(); r |= test_wrong_pk(); r |= test_wrong_sig(); r |= test_wrong_ctx(); diff --git a/test/src/test_rng_fail.c b/test/src/test_rng_fail.c index f358325da..d4c4ae8f8 100644 --- a/test/src/test_rng_fail.c +++ b/test/src/test_rng_fail.c @@ -158,6 +158,46 @@ static int test_sign_combined_rng_failure(void) return 0; } +static int test_sign_combined_inplace_rng_failure_preserves_message(void) +{ + uint8_t sm[CRYPTO_BYTES + TEST_VECTOR_MSG_LEN]; + uint8_t expected[TEST_VECTOR_MSG_LEN]; + size_t smlen = CRYPTO_BYTES + TEST_VECTOR_MSG_LEN; + 
int rc; + + memcpy(sm, TEST_VECTOR_MSG, TEST_VECTOR_MSG_LEN); + memcpy(expected, TEST_VECTOR_MSG, TEST_VECTOR_MSG_LEN); + + reset_all(); + randombytes_fail_on_counter = 0; + rc = crypto_sign(sm, &smlen, sm, TEST_VECTOR_MSG_LEN, + (const uint8_t *)TEST_VECTOR_CTX, TEST_VECTOR_CTX_LEN, + test_vector_sk); + if (rc != MLD_ERR_RNG_FAIL) + { + fprintf(stderr, + "ERROR: crypto_sign returned %d on in-place RNG failure " + "(expected %d)\n", + rc, MLD_ERR_RNG_FAIL); + return 1; + } + if (smlen != 0) + { + fprintf(stderr, + "ERROR: crypto_sign returned smlen=%zu on in-place RNG failure\n", + smlen); + return 1; + } + if (memcmp(sm, expected, TEST_VECTOR_MSG_LEN) != 0) + { + fprintf(stderr, + "ERROR: crypto_sign changed the in-place message on RNG failure\n"); + return 1; + } + + return 0; +} + static int test_signature_extmu_rng_failure(void) { uint8_t sig[CRYPTO_BYTES]; @@ -249,6 +289,7 @@ int main(void) #if !defined(MLD_CONFIG_NO_SIGN_API) r |= test_sign_rng_failure(); r |= test_sign_combined_rng_failure(); + r |= test_sign_combined_inplace_rng_failure_preserves_message(); r |= test_signature_extmu_rng_failure(); r |= test_signature_pre_hash_shake256_rng_failure(); #endif /* !MLD_CONFIG_NO_SIGN_API */