diff --git a/mlkem/src/poly.c b/mlkem/src/poly.c index 4a7316718..725dcb509 100644 --- a/mlkem/src/poly.c +++ b/mlkem/src/poly.c @@ -29,33 +29,61 @@ #include "verify.h" /** - * Montgomery multiplication modulo MLKEM_Q. + * Montgomery multiplication modulo MLKEM_Q with precomputed twist. * * @reference{`fqmul()` in the reference implementation @[REF].} * - * @param a First factor. Can be any int16_t. - * @param b Second factor. Must be signed canonical - * (abs value < (MLKEM_Q+1)/2). + * @param a First factor. Can be any int16_t. + * @param b Second factor. Must be signed canonical + * (abs value < (MLKEM_Q+1)/2). + * @param b_twisted Precomputed `b * MLKEM_Q^{-1} mod 2^16` (signed canonical). * * @return 16-bit integer congruent to a*b*R^{-1} mod MLKEM_Q, and * smaller than MLKEM_Q in absolute value. */ -static MLK_INLINE int16_t mlk_fqmul(int16_t a, int16_t b) +static MLK_INLINE int16_t mlk_fqmul(int16_t a, int16_t b, int16_t b_twisted) __contract__( requires(b > -MLKEM_Q_HALF && b < MLKEM_Q_HALF) ensures(return_value > -MLKEM_Q && return_value < MLKEM_Q) ) { - int16_t res; + uint16_t lo0; + int16_t res, hi, lo, correction; mlk_assert_abs_bound(&b, 1, MLKEM_Q_HALF); - res = mlk_montgomery_reduce((int32_t)a * (int32_t)b); + /* High 16 bits of the signed product a * b. */ + hi = (int16_t)(((int32_t)a * b) >> 16); + /* Low 16 bits of a * b_twisted (== a * b * MLKEM_Q^{-1} mod 2^16). + * This is just a 16x16->16 bit low multiplication, but we express + * it as a 16x16->32 widening multiplication with an explicit truncation + * to avoid unsigned overflow errors from CBMC. The compiler will be smart + * enough to realize to optimize this away. */ + lo0 = (uint16_t)(((uint32_t)mlk_cast_int16_to_uint16(a) * + mlk_cast_int16_to_uint16(b_twisted)) & + UINT16_MAX); + /* Lift low product to signed (!) 16-bit integer */ + lo = mlk_cast_uint16_to_int16(lo0); + + /* 16x16->16 bit high multiplication + * + * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + * + * Bounds: |correction| <= ceil(2^15 * MLKEM_Q / 2^16) = (MLKEM_Q + 1)/2 + * + * Safety: The bounds argument above demonstrates that the truncation is safe. + */ + correction = (int16_t)(((int32_t)lo * MLKEM_Q) >> 16); + /* Bounds: - * |res| <= ceil(|a| * |b| / 2^16) + (MLKEM_Q + 1) / 2 + * |res| <= |hi| + |correction| + * <= ceil(|a| * |b| / 2^16) + (MLKEM_Q + 1) / 2 * <= ceil(2^15 * ((MLKEM_Q - 1)/2) / 2^16) + (MLKEM_Q + 1) / 2 * <= ceil((MLKEM_Q - 1) / 4) + (MLKEM_Q + 1) / 2 * < MLKEM_Q */ + res = (int16_t)(hi - correction); mlk_assert_abs_bound(&res, 1, MLKEM_Q); return res; @@ -115,13 +143,16 @@ __contract__( { unsigned i; const int16_t f = 1353; /* check-magic: 1353 == signed_mod(2^32, MLKEM_Q) */ + /* check-magic: + 20553 == signed_mod(1353 * pow(MLKEM_Q, -1, 2^16), 2^16) */ + const int16_t f_twisted = 20553; for (i = 0; i < MLKEM_N; i++) __loop__( invariant(i <= MLKEM_N) invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q)) decreases(MLKEM_N - i)) { - r->coeffs[i] = mlk_fqmul(r->coeffs[i], f); + r->coeffs[i] = mlk_fqmul(r->coeffs[i], f, f_twisted); } mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q); @@ -281,11 +312,13 @@ __contract__( invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q)) decreases(MLKEM_N / 4 - i)) { - x->coeffs[2 * i + 0] = mlk_fqmul(a->coeffs[4 * i + 1], mlk_zetas[64 + i]); + const int16_t z = mlk_zetas[64 + i][0]; + const int16_t z_t = mlk_zetas[64 + i][1]; + x->coeffs[2 * i + 0] = mlk_fqmul(a->coeffs[4 * i + 1], z, z_t); /* The values in zeta table are <= MLKEM_Q in absolute value, * so the negation in int16_t is safe. */ x->coeffs[2 * i + 1] = - mlk_fqmul(a->coeffs[4 * i + 3], (int16_t)(-mlk_zetas[64 + i])); + mlk_fqmul(a->coeffs[4 * i + 3], (int16_t)(-z), (int16_t)(-z_t)); } /* @@ -312,37 +345,133 @@ void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a) mlk_poly_mulcache_compute_c(x, a); } +/* Cooley-Tukey butterfly */ +static MLK_INLINE void mlk_ct_butterfly(int16_t r[MLKEM_N], unsigned i, + unsigned j, int16_t z, int16_t zt) +{ + const int16_t t = mlk_fqmul(r[j], z, zt); + r[j] = (int16_t)(r[i] - t); + r[i] = (int16_t)(r[i] + t); +} + +/* Gentleman-Sande butterfly without reduction. + * + * The twiddles `z`, `zt` are implicitly negated: we compute `b - a` instead + * of `a - b`, which is equivalent to multiplying by `-z`. + */ +static MLK_INLINE void mlk_gs_butterfly(int16_t r[MLKEM_N], unsigned i, + unsigned j, int16_t z, int16_t zt) +{ + const int16_t a = r[i]; + const int16_t b = r[j]; + r[i] = (int16_t)(a + b); + r[j] = mlk_fqmul((int16_t)(b - a), z, zt); +} + /* - * Computes a block CT butterflies with a fixed twiddle factor, - * using Montgomery multiplication. - * Parameters: - * - r: Pointer to base of polynomial (_not_ the base of butterfly block) - * - root: Twiddle factor to use for the butterfly. This must be in - * Montgomery form and signed canonical. - * - start: Offset to the beginning of the butterfly block - * - len: Index difference between coefficients subject to a butterfly - * - bound: Ghost variable describing coefficient bound: Prior to `start`, - * coefficients must be bound by `bound + MLKEM_Q`. Post `start`, - * they must be bound by `bound`. - * When this function returns, output coefficients in the index range - * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`. - * Example: - * - start=8, len=4 - * This would compute the following four butterflies - * 8 -- 12 - * 9 -- 13 - * 10 -- 14 - * 11 -- 15 - * - start=4, len=2 - * This would compute the following two butterflies - * 4 -- 6 - * 5 -- 7 + * Two merged forward-NTT layers, applied to one outer block. + */ +static MLK_INLINE void mlk_ntt_2_layers_block( + int16_t r[MLKEM_N], unsigned start, unsigned len, int16_t z0, int16_t z0t, + int16_t z1, int16_t z1t, int16_t z2, int16_t z2t, const int16_t bound) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(0 < bound && bound < INT16_MAX - 2 * MLKEM_Q) + requires(1 <= len && len <= MLKEM_N / 4) + requires(start <= MLKEM_N - 4 * len) + requires(z0 > -MLKEM_Q_HALF && z0 < MLKEM_Q_HALF) + requires(z1 > -MLKEM_Q_HALF && z1 < MLKEM_Q_HALF) + requires(z2 > -MLKEM_Q_HALF && z2 < MLKEM_Q_HALF) + requires(array_abs_bound(r, start, MLKEM_N, bound)) + requires(array_abs_bound(r, 0, start, bound + 2 * MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, start + 4 * len, MLKEM_N, bound)) + ensures(array_abs_bound(r, 0, start + 4 * len, bound + 2 * MLKEM_Q)) +) +{ + unsigned j = 0; + /* `bound` is a ghost variable referenced only in the CBMC contract. */ + ((void)bound); + for (j = 0; j < len; j++) + __loop__( + assigns(j, memory_slice(r, sizeof(int16_t) * MLKEM_N)) + invariant(j <= len) + /* Static bounds */ + invariant(array_abs_bound(r, 0, start, bound + 2 * MLKEM_Q)) + invariant(array_abs_bound(r, start + 4 * len, MLKEM_N, bound)) + /* Dynamic bounds */ + invariant(array_abs_bound(r, start + 0 * len, start + 0 * len + j, bound + 2 * MLKEM_Q)) + invariant(array_abs_bound(r, start + 1 * len, start + 1 * len + j, bound + 2 * MLKEM_Q)) + invariant(array_abs_bound(r, start + 2 * len, start + 2 * len + j, bound + 2 * MLKEM_Q)) + invariant(array_abs_bound(r, start + 3 * len, start + 3 * len + j, bound + 2 * MLKEM_Q)) + invariant(array_abs_bound(r, start + 0 * len + j, start + 1 * len, bound)) + invariant(array_abs_bound(r, start + 1 * len + j, start + 2 * len, bound)) + invariant(array_abs_bound(r, start + 2 * len + j, start + 3 * len, bound)) + invariant(array_abs_bound(r, start + 3 * len + j, start + 4 * len, bound)) + decreases(len - j)) + { + const unsigned i0 = start + j; + const unsigned i1 = i0 + 1 * len; + const unsigned i2 = i0 + 2 * len; + const unsigned i3 = i0 + 3 * len; + + mlk_ct_butterfly(r, i0, i2, z0, z0t); + mlk_ct_butterfly(r, i1, i3, z0, z0t); + mlk_ct_butterfly(r, i0, i1, z1, z1t); + mlk_ct_butterfly(r, i2, i3, z2, z2t); + } +} + +/* + * Two merged forward-NTT layers. + */ +static MLK_INLINE void mlk_ntt_2_layers(int16_t r[MLKEM_N], + const unsigned layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(layer == 1 || layer == 3 || layer == 5) + requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 2) * MLKEM_Q))) +{ + const unsigned len_outer = (unsigned)MLKEM_N >> layer; + const unsigned len = len_outer >> 1; + unsigned start, k; + k = 1u << (layer - 1); + for (start = 0; start < MLKEM_N; start += 2 * len_outer) + __loop__( + invariant(start <= MLKEM_N) + invariant((1u << (layer - 1)) <= k && k <= (1u << layer)) + invariant(2 * len_outer * k == start + MLKEM_N) + invariant(array_abs_bound(r, 0, start, (layer + 2) * MLKEM_Q)) + invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)) + decreases(MLKEM_N - start)) + { + /* Negation of the zetas is embedded in + * mlk_ntt_2_layers_blocks -> mlk_gs_butterfly */ + const int16_t z0 = mlk_zetas[k][0]; + const int16_t z1 = mlk_zetas[2 * k][0]; + const int16_t z2 = mlk_zetas[2 * k + 1][0]; + + const int16_t z0t = mlk_zetas[k][1]; + const int16_t z1t = mlk_zetas[2 * k][1]; + const int16_t z2t = mlk_zetas[2 * k + 1][1]; + + k++; + mlk_ntt_2_layers_block(r, start, len, z0, z0t, z1, z1t, z2, z2t, + (int16_t)(layer * MLKEM_Q)); + } +} + +/* + * Single-layer forward NTT, used for the leftover layer 7 (len=2) after the + * three merged 2-layer calls. */ /* Reference: Embedded in `ntt()` in the reference implementation @[REF]. */ static void mlk_ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, - unsigned start, unsigned len, - unsigned bound) + int16_t zeta_t, unsigned start, + unsigned len, unsigned bound) __contract__( requires(start < MLKEM_N) requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) @@ -372,20 +501,13 @@ __contract__( decreases(start + len - j)) { int16_t t; - t = mlk_fqmul(r[j + len], zeta); + t = mlk_fqmul(r[j + len], zeta, zeta_t); /* The precondition implies that the arithmetic does not overflow. */ r[j + len] = (int16_t)(r[j] - t); r[j] = (int16_t)(r[j] + t); } } -/* - * Compute one layer of forward NTT - * Parameters: - * - r: Pointer to base of polynomial - * - layer: Variable indicating which layer is being applied. - */ - /* Reference: Embedded in `ntt()` in the reference implementation @[REF]. */ static void mlk_ntt_layer(int16_t r[MLKEM_N], unsigned layer) __contract__( @@ -407,8 +529,10 @@ __contract__( invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)) decreases(MLKEM_N - start)) { - int16_t zeta = mlk_zetas[k++]; - mlk_ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q); + const int16_t zeta = mlk_zetas[k][0]; + const int16_t zeta_t = mlk_zetas[k][1]; + k++; + mlk_ntt_butterfly_block(r, zeta, zeta_t, start, len, layer * MLKEM_Q); } } @@ -432,21 +556,17 @@ __contract__( ensures(array_abs_bound(p->coeffs, 0, MLKEM_N, MLK_NTT_BOUND)) ) { - unsigned layer; int16_t *r; mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q); - r = p->coeffs; - for (layer = 1; layer <= 7; layer++) - __loop__( - invariant(1 <= layer && layer <= 8) - invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)) - decreases(8 - layer)) - { - mlk_ntt_layer(r, layer); - } + mlk_ntt_2_layers(r, 1); + mlk_ntt_2_layers(r, 3); + mlk_ntt_2_layers(r, 5); + /* Layer 7 is left as a single layer because the 2-layer merge only covers + * an even number of layers (ML-KEM has 7). */ + mlk_ntt_layer(r, 7); /* Check the stronger bound */ mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND); @@ -470,7 +590,96 @@ void mlk_poly_ntt(mlk_poly *p) } -/* Compute one layer of inverse NTT */ +/* + * Two merged inverse-NTT layers, applied to one outer block. + * + * Bound discipline (int16_t): on entry every coefficient is -MLKEM_Q_HALF && z0 < MLKEM_Q_HALF) + requires(z1 > -MLKEM_Q_HALF && z1 < MLKEM_Q_HALF) + requires(z2 > -MLKEM_Q_HALF && z2 < MLKEM_Q_HALF) + requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) +) +{ + unsigned j = 0; + for (j = 0; j < len; j++) + __loop__( + assigns(j, memory_slice(r, sizeof(int16_t) * MLKEM_N)) + invariant(j <= len) + invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) + decreases(len - j)) + { + const unsigned i0 = start + j; + const unsigned i1 = i0 + 1 * len; + const unsigned i2 = i0 + 2 * len; + const unsigned i3 = i0 + 3 * len; + /* Bounds: MLKEM_Q */ + mlk_gs_butterfly(r, i0, i1, z1, z1t); + mlk_gs_butterfly(r, i2, i3, z2, z2t); + /* Bounds: 2 * MLKEM_Q */ + mlk_gs_butterfly(r, i0, i2, z0, z0t); + mlk_gs_butterfly(r, i1, i3, z0, z0t); + /* Barrett-reduce the additive outputs back to > layer; + const unsigned len_outer = len << 1; + unsigned start, k; + k = (1u << (layer - 1)) - 1u; + for (start = 0; start < MLKEM_N; start += 2 * len_outer) + __loop__( + invariant(start <= MLKEM_N) + invariant(k < (1u << (layer - 1))) + invariant(2 * len_outer * k + 2 * len_outer == 2 * MLKEM_N - start) + invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) + decreases(MLKEM_N - start)) + { + /* Zetas are passed un-negated; mlk_gs_butterfly absorbs the negation. */ + const int16_t z0 = mlk_zetas[k][0]; + const int16_t z1 = mlk_zetas[2 * k + 1][0]; + const int16_t z2 = mlk_zetas[2 * k][0]; + + const int16_t z0t = mlk_zetas[k][1]; + const int16_t z1t = mlk_zetas[2 * k + 1][1]; + const int16_t z2t = mlk_zetas[2 * k][1]; + + k--; + mlk_invntt_2_layers_block(r, start, len, z0, z0t, z1, z1t, z2, z2t); + } +} + +/* Single-layer inverse NTT, used for the leftover layer 7 (len=1) before + * the three merged 2-layer calls. */ /* Reference: Embedded into `invntt()` in the reference implementation @[REF] */ static void mlk_invntt_layer(int16_t *r, unsigned layer) @@ -493,7 +702,9 @@ __contract__( decreases(MLKEM_N - start)) { unsigned j; - int16_t zeta = mlk_zetas[k--]; + const int16_t zeta = mlk_zetas[k][0]; + const int16_t zeta_t = mlk_zetas[k][1]; + k--; for (j = start; j < start + len; j++) __loop__( invariant(start <= j && j <= start + len) @@ -505,7 +716,7 @@ __contract__( /* The preconditions imply that the arithmetic does not overflow. */ r[j] = mlk_barrett_reduce((int16_t)(t + r[j + len])); r[j + len] = (int16_t)(r[j + len] - t); - r[j + len] = mlk_fqmul(r[j + len], zeta); + r[j + len] = mlk_fqmul(r[j + len], zeta, zeta_t); } } } @@ -522,8 +733,11 @@ __contract__( ensures(array_abs_bound(p->coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND)) ) { - unsigned j, layer; + unsigned j; const int16_t f = 1441; /* check-magic: 1441 == pow(2,32 - 7,MLKEM_Q) */ + /* check-magic: + -10079 == signed_mod(1441 * pow(MLKEM_Q, -1, 2^16), 2^16) */ + const int16_t f_twisted = -10079; int16_t *r = p->coeffs; /* @@ -537,18 +751,15 @@ __contract__( invariant(array_abs_bound(r, 0, j, MLKEM_Q)) decreases(MLKEM_N - j)) { - r[j] = mlk_fqmul(r[j], f); + r[j] = mlk_fqmul(r[j], f, f_twisted); } - /* Run the invNTT layers */ - for (layer = 7; layer > 0; layer--) - __loop__( - invariant(0 <= layer && layer < 8) - invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) - decreases(layer)) - { - mlk_invntt_layer(r, layer); - } + /* Layer 7 is left as a single layer because the 2-layer merge only covers + * an even number of layers (ML-KEM has 7). */ + mlk_invntt_layer(r, 7); + mlk_invntt_2_layers(r, 6); + mlk_invntt_2_layers(r, 4); + mlk_invntt_2_layers(r, 2); mlk_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND); } diff --git a/mlkem/src/zetas.inc b/mlkem/src/zetas.inc index 00316daf6..f16fafe3a 100644 --- a/mlkem/src/zetas.inc +++ b/mlkem/src/zetas.inc @@ -12,19 +12,39 @@ /* * Table of zeta values used in the reference NTT and inverse NTT. - * See autogen for details. + * Each row is (zeta_mont, zeta_twisted); see autogen for details. */ -static MLK_ALIGN const int16_t mlk_zetas[128] = { - -1044, -758, -359, -1517, 1493, 1422, 287, 202, -171, 622, 1577, - 182, 962, -1202, -1474, 1468, 573, -1325, 264, 383, -829, 1458, - -1602, -130, -681, 1017, 732, 608, -1542, 411, -205, -1571, 1223, - 652, -552, 1015, -1293, 1491, -282, -1544, 516, -8, -320, -666, - -1618, -1162, 126, 1469, -853, -90, -271, 830, 107, -1421, -247, - -951, -398, 961, -1508, -725, 448, -1065, 677, -1275, -1103, 430, - 555, 843, -1251, 871, 1550, 105, 422, 587, 177, -235, -291, - -460, 1574, 1653, -246, 778, 1159, -147, -777, 1483, -602, 1119, - -1590, 644, -872, 349, 418, 329, -156, -75, 817, 1097, 603, - 610, 1322, -1285, -1465, 384, -1215, -136, 1218, -1335, -874, 220, - -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, - -308, 996, 991, 958, -1460, 1522, 1628, +static MLK_ALIGN const int16_t mlk_zetas[128][2] = { + {-1044, -20}, {-758, 31498}, {-359, 14745}, {-1517, 787}, + {1493, 13525}, {1422, -12402}, {287, 28191}, {202, -16694}, + {-171, -20907}, {622, 27758}, {1577, -3799}, {182, -15690}, + {962, 10690}, {-1202, 1358}, {-1474, -11202}, {1468, 31164}, + {573, -5827}, {-1325, 17363}, {264, -26360}, {383, -29057}, + {-829, 5571}, {1458, -1102}, {-1602, 21438}, {-130, -26242}, + {-681, -28073}, {1017, 24313}, {732, -10532}, {608, 8800}, + {-1542, 18426}, {411, 8859}, {-205, 26675}, {-1571, -16163}, + {1223, -5689}, {652, -6516}, {-552, 1496}, {1015, 30967}, + {-1293, -23565}, {1491, 20179}, {-282, 20710}, {-1544, 25080}, + {516, -12796}, {-8, 26616}, {-320, 16064}, {-666, -12442}, + {-1618, 9134}, {-1162, -650}, {126, -25986}, {1469, 27837}, + {-853, 19883}, {-90, -28250}, {-271, -15887}, {830, -8898}, + {107, -28309}, {-1421, 9075}, {-247, -30199}, {-951, 18249}, + {-398, 13426}, {961, 14017}, {-1508, -29156}, {-725, -12757}, + {448, 16832}, {-1065, 4311}, {677, -24155}, {-1275, -17915}, + {-1103, -335}, {430, 11182}, {555, -11477}, {843, 13387}, + {-1251, -32227}, {871, -14233}, {1550, 20494}, {105, -21655}, + {422, -27738}, {587, 13131}, {177, 945}, {-235, -4587}, + {-291, -14883}, {-460, 23092}, {1574, 6182}, {1653, 5493}, + {-246, 32010}, {778, -32502}, {1159, 10631}, {-147, 30317}, + {-777, 29175}, {1483, -18741}, {-602, -28762}, {1119, 12639}, + {-1590, -18486}, {644, 20100}, {-872, 17560}, {349, 18525}, + {418, -14430}, {329, 19529}, {-156, -5276}, {-75, -12619}, + {817, -31183}, {1097, 20297}, {603, 25435}, {610, 2146}, + {1322, -7382}, {-1285, 15355}, {-1465, 24391}, {384, -32384}, + {-1215, -20927}, {-136, -6280}, {1218, 10946}, {-1335, -14903}, + {-874, 24214}, {220, -11044}, {-1187, 16989}, {-1659, 14469}, + {-1185, 10335}, {-1530, -21498}, {-1278, -7934}, {794, -20198}, + {-1510, -22502}, {-854, 23210}, {-870, 10906}, {478, -17442}, + {-108, 31636}, {-308, -23860}, {996, 28644}, {991, -20257}, + {958, 23998}, {-1460, 7756}, {1522, -17422}, {1628, 23132}, }; diff --git a/proofs/cbmc/fqmul/fqmul_harness.c b/proofs/cbmc/fqmul/fqmul_harness.c index 570e508af..ff7e04478 100644 --- a/proofs/cbmc/fqmul/fqmul_harness.c +++ b/proofs/cbmc/fqmul/fqmul_harness.c @@ -6,11 +6,11 @@ #include "common.h" -int16_t mlk_fqmul(int16_t a, int16_t b); +int16_t mlk_fqmul(int16_t a, int16_t b, int16_t b_twisted); void harness(void) { - int16_t a, b, r; + int16_t a, b, b_twisted, r; - r = mlk_fqmul(a, b); + r = mlk_fqmul(a, b, b_twisted); } diff --git a/proofs/cbmc/invntt_2_layers/Makefile b/proofs/cbmc/invntt_2_layers/Makefile new file mode 100644 index 000000000..cb3063704 --- /dev/null +++ b/proofs/cbmc/invntt_2_layers/Makefile @@ -0,0 +1,34 @@ +# Copyright (c) The mlkem-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = invntt_2_layers_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = mlk_invntt_2_layers + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/src/poly.c + +CHECK_FUNCTION_CONTRACTS=mlk_invntt_2_layers +USE_FUNCTION_CONTRACTS=mlk_invntt_2_layers_block +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = mlk_invntt_2_layers + +CBMC_OBJECT_BITS = 8 + +include ../Makefile.common diff --git a/proofs/cbmc/invntt_2_layers/invntt_2_layers_harness.c b/proofs/cbmc/invntt_2_layers/invntt_2_layers_harness.c new file mode 100644 index 000000000..014bb8957 --- /dev/null +++ b/proofs/cbmc/invntt_2_layers/invntt_2_layers_harness.c @@ -0,0 +1,15 @@ +// Copyright (c) The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 + +#include "poly.h" + + +void mlk_invntt_2_layers(int16_t *r, unsigned layer); + +void harness(void) +{ + int16_t *r; + unsigned layer; + mlk_invntt_2_layers(r, layer); +} diff --git a/proofs/cbmc/invntt_2_layers_block/Makefile b/proofs/cbmc/invntt_2_layers_block/Makefile new file mode 100644 index 000000000..884687706 --- /dev/null +++ b/proofs/cbmc/invntt_2_layers_block/Makefile @@ -0,0 +1,33 @@ +# Copyright (c) The mlkem-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = invntt_2_layers_block_harness + +PROOF_UID = mlk_invntt_2_layers_block + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/src/poly.c + +CHECK_FUNCTION_CONTRACTS=mlk_invntt_2_layers_block +USE_FUNCTION_CONTRACTS = mlk_fqmul +USE_FUNCTION_CONTRACTS += mlk_barrett_reduce +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = mlk_invntt_2_layers_block + +CBMC_OBJECT_BITS = 9 + +include ../Makefile.common diff --git a/proofs/cbmc/invntt_2_layers_block/invntt_2_layers_block_harness.c b/proofs/cbmc/invntt_2_layers_block/invntt_2_layers_block_harness.c new file mode 100644 index 000000000..d4968af7b --- /dev/null +++ b/proofs/cbmc/invntt_2_layers_block/invntt_2_layers_block_harness.c @@ -0,0 +1,20 @@ +// Copyright (c) The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 + +#include +#include "common.h" + + +void mlk_invntt_2_layers_block(int16_t *r, unsigned start, unsigned len, + int16_t z0, int16_t z0_twst, int16_t z1, + int16_t z1_twst, int16_t z2, int16_t z2_twst); + +void harness(void) +{ + int16_t *r; + unsigned start, len; + int16_t z0, z0_twst, z1, z1_twst, z2, z2_twst; + mlk_invntt_2_layers_block(r, start, len, z0, z0_twst, z1, z1_twst, z2, + z2_twst); +} diff --git a/proofs/cbmc/ntt_2_layers/Makefile b/proofs/cbmc/ntt_2_layers/Makefile new file mode 100644 index 000000000..32360d139 --- /dev/null +++ b/proofs/cbmc/ntt_2_layers/Makefile @@ -0,0 +1,42 @@ +# Copyright (c) The mlkem-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = ntt_2_layers_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = mlk_ntt_2_layers + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/src/poly.c + +CHECK_FUNCTION_CONTRACTS=mlk_ntt_2_layers +USE_FUNCTION_CONTRACTS=mlk_ntt_2_layers_block +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--bitwuzla + +FUNCTION_NAME = mlk_ntt_2_layers + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +include ../Makefile.common diff --git a/proofs/cbmc/ntt_2_layers/ntt_2_layers_harness.c b/proofs/cbmc/ntt_2_layers/ntt_2_layers_harness.c new file mode 100644 index 000000000..46a2a2f95 --- /dev/null +++ b/proofs/cbmc/ntt_2_layers/ntt_2_layers_harness.c @@ -0,0 +1,15 @@ +// Copyright (c) The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 + +#include "poly.h" + + +void mlk_ntt_2_layers(int16_t *r, unsigned layer); + +void harness(void) +{ + int16_t *r; + unsigned layer; + mlk_ntt_2_layers(r, layer); +} diff --git a/proofs/cbmc/ntt_2_layers_block/Makefile b/proofs/cbmc/ntt_2_layers_block/Makefile new file mode 100644 index 000000000..b3b6f5e4a --- /dev/null +++ b/proofs/cbmc/ntt_2_layers_block/Makefile @@ -0,0 +1,32 @@ +# Copyright (c) The mlkem-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = ntt_2_layers_block_harness + +PROOF_UID = mlk_ntt_2_layers_block + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/src/poly.c + +CHECK_FUNCTION_CONTRACTS=mlk_ntt_2_layers_block +USE_FUNCTION_CONTRACTS=mlk_fqmul +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--bitwuzla + +FUNCTION_NAME = mlk_ntt_2_layers_block + +CBMC_OBJECT_BITS = 9 + +include ../Makefile.common diff --git a/proofs/cbmc/ntt_2_layers_block/ntt_2_layers_block_harness.c b/proofs/cbmc/ntt_2_layers_block/ntt_2_layers_block_harness.c new file mode 100644 index 000000000..d823f72b3 --- /dev/null +++ b/proofs/cbmc/ntt_2_layers_block/ntt_2_layers_block_harness.c @@ -0,0 +1,21 @@ +// Copyright (c) The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 + +#include +#include "common.h" + + +void mlk_ntt_2_layers_block(int16_t *r, unsigned start, unsigned len, + int16_t z0, int16_t z0_twst, int16_t z1, + int16_t z1_twst, int16_t z2, int16_t z2_twst, + const int16_t bound); + +void harness(void) +{ + int16_t *r; + unsigned start, len; + int16_t z0, z0_twst, z1, z1_twst, z2, z2_twst, bound; + mlk_ntt_2_layers_block(r, start, len, z0, z0_twst, z1, z1_twst, z2, z2_twst, + bound); +} diff --git a/proofs/cbmc/ntt_butterfly_block/ntt_butterfly_block_harness.c b/proofs/cbmc/ntt_butterfly_block/ntt_butterfly_block_harness.c index 4434ffe1a..aa03d39fd 100644 --- a/proofs/cbmc/ntt_butterfly_block/ntt_butterfly_block_harness.c +++ b/proofs/cbmc/ntt_butterfly_block/ntt_butterfly_block_harness.c @@ -6,12 +6,12 @@ #include "common.h" -void mlk_ntt_butterfly_block(int16_t *r, int16_t root, unsigned start, - unsigned len, unsigned bound); +void mlk_ntt_butterfly_block(int16_t *r, int16_t root, int16_t root_twisted, + unsigned start, unsigned len, unsigned bound); void harness(void) { - int16_t *r, root; + int16_t *r, root, root_twisted; unsigned start, stride, bound; - mlk_ntt_butterfly_block(r, root, start, stride, bound); + mlk_ntt_butterfly_block(r, root, root_twisted, start, stride, bound); } diff --git a/proofs/cbmc/poly_invntt_tomont_c/Makefile b/proofs/cbmc/poly_invntt_tomont_c/Makefile index ae66bf139..180e94a4c 100644 --- a/proofs/cbmc/poly_invntt_tomont_c/Makefile +++ b/proofs/cbmc/poly_invntt_tomont_c/Makefile @@ -22,6 +22,7 @@ PROJECT_SOURCES += $(SRCDIR)/mlkem/src/poly.c CHECK_FUNCTION_CONTRACTS=mlk_poly_invntt_tomont_c USE_FUNCTION_CONTRACTS = mlk_fqmul USE_FUNCTION_CONTRACTS += mlk_invntt_layer +USE_FUNCTION_CONTRACTS += mlk_invntt_2_layers APPLY_LOOP_CONTRACTS=on USE_DYNAMIC_FRAMES=1 diff --git a/proofs/cbmc/poly_ntt_c/Makefile b/proofs/cbmc/poly_ntt_c/Makefile index 11ed7098b..5704a2565 100644 --- a/proofs/cbmc/poly_ntt_c/Makefile +++ b/proofs/cbmc/poly_ntt_c/Makefile @@ -21,6 +21,7 @@ PROJECT_SOURCES += $(SRCDIR)/mlkem/src/poly.c CHECK_FUNCTION_CONTRACTS=mlk_poly_ntt_c USE_FUNCTION_CONTRACTS=mlk_ntt_layer +USE_FUNCTION_CONTRACTS+=mlk_ntt_2_layers APPLY_LOOP_CONTRACTS=on USE_DYNAMIC_FRAMES=1 diff --git a/scripts/autogen b/scripts/autogen index ba7801fb5..9cbc1bccf 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -714,17 +714,25 @@ def signed_reduce(a): def gen_c_zetas(): """Generate source and header file for zeta values used in - the reference NTT and invNTT""" + the reference NTT and invNTT. + + Each yielded entry is a pair (zeta_mont, zeta_twisted) where: + - zeta_mont = signed canonical representative of zeta * 2^16 mod q + - zeta_twisted = signed canonical 16-bit representative of + zeta_mont * q^{-1} mod 2^16 + The twisted form lets the C-reference fqmul skip the extra QINV + multiply that mlk_montgomery_reduce would otherwise perform.""" # The zeta values are the powers of the chosen root of unity (17), - # converted to Montgomery form. + # converted to Montgomery form (and to its twisted-Montgomery counterpart). - zeta = [] + zeta_pairs = [] for i in range(128): - zeta.append(signed_reduce(pow(root_of_unity, i, modulus) * montgomery_factor)) + raw = pow(root_of_unity, i, modulus) + zeta_pairs.append(prepare_root_for_montmul(raw)) # The source code stores the zeta table in bit reversed form - yield from (zeta[bitreverse(i, 7)] for i in range(128)) + yield from (zeta_pairs[bitreverse(i, 7)] for i in range(128)) def gen_c_zeta_file(): @@ -733,10 +741,11 @@ def gen_c_zeta_file(): yield "" yield "/*" yield " * Table of zeta values used in the reference NTT and inverse NTT." - yield " * See autogen for details." + yield " * Each row is (zeta_mont, zeta_twisted); see autogen for details." yield " */" - yield "static MLK_ALIGN const int16_t mlk_zetas[128] = {" - yield from map(lambda t: str(t) + ",", gen_c_zetas()) + yield "static MLK_ALIGN const int16_t mlk_zetas[128][2] = {" + for z_mont, z_twisted in gen_c_zetas(): + yield f"{{ {z_mont}, {z_twisted} }}," yield "};" yield "" diff --git a/scripts/check-magic b/scripts/check-magic index 42b50815d..29105374a 100755 --- a/scripts/check-magic +++ b/scripts/check-magic @@ -37,6 +37,37 @@ FAIL = f"{RED}✗{NORMAL}" REMEMBERED = f"{BLUE}⊢{NORMAL}" +def join_block_comments(lines): + """Collapse multi-line C block comments onto their opening line. + + Each continuation line is replaced with an empty placeholder so the + overall line count - and therefore line numbers in diagnostics - is + unchanged. Only handles block comments that start and end with the + standard /* ... */ delimiters; line comments and strings are left + alone, which is sufficient for this script's input. + """ + out = list(lines) + i = 0 + n = len(out) + while i < n: + line = out[i] + if "/*" in line and "*/" not in line: + j = i + 1 + merged = line + while j < n and "*/" not in out[j]: + merged += " " + out[j].strip() + out[j] = "" + j += 1 + if j < n: + merged += " " + out[j].strip() + out[j] = "" + out[i] = merged + i = j + 1 + continue + i += 1 + return out + + def check_magic_numbers(): mlkem_q = 3329 exceptions = [ @@ -129,6 +160,11 @@ def check_magic_numbers(): if autogen_marker in content: continue content = content.split("\n") + # Pre-pass: collapse multi-line block comments onto their opening + # line so wrapped check-magic annotations are recognized. Consumed + # continuation lines are replaced with empty placeholders to preserve + # line numbers in diagnostics. + content = join_block_comments(content) # Use negative lookbefore and lookahead to exclude numbers # that occur as part of identifiers (e.g. layer12345 or 199901L) pattern = r"(?