From b8ddb537f5e5911e07349763a62b6a987a6f4f20 Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 17 Jun 2026 16:54:50 -0700 Subject: [PATCH] Add RealTek AmebaPro2 (RTL8735B) HUK crypto-callback port (AES, HMAC-SHA256, ECDSA, TRNG) --- .github/workflows/rtl8735b.yml | 86 ++ configure.ac | 23 +- wolfcrypt/src/include.am | 7 + wolfcrypt/src/port/realtek/README.md | 497 +++++++ wolfcrypt/src/port/realtek/rtl8735b.c | 1529 ++++++++++++++++++++ wolfcrypt/src/port/realtek/rtl8735b_shim.h | 184 +++ wolfssl/wolfcrypt/include.am | 1 + wolfssl/wolfcrypt/port/realtek/rtl8735b.h | 161 +++ zephyr/CMakeLists.txt | 1 + 9 files changed, 2488 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/rtl8735b.yml create mode 100644 wolfcrypt/src/port/realtek/README.md create mode 100644 wolfcrypt/src/port/realtek/rtl8735b.c create mode 100644 wolfcrypt/src/port/realtek/rtl8735b_shim.h create mode 100644 wolfssl/wolfcrypt/port/realtek/rtl8735b.h diff --git a/.github/workflows/rtl8735b.yml b/.github/workflows/rtl8735b.yml new file mode 100644 index 00000000000..99f2ad28b44 --- /dev/null +++ b/.github/workflows/rtl8735b.yml @@ -0,0 +1,86 @@ +name: RealTek RTL8735B (AmebaPro2) HUK port Tests + +# START OF COMMON SECTION +on: + push: + branches: [ 'release/**' ] + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + branches: [ '*' ] + # Weekday-morning cron seeds the master-scoped ccache that PR runs restore + # read-only (see ccache-setup). + schedule: + - cron: '27 10 * * 1-5' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +# END OF COMMON SECTION + +jobs: + # Host compile-test of the RTL8735B HUK crypto-callback port. --enable-rtl8735b + # sets WOLFSSL_RTL8735B_HOST_TEST, which swaps the vendor HAL headers for + # rtl8735b_shim.h (sentinel stubs, no real crypto). 
So this is a build-only + # test: it exercises the crypto-callback dispatch, info-struct field access, + # compile guards, and build wiring without the RealTek SDK. Functional crypto + # validation requires RTL8735B hardware (see the port README). Configs span + # the algo/guard combinations and both WOLFSSL_SMALL_STACK branches of the + # HW-ECDSA helpers; build via .github/scripts/parallel-make-check.py (see + # cryptocb-only.yml / os-check.yml for the full pattern), one out-of-tree + # build dir per config off a single checkout/autogen. + build: + name: build (--enable-rtl8735b) + if: ${{ (github.repository_owner == 'wolfssl') && (github.event_name != 'pull_request' || github.event.pull_request.draft == false) }} + runs-on: ubuntu-24.04 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v5 + name: Checkout wolfSSL + + - name: Install dependencies + uses: ./.github/actions/install-apt-deps + with: + packages: autoconf automake libtool build-essential + ghcr-debs-tag: ubuntu-24.04-minimal + + - name: Set up ccache + uses: ./.github/actions/ccache-setup + with: + workflow-id: rtl8735b + read-only: ${{ github.event_name == 'pull_request' }} + max-size: 100M + + - name: Build all configs (compile-only, out-of-tree) + run: | + cat > "$RUNNER_TEMP/rtl8735b-configs.json" <<'EOF' + [ + {"name": "full-smallstack", "minutes": 2, + "comment": "All port paths (AES-GCM/ECB/CBC/CTR, HMAC-SHA256, ECDSA P-256) with WOLFSSL_SMALL_STACK -> exercises the heap branch of the HW-ECDSA mp_int helpers.", + "configure": ["--enable-rtl8735b", "--enable-cryptocb", "--enable-ecc", + "--enable-aesgcm", "--enable-aesctr", "--enable-aescbc", + "CPPFLAGS=-DWOLFSSL_SMALL_STACK"]}, + {"name": "full", "minutes": 2, + "comment": "Same algo coverage without WOLFSSL_SMALL_STACK -> exercises the on-stack (array) branch of the HW-ECDSA mp_int helpers.", + "configure": ["--enable-rtl8735b", "--enable-cryptocb", "--enable-ecc", + "--enable-aesgcm", "--enable-aesctr", "--enable-aescbc"]}, + 
{"name": "min", "minutes": 2, + "comment": "Minimal: no ECC (HW/SW ECDSA sign code compiled out), AES + HMAC only -> exercises the non-ECC compile guards.", + "configure": ["--enable-rtl8735b", "--enable-cryptocb"]} + ] + EOF + .github/scripts/parallel-make-check.py --build-only \ + "$RUNNER_TEMP/rtl8735b-configs.json" + + - name: ccache stats + if: always() + run: ccache -s || true + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v6 + with: + retention-days: 7 + name: rtl8735b-logs + path: | + build-*/config.log + if-no-files-found: ignore diff --git a/configure.ac b/configure.ac index c1d2dd089ef..d99a253148f 100644 --- a/configure.ac +++ b/configure.ac @@ -3212,6 +3212,25 @@ case "$ENABLED_STSAFE" in esac +# RealTek AmebaPro2 (RTL8735B) HUK crypto-callback port. +# On-target the application supplies the AmebaPro2 HAL include path. This option +# is a host compile-test of the port: it swaps the HAL headers for a shim +# (WOLFSSL_RTL8735B_HOST_TEST) so the cryptocb dispatch and wiring build without +# the vendor SDK. It forces crypto callbacks on (see the cryptocb block). 
+# Example: "./configure --enable-rtl8735b" +ENABLED_RTL8735B="no" +AC_ARG_ENABLE([rtl8735b], + [AS_HELP_STRING([--enable-rtl8735b], + [Enable RealTek AmebaPro2 (RTL8735B) HUK crypto-callback port (host compile-test).])], + [ ENABLED_RTL8735B=$enableval ], + [ ENABLED_RTL8735B=no ]) + +if test "x$ENABLED_RTL8735B" != "xno" +then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RTL8735B_HUK -DWOLFSSL_RTL8735B_HOST_TEST -DHAVE_AES_ECB" +fi + + # NXP SE050 # Example: "./configure --with-se050=/home/pi/simw_top" ENABLED_SE050="no" @@ -10680,7 +10699,7 @@ AC_ARG_ENABLE([cryptocb-sw-test], [ ENABLED_CRYPTOCB_SW_TEST=yes ] ) -if test "x$ENABLED_PKCS11" = "xyes" || test "x$ENABLED_WOLFTPM" = "xyes" || test "$ENABLED_CAAM" != "no" +if test "x$ENABLED_PKCS11" = "xyes" || test "x$ENABLED_WOLFTPM" = "xyes" || test "$ENABLED_CAAM" != "no" || test "x$ENABLED_RTL8735B" != "xno" then ENABLED_CRYPTOCB=yes fi @@ -12429,6 +12448,7 @@ AM_CONDITIONAL([BUILD_IOTSAFE],[test "x$ENABLED_IOTSAFE" = "xyes"]) AM_CONDITIONAL([BUILD_IOTSAFE_HWRNG],[test "x$ENABLED_IOTSAFE_HWRNG" = "xyes"]) AM_CONDITIONAL([BUILD_SE050],[test "x$ENABLED_SE050" = "xyes"]) AM_CONDITIONAL([BUILD_STSAFE],[test "x$ENABLED_STSAFE" != "xno"]) +AM_CONDITIONAL([BUILD_RTL8735B],[test "x$ENABLED_RTL8735B" != "xno"]) AM_CONDITIONAL([BUILD_TROPIC01],[test "x$ENABLED_TROPIC01" = "xyes"]) AM_CONDITIONAL([BUILD_KDF],[test "x$ENABLED_KDF" = "xyes"]) AM_CONDITIONAL([BUILD_HMAC],[test "x$ENABLED_HMAC" = "xyes"]) @@ -13008,6 +13028,7 @@ echo " * IoT-Safe: $ENABLED_IOTSAFE" echo " * IoT-Safe HWRNG: $ENABLED_IOTSAFE_HWRNG" echo " * NXP SE050: $ENABLED_SE050" echo " * STMicro STSAFE: $ENABLED_STSAFE" +echo " * RealTek RTL8735B HUK: $ENABLED_RTL8735B" echo " * TROPIC01: $ENABLED_TROPIC01" echo " * Maxim Integrated MAXQ10XX: $ENABLED_MAXQ10XX" echo " * PSA: $ENABLED_PSA" diff --git a/wolfcrypt/src/include.am b/wolfcrypt/src/include.am index 18d7a339cd5..082f45ee3c7 100644 --- a/wolfcrypt/src/include.am +++ b/wolfcrypt/src/include.am @@ -105,6 +105,9 @@ 
EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \ wolfcrypt/src/port/st/README.md \ wolfcrypt/src/port/st/STM32MP13.md \ wolfcrypt/src/port/st/STM32MP25.md \ + wolfcrypt/src/port/realtek/rtl8735b.c \ + wolfcrypt/src/port/realtek/rtl8735b_shim.h \ + wolfcrypt/src/port/realtek/README.md \ wolfcrypt/src/port/tropicsquare/tropic01.c \ wolfcrypt/src/port/tropicsquare/README.md \ wolfcrypt/src/port/af_alg/afalg_aes.c \ @@ -244,6 +247,10 @@ if BUILD_TROPIC01 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/tropicsquare/tropic01.c endif +if BUILD_RTL8735B +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/realtek/rtl8735b.c +endif + if BUILD_PSA src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/psa/psa.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/psa/psa_hash.c diff --git a/wolfcrypt/src/port/realtek/README.md b/wolfcrypt/src/port/realtek/README.md new file mode 100644 index 00000000000..027d281be8c --- /dev/null +++ b/wolfcrypt/src/port/realtek/README.md @@ -0,0 +1,497 @@ +# RealTek AmebaPro2 (RTL8735B) HUK Port + +Binds wolfCrypt keys to the RTL8735B silicon Hardware Unique Key (HUK) through +the AmebaPro2 HAL crypto engine, via the wolfCrypt crypto-callback (CryptoCb) +framework. A 256-bit "seed" is run through the HAL HKDF key-ladder against the +HUK to land a device-bound working key in a secure key-storage slot; AES +(GCM/ECB/CBC/CTR) then runs from that slot and the working key never enters +software. It is a pure crypto-callback device and adds no wolfSSL core API or +struct fields: AES reads its seed from the standard `aes->devKey`, and ECDSA +reads a `wc_Rtl8735b_EccKey` (the HUK-wrapped scalar + seed) the caller attaches +via the standard `ecc_key->devCtx`. This mirrors the device pattern the STM32 +DHUK port (`wc_Stm32_DhukRegister`) also uses. 
+ +## Hardware + +RTL8735B / AmebaPro2 security blocks used by this port (from the +`Ameba-AIoT/nuwa_hal_realtek` SDK, `rtl8735b` branch, headers under +`ameba/amebapro2/source/fwlib/rtl8735b/include/`): + +- HUK in OTP: `SB_OTP_HIGH_VAL_HUK1` (0x21), `HUK2` (0x22), `HUK_RMA` (0x2F). +- HKDF key-ladder in secure RAM: `hal_hkdf_hmac_sha256_secure_init`, + `hal_hkdf_extract_secure_all`, `hal_hkdf_expand_secure_all` -- derive the HUK + into a secure key-storage slot without exposing the key to software. +- AES secure-key ops that reference the derived slot by number: + `hal_crypto_aes_ecb_sk_init`, `hal_crypto_aes_gcm_sk_init` (key never leaves + hardware). +- Secure-key HMAC-SHA256 (`hal_crypto_hmac_sha2_256_sk_init` / + `hal_crypto_hmac_sha2_256_update` / `_sk_final`) MAC'ing over the HUK-derived + slot, so the HMAC key also never enters software. +- HUK-bound ECDSA sign has two backends: software sign after an AES secure-key + unwrap of the wrapped scalar (default), or the HW ECDSA engine (`hal_ecdsa.h`) + with the private scalar either unwrapped (INPUT PRK) or OTP-resident via + `hal_ecdsa_select_prk` (`ECDSA_OTP_PRK_1/2`, scalar never in software). +- Secure TRNG (`hal_trng_sec.h`, `hal_trng_sec_init` / `hal_trng_sec_get_rand`) exposed as the + crypto-callback SEED source. The plain TRNG (`hal_trng.h`) is also available; the + `ameba-zephyr-pro2-platform` repo provides a Zephyr entropy driver + (`entropy_rtl8735b.c`, DT `realtek,amebapro2-trng`) that feeds wolfCrypt's + `wc_GenerateSeed` via `sys_rand_get`. + +## Enabling + +```c +#define WOLFSSL_RTL8735B_HUK /* enable the AmebaPro2 HUK device */ +#define WOLF_CRYPTO_CB /* required -- HUK routes through crypto callbacks */ +``` + +Set these in `user_settings.h`. The application/board CMake must add +the AmebaPro2 HAL include directory (e.g. +`.../fwlib/rtl8735b/include/`) to the wolfSSL library include path so this port +can include `hal_crypto.h` and `hal_hkdf.h`. 
+ +Configurable (override in `user_settings.h` before including wolfSSL): + +| Macro | Default | Meaning | +|--------------------------------|---------|--------------------------------------| +| `WC_HUK_DEVID` | 810 | CryptoCb device id (STM32 uses 807-809) | +| `WC_RTL8735B_HUK_SK_IDX` | 0xC | Key-storage slot holding the HUK (KEY_STG_HUK1) | +| `WC_RTL8735B_HKDF_PRK_IDX` | 3 | Intermediate HKDF PRK slot | +| `WC_RTL8735B_DERIVED_WB_IDX` | 4 | Derived working-key slot (AES uses it) | +| `WC_RTL8735B_HKDF_CRYPTO_SEL` | 0 | `crypto_sel` for the secure HKDF init | +| `WC_RTL8735B_MAX_WRAPPED` | 96 | Max wrapped-scalar blob the ECDSA sign path unwraps | + +## API + +```c +#include <wolfssl/wolfcrypt/port/realtek/rtl8735b.h> + +/* One-time: register the AmebaPro2 HUK crypto-callback device. */ +wc_Rtl8735b_HukRegister(WC_HUK_DEVID); + +/* AES / GCM: enable via devId at init, then pass the 256-bit seed as the key. + * The seed is HKDF input that diversifies the HUK -- it is NOT the AES key. */ +Aes aes; +byte seed[32]; /* per-purpose derivation seed (need not be secret) */ +wc_AesInit(&aes, NULL, WC_HUK_DEVID); +wc_AesGcmSetKey(&aes, seed, 32); +wc_AesGcmEncrypt(&aes, ct, pt, ptSz, iv, 12, tag, tagSz, aad, aadSz); /* full GCM */ +wc_AesFree(&aes); + +/* AES-ECB / AES-CBC follow the same pattern: wc_AesSetKey, then the wc_AesEcb + * or wc_AesCbc encrypt/decrypt calls, with devId = WC_HUK_DEVID. */ + +wc_Rtl8735b_HukUnRegister(WC_HUK_DEVID); +``` + +The seed maps to a device-bound working key as: +HUK (slot `WC_RTL8735B_HUK_SK_IDX`) -> `hal_hkdf_extract_secure_all` -> PRK slot +-> `hal_hkdf_expand_secure_all` -> working key in `WC_RTL8735B_DERIVED_WB_IDX` +-> `hal_crypto_aes_gcm_sk_init` / `hal_crypto_aes_ecb_sk_init`. The derive and +the AES op run under one crypto-mutex hold; the working key never enters +software. Identical seed -> identical working key (deterministic, so GMAC +verifies and AES round-trips); a wrong seed yields a different key (GCM decrypt +returns `AES_GCM_AUTH_E`).
+ +HUK-bound ECDSA sign (Stage 3, wrapped-scalar): point the key's crypto-callback +context at a `wc_Rtl8735b_EccKey` (the scalar AES-wrapped under a HUK-derived +key, plus its 32-byte seed) -- no dedicated wolfSSL import API: + +```c +#include <wolfssl/wolfcrypt/port/realtek/rtl8735b.h> +wc_Rtl8735b_EccKey hk = { seed, 32, wrapped, wrappedLen, plainLen }; +ecc_key key; +wc_ecc_init_ex(&key, NULL, WC_HUK_DEVID); +wc_ecc_set_curve(&key, plainLen, ECC_SECP256R1); +key.devCtx = &hk; /* borrowed; must outlive the key */ +wc_ecc_sign_hash(hash, hashSz, sig, &sigSz, rng, &key); +``` + +At sign time the port derives the slot key from the seed, ECB-unwraps the scalar +into a short-lived buffer, signs, and scrubs it. The wrapped blob is device-bound +(it only unwraps on the silicon whose HUK produced the slot key). The scalar is +briefly in software during the sign; an OTP-resident model (`hal_ecdsa_select_prk`, +scalar never in software) and routing the sign itself through the HW ECDSA engine +(`hal_ecdsa`) are opt-in alternatives (see the end of "Additional HUK operations"). + +### Additional HUK operations + +HMAC-SHA256 under the HUK (the 32-byte key is the HKDF seed; the MAC runs over +the HUK-derived secure-key slot): + +```c +Hmac hmac; +byte seed[32]; +wc_HmacInit(&hmac, NULL, WC_HUK_DEVID); +wc_HmacSetKey(&hmac, WC_SHA256, seed, 32); +wc_HmacUpdate(&hmac, msg, msgSz); +wc_HmacFinal(&hmac, mac); /* MAC produced one-shot at final under the HUK */ +wc_HmacFree(&hmac); +``` + +HW-seeded RNG (entropy from the secure TRNG via the crypto-callback SEED source, +no `CUSTOM_RAND_GENERATE_SEED` wiring needed): + +```c +WC_RNG rng; +wc_InitRng_ex(&rng, NULL, WC_HUK_DEVID); +wc_RNG_GenerateBlock(&rng, buf, sizeof(buf)); +wc_FreeRng(&rng); +``` + +To route ECDSA sign through the HW engine instead of the software-after-unwrap +path, set `hk.useHwEngine = 1` (validated on the RTL8735B); to sign from an +OTP-resident key (scalar never in software) set `hk.otpPrkSel` to `1`/`2` +(`ECDSA_OTP_PRK_1/2`) and leave `seed`/`wrapped` unused (that OTP path is +implemented but
unexercised -- it needs an OTP key provisioned). + +## Notes / limitations + +- The HAL GCM path assumes a 96-bit (12-byte) IV (standard J0). A non-12-byte + IV returns a hard error (not a software fallback, which would key off the seed + rather than the device-bound key). +- AES-CBC and AES-CTR chain in software over single-block + `hal_crypto_aes_ecb_sk_*` calls because the HAL exposes no CBC/CTR secure-key + variant; the key still stays in hardware. CTR maintains the wolfCrypt counter + state (`aes->reg`/`tmp`/`left`) so partial blocks continue across calls. +- The HAL crypto engine DMAs its buffers on 32-byte (cache-line) boundaries and + rejects an unaligned GCM iv/aad. The port stages key/iv/aad/tag on aligned + temporaries and bounces unaligned in/out through aligned buffers, so callers + need not align. +- Each operation derives the working key from the Aes' own `devKey` seed under + the crypto mutex (no shared port global), so concurrent `Aes` objects are + safe. +- `--enable-rtl8735b` builds a host compile-test only: it swaps the HAL headers + for `rtl8735b_shim.h` (sentinel stubs, no real crypto) to exercise the + crypto-callback dispatch and build wiring without the vendor SDK. The + `.github/workflows/rtl8735b.yml` CI builds this option (several algo/guard and + WOLFSSL_SMALL_STACK combinations, build-only, ccache-backed) on every PR. The + CI is build-only by design -- the shim does no real crypto, so functional and + dispatch validation is done on RTL8735B hardware via the wolfssl-examples HUK + app (a host-side shim dispatch smoke test is a possible future addition). + +## Status + +Validated on RTL8735B silicon (both the RealTek FreeRTOS SDK app and a Zephyr +image): registration; AES-GCM (encrypt / deterministic tag / decrypt-verify / +round-trip / wrong-seed -> `AES_GCM_AUTH_E` / unaligned buffers / non-12-byte-IV +reject); AES-ECB; AES-CBC (incl. 
in-place, multi-call); AES-CTR; HMAC-SHA256; +HUK-bound ECDSA (P-256) sign; and HW ECDSA verify (good signature accepted, +tampered digest rejected) -- all pass. + +- Stage 0 (skeleton, build wiring, host compile-test): done. +- Stage 1 (HUK key-ladder + full AES-GCM): done, validated on hardware. +- Stage 2 (AES-ECB / AES-CBC / AES-CTR): done, validated on hardware. +- Stage 3 (HUK-bound ECDSA sign, wrapped-scalar software path): done, validated + on RTL8735B (P-256 sign verifies against the original public key; tampered hash + fails). +- Stage 4 (HUK-bound HMAC-SHA256, secure TRNG SEED callback, per-seed slot + derivation cache): done, validated on RTL8735B (HMAC deterministic / wrong-seed + differs; RNG seeded from `hal_trng_sec_get_rand`). +- Stage 5 (HW ECDSA engine sign, opt-in via `wc_Rtl8735b_EccKey.useHwEngine`): + done, validated on RTL8735B (P-256 sign via `hal_ecdsa` verifies against the + software public key). Follows the RealTek reference flow: `set_curve(ECDSA_P256, + NULL, ...)` uses the engine's built-in P-256 constants; `hal_ecdsa_signature` + loads the private key + nonce and `hal_ecdsa_hash` marks the hash ready, which + starts the engine; completion arrives via the finish IRQ (`hal_ecdsa_cb_handler`). + The INPUT / HUK-wrapped-scalar path is validated; the OTP-resident path + (`otpPrkSel`) is implemented but unexercised (it needs an OTP key provisioned). + The software-after-unwrap path remains the default (`useHwEngine = 0`). +- Stage 6 (general HW ECDSA sign/verify offload): done, validated on RTL8735B. + An `ecc_key` with `devId = WC_HUK_DEVID` and no `devCtx` routes + `wc_ecc_sign_hash` through the engine using the key's own scalar, and + `wc_ecc_verify_hash` through the engine using the key's own public point (no + HUK context needed to verify -- any P-256 public key). 
Verify completes via the + verify finish IRQ and reads `hal_ecdsa_get_veri_result` (a good signature -> + res = 1; a tampered hash -> res = 0, still returning 0 since a bad signature is + not an error). This is what lets a stock `wolfcrypt_benchmark` exercise the + engine via `WC_USE_DEVID = WC_HUK_DEVID`; both a focused good/tampered check + (in the `wolfcrypt_huk` example) and the benchmark ECDSA rows pass on hardware. + +## Benchmarks (software crypto baseline) + +`wolfcrypt_test` (full self-test, all PASS) and `wolfcrypt_benchmark` were run on +the RTL8735B EVB to validate the core library and toolchain on this target. The +figures below are **pure software wolfCrypt** -- they are NOT the HUK device +(which routes AES through the silicon engine for HUK-derived keys); they serve as +a reference baseline and to size the benefit of hardware offload. + +- Target: RTL8735B "KM4" Arm Cortex-M33 (ARMv8-M Mainline, TrustZone + DSP) at + 500 MHz (`CPU_CLK`); DDR at 533 MHz. +- Toolchain / build: RealTek ASDK 10.3.0 (GCC 10.3.0), SDK default `-Os`, + FreeRTOS, `WOLFCRYPT_ONLY`, `SINGLE_THREADED`, big-integer math via the generic + `WOLFSSL_SP_MATH_ALL` (portable C, no Cortex-M assembly), `BENCH_EMBEDDED`. +- Build options live with the example, not the wolfSSL tree: the + `wolfssl-examples` repo `rtl8735b/test/{user_settings.h, wolfcrypt_test.cmake, + main.c}`, copied into the AmebaPro2 FreeRTOS SDK as + `component/example/wolfcrypt_test`. One `-DRTL_BENCH_MODE=N` switch selects the + backend: 1 = pure C (this baseline), 2 = Thumb-2 / SP Cortex-M (the asm tables + below), 3 = RealTek HW (the hardware-offload table below). The RNG is seeded + from the SDK `rtw_get_random_bytes`; `current_time()` uses + `hal_read_systime_us()`. 
+ +Symmetric / hash (higher is better): + +| Algorithm | Throughput | +|---------------------|------------| +| AES-128-CBC enc/dec | 9.55 / 9.67 MiB/s | +| AES-256-CBC enc/dec | 7.25 / 7.02 MiB/s | +| AES-128-GCM enc/dec | 5.35 / 5.33 MiB/s | +| AES-256-GCM enc/dec | 4.53 / 4.52 MiB/s | +| AES-128-CTR | 9.75 MiB/s | +| AES-128-ECB enc/dec | 10.42 / 10.56 MiB/s | +| AES-CCM enc/dec | 4.73 / 4.65 MiB/s | +| GMAC (4-bit table) | 13.43 MiB/s | +| AES-128-CMAC | 8.84 MiB/s | +| ChaCha20 | 24.79 MiB/s | +| ChaCha20-Poly1305 | 15.83 MiB/s | +| Poly1305 | 64.77 MiB/s | +| SHA-1 | 29.19 MiB/s | +| SHA-256 | 10.94 MiB/s | +| SHA-512 | 7.29 MiB/s | +| SHA3-256 | 6.61 MiB/s | +| HMAC-SHA256 | 10.85 MiB/s | + +Public key (higher is better): + +| Operation | Rate | +|-----------------------|------| +| RSA-2048 public | 214.7 ops/s | +| RSA-2048 private | 6.14 ops/s | +| RSA-2048 key gen | 0.40 ops/s | +| DH-2048 key gen/agree | 17.67 / 15.23 ops/s | +| ECDSA P-256 sign/verify | 40.03 / 29.81 ops/s | +| ECDHE P-256 agree | 40.69 ops/s | +| Curve25519 key gen/agree | 414.8 / 419.4 ops/s | +| Ed25519 sign/verify | 788.3 / 397.0 ops/s | + +The tables above are the portable-C baseline. The assembly backends below raise +these substantially. Curve25519/Ed25519 already use the dedicated +`curve25519.c`/`ed25519.c` fast code. + +## Optimizations (measured on RTL8735B @ 500 MHz, -Os) + +Two wolfCrypt assembly backends apply to this Cortex-M33 and were validated on +hardware (both keep `wolfcrypt_test` all-PASS). Neither needs wolfSSL source +changes -- they are build-config selections plus adding the relevant asm files. + +### 1. Public key -- `sp_cortexm.c` (Thumb-2/DSP single-precision) + +Enable with `WOLFSSL_SP_ARM_CORTEX_M_ASM` + `WOLFSSL_HAVE_SP_RSA` + +`WOLFSSL_HAVE_SP_ECC` + `WOLFSSL_HAVE_SP_DH`, and add `wolfcrypt/src/sp_cortexm.c` +to the build (alongside the generic `sp_int.c` for sizes without an asm path). 
+ +| Operation | Generic C | sp_cortexm | Speedup | +|------------------------|-----------|------------|---------| +| ECC P-256 key gen | 40.7 | 541.2 ops/s | 13.3x | +| ECDSA P-256 sign | 40.0 | 427.6 ops/s | 10.7x | +| ECDSA P-256 verify | 29.8 | 292.7 ops/s | 9.8x | +| ECDHE P-256 agree | 40.7 | 318.1 ops/s | 7.8x | +| RSA-2048 public | 214.7 | 618.4 ops/s | 2.9x | +| RSA-2048 private | 6.14 | 19.0 ops/s | 3.1x | +| DH-2048 agree | 15.2 | 38.3 ops/s | 2.5x | + +### 2. Symmetric -- Thumb-2 asm (`port/arm/thumb2-*-asm.S`) + +Enable with `WOLFSSL_ARMASM` + `WOLFSSL_ARMASM_THUMB2` + +`WOLFSSL_ARMASM_NO_HW_CRYPTO` + `WOLFSSL_ARMASM_NO_NEON` + `WOLFSSL_ARM_ARCH=7`, +and add `thumb2-aes-asm.S`, `thumb2-sha256-asm.S`, `thumb2-sha512-asm.S`, +`thumb2-sha3-asm.S`, `thumb2-chacha-asm.S`, `thumb2-poly1305-asm.S`. +`WOLFSSL_ARMASM` is a global switch, so provide the `.S` for every covered +module. (Curve25519/Ed25519 also have Thumb-2 asm but their `ge_operations.c` +integration assumes 64-bit and was left on the C path here.) + +| Algorithm | Generic C | Thumb-2 asm | Speedup | +|---------------------|-----------|-------------|---------| +| AES-128-CBC enc | 9.55 | 20.85 MiB/s | 2.2x | +| AES-128-ECB enc | 10.42 | 20.82 MiB/s | 2.0x | +| AES-128-CTR | 9.75 | 20.47 MiB/s | 2.1x | +| AES-128-GCM enc | 5.35 | 10.30 MiB/s | 1.9x | +| GMAC | 13.43 | 20.81 MiB/s | 1.5x | +| AES-128-CMAC | 8.84 | 14.67 MiB/s | 1.7x | +| ChaCha20 | 24.79 | 46.44 MiB/s | 1.9x | +| ChaCha20-Poly1305 | 15.83 | 25.38 MiB/s | 1.6x | +| SHA-256 | 10.94 | 17.83 MiB/s | 1.6x | +| SHA3-256 | 6.61 | 8.64 MiB/s | 1.3x | +| HMAC-SHA256 | 10.85 | 17.66 MiB/s | 1.6x | + +### 3. Hardware offload -- the HUK crypto-callback device (`hal_crypto` / `hal_ecdsa`) + +Measured on the same `wolfcrypt_benchmark` with `WC_USE_DEVID = WC_HUK_DEVID` +(the test/benchmark example's mode 3, `RTL_BENCH_MODE=3`). 
The benchmark prints +a software and a hardware row per op; the software column here is the pure-C +`sp_int.c` baseline, the hardware column is this port driving the silicon engine. +The ECDSA rows exercise the port's general HW sign/verify offload (a benchmark +key with `devId = WC_HUK_DEVID` and no HUK context -- the engine signs with the +key's own scalar and verifies with its own public point). + +| Operation | Pure C | HW (engine) | Speedup | +|------------------------|-----------|-------------|---------| +| AES-256-ECB enc/dec | 7.73/7.76 | 48.87/48.69 MiB/s | 6.3x | +| AES-256-GCM enc/dec | 4.52 | 38.44/38.18 MiB/s | 8.5x | +| AES-256-GCM no_AAD | 4.55 | 41.73/41.34 MiB/s | 9.1x | +| HMAC-SHA256 | 10.63 | 42.03 MiB/s | 4.0x | +| ECDSA P-256 sign | 39.81 | 272.05 ops/s | 6.8x | +| ECDSA P-256 verify | 29.39 | 275.13 ops/s | 9.4x | + +Caveats, all expected from the port's design: + +- **AES-256-CBC / -CTR are slower on the engine** (2.28 MiB/s vs ~7.2 software): + the port chains those in software over single-block secure-key ECB calls, so + per-block HAL overhead dominates. AES-256-GCM and -ECB use the engine's native + block path and are the real symmetric wins; for bulk CBC/CTR the software + (especially Thumb-2) path is faster. +- **AES-128/192 fall back to software** -- the HUK-derived working key is 256-bit. +- **RSA, DH, ECDH, hashing and key generation fall back to software** (hardware + row ~= software row) -- the device only advertises AES, HMAC-SHA256 and ECDSA + P-256 sign/verify; everything else returns `CRYPTOCB_UNAVAILABLE` and the core + runs it in software. +- The HW ECDSA **sign** (272 ops/s) is actually slower than the `sp_cortexm.c` + software sign (427.6 ops/s above): the engine's value is binding to the HUK and + offloading the CPU, not beating hand-tuned Thumb-2 P-256 latency. HW verify + (275) is on par with the `sp_cortexm.c` verify (292.7). 
+ +So the recommended posture: take `sp_cortexm.c` for public-key math unconditionally +(no silicon dependency), use the engine for AES-256-GCM/ECB bulk throughput and +HUK-bound ECDSA, and keep the Thumb-2 symmetric asm as the portable fallback for +the cipher modes the engine does not accelerate well. + +#### Full benchmark output (hardware mode) + +Part: RealTek RTL8735B (AmebaPro2), "KM4" Arm Cortex-M33 @ 500 MHz (`CPU_CLK`), +DDR @ 533 MHz. Toolchain: RealTek ASDK 10.3.0 (GCC 10.3.0), SDK default `-Os`, +FreeRTOS, `WOLFCRYPT_ONLY`, `SINGLE_THREADED`, `BENCH_EMBEDDED`. Generic `sp_int.c` +big-integer math (the HW base is the pure-C backend, so the `SW` rows are the +pure-C baseline). The HUK device is registered and `WC_USE_DEVID = WC_HUK_DEVID`, +so `wolfcrypt_benchmark` prints a software (`SW`) and a hardware (`HW`) row per op; +ops the engine does not accelerate show `HW` ~= `SW` (software fallback). + +``` +=== wolfCrypt Benchmark (RTL8735B, HUK hardware) === +wolfCrypt Benchmark (block bytes 1024, min 1.0 sec each) +RNG SHA-256 DRBG SW 2.1 MiB took 1.006 seconds, 2.936 MiB/s +AES-128-CBC-enc SW 9.1 MiB took 1.001 seconds, 9.537 MiB/s +AES-128-CBC-dec SW 9.1 MiB took 1.000 seconds, 9.643 MiB/s +AES-192-CBC-enc SW 8.0 MiB took 1.002 seconds, 8.237 MiB/s +AES-192-CBC-dec SW 8.0 MiB took 1.003 seconds, 8.303 MiB/s +AES-256-CBC-enc SW 7.0 MiB took 1.001 seconds, 7.242 MiB/s +AES-256-CBC-dec SW 7.0 MiB took 1.000 seconds, 7.005 MiB/s +AES-128-CBC-enc HW 9.0 MiB took 1.000 seconds, 9.420 MiB/s +AES-128-CBC-dec HW 9.1 MiB took 1.002 seconds, 9.528 MiB/s +AES-192-CBC-enc HW 8.0 MiB took 1.000 seconds, 8.151 MiB/s +AES-192-CBC-dec HW 8.0 MiB took 1.001 seconds, 8.218 MiB/s +AES-256-CBC-enc HW 2.0 MiB took 1.005 seconds, 2.283 MiB/s +AES-256-CBC-dec HW 2.0 MiB took 1.008 seconds, 2.228 MiB/s +AES-128-GCM-enc SW 5.0 MiB took 1.005 seconds, 5.347 MiB/s +AES-128-GCM-dec SW 5.0 MiB took 1.000 seconds, 5.346 MiB/s +AES-192-GCM-enc SW 4.1 MiB took 1.001 seconds, 4.903 
MiB/s +AES-192-GCM-dec SW 4.1 MiB took 1.001 seconds, 4.902 MiB/s +AES-256-GCM-enc SW 4.1 MiB took 1.004 seconds, 4.524 MiB/s +AES-256-GCM-dec SW 4.1 MiB took 1.004 seconds, 4.524 MiB/s +AES-128-GCM-enc-no_AAD SW 5.0 MiB took 1.002 seconds, 5.383 MiB/s +AES-128-GCM-dec-no_AAD SW 5.0 MiB took 1.002 seconds, 5.382 MiB/s +AES-192-GCM-enc-no_AAD SW 4.1 MiB took 1.005 seconds, 4.934 MiB/s +AES-192-GCM-dec-no_AAD SW 4.1 MiB took 1.005 seconds, 4.933 MiB/s +AES-256-GCM-enc-no_AAD SW 4.1 MiB took 1.003 seconds, 4.550 MiB/s +AES-256-GCM-dec-no_AAD SW 4.1 MiB took 1.003 seconds, 4.550 MiB/s +AES-128-GCM-enc HW 5.0 MiB took 1.005 seconds, 5.274 MiB/s +AES-128-GCM-dec HW 5.0 MiB took 1.000 seconds, 5.271 MiB/s +AES-192-GCM-enc HW 4.1 MiB took 1.004 seconds, 4.841 MiB/s +AES-192-GCM-dec HW 4.1 MiB took 1.004 seconds, 4.840 MiB/s +AES-256-GCM-enc HW 38.0 MiB took 1.000 seconds, 38.436 MiB/s +AES-256-GCM-dec HW 38.0 MiB took 1.000 seconds, 38.175 MiB/s +AES-128-GCM-enc-no_AAD HW 5.0 MiB took 1.003 seconds, 5.307 MiB/s +AES-128-GCM-dec-no_AAD HW 5.0 MiB took 1.003 seconds, 5.307 MiB/s +AES-192-GCM-enc-no_AAD HW 4.1 MiB took 1.003 seconds, 4.870 MiB/s +AES-192-GCM-dec-no_AAD HW 4.1 MiB took 1.003 seconds, 4.870 MiB/s +AES-256-GCM-enc-no_AAD HW 41.1 MiB took 1.000 seconds, 41.729 MiB/s +AES-256-GCM-dec-no_AAD HW 41.0 MiB took 1.000 seconds, 41.339 MiB/s +GMAC Table 4-bit SW 13.0 MiB took 1.000 seconds, 13.423 MiB/s +GMAC Table 4-bit SW 13.0 MiB took 1.002 seconds, 13.184 MiB/s +AES-128-ECB-enc SW 10.0 MiB took 1.001 seconds, 10.411 MiB/s +AES-128-ECB-dec SW 10.1 MiB took 1.001 seconds, 10.561 MiB/s +AES-192-ECB-enc SW 8.1 MiB took 1.001 seconds, 8.879 MiB/s +AES-192-ECB-dec SW 8.1 MiB took 1.001 seconds, 8.981 MiB/s +AES-256-ECB-enc SW 7.1 MiB took 1.000 seconds, 7.734 MiB/s +AES-256-ECB-dec SW 7.1 MiB took 1.000 seconds, 7.756 MiB/s +AES-128-ECB-enc HW 10.0 MiB took 1.001 seconds, 10.274 MiB/s +AES-128-ECB-dec HW 10.0 MiB took 1.000 seconds, 10.417 MiB/s +AES-192-ECB-enc HW 8.1 MiB 
took 1.001 seconds, 8.781 MiB/s +AES-192-ECB-dec HW 8.1 MiB took 1.001 seconds, 8.877 MiB/s +AES-256-ECB-enc HW 48.1 MiB took 1.000 seconds, 48.867 MiB/s +AES-256-ECB-dec HW 48.1 MiB took 1.000 seconds, 48.685 MiB/s +AES-128-CTR SW 9.1 MiB took 1.002 seconds, 9.669 MiB/s +AES-192-CTR SW 8.0 MiB took 1.002 seconds, 8.336 MiB/s +AES-256-CTR SW 7.0 MiB took 1.001 seconds, 7.319 MiB/s +AES-128-CTR HW 9.0 MiB took 1.001 seconds, 9.438 MiB/s +AES-192-CTR HW 8.0 MiB took 1.002 seconds, 8.164 MiB/s +AES-256-CTR HW 2.0 MiB took 1.005 seconds, 2.284 MiB/s +AES-CCM-enc SW 4.1 MiB took 1.001 seconds, 4.732 MiB/s +AES-CCM-dec SW 4.1 MiB took 1.003 seconds, 4.647 MiB/s +AES-CCM-enc-no_AAD SW 4.1 MiB took 1.005 seconds, 4.739 MiB/s +AES-CCM-dec-no_AAD SW 4.1 MiB took 1.003 seconds, 4.648 MiB/s +AES-CCM-enc HW 4.1 MiB took 1.000 seconds, 4.711 MiB/s +AES-CCM-dec HW 4.1 MiB took 1.004 seconds, 4.619 MiB/s +AES-CCM-enc-no_AAD HW 4.1 MiB took 1.000 seconds, 4.711 MiB/s +AES-CCM-dec-no_AAD HW 4.1 MiB took 1.004 seconds, 4.619 MiB/s +CHACHA SW 24.1 MiB took 1.000 seconds, 24.793 MiB/s +CHA-POLY SW 15.1 MiB took 1.000 seconds, 15.815 MiB/s +POLY1305 SW 64.1 MiB took 1.000 seconds, 64.781 MiB/s +SHA SW 29.0 MiB took 1.001 seconds, 29.131 MiB/s +SHA HW 28.0 MiB took 1.000 seconds, 28.074 MiB/s +SHA-224 SW 10.1 MiB took 1.002 seconds, 10.726 MiB/s +SHA-224 HW 10.1 MiB took 1.001 seconds, 10.581 MiB/s +SHA-256 SW 10.1 MiB took 1.002 seconds, 10.725 MiB/s +SHA-256 HW 10.1 MiB took 1.001 seconds, 10.582 MiB/s +SHA-384 SW 7.0 MiB took 1.003 seconds, 7.277 MiB/s +SHA-384 HW 7.0 MiB took 1.003 seconds, 7.206 MiB/s +SHA-512 SW 7.0 MiB took 1.003 seconds, 7.277 MiB/s +SHA-512 HW 7.0 MiB took 1.003 seconds, 7.208 MiB/s +SHA-512/224 SW 7.0 MiB took 1.000 seconds, 7.275 MiB/s +SHA-512/224 HW 7.0 MiB took 1.003 seconds, 7.207 MiB/s +SHA-512/256 SW 7.0 MiB took 1.000 seconds, 7.275 MiB/s +SHA-512/256 HW 7.0 MiB took 1.003 seconds, 7.207 MiB/s +SHA3-224 SW 7.0 MiB took 1.000 seconds, 7.004 MiB/s 
+SHA3-224 HW 6.1 MiB took 1.002 seconds, 6.942 MiB/s +SHA3-256 SW 6.1 MiB took 1.001 seconds, 6.608 MiB/s +SHA3-256 HW 6.1 MiB took 1.002 seconds, 6.552 MiB/s +SHA3-384 SW 5.0 MiB took 1.001 seconds, 5.100 MiB/s +SHA3-384 HW 5.0 MiB took 1.002 seconds, 5.067 MiB/s +SHA3-512 SW 3.1 MiB took 1.007 seconds, 3.565 MiB/s +SHA3-512 HW 3.1 MiB took 1.004 seconds, 3.549 MiB/s +AES-128-CMAC SW 8.1 MiB took 1.001 seconds, 8.852 MiB/s +AES-256-CMAC SW 6.1 MiB took 1.001 seconds, 6.563 MiB/s +AES-128-CMAC HW 8.1 MiB took 1.002 seconds, 8.747 MiB/s +AES-256-CMAC HW 6.1 MiB took 1.002 seconds, 6.504 MiB/s +HMAC-SHA SW 28.1 MiB took 1.001 seconds, 28.834 MiB/s +HMAC-SHA HW 26.1 MiB took 1.001 seconds, 26.642 MiB/s +HMAC-SHA224 SW 10.1 MiB took 1.001 seconds, 10.632 MiB/s +HMAC-SHA224 HW 10.0 MiB took 1.002 seconds, 10.331 MiB/s +HMAC-SHA256 SW 10.1 MiB took 1.001 seconds, 10.632 MiB/s +HMAC-SHA256 HW 42.0 MiB took 1.000 seconds, 42.034 MiB/s +HMAC-SHA384 SW 7.0 MiB took 1.002 seconds, 7.161 MiB/s +HMAC-SHA384 HW 7.0 MiB took 1.002 seconds, 7.017 MiB/s +HMAC-SHA512 SW 7.0 MiB took 1.002 seconds, 7.163 MiB/s +HMAC-SHA512 HW 7.0 MiB took 1.001 seconds, 7.023 MiB/s +RSA 2048 key gen SW 1 ops took 7.697 sec, avg 7697.142 ms, 0.130 ops/sec +RSA 3072 key gen SW 1 ops took 23.488 sec, avg 23488.015 ms, 0.043 ops/sec +RSA 2048 key gen HW 1 ops took 6.656 sec, avg 6656.367 ms, 0.150 ops/sec +RSA 3072 key gen HW 1 ops took 66.662 sec, avg 66662.406 ms, 0.015 ops/sec +RSA 2048 public SW 214 ops took 1.003 sec, avg 4.686 ms, 213.392 ops/sec +RSA 2048 private SW 8 ops took 1.305 sec, avg 163.098 ms, 6.131 ops/sec +RSA 2048 public HW 214 ops took 1.003 sec, avg 4.687 ms, 213.372 ops/sec +RSA 2048 private HW 8 ops took 1.304 sec, avg 163.055 ms, 6.133 ops/sec +DH 2048 key gen SW 18 ops took 1.019 sec, avg 56.629 ms, 17.659 ops/sec +DH 2048 agree SW 16 ops took 1.047 sec, avg 65.445 ms, 15.280 ops/sec +DH 2048 key gen HW 18 ops took 1.019 sec, avg 56.616 ms, 17.663 ops/sec +DH 2048 agree HW 16 
ops took 1.043 sec, avg 65.172 ms, 15.344 ops/sec +ECC [ SECP256R1] 256 key gen SW 42 ops took 1.035 sec, avg 24.641 ms, 40.582 ops/sec +ECC [ SECP256R1] 256 key gen HW 42 ops took 1.038 sec, avg 24.705 ms, 40.478 ops/sec +ECDHE [ SECP256R1] 256 agree SW 42 ops took 1.021 sec, avg 24.315 ms, 41.126 ops/sec +ECDSA [ SECP256R1] 256 sign SW 40 ops took 1.005 sec, avg 25.121 ms, 39.808 ops/sec +ECDSA [ SECP256R1] 256 verify SW 30 ops took 1.021 sec, avg 34.028 ms, 29.387 ops/sec +ECDHE [ SECP256R1] 256 agree HW 42 ops took 1.043 sec, avg 24.844 ms, 40.251 ops/sec +ECDSA [ SECP256R1] 256 sign HW 274 ops took 1.007 sec, avg 3.676 ms, 272.051 ops/sec +ECDSA [ SECP256R1] 256 verify HW 276 ops took 1.003 sec, avg 3.635 ms, 275.134 ops/sec +RNG 256 SHA256 Init/Free 2053 ops took 1.000 sec, avg 0.487 ms, 2052.873 ops/sec +Benchmark complete +``` diff --git a/wolfcrypt/src/port/realtek/rtl8735b.c b/wolfcrypt/src/port/realtek/rtl8735b.c new file mode 100644 index 00000000000..1b6db08d950 --- /dev/null +++ b/wolfcrypt/src/port/realtek/rtl8735b.c @@ -0,0 +1,1529 @@ +/* rtl8735b.c + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef WOLFSSL_RTL8735B_HUK + +#include +#include +#include +#include +#include + +#ifdef WOLF_CRYPTO_CB + #include +#endif +#ifndef NO_AES + #include +#endif +#if !defined(NO_HMAC) && !defined(NO_SHA256) + #include +#endif +#ifdef HAVE_ECC + #include +#endif + +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + +/* Vendor HAL surface: the real SDK headers on target, the host-test shim under + * --enable-rtl8735b (see rtl8735b_shim.h). The on-target include path is + * supplied by the application / board CMake (see this port's README). */ +#ifdef WOLFSSL_RTL8735B_HOST_TEST + #include "rtl8735b_shim.h" +#else + #include "hal_crypto.h" + #include "hal_hkdf.h" + #ifndef WC_NO_RNG + #include "hal_trng_sec.h" + #endif + #if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) + #include "hal_ecdsa.h" + #endif +#endif + +#ifdef WOLF_CRYPTO_CB + +/* The HUK-derived working key is always a 256-bit key. */ +#define WC_RTL8735B_KEYLEN 32 + +/* The HAL crypto engine DMAs its key/iv/aad/tag buffers on 32-byte (cache line) + * boundaries; unaligned caller buffers are bounced through aligned temporaries + * so callers need not align. A heap bounce is over-allocated by 31 bytes and the + * usable pointer rounded up to a 32-byte boundary (XMALLOC does not guarantee + * that alignment); the raw allocation pointer is kept for XFREE. */ +#define WC_RTL8735B_IS_ALIGNED32(p) ((((wc_ptr_t)(p)) & 31u) == 0) +#define WC_RTL8735B_ALIGN_UP32(p) \ + ((byte*)((((wc_ptr_t)(p)) + 31u) & ~(wc_ptr_t)31u)) + +/* Derivation cache (see Rtl8735bHuk_DeriveSlotKey): the seed whose working key + * currently resides in the derived slot WC_RTL8735B_DERIVED_WB_IDX. 
Accessed + * only with the crypto mutex held (every derive path takes it first), so it is + * safe across concurrent Aes/Hmac objects. The seed is HKDF input, not a secret, + * but it is scrubbed on unregister for hygiene. Define + * WC_RTL8735B_NO_DERIVE_CACHE to disable (always re-run the HKDF ladder). */ +#ifndef WC_RTL8735B_NO_DERIVE_CACHE +static byte huk_seedCache[WC_RTL8735B_KEYLEN]; +static int huk_haveCache = 0; +#endif + +static int Rtl8735bHuk_Init(void* ctx) +{ + (void)ctx; + /* One-time crypto engine bring-up. Idempotent on the HAL side. */ + if (hal_crypto_engine_init() != 0) { + return WC_HW_E; + } + return 0; +} + +/* Run the HUK key-ladder on the per-operation seed (the 32-byte HKDF input the + * Aes carries in devKey): HUK (secure key slot) -> HKDF-Extract(secure) -> PRK + * slot -> HKDF-Expand(secure) -> device-bound working key in the derived slot. + * The working key never enters software; on return it resides in + * WC_RTL8735B_DERIVED_WB_IDX, ready for an AES *_sk_init that references that + * slot. The seed is passed by argument (not held in a global), so concurrent + * Aes objects never race; the caller holds the crypto mutex across derive + op. + * + * The HUK is the built-in secure key at slot WC_RTL8735B_HUK_SK_IDX (HUK1); the + * engine reads it internally. We deliberately do NOT lock the derived slot: each + * operation re-derives the working key into it, and a locked key-storage slot + * silently rejects that re-derivation (it would keep a stale key, so a different + * seed would yield the wrong result). The slot is overwritten on the next + * derive; nothing reads it back into software. */ +static int Rtl8735bHuk_DeriveSlotKey(const byte* seed) +{ + XALIGNED(32) byte seedA[WC_RTL8735B_KEYLEN]; + + if (seed == NULL) { + return BAD_FUNC_ARG; + } +#ifndef WC_RTL8735B_NO_DERIVE_CACHE + /* If the derived slot already holds the working key for this exact seed, + * skip the (two secure HMAC-SHA256 ops) HKDF ladder and reuse the slot. 
*/ + if (huk_haveCache && + ConstantCompare(huk_seedCache, seed, WC_RTL8735B_KEYLEN) == 0) { + return 0; + } + /* A fresh derive is starting; invalidate the cache until it succeeds so a + * mid-ladder failure never leaves the cache claiming a stale slot. */ + huk_haveCache = 0; +#endif + /* HKDF reads the seed via DMA -- pass it a 32-byte-aligned copy. */ + XMEMCPY(seedA, seed, WC_RTL8735B_KEYLEN); + + /* Init the secure HKDF HMAC-SHA256 engine (sets isHWCrypto_Init); required + * before any *_secure_all call or extract returns HW_NOT_INIT. */ + if (hal_hkdf_hmac_sha256_secure_init((u8)WC_RTL8735B_HKDF_CRYPTO_SEL) + != HAL_OK) { + return WC_HW_E; + } + /* HKDF-Extract: PRK = HMAC(HUK, seed), into the PRK slot. */ + if (hal_hkdf_extract_secure_all((u8)WC_RTL8735B_HUK_SK_IDX, + (u8)WC_RTL8735B_HKDF_PRK_IDX, seedA) != HAL_OK) { + return WC_HW_E; + } + /* HKDF-Expand: OKM = working key, into the derived working-key slot. */ + if (hal_hkdf_expand_secure_all((u8)WC_RTL8735B_HKDF_PRK_IDX, + (u8)WC_RTL8735B_DERIVED_WB_IDX, seedA) != HAL_OK) { + return WC_HW_E; + } +#ifndef WC_RTL8735B_NO_DERIVE_CACHE + /* Slot now holds the working key for this seed; remember it. */ + XMEMCPY(huk_seedCache, seed, WC_RTL8735B_KEYLEN); + huk_haveCache = 1; +#endif + return 0; +} + +#ifndef NO_AES + +#ifdef HAVE_AESGCM +/* Full AES-GCM (encrypt or decrypt-verify) under a HUK-derived slot key. + * The HAL GCM path assumes a 96-bit (12-byte) IV (standard J0). For a HUK key we + * must not fall back to software GCM (the software path would key off the seed, + * not the device-bound key), so an unsupported IV length returns BAD_FUNC_ARG -- + * a hard error. (NOT_COMPILED_IN must NOT be used here: the crypto-callback layer + * rewrites it to CRYPTOCB_UNAVAILABLE, which would trigger exactly that unwanted + * software fallback.) 
*/ +static int Rtl8735bHuk_Gcm(int enc, const byte* seed, const byte* in, + word32 sz, byte* out, const byte* iv, word32 ivSz, const byte* aad, + word32 aadSz, byte* tag, word32 tagSz) +{ + int ret; + /* 16-byte aligned IV block: the HAL reads a full block, so the 4 bytes past + * the 12-byte nonce must be zero and stable across calls. */ + XALIGNED(32) byte ivA[WC_AES_BLOCK_SIZE] = { 0 }; + XALIGNED(32) byte hwTag[WC_AES_BLOCK_SIZE] = { 0 }; + const byte* inA = in; /* aligned views; bounced below if needed */ + const byte* aadA = aad; + byte* outA = out; + byte* inBounce = NULL; + byte* outBounce = NULL; + byte* aadBounce = NULL; + + /* Validate args before any copy/bounce/dereference: the crypto-callback + * wrapper does not, so a bad caller would otherwise crash inside the HAL. */ + if (seed == NULL || iv == NULL) { + return BAD_FUNC_ARG; + } + if (sz > 0 && (in == NULL || out == NULL)) { + return BAD_FUNC_ARG; + } + if (aadSz > 0 && aad == NULL) { + return BAD_FUNC_ARG; + } + if (ivSz != GCM_NONCE_MID_SZ) { + /* Hard error -- see header comment; must not be NOT_COMPILED_IN. */ + return BAD_FUNC_ARG; /* only 12-byte GCM IV supported by the HAL */ + } + if (tag == NULL || tagSz == 0 || tagSz > WC_AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + /* Bounce any unaligned DMA buffer through a 32-byte-aligned temporary. iv + * and tag are small and always staged on aligned stack buffers; in/out/aad + * may be large, so are only copied when actually unaligned. 
*/ + XMEMCPY(ivA, iv, GCM_NONCE_MID_SZ); + if (aadSz > 0 && !WC_RTL8735B_IS_ALIGNED32(aad)) { + aadBounce = (byte*)XMALLOC(aadSz + 31, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (aadBounce == NULL) { + return MEMORY_E; + } + aadA = WC_RTL8735B_ALIGN_UP32(aadBounce); + XMEMCPY((byte*)aadA, aad, aadSz); + } + if (sz > 0 && !WC_RTL8735B_IS_ALIGNED32(in)) { + inBounce = (byte*)XMALLOC(sz + 31, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (inBounce == NULL) { + ret = MEMORY_E; + goto cleanup; + } + inA = WC_RTL8735B_ALIGN_UP32(inBounce); + XMEMCPY((byte*)inA, in, sz); + } + if (sz > 0 && !WC_RTL8735B_IS_ALIGNED32(out)) { + outBounce = (byte*)XMALLOC(sz + 31, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (outBounce == NULL) { + ret = MEMORY_E; + goto cleanup; + } + outA = WC_RTL8735B_ALIGN_UP32(outBounce); + } + if (sz == 0) { + /* GMAC (empty payload): the caller's in/out may be NULL. Point the HAL at + * a valid aligned buffer -- zero data bytes are processed, only the tag is + * produced over the AAD. */ + inA = ivA; + outA = ivA; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + goto cleanup; + } + ret = Rtl8735bHuk_DeriveSlotKey(seed); + if (ret != 0) { + goto unlock; + } + if (hal_crypto_aes_gcm_sk_init((byte)WC_RTL8735B_DERIVED_WB_IDX, + WC_RTL8735B_KEYLEN) != 0) { + ret = WC_HW_E; + goto unlock; + } + XMEMSET(hwTag, 0, sizeof(hwTag)); + if (enc) { + if (hal_crypto_aes_gcm_encrypt(inA, sz, ivA, aadA, aadSz, outA, hwTag) + != 0) { + ret = WC_HW_E; + goto unlock; + } + XMEMCPY(tag, hwTag, tagSz); + ret = 0; + } + else { + if (hal_crypto_aes_gcm_decrypt(inA, sz, ivA, aadA, aadSz, outA, hwTag) + != 0) { + ret = WC_HW_E; + goto unlock; + } + if (ConstantCompare(hwTag, tag, (int)tagSz) != 0) { + if (outA != NULL && sz != 0) { + ForceZero(outA, sz); + } + /* When out was bounced, outA is the heap bounce; also clear the + * caller's out so the zero-on-auth-fail contract holds for the + * unaligned (incl. in-place) case. 
*/ + if (outBounce != NULL && sz != 0) { + ForceZero(out, sz); + } + ret = AES_GCM_AUTH_E; + } + else { + ret = 0; + } + } + if (ret == 0 && outBounce != NULL) { + XMEMCPY(out, outA, sz); + } + +unlock: + ForceZero(hwTag, sizeof(hwTag)); + wolfSSL_CryptHwMutexUnLock(); +cleanup: + if (inBounce != NULL) { + ForceZero((byte*)inA, sz); /* scrub the aligned input (plaintext) view */ + XFREE(inBounce, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + if (outBounce != NULL) { + ForceZero(outA, sz); /* scrub the aligned plaintext view */ + XFREE(outBounce, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + if (aadBounce != NULL) { + XFREE(aadBounce, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + return ret; +} +#endif /* HAVE_AESGCM */ + +#if defined(HAVE_AES_ECB) || defined(WOLFSSL_AES_DIRECT) || \ + defined(WOLF_CRYPTO_CB_ONLY_AES) +/* AES-ECB under a HUK-derived slot key. sz must be a multiple of the block. + * Unaligned caller in/out are bounced through 32-byte-aligned temporaries (the + * HAL DMAs its buffers on cache-line boundaries), so callers need not align. + * Guarded to match its only call site (the WC_CIPHER_AES_ECB dispatch case); + * CBC/CTR drive hal_crypto_aes_ecb_* directly, not this helper. 
*/ +static int Rtl8735bHuk_Ecb(int enc, const byte* seed, const byte* in, + word32 sz, byte* out) +{ + int ret = 0; + const byte* inA = in; + byte* outA = out; + byte* inBounce = NULL; + byte* outBounce = NULL; + + if (seed == NULL || in == NULL || out == NULL || + sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + if (!WC_RTL8735B_IS_ALIGNED32(in)) { + inBounce = (byte*)XMALLOC(sz + 31, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (inBounce == NULL) { + return MEMORY_E; + } + inA = WC_RTL8735B_ALIGN_UP32(inBounce); + XMEMCPY((byte*)inA, in, sz); + } + if (!WC_RTL8735B_IS_ALIGNED32(out)) { + outBounce = (byte*)XMALLOC(sz + 31, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (outBounce == NULL) { + ret = MEMORY_E; + goto cleanup; + } + outA = WC_RTL8735B_ALIGN_UP32(outBounce); + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + goto cleanup; + } + ret = Rtl8735bHuk_DeriveSlotKey(seed); + if (ret != 0) { + goto unlock; + } + if (hal_crypto_aes_ecb_sk_init((byte)WC_RTL8735B_DERIVED_WB_IDX, + WC_RTL8735B_KEYLEN) != 0) { + ret = WC_HW_E; + goto unlock; + } + if (enc) { + ret = hal_crypto_aes_ecb_encrypt(inA, sz, NULL, 0, outA); + } + else { + ret = hal_crypto_aes_ecb_decrypt(inA, sz, NULL, 0, outA); + } + if (ret != 0) { + ret = WC_HW_E; + } + else if (outBounce != NULL) { + XMEMCPY(out, outA, sz); + } + +unlock: + wolfSSL_CryptHwMutexUnLock(); +cleanup: + if (inBounce != NULL) { + ForceZero((byte*)inA, sz); /* scrub the aligned input (plaintext) view */ + XFREE(inBounce, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + if (outBounce != NULL) { + ForceZero(outA, sz); /* scrub the aligned data view */ + XFREE(outBounce, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + return ret; +} +#endif /* HAVE_AES_ECB || WOLFSSL_AES_DIRECT || WOLF_CRYPTO_CB_ONLY_AES */ + +#ifdef HAVE_AES_CBC +/* AES-CBC under a HUK-derived slot key. 
The HAL has no CBC secure-key variant + * (only ECB/GCM expose *_sk_init), so chain in software over single-block + * ECB-sk operations -- the key still never leaves hardware. iv is the 16-byte + * chaining block (aes->reg); on success it is advanced to the last ciphertext + * block for the next call. Handles in == out (in-place) for both directions. */ +static int Rtl8735bHuk_Cbc(int enc, const byte* seed, const byte* in, + word32 sz, byte* out, byte* iv) +{ + int ret; + word32 off; + XALIGNED(32) byte prev[WC_AES_BLOCK_SIZE]; + XALIGNED(32) byte blk[WC_AES_BLOCK_SIZE]; + XALIGNED(32) byte cur[WC_AES_BLOCK_SIZE]; + + if (seed == NULL || in == NULL || out == NULL || iv == NULL || + sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + ret = Rtl8735bHuk_DeriveSlotKey(seed); + if (ret != 0) { + goto out; + } + if (hal_crypto_aes_ecb_sk_init((byte)WC_RTL8735B_DERIVED_WB_IDX, + WC_RTL8735B_KEYLEN) != 0) { + ret = WC_HW_E; + goto out; + } + + XMEMCPY(prev, iv, WC_AES_BLOCK_SIZE); + for (off = 0; off < sz; off += WC_AES_BLOCK_SIZE) { + if (enc) { + /* C_i = ECB_enc(P_i XOR C_{i-1}). The HAL DMAs its output on a + * 32-byte boundary, so encrypt into the aligned temp cur, then copy + * to the (possibly unaligned) out+off. Reads in+off before writing + * out+off, so in-place (out == in) is safe. */ + xorbufout(blk, in + off, prev, WC_AES_BLOCK_SIZE); + ret = hal_crypto_aes_ecb_encrypt(blk, WC_AES_BLOCK_SIZE, NULL, 0, + cur); + if (ret != 0) { + ret = WC_HW_E; + goto out; + } + XMEMCPY(out + off, cur, WC_AES_BLOCK_SIZE); + XMEMCPY(prev, cur, WC_AES_BLOCK_SIZE); + } + else { + /* P_i = ECB_dec(C_i) XOR C_{i-1}. Save C_i first: writing out+off + * below would clobber it when out == in, and it is the next call's + * chaining value. 
*/ + XMEMCPY(cur, in + off, WC_AES_BLOCK_SIZE); + ret = hal_crypto_aes_ecb_decrypt(cur, WC_AES_BLOCK_SIZE, NULL, 0, + blk); + if (ret != 0) { + ret = WC_HW_E; + goto out; + } + xorbufout(out + off, blk, prev, WC_AES_BLOCK_SIZE); + XMEMCPY(prev, cur, WC_AES_BLOCK_SIZE); + } + } + /* Advance the chaining IV to the last ciphertext block (prev holds it for + * both directions); only on full success. */ + XMEMCPY(iv, prev, WC_AES_BLOCK_SIZE); + ret = 0; + +out: + ForceZero(prev, sizeof(prev)); + ForceZero(blk, sizeof(blk)); + ForceZero(cur, sizeof(cur)); + wolfSSL_CryptHwMutexUnLock(); + return ret; +} +#endif /* HAVE_AES_CBC */ + +#ifdef WOLFSSL_AES_COUNTER +/* Increment a 16-byte big-endian (network order) counter in place. */ +static void Rtl8735bHuk_IncCtr(byte* ctr) +{ + int i; + for (i = WC_AES_BLOCK_SIZE - 1; i >= 0; i--) { + if (++ctr[i] != 0) { + break; + } + } +} + +/* AES-CTR under a HUK-derived slot key. The HAL has no CTR secure-key variant, + * so generate the keystream by ECB-sk encrypting the counter and XOR it with the + * data -- the key never leaves hardware. Maintains the wolfCrypt CTR state: + * aes->reg (counter), aes->tmp (current keystream block) and aes->left (unused + * keystream bytes at the tail of aes->tmp) so partial blocks continue across + * calls exactly as the software path does. The counter is staged on an aligned + * stack buffer, so caller in/out alignment does not matter (only XORed here). */ +static int Rtl8735bHuk_Ctr(Aes* aes, const byte* seed, const byte* in, + word32 sz, byte* out) +{ + int ret; + word32 processed; + XALIGNED(32) byte ctr[WC_AES_BLOCK_SIZE] = { 0 }; + XALIGNED(32) byte ks[WC_AES_BLOCK_SIZE] = { 0 }; + + if (aes == NULL || (sz != 0 && (in == NULL || out == NULL))) { + return BAD_FUNC_ARG; + } + + /* If the whole request is covered by leftover keystream, no HW is needed: + * consume it and return without touching the lock. 
*/ + if (aes->left >= sz) { + if (sz > 0) { + xorbufout(out, in, + (byte*)aes->tmp + WC_AES_BLOCK_SIZE - aes->left, sz); + aes->left -= sz; + } + return 0; + } + + /* HW is needed -- take the lock before mutating any state, so a lock failure + * leaves the CTR state and output untouched. */ + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* Derive/init the HW key first: a failure here must leave the output and CTR + * state untouched, so the leftover-keystream consumption (below) only runs + * once the hardware is ready. */ + ret = Rtl8735bHuk_DeriveSlotKey(seed); + if (ret != 0) { + goto out; + } + if (hal_crypto_aes_ecb_sk_init((byte)WC_RTL8735B_DERIVED_WB_IDX, + WC_RTL8735B_KEYLEN) != 0) { + ret = WC_HW_E; + goto out; + } + + /* Now consume any leftover keystream (all of it, since left < sz here). */ + processed = aes->left; + if (processed > 0) { + xorbufout(out, in, + (byte*)aes->tmp + WC_AES_BLOCK_SIZE - aes->left, processed); + out += processed; + in += processed; + aes->left = 0; + sz -= processed; + } + + XMEMCPY(ctr, aes->reg, WC_AES_BLOCK_SIZE); + while (sz >= WC_AES_BLOCK_SIZE) { + ret = hal_crypto_aes_ecb_encrypt(ctr, WC_AES_BLOCK_SIZE, NULL, 0, ks); + if (ret != 0) { + ret = WC_HW_E; + goto out; + } + xorbufout(out, in, ks, WC_AES_BLOCK_SIZE); + Rtl8735bHuk_IncCtr(ctr); + out += WC_AES_BLOCK_SIZE; + in += WC_AES_BLOCK_SIZE; + sz -= WC_AES_BLOCK_SIZE; + } + if (sz > 0) { + /* Final partial block: keep the unused keystream for the next call. 
*/ + ret = hal_crypto_aes_ecb_encrypt(ctr, WC_AES_BLOCK_SIZE, NULL, 0, ks); + if (ret != 0) { + ret = WC_HW_E; + goto out; + } + XMEMCPY(aes->tmp, ks, WC_AES_BLOCK_SIZE); + xorbufout(out, in, ks, sz); + Rtl8735bHuk_IncCtr(ctr); + aes->left = WC_AES_BLOCK_SIZE - sz; + } + XMEMCPY(aes->reg, ctr, WC_AES_BLOCK_SIZE); + ret = 0; + +out: + ForceZero(ks, sizeof(ks)); + ForceZero(ctr, sizeof(ctr)); + wolfSSL_CryptHwMutexUnLock(); + return ret; +} +#endif /* WOLFSSL_AES_COUNTER */ + +/* The 256-bit seed an Aes carries in devKey (set via the normal key API) is the + * per-operation HKDF input. Point *seed at it, or return CRYPTOCB_UNAVAILABLE if + * this is not a 256-bit seed key (so non-HUK keys fall back to software). */ +static int Rtl8735bHuk_AesSeed(Aes* aes, const byte** seed) +{ + if (aes == NULL || aes->keylen != WC_RTL8735B_KEYLEN) { + return CRYPTOCB_UNAVAILABLE; + } + *seed = (const byte*)aes->devKey; + return 0; +} + +/* Route a cipher (AES ECB/CBC/CTR, AES-GCM) request to the HUK backend. */ +static int Rtl8735bHuk_Cipher(struct wc_CryptoInfo* info) +{ + int ret; + const byte* seed = NULL; + + switch (info->cipher.type) { +#if defined(HAVE_AES_ECB) || defined(WOLFSSL_AES_DIRECT) || \ + defined(WOLF_CRYPTO_CB_ONLY_AES) + case WC_CIPHER_AES_ECB: + ret = Rtl8735bHuk_AesSeed(info->cipher.aesecb.aes, &seed); + if (ret != 0) { + return ret; + } + return Rtl8735bHuk_Ecb(info->cipher.enc, seed, info->cipher.aesecb.in, + info->cipher.aesecb.sz, info->cipher.aesecb.out); +#endif +#if defined(HAVE_AES_CBC) + case WC_CIPHER_AES_CBC: + ret = Rtl8735bHuk_AesSeed(info->cipher.aescbc.aes, &seed); + if (ret != 0) { + return ret; + } + /* Rtl8735bHuk_Cbc advances aes->reg (the chaining IV) itself, correctly + * for in-place and both directions. 
*/ + return Rtl8735bHuk_Cbc(info->cipher.enc, seed, info->cipher.aescbc.in, + info->cipher.aescbc.sz, info->cipher.aescbc.out, + (byte*)info->cipher.aescbc.aes->reg); +#endif +#ifdef WOLFSSL_AES_COUNTER + case WC_CIPHER_AES_CTR: + ret = Rtl8735bHuk_AesSeed(info->cipher.aesctr.aes, &seed); + if (ret != 0) { + return ret; + } + return Rtl8735bHuk_Ctr(info->cipher.aesctr.aes, seed, + info->cipher.aesctr.in, info->cipher.aesctr.sz, + info->cipher.aesctr.out); +#endif +#ifdef HAVE_AESGCM + case WC_CIPHER_AES_GCM: + if (info->cipher.enc) { + ret = Rtl8735bHuk_AesSeed(info->cipher.aesgcm_enc.aes, &seed); + if (ret != 0) { + return ret; + } + return Rtl8735bHuk_Gcm(1, seed, + info->cipher.aesgcm_enc.in, + info->cipher.aesgcm_enc.sz, + info->cipher.aesgcm_enc.out, + info->cipher.aesgcm_enc.iv, + info->cipher.aesgcm_enc.ivSz, + info->cipher.aesgcm_enc.authIn, + info->cipher.aesgcm_enc.authInSz, + info->cipher.aesgcm_enc.authTag, + info->cipher.aesgcm_enc.authTagSz); + } + else { + ret = Rtl8735bHuk_AesSeed(info->cipher.aesgcm_dec.aes, &seed); + if (ret != 0) { + return ret; + } + return Rtl8735bHuk_Gcm(0, seed, + info->cipher.aesgcm_dec.in, + info->cipher.aesgcm_dec.sz, + info->cipher.aesgcm_dec.out, + info->cipher.aesgcm_dec.iv, + info->cipher.aesgcm_dec.ivSz, + info->cipher.aesgcm_dec.authIn, + info->cipher.aesgcm_dec.authInSz, + (byte*)info->cipher.aesgcm_dec.authTag, + info->cipher.aesgcm_dec.authTagSz); + } +#endif + default: + return CRYPTOCB_UNAVAILABLE; + } +} +#endif /* !NO_AES */ + +#if !defined(NO_HMAC) && !defined(NO_SHA256) +/* HUK-bound HMAC-SHA256 over a secure-key slot. The wolfCrypt HMAC + * crypto-callback is incremental (wc_HmacUpdate delivers the message in chunks, + * wc_HmacFinal produces the digest), but the HAL secure-key HMAC engine state + * (sk_init -> update -> sk_final) cannot span those separate calls under one + * mutex hold without risking interleave with other Hmac objects. 
So the message + * is accumulated into a heap buffer hung off hmac->devCtx and the whole MAC runs + * one-shot at final under a single mutex hold. This suits HUK MAC / KDF use + * (short messages); it buffers the full message rather than streaming it. */ +typedef struct Rtl8735bHmacCtx { + byte* buf; + word32 len; + word32 cap; +} Rtl8735bHmacCtx; + +/* Append a message chunk to the per-Hmac accumulation buffer (grown geometric). + * On allocation failure the partial buffer is released later by the HmacFree + * cleanup callback (devCtx stays set). */ +static int Rtl8735bHuk_HmacAccumulate(Hmac* hmac, const byte* in, word32 inSz) +{ + Rtl8735bHmacCtx* ctx; + byte* nb; + word32 need; + word32 newCap; + word32 dbl; + + if (inSz == 0) { + return 0; /* nothing to buffer; this update is handled */ + } + if (in == NULL) { + return BAD_FUNC_ARG; + } + ctx = (Rtl8735bHmacCtx*)hmac->devCtx; + if (ctx == NULL) { + ctx = (Rtl8735bHmacCtx*)XMALLOC(sizeof(Rtl8735bHmacCtx), hmac->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (ctx == NULL) { + return MEMORY_E; + } + ctx->buf = NULL; + ctx->len = 0; + ctx->cap = 0; + hmac->devCtx = ctx; + } + need = ctx->len + inSz; + if (need < ctx->len) { /* word32 overflow */ + return BUFFER_E; + } + if (need > ctx->cap) { + newCap = (ctx->cap == 0) ? inSz : ctx->cap; + while (newCap < need) { + dbl = newCap << 1; + if (dbl < newCap) { /* overflow -> clamp to exact need */ + newCap = need; + break; + } + newCap = dbl; + } + nb = (byte*)XREALLOC(ctx->buf, newCap, hmac->heap, + DYNAMIC_TYPE_TMP_BUFFER); + if (nb == NULL) { + return MEMORY_E; + } + ctx->buf = nb; + ctx->cap = newCap; + } + XMEMCPY(ctx->buf + ctx->len, in, inSz); + ctx->len += inSz; + return 0; +} + +/* Release the accumulation buffer and clear hmac->devCtx. 
*/ +static void Rtl8735bHuk_HmacFreeCtx(Hmac* hmac) +{ + Rtl8735bHmacCtx* ctx = (Rtl8735bHmacCtx*)hmac->devCtx; + if (ctx != NULL) { + if (ctx->buf != NULL) { + XFREE(ctx->buf, hmac->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + XFREE(ctx, hmac->heap, DYNAMIC_TYPE_TMP_BUFFER); + hmac->devCtx = NULL; + } +} + +/* Compute HMAC-SHA256(message) under the HUK-derived slot key. The slot key is + * loaded by sk_cfg (LD_SK from WC_RTL8735B_DERIVED_WB_IDX); the key passed to + * sk_init is unused in that mode but a valid aligned buffer is supplied so the + * HAL never dereferences NULL. */ +static int Rtl8735bHuk_Hmac(const byte* seed, const byte* msg, word32 msgSz, + byte* digest) +{ + int ret; + u32 skCfg; + const byte* msgA = msg; + byte* msgBounce = NULL; + XALIGNED(32) byte dummyKey[WC_RTL8735B_KEYLEN] = { 0 }; + XALIGNED(32) byte digA[WC_SHA256_DIGEST_SIZE] = { 0 }; + + if (seed == NULL || digest == NULL) { + return BAD_FUNC_ARG; + } + if (msgSz > 0 && msg == NULL) { + return BAD_FUNC_ARG; + } + /* The HAL DMAs the message; bounce an unaligned buffer through an aligned + * temporary (the accumulation buffer is not guaranteed 32-byte aligned). 
*/ + if (msgSz > 0 && !WC_RTL8735B_IS_ALIGNED32(msg)) { + msgBounce = (byte*)XMALLOC(msgSz + 31, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (msgBounce == NULL) { + return MEMORY_E; + } + msgA = WC_RTL8735B_ALIGN_UP32(msgBounce); + XMEMCPY((byte*)msgA, msg, msgSz); + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + goto cleanup; + } + ret = Rtl8735bHuk_DeriveSlotKey(seed); + if (ret != 0) { + goto unlock; + } + skCfg = (u32)hal_crypto_hmac_sha2_256_get_sk_cfg( + (u8)WC_RTL8735B_HMAC_SK_OP, (u8)WC_RTL8735B_DERIVED_WB_IDX, + (u8)WC_RTL8735B_HMAC_WB_OP, (u8)WC_RTL8735B_HMAC_WB_IDX); + if (hal_crypto_hmac_sha2_256_sk_init(dummyKey, skCfg) != 0) { + ret = WC_HW_E; + goto unlock; + } + if (msgSz > 0) { + if (hal_crypto_hmac_sha2_256_update(msgA, msgSz) != 0) { + ret = WC_HW_E; + goto unlock; + } + } + if (hal_crypto_hmac_sha2_256_sk_final(digA) != 0) { + ret = WC_HW_E; + goto unlock; + } + XMEMCPY(digest, digA, WC_SHA256_DIGEST_SIZE); + ret = 0; + +unlock: + ForceZero(digA, sizeof(digA)); + wolfSSL_CryptHwMutexUnLock(); +cleanup: + if (msgBounce != NULL) { + XFREE(msgBounce, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + return ret; +} + +/* Route an HMAC request to the HUK backend. Handles only HMAC-SHA256 keyed by a + * 32-byte HUK seed (the seed is the wc_HmacSetKey key, read from hmac->keyRaw); + * anything else returns CRYPTOCB_UNAVAILABLE for software fallback. Update calls + * (digest == NULL) accumulate; the final call (digest != NULL) produces the MAC + * and frees the buffer. The HmacFree cleanup callback also arrives as a final + * call with a throwaway digest; computing into it is harmless. 
*/ +static int Rtl8735bHuk_HmacCb(struct wc_CryptoInfo* info) +{ + Hmac* hmac = info->hmac.hmac; + const byte* seed; + const byte* msg = NULL; + word32 msgSz = 0; + Rtl8735bHmacCtx* ctx; + int ret; + + if (hmac == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + if (info->hmac.macType != WC_SHA256 || + hmac->keyLen != WC_RTL8735B_KEYLEN || hmac->keyRaw == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + seed = hmac->keyRaw; + + if (info->hmac.digest == NULL) { + return Rtl8735bHuk_HmacAccumulate(hmac, info->hmac.in, + info->hmac.inSz); + } + ctx = (Rtl8735bHmacCtx*)hmac->devCtx; + if (ctx != NULL) { + msg = ctx->buf; + msgSz = ctx->len; + } + ret = Rtl8735bHuk_Hmac(seed, msg, msgSz, info->hmac.digest); + Rtl8735bHuk_HmacFreeCtx(hmac); + return ret; +} +#endif /* !NO_HMAC && !NO_SHA256 */ + +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) +/* The HW ECDSA engine here is wired for 256-bit curves (e.g. P-256) only; other + * sizes fall back to software. Big integers cross the HAL as little-endian + * 32-bit word arrays (word[0] = least-significant limb), matching how the SDK + * mbedTLS _alt bridge feeds the engine. */ +#define WC_RTL8735B_ECC_BYTES 32 +#define WC_RTL8735B_ECC_WORDS (WC_RTL8735B_ECC_BYTES / 4) + +/* Scratch buffers for one HW ECDSA sign (~0.25KB). Heap-allocated under + * WOLFSSL_SMALL_STACK (see Rtl8735bHuk_EccSignHw), on the stack otherwise. */ +typedef struct Rtl8735bEccSignTmp { + word32 hashW[WC_RTL8735B_ECC_WORDS]; + word32 kW[WC_RTL8735B_ECC_WORDS]; + word32 scW[WC_RTL8735B_ECC_WORDS]; + word32 rW[WC_RTL8735B_ECC_WORDS]; + word32 sW[WC_RTL8735B_ECC_WORDS]; + byte be[WC_RTL8735B_ECC_BYTES]; + byte rBe[WC_RTL8735B_ECC_BYTES]; + byte sBe[WC_RTL8735B_ECC_BYTES]; +} Rtl8735bEccSignTmp; + +#ifdef HAVE_ECC_VERIFY +/* Scratch buffers for one HW ECDSA verify (heap-allocated under + * WOLFSSL_SMALL_STACK, see Rtl8735bHuk_EccVerifyHw). 
*/ +typedef struct Rtl8735bEccVeriTmp { + word32 qxW[WC_RTL8735B_ECC_WORDS]; + word32 qyW[WC_RTL8735B_ECC_WORDS]; + word32 rW[WC_RTL8735B_ECC_WORDS]; + word32 sW[WC_RTL8735B_ECC_WORDS]; + word32 hashW[WC_RTL8735B_ECC_WORDS]; + byte qxBe[WC_RTL8735B_ECC_BYTES]; + byte qyBe[WC_RTL8735B_ECC_BYTES]; + byte rBe[WC_RTL8735B_ECC_BYTES]; + byte sBe[WC_RTL8735B_ECC_BYTES]; + byte hashBe[WC_RTL8735B_ECC_BYTES]; +} Rtl8735bEccVeriTmp; +#endif + +/* 32 big-endian bytes -> WC_RTL8735B_ECC_WORDS little-endian words. */ +static void Rtl8735b_BeToLeWords(const byte* be, word32* w) +{ + int j; + const byte* p; + for (j = 0; j < WC_RTL8735B_ECC_WORDS; j++) { + p = be + (WC_RTL8735B_ECC_BYTES - 4 * (j + 1)); + w[j] = ((word32)p[0] << 24) | ((word32)p[1] << 16) | + ((word32)p[2] << 8) | (word32)p[3]; + } +} + +/* Little-endian words -> 32 big-endian bytes (inverse of Rtl8735b_BeToLeWords). */ +static void Rtl8735b_LeWordsToBe(const word32* w, byte* be) +{ + int j; + byte* p; + for (j = 0; j < WC_RTL8735B_ECC_WORDS; j++) { + p = be + (WC_RTL8735B_ECC_BYTES - 4 * (j + 1)); + p[0] = (byte)(w[j] >> 24); + p[1] = (byte)(w[j] >> 16); + p[2] = (byte)(w[j] >> 8); + p[3] = (byte)(w[j]); + } +} + +/* HW ECDSA finish-IRQ completion state. The sign runs under the crypto mutex + * (one operation at a time), so these module statics are safe; the finish ISR + * fills them and sets huk_ecdsaDone, the caller busy-waits on the flag. */ +static volatile int huk_ecdsaDone; +static volatile word32 huk_ecdsaErr; +static volatile word32 huk_ecdsaR[WC_RTL8735B_ECC_WORDS]; +static volatile word32 huk_ecdsaS[WC_RTL8735B_ECC_WORDS]; +static volatile word32 huk_veriResult; + +/* ECDSA finish-interrupt callback (registered via hal_ecdsa_cb_handler): read + * the error status and r,s, then flag completion. Mirrors the RealTek example. 
*/
+static void Rtl8735bHuk_EcdsaIrqCb(void* data)
+{
+    hal_ecdsa_adapter_t* a = (hal_ecdsa_adapter_t*)data; /* arg passed at registration */
+    huk_ecdsaErr = (word32)hal_ecdsa_get_err_sta(a);
+    hal_ecdsa_get_rs(a, (u32*)huk_ecdsaR, (u32*)huk_ecdsaS); /* casts drop volatile for the HAL */
+    huk_ecdsaDone = 1; /* written last, after err and r,s are stored */
+}
+
+#ifdef HAVE_ECC_VERIFY
+/* ECDSA verify finish-interrupt callback: read the verify error status and the
+ * pass result (ECDSA_BIT_VERIFY_PASS -> non-zero == verified), then flag done.
+ * data is the adapter pointer that was handed to hal_ecdsa_cb_handler. */
+static void Rtl8735bHuk_EcdsaVeriIrqCb(void* data)
+{
+    hal_ecdsa_adapter_t* a = (hal_ecdsa_adapter_t*)data;
+    huk_ecdsaErr = (word32)hal_ecdsa_get_veri_err_sta(a);
+    huk_veriResult = (word32)hal_ecdsa_get_veri_result(a);
+    huk_ecdsaDone = 1; /* written last, after err and result are stored */
+}
+#endif
+
+/* Generate a per-signature nonce k in [1, n-1] from the RNG, as LE words. The
+ * curve order n comes from the wolfCrypt curve params (dp->order).
+ *
+ * Uses rejection sampling: draw WC_RTL8735B_ECC_BYTES random bytes, accept the
+ * value only if it is non-zero and below n, and give up after 16 draws.
+ *
+ * rng: initialized RNG (must not be NULL).
+ * dp:  curve parameters; dp->order is parsed as a hex string.
+ * kW:  output, WC_RTL8735B_ECC_WORDS little-endian words; written only on
+ *      success.
+ * Returns 0 on success, BAD_FUNC_ARG if rng or dp is NULL, MEMORY_E if the
+ * small-stack allocation fails, RNG_FAILURE_E if no draw was accepted, or the
+ * error from the mp_ / RNG call that failed. The byte buffer and k are zeroized
+ * before returning. */
+static int Rtl8735b_GenK(WC_RNG* rng, const ecc_set_type* dp, word32* kW)
+{
+    byte be[WC_RTL8735B_ECC_BYTES];
+#ifdef WOLFSSL_SMALL_STACK
+    mp_int* k;
+    mp_int* n;
+#else
+    mp_int k[1]; /* mp_int can be multi-KB with fastmath; heap on small stack */
+    mp_int n[1];
+#endif
+    int ret;
+    int i;
+    int ok = 0; /* becomes 1 once a candidate k is accepted */
+
+    if (rng == NULL || dp == NULL) {
+        return BAD_FUNC_ARG;
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    k = (mp_int*)XMALLOC(sizeof(mp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (k == NULL) {
+        return MEMORY_E;
+    }
+    n = k + 1; /* k and n share the single allocation */
+#endif
+    ret = mp_init_multi(k, n, NULL, NULL, NULL, NULL);
+    if (ret != 0) {
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(k, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+        return ret;
+    }
+    ret = mp_read_radix(n, dp->order, MP_RADIX_HEX);
+    for (i = 0; ret == 0 && ok == 0 && i < 16; i++) {
+        ret = wc_RNG_GenerateBlock(rng, be, WC_RTL8735B_ECC_BYTES);
+        if (ret != 0) {
+            break;
+        }
+        ret = mp_read_unsigned_bin(k, be, WC_RTL8735B_ECC_BYTES);
+        if (ret != 0) {
+            break;
+        }
+        if (!mp_iszero(k) && mp_cmp(k, n) == MP_LT) { /* accept only 0 < k < n */
+            ok = 1;
+        }
+    }
+    if (ret == 0 && ok == 0) {
+        ret = RNG_FAILURE_E; /* all 16 draws were rejected */
+    }
+    if (ret == 0) {
+        ret = mp_to_unsigned_bin_len(k, be, WC_RTL8735B_ECC_BYTES);
+        if (ret == 0) {
+            Rtl8735b_BeToLeWords(be, kW);
+        }
+    }
+    ForceZero(be, sizeof(be)); /* be held the nonce bytes */
+    mp_forcezero(k);
+    mp_clear(n);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(k, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+    return ret;
+}
+
+/* Sign info->pk.eccsign over the HW ECDSA engine (P-256). The private scalar is
+ * either the unwrapped INPUT scalar (otpPrkSel == 0) or sourced from OTP via
+ * select_prk (otpPrkSel != 0, scalar never in software). On success the DER
+ * signature is written to info->pk.eccsign.out/outlen.
+ *
+ * Follows the RealTek hal_ecdsa reference flow: set_curve(ECDSA_P256, NULL) uses
+ * the engine's built-in P-256 constants (no curve table needed); load the
+ * private key + nonce via hal_ecdsa_signature, then hal_ecdsa_hash marks the
+ * hash ready, which STARTS the engine; completion arrives via the finish IRQ
+ * (hal_ecdsa_cb_handler -> Rtl8735bHuk_EcdsaIrqCb). Big integers cross the HAL
+ * as little-endian 32-bit word arrays. This is the opt-in useHwEngine path; the
+ * default software sign delegates to wc_ecc_sign_hash.
+ *
+ * info:     sign request; key, in/inlen, out/outlen and rng are read from it.
+ * hk:       HUK key context, or NULL for the plain-key offload; only otpPrkSel
+ *           is read here.
+ * scalar:   big-endian private scalar; used only when otpPrkSel == 0.
+ * scalarSz: scalar length, 1..WC_RTL8735B_ECC_BYTES.
+ * Returns 0 on success; BAD_FUNC_ARG for a non-P-256 key or a bad scalar;
+ * MEMORY_E on allocation failure; WC_HW_E if the engine fails to init, never
+ * signals completion within WC_RTL8735B_ECDSA_SPIN iterations, or reports an
+ * error status; otherwise the error from Rtl8735b_GenK, the mutex lock, or
+ * wc_ecc_rs_raw_to_sig. */
+static int Rtl8735bHuk_EccSignHw(struct wc_CryptoInfo* info,
+    const wc_Rtl8735b_EccKey* hk, const byte* scalar, word32 scalarSz)
+{
+    ecc_key* key = info->pk.eccsign.key;
+    hal_ecdsa_adapter_t adapter;
+#ifdef WOLFSSL_SMALL_STACK
+    Rtl8735bEccSignTmp* t;
+#else
+    Rtl8735bEccSignTmp t[1];
+#endif
+    word32 inlen;
+    long spin;
+    int ret;
+    int useOtp = (hk != NULL && hk->otpPrkSel != 0); /* scalar comes from OTP */
+
+    /* The HW engine here is wired for P-256 (secp256r1). PkSign only routes P-256
+     * here; this is a defensive hard error (not CRYPTOCB_UNAVAILABLE, which would
+     * make the core retry a software sign on the keyless HUK device key). */
+    if (key->dp == NULL || key->dp->size != WC_RTL8735B_ECC_BYTES ||
+            key->dp->id != ECC_SECP256R1) {
+        return BAD_FUNC_ARG;
+    }
+#ifdef WOLFSSL_SMALL_STACK
+    t = (Rtl8735bEccSignTmp*)XMALLOC(sizeof(*t), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (t == NULL) {
+        return MEMORY_E;
+    }
+#endif
+    XMEMSET(t, 0, sizeof(*t));
+
+    /* Message hash -> leftmost 256 bits, big-endian, then LE words. A shorter
+     * hash is right-aligned; t->be is already zero from the XMEMSET above. */
+    inlen = info->pk.eccsign.inlen;
+    if (inlen >= WC_RTL8735B_ECC_BYTES) {
+        XMEMCPY(t->be, info->pk.eccsign.in, WC_RTL8735B_ECC_BYTES);
+    }
+    else {
+        XMEMCPY(t->be + (WC_RTL8735B_ECC_BYTES - inlen), info->pk.eccsign.in,
+            inlen);
+    }
+    Rtl8735b_BeToLeWords(t->be, t->hashW);
+
+    ret = Rtl8735b_GenK(info->pk.eccsign.rng, key->dp, t->kW);
+    if (ret != 0) {
+        goto done;
+    }
+    if (!useOtp) {
+        if (scalar == NULL || scalarSz == 0 ||
+                scalarSz > WC_RTL8735B_ECC_BYTES) {
+            ret = BAD_FUNC_ARG;
+            goto done;
+        }
+        XMEMSET(t->be, 0, sizeof(t->be)); /* t->be is reused: hash -> scalar */
+        XMEMCPY(t->be + (WC_RTL8735B_ECC_BYTES - scalarSz), scalar, scalarSz);
+        Rtl8735b_BeToLeWords(t->be, t->scW);
+    }
+
+    XMEMSET(&adapter, 0, sizeof(adapter));
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        goto done;
+    }
+    if (hal_ecdsa_init(&adapter) != 0) {
+        ret = WC_HW_E;
+        goto unlock;
+    }
+    huk_ecdsaDone = 0; /* cleared before the engine is started */
+    huk_ecdsaErr = 0;
+    hal_ecdsa_cb_handler(&adapter,
+        (ecdsa_irq_user_cb_t)Rtl8735bHuk_EcdsaIrqCb, &adapter);
+    /* P-256: NULL curve table -> the engine uses its built-in constants. */
+    hal_ecdsa_set_curve(&adapter, ECDSA_P256, NULL, ECDSA_256_BIT);
+    hal_ecdsa_set_mode(&adapter, ECDSA_SIGN, ECDSA_NONE);
+    if (useOtp) {
+        hal_ecdsa_select_prk(&adapter, (ecdsa_sel_prk_t)hk->otpPrkSel);
+    }
+    /* Load priv + nonce, then mark the hash ready -- that starts the engine.
+     * The HAL word pointers are the vendor u32 type (not necessarily wolfCrypt's
+     * word32 on this ABI), so cast the 32-bit word arrays at the boundary. */
+    hal_ecdsa_signature(&adapter, (u32*)(useOtp ? NULL : t->scW), (u32*)t->kW);
+    hal_ecdsa_hash(&adapter, (u32*)t->hashW);
+
+    /* Wait for the finish IRQ (the callback reads err + r,s and sets the flag). */
+    for (spin = 0; spin < WC_RTL8735B_ECDSA_SPIN && huk_ecdsaDone == 0; spin++) {
+        /* busy-wait */
+    }
+    hal_ecdsa_deinit(&adapter);
+
+    if (huk_ecdsaDone == 0) {
+        ret = WC_HW_E; /* engine never signaled completion */
+    }
+    else if (huk_ecdsaErr != 0) {
+        ret = WC_HW_E; /* engine reported an error status */
+    }
+    else {
+        XMEMCPY(t->rW, (const void*)huk_ecdsaR, sizeof(t->rW));
+        XMEMCPY(t->sW, (const void*)huk_ecdsaS, sizeof(t->sW));
+        ret = 0;
+    }
+unlock:
+    wolfSSL_CryptHwMutexUnLock();
+    if (ret == 0) { /* DER encoding is done after the mutex is released */
+        Rtl8735b_LeWordsToBe(t->rW, t->rBe);
+        Rtl8735b_LeWordsToBe(t->sW, t->sBe);
+        ret = wc_ecc_rs_raw_to_sig(t->rBe, WC_RTL8735B_ECC_BYTES,
+                                   t->sBe, WC_RTL8735B_ECC_BYTES,
+                                   info->pk.eccsign.out, info->pk.eccsign.outlen);
+    }
+done:
+    ForceZero(t, sizeof(*t)); /* scrub nonce + scalar (and the rest) */
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#endif
+    return ret;
+}
+
+#ifdef HAVE_ECC_VERIFY
+/* Verify an ECDSA P-256 signature over the HW engine (general offload; any
+ * P-256 public key, no HUK context needed). Sets *res = 1 if the signature
+ * verifies, 0 otherwise. Same engine flow as sign: load inputs (here the public
+ * key + r + s via hal_ecdsa_verify), then hal_ecdsa_hash marks the hash ready to
+ * start; completion via the verify finish IRQ (ECDSA_BIT_VERIFY_PASS).
*/ +static int Rtl8735bHuk_EccVerifyHw(struct wc_CryptoInfo* info) +{ + ecc_key* key = info->pk.eccverify.key; + hal_ecdsa_adapter_t adapter; + hal_ecdsa_veri_input_t vin; +#ifdef WOLFSSL_SMALL_STACK + Rtl8735bEccVeriTmp* t; +#else + Rtl8735bEccVeriTmp t[1]; +#endif + word32 qxLen, qyLen, rLen, sLen, inlen; + long spin; + int ret; + + if (key == NULL || info->pk.eccverify.res == NULL || key->dp == NULL || + key->dp->id != ECC_SECP256R1) { + return CRYPTOCB_UNAVAILABLE; /* non-P256 -> software verify */ + } + *info->pk.eccverify.res = 0; +#ifdef WOLFSSL_SMALL_STACK + t = (Rtl8735bEccVeriTmp*)XMALLOC(sizeof(*t), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (t == NULL) { + return MEMORY_E; + } +#endif + XMEMSET(t, 0, sizeof(*t)); + + /* public key X,Y -> LE words */ + qxLen = WC_RTL8735B_ECC_BYTES; + qyLen = WC_RTL8735B_ECC_BYTES; + ret = wc_ecc_export_public_raw(key, t->qxBe, &qxLen, t->qyBe, &qyLen); + if (ret != 0 || qxLen != WC_RTL8735B_ECC_BYTES || + qyLen != WC_RTL8735B_ECC_BYTES) { + ret = CRYPTOCB_UNAVAILABLE; /* fall back to software verify */ + goto done; + } + Rtl8735b_BeToLeWords(t->qxBe, t->qxW); + Rtl8735b_BeToLeWords(t->qyBe, t->qyW); + + /* DER signature -> raw r,s -> right-aligned 32 bytes -> LE words. Decode + * into a scratch pair first, then re-pad zero-extended into rBe/sBe. 
*/ + rLen = WC_RTL8735B_ECC_BYTES; + sLen = WC_RTL8735B_ECC_BYTES; + ret = wc_ecc_sig_to_rs(info->pk.eccverify.sig, info->pk.eccverify.siglen, + t->hashBe, &rLen, t->qxBe, &sLen); + if (ret != 0 || rLen > WC_RTL8735B_ECC_BYTES || + sLen > WC_RTL8735B_ECC_BYTES) { + ret = CRYPTOCB_UNAVAILABLE; + goto done; + } + XMEMCPY(t->rBe + (WC_RTL8735B_ECC_BYTES - rLen), t->hashBe, rLen); + Rtl8735b_BeToLeWords(t->rBe, t->rW); + XMEMCPY(t->sBe + (WC_RTL8735B_ECC_BYTES - sLen), t->qxBe, sLen); + Rtl8735b_BeToLeWords(t->sBe, t->sW); + + /* message hash -> leftmost 256 bits, big-endian, then LE words */ + inlen = info->pk.eccverify.hashlen; + if (inlen >= WC_RTL8735B_ECC_BYTES) { + XMEMCPY(t->hashBe, info->pk.eccverify.hash, WC_RTL8735B_ECC_BYTES); + } + else { + XMEMSET(t->hashBe, 0, WC_RTL8735B_ECC_BYTES); + XMEMCPY(t->hashBe + (WC_RTL8735B_ECC_BYTES - inlen), + info->pk.eccverify.hash, inlen); + } + Rtl8735b_BeToLeWords(t->hashBe, t->hashW); + + XMEMSET(&adapter, 0, sizeof(adapter)); + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + goto done; + } + if (hal_ecdsa_init(&adapter) != 0) { + ret = WC_HW_E; + goto unlock; + } + huk_ecdsaDone = 0; + huk_ecdsaErr = 0; + huk_veriResult = 0; + hal_ecdsa_cb_handler(&adapter, + (ecdsa_irq_user_cb_t)Rtl8735bHuk_EcdsaVeriIrqCb, + &adapter); + hal_ecdsa_set_curve(&adapter, ECDSA_P256, NULL, ECDSA_256_BIT); + hal_ecdsa_set_mode(&adapter, ECDSA_VERI, ECDSA_NONE); + vin.ppub_key_x = (u32*)t->qxW; + vin.ppub_key_y = (u32*)t->qyW; + vin.pr_adr = (u32*)t->rW; + vin.ps_adr = (u32*)t->sW; + hal_ecdsa_verify(&adapter, &vin); /* load pubkey + r + s */ + hal_ecdsa_hash(&adapter, (u32*)t->hashW); /* mark hash ready -> start */ + + for (spin = 0; spin < WC_RTL8735B_ECDSA_SPIN && huk_ecdsaDone == 0; spin++) { + /* busy-wait for the verify finish IRQ */ + } + hal_ecdsa_deinit(&adapter); + + if (huk_ecdsaDone == 0) { + ret = WC_HW_E; /* engine never signaled completion */ + } + else { + /* A completed verify is success (ret 0); *res reflects 
pass/fail. A bad + * signature is not an error -- it just yields res = 0. */ + *info->pk.eccverify.res = + (huk_ecdsaErr == 0 && huk_veriResult != 0) ? 1 : 0; + ret = 0; + } +unlock: + wolfSSL_CryptHwMutexUnLock(); +done: + XMEMSET(t, 0, sizeof(*t)); +#ifdef WOLFSSL_SMALL_STACK + XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return ret; +} +#endif /* HAVE_ECC_VERIFY */ + +/* Unwrap the HUK-wrapped private scalar into scalar[] (scalarSz bytes). Derives + * the HUK slot key from the seed and ECB-sk decrypts the blob in place. */ +static int Rtl8735bHuk_UnwrapScalar(const wc_Rtl8735b_EccKey* hk, + byte* scalar, word32 scalarSz) +{ + XALIGNED(32) byte wrapped[WC_RTL8735B_MAX_WRAPPED]; + int ret; + + XMEMCPY(wrapped, hk->wrapped, hk->wrappedLen); + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + ForceZero(wrapped, sizeof(wrapped)); + return ret; + } + ret = Rtl8735bHuk_DeriveSlotKey(hk->seed); + if (ret == 0 && hal_crypto_aes_ecb_sk_init( + (byte)WC_RTL8735B_DERIVED_WB_IDX, WC_RTL8735B_KEYLEN) != 0) { + ret = WC_HW_E; + } + if (ret == 0 && hal_crypto_aes_ecb_decrypt( + wrapped, hk->wrappedLen, NULL, 0, wrapped) != 0) { + ret = WC_HW_E; + } + wolfSSL_CryptHwMutexUnLock(); + if (ret == 0) { + XMEMCPY(scalar, wrapped, scalarSz); + } + ForceZero(wrapped, sizeof(wrapped)); + return ret; +} + +/* Route an ECDSA sign request to the HUK backend. + * + * The caller attaches a wc_Rtl8735b_EccKey via the standard crypto-callback + * context pointer key->devCtx (see rtl8735b.h). Three modes: + * 1. OTP-resident HW sign (useHwEngine, otpPrkSel != 0): the scalar lives in + * OTP and never enters software; the HW ECDSA engine signs directly. + * 2. Wrapped-scalar HW sign (useHwEngine, otpPrkSel == 0): the scalar is + * ECB-unwrapped under the HUK, then fed to the HW engine. + * 3. Wrapped-scalar software sign (default): unwrap, then sign in software. 
+ * The wrapped blob is device-bound (it only unwraps on the silicon whose HUK
+ * produced the slot key), so it is not portable. */
+static int Rtl8735bHuk_PkSign(struct wc_CryptoInfo* info)
+{
+    ecc_key* key = info->pk.eccsign.key;
+    const wc_Rtl8735b_EccKey* hk;
+    ecc_key* tmp = NULL; /* temporary software key, mode 3 only */
+    int ret;
+    int curveId;
+    word32 scalarSz;
+    byte scalar[MAX_ECC_BYTES]; /* plaintext private scalar; zeroized on every exit that filled it */
+
+    if (key == NULL) {
+        return CRYPTOCB_UNAVAILABLE;
+    }
+
+    /* General HW ECDSA offload: a plain ecc_key (no HUK devCtx) signed via the
+     * HW engine with its own private scalar. Only P-256 with a usable private key
+     * is offloaded; anything else falls back to software. Once the scalar is
+     * exported, the result of the HW sign is returned as-is (no software retry). */
+    if (key->devCtx == NULL) {
+        if (key->dp == NULL || key->dp->id != ECC_SECP256R1 ||
+                (key->type != ECC_PRIVATEKEY &&
+                 key->type != ECC_PRIVATEKEY_ONLY)) {
+            return CRYPTOCB_UNAVAILABLE;
+        }
+        scalarSz = sizeof(scalar);
+        ret = wc_ecc_export_private_only(key, scalar, &scalarSz);
+        if (ret != 0) {
+            ForceZero(scalar, sizeof(scalar));
+            return CRYPTOCB_UNAVAILABLE; /* fall back to software */
+        }
+        ret = Rtl8735bHuk_EccSignHw(info, NULL, scalar, scalarSz);
+        ForceZero(scalar, sizeof(scalar));
+        return ret;
+    }
+    hk = (const wc_Rtl8735b_EccKey*)key->devCtx;
+
+    /* Mode 1: OTP-resident scalar -- no seed/wrapped blob needed. Both
+     * useHwEngine and otpPrkSel must be set; otpPrkSel alone falls through to
+     * the wrapped-scalar modes below. */
+    if (hk->useHwEngine && hk->otpPrkSel != 0) {
+        return Rtl8735bHuk_EccSignHw(info, hk, NULL, 0);
+    }
+
+    /* Modes 2/3 need the seed + wrapped scalar.
+     * NOTE(review): comments elsewhere in this file say CRYPTOCB_UNAVAILABLE
+     * makes the core retry a software sign on the keyless HUK key; confirm that
+     * is the intended result for a missing seed/blob rather than BAD_FUNC_ARG. */
+    if (hk->seed == NULL || hk->seedSz != WC_RTL8735B_KEYLEN ||
+            hk->wrapped == NULL) {
+        return CRYPTOCB_UNAVAILABLE;
+    }
+    if (hk->wrappedLen == 0 ||
+            (hk->wrappedLen % WC_AES_BLOCK_SIZE) != 0 || /* ECB needs whole blocks */
+            hk->wrappedLen > WC_RTL8735B_MAX_WRAPPED ||
+            hk->plainLen == 0 ||
+            hk->plainLen > hk->wrappedLen || /* scalar fits in the blob */
+            hk->plainLen > (word32)sizeof(scalar)) {
+        return BAD_FUNC_ARG;
+    }
+    curveId = (key->dp != NULL) ? key->dp->id : ECC_SECP256R1; /* P-256 when no curve is set */
+    scalarSz = hk->plainLen;
+
+    ret = Rtl8735bHuk_UnwrapScalar(hk, scalar, scalarSz);
+    if (ret != 0) {
+        ForceZero(scalar, sizeof(scalar));
+        return ret;
+    }
+
+    /* Mode 2: HW-engine sign with the unwrapped scalar -- only for the curve the
+     * HW supports (P-256). For other curves fall through to the software sign
+     * below with the same unwrapped scalar (returning CRYPTOCB_UNAVAILABLE from
+     * the HW path would make the core retry a software sign on the keyless HUK
+     * device key and fail confusingly). */
+    if (hk->useHwEngine && key->dp != NULL && key->dp->id == ECC_SECP256R1) {
+        ret = Rtl8735bHuk_EccSignHw(info, hk, scalar, scalarSz);
+        ForceZero(scalar, sizeof(scalar));
+        return ret;
+    }
+
+    /* Mode 3: software sign. The temporary key is forced to INVALID_DEVID so the
+     * inner sign does not re-enter this crypto callback. Only the private scalar
+     * is imported (the public-key argument is NULL). */
+    tmp = (ecc_key*)XMALLOC(sizeof(ecc_key), NULL, DYNAMIC_TYPE_ECC);
+    if (tmp == NULL) {
+        ForceZero(scalar, sizeof(scalar));
+        return MEMORY_E;
+    }
+    ret = wc_ecc_init_ex(tmp, NULL, INVALID_DEVID);
+    if (ret == 0) {
+        ret = wc_ecc_import_private_key_ex(scalar, scalarSz, NULL, 0, tmp,
+                                           curveId);
+        if (ret == 0) {
+            ret = wc_ecc_sign_hash(info->pk.eccsign.in, info->pk.eccsign.inlen,
+                                   info->pk.eccsign.out, info->pk.eccsign.outlen,
+                                   info->pk.eccsign.rng, tmp);
+        }
+        wc_ecc_free(tmp); /* called only when wc_ecc_init_ex succeeded */
+    }
+    ForceZero(scalar, sizeof(scalar));
+    XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
+    return ret;
+}
+#endif /* HAVE_ECC && HAVE_ECC_SIGN */
+
+#ifndef WC_NO_RNG
+/* Lazily-initialized secure TRNG (a peripheral distinct from the AES/HKDF crypto
+ * engine). Guarded, like every HW touch in this port, by the crypto mutex.
+ * Set to 1 after the first successful hal_trng_sec_init and never cleared in
+ * the code visible here. */
+static int huk_trngInit = 0;
+
+/* Fill a caller buffer with entropy from the secure (self-tested) TRNG. Exposed
+ * as the crypto-callback SEED source so an app that inits its RNG with
+ * WC_HUK_DEVID (wc_InitRng_ex(&rng, NULL, WC_HUK_DEVID)) gets HW-seeded entropy
+ * without wiring CUSTOM_RAND_GENERATE_SEED.
*/ +static int Rtl8735b_Seed(byte* seed, word32 sz) +{ + int ret; + word32 i; + word32 n; + u32 r; + + if (seed == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0) { + return 0; + } + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + if (!huk_trngInit) { + if (hal_trng_sec_init() != HAL_OK) { + wolfSSL_CryptHwMutexUnLock(); + return WC_HW_E; + } + huk_trngInit = 1; + } + /* Fill from the secure TRNG a 32-bit word at a time (hal_trng_sec_get_rand). */ + for (i = 0; i < sz; ) { + r = hal_trng_sec_get_rand(); + n = (sz - i) < 4u ? (sz - i) : 4u; + XMEMCPY(seed + i, &r, n); + i += n; + } + ret = 0; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} +#endif /* WC_NO_RNG */ + +/* The crypto-callback device entry point (registered by + * wc_Rtl8735b_HukRegister). Returns CRYPTOCB_UNAVAILABLE for anything it does + * not handle so the caller falls back to software. */ +static int Rtl8735b_CryptoDevCb(int devId, struct wc_CryptoInfo* info, + void* ctx) +{ + (void)devId; + (void)ctx; + if (info == NULL) { + return CRYPTOCB_UNAVAILABLE; + } + + switch (info->algo_type) { +#ifndef NO_AES + case WC_ALGO_TYPE_CIPHER: + return Rtl8735bHuk_Cipher(info); +#endif +#if !defined(NO_HMAC) && !defined(NO_SHA256) + case WC_ALGO_TYPE_HMAC: + return Rtl8735bHuk_HmacCb(info); +#endif +#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN) + case WC_ALGO_TYPE_PK: + if (info->pk.type == WC_PK_TYPE_ECDSA_SIGN) { + return Rtl8735bHuk_PkSign(info); + } + #ifdef HAVE_ECC_VERIFY + if (info->pk.type == WC_PK_TYPE_ECDSA_VERIFY) { + return Rtl8735bHuk_EccVerifyHw(info); + } + #endif + return CRYPTOCB_UNAVAILABLE; +#endif +#ifndef WC_NO_RNG + case WC_ALGO_TYPE_SEED: + return Rtl8735b_Seed(info->seed.seed, info->seed.sz); +#endif + default: + return CRYPTOCB_UNAVAILABLE; + } +} + +/* Register the AmebaPro2 HUK device at devId (e.g. WC_HUK_DEVID). After this, + * objects whose devId is set to it at init route transparently to the HUK + * crypto engine. 
*/
+int wc_Rtl8735b_HukRegister(int devId)
+{
+    int ret = Rtl8735bHuk_Init(NULL); /* port init; defined above this excerpt */
+    if (ret != 0) {
+        return ret; /* init failed: the device is not registered */
+    }
+    return wc_CryptoCb_RegisterDevice(devId, Rtl8735b_CryptoDevCb, NULL);
+}
+
+void wc_Rtl8735b_HukUnRegister(int devId)
+{
+    wc_CryptoCb_UnRegisterDevice(devId);
+    /* No port-global key to scrub: each op derives from the object's own seed
+     * under the crypto mutex; the working key lives only in the HW slot. The
+     * only global state cleared here is the cached seed below. */
+#ifndef WC_RTL8735B_NO_DERIVE_CACHE
+    /* Invalidate the derivation cache; the next op re-runs the HKDF ladder.
+     * NOTE(review): this is done without taking the crypto mutex -- confirm no
+     * operation can still be in flight on another thread at this point. */
+    ForceZero(huk_seedCache, sizeof(huk_seedCache));
+    huk_haveCache = 0;
+#endif
+}
+
+#endif /* WOLF_CRYPTO_CB */
+
+#endif /* WOLFSSL_RTL8735B_HUK */
diff --git a/wolfcrypt/src/port/realtek/rtl8735b_shim.h b/wolfcrypt/src/port/realtek/rtl8735b_shim.h
new file mode 100644
index 00000000000..611227ebfc1
--- /dev/null
+++ b/wolfcrypt/src/port/realtek/rtl8735b_shim.h
+/* rtl8735b_shim.h
+ *
+ * Copyright (C) 2006-2026 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+/* Host compile-test stand-in for the slice of the RealTek AmebaPro2 HAL that
+ * wolfcrypt/src/port/realtek/rtl8735b.c references. Compiled ONLY under
+ * WOLFSSL_RTL8735B_HOST_TEST (set by --enable-rtl8735b).
It lets the
+ * crypto-callback dispatch, field access, and compile-time guards be exercised
+ * on a host without the customer SDK. Every stub returns a success sentinel; it
+ * performs NO real crypto. On target this header is NOT used -- the real HAL
+ * headers (hal_crypto.h, hal_hkdf.h) are included instead, supplied via the
+ * application/board include path.
+ *
+ * The prototypes here intentionally mirror the real HAL signatures from
+ * nuwa_hal_realtek (rtl8735b branch),
+ * ameba/amebapro2/source/fwlib/rtl8735b/include/. Keep this in sync with the
+ * HAL calls in rtl8735b.c (add a stub here when the port starts calling a new
+ * HAL function under host test).
+ */
+
+#ifndef _WOLFPORT_RTL8735B_SHIM_H_
+#define _WOLFPORT_RTL8735B_SHIM_H_
+
+#ifdef WOLFSSL_RTL8735B_HOST_TEST
+
+/* HAL scalar types (the real HAL pulls these from its basic_types header).
+ * Defining _RTL8735B_TYPES_SHIMMED_ beforehand skips these typedefs. */
+#ifndef _RTL8735B_TYPES_SHIMMED_
+    #define _RTL8735B_TYPES_SHIMMED_
+    typedef unsigned char u8;
+    typedef unsigned int u32; /* assumes unsigned int is 32 bits on the host -- TODO confirm */
+#endif
+
+/* hal_status_t / success sentinel.
*/
+typedef int hal_status_t;
+#ifndef HAL_OK
+    #define HAL_OK 0
+#endif
+
+/* ---- Engine + AES secure-key ops (hal_crypto.h) ----
+ * All stubs ignore their arguments, leave output buffers untouched and
+ * return 0. */
+static inline int hal_crypto_engine_init(void) { return 0; }
+static inline int hal_crypto_aes_gcm_sk_init(u8 key_num, const u32 keylen)
+    { (void)key_num; (void)keylen; return 0; }
+static inline int hal_crypto_aes_gcm_encrypt(const u8* msg, const u32 msglen,
+    const u8* iv, const u8* aad, const u32 aadlen, u8* pResult, u8* pTag)
+    { (void)msg; (void)msglen; (void)iv; (void)aad; (void)aadlen;
+      (void)pResult; (void)pTag; return 0; }
+static inline int hal_crypto_aes_gcm_decrypt(const u8* msg, const u32 msglen,
+    const u8* iv, const u8* aad, const u32 aadlen, u8* pResult, u8* pTag)
+    { (void)msg; (void)msglen; (void)iv; (void)aad; (void)aadlen;
+      (void)pResult; (void)pTag; return 0; }
+static inline int hal_crypto_aes_ecb_sk_init(u8 key_num, const u32 keylen)
+    { (void)key_num; (void)keylen; return 0; }
+static inline int hal_crypto_aes_ecb_encrypt(const u8* msg, const u32 msglen,
+    const u8* iv, const u32 ivlen, u8* pResult)
+    { (void)msg; (void)msglen; (void)iv; (void)ivlen; (void)pResult; return 0; }
+static inline int hal_crypto_aes_ecb_decrypt(const u8* msg, const u32 msglen,
+    const u8* iv, const u32 ivlen, u8* pResult)
+    { (void)msg; (void)msglen; (void)iv; (void)ivlen; (void)pResult; return 0; }
+
+/* ---- Secure-key HMAC-SHA256 (hal_crypto.h) ---- */
+#if !defined(NO_HMAC) && !defined(NO_SHA256)
+static inline u32 hal_crypto_hmac_sha2_256_get_sk_cfg(const u8 sk_op,
+    const u8 sk_idx, const u8 wb_op, const u8 wb_idx)
+    { (void)sk_op; (void)sk_idx; (void)wb_op; (void)wb_idx; return 0; }
+static inline int hal_crypto_hmac_sha2_256_sk_init(const u8* key,
+    const u32 sk_cfg)
+    { (void)key; (void)sk_cfg; return 0; }
+static inline int hal_crypto_hmac_sha2_256_update(const u8* message,
+    const u32 msglen)
+    { (void)message; (void)msglen; return 0; }
+static inline int hal_crypto_hmac_sha2_256_sk_final(u8* pDigest)
+    { (void)pDigest; return 0; } /* the digest buffer is not written */
+#endif /* !NO_HMAC && !NO_SHA256 */
+
+/* ---- HKDF secure key-ladder (hal_hkdf.h) ---- */
+static inline hal_status_t hal_hkdf_hmac_sha256_secure_init(const u8 crypto_sel)
+    { (void)crypto_sel; return HAL_OK; }
+static inline hal_status_t hal_hkdf_extract_secure_all(const u8 sk_idx,
+    const u8 wb_idx, const u8* msg_buf)
+    { (void)sk_idx; (void)wb_idx; (void)msg_buf; return HAL_OK; }
+static inline hal_status_t hal_hkdf_expand_secure_all(const u8 sk_idx,
+    const u8 wb_idx, const u8* nonce)
+    { (void)sk_idx; (void)wb_idx; (void)nonce; return HAL_OK; }
+
+/* ---- Secure TRNG (hal_trng_sec.h) ---- */
+#ifndef WC_NO_RNG
+static inline hal_status_t hal_trng_sec_init(void) { return HAL_OK; }
+static inline u32 hal_trng_sec_get_rand(void) { return 0x5A5A5A5Au; } /* constant, NOT random */
+#endif /* WC_NO_RNG */
+
+/* ---- HW ECDSA engine (hal_ecdsa.h) ----
+ * The stubs below never invoke the registered finish callback, so a caller
+ * that waits for the callback to fire never sees completion under host
+ * test. */
+#if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN)
+typedef int HAL_Status;
+typedef unsigned char ecdsa_curve_t;
+typedef unsigned char ecdsa_mode_t;
+typedef unsigned char ecdsa_basic_func_t;
+typedef unsigned char ecdsa_bit_num_t;
+typedef unsigned char ecdsa_sel_prk_t;
+#ifndef ECDSA_P256
+    #define ECDSA_P256 0x1
+#endif
+#ifndef ECDSA_SIGN
+    #define ECDSA_SIGN 0x1
+#endif
+#ifndef ECDSA_VERI
+    #define ECDSA_VERI 0x0
+#endif
+#ifndef ECDSA_NONE
+    #define ECDSA_NONE 0x5
+#endif
+#ifndef ECDSA_256_BIT
+    #define ECDSA_256_BIT 0x0
+#endif
+#ifndef ECDSA_INPUT_PRK
+    #define ECDSA_INPUT_PRK 0x0
+    #define ECDSA_OTP_PRK_1 0x1
+    #define ECDSA_OTP_PRK_2 0x2
+#endif
+typedef struct hal_ecdsa_adapter_s { int dummy; } hal_ecdsa_adapter_t; /* opaque placeholder */
+typedef struct hal_ecdsa_curve_table_s {
+    u32* ppoint_x;
+    u32* ppoint_y;
+    u32* pa_adr;
+    u32* pprime;
+    u32* porder_n;
+} hal_ecdsa_curve_table_t;
+typedef void (*ecdsa_irq_user_cb_t)(void*);
+static inline HAL_Status hal_ecdsa_init(hal_ecdsa_adapter_t* a)
+    { (void)a; return 0; }
+static inline HAL_Status hal_ecdsa_deinit(hal_ecdsa_adapter_t* a)
+    { (void)a; return 0; }
+static inline u32 hal_ecdsa_get_err_sta(hal_ecdsa_adapter_t* a)
+    { (void)a; return 0; }
+static inline void hal_ecdsa_cb_handler(hal_ecdsa_adapter_t* a,
+    ecdsa_irq_user_cb_t cb, void* arg) { (void)a; (void)cb; (void)arg; } /* cb is dropped, never called */
+static inline void hal_ecdsa_set_curve(hal_ecdsa_adapter_t* a, ecdsa_curve_t c,
+    hal_ecdsa_curve_table_t* t, ecdsa_bit_num_t b)
+    { (void)a; (void)c; (void)t; (void)b; }
+static inline void hal_ecdsa_set_mode(hal_ecdsa_adapter_t* a, ecdsa_mode_t m,
+    ecdsa_basic_func_t f) { (void)a; (void)m; (void)f; }
+static inline void hal_ecdsa_select_prk(hal_ecdsa_adapter_t* a,
+    ecdsa_sel_prk_t s) { (void)a; (void)s; }
+static inline void hal_ecdsa_signature(hal_ecdsa_adapter_t* a, u32* pk, u32* k)
+    { (void)a; (void)pk; (void)k; }
+static inline void hal_ecdsa_hash(hal_ecdsa_adapter_t* a, u32* h)
+    { (void)a; (void)h; }
+static inline void hal_ecdsa_get_rs(hal_ecdsa_adapter_t* a, u32* r, u32* s)
+    { (void)a; (void)r; (void)s; } /* r and s are not written */
+#ifdef HAVE_ECC_VERIFY
+typedef struct hal_ecdsa_veri_input_s {
+    u32* ppub_key_x;
+    u32* ppub_key_y;
+    u32* pr_adr;
+    u32* ps_adr;
+} hal_ecdsa_veri_input_t;
+static inline void hal_ecdsa_verify(hal_ecdsa_adapter_t* a,
+    hal_ecdsa_veri_input_t* in) { (void)a; (void)in; }
+static inline u32 hal_ecdsa_get_veri_result(hal_ecdsa_adapter_t* a)
+    { (void)a; return 0; } /* 0: the port treats only non-zero as verified */
+static inline u32 hal_ecdsa_get_veri_err_sta(hal_ecdsa_adapter_t* a)
+    { (void)a; return 0; }
+#endif /* HAVE_ECC_VERIFY */
+#endif /* HAVE_ECC && HAVE_ECC_SIGN */
+
+#endif /* WOLFSSL_RTL8735B_HOST_TEST */
+
+#endif /* _WOLFPORT_RTL8735B_SHIM_H_ */
diff --git a/wolfssl/wolfcrypt/include.am b/wolfssl/wolfcrypt/include.am
index 9635e1a6cfd..d7778f451eb 100644
--- a/wolfssl/wolfcrypt/include.am
+++ b/wolfssl/wolfcrypt/include.am
@@ -110,6 +110,7 @@ noinst_HEADERS+= \
 wolfssl/wolfcrypt/port/silabs/silabs_random.h \
 wolfssl/wolfcrypt/port/st/stm32.h \
 wolfssl/wolfcrypt/port/st/stsafe.h \
+ wolfssl/wolfcrypt/port/realtek/rtl8735b.h \
 wolfssl/wolfcrypt/port/tropicsquare/tropic01.h \
wolfssl/wolfcrypt/port/Espressif/esp-sdk-lib.h \ wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h \ diff --git a/wolfssl/wolfcrypt/port/realtek/rtl8735b.h b/wolfssl/wolfcrypt/port/realtek/rtl8735b.h new file mode 100644 index 00000000000..ddd554a9d3d --- /dev/null +++ b/wolfssl/wolfcrypt/port/realtek/rtl8735b.h @@ -0,0 +1,161 @@ +/* rtl8735b.h + * + * Copyright (C) 2006-2026 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* RealTek AmebaPro2 (RTL8735B) HUK (Hardware Unique Key) crypto-callback port. + * + * Binds keys to the silicon HUK via the AmebaPro2 HAL crypto engine: a 256-bit + * "seed" is run through the HAL HKDF key-ladder against the HUK to land a + * device-bound working key in a secure key-storage slot; AES (GCM/ECB/CBC/CTR) + * then runs from that slot without the key ever entering software. ECDSA sign + * binds a HUK-wrapped private scalar. The port is a pure crypto-callback device: + * it adds no wolfSSL core API or struct fields -- AES reads its seed from the + * standard aes->devKey, and ECDSA reads a wc_Rtl8735b_EccKey (below) the caller + * attaches via the standard ecc_key->devCtx. 
+ *
+ * The HW ECDSA P-256 engine (hal_ecdsa) is also used as a general sign/verify
+ * offload, independent of the HUK: an ecc_key with devId = WC_HUK_DEVID and no
+ * devCtx routes wc_ecc_sign_hash through the engine using the key's own scalar,
+ * and wc_ecc_verify_hash through the engine using the key's own public point
+ * (no HUK context needed for verify). This lets a standard wolfCrypt benchmark
+ * exercise the HW engine just by setting WC_USE_DEVID = WC_HUK_DEVID.
+ */
+
+#ifndef _WOLFPORT_RTL8735B_H_
+#define _WOLFPORT_RTL8735B_H_
+
+#include /* NOTE(review): the include target is missing in this copy (likely lost in extraction); restore it before building */
+
+#ifdef WOLFSSL_RTL8735B_HUK
+
+/* Transparent HUK crypto flows through the crypto-callback framework. */
+#if !defined(WOLF_CRYPTO_CB)
+    #error "WOLFSSL_RTL8735B_HUK requires WOLF_CRYPTO_CB (crypto callback dispatch)"
+#endif
+
+/* Crypto-callback device id for transparent HUK crypto. Must not collide with
+ * the STM32 DHUK device ids (807 SAES, 808 DHUK, 809 DHUK-wrapped) if both ports
+ * are enabled in one build. Override before include if it collides. */
+#ifndef WC_HUK_DEVID
+    #define WC_HUK_DEVID 810
+#endif
+
+/* Secure key-storage slot indices used by the key ladder. These are the
+ * KEY_STG_IDX_* / KEY_STG_HUK* values from rtl8735b_crypto_ctrl.h (NOT the
+ * enum hkdf_key_storage_e in rtl8735b_hkdf.h): the HUK source slot is HUK1
+ * (== 0xC, KEY_STG_HUK1); HUK2 is 0xD; SK1/SK2 are 0xE/0xF; slots 0..7 are
+ * general write-back slots. The PRK lands in one general slot, the derived
+ * working key in another -- the working-key slot is the one AES/HMAC *_sk_init
+ * references. All overridable from user_settings. */
+#ifndef WC_RTL8735B_HUK_SK_IDX
+    #define WC_RTL8735B_HUK_SK_IDX 0xC /* KEY_STG_HUK1 */
+#endif
+#ifndef WC_RTL8735B_HKDF_PRK_IDX
+    #define WC_RTL8735B_HKDF_PRK_IDX 3 /* KEY_STG_IDX3 */
+#endif
+#ifndef WC_RTL8735B_DERIVED_WB_IDX
+    #define WC_RTL8735B_DERIVED_WB_IDX 4 /* KEY_STG_IDX4; also the slot the ECDSA scalar unwrap decrypts with */
+#endif
+
+/* crypto_sel for hal_hkdf_hmac_sha256_secure_init: HKDF_CRYPTO_HW_SEL_EN.
*/
+#ifndef WC_RTL8735B_HKDF_CRYPTO_SEL
+    #define WC_RTL8735B_HKDF_CRYPTO_SEL 0
+#endif
+
+/* Secure-key HMAC-SHA256 config selectors (rtl8735b crypto key-storage roles,
+ * passed to hal_crypto_hmac_sha2_256_get_sk_cfg): source the HMAC key from the
+ * secure slot (KEY_STG_SKTYPE_LD_SK) and emit the digest to the output buffer
+ * with no slot write-back (KEY_STG_WBTYPE_WB_ONLY_BUF). The key slot used is
+ * WC_RTL8735B_DERIVED_WB_IDX (the HUK-derived working key). All overridable. */
+#ifndef WC_RTL8735B_HMAC_SK_OP
+    #define WC_RTL8735B_HMAC_SK_OP 1 /* KEY_STG_SKTYPE_LD_SK */
+#endif
+#ifndef WC_RTL8735B_HMAC_WB_OP
+    #define WC_RTL8735B_HMAC_WB_OP 0 /* KEY_STG_WBTYPE_WB_ONLY_BUF */
+#endif
+#ifndef WC_RTL8735B_HMAC_WB_IDX
+    #define WC_RTL8735B_HMAC_WB_IDX 0 /* unused when WB_ONLY_BUF */
+#endif
+
+/* Max wrapped-scalar blob the HUK ECDSA sign path will unwrap (a multiple of 16
+ * covering up to P-521: 66 padded to 80, plus headroom). This is also the size
+ * of the stack buffer the unwrap decrypts in; the sign path rejects a longer
+ * wrappedLen with BAD_FUNC_ARG. */
+#ifndef WC_RTL8735B_MAX_WRAPPED
+    #define WC_RTL8735B_MAX_WRAPPED 96
+#endif
+
+/* Bounded spin used to wait for the HW ECDSA engine (hal_ecdsa) finish status.
+ * The engine completion model (blocking call vs. poll vs. IRQ) is ROM-resident;
+ * this poll is the portable default and is overridable per platform. In
+ * rtl8735b.c the loop polls a flag set by the finish-IRQ callback, and the
+ * sign/verify paths return WC_HW_E if the flag is still clear after this many
+ * iterations. The value is an iteration count, not a time. */
+#ifndef WC_RTL8735B_ECDSA_SPIN
+    #define WC_RTL8735B_ECDSA_SPIN 2000000L
+#endif
+
+/* HUK-bound ECC private key context for the ECDSA sign path. Instead of a new
+ * wolfSSL core API, the caller attaches one of these to a WC_HUK_DEVID ecc_key
+ * via the standard crypto-callback context pointer (key->devCtx) before signing:
+ *
+ *     wc_Rtl8735b_EccKey hk = { seed, 32, wrapped, wrappedLen, plainLen };
+ *     wc_ecc_init_ex(&key, NULL, WC_HUK_DEVID);
+ *     wc_ecc_set_curve(&key, plainLen, curveId);
+ *     key.devCtx = &hk;
+ *     wc_ecc_sign_hash(...); (unwraps + signs under the HUK)
+ *
+ * The pointed-at buffers must stay valid for the key's lifetime (borrowed, not
+ * copied).
seed is the 256-bit HKDF input; wrapped is the private scalar
+ * AES-wrapped under the HUK-derived key (length a multiple of 16, <=
+ * WC_RTL8735B_MAX_WRAPPED); plainLen is the real scalar size (e.g. 32 P-256).
+ *
+ * By default the sign runs in software after the HUK ECB-unwrap. Set useHwEngine
+ * to route the sign through the HW ECDSA engine (hal_ecdsa, P-256 only). With
+ * otpPrkSel != 0 the private scalar is sourced from OTP via hal_ecdsa_select_prk
+ * and never enters software -- seed/wrapped/plainLen are then unused and may be
+ * zero/NULL. (The HW-engine INPUT/HUK-wrapped path is validated on silicon; the
+ * OTP-resident path is implemented but unexercised -- it needs an OTP key.) */
+typedef struct wc_Rtl8735b_EccKey {
+    const byte* seed;    /* HKDF input; must be non-NULL for the wrapped-scalar modes */
+    word32 seedSz;       /* must equal WC_RTL8735B_KEYLEN for the wrapped-scalar modes */
+    const byte* wrapped; /* HUK-wrapped private scalar (borrowed) */
+    word32 wrappedLen;   /* non-zero multiple of WC_AES_BLOCK_SIZE, <= WC_RTL8735B_MAX_WRAPPED */
+    word32 plainLen;     /* 1..wrappedLen and <= MAX_ECC_BYTES; leading bytes kept after unwrap */
+    /* --- HW ECDSA engine extensions (appended; zero keeps legacy behavior) --- */
+    byte useHwEngine; /* 1: sign via the HW ECDSA engine (hal_ecdsa) */
+    byte otpPrkSel; /* HW private-key source: 0 = unwrapped INPUT scalar,
+                     * 1 = ECDSA_OTP_PRK_1, 2 = ECDSA_OTP_PRK_2 (scalar
+                     * never in software; seed/wrapped may be NULL).
+                     * Honored only together with useHwEngine != 0. */
+} wc_Rtl8735b_EccKey;
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+/* Register / unregister the AmebaPro2 HUK device. After registering at
+ * WC_HUK_DEVID, set an object's devId to it at init (e.g.
+ * wc_AesInit(&aes, NULL, WC_HUK_DEVID)) to route transparently to the HUK
+ * crypto engine. Returns 0 on success.
*/
+WOLFSSL_API int wc_Rtl8735b_HukRegister(int devId); /* non-zero: port init or callback registration failed */
+WOLFSSL_API void wc_Rtl8735b_HukUnRegister(int devId); /* also clears the cached derivation seed, if enabled */
+
+#ifdef __cplusplus
+    } /* end of the C-linkage block */
+#endif
+
+#endif /* WOLFSSL_RTL8735B_HUK */
+
+#endif /* _WOLFPORT_RTL8735B_H_ */
diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt
index b4603df6058..b5b8297e292 100644
--- a/zephyr/CMakeLists.txt
+++ b/zephyr/CMakeLists.txt
@@ -131,6 +131,7 @@ if(CONFIG_WOLFSSL)
 zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/port/psa/psa_hash.c)
 zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/port/psa/psa_pkcbs.c)
 zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/port/st/stm32.c)
+ zephyr_library_sources(${ZEPHYR_CURRENT_MODULE_DIR}/wolfcrypt/src/port/realtek/rtl8735b.c)
 if(CONFIG_WOLFCRYPT_ARMASM)
 # tested with board: "qemu_kvm_arm64"