From 8fc1e4bc6d6d0b8a523de589e1ba7cf6b0e2b5a8 Mon Sep 17 00:00:00 2001 From: Brett Nicholas <7547222+bigbrett@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:36:38 -0600 Subject: [PATCH] added async RNG --- docs/draft/async-crypto.md | 126 +++++++++++++++- src/wh_client_crypto.c | 261 ++++++++++++++++++++------------ test/wh_test_crypto.c | 289 +++++++++++++++++++++++++++++++++++- wolfhsm/wh_client.h | 10 ++ wolfhsm/wh_client_crypto.h | 79 +++++++++- wolfhsm/wh_message_crypto.h | 9 ++ 6 files changed, 676 insertions(+), 98 deletions(-) diff --git a/docs/draft/async-crypto.md b/docs/draft/async-crypto.md index 2262e6b1..0059b4b3 100644 --- a/docs/draft/async-crypto.md +++ b/docs/draft/async-crypto.md @@ -416,6 +416,130 @@ int wh_Client_Sha256Dma(whClientContext* ctx, wc_Sha256* sha, const uint8_t* in, | **`requestSent` flag** | Adds a parameter to the API, but avoids unnecessary round-trips when input is absorbed entirely into the local buffer | | **Snapshot/rollback on send failure** | Small CPU cost to copy the partial buffer, but guarantees SHA state consistency even on transport failures | + + +## RNG: Single-Shot with Caller-Driven Chunking + +The RNG generate operation is the second algorithm to receive the async +treatment. Unlike SHA, RNG is **single-shot** -- there is no intermediate +state to carry, no partial-block buffering, and no multi-call Init/Update/Final +sequence. Each Request asks for N random bytes and the matching Response +delivers them. + +RNG is still interesting because the existing blocking API silently chunks +large requests into multiple round-trips when the caller asks for more bytes +than fit in one comm-buffer message. The async split has to decide where +that chunking logic lives. + +### Chunking Policy + +The async Request/Response pair is **single-shot per call**: one Request +produces one Response. Callers requesting more bytes than fit in a single +inline message must loop themselves. 
The per-call inline cap is exposed as: + +```c +#define WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ \ + (WOLFHSM_CFG_COMM_DATA_LEN - \ + (uint32_t)sizeof(whMessageCrypto_GenericResponseHeader) - \ + (uint32_t)sizeof(whMessageCrypto_RngResponse)) +``` + +Requests exceeding this cap (or of size zero) are rejected with +`WH_ERROR_BADARGS` before any bytes hit the wire. + +The existing blocking `wh_Client_RngGenerate()` function is retained as a +thin wrapper that chunks internally against the cap, so application code +using the wolfCrypt RNG callback path continues to work without changes: + +```c +int wh_Client_RngGenerate(whClientContext* ctx, uint8_t* out, uint32_t size) +{ + while (remaining > 0) { + uint32_t chunk = min(remaining, WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ); + uint32_t got = chunk; + wh_Client_RngGenerateRequest(ctx, chunk); + do { + ret = wh_Client_RngGenerateResponse(ctx, out, &got); + } while (ret == WH_ERROR_NOTREADY); + out += got; remaining -= got; + } +} +``` + +This keeps the async primitives predictable (each call is bounded by a single +round trip) and pushes the scheduling decision -- "when should I yield +between chunks?" -- up to the async caller, who is the only one with enough +context to answer it. + +### Response Size Negotiation + +The Response function takes an `inout_size` parameter: on entry it is the +capacity of the output buffer; on exit it is the actual number of bytes the +server wrote. This lets the caller distinguish short reads from bugs: + +```c +uint32_t got = requested; +ret = wh_Client_RngGenerateResponse(ctx, out, &got); +/* got may be < requested if the server returned a shorter reply */ +``` + +If the server somehow returns more bytes than the caller's buffer can hold +(should not happen, but defended against), the Response returns +`WH_ERROR_ABORTED` instead of overflowing. 
+ +### DMA Variant + +The DMA variant bypasses the comm buffer entirely for the data payload: the +server writes random bytes directly into the client's output buffer via +translated DMA addresses. The Request/Response split introduces the same +address-stashing pattern used by SHA DMA: + +```c +typedef struct { + uintptr_t outAddr; /* translated DMA address */ + uintptr_t clientAddr; /* original client address (for POST) */ + uint64_t outSz; /* DMA'd size (0 means "nothing to clean up") */ +} whClientDmaAsyncRng; +``` + +Stored in `whClientContext.dma.asyncCtx.rng`, this context carries the +translated address across the Request/Response boundary so the Response can +perform the matching POST cleanup. + +Two points worth calling out: + +- **Fail-fast on occupied transport**: the DMA Request checks + `wh_CommClient_IsRequestPending()` *before* acquiring the DMA mapping, so a + request that `SendRequest` would reject never maps the buffer or overwrites + the stashed context. If `SendRequest` still fails, the Request releases the + mapping itself, because the matching Response will never run. +- **POST runs on every non-NOTREADY exit**: once the Response receives a + reply -- success or otherwise -- it performs the POST cleanup + unconditionally, so the client buffer is safe to read regardless of the + final return code. + +Unlike the non-DMA variant, the DMA variant has no per-call size cap: the +server writes directly to client memory, so a single DMA call can fulfill +arbitrarily large requests. 
+ +### API Reference + +```c +/* Non-DMA */ +int wh_Client_RngGenerateRequest(whClientContext* ctx, uint32_t size); +int wh_Client_RngGenerateResponse(whClientContext* ctx, uint8_t* out, + uint32_t* inout_size); + +/* DMA (requires WOLFHSM_CFG_DMA) */ +int wh_Client_RngGenerateDmaRequest(whClientContext* ctx, uint8_t* out, + uint32_t size); +int wh_Client_RngGenerateDmaResponse(whClientContext* ctx); + +/* Blocking (unchanged; now wraps the async primitives and chunks internally) */ +int wh_Client_RngGenerate(whClientContext* ctx, uint8_t* out, uint32_t size); +int wh_Client_RngGenerateDma(whClientContext* ctx, uint8_t* out, uint32_t size); +``` + ## Roadmap: Remaining Algorithms The async split pattern will be applied algorithm by algorithm to all crypto @@ -430,6 +554,7 @@ the full set of operations and their planned async status. | SHA-224 | Update/Final Request/Response | Shares SHA-256 wire format | | SHA-384 | Update/Final Request/Response | Shares SHA-512 wire format | | SHA-512 | Update/Final Request/Response | Non-DMA and DMA variants | +| RNG Generate | `wh_Client_RngGenerate{Request,Response}` and DMA variants | Single-shot per call; non-DMA callers chunk against `WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ`, DMA has no per-call cap | **Planned:** @@ -450,7 +575,6 @@ the full set of operations and their planned async status. 
| CMAC | `wh_Client_Cmac{Request,Response}` | Low | Already has partial split pattern | | ML-DSA Sign | `wh_Client_MlDsaSign{Request,Response}` | Low | Post-quantum; single-shot | | ML-DSA Verify | `wh_Client_MlDsaVerify{Request,Response}` | Low | Post-quantum; single-shot | -| RNG Generate | `wh_Client_RngGenerate{Request,Response}` | Medium | Chunking needed for large requests; async callers must handle chunking themselves | Most remaining algorithms are **single-shot** operations (one request, one response) and are straightforward to split compared to SHA's streaming diff --git a/src/wh_client_crypto.c b/src/wh_client_crypto.c index 9799c91d..ef433aa1 100644 --- a/src/wh_client_crypto.c +++ b/src/wh_client_crypto.c @@ -175,155 +175,232 @@ static int _getCryptoResponse(uint8_t* respBuf, uint16_t type, } /** Implementations */ -int wh_Client_RngGenerate(whClientContext* ctx, uint8_t* out, uint32_t size) +int wh_Client_RngGenerateRequest(whClientContext* ctx, uint32_t size) { - int ret = WH_ERROR_OK; - whMessageCrypto_RngRequest* req; - whMessageCrypto_RngResponse* res; - uint8_t* dataPtr; - uint8_t* reqData; + whMessageCrypto_RngRequest* req; + uint8_t* dataPtr; + uint16_t req_len; - if (ctx == NULL) { + if (ctx == NULL || size == 0) { + return WH_ERROR_BADARGS; + } + if (size > WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ) { return WH_ERROR_BADARGS; } - /* Get data buffer */ dataPtr = wh_CommClient_GetDataPtr(ctx->comm); if (dataPtr == NULL) { return WH_ERROR_BADARGS; } - /* Setup generic header and get pointer to request data */ - reqData = - _createCryptoRequest(dataPtr, WC_ALGO_TYPE_RNG, ctx->cryptoAffinity); + req = (whMessageCrypto_RngRequest*)_createCryptoRequest( + dataPtr, WC_ALGO_TYPE_RNG, ctx->cryptoAffinity); + req->sz = size; - /* Setup request header */ - req = (whMessageCrypto_RngRequest*)reqData; + req_len = + (uint16_t)(sizeof(whMessageCrypto_GenericRequestHeader) + sizeof(*req)); - /* Calculate maximum data size client can request (subtract headers) */ - 
const uint32_t client_max_data = - WOLFHSM_CFG_COMM_DATA_LEN - - sizeof(whMessageCrypto_GenericRequestHeader) - - sizeof(whMessageCrypto_RngRequest); + WH_DEBUG_CLIENT_VERBOSE("RNG req: size=%u\n", (unsigned int)size); - while ((size > 0) && (ret == WH_ERROR_OK)) { - /* Request Message */ - uint16_t group = WH_MESSAGE_GROUP_CRYPTO; - uint16_t action = WC_ALGO_TYPE_RNG; - uint16_t req_len = sizeof(whMessageCrypto_GenericRequestHeader) + - sizeof(whMessageCrypto_RngRequest); - uint16_t res_len; + return wh_Client_SendRequest(ctx, WH_MESSAGE_GROUP_CRYPTO, WC_ALGO_TYPE_RNG, + req_len, dataPtr); +} - /* Request up to client max, but no more than remaining size */ - uint32_t chunk_size = (size < client_max_data) ? size : client_max_data; - req->sz = chunk_size; +int wh_Client_RngGenerateResponse(whClientContext* ctx, uint8_t* out, + uint32_t* inout_size) +{ + int ret; + uint16_t group; + uint16_t action; + uint16_t res_len = 0; + uint8_t* dataPtr; + whMessageCrypto_RngResponse* res = NULL; - WH_DEBUG_CLIENT_VERBOSE("RNG: size:%u reqsz:%u remaining:%u\n", - (unsigned int)chunk_size, (unsigned int)req_len, - (unsigned int)size); - WH_DEBUG_CLIENT_VERBOSE("RNG: req:%p\n", req); + if (ctx == NULL || inout_size == NULL || + (out == NULL && *inout_size != 0)) { + return WH_ERROR_BADARGS; + } - /* Send request and get response */ - ret = wh_Client_SendRequest(ctx, group, action, req_len, dataPtr); - if (ret == 0) { - do { - ret = wh_Client_RecvResponse(ctx, &group, &action, &res_len, - dataPtr); - } while (ret == WH_ERROR_NOTREADY); + dataPtr = wh_CommClient_GetDataPtr(ctx->comm); + if (dataPtr == NULL) { + return WH_ERROR_BADARGS; + } + + ret = wh_Client_RecvResponse(ctx, &group, &action, &res_len, dataPtr); + if (ret != WH_ERROR_OK) { + return ret; + } + + ret = _getCryptoResponse(dataPtr, WC_ALGO_TYPE_RNG, (uint8_t**)&res); + if (ret == WH_ERROR_OK) { + if (res->sz > WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ || + res->sz > *inout_size) { + /* Server returned more than the inline 
cap or the caller's buffer + * can hold. Guard the inline cap first to avoid reading past the + * comm buffer on a malformed response. */ + ret = WH_ERROR_ABORTED; } - if (ret == WH_ERROR_OK) { - /* Get response */ - ret = - _getCryptoResponse(dataPtr, WC_ALGO_TYPE_RNG, (uint8_t**)&res); - if (ret == WH_ERROR_OK) { - /* Validate server didn't respond with more than requested */ - if (res->sz <= chunk_size) { - uint8_t* res_out = (uint8_t*)(res + 1); - if (out != NULL) { - memcpy(out, res_out, res->sz); - out += res->sz; - } - size -= res->sz; - WH_DEBUG_CLIENT_VERBOSE("out size:%u remaining:%u\n", - (unsigned int)res->sz, (unsigned int)size); - WH_DEBUG_VERBOSE_HEXDUMP("[client] res_out: \n", out - res->sz, - res->sz); - } - else { - /* Server returned more than we can handle - error */ - ret = WH_ERROR_ABORTED; - } + else { + if (res->sz > 0 && out != NULL) { + memcpy(out, (uint8_t*)(res + 1), res->sz); } + *inout_size = res->sz; + WH_DEBUG_CLIENT_VERBOSE("RNG resp: size=%u\n", + (unsigned int)res->sz); } } return ret; } +int wh_Client_RngGenerate(whClientContext* ctx, uint8_t* out, uint32_t size) +{ + int ret = WH_ERROR_OK; + uint32_t remaining; + const uint32_t cap = (uint32_t)WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ; + + if (ctx == NULL || out == NULL) { + return WH_ERROR_BADARGS; + } + + remaining = size; + while (ret == WH_ERROR_OK && remaining > 0) { + uint32_t chunk = (remaining < cap) ? remaining : cap; + uint32_t got = chunk; + + ret = wh_Client_RngGenerateRequest(ctx, chunk); + if (ret != WH_ERROR_OK) { + break; + } + do { + ret = wh_Client_RngGenerateResponse(ctx, out, &got); + } while (ret == WH_ERROR_NOTREADY); + if (ret != WH_ERROR_OK) { + break; + } + if (got == 0) { + /* Server returned nothing for a non-zero request — guard against + * infinite loop. 
*/ + ret = WH_ERROR_ABORTED; + break; + } + out += got; + remaining -= got; + } + return ret; +} + #ifdef WOLFHSM_CFG_DMA -int wh_Client_RngGenerateDma(whClientContext* ctx, uint8_t* out, uint32_t size) +int wh_Client_RngGenerateDmaRequest(whClientContext* ctx, uint8_t* out, + uint32_t size) { - int ret = WH_ERROR_OK; - uint8_t* dataPtr = NULL; - whMessageCrypto_RngDmaRequest* req = NULL; - whMessageCrypto_RngDmaResponse* resp = NULL; - uint16_t respSz = 0; - uintptr_t outAddr = 0; + int ret = WH_ERROR_OK; + uint8_t* dataPtr = NULL; + whMessageCrypto_RngDmaRequest* req = NULL; + uintptr_t outAddr = 0; + bool outAddrAcquired = false; - if ((ctx == NULL) || (out == NULL) || (size == 0)) { + if (ctx == NULL || out == NULL || size == 0) { return WH_ERROR_BADARGS; } + /* Fail-fast on occupied transport to avoid acquiring a DMA mapping that + * would be leaked if SendRequest later rejects the request. */ + if (wh_CommClient_IsRequestPending(ctx->comm) == 1) { + return WH_ERROR_REQUEST_PENDING; + } - /* Get data pointer from the context to use as request/response storage */ dataPtr = (uint8_t*)wh_CommClient_GetDataPtr(ctx->comm); if (dataPtr == NULL) { return WH_ERROR_BADARGS; } - /* Setup generic header and get pointer to request data */ req = (whMessageCrypto_RngDmaRequest*)_createCryptoRequest( dataPtr, WC_ALGO_TYPE_RNG, ctx->cryptoAffinity); - /* Set up output buffer address and size */ - req->output.sz = size; + req->output.sz = size; + req->output.addr = 0; - /* Perform address translation for output buffer (PRE operation) */ + /* PRE address translation for the output buffer */ ret = wh_Client_DmaProcessClientAddress( - ctx, (uintptr_t)out, (void**)&outAddr, req->output.sz, + ctx, (uintptr_t)out, (void**)&outAddr, size, WH_DMA_OPER_CLIENT_WRITE_PRE, (whDmaFlags){0}); if (ret == WH_ERROR_OK) { + outAddrAcquired = true; req->output.addr = outAddr; } if (ret == WH_ERROR_OK) { - /* Send the request to the server */ + /* Stash for POST cleanup in the matching Response 
*/ + ctx->dma.asyncCtx.rng.outAddr = outAddr; + ctx->dma.asyncCtx.rng.clientAddr = (uintptr_t)out; + ctx->dma.asyncCtx.rng.outSz = size; + ret = wh_Client_SendRequest( ctx, WH_MESSAGE_GROUP_CRYPTO_DMA, WC_ALGO_TYPE_RNG, sizeof(whMessageCrypto_GenericRequestHeader) + sizeof(*req), - (uint8_t*)dataPtr); + dataPtr); } - if (ret == WH_ERROR_OK) { - /* Wait for and receive the response */ - do { - ret = wh_Client_RecvResponse(ctx, NULL, NULL, &respSz, - (uint8_t*)dataPtr); - } while (ret == WH_ERROR_NOTREADY); + if (ret != WH_ERROR_OK && outAddrAcquired) { + /* Release the mapping if SendRequest failed; the Response will not run + * and the stash is meaningless. */ + (void)wh_Client_DmaProcessClientAddress( + ctx, (uintptr_t)out, (void**)&outAddr, size, + WH_DMA_OPER_CLIENT_WRITE_POST, (whDmaFlags){0}); + ctx->dma.asyncCtx.rng.outSz = 0; + } + return ret; +} + +int wh_Client_RngGenerateDmaResponse(whClientContext* ctx) +{ + int ret = WH_ERROR_OK; + uint8_t* dataPtr = NULL; + whMessageCrypto_RngDmaResponse* resp = NULL; + uint16_t respSz = 0; + + if (ctx == NULL) { + return WH_ERROR_BADARGS; + } + + dataPtr = (uint8_t*)wh_CommClient_GetDataPtr(ctx->comm); + if (dataPtr == NULL) { + return WH_ERROR_BADARGS; + } + + ret = wh_Client_RecvResponse(ctx, NULL, NULL, &respSz, dataPtr); + if (ret == WH_ERROR_NOTREADY) { + return ret; } if (ret == WH_ERROR_OK) { - /* Get response structure pointer, validates generic header rc */ ret = _getCryptoResponse(dataPtr, WC_ALGO_TYPE_RNG, (uint8_t**)&resp); - /* Nothing more to do on success, as server will have written random - * bytes directly to client memory */ + /* On success, server has written random bytes directly to client + * memory — nothing else to copy. 
*/ } - /* Perform address translation cleanup (POST operation) - * This is called regardless of successful operation to give the callback a - * chance for cleanup */ - (void)wh_Client_DmaProcessClientAddress( - ctx, (uintptr_t)out, (void**)&outAddr, size, - WH_DMA_OPER_CLIENT_WRITE_POST, (whDmaFlags){0}); + /* POST DMA cleanup using stashed addresses (runs on every non-NOTREADY + * exit so the client buffer is safe to read regardless of error). */ + if (ctx->dma.asyncCtx.rng.outSz > 0) { + uintptr_t outAddr = ctx->dma.asyncCtx.rng.outAddr; + (void)wh_Client_DmaProcessClientAddress( + ctx, ctx->dma.asyncCtx.rng.clientAddr, (void**)&outAddr, + ctx->dma.asyncCtx.rng.outSz, WH_DMA_OPER_CLIENT_WRITE_POST, + (whDmaFlags){0}); + ctx->dma.asyncCtx.rng.outSz = 0; + } + return ret; +} + +int wh_Client_RngGenerateDma(whClientContext* ctx, uint8_t* out, uint32_t size) +{ + int ret; + ret = wh_Client_RngGenerateDmaRequest(ctx, out, size); + if (ret == WH_ERROR_OK) { + do { + ret = wh_Client_RngGenerateDmaResponse(ctx); + } while (ret == WH_ERROR_NOTREADY); + } return ret; } #endif /* WOLFHSM_CFG_DMA */ diff --git a/test/wh_test_crypto.c b/test/wh_test_crypto.c index 0dc66b72..4bb0e2c4 100644 --- a/test/wh_test_crypto.c +++ b/test/wh_test_crypto.c @@ -142,6 +142,7 @@ static int whTest_CryptoRng(whClientContext* ctx, int devId, WC_RNG* rng) if (ret != 0) { WH_ERROR_PRINT("Failed to wc_InitRng_ex %d\n", ret); } else { + int freeRet; ret = wc_RNG_GenerateBlock(rng, lil, sizeof(lil)); if (ret != 0) { WH_ERROR_PRINT("Failed to wc_RNG_GenerateBlock %d\n", ret); @@ -154,10 +155,22 @@ static int whTest_CryptoRng(whClientContext* ctx, int devId, WC_RNG* rng) if (ret != 0) { WH_ERROR_PRINT("Failed to wc_RNG_GenerateBlock %d\n", ret); } + else if (memcmp(lil, med, sizeof(lil)) == 0) { + /* The prefixes of two successive independent RNG calls + * must not match. 
A collision here indicates a stuck RNG */ + WH_ERROR_PRINT("RNG: successive calls produced identical " + "prefix\n"); + ret = -1; + } } - ret = wc_FreeRng(rng); - if (ret != 0) { - WH_ERROR_PRINT("Failed to wc_FreeRng %d\n", ret); + } + /* Always free the RNG if InitRng succeeded, regardless of which (if + * any) GenerateBlock call failed. */ + freeRet = wc_FreeRng(rng); + if (freeRet != 0) { + WH_ERROR_PRINT("Failed to wc_FreeRng %d\n", freeRet); + if (ret == 0) { + ret = freeRet; } } } @@ -167,6 +180,265 @@ static int whTest_CryptoRng(whClientContext* ctx, int devId, WC_RNG* rng) return ret; } +/* Returns 0 if buf appears to contain non-trivial data (not all zero), -1 on + * the all-zero case which would suggest the response was never written. */ +static int whTest_RngBufNonZero(const uint8_t* buf, uint32_t len) +{ + uint32_t i; + for (i = 0; i < len; i++) { + if (buf[i] != 0) { + return 0; + } + } + return -1; +} + +/* Direct exercise of the new async non-DMA RNG primitives. */ +static int whTest_CryptoRngAsync(whClientContext* ctx) +{ + int ret = WH_ERROR_OK; + uint8_t small[64]; + uint8_t big[WOLFHSM_CFG_COMM_DATA_LEN * 2]; + uint32_t got; + + /* Case A: small Request -> poll Response */ + if (ret == 0) { + memset(small, 0, sizeof(small)); + ret = wh_Client_RngGenerateRequest(ctx, sizeof(small)); + if (ret != WH_ERROR_OK) { + WH_ERROR_PRINT("Async RNG: Request(small) failed %d\n", ret); + } + } + if (ret == 0) { + got = sizeof(small); + do { + ret = wh_Client_RngGenerateResponse(ctx, small, &got); + } while (ret == WH_ERROR_NOTREADY); + if (ret != WH_ERROR_OK) { + WH_ERROR_PRINT("Async RNG: Response(small) failed %d\n", ret); + } + else if (got != sizeof(small)) { + WH_ERROR_PRINT("Async RNG: short read got=%u want=%u\n", + (unsigned)got, (unsigned)sizeof(small)); + ret = -1; + } + else if (whTest_RngBufNonZero(small, sizeof(small)) != 0) { + WH_ERROR_PRINT("Async RNG: small buffer all zeros\n"); + ret = -1; + } + } + + /* Case B: max-inline-size Request -> 
Response in a single round trip */ + if (ret == 0) { + uint32_t cap = (uint32_t)WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ; + memset(big, 0, cap); + ret = wh_Client_RngGenerateRequest(ctx, cap); + if (ret != WH_ERROR_OK) { + WH_ERROR_PRINT("Async RNG: Request(max) failed %d\n", ret); + } + if (ret == 0) { + got = cap; + do { + ret = wh_Client_RngGenerateResponse(ctx, big, &got); + } while (ret == WH_ERROR_NOTREADY); + if (ret == 0 && got != cap) { + WH_ERROR_PRINT("Async RNG: max read short got=%u want=%u\n", + (unsigned)got, (unsigned)cap); + ret = -1; + } + else if (ret == 0 && whTest_RngBufNonZero(big, cap) != 0) { + WH_ERROR_PRINT("Async RNG: max buffer all zeros\n"); + ret = -1; + } + } + } + + /* Case C: caller-driven chunking to fill a buffer larger than the per-call + * inline capacity. */ + if (ret == 0) { + uint32_t cap = (uint32_t)WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ; + uint32_t total = (uint32_t)sizeof(big); + uint32_t consumed = 0; + + memset(big, 0, total); + while (ret == 0 && consumed < total) { + uint32_t want = total - consumed; + if (want > cap) { + want = cap; + } + ret = wh_Client_RngGenerateRequest(ctx, want); + if (ret == 0) { + got = want; + do { + ret = wh_Client_RngGenerateResponse(ctx, big + consumed, + &got); + } while (ret == WH_ERROR_NOTREADY); + } + if (ret == 0) { + if (got == 0 || got > want) { + WH_ERROR_PRINT( + "Async RNG: bad chunk reply got=%u want=%u\n", + (unsigned)got, (unsigned)want); + ret = -1; + } + else { + consumed += got; + } + } + } + if (ret == 0 && whTest_RngBufNonZero(big, total) != 0) { + WH_ERROR_PRINT("Async RNG: chunked buffer all zeros\n"); + ret = -1; + } + } + + /* Case D: oversize request must be rejected without sending. 
*/ + if (ret == 0) { + int rc = wh_Client_RngGenerateRequest( + ctx, (uint32_t)WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ + 1u); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG: oversize Request returned %d (want BADARGS)\n", rc); + ret = -1; + } + } + + /* Case E: zero-size request must be rejected. */ + if (ret == 0) { + int rc = wh_Client_RngGenerateRequest(ctx, 0); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG: zero-size Request returned %d (want BADARGS)\n", + rc); + ret = -1; + } + } + + /* Case F: NULL ctx rejection on both halves. */ + if (ret == 0) { + int rc = wh_Client_RngGenerateRequest(NULL, 16); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG: NULL ctx Request returned %d (want BADARGS)\n", rc); + ret = -1; + } + } + if (ret == 0) { + got = 16; + int rc = wh_Client_RngGenerateResponse(NULL, small, &got); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG: NULL ctx Response returned %d (want BADARGS)\n", + rc); + ret = -1; + } + } + + /* Case G: NULL inout_size rejection. */ + if (ret == 0) { + int rc = wh_Client_RngGenerateResponse(ctx, small, NULL); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT("Async RNG: NULL inout_size Response returned %d " + "(want BADARGS)\n", + rc); + ret = -1; + } + } + + if (ret == 0) { + WH_TEST_PRINT("RNG ASYNC SUCCESS\n"); + } + return ret; +} + +#ifdef WOLFHSM_CFG_DMA +/* Direct exercise of the new async DMA RNG primitives. */ +static int whTest_CryptoRngDmaAsync(whClientContext* ctx) +{ + int ret = WH_ERROR_OK; + /* DMA bypasses the comm buffer so we can request more than COMM_DATA_LEN + * in a single round trip. 
*/ + uint8_t big[WOLFHSM_CFG_COMM_DATA_LEN * 2]; + + /* Case A: basic DMA Request -> Response */ + if (ret == 0) { + memset(big, 0, sizeof(big)); + ret = wh_Client_RngGenerateDmaRequest(ctx, big, sizeof(big)); + if (ret != WH_ERROR_OK) { + WH_ERROR_PRINT("Async RNG DMA: Request failed %d\n", ret); + } + } + if (ret == 0) { + do { + ret = wh_Client_RngGenerateDmaResponse(ctx); + } while (ret == WH_ERROR_NOTREADY); + if (ret != WH_ERROR_OK) { + WH_ERROR_PRINT("Async RNG DMA: Response failed %d\n", ret); + } + else if (whTest_RngBufNonZero(big, sizeof(big)) != 0) { + WH_ERROR_PRINT("Async RNG DMA: buffer all zeros\n"); + ret = -1; + } + } + + /* Case B: small DMA request still works (no chunking semantics). */ + if (ret == 0) { + uint8_t small[32]; + memset(small, 0, sizeof(small)); + ret = wh_Client_RngGenerateDmaRequest(ctx, small, sizeof(small)); + if (ret == 0) { + do { + ret = wh_Client_RngGenerateDmaResponse(ctx); + } while (ret == WH_ERROR_NOTREADY); + } + if (ret == 0 && whTest_RngBufNonZero(small, sizeof(small)) != 0) { + WH_ERROR_PRINT("Async RNG DMA: small buffer all zeros\n"); + ret = -1; + } + } + + /* Case C: input validation. 
*/ + if (ret == 0) { + int rc = wh_Client_RngGenerateDmaRequest(NULL, big, sizeof(big)); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG DMA: NULL ctx returned %d (want BADARGS)\n", rc); + ret = -1; + } + } + if (ret == 0) { + int rc = wh_Client_RngGenerateDmaRequest(ctx, NULL, sizeof(big)); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG DMA: NULL out returned %d (want BADARGS)\n", rc); + ret = -1; + } + } + if (ret == 0) { + int rc = wh_Client_RngGenerateDmaRequest(ctx, big, 0); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG DMA: zero size returned %d (want BADARGS)\n", rc); + ret = -1; + } + } + if (ret == 0) { + int rc = wh_Client_RngGenerateDmaResponse(NULL); + if (rc != WH_ERROR_BADARGS) { + WH_ERROR_PRINT( + "Async RNG DMA: Response NULL ctx returned %d (want BADARGS)\n", + rc); + ret = -1; + } + } + + if (ret == 0) { + WH_TEST_PRINT("RNG DMA ASYNC SUCCESS\n"); + } + return ret; +} +#endif /* WOLFHSM_CFG_DMA */ + #ifndef NO_RSA static int whTest_CryptoRsa(whClientContext* ctx, int devId, WC_RNG* rng) { @@ -7376,6 +7648,17 @@ int whTest_CryptoClientConfig(whClientConfig* config) } } + /* Direct exercise of the async RNG primitives (does not go through the + * wolfCrypt callback path, so devId is not relevant). 
*/ + if (ret == WH_ERROR_OK) { + ret = whTest_CryptoRngAsync(client); + } +#ifdef WOLFHSM_CFG_DMA + if (ret == WH_ERROR_OK) { + ret = whTest_CryptoRngDmaAsync(client); + } +#endif /* WOLFHSM_CFG_DMA */ + /* Now that we have tested all RNG devIds, reinitialize the default RNG * devId (non-DMA) that will be used by the remainder of the tests for * random input generation */ diff --git a/wolfhsm/wh_client.h b/wolfhsm/wh_client.h index fa1c0350..a71a788c 100644 --- a/wolfhsm/wh_client.h +++ b/wolfhsm/wh_client.h @@ -110,11 +110,21 @@ typedef struct { uint64_t ioSz; } whClientDmaAsyncSha; +/* Per-operation async DMA context for RNG: stores the translated output DMA + * address that must survive across the Request/Response boundary for POST + * cleanup of the client's output buffer. */ +typedef struct { + uintptr_t outAddr; + uintptr_t clientAddr; + uint64_t outSz; +} whClientDmaAsyncRng; + /* Async DMA context union. Only one DMA request can be in flight at a time * per client context, so a single union suffices. Each Response function * knows which member to access based on its own operation type. */ typedef union { whClientDmaAsyncSha sha; + whClientDmaAsyncRng rng; } whClientDmaAsyncCtx; typedef struct { diff --git a/wolfhsm/wh_client_crypto.h b/wolfhsm/wh_client_crypto.h index ea0c8156..5c1666a2 100644 --- a/wolfhsm/wh_client_crypto.h +++ b/wolfhsm/wh_client_crypto.h @@ -60,12 +60,54 @@ * requesting the maximum block size of data from the server at a time * * @param[in] ctx Pointer to the client context - * @param[in] out Pointer to the where the bytes are to be placed. May be NULL. - * @param[in] size Number of bytes to generate. * + * @param[out] out Pointer to where the bytes are to be placed. Must not be + * NULL. + * @param[in] size Number of bytes to generate. * @return int Returns 0 on success or a negative error code on failure. 
*/ int wh_Client_RngGenerate(whClientContext* ctx, uint8_t* out, uint32_t size); +/** + * @brief Async request half of a non-DMA RNG generate. + * + * Serializes and sends a request for size bytes of random data. Does NOT wait + * for a reply. Single-shot per call: chunking large requests is the caller's + * responsibility. The blocking wrapper wh_Client_RngGenerate handles chunking + * automatically. + * + * Contract: at most one outstanding async request may be in flight per + * whClientContext. The caller MUST call wh_Client_RngGenerateResponse before + * issuing any other async Request on the same ctx. + * + * @param[in] ctx Client context. + * @param[in] size Number of random bytes to request. Must be > 0 and must not + * exceed WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ. + * @return WH_ERROR_OK on success, WH_ERROR_BADARGS for invalid args or a size + * exceeding the per-call inline capacity, or a negative error from the + * transport. + */ +int wh_Client_RngGenerateRequest(whClientContext* ctx, uint32_t size); + +/** + * @brief Async response half of a non-DMA RNG generate. + * + * Single-shot RecvResponse; returns WH_ERROR_NOTREADY if the server has not + * yet replied. On success, copies up to *inout_size random bytes into out and + * updates *inout_size to the actual number received. + * + * @param[in] ctx Client context. + * @param[out] out Buffer to receive random bytes. May be NULL only if + * *inout_size is 0. + * @param[in,out] inout_size On entry: capacity of out (typically equals the + * size passed to wh_Client_RngGenerateRequest). On + * success: number of bytes actually written to out. + * @return WH_ERROR_OK on success, WH_ERROR_NOTREADY if no reply yet, + * WH_ERROR_ABORTED if the server returned more bytes than the buffer + * can hold, WH_ERROR_BADARGS for invalid args. 
+ */ +int wh_Client_RngGenerateResponse(whClientContext* ctx, uint8_t* out, + uint32_t* inout_size); + #ifdef WOLFHSM_CFG_DMA /** * @brief Generate random bytes using DMA @@ -80,6 +122,39 @@ int wh_Client_RngGenerate(whClientContext* ctx, uint8_t* out, uint32_t size); * @return int Returns 0 on success or a negative error code on failure. */ int wh_Client_RngGenerateDma(whClientContext* ctx, uint8_t* out, uint32_t size); + +/** + * @brief Async request half of a DMA RNG generate. + * + * Performs PRE address translation for the output buffer, sends the DMA + * request, and stashes the translated address for POST cleanup in the + * matching Response. Does NOT wait for a reply. + * + * Contract: at most one outstanding async request may be in flight per + * whClientContext. The caller MUST call wh_Client_RngGenerateDmaResponse + * before issuing any other async Request on the same ctx, and must keep out + * valid until the Response completes. + * + * @param[in] ctx Client context. + * @param[out] out Client buffer that will receive the random bytes via DMA. + * @param[in] size Number of random bytes to generate. Must be > 0. + * @return WH_ERROR_OK on success, WH_ERROR_BADARGS for invalid args, + * WH_ERROR_REQUEST_PENDING if a request is already in flight, or a + * negative DMA/transport error. No DMA mapping remains held on failure. + */ +int wh_Client_RngGenerateDmaRequest(whClientContext* ctx, uint8_t* out, + uint32_t size); + +/** + * @brief Async response half of a DMA RNG generate. + * + * Single-shot RecvResponse; returns WH_ERROR_NOTREADY if the server has not + * yet replied. The random bytes are written by the server directly to the + * client buffer passed to wh_Client_RngGenerateDmaRequest. POST DMA cleanup + * for the output buffer is performed on every non-NOTREADY return so the + * client buffer is safe to read regardless of error. 
+ */ +int wh_Client_RngGenerateDmaResponse(whClientContext* ctx); #endif /* WOLFHSM_CFG_DMA */ #ifdef HAVE_CURVE25519 diff --git a/wolfhsm/wh_message_crypto.h b/wolfhsm/wh_message_crypto.h index 34f783ae..ac341774 100644 --- a/wolfhsm/wh_message_crypto.h +++ b/wolfhsm/wh_message_crypto.h @@ -128,6 +128,15 @@ typedef struct { */ } whMessageCrypto_RngResponse; +/* Maximum number of random bytes that can be returned inline (after the generic + * crypto response header and the RngResponse struct) in a single comm-buffer + * message. Async callers must chunk requests larger than this; the blocking + * wrapper handles chunking automatically. */ +#define WH_MESSAGE_CRYPTO_RNG_MAX_INLINE_SZ \ + (WOLFHSM_CFG_COMM_DATA_LEN - \ + (uint32_t)sizeof(whMessageCrypto_GenericResponseHeader) - \ + (uint32_t)sizeof(whMessageCrypto_RngResponse)) + int wh_MessageCrypto_TranslateRngRequest(uint16_t magic, const whMessageCrypto_RngRequest* src, whMessageCrypto_RngRequest* dest);