From 0780fdacc7240af14278ea8797b5357d86f6787d Mon Sep 17 00:00:00 2001
From: Tianning Li
Date: Tue, 24 Feb 2026 15:29:42 -0500
Subject: [PATCH] fix(lifecycle): raise axum body limit to 6 MB for large Lambda payloads

axum 0.7+ applies a 2 MB default body limit globally, causing the
/lambda/start-invocation endpoint to reject payloads larger than 2 MB
with "length limit exceeded". Raises the limit to 6 MB to match Lambda's
maximum synchronous invocation payload size.

Fixes #1041
---
 bottlecap/Cargo.lock                |   1 +
 bottlecap/Cargo.toml                |   1 +
 bottlecap/src/lifecycle/listener.rs |  83 ++++++++++++++++++-
 local_tests/Dockerfile.LargePayload |  29 +++++++
 local_tests/repro-large-payload.sh  | 122 ++++++++++++++++++++++++++++
 5 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100644 local_tests/Dockerfile.LargePayload
 create mode 100755 local_tests/repro-large-payload.sh

diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock
index 298487f95..44dd9cedc 100644
--- a/bottlecap/Cargo.lock
+++ b/bottlecap/Cargo.lock
@@ -525,6 +525,7 @@ dependencies = [
  "tokio",
  "tokio-util",
  "tonic-types",
+ "tower 0.5.3",
  "tower-http",
  "tracing",
  "tracing-core",
diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml
index abc4ea306..a67e15294 100644
--- a/bottlecap/Cargo.toml
+++ b/bottlecap/Cargo.toml
@@ -81,6 +81,7 @@ libddwaf = { version = "1.28.1", git = "https://github.com/DataDog/libddwaf-rust
 figment = { version = "0.10", default-features = false, features = ["yaml", "env", "test"] }
 proptest = "1.4"
 httpmock = "0.7"
+tower = { version = "0.5", features = ["util"] }
 mock_instant = "0.6"
 serial_test = "3.1"
 tempfile = "3.20"
diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs
index 67031d9d6..afbe78d52 100644
--- a/bottlecap/src/lifecycle/listener.rs
+++ b/bottlecap/src/lifecycle/listener.rs
@@ -3,7 +3,7 @@
 
 use axum::{
     Router,
-    extract::{Request, State},
+    extract::{DefaultBodyLimit, Request, State},
     http::{HeaderMap, StatusCode},
     response::{IntoResponse, Response},
     routing::{get, post},
@@ -37,6 +37,9 @@ const HELLO_PATH: &str = "/lambda/hello";
 const START_INVOCATION_PATH: &str = "/lambda/start-invocation";
 const END_INVOCATION_PATH: &str = "/lambda/end-invocation";
 const AGENT_PORT: usize = 8124;
+// Lambda's maximum synchronous invocation payload size
+// reference: https://docs.aws.amazon.com/lambda/latest/api/API_Invoke.html
+const LAMBDA_INVOCATION_MAX_PAYLOAD: usize = 6 * 1024 * 1024;
 
 /// Extracts the AWS Lambda request ID from the LWA proxy header.
 fn extract_request_id_from_headers(headers: &HashMap) -> Option {
@@ -102,6 +105,7 @@ impl Listener {
             .route(END_INVOCATION_PATH, post(Self::handle_end_invocation))
             .route(HELLO_PATH, get(Self::handle_hello))
             .with_state(state)
+            .layer(DefaultBodyLimit::max(LAMBDA_INVOCATION_MAX_PAYLOAD))
     }
 
     async fn graceful_shutdown(tasks: Arc>>, shutdown_token: CancellationToken) {
@@ -270,6 +274,83 @@ impl Listener {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use axum::{body::Body, http::Request, routing::post};
+    use http_body_util::BodyExt;
+    use tower::ServiceExt;
+
+    /// Builds a minimal router that applies only the body limit layer.
+    /// The handler reads the full body (via the `Bytes` extractor), which
+    /// is what triggers `DefaultBodyLimit` enforcement.
+    fn body_limit_router() -> Router {
+        async fn handler(body: Bytes) -> StatusCode {
+            let _ = body;
+            StatusCode::OK
+        }
+        Router::new()
+            .route("/lambda/start-invocation", post(handler))
+            .layer(DefaultBodyLimit::max(LAMBDA_INVOCATION_MAX_PAYLOAD))
+    }
+
+    #[tokio::test]
+    async fn test_body_limit_accepts_payload_just_below_6mb() {
+        let router = body_limit_router();
+        // 6 MB - 1 byte: should be accepted
+        let payload = vec![b'x'; LAMBDA_INVOCATION_MAX_PAYLOAD - 1];
+        let req = Request::builder()
+            .method("POST")
+            .uri("/lambda/start-invocation")
+            .header("Content-Type", "application/json")
+            .body(Body::from(payload))
+            .expect("failed to build request");
+
+        let response = router.oneshot(req).await.expect("request failed");
+        assert_eq!(response.status(), StatusCode::OK);
+    }
+
+    #[tokio::test]
+    async fn test_body_limit_accepts_payload_above_old_2mb_default() {
+        let router = body_limit_router();
+        // 3 MB: above the old axum 2 MB default, should now succeed
+        let payload = vec![b'x'; 3 * 1024 * 1024];
+        let req = Request::builder()
+            .method("POST")
+            .uri("/lambda/start-invocation")
+            .header("Content-Type", "application/json")
+            .body(Body::from(payload))
+            .expect("failed to build request");
+
+        let response = router.oneshot(req).await.expect("request failed");
+        assert_eq!(response.status(), StatusCode::OK);
+    }
+
+    #[tokio::test]
+    async fn test_body_limit_rejects_payload_above_6mb() {
+        let router = body_limit_router();
+        // 6 MB + 1 byte: should be rejected with 413
+        let payload = vec![b'x'; LAMBDA_INVOCATION_MAX_PAYLOAD + 1];
+        let req = Request::builder()
+            .method("POST")
+            .uri("/lambda/start-invocation")
+            .header("Content-Type", "application/json")
+            .body(Body::from(payload))
+            .expect("failed to build request");
+
+        let response = router.oneshot(req).await.expect("request failed");
+        assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);
+
+        let body = response
+            .into_body()
+            .collect()
+            .await
+            .expect("failed to read body")
+            .to_bytes();
+        assert!(
+            body.windows(b"length limit exceeded".len())
+                .any(|w| w == b"length limit exceeded"),
+            "expected 'length limit exceeded' in response body, got: {}",
+            String::from_utf8_lossy(&body)
+        );
+    }
 
     #[test]
     fn test_extract_request_id_from_header() {
diff --git a/local_tests/Dockerfile.LargePayload b/local_tests/Dockerfile.LargePayload
new file mode 100644
index 000000000..18e9fbd9e
--- /dev/null
+++ b/local_tests/Dockerfile.LargePayload
@@ -0,0 +1,29 @@
+# No Lambda RIE — runs the extension directly against a minimal mock
+# Extensions API server. Uses amazonlinux:2 to match the build environment
+# (same glibc / library versions as Dockerfile.build-bottlecap).
+FROM --platform=linux/amd64 amazonlinux:2
+
+RUN yum install -y curl python3
+
+RUN mkdir -p /opt/extensions
+COPY datadog-agent /opt/extensions/datadog-agent
+RUN chmod +x /opt/extensions/datadog-agent
+
+COPY mock-extensions-api.py /mock-extensions-api.py
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+# Extension configuration
+ENV DD_API_KEY=fake-key-for-local-test
+ENV DD_APM_DD_URL=http://127.0.0.1:3333
+ENV DD_DD_URL=http://127.0.0.1:3333
+ENV DD_TRACE_ENABLED=false
+ENV DD_LOG_LEVEL=DEBUG
+
+# Point the extension at our mock Lambda Extensions API
+ENV AWS_LAMBDA_RUNTIME_API=127.0.0.1:9001
+ENV AWS_LAMBDA_FUNCTION_NAME=large-payload-test
+ENV AWS_LAMBDA_FUNCTION_MEMORY_SIZE=512
+ENV AWS_REGION=us-east-1
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/local_tests/repro-large-payload.sh b/local_tests/repro-large-payload.sh
new file mode 100755
index 000000000..7986f2a7c
--- /dev/null
+++ b/local_tests/repro-large-payload.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# repro-large-payload.sh
+# Reproduces GitHub issue #1041: extension errors on Lambda payloads > 2 MB.
+#
+# Strategy: POST the large payload directly to the extension's
+# /lambda/start-invocation endpoint (port 8124), exactly as the DD Java agent
+# does in production. The extension binds to 127.0.0.1:8124 (loopback only),
+# so we write the payload to a file, docker-cp it into the container, and
+# send the request from inside the container via docker exec.
+#
+# Run from the repo root:
+#   bash local_tests/repro-large-payload.sh
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+IMAGE_NAME="dd-extension-large-payload-repro"
+CONTAINER_ID=""
+LOG_FILE="$SCRIPT_DIR/large-payload-repro.log"
+PAYLOAD_FILE=$(mktemp /tmp/large-payload-XXXXXX.json)
+
+# 3 MB — above the old 2 MB axum default, below the new 6 MB limit.
+PAYLOAD_CHARS=3200000
+
+cleanup() {
+    rm -f "$PAYLOAD_FILE"
+    if [[ -n "$CONTAINER_ID" ]]; then
+        docker logs "$CONTAINER_ID" > "$LOG_FILE" 2>&1 || true
+        docker stop "$CONTAINER_ID" > /dev/null 2>&1 || true
+        docker rm "$CONTAINER_ID" > /dev/null 2>&1 || true
+    fi
+    docker rmi "$IMAGE_NAME" > /dev/null 2>&1 || true
+}
+trap cleanup EXIT INT TERM
+
+# Always rebuild the Linux x86_64 binary from the current source.
+# Mirrors the official AL2 build environment (images/Dockerfile.bottlecap.compile).
+echo "==> Building Linux extension binary (~10-20 min first run, cached after)..."
+rm -f "$SCRIPT_DIR/datadog-agent"
+docker build \
+    --platform linux/amd64 \
+    -f "$SCRIPT_DIR/Dockerfile.build-bottlecap" \
+    -t dd-bottlecap-builder \
+    "$REPO_ROOT"
+cid=$(docker create dd-bottlecap-builder)
+docker cp "$cid:/bottlecap" "$SCRIPT_DIR/datadog-agent"
+docker rm "$cid" > /dev/null
+docker rmi dd-bottlecap-builder > /dev/null 2>&1 || true
+chmod +x "$SCRIPT_DIR/datadog-agent"
+
+echo "==> Building test image..."
+docker build \
+    --no-cache \
+    --platform linux/amd64 \
+    -f "$SCRIPT_DIR/Dockerfile.LargePayload" \
+    -t "$IMAGE_NAME" \
+    "$SCRIPT_DIR"
+
+echo "==> Starting container..."
+CONTAINER_ID=$(docker run -d --platform linux/amd64 "$IMAGE_NAME")
+
+echo "==> Waiting for extension to bind port 8124..."
+READY=false
+for _ in $(seq 1 30); do
+    if ! docker inspect "$CONTAINER_ID" --format='{{.State.Running}}' 2>/dev/null | grep -q "true"; then
+        echo "ERROR: Container exited during init. Logs:"
+        docker logs "$CONTAINER_ID" 2>&1 | tail -30
+        exit 1
+    fi
+    if docker exec "$CONTAINER_ID" \
+            curl -sf -o /dev/null \
+            -X POST "http://localhost:8124/lambda/start-invocation" \
+            -H "Content-Type: application/json" \
+            -d '{}' --max-time 2 2>/dev/null; then
+        READY=true
+        break
+    fi
+    sleep 1
+done
+
+if [[ "$READY" != "true" ]]; then
+    echo "ERROR: Extension did not become ready after 30s. Logs:"
+    docker logs "$CONTAINER_ID" 2>&1
+    exit 1
+fi
+
+echo "==> Sending ~3 MB payload to /lambda/start-invocation..."
+python3 -c "
+import json
+payload = {'description': 'Large payload repro for GitHub issue #1041', 'data': 'x' * $PAYLOAD_CHARS}
+print(json.dumps(payload))
+" > "$PAYLOAD_FILE"
+
+PAYLOAD_SIZE=$(wc -c < "$PAYLOAD_FILE")
+docker cp "$PAYLOAD_FILE" "$CONTAINER_ID:/tmp/large-payload.json"
+
+HTTP_CODE=$(docker exec "$CONTAINER_ID" \
+    curl -s -o /dev/null -w "%{http_code}" \
+    -X POST "http://localhost:8124/lambda/start-invocation" \
+    -H "Content-Type: application/json" \
+    -H "lambda-runtime-aws-request-id: test-large-payload-request" \
+    -H "datadog-meta-lang: java" \
+    --data-binary "@/tmp/large-payload.json" \
+    --max-time 15) || HTTP_CODE="error"
+sleep 1
+
+ERRORS=$(docker logs "$CONTAINER_ID" 2>&1 | grep -E "length limit|extract request body" || true)
+
+echo ""
+echo "────────────────────────────────────────────────────────────"
+if [[ -n "$ERRORS" ]]; then
+    echo "RESULT: BUG REPRODUCED (fix not applied or not working)"
+    echo ""
+    echo "$ERRORS"
+else
+    echo "RESULT: OK — no 'length limit exceeded' error (fix is working)"
+    echo " HTTP $HTTP_CODE returned for a ${PAYLOAD_SIZE}-byte payload"
+fi
+echo "────────────────────────────────────────────────────────────"
+echo ""
+echo "Full logs saved to: $LOG_FILE"