From 0780fdacc7240af14278ea8797b5357d86f6787d Mon Sep 17 00:00:00 2001
From: Tianning Li <tianning.li@datadoghq.com>
Date: Tue, 24 Feb 2026 15:29:42 -0500
Subject: [PATCH] fix(lifecycle): raise axum body limit to 6 MB for large
 Lambda payloads

axum 0.7+ applies a 2 MB default body limit globally, causing the
/lambda/start-invocation endpoint to reject payloads larger than 2 MB
with "length limit exceeded". Raises the limit to 6 MB to match
Lambda's maximum synchronous invocation payload size.

Fixes #1041
---
 bottlecap/Cargo.lock                |   1 +
 bottlecap/Cargo.toml                |   1 +
 bottlecap/src/lifecycle/listener.rs |  83 ++++++++++++++++++-
 local_tests/Dockerfile.LargePayload |  29 +++++++
 local_tests/repro-large-payload.sh  | 122 ++++++++++++++++++++++++++++
 5 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100644 local_tests/Dockerfile.LargePayload
 create mode 100755 local_tests/repro-large-payload.sh
diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock
index 298487f95..44dd9cedc 100644
--- a/bottlecap/Cargo.lock
+++ b/bottlecap/Cargo.lock
@@ -525,6 +525,7 @@ dependencies = [
  "tokio",
  "tokio-util",
  "tonic-types",
+ "tower 0.5.3",
  "tower-http",
  "tracing",
  "tracing-core",
diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml
index abc4ea306..a67e15294 100644
--- a/bottlecap/Cargo.toml
+++ b/bottlecap/Cargo.toml
@@ -81,6 +81,7 @@ libddwaf = { version = "1.28.1", git = "https://github.com/DataDog/libddwaf-rust
 figment = { version = "0.10", default-features = false, features = ["yaml", "env", "test"] }
 proptest = "1.4"
 httpmock = "0.7"
+tower = { version = "0.5", features = ["util"] }
 mock_instant = "0.6"
 serial_test = "3.1"
 tempfile = "3.20"
diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs
index 67031d9d6..afbe78d52 100644
--- a/bottlecap/src/lifecycle/listener.rs
+++ b/bottlecap/src/lifecycle/listener.rs
@@ -3,7 +3,7 @@
 
 use axum::{
     Router,
-    extract::{Request, State},
+    extract::{DefaultBodyLimit, Request, State},
     http::{HeaderMap, StatusCode},
     response::{IntoResponse, Response},
     routing::{get, post},
@@ -37,6 +37,9 @@ const HELLO_PATH: &str = "/lambda/hello";
 const START_INVOCATION_PATH: &str = "/lambda/start-invocation";
 const END_INVOCATION_PATH: &str = "/lambda/end-invocation";
 const AGENT_PORT: usize = 8124;
+// Lambda's maximum synchronous invocation payload size (6 MB, i.e. 6 * 1024 * 1024 bytes), used as the router's body limit.
+// Reference: https://docs.aws.amazon.com/lambda/latest/api/API_Invoke.html
+const LAMBDA_INVOCATION_MAX_PAYLOAD: usize = 6 * 1024 * 1024;
 
 /// Extracts the AWS Lambda request ID from the LWA proxy header.
 fn extract_request_id_from_headers(headers: &HashMap<String, String>) -> Option<String> {
@@ -102,6 +105,7 @@ impl Listener {
             .route(END_INVOCATION_PATH, post(Self::handle_end_invocation))
             .route(HELLO_PATH, get(Self::handle_hello))
             .with_state(state)
+            .layer(DefaultBodyLimit::max(LAMBDA_INVOCATION_MAX_PAYLOAD))
     }
 
     async fn graceful_shutdown(tasks: Arc<Mutex<JoinSet<()>>>, shutdown_token: CancellationToken) {
@@ -270,6 +274,83 @@ impl Listener {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use axum::{body::Body, http::Request, routing::post};
+    use http_body_util::BodyExt;
+    use tower::ServiceExt;
+
+    /// Builds a standalone router (not the production one) carrying only the body limit layer.
+    /// The handler reads the full body (via the `Bytes` extractor), which
+    /// is what triggers `DefaultBodyLimit` enforcement; it then answers 200 OK.
+    fn body_limit_router() -> Router {
+        async fn handler(body: Bytes) -> StatusCode {
+            let _ = body; // the body is only extracted, never inspected
+            StatusCode::OK
+        }
+        Router::new()
+            .route("/lambda/start-invocation", post(handler))
+            .layer(DefaultBodyLimit::max(LAMBDA_INVOCATION_MAX_PAYLOAD)) // the limit under test
+    }
+
+    #[tokio::test]
+    async fn test_body_limit_accepts_payload_just_below_6mb() {
+        let router = body_limit_router();
+        // One byte under the configured limit: the request must be accepted.
+        let payload = vec![b'x'; LAMBDA_INVOCATION_MAX_PAYLOAD - 1];
+        let req = Request::builder()
+            .method("POST")
+            .uri("/lambda/start-invocation")
+            .header("Content-Type", "application/json")
+            .body(Body::from(payload)) // filler bytes, not valid JSON; the test handler never parses it
+            .expect("failed to build request");
+
+        let response = router.oneshot(req).await.expect("request failed"); // single in-process call via tower
+        assert_eq!(response.status(), StatusCode::OK);
+    }
+
+    #[tokio::test]
+    async fn test_body_limit_accepts_payload_above_old_2mb_default() {
+        let router = body_limit_router();
+        // 3 * 1024 * 1024 bytes: above the old axum 2 MB default, below the configured limit, so it must succeed.
+        let payload = vec![b'x'; 3 * 1024 * 1024];
+        let req = Request::builder()
+            .method("POST")
+            .uri("/lambda/start-invocation")
+            .header("Content-Type", "application/json")
+            .body(Body::from(payload)) // filler bytes, not valid JSON; the test handler never parses it
+            .expect("failed to build request");
+
+        let response = router.oneshot(req).await.expect("request failed"); // single in-process call via tower
+        assert_eq!(response.status(), StatusCode::OK);
+    }
+
+    #[tokio::test]
+    async fn test_body_limit_rejects_payload_above_6mb() {
+        let router = body_limit_router();
+        // One byte over the configured limit: must be rejected with 413 Payload Too Large.
+        let payload = vec![b'x'; LAMBDA_INVOCATION_MAX_PAYLOAD + 1];
+        let req = Request::builder()
+            .method("POST")
+            .uri("/lambda/start-invocation")
+            .header("Content-Type", "application/json")
+            .body(Body::from(payload))
+            .expect("failed to build request");
+
+        let response = router.oneshot(req).await.expect("request failed"); // single in-process call via tower
+        assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);
+
+        let body = response // collect the whole response body so its text can be checked
+            .into_body()
+            .collect()
+            .await
+            .expect("failed to read body")
+            .to_bytes();
+        assert!(
+            body.windows(b"length limit exceeded".len()) // byte-wise substring search
+                .any(|w| w == b"length limit exceeded"),
+            "expected 'length limit exceeded' in response body, got: {}",
+            String::from_utf8_lossy(&body)
+        );
+    }
 
     #[test]
     fn test_extract_request_id_from_header() {
diff --git a/local_tests/Dockerfile.LargePayload b/local_tests/Dockerfile.LargePayload
new file mode 100644
index 000000000..18e9fbd9e
--- /dev/null
+++ b/local_tests/Dockerfile.LargePayload
@@ -0,0 +1,29 @@
+# No Lambda RIE — runs the extension directly against a minimal mock Extensions API server.
+# Uses amazonlinux:2 to match the build environment (same glibc / library versions as
+# Dockerfile.build-bottlecap). Build context must hold datadog-agent, mock-extensions-api.py, entrypoint.sh.
+FROM --platform=linux/amd64 amazonlinux:2
+
+RUN yum install -y curl python3
+
+RUN mkdir -p /opt/extensions
+COPY datadog-agent /opt/extensions/datadog-agent
+RUN chmod +x /opt/extensions/datadog-agent
+
+COPY mock-extensions-api.py /mock-extensions-api.py
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+# Extension configuration: placeholder API key, DD URLs on 127.0.0.1:3333, tracing off, debug logging
+ENV DD_API_KEY=fake-key-for-local-test
+ENV DD_APM_DD_URL=http://127.0.0.1:3333
+ENV DD_DD_URL=http://127.0.0.1:3333
+ENV DD_TRACE_ENABLED=false
+ENV DD_LOG_LEVEL=DEBUG
+
+# Point the extension at our mock Lambda Extensions API (127.0.0.1:9001) and set placeholder function metadata
+ENV AWS_LAMBDA_RUNTIME_API=127.0.0.1:9001
+ENV AWS_LAMBDA_FUNCTION_NAME=large-payload-test
+ENV AWS_LAMBDA_FUNCTION_MEMORY_SIZE=512
+ENV AWS_REGION=us-east-1
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/local_tests/repro-large-payload.sh b/local_tests/repro-large-payload.sh
new file mode 100755
index 000000000..7986f2a7c
--- /dev/null
+++ b/local_tests/repro-large-payload.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# repro-large-payload.sh
+# Reproduces GitHub issue #1041: extension errors on Lambda payloads > 2 MB.
+#
+# Strategy: POST the large payload directly to the extension's
+# /lambda/start-invocation endpoint (port 8124), exactly as the DD Java agent
+# does in production. The extension binds to 127.0.0.1:8124 (loopback only),
+# so we write the payload to a file, docker-cp it into the container, and
+# send the request from inside the container via docker exec.
+#
+# Run from the repo root:
+#   bash local_tests/repro-large-payload.sh
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+IMAGE_NAME="dd-extension-large-payload-repro"
+CONTAINER_ID=""  # set after 'docker run'; cleanup skips container teardown while this is empty
+LOG_FILE="$SCRIPT_DIR/large-payload-repro.log"  # cleanup writes the container logs here
+PAYLOAD_FILE=$(mktemp /tmp/large-payload-XXXXXX.json)  # host-side payload file, removed by cleanup
+
+# 3,200,000 filler characters (~3 MB) — above the old 2 MB axum default, below the new 6 MB limit.
+PAYLOAD_CHARS=3200000
+
+cleanup() {  # Best-effort teardown (docker steps tolerate failure): payload file, saved logs, container, image.
+    rm -f "$PAYLOAD_FILE"
+    if [[ -n "$CONTAINER_ID" ]]; then
+        docker logs "$CONTAINER_ID" > "$LOG_FILE" 2>&1 || true  # save logs before the container is removed
+        docker stop "$CONTAINER_ID" > /dev/null 2>&1 || true
+        docker rm   "$CONTAINER_ID" > /dev/null 2>&1 || true
+    fi
+    docker rmi "$IMAGE_NAME" > /dev/null 2>&1 || true
+}
+trap cleanup EXIT; trap 'exit 130' INT; trap 'exit 143' TERM  # signals just exit; cleanup then runs once via EXIT
+
+# Always rebuild the Linux x86_64 binary from the current source.
+# Mirrors the official AL2 build environment (images/Dockerfile.bottlecap.compile).
+echo "==> Building Linux extension binary (~10-20 min first run, cached after)..."
+rm -f "$SCRIPT_DIR/datadog-agent"  # remove any previously built binary before rebuilding
+docker build \
+    --platform linux/amd64 \
+    -f "$SCRIPT_DIR/Dockerfile.build-bottlecap" \
+    -t dd-bottlecap-builder \
+    "$REPO_ROOT"
+cid=$(docker create dd-bottlecap-builder)  # created but never started; only used to copy the binary out
+docker cp "$cid:/bottlecap" "$SCRIPT_DIR/datadog-agent"
+docker rm "$cid" > /dev/null
+docker rmi dd-bottlecap-builder > /dev/null 2>&1 || true  # best-effort removal of the builder image tag
+chmod +x "$SCRIPT_DIR/datadog-agent"
+
+echo "==> Building test image..."
+docker build \
+    --no-cache \
+    --platform linux/amd64 \
+    -f "$SCRIPT_DIR/Dockerfile.LargePayload" \
+    -t "$IMAGE_NAME" \
+    "$SCRIPT_DIR"
+
+echo "==> Starting container..."
+CONTAINER_ID=$(docker run -d --platform linux/amd64 "$IMAGE_NAME")  # detached; the ID drives docker exec/logs and cleanup
+
+echo "==> Waiting for extension to bind port 8124..."
+READY=false
+for _ in $(seq 1 30); do  # at most 30 attempts, sleeping 1s between them
+    if ! docker inspect "$CONTAINER_ID" --format='{{.State.Running}}' 2>/dev/null | grep -q "true"; then
+        echo "ERROR: Container exited during init. Logs:"
+        docker logs "$CONTAINER_ID" 2>&1 | tail -30
+        exit 1
+    fi
+    if docker exec "$CONTAINER_ID" \
+        curl -sf -o /dev/null \
+        -X POST "http://localhost:8124/lambda/start-invocation" \
+        -H "Content-Type: application/json" \
+        -d '{}' --max-time 2 2>/dev/null; then
+        READY=true  # NOTE(review): the probe is itself a start-invocation POST; GET /lambda/hello may be a lighter check — confirm
+        break
+    fi
+    sleep 1
+done
+
+if [[ "$READY" != "true" ]]; then  # no probe succeeded within the attempts above
+    echo "ERROR: Extension did not become ready after 30s. Logs:"
+    docker logs "$CONTAINER_ID" 2>&1
+    exit 1
+fi
+
+echo "==> Sending ~3 MB payload to /lambda/start-invocation..."
+python3 -c "
+import json
+payload = {'description': 'Large payload repro for GitHub issue #1041', 'data': 'x' * $PAYLOAD_CHARS}
+print(json.dumps(payload))
+" > "$PAYLOAD_FILE"  # generated on the host; bash expands $PAYLOAD_CHARS before python runs
+
+PAYLOAD_SIZE=$(wc -c < "$PAYLOAD_FILE")  # byte count, only used in the final report
+docker cp "$PAYLOAD_FILE" "$CONTAINER_ID:/tmp/large-payload.json"  # the request is sent from inside the container
+
+HTTP_CODE=$(docker exec "$CONTAINER_ID" \
+    curl -s -o /dev/null -w "%{http_code}" \
+    -X POST "http://localhost:8124/lambda/start-invocation" \
+    -H "Content-Type: application/json" \
+    -H "lambda-runtime-aws-request-id: test-large-payload-request" \
+    -H "datadog-meta-lang: java" \
+    --data-binary "@/tmp/large-payload.json" \
+    --max-time 15) || HTTP_CODE="error"  # a curl/docker failure is recorded instead of aborting under set -e
+sleep 1  # presumably gives the extension time to log before the logs are inspected — confirm
+
+ERRORS=$(docker logs "$CONTAINER_ID" 2>&1 | grep -E "length limit|extract request body" || true)
+# Verdict: matching log lines => bug reproduced; failed or non-2xx request => inconclusive; otherwise OK.
+echo ""
+echo "────────────────────────────────────────────────────────────"
+if [[ -n "$ERRORS" ]]; then
+    echo "RESULT: BUG REPRODUCED (fix not applied or not working)"
+    printf '\n%s\n' "$ERRORS"
+elif [[ "$HTTP_CODE" != 2?? ]]; then  # also catches HTTP_CODE="error" (curl or docker exec failed)
+    echo "RESULT: INCONCLUSIVE — request failed (HTTP $HTTP_CODE) for a ${PAYLOAD_SIZE}-byte payload"
+else
+    echo "RESULT: OK — no 'length limit exceeded' error (fix is working)"
+    echo "        HTTP $HTTP_CODE returned for a ${PAYLOAD_SIZE}-byte payload"
+fi
+echo "────────────────────────────────────────────────────────────"
+printf '\nFull logs saved to: %s\n' "$LOG_FILE"