Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bottlecap/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions bottlecap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ libddwaf = { version = "1.28.1", git = "https://github.com/DataDog/libddwaf-rust
figment = { version = "0.10", default-features = false, features = ["yaml", "env", "test"] }
proptest = "1.4"
httpmock = "0.7"
tower = { version = "0.5", features = ["util"] }
mock_instant = "0.6"
serial_test = "3.1"
tempfile = "3.20"
Expand Down
83 changes: 82 additions & 1 deletion bottlecap/src/lifecycle/listener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use axum::{
Router,
extract::{Request, State},
extract::{DefaultBodyLimit, Request, State},
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
routing::{get, post},
Expand Down Expand Up @@ -37,6 +37,9 @@ const HELLO_PATH: &str = "/lambda/hello";
const START_INVOCATION_PATH: &str = "/lambda/start-invocation";
const END_INVOCATION_PATH: &str = "/lambda/end-invocation";
const AGENT_PORT: usize = 8124;
// Lambda's maximum synchronous invocation payload size
// reference: https://docs.aws.amazon.com/lambda/latest/api/API_Invoke.html
// NOTE(review): this cap covers synchronous invocations only. Lambda
// response streaming allows much larger payloads (soft limit ~200 MB) —
// if streamed payloads are ever proxied through this listener, this
// constant will need revisiting. TODO confirm streaming is out of scope.
const LAMBDA_INVOCATION_MAX_PAYLOAD: usize = 6 * 1024 * 1024;
Comment on lines +40 to +42
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering how would this look like with streaming lambda payload... do we have guards to not send the payload if it goes over the limit?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because in streaming the max is 200MB

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean good for now, but we might want to leave a comment about streaming


/// Extracts the AWS Lambda request ID from the LWA proxy header.
fn extract_request_id_from_headers(headers: &HashMap<String, String>) -> Option<String> {
Expand Down Expand Up @@ -102,6 +105,7 @@ impl Listener {
.route(END_INVOCATION_PATH, post(Self::handle_end_invocation))
.route(HELLO_PATH, get(Self::handle_hello))
.with_state(state)
.layer(DefaultBodyLimit::max(LAMBDA_INVOCATION_MAX_PAYLOAD))
}

async fn graceful_shutdown(tasks: Arc<Mutex<JoinSet<()>>>, shutdown_token: CancellationToken) {
Expand Down Expand Up @@ -270,6 +274,83 @@ impl Listener {
#[cfg(test)]
mod tests {
use super::*;
use axum::{body::Body, http::Request, routing::post};
use http_body_util::BodyExt;
use tower::ServiceExt;

/// Builds a minimal router wired with only the body-limit layer, mirroring
/// how the production listener applies it. The handler consumes the full
/// body through the `Bytes` extractor — reading the body is what makes
/// `DefaultBodyLimit` enforcement fire.
fn body_limit_router() -> Router {
    // Drain the body and answer 200; the extractor enforces the limit.
    async fn sink(_body: Bytes) -> StatusCode {
        StatusCode::OK
    }
    let app = Router::new().route("/lambda/start-invocation", post(sink));
    app.layer(DefaultBodyLimit::max(LAMBDA_INVOCATION_MAX_PAYLOAD))
}

#[tokio::test]
async fn test_body_limit_accepts_payload_just_below_6mb() {
    // One byte under the 6 MB cap: must pass through and return 200.
    let body_bytes = vec![b'x'; LAMBDA_INVOCATION_MAX_PAYLOAD - 1];
    let request = Request::builder()
        .uri("/lambda/start-invocation")
        .method("POST")
        .header("Content-Type", "application/json")
        .body(Body::from(body_bytes))
        .expect("failed to build request");

    let resp = body_limit_router()
        .oneshot(request)
        .await
        .expect("request failed");
    assert_eq!(resp.status(), StatusCode::OK);
}

#[tokio::test]
async fn test_body_limit_accepts_payload_above_old_2mb_default() {
    // 3 MB sits above axum's old 2 MB default but under the raised 6 MB
    // cap, so it must now be accepted.
    let three_mb = 3 * 1024 * 1024;
    let request = Request::builder()
        .uri("/lambda/start-invocation")
        .method("POST")
        .header("Content-Type", "application/json")
        .body(Body::from(vec![b'x'; three_mb]))
        .expect("failed to build request");

    let resp = body_limit_router()
        .oneshot(request)
        .await
        .expect("request failed");
    assert_eq!(resp.status(), StatusCode::OK);
}

#[tokio::test]
async fn test_body_limit_rejects_payload_above_6mb() {
    let router = body_limit_router();
    // 6 MB + 1 byte: should be rejected with 413
    let payload = vec![b'x'; LAMBDA_INVOCATION_MAX_PAYLOAD + 1];
    let req = Request::builder()
        .method("POST")
        .uri("/lambda/start-invocation")
        .header("Content-Type", "application/json")
        .body(Body::from(payload))
        .expect("failed to build request");

    let response = router.oneshot(req).await.expect("request failed");
    assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);

    // The limit layer's error body should mention the length limit.
    let body = response
        .into_body()
        .collect()
        .await
        .expect("failed to read body")
        .to_bytes();
    // Decode once and use `str::contains` instead of a manual byte-window
    // scan; the decoded text also doubles as the failure diagnostic.
    let body_text = String::from_utf8_lossy(&body);
    assert!(
        body_text.contains("length limit exceeded"),
        "expected 'length limit exceeded' in response body, got: {body_text}"
    );
}

#[test]
fn test_extract_request_id_from_header() {
Expand Down
29 changes: 29 additions & 0 deletions local_tests/Dockerfile.LargePayload
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# No Lambda RIE — runs the extension directly against a minimal mock
# Extensions API server. Uses amazonlinux:2 to match the build environment
# (same glibc / library versions as Dockerfile.build-bottlecap).
FROM --platform=linux/amd64 amazonlinux:2

# Install the probe tools, then clean the yum cache in the same layer so
# the metadata doesn't bloat the image.
RUN yum install -y curl python3 && yum clean all && rm -rf /var/cache/yum

# The extension binary is loaded from /opt/extensions, as on real Lambda.
RUN mkdir -p /opt/extensions
COPY datadog-agent /opt/extensions/datadog-agent
RUN chmod +x /opt/extensions/datadog-agent

COPY mock-extensions-api.py /mock-extensions-api.py
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Extension configuration — fake intake endpoints so nothing leaves the box.
ENV DD_API_KEY=fake-key-for-local-test
ENV DD_APM_DD_URL=http://127.0.0.1:3333
ENV DD_DD_URL=http://127.0.0.1:3333
ENV DD_TRACE_ENABLED=false
ENV DD_LOG_LEVEL=DEBUG

# Point the extension at our mock Lambda Extensions API
ENV AWS_LAMBDA_RUNTIME_API=127.0.0.1:9001
ENV AWS_LAMBDA_FUNCTION_NAME=large-payload-test
ENV AWS_LAMBDA_FUNCTION_MEMORY_SIZE=512
ENV AWS_REGION=us-east-1

ENTRYPOINT ["/entrypoint.sh"]
122 changes: 122 additions & 0 deletions local_tests/repro-large-payload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/bin/bash
# repro-large-payload.sh
# Reproduces GitHub issue #1041: extension errors on Lambda payloads > 2 MB.
#
# Strategy: POST the large payload directly to the extension's
# /lambda/start-invocation endpoint (port 8124), exactly as the DD Java agent
# does in production. The extension binds to 127.0.0.1:8124 (loopback only),
# so we write the payload to a file, docker-cp it into the container, and
# send the request from inside the container via docker exec.
#
# Run from the repo root:
#   bash local_tests/repro-large-payload.sh

# Fail fast: abort on any error, unset variable, or pipeline failure.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
IMAGE_NAME="dd-extension-large-payload-repro"
# Populated once the container starts; cleanup() checks it before teardown.
CONTAINER_ID=""
LOG_FILE="$SCRIPT_DIR/large-payload-repro.log"
# Temp file for the generated JSON payload; removed by cleanup().
PAYLOAD_FILE=$(mktemp /tmp/large-payload-XXXXXX.json)

# 3 MB — above the old 2 MB axum default, below the new 6 MB limit.
PAYLOAD_CHARS=3200000

# Best-effort teardown: save container logs, then remove the container,
# image, and temp payload. Every docker call is `|| true` so cleanup never
# aborts under `set -e`.
cleanup() {
    rm -f "$PAYLOAD_FILE"
    if [[ -n "$CONTAINER_ID" ]]; then
        # Capture logs before the container is destroyed.
        docker logs "$CONTAINER_ID" > "$LOG_FILE" 2>&1 || true
        docker stop "$CONTAINER_ID" > /dev/null 2>&1 || true
        docker rm "$CONTAINER_ID" > /dev/null 2>&1 || true
    fi
    docker rmi "$IMAGE_NAME" > /dev/null 2>&1 || true
}
trap cleanup EXIT INT TERM

# Always rebuild the Linux x86_64 binary from the current source.
# Mirrors the official AL2 build environment (images/Dockerfile.bottlecap.compile).
echo "==> Building Linux extension binary (~10-20 min first run, cached after)..."
rm -f "$SCRIPT_DIR/datadog-agent"
docker build \
    --platform linux/amd64 \
    -f "$SCRIPT_DIR/Dockerfile.build-bottlecap" \
    -t dd-bottlecap-builder \
    "$REPO_ROOT"
# `docker create` + `docker cp` extracts the binary without running the image.
cid=$(docker create dd-bottlecap-builder)
docker cp "$cid:/bottlecap" "$SCRIPT_DIR/datadog-agent"
docker rm "$cid" > /dev/null
docker rmi dd-bottlecap-builder > /dev/null 2>&1 || true
chmod +x "$SCRIPT_DIR/datadog-agent"

echo "==> Building test image..."
docker build \
    --no-cache \
    --platform linux/amd64 \
    -f "$SCRIPT_DIR/Dockerfile.LargePayload" \
    -t "$IMAGE_NAME" \
    "$SCRIPT_DIR"

echo "==> Starting container..."
CONTAINER_ID=$(docker run -d --platform linux/amd64 "$IMAGE_NAME")

echo "==> Waiting for extension to bind port 8124..."
# Probe with an empty POST until the listener answers, bailing out early if
# the container dies during init. Up to 30 one-second attempts.
READY=false
for _attempt in {1..30}; do
    running=$(docker inspect "$CONTAINER_ID" --format='{{.State.Running}}' 2>/dev/null || true)
    if [[ "$running" != "true" ]]; then
        echo "ERROR: Container exited during init. Logs:"
        docker logs "$CONTAINER_ID" 2>&1 | tail -30
        exit 1
    fi
    if docker exec "$CONTAINER_ID" \
        curl -sf -o /dev/null \
        -X POST "http://localhost:8124/lambda/start-invocation" \
        -H "Content-Type: application/json" \
        -d '{}' --max-time 2 2>/dev/null; then
        READY=true
        break
    fi
    sleep 1
done

if [[ "$READY" != "true" ]]; then
    echo "ERROR: Extension did not become ready after 30s. Logs:"
    docker logs "$CONTAINER_ID" 2>&1
    exit 1
fi

echo "==> Sending ~3 MB payload to /lambda/start-invocation..."
# Generate the payload with python3 so it is valid JSON of a known size.
python3 -c "
import json
payload = {'description': 'Large payload repro for GitHub issue #1041', 'data': 'x' * $PAYLOAD_CHARS}
print(json.dumps(payload))
" > "$PAYLOAD_FILE"

PAYLOAD_SIZE=$(wc -c < "$PAYLOAD_FILE")
# The extension listens on loopback only, so the request must originate
# inside the container: copy the payload in and POST via docker exec.
docker cp "$PAYLOAD_FILE" "$CONTAINER_ID:/tmp/large-payload.json"

HTTP_CODE=$(docker exec "$CONTAINER_ID" \
    curl -s -o /dev/null -w "%{http_code}" \
    -X POST "http://localhost:8124/lambda/start-invocation" \
    -H "Content-Type: application/json" \
    -H "lambda-runtime-aws-request-id: test-large-payload-request" \
    -H "datadog-meta-lang: java" \
    --data-binary "@/tmp/large-payload.json" \
    --max-time 15) || HTTP_CODE="error"
# Give the extension a moment to flush its logs before scraping them.
sleep 1

ERRORS=$(docker logs "$CONTAINER_ID" 2>&1 | grep -E "length limit|extract request body" || true)

echo ""
echo "────────────────────────────────────────────────────────────"
if [[ -n "$ERRORS" ]]; then
    echo "RESULT: BUG REPRODUCED (fix not applied or not working)"
    echo ""
    echo "$ERRORS"
elif [[ "$HTTP_CODE" != 2* ]]; then
    # No limit error in the logs, but the request itself failed or timed
    # out — don't claim success on a request that never succeeded.
    echo "RESULT: INCONCLUSIVE — HTTP $HTTP_CODE for a ${PAYLOAD_SIZE}-byte payload"
    echo "        (no 'length limit exceeded' error, but the request did not succeed)"
else
    echo "RESULT: OK — no 'length limit exceeded' error (fix is working)"
    echo "        HTTP $HTTP_CODE returned for a ${PAYLOAD_SIZE}-byte payload"
fi
echo "────────────────────────────────────────────────────────────"
echo ""
echo "Full logs saved to: $LOG_FILE"
Loading