Skip to content
24 changes: 16 additions & 8 deletions .github/actions/detect-changed-impls/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,23 @@ runs:
git show "origin/$BASE_REF:$IMPLS_FILE" > /tmp/impls-old.yaml 2>/dev/null || echo "" > /tmp/impls-old.yaml
cp "$IMPLS_FILE" /tmp/impls-new.yaml

# Normalize objects before comparing so YAML comments/formatting
# don't make unchanged entries look modified.
normalize_object() {
  # Print the result of running a yq query against a YAML file, rendered as
  # JSON with indent 0, so callers can compare two objects as plain strings.
  #
  # Arguments:
  #   $1 - path of the YAML file to read
  #   $2 - yq expression selecting the object to render
  # Outputs:
  #   whatever yq wrote to stdout; yq's stderr is discarded
  # Returns:
  #   always 0 (a yq failure is reported as an extra empty line, not a status)
  local yaml_path="$1" selector="$2"

  if ! yq eval -o=json -I=0 "${selector}" "${yaml_path}" 2>/dev/null; then
    # Best-effort fallback: keep the caller's comparison going when the file
    # or the query cannot be evaluated.
    echo ""
  fi
}

# Find changed/new implementations
OLD_IMPLS=$(yq eval '.implementations[].id' /tmp/impls-old.yaml 2>/dev/null | sort || echo "")
NEW_IMPLS=$(yq eval '.implementations[].id' /tmp/impls-new.yaml 2>/dev/null | sort || echo "")

for impl_id in $NEW_IMPLS; do
if echo "$OLD_IMPLS" | grep -q "^${impl_id}$"; then
# Exists in old, check if content changed
OLD_IMPL=$(yq eval ".implementations[] | select(.id == \"$impl_id\")" /tmp/impls-old.yaml 2>/dev/null || echo "")
NEW_IMPL=$(yq eval ".implementations[] | select(.id == \"$impl_id\")" /tmp/impls-new.yaml 2>/dev/null || echo "")
OLD_IMPL=$(normalize_object /tmp/impls-old.yaml ".implementations[] | select(.id == \"$impl_id\")")
NEW_IMPL=$(normalize_object /tmp/impls-new.yaml ".implementations[] | select(.id == \"$impl_id\")")

if [ "$OLD_IMPL" != "$NEW_IMPL" ]; then
CHANGED_IMPLS="${CHANGED_IMPLS:+$CHANGED_IMPLS|}$impl_id"
Expand All @@ -113,8 +121,8 @@ runs:

for relay_id in $NEW_RELAYS; do
if echo "$OLD_RELAYS" | grep -q "^${relay_id}$"; then
OLD_RELAY=$(yq eval ".relays[] | select(.id == \"$relay_id\")" /tmp/impls-old.yaml 2>/dev/null || echo "")
NEW_RELAY=$(yq eval ".relays[] | select(.id == \"$relay_id\")" /tmp/impls-new.yaml 2>/dev/null || echo "")
OLD_RELAY=$(normalize_object /tmp/impls-old.yaml ".relays[] | select(.id == \"$relay_id\")")
NEW_RELAY=$(normalize_object /tmp/impls-new.yaml ".relays[] | select(.id == \"$relay_id\")")
if [ "$OLD_RELAY" != "$NEW_RELAY" ]; then
CHANGED_RELAYS="${CHANGED_RELAYS:+$CHANGED_RELAYS|}$relay_id"
echo " → Changed relay: $relay_id"
Expand All @@ -131,8 +139,8 @@ runs:

for router_id in $NEW_ROUTERS; do
if echo "$OLD_ROUTERS" | grep -q "^${router_id}$"; then
OLD_ROUTER=$(yq eval ".routers[] | select(.id == \"$router_id\")" /tmp/impls-old.yaml 2>/dev/null || echo "")
NEW_ROUTER=$(yq eval ".routers[] | select(.id == \"$router_id\")" /tmp/impls-new.yaml 2>/dev/null || echo "")
OLD_ROUTER=$(normalize_object /tmp/impls-old.yaml ".routers[] | select(.id == \"$router_id\")")
NEW_ROUTER=$(normalize_object /tmp/impls-new.yaml ".routers[] | select(.id == \"$router_id\")")
if [ "$OLD_ROUTER" != "$NEW_ROUTER" ]; then
CHANGED_ROUTERS="${CHANGED_ROUTERS:+$CHANGED_ROUTERS|}$router_id"
echo " → Changed router: $router_id"
Expand All @@ -153,8 +161,8 @@ runs:
for baseline_id in $NEW_BASELINES; do
if echo "$OLD_BASELINES" | grep -q "^${baseline_id}$"; then
# Exists in old, check if content changed
OLD_BASELINE=$(yq eval ".baselines[] | select(.id == \"$baseline_id\")" /tmp/impls-old.yaml 2>/dev/null || echo "")
NEW_BASELINE=$(yq eval ".baselines[] | select(.id == \"$baseline_id\")" /tmp/impls-new.yaml 2>/dev/null || echo "")
OLD_BASELINE=$(normalize_object /tmp/impls-old.yaml ".baselines[] | select(.id == \"$baseline_id\")")
NEW_BASELINE=$(normalize_object /tmp/impls-new.yaml ".baselines[] | select(.id == \"$baseline_id\")")
if [ "$OLD_BASELINE" != "$NEW_BASELINE" ]; then
CHANGED_BASELINES="${CHANGED_BASELINES:+$CHANGED_BASELINES|}$baseline_id"
echo " → Changed baseline: $baseline_id"
Expand Down
1 change: 1 addition & 0 deletions lib/lib-inputs-yaml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ EOF
DOWNLOAD_BYTES: "${DOWNLOAD_BYTES:-}"
DURATION: "${DURATION:-}"
LATENCY_ITERATIONS: "${LATENCY_ITERATIONS:-}"
PERF_TEST_TIMEOUT_SECS: "${PERF_TEST_TIMEOUT_SECS:-}"
EOF
;;
hole-punch)
Expand Down
16 changes: 11 additions & 5 deletions perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ Control test behavior with these options:
# Number of iterations for upload/download tests (default: 10)
./run.sh --iterations 10

# Duration per iteration for throughput tests in seconds (default: 20)
./run.sh --duration 20
# Max seconds per test before the harness aborts (default: 300)
./run.sh --timeout 900

# Number of iterations for latency tests (default: 100)
./run.sh --latency-iterations 100
Expand All @@ -83,7 +83,11 @@ Control test behavior with these options:
./run.sh --cache-dir /srv/cache
```

**Note**: The `--iterations` flag controls both upload and download iterations. The framework measures throughput by transferring data over a time period and measuring bytes/second.
**Note**: The `--iterations` flag controls both upload and download iterations. Modern dialers measure throughput by transferring a fixed amount of data (`UPLOAD_BYTES` / `DOWNLOAD_BYTES`) per iteration and reporting Gbps.

**Note**: `--timeout` sets `PERF_TEST_TIMEOUT_SECS`, the harness-level limit on how long each test's docker-compose run may take. This is separate from `TEST_TIMEOUT_SECS`, which applies inside the dialer/listener containers (peer/Redis wait limits).

**Legacy**: `--duration` (default 20s) is passed to dialers as `DURATION` but is unused by current implementations; prefer `--upload-bytes`, `--iterations`, and `--timeout`.

## Test Filtering

Expand Down Expand Up @@ -586,8 +590,10 @@ For now, all tests run locally using Docker networking. Multi-machine testing su

**Tests failing with timeout**
- Check container logs: `$TEST_PASS_DIR/logs/<test-name>.log`
- Increase test duration: `--duration 30`
- Enable debug mode: `--debug`
- Each perf test is capped at **300 seconds** by default (`--timeout`). For slow stacks: `./run.sh --timeout 900 --test-select "..." --yes`
- Do not use `--debug` for throughput runs (adds log I/O and often lowers Gbps)
- `--duration` does not extend the harness limit and is unused by modern dialers (they transfer fixed `UPLOAD_BYTES`/`DOWNLOAD_BYTES` per iteration, not “run for N seconds”)
- Python yamux tuning uses `PY_YAMUX_*` env vars (e.g. `PY_YAMUX_DISABLE_HYSTERESIS=1`); they are injected into **python-v0.x** containers only, not Go/Rust

**Cache not working / Matrix regenerates every time**
- Check TEST_RUN_KEY in output (should be consistent for same configuration)
Expand Down
24 changes: 23 additions & 1 deletion perf/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ test-aliases:
- alias: "none"
value: "!~all"
- alias: "images"
value: "dotnet-v1.0|go-v0.45|js-v3.x|rust-v0.56|lua-v0.1.0"
value: "dotnet-v1.0|go-v0.45|js-v3.x|python-v0.x|rust-v0.56|lua-v0.1.0"
- alias: "python"
value: "python-v0.x"
- alias: "baselines"
value: "https|quic-go|iperf|quinn|tquic|tokio-quiche|tokio-tquic|quiche"
- alias: "default_baselines"
Expand Down Expand Up @@ -152,6 +154,26 @@ implementations:
secureChannels: [noise, tls]
muxers: [yamux]

- id: python-v0.x
source:
type: github
repo: libp2p/py-libp2p
commit: 21be0a249851276039a0ae2f896c9691e3760ad3
dockerfile: interop/perf/Dockerfile
transports: [tcp, ws]
secureChannels: [noise, tls]
muxers: [yamux, mplex]

# Local python config example (same Dockerfile; context = repo root = images/python/0.x/py-libp2p)
#- id: python-v0.x
# source:
# type: local
# path: images/python/0.x/py-libp2p
# dockerfile: interop/perf/Dockerfile
# transports: [tcp, ws]
# secureChannels: [noise, tls]
# muxers: [yamux, mplex]

- id: lua-v0.1.0
source:
type: github
Expand Down
84 changes: 67 additions & 17 deletions perf/lib/run-single-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,16 @@ TEST_SLUG=$(echo "${TEST_NAME}" | sed 's/[^a-zA-Z0-9-]/_/g')
LOG_FILE="${TEST_PASS_DIR}/logs/${TEST_SLUG}.log"
> "${LOG_FILE}"

# Use unique compose project/container names per test pass to avoid stale
# docker compose state collisions when rerunning the same test selection.
RUN_KEY=$(compute_test_key "${TEST_PASS_NAME}-${TEST_NAME}")
COMPOSE_PROJECT_NAME="${TEST_SLUG}_${RUN_KEY}"
CONTAINER_PREFIX="${COMPOSE_PROJECT_NAME}"

print_debug "test key: ${TEST_KEY}"
print_debug "test slug: ${TEST_SLUG}"
print_debug "log file: ${LOG_FILE}"
print_debug "compose project: ${COMPOSE_PROJECT_NAME}"

log_message "[$((${TEST_INDEX} + 1))] ${TEST_NAME} (key: ${TEST_KEY})"

Expand All @@ -89,13 +96,55 @@ cleanup() {
}
trap cleanup EXIT

# Build environment variables per container based on legacy status
# Legacy containers get lowercase env vars pointing to the proxy
# Modern containers get uppercase env vars pointing to global Redis
# Build environment variables per container based on legacy status.
# Legacy containers get lowercase env vars pointing to the proxy.
# Modern containers get uppercase env vars pointing to global Redis.

# Python-only yamux tuning: forward host PY_YAMUX_* into python-v0.x containers only.
# Read by py-libp2p yamux; ignored by Go/Rust/JS. DEBUG=true sets PY_YAMUX_DEBUG=1.

# Host variables that are forwarded as-is (when non-empty). Order here is the
# order in which the entries are appended to the env list.
PY_YAMUX_FORWARDED_VARS=(
  PY_YAMUX_DISABLE_HYSTERESIS
  PY_YAMUX_RELEASE_ON_READ
  PY_YAMUX_ASSUME_RTT_MS
  PY_YAMUX_BATCH_THRESHOLD_DIV
)

#######################################
# Build the list of PY_YAMUX_* "KEY=VALUE" entries for one container.
# Globals:
#   DEBUG (read), PY_YAMUX_FORWARDED_VARS (read), the variables it names (read),
#   PY_YAMUX_ENV_RESULT (written: reset, then filled with the entries)
# Arguments:
#   $1 - implementation id of the container (listener or dialer)
# Returns:
#   0 always; the result array stays empty for any id other than python-v0.x
#######################################
collect_py_yamux_env() {
  local impl_id="${1:-}"
  local var_name

  PY_YAMUX_ENV_RESULT=()
  if [[ "${impl_id}" != "python-v0.x" ]]; then
    return 0
  fi

  if [ "${DEBUG:-false}" = "true" ]; then
    PY_YAMUX_ENV_RESULT+=("PY_YAMUX_DEBUG=1")
  fi

  for var_name in "${PY_YAMUX_FORWARDED_VARS[@]}"; do
    # ${!var_name:-} reads the variable whose name is stored in var_name and
    # yields an empty string when it is unset.
    if [ -n "${!var_name:-}" ]; then
      PY_YAMUX_ENV_RESULT+=("${var_name}=${!var_name}")
    fi
  done
}

# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array, which
# avoids an "unbound variable" error on older bash when nounset is enabled.
collect_py_yamux_env "${LISTENER_ID}"
LISTENER_PY_ENV=(${PY_YAMUX_ENV_RESULT[@]+"${PY_YAMUX_ENV_RESULT[@]}"})

collect_py_yamux_env "${DIALER_ID}"
DIALER_PY_ENV=(${PY_YAMUX_ENV_RESULT[@]+"${PY_YAMUX_ENV_RESULT[@]}"})

if [ "${LISTENER_LEGACY}" == "true" ]; then
LISTENER_ENV=$(generate_legacy_env_vars "false" "proxy-${TEST_KEY}:6379" "${TRANSPORT_NAME}" "${SECURE}" "${MUXER_NAME}")
else
LISTENER_ENV=$(generate_modern_env_vars "false" "perf-redis:6379" "${TEST_KEY}" "${TRANSPORT_NAME}" "${SECURE}" "${MUXER_NAME}" "${DEBUG:-false}")
LISTENER_ENV=$(generate_modern_env_vars "false" "perf-redis:6379" "${TEST_KEY}" "${TRANSPORT_NAME}" "${SECURE}" "${MUXER_NAME}" "${DEBUG:-false}" \
"${LISTENER_PY_ENV[@]}")
fi

if [ "${DIALER_LEGACY}" == "true" ]; then
Expand All @@ -107,14 +156,15 @@ else
"UPLOAD_ITERATIONS=${upload_iterations}" \
"DOWNLOAD_ITERATIONS=${download_iterations}" \
"LATENCY_ITERATIONS=${latency_iterations}" \
"DURATION=${duration}")
"DURATION=${duration}" \
"${DIALER_PY_ENV[@]}")
fi

# Generate docker-compose file
if [ "${IS_LEGACY_TEST}" == "true" ]; then
# Legacy test: external shared network + Redis proxy service
cat > "${COMPOSE_FILE}" <<EOF
name: ${TEST_SLUG}
name: ${COMPOSE_PROJECT_NAME}

networks:
default:
Expand All @@ -127,7 +177,7 @@ networks:
services:
proxy-${TEST_KEY}:
image: libp2p-redis-proxy
container_name: ${TEST_SLUG}_proxy
container_name: ${CONTAINER_PREFIX}_proxy
networks:
- perf-network
environment:
Expand All @@ -136,7 +186,7 @@ services:

listener:
image: ${LISTENER_IMAGE}
container_name: ${TEST_SLUG}_listener
container_name: ${CONTAINER_PREFIX}_listener
init: true
depends_on:
- proxy-${TEST_KEY}
Expand All @@ -147,7 +197,7 @@ ${LISTENER_ENV}

dialer:
image: ${DIALER_IMAGE}
container_name: ${TEST_SLUG}_dialer
container_name: ${CONTAINER_PREFIX}_dialer
depends_on:
- listener
- proxy-${TEST_KEY}
Expand All @@ -159,7 +209,7 @@ EOF
else
# Modern test: external shared network, no proxy needed
cat > "${COMPOSE_FILE}" <<EOF
name: ${TEST_SLUG}
name: ${COMPOSE_PROJECT_NAME}

networks:
default:
Expand All @@ -172,7 +222,7 @@ networks:
services:
listener:
image: ${LISTENER_IMAGE}
container_name: ${TEST_SLUG}_listener
container_name: ${CONTAINER_PREFIX}_listener
init: true
networks:
- perf-network
Expand All @@ -181,7 +231,7 @@ ${LISTENER_ENV}

dialer:
image: ${DIALER_IMAGE}
container_name: ${TEST_SLUG}_dialer
container_name: ${CONTAINER_PREFIX}_dialer
depends_on:
- listener
networks:
Expand All @@ -195,25 +245,25 @@ fi
log_debug " Starting containers..."
log_message "Running: ${TEST_NAME}"

# Set timeout (300 seconds / 5 minutes)
TEST_TIMEOUT=300
# Per-test harness timeout (default 300s). Set via run.sh --timeout:
# ./run.sh --timeout 900 --test-select "python-v0" --yes

# Track test duration
TEST_START=$(date +%s)

# Start containers and wait for dialer to exit (with timeout)
# WARNING: Do NOT put quotes around this because the command has two parts
if timeout "${TEST_TIMEOUT}" ${DOCKER_COMPOSE_CMD} -f "${COMPOSE_FILE}" up --exit-code-from dialer --abort-on-container-exit >> "${LOG_FILE}" 2>&1; then
if timeout "${PERF_TEST_TIMEOUT_SECS:-300}" ${DOCKER_COMPOSE_CMD} -f "${COMPOSE_FILE}" up --exit-code-from dialer --abort-on-container-exit >> "${LOG_FILE}" 2>&1; then
EXIT_CODE=0
log_message " ✓ Test complete"
else
TEST_EXIT=$?
# Check if it was a timeout (exit code 124)
if [ "${TEST_EXIT}" -eq 124 ]; then
EXIT_CODE=1
log_error " ✗ Test timed out after ${TEST_TIMEOUT}s"
log_error " ✗ Test timed out after ${PERF_TEST_TIMEOUT_SECS:-300}s"
echo "" >> "${LOG_FILE}"
log_error "Test timed out after ${TEST_TIMEOUT} seconds"
log_error "Test timed out after ${PERF_TEST_TIMEOUT_SECS:-300} seconds"
else
EXIT_CODE="${TEST_EXIT}"
log_error " ✗ Test failed (exit code ${TEST_EXIT})"
Expand Down
11 changes: 11 additions & 0 deletions perf/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ export DOWNLOAD_BYTES=1073741824 # 1GB default
export ITERATIONS=10
export DURATION_PER_ITERATION=20 # seconds per iteration for throughput tests
export LATENCY_ITERATIONS=100 # iterations for latency test
# Harness kills compose after this many seconds per test (see run-single-test.sh).
# Set via --timeout or PERF_TEST_TIMEOUT_SECS (default 300).
export PERF_TEST_TIMEOUT_SECS="${PERF_TEST_TIMEOUT_SECS:-300}"

# Source common libraries
source "${SCRIPT_LIB_DIR}/lib-github-snapshots.sh"
Expand Down Expand Up @@ -169,6 +172,7 @@ Configuration Options:
--iterations VALUE Number of iterations per test (default: 10)
--duration VALUE Duration per iteration for throughput (default: 20s)
--latency-iterations VALUE Iterations for latency test (default: 100)
--timeout VALUE Max seconds per test before harness abort (default: 300)
--cache-dir VALUE Cache directory (default: /srv/cache)

Execution Options:
Expand Down Expand Up @@ -214,6 +218,9 @@ Examples:
# Exclude specific baseline
${0} --baseline-ignore "https"

# Slow stacks (e.g. python yamux): 15 minutes per test
${0} --timeout 900 --impl-select python --yes

# Traditional usage (still supported)
${0} --upload-bytes 5368709120 --download-bytes 5368709120

Expand Down Expand Up @@ -258,6 +265,7 @@ while [ $# -gt 0 ]; do
--iterations) ITERATIONS="${2}"; shift 2 ;;
--duration) DURATION_PER_ITERATION="${2}"; shift 2 ;;
--latency-iterations) LATENCY_ITERATIONS="${2}"; shift 2 ;;
--timeout) export PERF_TEST_TIMEOUT_SECS="${2}"; shift 2 ;;
--cache-dir) CACHE_DIR="${2}"; shift 2 ;;

# Execution options
Expand Down Expand Up @@ -285,6 +293,8 @@ while [ $# -gt 0 ]; do
esac
done

export PERF_TEST_TIMEOUT_SECS="${PERF_TEST_TIMEOUT_SECS:-300}"

# Re-derive paths from (possibly updated) CACHE_DIR
# --cache-dir may have changed CACHE_DIR after init_common_variables set TEST_RUN_DIR
export TEST_RUN_DIR="${CACHE_DIR}/test-run"
Expand Down Expand Up @@ -482,6 +492,7 @@ print_message "Download Bytes: $(numfmt --to=iec --suffix=B "${DOWNLOAD_BYTES}"
print_message "Iterations: ${ITERATIONS}"
print_message "Duration per Iteration: ${DURATION_PER_ITERATION}s"
print_message "Latency Iterations: ${LATENCY_ITERATIONS}"
print_message "Test Timeout: ${PERF_TEST_TIMEOUT_SECS}s"
print_message "Full Matrix Test: ${FULL_MATRIX_TEST}"
print_message "Create Snapshot: ${CREATE_SNAPSHOT}"
print_message "Export Docker Images: ${EXPORT_DOCKER_IMAGES}"
Expand Down
Loading