From e1141bf2a8c4664aa80387686ebf22d1bb461c1d Mon Sep 17 00:00:00 2001 From: cbaugus Date: Mon, 13 Apr 2026 14:18:43 -0500 Subject: [PATCH 1/3] feat: add configurable newConnectionThresholdMs to YAML pool config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pool stats tracker uses a latency heuristic to classify requests as new vs reused connections. The default 100ms threshold doesn't fit all environments — fast targets can complete a full TLS handshake under 100ms, making new connections appear reused. Now configurable per-test via YAML: config: pool: newConnectionThresholdMs: 50 The threshold is applied and pool stats are reset on each POST /config. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/config.rs | 23 +++++++++++++++-------- src/connection_pool.rs | 24 +++++++++++------------- src/main.rs | 6 ++++++ src/yaml_config.rs | 7 +++++++ 4 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/config.rs b/src/config.rs index 5e5d65d..21af537 100644 --- a/src/config.rs +++ b/src/config.rs @@ -101,6 +101,7 @@ pub struct Config { // When Some, these override env-var defaults when building the HTTP client. pub pool_max_idle_per_host: Option, pub pool_idle_timeout_secs: Option, + pub pool_new_connection_threshold_ms: Option, } /// Helper to get a required environment variable. 
@@ -235,10 +236,11 @@ impl Config { let auto_disable_percentiles_on_warning = env_bool("AUTO_DISABLE_PERCENTILES_ON_WARNING", true); - let (pool_max_idle_per_host, pool_idle_timeout_secs) = match &yaml_config.config.pool { - Some(p) => (p.max_idle_per_host, p.idle_timeout_secs), - None => (None, None), - }; + let (pool_max_idle_per_host, pool_idle_timeout_secs, pool_new_connection_threshold_ms) = + match &yaml_config.config.pool { + Some(p) => (p.max_idle_per_host, p.idle_timeout_secs, p.new_connection_threshold_ms), + None => (None, None, None), + }; let config = Config { target_url, @@ -263,6 +265,7 @@ impl Config { cluster: ClusterConfig::from_env(), pool_max_idle_per_host, pool_idle_timeout_secs, + pool_new_connection_threshold_ms, }; config.validate()?; @@ -330,10 +333,11 @@ impl Config { let auto_disable_percentiles_on_warning = env_bool("AUTO_DISABLE_PERCENTILES_ON_WARNING", true); - let (pool_max_idle_per_host, pool_idle_timeout_secs) = match &yaml_config.config.pool { - Some(p) => (p.max_idle_per_host, p.idle_timeout_secs), - None => (None, None), - }; + let (pool_max_idle_per_host, pool_idle_timeout_secs, pool_new_connection_threshold_ms) = + match &yaml_config.config.pool { + Some(p) => (p.max_idle_per_host, p.idle_timeout_secs, p.new_connection_threshold_ms), + None => (None, None, None), + }; let config = Config { target_url, @@ -358,6 +362,7 @@ impl Config { cluster: ClusterConfig::from_env(), pool_max_idle_per_host, pool_idle_timeout_secs, + pool_new_connection_threshold_ms, }; config.validate()?; @@ -525,6 +530,7 @@ impl Config { cluster: ClusterConfig::from_env(), pool_max_idle_per_host: None, pool_idle_timeout_secs: None, + pool_new_connection_threshold_ms: None, }; config.validate()?; @@ -730,6 +736,7 @@ impl Config { cluster: ClusterConfig::for_testing(), pool_max_idle_per_host: None, pool_idle_timeout_secs: None, + pool_new_connection_threshold_ms: None, } } diff --git a/src/connection_pool.rs b/src/connection_pool.rs index 99c1452..e0574dc 
100644 --- a/src/connection_pool.rs +++ b/src/connection_pool.rs @@ -191,7 +191,7 @@ pub struct PoolStatsTracker { /// Threshold for considering a connection "likely new" (milliseconds) /// Requests slower than this are likely establishing new connections - new_connection_threshold_ms: u64, + new_connection_threshold_ms: Arc<Mutex<u64>>, } impl PoolStatsTracker { @@ -203,10 +203,15 @@ impl PoolStatsTracker { pub fn new(new_connection_threshold_ms: u64) -> Self { Self { stats: Arc::new(Mutex::new(ConnectionStats::default())), - new_connection_threshold_ms, + new_connection_threshold_ms: Arc::new(Mutex::new(new_connection_threshold_ms)), } } + /// Update the latency threshold used to classify new vs reused connections. + pub fn set_threshold_ms(&self, threshold_ms: u64) { + *self.new_connection_threshold_ms.lock().unwrap() = threshold_ms; + } + /// Record a request with timing information. /// /// Uses latency to infer connection reuse. Requests with very low latency @@ -214,6 +219,7 @@ impl PoolStatsTracker { /// may have established a new connection (including TLS handshake).
pub fn record_request(&self, latency_ms: u64) { let now = Instant::now(); + let threshold = *self.new_connection_threshold_ms.lock().unwrap(); let mut stats = self.stats.lock().unwrap(); stats.total_requests += 1; @@ -228,22 +234,14 @@ impl PoolStatsTracker { // Infer connection type based on latency // Fast requests (<threshold) likely reused connections - if latency_ms >= self.new_connection_threshold_ms { + if latency_ms >= threshold { stats.likely_new_connections += 1; CONNECTION_POOL_LIKELY_NEW.inc(); - debug!( - latency_ms = latency_ms, - threshold = self.new_connection_threshold_ms, - "Request latency suggests new connection" - ); + debug!(latency_ms, threshold, "Request latency suggests new connection"); } else { stats.likely_reused_connections += 1; CONNECTION_POOL_LIKELY_REUSED.inc(); - debug!( - latency_ms = latency_ms, - threshold = self.new_connection_threshold_ms, - "Request latency suggests reused connection" - ); + debug!(latency_ms, threshold, "Request latency suggests reused connection"); } // Update reuse rate gauge diff --git a/src/main.rs b/src/main.rs index afab893..cefe593 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1136,6 +1136,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { h.abort(); } + // Apply pool stats threshold from YAML and reset counters for new test. + if let Some(threshold_ms) = new_cfg.pool_new_connection_threshold_ms { + GLOBAL_POOL_STATS.set_threshold_ms(threshold_ms); + } + GLOBAL_POOL_STATS.reset(); + // Rebuild HTTP client in case TLS/pool config changed. + let new_client = match rust_loadtest::client::build_client(&new_cfg.to_client_config()) { diff --git a/src/yaml_config.rs b/src/yaml_config.rs index 1e72c1a..4dca0ad 100644 --- a/src/yaml_config.rs +++ b/src/yaml_config.rs @@ -118,6 +118,13 @@ pub struct YamlPoolConfig { /// Set to 0 to immediately close connections after each request. #[serde(rename = "idleTimeoutSecs")] pub idle_timeout_secs: Option, + + /// Latency threshold in milliseconds for classifying a request as a new + /// connection vs a reused one (default: 100). 
Requests slower than this + /// are counted as "likely new connection" in Prometheus metrics. + /// Tune this to match your target's typical TLS handshake time. + #[serde(rename = "newConnectionThresholdMs")] + pub new_connection_threshold_ms: Option, } fn default_timeout() -> YamlDuration { From 1071101e09aeac21322a98e8a565a46d310d11bb Mon Sep 17 00:00:00 2001 From: cbaugus Date: Mon, 13 Apr 2026 14:21:22 -0500 Subject: [PATCH 2/3] =?UTF-8?q?style:=20rustfmt=20=E2=80=94=20break=20long?= =?UTF-8?q?=20match=20arm=20and=20debug!=20macro=20lines?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- src/config.rs | 12 ++++++++++-- src/connection_pool.rs | 10 ++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/config.rs b/src/config.rs index 21af537..5855b61 100644 --- a/src/config.rs +++ b/src/config.rs @@ -238,7 +238,11 @@ impl Config { let (pool_max_idle_per_host, pool_idle_timeout_secs, pool_new_connection_threshold_ms) = match &yaml_config.config.pool { - Some(p) => (p.max_idle_per_host, p.idle_timeout_secs, p.new_connection_threshold_ms), + Some(p) => ( + p.max_idle_per_host, + p.idle_timeout_secs, + p.new_connection_threshold_ms, + ), None => (None, None, None), }; @@ -335,7 +339,11 @@ impl Config { let (pool_max_idle_per_host, pool_idle_timeout_secs, pool_new_connection_threshold_ms) = match &yaml_config.config.pool { - Some(p) => (p.max_idle_per_host, p.idle_timeout_secs, p.new_connection_threshold_ms), + Some(p) => ( + p.max_idle_per_host, + p.idle_timeout_secs, + p.new_connection_threshold_ms, + ), None => (None, None, None), }; diff --git a/src/connection_pool.rs b/src/connection_pool.rs index e0574dc..ffd16c0 100644 --- a/src/connection_pool.rs +++ b/src/connection_pool.rs @@ -237,11 +237,17 @@ impl PoolStatsTracker { if latency_ms >= threshold { stats.likely_new_connections += 1; CONNECTION_POOL_LIKELY_NEW.inc(); - debug!(latency_ms, threshold, 
"Request latency suggests new connection"); + debug!( + latency_ms, + threshold, "Request latency suggests new connection" + ); } else { stats.likely_reused_connections += 1; CONNECTION_POOL_LIKELY_REUSED.inc(); - debug!(latency_ms, threshold, "Request latency suggests reused connection"); + debug!( + latency_ms, + threshold, "Request latency suggests reused connection" + ); } // Update reuse rate gauge From d1399ae3055d675257d07440d91b9f84052ea66f Mon Sep 17 00:00:00 2001 From: cbaugus Date: Mon, 13 Apr 2026 14:32:34 -0500 Subject: [PATCH 3/3] feat: rename newConnectionThresholdMs to metricsReuseThresholdMs, add pool docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename makes it clear the setting only affects the Prometheus metrics heuristic — not actual connection behavior. Added docs/CONNECTION_POOL.md with use cases for new-connection-per-request, default reuse, and long-lived keepalive patterns. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/CONNECTION_POOL.md | 229 ++++++++++++++++++++++++++++++++++++++++ src/config.rs | 14 +-- src/main.rs | 2 +- src/yaml_config.rs | 12 +-- 4 files changed, 243 insertions(+), 14 deletions(-) create mode 100644 docs/CONNECTION_POOL.md diff --git a/docs/CONNECTION_POOL.md b/docs/CONNECTION_POOL.md new file mode 100644 index 0000000..fe12542 --- /dev/null +++ b/docs/CONNECTION_POOL.md @@ -0,0 +1,229 @@ +# Connection Pool Configuration + +This document explains how `rust_loadtest` manages HTTP connections and how to +configure pooling behavior for different test scenarios. + +## How Connection Pooling Works + +Each load test builds a single `reqwest::Client` that maintains a connection +pool per target host. When a request completes, the underlying TCP connection +(including its TLS session) is returned to the pool. Subsequent requests grab +an existing connection from the pool instead of performing a new TCP handshake +and TLS negotiation. 
+ +This is the **default behavior** — no special configuration is needed to reuse +connections. + +### When connections are reused + +- Workers fire requests continuously (e.g., RPS >= 1) +- Idle connections haven't exceeded the idle timeout +- The pool hasn't reached the max idle limit + +### When new connections are created + +- First request from each worker (no pooled connection exists yet) +- Idle timeout expired — the pooled connection was closed +- `maxIdlePerHost` is set to 0 — pooling is effectively disabled +- The server closed the connection (e.g., server-side idle timeout) + +## Configuration + +Pool settings can be configured via **environment variables** (applied at +startup) or via the **YAML config** (applied per-test on `POST /config`). +YAML values override environment variables when present. + +### Environment Variables + +| Variable | Default | Description | +|--------------------------|---------|--------------------------------------------------| +| `POOL_MAX_IDLE_PER_HOST` | `32` | Maximum idle connections kept per host | +| `POOL_IDLE_TIMEOUT_SECS` | `30` | Seconds an idle connection stays in the pool | +| `TCP_NODELAY` | `true` | Disable Nagle's algorithm for lower latency | +| `REQUEST_TIMEOUT_SECS` | `30` | Per-request timeout | + +### YAML Config + +Add an optional `pool` section under `config`: + +```yaml +config: + baseUrl: https://example.com + pool: + maxIdlePerHost: 32 + idleTimeoutSecs: 30 + metricsReuseThresholdMs: 100 +``` + +| Field | Default | Description | +|--------------------------|---------|--------------------------------------------------| +| `maxIdlePerHost` | `32` | Max idle connections per host. Set to `0` to disable pooling. | +| `idleTimeoutSecs` | `30` | Seconds before idle connections are closed. Set to `0` to close immediately. | +| `metricsReuseThresholdMs`| `100` | Latency threshold (ms) for the Prometheus metrics heuristic. 
Does **not** affect actual connection behavior — only how metrics classify requests as "new" vs "reused". | + +## Use Case: Force New Connection Per Request + +Use this when you need every request to perform a full TCP + TLS handshake. +Useful for testing: + +- TLS handshake latency and overhead +- Server-side connection establishment handling under load +- Certificate validation performance +- Load balancer connection distribution + +```yaml +version: "1.0" +config: + baseUrl: https://api.example.com + workers: 10 + duration: 5m + timeout: 30s + pool: + maxIdlePerHost: 0 + idleTimeoutSecs: 0 +load: + model: rps + target: 100 +scenarios: + - name: new-connection-test + weight: 100 + steps: + - name: request + request: + method: GET + path: /health + assertions: + - type: statusCode + expected: 200 +``` + +With environment variables: + +```bash +POOL_MAX_IDLE_PER_HOST=0 POOL_IDLE_TIMEOUT_SECS=0 +``` + +## Use Case: Reuse Connections (Default) + +Use this for standard load testing where you want realistic connection behavior. +Connections are established once and reused across requests, which is how most +production clients behave. + +```yaml +version: "1.0" +config: + baseUrl: https://api.example.com + workers: 25 + duration: 10m + timeout: 30s + # No pool section needed — defaults reuse connections +load: + model: rps + target: 1000 +scenarios: + - name: reuse-connection-test + weight: 100 + steps: + - name: request + request: + method: GET + path: /health + assertions: + - type: statusCode + expected: 200 +``` + +## Use Case: Long-Lived Connection Reuse with Infrequent Requests + +Use this when requests are spaced far apart (e.g., every 5 minutes) but you +want to keep the same TCP/TLS session alive between them. Increase the idle +timeout to prevent the pool from closing connections during gaps. 
+ +```yaml +version: "1.0" +config: + baseUrl: https://api.example.com + workers: 1 + duration: 1h + timeout: 30s + pool: + maxIdlePerHost: 1 + idleTimeoutSecs: 600 +load: + model: rps + target: 1 +scenarios: + - name: keepalive-test + weight: 100 + steps: + - name: request + request: + method: POST + path: /oauth2/v1/token + body: "grant_type=client_credentials&client_id=my_id&client_secret=my_secret" + headers: + Content-Type: application/x-www-form-urlencoded + assertions: + - type: statusCode + expected: 200 + thinkTime: + min: 4m + max: 5m +standby: + workers: 1 + rps: 1.0 +``` + +**Note:** Even with a high idle timeout, the remote server may close the +connection on its side (common server idle timeouts are 60-120s). The pool +will transparently open a new connection when this happens. + +## Monitoring Connection Reuse + +Prometheus metrics are available on port 9090: + +| Metric | Type | Description | +|-----------------------------------------|------------|------------------------------------------| +| `connection_pool_likely_new_total` | Counter | Requests classified as new connections | +| `connection_pool_likely_reused_total` | Counter | Requests classified as reused connections| +| `connection_pool_reuse_rate_percent` | Gauge | Current reuse percentage | +| `connection_pool_requests_total` | Counter | Total requests tracked | +| `connection_pool_max_idle_per_host` | Gauge | Configured max idle setting | +| `connection_pool_idle_timeout_seconds` | Gauge | Configured idle timeout setting | + +### Important: Metrics Are Heuristic-Based + +The "new" vs "reused" classification uses a **latency heuristic**, not actual +connection state (reqwest does not expose this). Requests slower than +`metricsReuseThresholdMs` (default: 100ms) are classified as "likely new +connection" because a TLS handshake typically adds 50-150ms. 
+ +This means: + +- Fast targets where TLS completes in <100ms will **undercount** new connections +- Slow targets where reused requests take >100ms will **overcount** new connections + +Tune `metricsReuseThresholdMs` in the YAML to match your target's typical TLS +handshake time for more accurate classification. For definitive connection +tracking, check server-side access logs. + +### Grafana Queries + +**New vs reused connections over time (time series panel):** + +| Query | Legend | +|-------------------------------------------------|----------| +| `rate(connection_pool_likely_reused_total[1m])` | Reused | +| `rate(connection_pool_likely_new_total[1m])` | New | + +**Reuse rate (single stat panel):** + +```promql +connection_pool_reuse_rate_percent +``` + +**Percentage of new connections (single stat panel):** + +```promql +connection_pool_likely_new_total / connection_pool_requests_total * 100 +``` diff --git a/src/config.rs b/src/config.rs index 5855b61..ae9385d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -101,7 +101,7 @@ pub struct Config { // When Some, these override env-var defaults when building the HTTP client. pub pool_max_idle_per_host: Option, pub pool_idle_timeout_secs: Option, - pub pool_new_connection_threshold_ms: Option, + pub pool_metrics_reuse_threshold_ms: Option, } /// Helper to get a required environment variable. 
@@ -241,7 +241,7 @@ impl Config { Some(p) => ( p.max_idle_per_host, p.idle_timeout_secs, - p.new_connection_threshold_ms, + p.metrics_reuse_threshold_ms, ), None => (None, None, None), }; @@ -269,7 +269,7 @@ impl Config { cluster: ClusterConfig::from_env(), pool_max_idle_per_host, pool_idle_timeout_secs, - pool_new_connection_threshold_ms, + pool_metrics_reuse_threshold_ms, }; config.validate()?; @@ -342,7 +342,7 @@ impl Config { Some(p) => ( p.max_idle_per_host, p.idle_timeout_secs, - p.new_connection_threshold_ms, + p.metrics_reuse_threshold_ms, ), None => (None, None, None), }; @@ -370,7 +370,7 @@ impl Config { cluster: ClusterConfig::from_env(), pool_max_idle_per_host, pool_idle_timeout_secs, - pool_new_connection_threshold_ms, + pool_metrics_reuse_threshold_ms, }; config.validate()?; @@ -538,7 +538,7 @@ impl Config { cluster: ClusterConfig::from_env(), pool_max_idle_per_host: None, pool_idle_timeout_secs: None, - pool_new_connection_threshold_ms: None, + pool_metrics_reuse_threshold_ms: None, }; config.validate()?; @@ -744,7 +744,7 @@ impl Config { cluster: ClusterConfig::for_testing(), pool_max_idle_per_host: None, pool_idle_timeout_secs: None, - pool_new_connection_threshold_ms: None, + pool_metrics_reuse_threshold_ms: None, } } diff --git a/src/main.rs b/src/main.rs index cefe593..09060d4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1137,7 +1137,7 @@ async fn main() -> Result<(), Box> { } // Apply pool stats threshold from YAML and reset counters for new test. 
- if let Some(threshold_ms) = new_cfg.pool_new_connection_threshold_ms { + if let Some(threshold_ms) = new_cfg.pool_metrics_reuse_threshold_ms { GLOBAL_POOL_STATS.set_threshold_ms(threshold_ms); } GLOBAL_POOL_STATS.reset(); diff --git a/src/yaml_config.rs b/src/yaml_config.rs index 4dca0ad..67462a5 100644 --- a/src/yaml_config.rs +++ b/src/yaml_config.rs @@ -119,12 +119,12 @@ pub struct YamlPoolConfig { #[serde(rename = "idleTimeoutSecs")] pub idle_timeout_secs: Option, - /// Latency threshold in milliseconds for classifying a request as a new - /// connection vs a reused one (default: 100). Requests slower than this - /// are counted as "likely new connection" in Prometheus metrics. - /// Tune this to match your target's typical TLS handshake time. - #[serde(rename = "newConnectionThresholdMs")] - pub new_connection_threshold_ms: Option<u64>, + /// Latency threshold in milliseconds used by Prometheus metrics to classify + /// a request as a new connection vs a reused one (default: 100). Requests + /// slower than this are counted as "likely new connection". Does NOT affect + /// actual connection behavior — only the metrics heuristic. + #[serde(rename = "metricsReuseThresholdMs")] + pub metrics_reuse_threshold_ms: Option<u64>, } fn default_timeout() -> YamlDuration {