From 1e01e13b60f7a220f63ae407ff652322fddf43a8 Mon Sep 17 00:00:00 2001
From: Sebastian Hofstetter <shofstetter@salesforce.com>
Date: Fri, 6 Feb 2026 13:58:44 +0100
Subject: [PATCH] Avoid io_context contention in high-throughput SSL stream
 reads by filling buffers fully

The current implementation of read_some in asio::ssl decodes only one TLS record per operation. The maximum TLS record size is 16KB, so each io_context operation performs only a small read. In high-throughput real-world scenarios we observe as little as 9KB of buffer utilization per operation, which causes significant overhead and thread overscheduling. This matters because it is hard to get much more than ~600k operations per second from a single io_context. While multiple io_contexts are possible, the same overhead applies to each of them. Moreover, implementations become significantly more complex when multiple io_contexts are required, because they need dedicated load balancing and scheduling.
On our production machines (e.g., `r8i.48xlarge` with a 75Gb/s interface or `x2idn.32xlarge` with a 100Gb/s interface), concurrent S3 downloads suffer from significant contention: network throughput peaks at ~25Gb/s while CPU utilization is very high (up to 100%).
With this PR, the system CPU utilization drops to ~10% while throughput increases to 70Gb/s and 92Gb/s, respectively.

This PR modifies the read operation to loop multiple reads until either:
1. There is no more data in the system buffer (would block).
2. The user-provided buffer is full.

Additionally, the internal buffer sizes are increased from 17KB to 128KB. This part is open for suggestions - should the buffer sizes be configurable for high-throughput scenarios, e.g., at runtime or via compile-time macros?
---
 include/asio/ssl/detail/impl/engine.ipp |  4 +-
 include/asio/ssl/detail/read_op.hpp     | 52 ++++++++++++++++++++++++-
 include/asio/ssl/detail/stream_core.hpp |  6 +--
 3 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/include/asio/ssl/detail/impl/engine.ipp b/include/asio/ssl/detail/impl/engine.ipp
index 814e11bcea..b81673e9d5 100644
--- a/include/asio/ssl/detail/impl/engine.ipp
+++ b/include/asio/ssl/detail/impl/engine.ipp
@@ -51,7 +51,7 @@ engine::engine(SSL_CTX* context)
 #endif // defined(SSL_MODE_RELEASE_BUFFERS)
 
   ::BIO* int_bio = 0;
-  ::BIO_new_bio_pair(&int_bio, 0, &ext_bio_, 0);
+  ::BIO_new_bio_pair(&int_bio, 128*1024, &ext_bio_, 128*1024);
   ::SSL_set_bio(ssl_, int_bio, int_bio);
 }
 
@@ -69,7 +69,7 @@ engine::engine(SSL* ssl_impl)
 #endif // defined(SSL_MODE_RELEASE_BUFFERS)
 
   ::BIO* int_bio = 0;
-  ::BIO_new_bio_pair(&int_bio, 0, &ext_bio_, 0);
+  ::BIO_new_bio_pair(&int_bio, 128*1024, &ext_bio_, 128*1024);
   ::SSL_set_bio(ssl_, int_bio, int_bio);
 }
 
diff --git a/include/asio/ssl/detail/read_op.hpp b/include/asio/ssl/detail/read_op.hpp
index 36b623fc29..a61e2e19e4 100644
--- a/include/asio/ssl/detail/read_op.hpp
+++ b/include/asio/ssl/detail/read_op.hpp
@@ -44,11 +44,61 @@ class read_op
       asio::error_code& ec,
       std::size_t& bytes_transferred) const
   {
+    bytes_transferred = 0;
+
     asio::mutable_buffer buffer =
       asio::detail::buffer_sequence_adapter<asio::mutable_buffer,
         MutableBufferSequence>::first(buffers_);
 
-    return eng.read(buffer, ec, bytes_transferred);
+    while (true)
+    {
+      asio::mutable_buffer current_buffer = buffer + bytes_transferred;
+
+      // If user buffer is full, we are done
+      if (current_buffer.size() == 0)
+      {
+        ec = asio::error_code();
+        return engine::want_nothing;
+      }
+
+      std::size_t bytes = 0;
+      engine::want w = eng.read(current_buffer, ec, bytes);
+
+      bytes_transferred += bytes;
+
+      // If an error occurred but we already got data in this call, return the
+      // data first. This avoids dropping trailing bytes when the peer closes.
+      // If the error persists, it will be reraised in the next call.
+      if (ec)
+      {
+        if ((ec == asio::error::eof) && (bytes_transferred > 0))
+        {
+          ec = asio::error_code();
+          return engine::want_nothing;
+        }
+        return w;
+      }
+
+      switch (w)
+      {
+      case engine::want_nothing:
+        // If we got bytes, LOOP AGAIN to see if more data is waiting in the BIO.
+        if (bytes > 0) continue;
+
+        // If 0 bytes (EOF/Shutdown), fall through to return result
+        [[fallthrough]];
+
+      default:
+        // If we have accumulated ANY data, treat this as success.
+        // This handles want_input, want_output, etc., by returning control
+        // to the caller to process the data before handling the SSL state.
+        if (bytes_transferred > 0)
+        {
+          return engine::want_nothing;
+        }
+        return w;
+      }
+    }
   }
 
   template <typename Handler>
diff --git a/include/asio/ssl/detail/stream_core.hpp b/include/asio/ssl/detail/stream_core.hpp
index 3edf7ccee5..135a9818ff 100644
--- a/include/asio/ssl/detail/stream_core.hpp
+++ b/include/asio/ssl/detail/stream_core.hpp
@@ -29,9 +29,9 @@ namespace detail {
 
 struct stream_core
 {
-  // According to the OpenSSL documentation, this is the buffer size that is
-  // sufficient to hold the largest possible TLS record.
-  enum { max_tls_record_size = 17 * 1024 };
+  // A TLS record requires a buffer size of 17KB at maximum.
+  // We further increase the buffer size to avoid small operations in the io_context.
+  enum { max_tls_record_size = 128 * 1024 };
 
   template <typename Executor>
   stream_core(SSL_CTX* context, const Executor& ex)