Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -631,10 +631,27 @@ public void testJvmRouteLostJoinAtRuntime(TestCluster cluster, HttpClient httpCl
final AtomicReference<String> initialRoute = new AtomicReference<>();
final AtomicReference<String> initialWorker = new AtomicReference<>();

// Extended read timeout: Infinispan state transfer during worker2 join/leave
// can stall request processing beyond the default 10s, especially under CI load.
final long stateTransferTimeout = TestTimeouts.STATE_TRANSFER_REQUEST.toSeconds();

final Future<?> requestTask = executor.submit(() -> {
try {
// Initial request — establishes session on worker1
final HttpResponse response = httpClient.get(balancerUrl);
// Initial request — establishes session on worker1.
// Retry with extended timeout: after stopping worker2 in a previous cycle,
// Infinispan state transfer may still be in progress on worker1.
HttpResponse response = null;
for (int attempt = 0; attempt < 5; attempt++) {
try {
response = httpClient.getWithTimeout(
balancerUrl, stateTransferTimeout, TimeUnit.SECONDS);
if (response.getStatusCode() == 200) break;
} catch (IOException e) {
log.warn("Cycle {} initial request attempt {}/5 failed: {}",
currentCycle, attempt + 1, e.getMessage());
if (attempt == 4) throw e;
}
}
final String cookie = response.getCookie("JSESSIONID");

final String sessionId = extractSessionIdOnly(cookie);
Expand All @@ -657,17 +674,22 @@ public void testJvmRouteLostJoinAtRuntime(TestCluster cluster, HttpClient httpCl
// Allow occasional IOExceptions (SocketTimeoutException) and HTTP 500
// (Infinispan timeout when worker2 joins/leaves and triggers state transfer)
// on CI where Podman rootless networking causes delays.
// Generous failure budget: state transfer during worker2 join/leave
// causes both SocketTimeoutException and HTTP 500 on the remaining node.
// The budget covers up to ~half the requests failing transiently.
final int maxTransientFailures = 25;
int transientFailures = 0;
for (int i = 0; i < 50; i++) {
try {
final HttpResponse req = httpClient.getWithSession(balancerUrl, "JSESSIONID=" + cookie);
final HttpResponse req = httpClient.getWithSession(
balancerUrl, "JSESSIONID=" + cookie,
stateTransferTimeout, TimeUnit.SECONDS);

if (req.getStatusCode() == 500) {
// HTTP 500 from Infinispan timeout during state transfer
transientFailures++;
log.warn("Cycle {} request {} got HTTP 500 ({}/10 allowed)",
currentCycle, i, transientFailures);
if (transientFailures > 10) {
log.warn("Cycle {} request {} got HTTP 500 ({}/{} allowed)",
currentCycle, i, transientFailures, maxTransientFailures);
if (transientFailures > maxTransientFailures) {
assertThat(req.getStatusCode())
.as("Cycle %d request %d: Too many HTTP 500 errors", currentCycle, i)
.isEqualTo(200);
Expand All @@ -684,9 +706,9 @@ public void testJvmRouteLostJoinAtRuntime(TestCluster cluster, HttpClient httpCl
}
} catch (IOException e) {
transientFailures++;
log.warn("Cycle {} request {} failed with IOException ({}/10 allowed): {}",
currentCycle, i, transientFailures, e.getMessage());
if (transientFailures > 10) {
log.warn("Cycle {} request {} failed with IOException ({}/{} allowed): {}",
currentCycle, i, transientFailures, maxTransientFailures, e.getMessage());
if (transientFailures > maxTransientFailures) {
throw e;
}
}
Expand Down
35 changes: 35 additions & 0 deletions src/test/java/org/jboss/modcluster/test/utils/HttpClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,41 @@ public HttpResponse getWithSession(String url, String sessionCookie) throws IOEx
return get(url, headers);
}

/**
 * Issues a GET carrying the given session cookie, using a caller-supplied read timeout
 * instead of the shared client's configured one.
 *
 * <p>Intended for situations where the default 10-second read timeout is too short,
 * for example while Infinispan state transfer is running because a node is joining
 * or leaving the cluster. The request is sent with {@code Connection: close}.
 *
 * @param url the URL to request
 * @param sessionCookie the session cookie value (e.g., "JSESSIONID=abc.worker1")
 * @param timeout read timeout duration
 * @param unit time unit for the timeout
 * @return the HTTP response (status code, body text, cookies and headers)
 * @throws IOException if the request fails
 * @see #getWithSession(String, String)
 */
public HttpResponse getWithSession(String url, String sessionCookie,
        long timeout, TimeUnit unit) throws IOException {
    // Build the request first; it does not depend on the client configuration.
    Request.Builder requestBuilder = new Request.Builder();
    requestBuilder.url(url);
    requestBuilder.addHeader("Cookie", sessionCookie);
    requestBuilder.addHeader("Connection", "close");
    Request sessionRequest = requestBuilder.build();

    // Derive a client from the shared one, overriding only the read timeout.
    OkHttpClient timeoutClient = client.newBuilder().readTimeout(timeout, unit).build();

    try (Response reply = timeoutClient.newCall(sessionRequest).execute()) {
        int status = reply.code();

        // A missing body is reported as an empty string.
        String payload = "";
        if (reply.body() != null) {
            payload = reply.body().string();
        }

        return new HttpResponse(status, payload, extractCookies(reply), extractHeaders(reply));
    }
}

/**
* Perform an HTTPS GET request (ignoring certificate validation).
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ private TestTimeouts() {
/** Timeout for failover completion after worker kill, including Infinispan rebalancing. */
public static final Duration FAILOVER = durationSeconds("test.timeout.failover", 120);

/**
 * HTTP read timeout for requests sent during Infinispan state transfer.
 * When a node joins or leaves the cluster, JGroups view changes and Infinispan
 * cache rebalancing can stall request processing on the remaining node for
 * longer than the default 10-second read timeout, especially under CI load.
 *
 * <p>Declared with a default of 30 seconds under the key
 * {@code test.timeout.state.transfer.request}.
 * NOTE(review): presumably the key is looked up as an override (e.g. a system
 * property) by {@code durationSeconds} — confirm against that helper.
 */
public static final Duration STATE_TRANSFER_REQUEST = durationSeconds("test.timeout.state.transfer.request", 30);

// -- Helpers --

private static Duration durationSeconds(String prop, int defaultSeconds) {
Expand Down
Loading