From 12b2237b476fd188212a010c4915da1dc0998547 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 3 Jun 2026 21:12:41 -0500 Subject: [PATCH] SOLR-18284: Load-average and memory circuit breakers no longer stampede or trip on transient pre-GC heap peaks Load-average and memory circuit breakers were doing more harm than good under high concurrency. LoadAverageCircuitBreaker called OperatingSystemMXBean.getSystemLoadAverage() on every request. The OS load average is a one-minute moving average, so re-polling it per-request is wasted work - and when many requests arrived concurrently, the syscall stampede hammered the CPU, which is the condition this breaker exists to prevent rather than cause. MemoryCircuitBreaker sampled MemoryMXBean.getHeapMemoryUsage().getUsed() on a 30-second moving average. With a generational collector that signal climbs toward max between collections during normal operation; the breaker would trip on transient pre-GC peaks that GC was about to reclaim. --- ...18284-load-and-memory-circuit-breakers.yml | 8 + .../CircuitBreakerRegistry.java | 9 + .../LoadAverageCircuitBreaker.java | 17 +- .../circuitbreaker/MemoryCircuitBreaker.java | 195 +++++++++++++----- .../util/circuitbreaker/TtlSampledMetric.java | 109 ++++++++++ .../apache/solr/util/TestCircuitBreakers.java | 4 +- .../TestHeapPressureCircuitBreakers.java | 115 +++++++++++ ...TestLoadAverageCircuitBreakerSampling.java | 174 ++++++++++++++++ .../pages/circuit-breakers.adoc | 15 ++ 9 files changed, 590 insertions(+), 56 deletions(-) create mode 100644 changelog/unreleased/SOLR-18284-load-and-memory-circuit-breakers.yml create mode 100644 solr/core/src/java/org/apache/solr/util/circuitbreaker/TtlSampledMetric.java create mode 100644 solr/core/src/test/org/apache/solr/util/circuitbreaker/TestHeapPressureCircuitBreakers.java create mode 100644 solr/core/src/test/org/apache/solr/util/circuitbreaker/TestLoadAverageCircuitBreakerSampling.java diff --git a/changelog/unreleased/SOLR-18284-load-and-memory-circuit-breakers.yml b/changelog/unreleased/SOLR-18284-load-and-memory-circuit-breakers.yml new file mode 100644 index 000000000000..2c4bdc7887b5 --- /dev/null +++ b/changelog/unreleased/SOLR-18284-load-and-memory-circuit-breakers.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Load-average and memory circuit breakers no longer stampede or trip on transient pre-GC heap peaks +type: fixed +authors: + - name: Mark Robert Miller +links: + - name: SOLR-18284 + url: https://issues.apache.org/jira/browse/SOLR-18284 diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java index 075aefc23dd8..16b2f9c752d6 100644 --- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java +++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java @@ -62,6 +62,15 @@ public class CircuitBreakerRegistry implements Closeable { public static final String SYSPROP_QUERY_LOADAVG = SYSPROP_PREFIX + "query.loadavg"; public static final String SYSPROP_WARN_ONLY_SUFFIX = ".warnonly"; + /** + * Default TTL (ms) of cached load-average / heap samples consulted by {@link + * LoadAverageCircuitBreaker} and {@link MemoryCircuitBreaker}. Override per-process via the + * {@value #SYSPROP_SAMPLE_TTL_MS} system property. + */ + public static final long DEFAULT_SAMPLE_TTL_MS = 1000L; + + public static final String SYSPROP_SAMPLE_TTL_MS = SYSPROP_PREFIX + "sample.ttl.ms"; + private static boolean globalsInitialized = false; private static final Map> globalCircuitBreakerMap = new ConcurrentHashMap<>(); diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/LoadAverageCircuitBreaker.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/LoadAverageCircuitBreaker.java index 64b786aa5333..a5784b0322a0 100644 --- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/LoadAverageCircuitBreaker.java +++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/LoadAverageCircuitBreaker.java @@ -21,6 +21,7 @@ import java.lang.management.ManagementFactory; import java.lang.management.OperatingSystemMXBean; import java.util.Locale; +import org.apache.solr.common.util.EnvUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,6 +32,11 @@ * uses that data to take a decision. We depend on OperatingSystemMXBean which does not allow a * configurable interval of collection of data. * + *

Because the OS load average is a one-minute moving average, polling the metric per request is + * wasted work. Successive {@link #isTripped()} invocations reuse a sample for {@link + * CircuitBreakerRegistry#SYSPROP_SAMPLE_TTL_MS} (default {@value + * CircuitBreakerRegistry#DEFAULT_SAMPLE_TTL_MS} ms). + * *

This Circuit breaker is dependent on the operating system, and may typically not work on * Microsoft Windows. */ @@ -41,6 +47,15 @@ public class LoadAverageCircuitBreaker extends CircuitBreaker { private double loadAverageThreshold; + // Per-instance cache: tests subclass this breaker and override calculateLiveLoadAverage(); a + // shared cache would return values produced by the first override that wins the CAS to all + // subsequent instances. The TTL still bounds the rate of underlying syscalls per instance. + private final TtlSampledMetric loadAverageCache = + new TtlSampledMetric<>( + EnvUtils.getPropertyAsLong( + CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, + CircuitBreakerRegistry.DEFAULT_SAMPLE_TTL_MS)); + // Assumption -- the value of these parameters will be set correctly before invoking // getDebugInfo() private static final ThreadLocal seenLoadAverage = ThreadLocal.withInitial(() -> 0.0); @@ -54,7 +69,7 @@ public LoadAverageCircuitBreaker() { @Override public boolean isTripped() { double localAllowedLoadAverage = getLoadAverageThreshold(); - double localSeenLoadAverage = calculateLiveLoadAverage(); + double localSeenLoadAverage = loadAverageCache.get(this::calculateLiveLoadAverage); if (localSeenLoadAverage < 0) { if (log.isWarnEnabled()) { diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java index 3888283589e9..2f6fbf97e393 100644 --- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java +++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java @@ -17,75 +17,145 @@ package org.apache.solr.util.circuitbreaker; -import java.io.IOException; import java.lang.invoke.MethodHandles; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryPoolMXBean; +import java.lang.management.MemoryType; +import java.lang.management.MemoryUsage; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; -import org.apache.solr.util.RefCounted; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.regex.Pattern; +import org.apache.solr.common.util.EnvUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Tracks the current JVM heap usage and triggers if a moving heap usage average over 30 seconds - * exceeds the defined percentage of the maximum heap size allocated to the JVM. Once the average - * memory usage goes below the threshold, it will start allowing queries again. + * Trips when post-collection live data in the JVM heap exceeds a configured percentage of the + * maximum heap size. * - *

The memory threshold is defined as a percentage of the maximum memory allocated -- see - * memThreshold in solrconfig.xml. + *

The signal is read from {@link MemoryPoolMXBean#getCollectionUsage()} on the old/tenured heap + * pool, which reports memory usage immediately after the most recent collection that affected that + * pool. This is the only memory reading that distinguishes "live data" from "garbage waiting to be + * collected." + * + *

Earlier versions of this breaker sampled {@link MemoryMXBean#getHeapMemoryUsage()} on a + * 30-second moving average, which produced a high signal during normal operation: with a + * generational collector, {@code used} climbs toward {@code max} between collections — that's the + * steady-state shape, not a problem. The new signal updates only when an old-gen GC runs, which is + * the only point at which "how full is the heap really?" has a defined answer. + * + *

Pool selection by collector: + * + *

+ * + *

Pre-first-GC, {@link MemoryPoolMXBean#getCollectionUsage()} can return {@code null} on every + * pool; in that case the breaker reports {@code 0} live bytes and will not trip until the JVM has + * performed at least one collection on a heap pool. + * + *

The threshold semantics are unchanged: configure a percentage of the maximum heap size, and + * the breaker trips when post-GC live data exceeds that percentage. */ public class MemoryCircuitBreaker extends CircuitBreaker { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean(); - // One shared provider / executor for all instances of this class - private static RefCounted averagingMetricProvider; + + /** Guards the one-shot deprecation warning emitted by {@link #MemoryCircuitBreaker(int, int)}. */ + private static final AtomicBoolean DEPRECATED_CTOR_WARNED = new AtomicBoolean(false); + + /** + * Word-boundary match for the old/tenured generation pool name across HotSpot collectors. + * Examples that match: {@code "G1 Old Gen"}, {@code "PS Old Gen"}, {@code "Tenured Gen"}, {@code + * "ZGC Old Generation"}. Word boundaries prevent false positives such as a hypothetical pool + * literally named {@code "ColdCache"}. Non-generational ZGC and Shenandoah expose a single + * combined heap pool whose name matches none of these — the fallback path sums every HEAP pool + * instead. + */ + private static final Pattern OLD_GEN_NAME = Pattern.compile("\\b(Old|Tenured)\\b"); + + /** + * Lazily-initialized snapshot of the JVM heap pools. Pool resolution is deferred to the first + * call to {@link #samplePostGcLiveBytes()} — i.e. the first request after the breaker is in use — + * rather than performed at class load. This avoids any risk that a collector lazily creates its + * old-generation pool after Solr's class loader has already touched {@link MemoryCircuitBreaker} + * but before the GC subsystem has fully initialized. Once resolved the snapshot is stable for the + * life of the JVM (the {@code MemoryPoolMXBean} list is fixed). + */ + private static final class HeapPools { + static final List OLD_GEN; + static final List ALL_HEAP; + + static { + List oldGen = new ArrayList<>(2); + List allHeap = new ArrayList<>(4); + for (MemoryPoolMXBean pool : ManagementFactory.getMemoryPoolMXBeans()) { + if (pool.getType() != MemoryType.HEAP) { + continue; + } + allHeap.add(pool); + String name = pool.getName(); + if (name != null && OLD_GEN_NAME.matcher(name).find()) { + oldGen.add(pool); + } + } + OLD_GEN = List.copyOf(oldGen); + ALL_HEAP = List.copyOf(allHeap); + } + } private long heapMemoryThreshold; + // Per-instance cache: tests subclass this breaker and override getAvgMemoryUsage() (and may in + // future override samplePostGcLiveBytes()); a single shared cache would return values produced + // by whichever instance won the first CAS to all subsequent instances. The TTL still bounds the + // rate of underlying MemoryPoolMXBean walks per instance. + private final TtlSampledMetric heapLiveCache = + new TtlSampledMetric<>( + EnvUtils.getPropertyAsLong( + CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, + CircuitBreakerRegistry.DEFAULT_SAMPLE_TTL_MS)); + private static final ThreadLocal seenMemory = ThreadLocal.withInitial(() -> 0L); private static final ThreadLocal allowedMemory = ThreadLocal.withInitial(() -> 0L); - /** Creates an instance which averages over 6 samples during last 30 seconds. */ public MemoryCircuitBreaker() { - this(6, 5); + super(); } /** - * Constructor that allows override of sample interval for which the memory usage is fetched. This - * is provided for testing, not intended for general use because the average metric provider - * implementation is the same for all instances of the class. - * - * @param numSamples number of samples to calculate average for - * @param sampleInterval interval between each sample + * @deprecated Retained only for backwards source-compatibility with subclasses that called the + * pre-Solr-10.1 constructor. The arguments are ignored: the breaker no longer averages + * samples — see the class javadoc. Subclasses should use {@link #MemoryCircuitBreaker()}. */ + @Deprecated(forRemoval = true) protected MemoryCircuitBreaker(int numSamples, int sampleInterval) { - super(); - synchronized (MemoryCircuitBreaker.class) { - if (averagingMetricProvider == null || averagingMetricProvider.getRefcount() == 0) { - averagingMetricProvider = - new RefCounted<>( - new AveragingMetricProvider( - () -> MEMORY_MX_BEAN.getHeapMemoryUsage().getUsed(), - numSamples, - sampleInterval)) { - @Override - protected void close() { - get().close(); - } - }; - } - averagingMetricProvider.incref(); + this(); + if (DEPRECATED_CTOR_WARNED.compareAndSet(false, true)) { + log.warn( + "MemoryCircuitBreaker(int, int) is deprecated and its arguments (numSamples={}, " + + "sampleInterval={}) are ignored: the breaker no longer averages samples and now " + + "reads post-GC live heap data directly. See the MemoryCircuitBreaker javadoc. " + + "Switch to the no-arg constructor.", + numSamples, + sampleInterval); } } public MemoryCircuitBreaker setThreshold(double thresholdValueInPercentage) { long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax(); - if (currentMaxHeap <= 0) { throw new IllegalArgumentException("Invalid JVM state for the max heap usage"); } - double thresholdInFraction = thresholdValueInPercentage / (double) 100; + double thresholdInFraction = thresholdValueInPercentage / 100.0; heapMemoryThreshold = (long) (currentMaxHeap * thresholdInFraction); if (heapMemoryThreshold <= 0) { @@ -96,19 +166,51 @@ public MemoryCircuitBreaker setThreshold(double thresholdValueInPercentage) { @Override public boolean isTripped() { - - long localAllowedMemory = getCurrentMemoryThreshold(); + long localAllowedMemory = heapMemoryThreshold; long localSeenMemory = getAvgMemoryUsage(); allowedMemory.set(localAllowedMemory); - seenMemory.set(localSeenMemory); - return (localSeenMemory >= localAllowedMemory); + return localSeenMemory >= localAllowedMemory; } + /** + * Returns post-GC live bytes for use by {@link #isTripped()}, cached for {@link + * CircuitBreakerRegistry#SYSPROP_SAMPLE_TTL_MS} ms so high-QPS callers don't repeatedly walk the + * heap pool list. + * + *

The historical name is preserved for source-compatibility. Tests that need to inject a + * synthetic value typically override this method directly (which bypasses the cache); tests that + * want to feed a synthetic sample through the cache should override {@link + * #samplePostGcLiveBytes()} instead. The implementation no longer averages anything. + */ protected long getAvgMemoryUsage() { - return (long) averagingMetricProvider.get().getMetricValue(); + return heapLiveCache.get(this::samplePostGcLiveBytes); + } + + /** + * Sum of {@code getCollectionUsage().getUsed()} across the old/tenured heap pool, falling back to + * the union of all {@link MemoryType#HEAP} pools when no pool name matches {@link #OLD_GEN_NAME} + * (non-generational ZGC, Shenandoah). Pool resolution is deferred to the first call to this + * method — see {@link HeapPools}. + * + *

Visible for subclassing in tests that want to feed a synthetic sample through the TTL cache. + * Most tests override {@link #getAvgMemoryUsage()} instead and bypass the cache entirely. + * + * @return post-GC live bytes, or {@code 0} if no GC has yet run on a heap pool + */ + protected long samplePostGcLiveBytes() { + List pools = + HeapPools.OLD_GEN.isEmpty() ? HeapPools.ALL_HEAP : HeapPools.OLD_GEN; + long total = 0; + for (MemoryPoolMXBean pool : pools) { + MemoryUsage cu = pool.getCollectionUsage(); + if (cu != null) { + total += cu.getUsed(); + } + } + return total; } @Override @@ -120,19 +222,6 @@ public String getErrorMessage() { + allowedMemory.get(); } - private long getCurrentMemoryThreshold() { - return heapMemoryThreshold; - } - - @Override - public void close() throws IOException { - synchronized (MemoryCircuitBreaker.class) { - if (averagingMetricProvider != null && averagingMetricProvider.getRefcount() > 0) { - averagingMetricProvider.decref(); - } - } - } - @Override public String toString() { return String.format( diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/TtlSampledMetric.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/TtlSampledMetric.java new file mode 100644 index 000000000000..2545248e5e3f --- /dev/null +++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/TtlSampledMetric.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; + +/** + * Single-flight, time-bounded cache around an expensive metric sample. + * + *

+ * + *

Used by {@link LoadAverageCircuitBreaker} and {@link MemoryCircuitBreaker} so that high-QPS + * admission control cannot stampede the OS load-average syscall or the post-GC heap-pool walk: even + * under thousands of concurrent {@code isTripped()} callers, the underlying sampler is invoked at + * most once per TTL window. + * + *

Exception behavior: if the {@code source} supplier throws, the exception propagates to + * the calling thread and no new sample is published. Any previously-published value remains and + * other concurrent callers continue to see it; the next caller to find the entry stale will retry + * the supplier. The {@code refreshing} flag is always released, so a thrown sampler does not wedge + * the single-flight latch. + */ +final class TtlSampledMetric { + + private final long ttlNanos; + private final AtomicReference> sample = new AtomicReference<>(); + private final AtomicBoolean refreshing = new AtomicBoolean(false); + + TtlSampledMetric(long ttlMs) { + this.ttlNanos = TimeUnit.MILLISECONDS.toNanos(ttlMs); + } + + T get(Supplier source) { + long now = System.nanoTime(); + Sample s = sample.get(); + if (s != null && (now - s.nanos) < ttlNanos) { + return s.value; + } + if (s != null) { + // Stale value present: single-flight refresh — exactly one thread runs the sampler; + // every other concurrent caller returns the most recent published value immediately. + if (refreshing.compareAndSet(false, true)) { + try { + T v = source.get(); + sample.set(new Sample<>(System.nanoTime(), v)); + return v; + } finally { + refreshing.set(false); + } + } + // Re-read so we return whatever the winning thread may have just published, rather than + // the older snapshot we captured at the top of this call. + Sample latest = sample.get(); + return latest != null ? latest.value : s.value; + } + // No sample yet — one-time cold path. Synchronize so the first wave of concurrent callers + // does not all run the sampler. The monitor is held across the sampler invocation; this is + // acceptable because the cold path runs at most once per successful publish, and only callers + // that arrive before any value has been published are affected. + synchronized (this) { + s = sample.get(); + if (s != null) { + return s.value; + } + T v = source.get(); + sample.set(new Sample<>(System.nanoTime(), v)); + return v; + } + } + + private static final class Sample { + final long nanos; + final T value; + + Sample(long nanos, T value) { + this.nanos = nanos; + this.value = value; + } + } +} diff --git a/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java b/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java index 23381e9de012..e014c6499cf2 100644 --- a/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java +++ b/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java @@ -388,7 +388,7 @@ public boolean isTripped() { private static class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker { public FakeMemoryPressureCircuitBreaker() { - super(1, 1); + super(); } @Override @@ -401,7 +401,7 @@ private static class BuildingUpMemoryPressureCircuitBreaker extends MemoryCircui private AtomicInteger count; public BuildingUpMemoryPressureCircuitBreaker() { - super(1, 1); + super(); this.count = new AtomicInteger(0); } diff --git a/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestHeapPressureCircuitBreakers.java b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestHeapPressureCircuitBreakers.java new file mode 100644 index 000000000000..191eaa9c5ced --- /dev/null +++ b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestHeapPressureCircuitBreakers.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryPoolMXBean; +import java.lang.management.MemoryType; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.solr.SolrTestCase; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Verifies that {@link MemoryCircuitBreaker} reads post-GC live data — not the raw, garbage- + * inflated heap usage — so it does not trip on transient pre-collection peaks that GC will reclaim. + */ +public class TestHeapPressureCircuitBreakers extends SolrTestCase { + + @Before + public void setUpProps() { + System.setProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, "0"); + } + + @After + public void tearDownProps() { + System.clearProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS); + } + + @Test + public void memoryBreakerReadsRealHeapPools() { + // Sanity: the in-process JVM exposes at least one heap pool. samplePostGcLiveBytes() must + // return a non-negative value computed from real pool data, regardless of which collector + // is active. (Pre-first-GC, getCollectionUsage() can be zero on every pool — accept that.) + boolean hasHeapPool = false; + for (MemoryPoolMXBean pool : ManagementFactory.getMemoryPoolMXBeans()) { + if (pool.getType() == MemoryType.HEAP) { + hasHeapPool = true; + break; + } + } + assertTrue("JVM must expose at least one HEAP MemoryPoolMXBean", hasHeapPool); + + long bytes = new MemoryCircuitBreaker().samplePostGcLiveBytes(); + assertTrue("post-GC live bytes must be non-negative, got " + bytes, bytes >= 0); + } + + @Test + public void memoryBreakerTripsWhenOverThreshold() { + MemoryCircuitBreaker breaker = + new MemoryCircuitBreaker() { + @Override + protected long getAvgMemoryUsage() { + return Long.MAX_VALUE; // simulate "heap is exhausted post-GC" + } + }; + breaker.setThreshold(50.0); + assertTrue(breaker.isTripped()); + } + + @Test + public void memoryBreakerDoesNotTripWhenUnderThreshold() { + MemoryCircuitBreaker breaker = + new MemoryCircuitBreaker() { + @Override + protected long getAvgMemoryUsage() { + return 0L; // simulate "post-GC live data is empty" + } + }; + breaker.setThreshold(50.0); + assertFalse(breaker.isTripped()); + } + + @Test + public void samplePostGcLiveBytesIsCachedThroughGetAvgMemoryUsage() { + // With a long TTL, getAvgMemoryUsage() should consult samplePostGcLiveBytes() at most once + // across many isTripped() invocations — i.e. the cache wraps the overridable hook, not just + // the static heap-pool walk. + System.setProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, "60000"); + try { + AtomicInteger calls = new AtomicInteger(); + MemoryCircuitBreaker breaker = + new MemoryCircuitBreaker() { + @Override + protected long samplePostGcLiveBytes() { + calls.incrementAndGet(); + return 0L; + } + }; + breaker.setThreshold(50.0); + for (int i = 0; i < 50; i++) { + breaker.isTripped(); + } + assertEquals( + "samplePostGcLiveBytes() must be invoked at most once per TTL window", 1, calls.get()); + } finally { + System.setProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, "0"); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestLoadAverageCircuitBreakerSampling.java b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestLoadAverageCircuitBreakerSampling.java new file mode 100644 index 000000000000..c191acde185f --- /dev/null +++ b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestLoadAverageCircuitBreakerSampling.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.solr.SolrTestCase; +import org.apache.solr.common.util.ExecutorUtil; +import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Verifies that {@link LoadAverageCircuitBreaker} caches the result of {@code + * calculateLiveLoadAverage()} for the configured TTL so that high-QPS admission control does not + * re-poll {@code OperatingSystemMXBean.getSystemLoadAverage()} per request. + */ +public class TestLoadAverageCircuitBreakerSampling extends SolrTestCase { + + @Before + public void setUp() throws Exception { + super.setUp(); + // Long TTL so successive isTripped() calls within one test stay in the same cache window. + System.setProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, "60000"); + } + + @After + public void tearDownProps() { + System.clearProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS); + } + + @Test + public void successiveIsTrippedSharesOneSample() { + AtomicInteger calls = new AtomicInteger(); + LoadAverageCircuitBreaker breaker = + new LoadAverageCircuitBreaker() { + @Override + protected double calculateLiveLoadAverage() { + calls.incrementAndGet(); + return 1.0; + } + }; + breaker.setThreshold(0.5); + + for (int i = 0; i < 100; i++) { + assertTrue("breaker should be tripped (1.0 >= 0.5)", breaker.isTripped()); + } + assertEquals( + "Underlying calculateLiveLoadAverage() must be invoked once per TTL window, not per call", + 1, + calls.get()); + } + + @Test + public void zeroTtlDisablesCache() { + System.setProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, "0"); + AtomicInteger calls = new AtomicInteger(); + LoadAverageCircuitBreaker breaker = + new LoadAverageCircuitBreaker() { + @Override + protected double calculateLiveLoadAverage() { + calls.incrementAndGet(); + return 1.0; + } + }; + breaker.setThreshold(0.5); + + for (int i = 0; i < 5; i++) { + breaker.isTripped(); + } + assertEquals("With zero TTL, every call re-evaluates", 5, calls.get()); + } + + /** + * Stampede scenario: many concurrent callers find the cache stale at the same instant. Single- + * flight refresh must ensure the underlying sampler is invoked at most once across all of them. + * Without this guarantee, a burst of concurrent requests would each pin a CPU on the load- + * average syscall, which is the bug this breaker exists to prevent — not cause. + */ + @Test + public void concurrentStampedeRunsSamplerAtMostOnce() throws Exception { + // Force every call to find the cache stale. + System.setProperty(CircuitBreakerRegistry.SYSPROP_SAMPLE_TTL_MS, "0"); + + final AtomicInteger samplerInvocations = new AtomicInteger(); + final AtomicInteger inFlight = new AtomicInteger(); + final AtomicInteger maxInFlight = new AtomicInteger(); + + LoadAverageCircuitBreaker breaker = + new LoadAverageCircuitBreaker() { + @Override + protected double calculateLiveLoadAverage() { + int now = inFlight.incrementAndGet(); + maxInFlight.accumulateAndGet(now, Math::max); + try { + // Hold the "syscall" long enough that, without single-flight, every concurrent + // caller would pile in. + Thread.sleep(20); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + } finally { + inFlight.decrementAndGet(); + } + samplerInvocations.incrementAndGet(); + return 1.0; + } + }; + breaker.setThreshold(0.5); + + // Prime the cache so the stampede hits the stale-refresh path (not the cold path). + breaker.isTripped(); + int primed = samplerInvocations.get(); + + final int threads = 64; + final CountDownLatch start = new CountDownLatch(1); + final CountDownLatch done = new CountDownLatch(threads); + ExecutorService pool = + ExecutorUtil.newMDCAwareFixedThreadPool( + threads, new SolrNamedThreadFactory("TestLoadAverageCircuitBreakerSampling")); + try { + for (int i = 0; i < threads; i++) { + pool.submit( + () -> { + try { + start.await(); + breaker.isTripped(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + } finally { + done.countDown(); + } + }); + } + start.countDown(); + assertTrue("all threads completed", done.await(30, TimeUnit.SECONDS)); + } finally { + pool.shutdownNow(); + } + + assertEquals( + "Single-flight: at most one sampler invocation runs at a time across the stampede", + 1, + maxInFlight.get()); + // We deliberately don't assert an exact stampedeInvocations count here. Single-flight + // guarantees at-most-one *concurrent* sampler invocation; under load a thread that arrives + // after the winning thread releases the CAS legitimately runs the sampler again. Asserting + // the count would make the test timing-sensitive on slow CI shards. The maxInFlight==1 + // assertion above captures the actual invariant. + int stampedeInvocations = samplerInvocations.get() - primed; + assertTrue( + "Sanity: stampede produced at least one sampler invocation (saw " + + stampedeInvocations + + ")", + stampedeInvocations >= 1); + } +} diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc index e8c4b5b115cc..70eb07a85170 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc @@ -71,6 +71,16 @@ HTTP error code from circuit breakers is configurable with java system property This circuit breaker tracks JVM heap memory usage and rejects incoming requests with a 429 error code if the heap usage exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is controlling the threshold percentage at which the breaker will trip. +The heap-usage signal is read from `MemoryPoolMXBean.getCollectionUsage()` on the old/tenured generation pool — that is, JVM heap usage *immediately after* the most recent collection. +This is the only heap reading that distinguishes live data from garbage waiting to be reclaimed; on a generational collector, raw `used` heap climbs toward `max` between collections during normal operation, which is the steady-state shape of a healthy JVM rather than a problem worth tripping on. + +For non-generational collectors (non-generational ZGC, Shenandoah) where the JVM exposes a single combined heap pool, the breaker sums `getCollectionUsage()` across all `HEAP`-typed pools instead. + +[NOTE] +==== +Until the JVM has performed at least one collection on a heap pool, `getCollectionUsage()` may return `null` on every pool and the breaker will report `0` live bytes — meaning it will not trip. On a freshly started node with a small workload this can take some time, particularly under ZGC or Shenandoah. +==== + To enable and configure the JVM heap usage based circuit breaker, add the following: .Per collection in `solrconfig.xml` @@ -155,6 +165,11 @@ SOLR_CIRCUITBREAKER_QUERY_LOADAVG=8.0 The triggering threshold is a floating point number matching load average. The example circuit breaker above will trip when the load average is equal to or greater than 8.0. +[NOTE] +==== +The OS load average is itself a moving average (typically over one minute), so this breaker caches the sampled value for a short TTL and shares it across concurrent requests; under high QPS the underlying `getSystemLoadAverage()` syscall is invoked at most once per TTL window. The TTL defaults to 1000 ms and is configurable per-process with the `solr.circuitbreaker.sample.ttl.ms` system property (or the equivalent `SOLR_CIRCUITBREAKER_SAMPLE_TTL_MS` environment variable). The TTL is read once when each breaker is constructed, so changes apply to subsequently-created breakers, not to instances already in use. The same TTL is also applied to the JVM Heap Usage breaker's post-GC sample. +==== + [NOTE] ==== The System Load Average Circuit breaker behavior is dependent on the operating system, and may not work on some operating systems like Microsoft Windows. See https://docs.oracle.com/en/java/javase/17/docs/api/java.management/java/lang/management/OperatingSystemMXBean.html#getSystemLoadAverage()[JavaDoc] for more.