diff --git a/.github/scripts/dependency_age.py b/.github/scripts/dependency_age.py
index 62e27422e2e..9c0a77a8bc2 100644
--- a/.github/scripts/dependency_age.py
+++ b/.github/scripts/dependency_age.py
@@ -452,8 +452,10 @@ def build_validation_summary(
             if old_gav not in seen:
                 seen.add(old_gav)
                 if new_gav in baseline_coords:
-                    continue  # no-op downgrade — replacement matches baseline
-                lines.append(f"- `{old_gav}` is {hours_remaining}h away from meeting {min_age_hours}h cooldown, updated to `{new_gav}`")
+                    lines.append(f"- `{old_gav}` is {hours_remaining}h away from meeting {min_age_hours}h cooldown, reverted")
+                else:
+                    new_version = new_gav.rsplit(":", 1)[1]
+                    lines.append(f"- `{old_gav}` is {hours_remaining}h away from meeting {min_age_hours}h cooldown, updated to `{new_version}`")
     for entries in violations_by_file.values():
         for gav, kind, hours_remaining in entries:
             if gav not in seen:
@@ -462,8 +464,6 @@ def build_validation_summary(
                     lines.append(f"- `{gav}` — cannot verify age, reverted")
                 else:
                     lines.append(f"- `{gav}` is {hours_remaining}h away from meeting {min_age_hours}h cooldown, reverted")
-    if len(lines) == 2:
-        return "" # only header, no entries after filtering
     return "\n".join(lines)
 
 
diff --git a/.github/workflows/analyze-changes.yaml b/.github/workflows/analyze-changes.yaml
index 8622431686a..51f652b2b4b 100644
--- a/.github/workflows/analyze-changes.yaml
+++ b/.github/workflows/analyze-changes.yaml
@@ -30,7 +30,7 @@ jobs:
             ${{ runner.os }}-gradle-
         
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+        uses: github/codeql-action/init@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
         with:
           languages: 'java'
           build-mode: 'manual'
@@ -43,7 +43,7 @@ jobs:
           ./gradlew clean :dd-java-agent:shadowJar --build-cache --parallel --stacktrace --no-daemon --max-workers=4
 
       - name: Perform CodeQL Analysis and upload results to GitHub Security tab
-        uses: github/codeql-action/analyze@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+        uses: github/codeql-action/analyze@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
 
   trivy:
     name: Analyze changes with Trivy
@@ -102,7 +102,7 @@ jobs:
           TRIVY_JAVA_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-java-db,public.ecr.aws/aquasecurity/trivy-java-db
 
       - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
+        uses: github/codeql-action/upload-sarif@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0
         if: always()
         with:
           sarif_file: 'trivy-results.sarif'
diff --git a/.github/workflows/prune-old-pull-requests.yaml b/.github/workflows/prune-old-pull-requests.yaml
index e5633a39415..db92774ea0c 100644
--- a/.github/workflows/prune-old-pull-requests.yaml
+++ b/.github/workflows/prune-old-pull-requests.yaml
@@ -13,7 +13,7 @@ jobs:
       pull-requests: write
     steps:
     - name: Prune old pull requests
-      uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
+      uses: actions/stale@eb5cf3af3ac0a1aa4c9c45633dd1ae542a27a899 # v10.3.0
       with:
         days-before-stale: -1 # Disable general stale bot
         days-before-pr-stale: 90 # Only enable stale bot for PRs with no activity for 90 days
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8f19a760870..4081c4b7123 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -161,8 +161,8 @@ default:
     fi
 
     echo -e "${TEXT_BOLD}${TEXT_YELLOW}Runner dashboard, these are live (limited to a 1-hour window, and expire after 36 hours)${TEXT_CLEAR}"
-    echo -e "${TEXT_BOLD}${TEXT_YELLOW}   Containers:${TEXT_CLEAR} https://app.datadoghq.com/containers?${TIME_PARAMS}query=image_name%3A%2A%2Fdatadog%2Fdd-trace-java-docker-build%20AND%20pod_name%3A${POD_NAME}&live=false"
-    echo -e "${TEXT_BOLD}${TEXT_YELLOW}   Processes:${TEXT_CLEAR} https://app.datadoghq.com/process?${TIME_PARAMS}query=image_name%3A%2A%2Fdatadog%2Fdd-trace-java-docker-build%20AND%20pod_name%3A${POD_NAME}&live=false"
+    echo -e "${TEXT_BOLD}${TEXT_YELLOW}   Containers:${TEXT_CLEAR} https://app.datadoghq.com/containers?${TIME_PARAMS}query=image_name%3A%2A%2Fdd-trace-java-docker-build%20AND%20pod_name%3A${POD_NAME}&live=false"
+    echo -e "${TEXT_BOLD}${TEXT_YELLOW}   Processes:${TEXT_CLEAR} https://app.datadoghq.com/process?${TIME_PARAMS}query=image_name%3A%2A%2Fdd-trace-java-docker-build%20AND%20pod_name%3A${POD_NAME}&live=false"
 
 .gitlab_base_ref_params: &gitlab_base_ref_params
   - |
@@ -548,7 +548,15 @@ check_debugger:
 
 muzzle:
   extends: .gradle_build
-  needs: [ build_tests ]
+  # needs:parallel:matrix limits this job to a specific build_tests combination.
+  # Keep matrix vars exact and in build_tests declaration order:
+  # https://docs.gitlab.com/ci/yaml/#needsparallelmatrix
+  needs: &needs_build_tests_inst
+    - job: build_tests
+      parallel:
+        matrix:
+          - GRADLE_TARGET: ":instrumentationTest"
+            CACHE_TYPE: "inst"
   stage: tests
   rules:
     - if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
@@ -586,7 +594,7 @@ muzzle:
 
 muzzle-dep-report:
   extends: .gradle_build
-  needs: [ build_tests ]
+  needs: *needs_build_tests_inst
   stage: tests
   rules:
     - if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
@@ -737,6 +745,15 @@ agent_integration_tests:
 
 test_base:
   extends: .test_job
+  # needs:parallel:matrix limits this job to a specific build_tests combination.
+  # Keep matrix vars exact and in build_tests declaration order:
+  # https://docs.gitlab.com/ci/yaml/#needsparallelmatrix
+  needs:
+    - job: build_tests
+      parallel:
+        matrix:
+          - GRADLE_TARGET: ":baseTest"
+            CACHE_TYPE: "base"
   variables:
     GRADLE_TARGET: ":baseTest"
     CACHE_TYPE: "base"
@@ -748,6 +765,7 @@ test_base:
 
 test_inst:
   extends: .test_job_with_test_agent
+  needs: *needs_build_tests_inst
   variables:
     GRADLE_TARGET: ":instrumentationTest"
     CACHE_TYPE: "inst"
@@ -756,6 +774,15 @@ test_inst:
 
 test_inst_latest:
   extends: .test_job_with_test_agent
+  # needs:parallel:matrix limits this job to a specific build_tests combination.
+  # Keep matrix vars exact and in build_tests declaration order:
+  # https://docs.gitlab.com/ci/yaml/#needsparallelmatrix
+  needs:
+    - job: build_tests
+      parallel:
+        matrix:
+          - GRADLE_TARGET: ":instrumentationLatestDepTest"
+            CACHE_TYPE: "latestdep"
   variables:
     GRADLE_TARGET: ":instrumentationLatestDepTest"
     CACHE_TYPE: "latestdep"
@@ -786,6 +813,7 @@ test_flaky:
 
 test_flaky_inst:
   extends: .test_job
+  needs: *needs_build_tests_inst
   variables:
     GRADLE_TARGET: ":instrumentationTest"
     GRADLE_PARAMS: "-PrunFlakyTests"
@@ -800,6 +828,15 @@ test_flaky_inst:
 
 test_profiling:
   extends: .test_job
+  # needs:parallel:matrix limits this job to a specific build_tests combination.
+  # Keep matrix vars exact and in build_tests declaration order:
+  # https://docs.gitlab.com/ci/yaml/#needsparallelmatrix
+  needs:
+    - job: build_tests
+      parallel:
+        matrix:
+          - GRADLE_TARGET: ":profilingTest"
+            CACHE_TYPE: "profiling"
   variables:
     GRADLE_TARGET: ":profilingTest"
     CACHE_TYPE: "profiling"
@@ -819,6 +856,16 @@ test_debugger:
 
 test_smoke:
   extends: .test_job
+  # needs:parallel:matrix limits this job to a specific build_tests combination.
+  # Keep matrix vars exact and in build_tests declaration order:
+  # https://docs.gitlab.com/ci/yaml/#needsparallelmatrix
+  needs: &needs_build_tests_smoke
+    - job: build_tests
+      parallel:
+        matrix:
+          - GRADLE_TARGET: ":smokeTest"
+            CACHE_TYPE: "smoke"
+            MAVEN_OPTS: "-Xms256M -Xmx1024M"
   variables:
     GRADLE_TARGET: "stageMainDist :smokeTest"
     GRADLE_PARAMS: "-PskipFlakyTests"
@@ -828,6 +875,7 @@ test_smoke:
 
 test_ssi_smoke:
   extends: .test_job
+  needs: *needs_build_tests_smoke
   rules:
     - if: $CI_COMMIT_BRANCH == "master"
       when: on_success
@@ -845,6 +893,7 @@ test_ssi_smoke:
 
 test_smoke_graalvm:
   extends: .test_job
+  needs: *needs_build_tests_smoke
   tags: [ "arch:amd64" ]
   variables:
     GRADLE_TARGET: "stageMainDist :dd-smoke-test:spring-boot-3.0-native:test"
@@ -857,6 +906,7 @@ test_smoke_graalvm:
 
 test_smoke_semeru8_debugger:
   extends: .test_job
+  needs: *needs_build_tests_smoke
   tags: [ "arch:amd64" ]
   variables:
     GRADLE_TARGET: "stageMainDist dd-smoke-tests:debugger-integration-tests:test"
diff --git a/communication/src/main/java/datadog/communication/serialization/FlushingBuffer.java b/communication/src/main/java/datadog/communication/serialization/FlushingBuffer.java
index 332434ad46a..e3087bd5f27 100644
--- a/communication/src/main/java/datadog/communication/serialization/FlushingBuffer.java
+++ b/communication/src/main/java/datadog/communication/serialization/FlushingBuffer.java
@@ -1,5 +1,6 @@
 package datadog.communication.serialization;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import java.nio.ByteBuffer;
 
 public final class FlushingBuffer implements StreamingBuffer {
@@ -106,7 +107,7 @@ public void reset() {
     mark = 0;
   }
 
-  // for tests only
+  @VisibleForTesting
   int getMessageCount() {
     return messageCount;
   }
diff --git a/components/annotations/build.gradle.kts b/components/annotations/build.gradle.kts
new file mode 100644
index 00000000000..b8314ee7f33
--- /dev/null
+++ b/components/annotations/build.gradle.kts
@@ -0,0 +1 @@
+apply(from = "$rootDir/gradle/java.gradle")
diff --git a/components/annotations/src/main/java/datadog/trace/api/internal/VisibleForTesting.java b/components/annotations/src/main/java/datadog/trace/api/internal/VisibleForTesting.java
new file mode 100644
index 00000000000..60e3cd2c33a
--- /dev/null
+++ b/components/annotations/src/main/java/datadog/trace/api/internal/VisibleForTesting.java
@@ -0,0 +1,10 @@
+package datadog.trace.api.internal;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@Retention(RetentionPolicy.SOURCE)
+@Target({ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE})
+public @interface VisibleForTesting {}
diff --git a/components/environment/build.gradle.kts b/components/environment/build.gradle.kts
index 7818fef2463..82c834d5ef3 100644
--- a/components/environment/build.gradle.kts
+++ b/components/environment/build.gradle.kts
@@ -5,6 +5,10 @@ plugins {
 
 apply(from = "$rootDir/gradle/java.gradle")
 
+dependencies {
+  compileOnly(project(":components:annotations"))
+}
+
 /*
  * Add an addition gradle configuration to be consumed by bootstrap only.
  * "datadog.trace." prefix is required to be excluded from Jacoco instrumentation.
diff --git a/components/environment/src/main/java/datadog/environment/JavaVirtualMachine.java b/components/environment/src/main/java/datadog/environment/JavaVirtualMachine.java
index a71d66859bc..d6fe120c98b 100644
--- a/components/environment/src/main/java/datadog/environment/JavaVirtualMachine.java
+++ b/components/environment/src/main/java/datadog/environment/JavaVirtualMachine.java
@@ -1,5 +1,6 @@
 package datadog.environment;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import java.util.List;
 import java.util.Locale;
 import javax.annotation.Nullable;
@@ -212,7 +213,7 @@ public Runtime() {
           SystemProperties.get("java.vendor.version"));
     }
 
-    // Only visible for testing
+    @VisibleForTesting
     Runtime(String javaVer, String rtVer, String name, String vendor, String vendorVersion) {
       this.name = name == null ? "" : name;
       this.vendor = vendor == null ? "" : vendor;
diff --git a/components/environment/src/main/java/datadog/environment/JvmOptions.java b/components/environment/src/main/java/datadog/environment/JvmOptions.java
index ef549bc4d73..7dd77e29ff9 100644
--- a/components/environment/src/main/java/datadog/environment/JvmOptions.java
+++ b/components/environment/src/main/java/datadog/environment/JvmOptions.java
@@ -5,6 +5,7 @@
 import static java.util.Collections.emptyList;
 import static java.util.Collections.singletonList;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -93,7 +94,7 @@ private List<String> findVmOptions() {
 
   // Be aware that when running a native image, the command line in /proc/self/cmdline is just the
   // executable
-  // Visible for testing
+  @VisibleForTesting
   List<String> findVmOptionsFromProcFs(String[] procfsCmdline) {
     // Create the list of VM options
     List<String> vmOptions = new ArrayList<>();
diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java
index c18534a73e9..b2bc1702805 100644
--- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java
+++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java
@@ -1,5 +1,6 @@
 package datadog.trace.bootstrap;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.util.List;
@@ -30,7 +31,7 @@ public static Throwable recordError(Throwable error) {
     return error; // keep throwable at top of the stack
   }
 
-  // Visible for testing
+  @VisibleForTesting
   public static void resetErrors() {
     COUNTER.set(0);
     if (detailed) {
diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/WeakMapContextStore.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/WeakMapContextStore.java
index a3926412889..1750b7f2b9c 100644
--- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/WeakMapContextStore.java
+++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/WeakMapContextStore.java
@@ -1,5 +1,7 @@
 package datadog.trace.bootstrap;
 
+import datadog.trace.api.internal.VisibleForTesting;
+
 /**
  * Weak {@link ContextStore} that acts as a fall-back when field-injection isn't possible.
  *
@@ -85,7 +87,7 @@ public V remove(final K key) {
     return (V) map.remove(key);
   }
 
-  // Package reachable for testing
+  @VisibleForTesting
   int size() {
     return map.size();
   }
diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/blocking/BlockingActionHelper.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/blocking/BlockingActionHelper.java
index fbe5e150367..e72a86a7098 100644
--- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/blocking/BlockingActionHelper.java
+++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/blocking/BlockingActionHelper.java
@@ -6,6 +6,7 @@
 
 import datadog.appsec.api.blocking.BlockingContentType;
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
@@ -187,7 +188,7 @@ private static String nextMediaRange(String s, int[] pos, float[] quality) {
     return mediaRangeMatcher.group(1);
   }
 
-  // public for testing
+  @VisibleForTesting
   public static void reset(Config config) {
     TEMPLATE_HTML = null;
     TEMPLATE_JSON = null;
diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java
index 161e2f8ad11..3dbf916362c 100644
--- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java
+++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java
@@ -4,6 +4,7 @@
 
 import datadog.trace.api.BaseHash;
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
 import datadog.trace.bootstrap.instrumentation.api.Tags;
 import java.io.UnsupportedEncodingException;
@@ -88,7 +89,7 @@ private static void ensureStaticPrefixComputed() {
     staticPrefixComputed = true;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   public static void resetStaticPrefixForTesting() {
     staticPrefixComputed = false;
   }
diff --git a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/ConfigManager.java b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/ConfigManager.java
index 02c10016d83..99e6235a6b2 100644
--- a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/ConfigManager.java
+++ b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/ConfigManager.java
@@ -8,6 +8,7 @@
 import datadog.trace.api.Config;
 import datadog.trace.api.ProcessTags;
 import datadog.trace.api.WellKnownTags;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.util.PidHelper;
 import datadog.trace.util.RandomUtils;
 import java.io.BufferedReader;
@@ -137,7 +138,7 @@ public Builder extendedInfoEnabled(boolean extendedInfoEnabled) {
         return this;
       }
 
-      // @VisibleForTesting
+      @VisibleForTesting
       Builder reportUUID(String reportUUID) {
         this.reportUUID = reportUUID;
         return this;
@@ -170,7 +171,7 @@ private static String getBaseName(File file) {
     return filename.substring(0, dotIndex);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   static String getMergedTagsForSerialization(Config config) {
     return config.getMergedCrashTrackingTags().entrySet().stream()
         .filter(e -> e.getValue() != null)
@@ -195,7 +196,7 @@ public static void writeConfigToPath(File scriptFile, String... additionalEntrie
     writeConfigToFile(Config.get(), cfgFile, additionalEntries);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   static void writeConfigToFile(Config config, File cfgFile, String... additionalEntries) {
     final WellKnownTags wellKnownTags = config.getWellKnownTags();
 
diff --git a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploader.java b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploader.java
index 76fd0e45f8c..5c37b7c5908 100644
--- a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploader.java
+++ b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploader.java
@@ -24,6 +24,7 @@
 import datadog.environment.SystemProperties;
 import datadog.trace.api.Config;
 import datadog.trace.api.DDTags;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.config.provider.ConfigProvider;
 import datadog.trace.util.AgentThreadFactory;
 import datadog.trace.util.PidHelper;
@@ -187,7 +188,7 @@ public void notifyCrashStarted(String error) {
     }
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   void sendPingToTelemetry(String error) {
     // send a ping message to the telemetry to notify that the crash report started
     try (Buffer buf = new Buffer();
@@ -207,7 +208,7 @@ void sendPingToTelemetry(String error) {
     }
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   void sendPingToErrorTracking(String error) {
     try {
       final CrashLog ping =
@@ -253,7 +254,7 @@ public void upload(@Nonnull Path file) {
     }
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   void remoteUpload(
       @Nonnull String fileContent, boolean sendToTelemetry, boolean sendToErrorTracking) {
     final String uuid = storedConfig.reportUUID;
@@ -316,7 +317,7 @@ void uploadToLogs(@Nonnull String message, @Nonnull PrintStream out) throws IOEx
     }
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   @SuppressForbidden
   static String extractErrorKind(String fileContent) {
     Matcher matcher = ERROR_MESSAGE_PATTERN.matcher(fileContent);
@@ -348,7 +349,7 @@ static String extractErrorKind(String fileContent) {
               "$"),
           Pattern.DOTALL | Pattern.MULTILINE);
 
-  // @VisibleForTesting
+  @VisibleForTesting
   @SuppressForbidden
   static String extractErrorMessage(String fileContent) {
     Matcher matcher = ERROR_MESSAGE_PATTERN.matcher(fileContent);
diff --git a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploaderScriptInitializer.java b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploaderScriptInitializer.java
index 0e810694be1..5677d10f220 100644
--- a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploaderScriptInitializer.java
+++ b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/CrashUploaderScriptInitializer.java
@@ -8,6 +8,7 @@
 import static java.util.Locale.ROOT;
 
 import datadog.environment.SystemProperties;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.util.PidHelper;
 import datadog.trace.util.Strings;
 import java.io.BufferedReader;
@@ -25,12 +26,12 @@ public final class CrashUploaderScriptInitializer {
 
   private CrashUploaderScriptInitializer() {}
 
-  // @VisibleForTests
+  @VisibleForTesting
   static void initialize(String onErrorVal, String onErrorFile) {
     initialize(onErrorVal, onErrorFile, null);
   }
 
-  // @VisibleForTests
+  @VisibleForTesting
   static void initialize(String onErrorVal, String onErrorFile, String javacorePath) {
     if (onErrorVal == null || onErrorVal.isEmpty()) {
       LOG.debug(
diff --git a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/OOMENotifierScriptInitializer.java b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/OOMENotifierScriptInitializer.java
index c558d3a83a5..5d37b975b98 100644
--- a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/OOMENotifierScriptInitializer.java
+++ b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/OOMENotifierScriptInitializer.java
@@ -8,6 +8,7 @@
 import static datadog.crashtracking.Initializer.pidFromSpecialFileName;
 import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.util.PidHelper;
 import java.io.File;
 import java.io.FileOutputStream;
@@ -20,7 +21,7 @@ public final class OOMENotifierScriptInitializer {
 
   private OOMENotifierScriptInitializer() {}
 
-  // @VisibleForTests
+  @VisibleForTesting
   static void initialize(String onOutOfMemoryVal) {
     if (onOutOfMemoryVal == null || onOutOfMemoryVal.isEmpty()) {
       LOG.debug(
diff --git a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/parsers/RedactUtils.java b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/parsers/RedactUtils.java
index 7be12817525..6665d0f412d 100644
--- a/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/parsers/RedactUtils.java
+++ b/dd-java-agent/agent-crashtracking/src/main/java/datadog/crashtracking/parsers/RedactUtils.java
@@ -1,5 +1,6 @@
 package datadog.crashtracking.parsers;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
 import java.util.function.Function;
 import java.util.regex.Matcher;
@@ -215,7 +216,7 @@ static String redactNmethodClass(String line) {
    * java.lang.Class} oop) use {@link #redactRegisterToMemoryMapping} which detects the oop type
    * automatically.
    */
-  // @VisibleForTesting
+  @VisibleForTesting
   static String redactDottedClassOopRef(String line) {
     return redactStringOopRef(line, false);
   }
diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/ConfigurationUpdater.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/ConfigurationUpdater.java
index c392ab3c663..3caa2a149ab 100644
--- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/ConfigurationUpdater.java
+++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/ConfigurationUpdater.java
@@ -15,6 +15,7 @@
 import datadog.environment.JavaVirtualMachine;
 import datadog.logging.RatelimitedLogger;
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.debugger.DebuggerContext;
 import datadog.trace.bootstrap.debugger.ProbeId;
 import datadog.trace.bootstrap.debugger.ProbeImplementation;
@@ -445,7 +446,7 @@ private void removeCurrentTransformer() {
     currentTransformer = null;
   }
 
-  // only visible for tests
+  @VisibleForTesting
   Map<String, ProbeDefinition> getAppliedDefinitions() {
     if (currentTransformer == null) {
       return Collections.emptyMap();
diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/sink/DebuggerSink.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/sink/DebuggerSink.java
index 5c91e110acb..becf47b0337 100644
--- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/sink/DebuggerSink.java
+++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/sink/DebuggerSink.java
@@ -5,6 +5,7 @@
 import com.datadog.debugger.uploader.BatchUploader;
 import com.datadog.debugger.util.DebuggerMetrics;
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.debugger.DebuggerContext.SkipCause;
 import datadog.trace.bootstrap.debugger.ProbeId;
 import datadog.trace.util.AgentTaskScheduler;
@@ -159,7 +160,7 @@ private void lowRateReschedule() {
             TimeUnit.MILLISECONDS);
   }
 
-  // visible for testing
+  @VisibleForTesting
   void lowRateFlush(DebuggerSink ignored) {
     symbolSink.flush();
     probeStatusSink.flush(tags);
diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/uploader/BatchUploader.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/uploader/BatchUploader.java
index 63ff0ef8dcc..993d8194668 100644
--- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/uploader/BatchUploader.java
+++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/uploader/BatchUploader.java
@@ -7,6 +7,7 @@
 import datadog.communication.http.OkHttpUtils;
 import datadog.logging.RatelimitedLogger;
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.util.AgentThreadFactory;
 import java.io.IOException;
 import java.time.Duration;
@@ -124,7 +125,7 @@ public BatchUploader(String name, Config config, String endpoint, RetryPolicy re
         ContainerInfo.getEntityId());
   }
 
-  // Visible for testing
+  @VisibleForTesting
   BatchUploader(
       String name,
       Config config,
@@ -220,10 +221,7 @@ private void doUpload(Runnable makeRequest) {
     }
   }
 
-  /**
-   * Note that this method is only visible for testing and should not be used from outside this
-   * class.
-   */
+  @VisibleForTesting
   OkHttpClient getClient() {
     return client;
   }
diff --git a/dd-java-agent/agent-logging/src/main/java/datadog/trace/logging/PrintStreamWrapper.java b/dd-java-agent/agent-logging/src/main/java/datadog/trace/logging/PrintStreamWrapper.java
index cd1e1b4a9c9..c71390a8e23 100644
--- a/dd-java-agent/agent-logging/src/main/java/datadog/trace/logging/PrintStreamWrapper.java
+++ b/dd-java-agent/agent-logging/src/main/java/datadog/trace/logging/PrintStreamWrapper.java
@@ -1,6 +1,7 @@
 package datadog.trace.logging;
 
 import datadog.environment.OperatingSystem;
+import datadog.trace.api.internal.VisibleForTesting;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.OutputStream;
@@ -17,7 +18,7 @@ public PrintStreamWrapper(PrintStream ps) {
     super(ps);
   }
 
-  // use for tests only
+  @VisibleForTesting
   public OutputStream getOriginalPrintStream() {
     return super.out;
   }
diff --git a/dd-java-agent/agent-otel/otel-shim/src/main/java/datadog/opentelemetry/shim/logs/OtelLogRecordBuilder.java b/dd-java-agent/agent-otel/otel-shim/src/main/java/datadog/opentelemetry/shim/logs/OtelLogRecordBuilder.java
index 93554fdc8c5..81cfb7c875e 100644
--- a/dd-java-agent/agent-otel/otel-shim/src/main/java/datadog/opentelemetry/shim/logs/OtelLogRecordBuilder.java
+++ b/dd-java-agent/agent-otel/otel-shim/src/main/java/datadog/opentelemetry/shim/logs/OtelLogRecordBuilder.java
@@ -3,6 +3,7 @@
 import static datadog.opentelemetry.shim.trace.OtelExtractedContext.extract;
 import static io.opentelemetry.api.common.AttributeKey.stringKey;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.time.SystemTimeSource;
 import datadog.trace.api.time.TimeSource;
 import datadog.trace.bootstrap.otel.logs.data.OtelLogRecordProcessor;
@@ -23,8 +24,7 @@
 
 @ParametersAreNonnullByDefault
 final class OtelLogRecordBuilder implements LogRecordBuilder {
-  // package-visible for testing
-  static TimeSource TIME_SOURCE = SystemTimeSource.INSTANCE;
+  @VisibleForTesting static TimeSource TIME_SOURCE = SystemTimeSource.INSTANCE;
 
   private static final AttributeKey<String> EXCEPTION_TYPE_KEY = stringKey("exception.type");
   private static final AttributeKey<String> EXCEPTION_MESSAGE_KEY = stringKey("exception.message");
diff --git a/dd-java-agent/agent-profiling/profiling-controller-ddprof/src/main/java/com/datadog/profiling/controller/ddprof/DatadogProfilerOngoingRecording.java b/dd-java-agent/agent-profiling/profiling-controller-ddprof/src/main/java/com/datadog/profiling/controller/ddprof/DatadogProfilerOngoingRecording.java
index 7976f7f81a9..26ffc13e972 100644
--- a/dd-java-agent/agent-profiling/profiling-controller-ddprof/src/main/java/com/datadog/profiling/controller/ddprof/DatadogProfilerOngoingRecording.java
+++ b/dd-java-agent/agent-profiling/profiling-controller-ddprof/src/main/java/com/datadog/profiling/controller/ddprof/DatadogProfilerOngoingRecording.java
@@ -20,6 +20,7 @@
 import com.datadog.profiling.controller.UnsupportedEnvironmentException;
 import com.datadog.profiling.ddprof.DatadogProfiler;
 import datadog.environment.JavaVirtualMachine;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilingSnapshot;
 import datadog.trace.api.profiling.RecordingData;
 import java.time.Instant;
@@ -52,7 +53,7 @@ public RecordingData stop() {
     return recording.stop();
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   final RecordingData snapshot(final Instant start) {
     return snapshot(start, ProfilingSnapshot.Kind.PERIODIC);
   }
diff --git a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkOngoingRecording.java b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkOngoingRecording.java
index ddac86e663f..4b9d71e3a26 100644
--- a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkOngoingRecording.java
+++ b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkOngoingRecording.java
@@ -3,6 +3,7 @@
 import com.datadog.profiling.controller.ControllerContext;
 import com.datadog.profiling.controller.OngoingRecording;
 import com.datadog.profiling.utils.ProfilingMode;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilingSnapshot;
 import datadog.trace.api.profiling.RecordingData;
 import datadog.trace.bootstrap.config.provider.ConfigProvider;
@@ -135,7 +136,7 @@ public RecordingData stop() {
     return new OpenJdkRecordingData(recording, ProfilingSnapshot.Kind.PERIODIC);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   final RecordingData snapshot(@Nonnull final Instant start) {
     return snapshot(start, ProfilingSnapshot.Kind.PERIODIC);
   }
diff --git a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkRecordingData.java b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkRecordingData.java
index 6cedf96691b..da022c226ed 100644
--- a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkRecordingData.java
+++ b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkRecordingData.java
@@ -15,6 +15,7 @@
  */
 package com.datadog.profiling.controller.openjdk;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.RecordingData;
 import datadog.trace.api.profiling.RecordingInputStream;
 import java.io.IOException;
@@ -54,7 +55,7 @@ public String getName() {
     return recording.getName();
   }
 
-  // Visible for testing
+  @VisibleForTesting
   Recording getRecording() {
     return recording;
   }
diff --git a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/events/SmapEntryCache.java b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/events/SmapEntryCache.java
index 4b03fff0396..b890904a461 100644
--- a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/events/SmapEntryCache.java
+++ b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/events/SmapEntryCache.java
@@ -1,6 +1,7 @@
 package com.datadog.profiling.controller.openjdk.events;
 
 import datadog.environment.JavaVirtualMachine;
+import datadog.trace.api.internal.VisibleForTesting;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -60,7 +61,7 @@ static class AnnotatedRegion {
     this.smapsPath = smapsPath;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   void invalidate() {
     UPDATER.getAndSet(this, System.nanoTime() - (2 * ttl));
   }
@@ -81,7 +82,7 @@ public List<SmapEntryEvent> getEvents() {
     return (List<SmapEntryEvent>) events[index];
   }
 
-  // accessible for testing
+  @VisibleForTesting
   static AnnotatedRegion fromAnnotatedEntry(String line, int javaVersion) {
     boolean isRegion = line.startsWith("0x");
     if (isRegion) {
@@ -149,7 +150,7 @@ private static boolean isSmapHeader(String line) {
     return ((firstChar >= '0' && firstChar <= '9') || (firstChar >= 'a' && firstChar <= 'f'));
   }
 
-  // accessible for testing
+  @VisibleForTesting
   static void readEvents(BufferedReader br, List<SmapEntryEvent> events) throws IOException {
     String line = br.readLine();
     while (line != null) {
diff --git a/dd-java-agent/agent-profiling/profiling-controller-oracle/src/main/java/com/datadog/profiling/controller/oracle/OracleJdkOngoingRecording.java b/dd-java-agent/agent-profiling/profiling-controller-oracle/src/main/java/com/datadog/profiling/controller/oracle/OracleJdkOngoingRecording.java
index 18f104e4e6f..2edbc1a369e 100644
--- a/dd-java-agent/agent-profiling/profiling-controller-oracle/src/main/java/com/datadog/profiling/controller/oracle/OracleJdkOngoingRecording.java
+++ b/dd-java-agent/agent-profiling/profiling-controller-oracle/src/main/java/com/datadog/profiling/controller/oracle/OracleJdkOngoingRecording.java
@@ -1,6 +1,7 @@
 package com.datadog.profiling.controller.oracle;
 
 import com.datadog.profiling.controller.OngoingRecording;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilingSnapshot;
 import java.io.IOException;
 import java.time.Duration;
@@ -52,7 +53,7 @@ public OracleJdkRecordingData stop() {
     }
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   final OracleJdkRecordingData snapshot(@Nonnull final Instant start) {
     return snapshot(start, ProfilingSnapshot.Kind.PERIODIC);
   }
diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java
index 7f57b356d99..bfb3df0f333 100644
--- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java
+++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java
@@ -22,6 +22,7 @@
 import static datadog.trace.util.AgentThreadFactory.AgentThread.PROFILER_RECORDING_SCHEDULER;
 
 import datadog.environment.JavaVirtualMachine;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilerFlareLogger;
 import datadog.trace.api.profiling.ProfilingSnapshot;
 import datadog.trace.api.profiling.RecordingData;
@@ -238,7 +239,7 @@ public boolean isStarted() {
     return started;
   }
 
-  /** VisibleForTesting */
+  @VisibleForTesting
   final Duration getStartupDelay() {
     return startupDelay;
   }
diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java
index b1e07b08c32..9aedde9e49f 100644
--- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java
+++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java
@@ -38,6 +38,7 @@
 import datadog.environment.JavaVirtualMachine;
 import datadog.libs.ddprof.DdprofLibraryLoader;
 import datadog.trace.api.config.ProfilingConfig;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.RecordingData;
 import datadog.trace.bootstrap.config.provider.ConfigProvider;
 import datadog.trace.bootstrap.instrumentation.api.TaskWrapper;
@@ -115,7 +116,7 @@ private DatadogProfiler(ConfigProvider configProvider) {
     this(configProvider, getContextAttributes(configProvider));
   }
 
-  // visible for testing
+  @VisibleForTesting
   DatadogProfiler(ConfigProvider configProvider, Set<String> contextAttributes) {
     this.configProvider = configProvider;
     this.profiler = DdprofLibraryLoader.javaProfiler().getComponent();
diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfilerRecording.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfilerRecording.java
index 8154a8a444e..82bf3d5b868 100644
--- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfilerRecording.java
+++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfilerRecording.java
@@ -1,6 +1,7 @@
 package com.datadog.profiling.ddprof;
 
 import com.datadog.profiling.controller.OngoingRecording;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilingSnapshot;
 import datadog.trace.api.profiling.RecordingData;
 import java.io.IOException;
@@ -37,7 +38,7 @@ public RecordingData stop() {
         recordingFile, started, Instant.now(), ProfilingSnapshot.Kind.ON_SHUTDOWN);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   final RecordingData snapshot(@Nonnull Instant start) {
     return snapshot(start, ProfilingSnapshot.Kind.PERIODIC);
   }
@@ -62,7 +63,7 @@ public void close() {
     }
   }
 
-  // used for tests only
+  @VisibleForTesting
   Path getRecordingFile() {
     return recordingFile;
   }
diff --git a/dd-java-agent/agent-profiling/profiling-uploader/src/main/java/com/datadog/profiling/uploader/ProfileUploader.java b/dd-java-agent/agent-profiling/profiling-uploader/src/main/java/com/datadog/profiling/uploader/ProfileUploader.java
index 80afc760feb..86ea3f6e3e0 100644
--- a/dd-java-agent/agent-profiling/profiling-uploader/src/main/java/com/datadog/profiling/uploader/ProfileUploader.java
+++ b/dd-java-agent/agent-profiling/profiling-uploader/src/main/java/com/datadog/profiling/uploader/ProfileUploader.java
@@ -31,6 +31,7 @@
 import datadog.trace.api.ProcessTags;
 import datadog.trace.api.git.GitInfo;
 import datadog.trace.api.git.GitInfoProvider;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.RecordingData;
 import datadog.trace.api.profiling.RecordingType;
 import datadog.trace.bootstrap.config.provider.ConfigProvider;
@@ -134,10 +135,7 @@ public ProfileUploader(final Config config, final ConfigProvider configProvider)
     this(config, configProvider, new IOLogger(log), TERMINATION_TIMEOUT_SEC);
   }
 
-  /**
-   * Note that this method is only visible for testing and should not be used from outside this
-   * class.
-   */
+  @VisibleForTesting
   ProfileUploader(
       final Config config,
       final ConfigProvider configProvider,
@@ -443,10 +441,7 @@ private List<String> tagsToList(final Map<String, String> tags) {
         .collect(Collectors.toList());
   }
 
-  /**
-   * Note that this method is only visible for testing and should not be used from outside this
-   * class.
-   */
+  @VisibleForTesting
   OkHttpClient getClient() {
     return client;
   }
diff --git a/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/CompositeController.java b/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/CompositeController.java
index 5cec160a754..236bf52b1ae 100644
--- a/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/CompositeController.java
+++ b/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/CompositeController.java
@@ -17,6 +17,7 @@
 import datadog.trace.api.Config;
 import datadog.trace.api.Platform;
 import datadog.trace.api.config.ProfilingConfig;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilerFlareLogger;
 import datadog.trace.api.profiling.ProfilingSnapshot;
 import datadog.trace.api.profiling.RecordingData;
@@ -50,7 +51,7 @@ public CompositeController(List<Controller> controllers) {
     this.controllers = controllers;
   }
 
-  // visible for testing
+  @VisibleForTesting
   public List<Controller> getControllers() {
     return Collections.unmodifiableList(controllers);
   }
diff --git a/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ScrubRecordingDataListener.java b/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ScrubRecordingDataListener.java
index 6e195a0b7a7..211d0e23c64 100644
--- a/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ScrubRecordingDataListener.java
+++ b/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ScrubRecordingDataListener.java
@@ -4,6 +4,7 @@
 
 import com.datadog.profiling.scrubber.DefaultScrubDefinition;
 import com.datadog.profiling.scrubber.JfrScrubber;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.RecordingData;
 import datadog.trace.api.profiling.RecordingDataListener;
 import datadog.trace.api.profiling.RecordingInputStream;
@@ -45,7 +46,7 @@ static RecordingDataListener wrap(
     this(delegate, scrubber, failOpen, null);
   }
 
-  // visible for testing
+  @VisibleForTesting
   ScrubRecordingDataListener(
       RecordingDataListener delegate, JfrScrubber scrubber, boolean failOpen, Path tempDir) {
     this.delegate = delegate;
diff --git a/dd-java-agent/build.gradle b/dd-java-agent/build.gradle
index 58ad7540df2..a2001cb4ee8 100644
--- a/dd-java-agent/build.gradle
+++ b/dd-java-agent/build.gradle
@@ -60,6 +60,7 @@ tasks.named("compileJava") {
 dependencies {
   implementation sourceSets.main_java11.output
   main_java11CompileOnly libs.forbiddenapis
+  main_java6CompileOnly project(':components:annotations')
   main_java6CompileOnly libs.forbiddenapis
   testImplementation sourceSets.main_java6.output
 }
diff --git a/dd-java-agent/src/main/java6/datadog/trace/bootstrap/AgentPreCheck.java b/dd-java-agent/src/main/java6/datadog/trace/bootstrap/AgentPreCheck.java
index de49e86ec73..e9ea4fc7b7a 100644
--- a/dd-java-agent/src/main/java6/datadog/trace/bootstrap/AgentPreCheck.java
+++ b/dd-java-agent/src/main/java6/datadog/trace/bootstrap/AgentPreCheck.java
@@ -1,5 +1,6 @@
 package datadog.trace.bootstrap;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -91,9 +92,9 @@ private static boolean compatible() {
     return compatible(javaVersion, javaHome, System.err);
   }
 
-  // Reachable for testing
   // System.getenv usage is necessary since class is designed to be Java 6 compatible, while
   // Environment component is for Java 8+
+  @VisibleForTesting
   @SuppressForbidden
   static boolean compatible(String javaVersion, String javaHome, PrintStream output) {
     int majorJavaVersion = parseJavaMajorVersion(javaVersion);
@@ -114,7 +115,7 @@ static boolean compatible(String javaVersion, String javaHome, PrintStream outpu
     return false;
   }
 
-  // Reachable for testing
+  @VisibleForTesting
   static int parseJavaMajorVersion(String javaVersion) {
     int major = 0;
     if (javaVersion == null || javaVersion.isEmpty()) {
diff --git a/dd-smoke-tests/log-injection/src/test/groovy/datadog/smoketest/LogInjectionSmokeTest.groovy b/dd-smoke-tests/log-injection/src/test/groovy/datadog/smoketest/LogInjectionSmokeTest.groovy
index 5a640da4ae8..0e35061d8ee 100644
--- a/dd-smoke-tests/log-injection/src/test/groovy/datadog/smoketest/LogInjectionSmokeTest.groovy
+++ b/dd-smoke-tests/log-injection/src/test/groovy/datadog/smoketest/LogInjectionSmokeTest.groovy
@@ -3,6 +3,7 @@ package datadog.smoketest
 import com.squareup.moshi.Moshi
 import com.squareup.moshi.Types
 import datadog.environment.JavaVirtualMachine
+import datadog.environment.OperatingSystem
 import datadog.trace.agent.test.server.http.TestHttpServer.HandlerApi.RequestApi
 import datadog.trace.api.config.GeneralConfig
 import datadog.trace.test.util.Flaky
@@ -378,15 +379,213 @@ abstract class LogInjectionSmokeTest extends AbstractSmokeTest {
       // The default error ("Condition not satisfied after 30s") is useless — enrich with diagnostic state
       def alive = testedProcess?.isAlive()
       def lastLines = tailProcessLog(30)
+      def threadDump = alive ? dumpThreadStacks() : "(process not alive, skipping thread dump)"
       throw new AssertionError(
       "Timed out waiting for ${count} traces after ${defaultPoll.timeout}s. " +
       "traceCount=${traceCount.get()}, process.alive=${alive}, " +
       "RC polls received: ${rcClientMessages.size()}.\n" +
-      "Last process output:\n${lastLines}", e)
+      "Last process output:\n${lastLines}\n" +
+      "Thread dump:\n${threadDump}", e)
     }
     traceCount.get()
   }
 
+  /**
+   * Capture a thread dump of the forked process via {@code jstack}. jstack's output is captured by
+   * the smoketest JVM directly, bypassing the tested-process output-capture thread that has been
+   * observed to be starved at timeout (which makes the SIGQUIT-via-stderr approach unreliable for
+   * exactly the failures we want to diagnose).
+   *
+   * <p>No raw {@code kill -3} fallback: PID reuse on shared CI hosts could cause us to signal an
+   * unrelated process if the child has exited since the surrounding liveness check.
+   */
+  private String dumpThreadStacks() {
+    try {
+      if (testedProcess == null) {
+        return "(no tested process)"
+      }
+      if (OperatingSystem.isWindows()) {
+        return "(thread dump not supported on Windows)"
+      }
+      long pid = getTestedProcessPid()
+      if (pid <= 0) {
+        return "(could not determine pid)"
+      }
+      // Re-check liveness immediately before invoking jstack. The earlier check that gates this
+      // method runs ~1 statement away, but if the child has exited and been reaped since then,
+      // the OS may have reused the PID — jstack-ing the wrong process would attach misleading
+      // diagnostics to the test failure.
+      if (!testedProcess.isAlive()) {
+        return "(process exited between liveness check and dump; skipping to avoid PID reuse)"
+      }
+      String jstackOut = runJstack(pid)
+      if (jstackOut == null) {
+        return "(jstack not available or failed)"
+      }
+      return filterThreadDump(jstackOut)
+    } catch (Throwable t) {
+      // Never let a diagnostic failure mask the original AssertionError.
+      return "(thread dump failed: ${t.getClass().simpleName}: ${t.message})"
+    }
+  }
+
+  // Approximate budget for the inline dump in error.message. Datadog CI Visibility caps
+  // error.message at ~5000 chars; this leaves a few hundred for the "Timed out waiting..."
+  // prefix and the elision marker.
+  private static final int INLINE_DUMP_CAP = 4700
+
+  /**
+   * Reduce a jstack thread dump to the threads most likely to explain a hang: the main thread,
+   * dd-trace agent threads (dd-*, datadog-*), OkHttp threads, and anything BLOCKED. Drops known
+   * JVM boilerplate (compiler/GC/reference handler/etc). Truncates to {@link #INLINE_DUMP_CAP}
+   * with an elision marker.
+   */
+  private String filterThreadDump(String fullDump) {
+    int firstBlockIdx = fullDump.indexOf('\n"')
+    if (firstBlockIdx < 0) {
+      // No recognizable thread blocks — return the original, truncated if needed
+      return fullDump.length() > INLINE_DUMP_CAP
+      ? fullDump.substring(0, INLINE_DUMP_CAP) + "\n(truncated)"
+      : fullDump
+    }
+    String header = fullDump.substring(0, firstBlockIdx + 1)
+    String rest = fullDump.substring(firstBlockIdx + 1)
+
+    List<String> blocks = []
+    int i = 0
+    while (i < rest.length()) {
+      int next = rest.indexOf('\n"', i)
+      if (next < 0) {
+        blocks.add(rest.substring(i))
+        break
+      }
+      blocks.add(rest.substring(i, next + 1))
+      i = next + 1
+    }
+
+    List<String> highPriority = []
+    List<String> lowPriority = []
+    int boilerplateDropped = 0
+    for (String block : blocks) {
+      def m = block =~ /^"([^"]+)"/
+      String name = m.find() ? m.group(1) : ''
+      if (isBoilerplateThread(name)) {
+        boilerplateDropped++
+      } else if (isHighPriorityThread(name, block)) {
+        highPriority.add(block)
+      } else {
+        lowPriority.add(block)
+      }
+    }
+
+    StringBuilder out = new StringBuilder(header)
+    int elided = 0
+    for (String block : highPriority + lowPriority) {
+      if (out.length() + block.length() + 120 > INLINE_DUMP_CAP) {
+        elided++
+        continue
+      }
+      out.append(block)
+    }
+    if (boilerplateDropped > 0 || elided > 0) {
+      out.append("\n(elided ${boilerplateDropped} JVM-boilerplate thread(s)")
+      if (elided > 0) {
+        out.append(", elided ${elided} other thread(s) for size")
+      }
+      out.append(")")
+    }
+    return out.toString()
+  }
+
+  private static boolean isBoilerplateThread(String name) {
+    if (name in [
+      "Reference Handler", "Finalizer", "Signal Dispatcher", "Common-Cleaner",
+      "Service Thread", "Monitor Deflation Thread", "Notification Thread",
+      "Attach Listener", "process reaper", "Sweeper thread", "VM Thread", "VM Periodic Task Thread"
+    ]) {
+      return true
+    }
+    return name.startsWith("C1 CompilerThread") ||
+    name.startsWith("C2 CompilerThread") ||
+    name.startsWith("GC Thread") ||
+    name.startsWith("G1 ") ||
+    name.startsWith("ParGC ") ||
+    name.startsWith("CMS ")
+  }
+
+  private static boolean isHighPriorityThread(String name, String block) {
+    if (name == "main") {
+      return true
+    }
+    if (name.startsWith("dd-") || name.startsWith("datadog-")) {
+      return true
+    }
+    if (name.startsWith("OkHttp") || name.contains("okhttp")) {
+      return true
+    }
+    return block.contains("java.lang.Thread.State: BLOCKED")
+  }
+
+  private long getTestedProcessPid() {
+    try {
+      return (long) testedProcess.getClass().getMethod("pid").invoke(testedProcess)
+    } catch (Throwable ignored) {
+      try {
+        // UNIXProcess's private 'pid' field, for JDK 8 compatibility
+        def field = testedProcess.getClass().getDeclaredField("pid")
+        field.setAccessible(true)
+        return field.getInt(testedProcess) as long
+      } catch (Throwable ignored2) {
+        return -1L
+      }
+    }
+  }
+
+  private String runJstack(long pid) {
+    def candidates = []
+    // java.home is always set by the JVM and points to the active JDK/JRE; prefer it over the
+    // JAVA_HOME env var which is frequently absent in CI runners even when Java is present.
+    String javaHome = System.getProperty("java.home")
+    if (javaHome) {
+      candidates.add(javaHome + "/bin/jstack")
+    }
+    String javaHomeEnv = System.getenv("JAVA_HOME")
+    if (javaHomeEnv && javaHomeEnv != javaHome) {
+      candidates.add(javaHomeEnv + "/bin/jstack")
+    }
+    candidates.add("jstack")
+    for (String cmd : candidates) {
+      File tmp = null
+      try {
+        tmp = File.createTempFile("jstack", ".txt")
+        // Redirect output to a file to avoid pipe-buffer deadlock — a full thread dump can
+        // exceed the OS pipe buffer (typically 64 KB) before waitFor returns.
+        Process p = new ProcessBuilder(cmd, String.valueOf(pid))
+        .redirectErrorStream(true)
+        .redirectOutput(tmp)
+        .start()
+        if (!p.waitFor(5, SECONDS)) {
+          p.destroyForcibly()
+          p.waitFor(2, SECONDS)
+          continue
+        }
+        if (p.exitValue() == 0) {
+          String output = tmp.getText("UTF-8")
+          if (output) {
+            return output
+          }
+        }
+      } catch (Throwable ignored) {
+        // try next candidate
+      } finally {
+        if (tmp != null) {
+          tmp.delete()
+        }
+      }
+    }
+    return null
+  }
+
   private String tailProcessLog(int lines) {
     try {
       def logFile = new File(logFilePath)
diff --git a/dd-trace-core/src/jmh/java/datadog/trace/util/concurrent/RingVsQueueBenchmark.java b/dd-trace-core/src/jmh/java/datadog/trace/util/concurrent/RingVsQueueBenchmark.java
new file mode 100644
index 00000000000..6a1e3d7c802
--- /dev/null
+++ b/dd-trace-core/src/jmh/java/datadog/trace/util/concurrent/RingVsQueueBenchmark.java
@@ -0,0 +1,191 @@
+package datadog.trace.util.concurrent;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+import java.util.function.BiConsumer;
+import org.jctools.queues.MpscArrayQueue;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Group;
+import org.openjdk.jmh.annotations.GroupThreads;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Head-to-head comparison of {@link MpscRingBuffer} (mutable pre-allocated slots) against the
+ * conventional approach of a jctools {@link MpscArrayQueue} with a fresh {@code Slot} allocated per
+ * publish. The latter is the pattern the current CSS code uses for {@code SpanSnapshot} on the
+ * producer side, so the delta between the two measures the actual allocation/handoff saving of the
+ * ring-buffer rewrite.
+ *
+ * <ul>
+ *   <li>{@code write_*_8p} — 8 producers, background drainer keeps the structure empty so the
+ *       measurement reflects publish cost, not full-structure drop cost. Pair-compare ring vs queue
+ *       at matched capacity.
+ *   <li>{@code e2e_*_8p} — JMH {@code @Group} pairing 8 producers with 1 consumer for each
+ *       structure. End-to-end ops/s under realistic backpressure.
+ * </ul>
+ *
+ * <p>Run with {@code -prof gc} to also see per-op allocation rate — that's where the ring's win is
+ * loudest, since the queue allocates one {@code Slot} per publish and the ring allocates none.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 2, time = 15, timeUnit = SECONDS)
+@Measurement(iterations = 5, time = 15, timeUnit = SECONDS)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(SECONDS)
+@Fork(value = 1)
+public class RingVsQueueBenchmark {
+
+  /**
+   * Shared slot type. {@code MpscRingBuffer} pre-allocates these and the producer mutates in place;
+   * the queue path allocates a fresh one per publish and offers the reference. Two constructors so
+   * both styles read naturally.
+   */
+  public static final class Slot {
+    long value;
+
+    Slot() {}
+
+    Slot(final long value) {
+      this.value = value;
+    }
+  }
+
+  // Static (non-capturing) handlers. Passing ts/bh as context lets the JIT keep these as
+  // singleton functions and avoid per-call lambda allocation.
+  private static final BiConsumer<ThreadState, Slot> RING_FILLER =
+      (ts, slot) -> {
+        slot.value = ts.counter;
+        ts.counter++;
+      };
+
+  private static final BiConsumer<Blackhole, Slot> RING_CONSUMER =
+      (bh, slot) -> bh.consume(slot.value);
+
+  @Param({"1024"})
+  public int capacity;
+
+  /** Write-side benchmark structures. Drained by a background thread so they never fill. */
+  MpscRingBuffer<Slot> ring;
+
+  MpscArrayQueue<Slot> queue;
+
+  /** E2e benchmark structures. JMH drives both sides via {@code @Group}; no background drainer. */
+  MpscRingBuffer<Slot> e2eRing;
+
+  MpscArrayQueue<Slot> e2eQueue;
+
+  private volatile boolean stopDrainers;
+  private Thread ringDrainer;
+  private Thread queueDrainer;
+
+  @Setup(Level.Trial)
+  public void setup() {
+    ring = new MpscRingBuffer<>(Slot::new, capacity);
+    queue = new MpscArrayQueue<>(capacity);
+    e2eRing = new MpscRingBuffer<>(Slot::new, capacity);
+    e2eQueue = new MpscArrayQueue<>(capacity);
+
+    stopDrainers = false;
+    ringDrainer =
+        new Thread(
+            () -> {
+              while (!stopDrainers) {
+                if (ring.drain((Slot s) -> {}) == 0) Thread.yield();
+              }
+            },
+            "RingVsQueueBenchmark-ringDrainer");
+    ringDrainer.setDaemon(true);
+    ringDrainer.start();
+
+    queueDrainer =
+        new Thread(
+            () -> {
+              while (!stopDrainers) {
+                Slot s = queue.poll();
+                if (s == null) Thread.yield();
+              }
+            },
+            "RingVsQueueBenchmark-queueDrainer");
+    queueDrainer.setDaemon(true);
+    queueDrainer.start();
+  }
+
+  @TearDown(Level.Trial)
+  public void teardown() throws InterruptedException {
+    stopDrainers = true;
+    ringDrainer.join(5_000);
+    queueDrainer.join(5_000);
+  }
+
+  @State(Scope.Thread)
+  public static class ThreadState {
+    long counter;
+  }
+
+  // ============ Write-side throughput ============
+
+  @Threads(8)
+  @Benchmark
+  public boolean write_ring_8p(final ThreadState ts) {
+    return ring.tryWrite(ts, RING_FILLER);
+  }
+
+  /** Mirrors the SpanSnapshot pattern: allocate a fresh instance per publish, offer it. */
+  @Threads(8)
+  @Benchmark
+  public boolean write_queue_8p(final ThreadState ts) {
+    return queue.offer(new Slot(ts.counter++));
+  }
+
+  // ============ End-to-end producer/consumer ============
+
+  @Group("e2e_ring_8p")
+  @GroupThreads(8)
+  @Benchmark
+  public boolean e2e_ring_producer(final ThreadState ts) {
+    return e2eRing.tryWrite(ts, RING_FILLER);
+  }
+
+  @Group("e2e_ring_8p")
+  @GroupThreads(1)
+  @Benchmark
+  public int e2e_ring_consumer(final Blackhole bh) {
+    int drained = e2eRing.drain(bh, RING_CONSUMER);
+    if (drained == 0) Thread.yield();
+    return drained;
+  }
+
+  @Group("e2e_queue_8p")
+  @GroupThreads(8)
+  @Benchmark
+  public boolean e2e_queue_producer(final ThreadState ts) {
+    return e2eQueue.offer(new Slot(ts.counter++));
+  }
+
+  @Group("e2e_queue_8p")
+  @GroupThreads(1)
+  @Benchmark
+  public int e2e_queue_consumer(final Blackhole bh) {
+    int drained = 0;
+    Slot slot;
+    while ((slot = e2eQueue.poll()) != null) {
+      bh.consume(slot.value);
+      drained++;
+    }
+    if (drained == 0) Thread.yield();
+    return drained;
+  }
+}
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java
index 8d6dc6b72d0..a518090f93b 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateEntry.java
@@ -8,7 +8,6 @@
 import datadog.trace.api.cache.DDCaches;
 import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
 import datadog.trace.util.Hashtable;
-import datadog.trace.util.LongHashingUtils;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -62,23 +61,19 @@ final class AggregateEntry extends Hashtable.Entry {
   static final long ERROR_TAG = 0x8000000000000000L;
   static final long TOP_LEVEL_TAG = 0x4000000000000000L;
 
-  // UTF8 caches consolidated from the previous MetricKey + ConflatingMetricsAggregator split.
-  private static final DDCache<String, UTF8BytesString> RESOURCE_CACHE =
-      DDCaches.newFixedSizeCache(32);
-  private static final DDCache<String, UTF8BytesString> SERVICE_CACHE =
-      DDCaches.newFixedSizeCache(32);
-  private static final DDCache<String, UTF8BytesString> OPERATION_CACHE =
-      DDCaches.newFixedSizeCache(64);
-  private static final DDCache<String, UTF8BytesString> SERVICE_SOURCE_CACHE =
-      DDCaches.newFixedSizeCache(16);
-  private static final DDCache<String, UTF8BytesString> TYPE_CACHE = DDCaches.newFixedSizeCache(8);
-  private static final DDCache<String, UTF8BytesString> SPAN_KIND_CACHE =
+  // UTF8 caches. Package-private so ConflatingMetricsAggregator's fillSlot can canonicalize
+  // producer-side via the same caches the entry constructor used to consult.
+  static final DDCache<String, UTF8BytesString> RESOURCE_CACHE = DDCaches.newFixedSizeCache(32);
+  static final DDCache<String, UTF8BytesString> SERVICE_CACHE = DDCaches.newFixedSizeCache(32);
+  static final DDCache<String, UTF8BytesString> OPERATION_CACHE = DDCaches.newFixedSizeCache(64);
+  static final DDCache<String, UTF8BytesString> SERVICE_SOURCE_CACHE =
       DDCaches.newFixedSizeCache(16);
-  private static final DDCache<String, UTF8BytesString> HTTP_METHOD_CACHE =
-      DDCaches.newFixedSizeCache(8);
-  private static final DDCache<String, UTF8BytesString> HTTP_ENDPOINT_CACHE =
+  static final DDCache<String, UTF8BytesString> TYPE_CACHE = DDCaches.newFixedSizeCache(8);
+  static final DDCache<String, UTF8BytesString> SPAN_KIND_CACHE = DDCaches.newFixedSizeCache(16);
+  static final DDCache<String, UTF8BytesString> HTTP_METHOD_CACHE = DDCaches.newFixedSizeCache(8);
+  static final DDCache<String, UTF8BytesString> HTTP_ENDPOINT_CACHE =
       DDCaches.newFixedSizeCache(32);
-  private static final DDCache<String, UTF8BytesString> GRPC_STATUS_CODE_CACHE =
+  static final DDCache<String, UTF8BytesString> GRPC_STATUS_CODE_CACHE =
       DDCaches.newFixedSizeCache(32);
 
   /**
@@ -141,23 +136,30 @@ final class AggregateEntry extends Hashtable.Entry {
   private int topLevelCount;
   private long duration;
 
-  /** Hot-path constructor for the producer/consumer flow. Builds UTF8 fields via the caches. */
-  AggregateEntry(SpanSnapshot s, long keyHash) {
-    super(keyHash);
-    this.resource = canonicalize(RESOURCE_CACHE, s.resourceName);
-    this.service = canonicalize(SERVICE_CACHE, s.serviceName);
-    this.operationName = canonicalize(OPERATION_CACHE, s.operationName);
-    this.serviceSource = canonicalizeOptional(SERVICE_SOURCE_CACHE, s.serviceNameSource);
-    this.type = canonicalize(TYPE_CACHE, s.spanType);
-    this.spanKind = canonicalize(SPAN_KIND_CACHE, s.spanKind);
-    this.httpMethod = canonicalizeOptional(HTTP_METHOD_CACHE, s.httpMethod);
-    this.httpEndpoint = canonicalizeOptional(HTTP_ENDPOINT_CACHE, s.httpEndpoint);
-    this.grpcStatusCode = canonicalizeOptional(GRPC_STATUS_CODE_CACHE, s.grpcStatusCode);
+  /** Hot-path constructor for the producer/consumer flow. Slot fields are already canonical. */
+  AggregateEntry(SpanSnapshot s) {
+    super(s.keyHash);
+    // The slot's string fields are already canonical UTF8BytesString instances (canonicalized by
+    // the producer in fillSlot via the same caches we used to consult here). Just copy refs;
+    // matches() can do identity comparison instead of contentEquals.
+    this.resource = s.resourceName;
+    this.service = s.serviceName;
+    this.operationName = s.operationName;
+    this.serviceSource = s.serviceNameSource;
+    this.type = s.spanType;
+    this.spanKind = s.spanKind;
+    this.httpMethod = s.httpMethod;
+    this.httpEndpoint = s.httpEndpoint;
+    this.grpcStatusCode = s.grpcStatusCode;
     this.httpStatusCode = s.httpStatusCode;
     this.synthetic = s.synthetic;
     this.traceRoot = s.traceRoot;
     this.peerTagNames = s.peerTagSchema == null ? null : s.peerTagSchema.names;
-    this.peerTagValues = s.peerTagValues;
+    // The slot's peerTagValues is a reusable scratch buffer owned by the ring -- the producer
+    // overwrites it on every claim of this slot. Snapshot it here by value so this entry's
+    // identity doesn't drift when the slot is reclaimed.
+    this.peerTagValues =
+        s.peerTagSchema == null ? null : Arrays.copyOf(s.peerTagValues, s.peerTagValues.length);
     this.peerTags = materializePeerTags(this.peerTagNames, this.peerTagValues);
   }
 
@@ -241,21 +243,29 @@ void clear() {
   }
 
   boolean matches(SpanSnapshot s) {
-    String[] snapshotNames = s.peerTagSchema == null ? null : s.peerTagSchema.names;
+    // All string fields are canonical UTF8BytesString references from the per-field DDCaches.
+    // Same content => same canonical instance => identity comparison suffices.
+    // peerTagSchema is also shared by reference (producer reads cachedPeerTagSchema), so its
+    // names array reference is stable too. peerTagValues stays content-compared since values
+    // vary per publish and aren't canonicalized.
     return httpStatusCode == s.httpStatusCode
         && synthetic == s.synthetic
         && traceRoot == s.traceRoot
-        && contentEquals(resource, s.resourceName)
-        && contentEquals(service, s.serviceName)
-        && contentEquals(operationName, s.operationName)
-        && contentEquals(serviceSource, s.serviceNameSource)
-        && contentEquals(type, s.spanType)
-        && contentEquals(spanKind, s.spanKind)
-        && Arrays.equals(peerTagNames, snapshotNames)
-        && Arrays.equals(peerTagValues, s.peerTagValues)
-        && contentEquals(httpMethod, s.httpMethod)
-        && contentEquals(httpEndpoint, s.httpEndpoint)
-        && contentEquals(grpcStatusCode, s.grpcStatusCode);
+        && resource == s.resourceName
+        && service == s.serviceName
+        && operationName == s.operationName
+        && serviceSource == s.serviceNameSource
+        && type == s.spanType
+        && spanKind == s.spanKind
+        // PeerTagSchema instances aren't guaranteed to be shared by reference across all
+        // producers (cachedPeerTagSchema can be swapped during reconcile; tests build fresh
+        // instances), so compare names by content. The same-reference case short-circuits to
+        // O(1) via Arrays.equals's a==b fast path.
+        && Arrays.equals(peerTagNames, s.peerTagSchema == null ? null : s.peerTagSchema.names)
+        && (peerTagNames == null || Arrays.equals(peerTagValues, s.peerTagValues))
+        && httpMethod == s.httpMethod
+        && httpEndpoint == s.httpEndpoint
+        && grpcStatusCode == s.grpcStatusCode;
   }
 
   /**
@@ -267,41 +277,6 @@ boolean matches(long keyHash, SpanSnapshot s) {
     return this.keyHash == keyHash && matches(s);
   }
 
-  /**
-   * Computes the 64-bit lookup hash for a {@link SpanSnapshot}. Chained per-field calls -- no
-   * varargs / Object[] allocation, no autoboxing on primitive overloads. The constructor's
-   * super({@code hashOf(s)}) call uses the same function so an entry built from a snapshot hashes
-   * to the same bucket the snapshot itself looks up.
-   *
-   * <p>Hashes are content-stable across {@code String} / {@code UTF8BytesString}: {@link
-   * UTF8BytesString#hashCode()} returns the underlying {@code String}'s hash.
-   */
-  static long hashOf(SpanSnapshot s) {
-    long h = 0;
-    h = LongHashingUtils.addToHash(h, s.resourceName);
-    h = LongHashingUtils.addToHash(h, s.serviceName);
-    h = LongHashingUtils.addToHash(h, s.operationName);
-    h = LongHashingUtils.addToHash(h, s.serviceNameSource);
-    h = LongHashingUtils.addToHash(h, s.spanType);
-    h = LongHashingUtils.addToHash(h, s.httpStatusCode);
-    h = LongHashingUtils.addToHash(h, s.synthetic);
-    h = LongHashingUtils.addToHash(h, s.traceRoot);
-    h = LongHashingUtils.addToHash(h, s.spanKind);
-    // Always mix in both the schema's content hash and the values' content hash, unconditionally
-    // (no null-skip). Arrays.hashCode is content-based for both String[]s; the default
-    // Object[].hashCode is identity-based, which would let two snapshots with content-equal but
-    // distinct PeerTagSchema instances hash to different buckets. Null inputs hash to 0 here,
-    // distinct from {@code Arrays.hashCode(empty)} = 1 or any non-empty array.
-    h =
-        LongHashingUtils.addToHash(
-            h, s.peerTagSchema == null ? 0 : Arrays.hashCode(s.peerTagSchema.names));
-    h = LongHashingUtils.addToHash(h, Arrays.hashCode(s.peerTagValues));
-    h = LongHashingUtils.addToHash(h, s.httpMethod);
-    h = LongHashingUtils.addToHash(h, s.httpEndpoint);
-    h = LongHashingUtils.addToHash(h, s.grpcStatusCode);
-    return h;
-  }
-
   // Accessors for SerializingMetricWriter.
   UTF8BytesString getResource() {
     return resource;
@@ -366,9 +341,12 @@ List<UTF8BytesString> getPeerTags() {
 
   // ----- helpers -----
 
-  private static UTF8BytesString canonicalize(
+  static UTF8BytesString canonicalize(
       DDCache<String, UTF8BytesString> cache, CharSequence charSeq) {
-    if (charSeq == null) {
+    // Treat null and length-zero as the same canonical EMPTY instance so the producer-side
+    // canonicalization produces identity-equal results for null/"" inputs. matches() now uses
+    // identity comparison, so the previous content-equals collapse needs to happen here.
+    if (charSeq == null || charSeq.length() == 0) {
       return EMPTY;
     }
     if (charSeq instanceof UTF8BytesString) {
@@ -388,9 +366,11 @@ private static UTF8BytesString canonicalize(
    * single helper keeps the constructor consistent.
    */
   @Nullable
-  private static UTF8BytesString canonicalizeOptional(
+  static UTF8BytesString canonicalizeOptional(
       DDCache<String, UTF8BytesString> cache, @Nullable CharSequence charSeq) {
-    if (charSeq == null) {
+    // Treat null and length-zero the same: both map to null (i.e. "absent"). matches() uses
+    // identity comparison, so the prior content-equals collapse needs to happen here.
+    if (charSeq == null || charSeq.length() == 0) {
       return null;
     }
     if (charSeq instanceof UTF8BytesString) {
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java
index abadc7e5f17..b23e492ce82 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/AggregateTable.java
@@ -55,7 +55,9 @@ boolean isEmpty() {
    * caller should drop the data point in that case.
    */
   AggregateEntry findOrInsert(SpanSnapshot snapshot) {
-    long keyHash = AggregateEntry.hashOf(snapshot);
+    // keyHash is precomputed by the producer in SpanSnapshot.computeAndSetKeyHash, no
+    // re-hashing on the aggregator thread.
+    final long keyHash = snapshot.keyHash;
     for (AggregateEntry candidate = Support.bucket(buckets, keyHash);
         candidate != null;
         candidate = candidate.next()) {
@@ -66,7 +68,7 @@ AggregateEntry findOrInsert(SpanSnapshot snapshot) {
     if (size >= maxAggregates && !evictOneStale()) {
       return null;
     }
-    AggregateEntry entry = new AggregateEntry(snapshot, keyHash);
+    AggregateEntry entry = new AggregateEntry(snapshot);
     Support.insertHeadEntry(buckets, keyHash, entry);
     size++;
     return entry;
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java
index d809d452522..d70a1fbaae1 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java
@@ -5,8 +5,11 @@
 import datadog.trace.common.metrics.SignalItem.ClearSignal;
 import datadog.trace.common.metrics.SignalItem.StopSignal;
 import datadog.trace.core.monitor.HealthMetrics;
+import datadog.trace.util.concurrent.MpscRingBuffer;
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.LockSupport;
+import java.util.function.BiConsumer;
 import org.jctools.queues.MessagePassingQueue;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -17,7 +20,8 @@ final class Aggregator implements Runnable {
 
   private static final Logger log = LoggerFactory.getLogger(Aggregator.class);
 
-  private final MessagePassingQueue<InboxItem> inbox;
+  private final MpscRingBuffer<SpanSnapshot> dataInbox;
+  private final MessagePassingQueue<SignalItem> signalInbox;
   private final AggregateTable aggregates;
   private final MetricWriter writer;
   private final HealthMetrics healthMetrics;
@@ -42,9 +46,36 @@ final class Aggregator implements Runnable {
       justification = "the field is confined to the agent thread running the Aggregator")
   private boolean dirty;
 
+  @SuppressFBWarnings(
+      value = "AT_STALE_THREAD_WRITE_OF_PRIMITIVE",
+      justification = "the field is confined to the agent thread running the Aggregator")
+  private boolean stopped;
+
+  /**
+   * True iff the aggregator thread is currently parked (or about to park) waiting for new work.
+   * Producers volatile-read this after publishing; on true they call {@link #unparkIfWaiting()} to
+   * wake the aggregator without paying the {@code sleepMillis} latency.
+   *
+   * <p>Volatile so the producer side sees the latest value. Set true before park, false on wake.
+   * The aggregator re-checks the inbox after setting this true (see {@link #run}) to close the race
+   * window: if a producer published just before the aggregator set the flag, the aggregator would
+   * have missed the unpark; the re-check catches that work and skips the park.
+   */
+  private volatile boolean waiting;
+
+  private volatile Thread aggregatorThread;
+
+  /**
+   * Static-singleton snapshot handler. Reads from the slot via {@code this} (passed as the context)
+   * and avoids per-drain lambda capture.
+   */
+  private static final BiConsumer<Aggregator, SpanSnapshot> SNAPSHOT_HANDLER =
+      Aggregator::handleSnapshot;
+
   Aggregator(
       MetricWriter writer,
-      MessagePassingQueue<InboxItem> inbox,
+      MpscRingBuffer<SpanSnapshot> dataInbox,
+      MessagePassingQueue<SignalItem> signalInbox,
       int maxAggregates,
       long reportingInterval,
       TimeUnit reportingIntervalTimeUnit,
@@ -52,7 +83,8 @@ final class Aggregator implements Runnable {
       Runnable onReportCycle) {
     this(
         writer,
-        inbox,
+        dataInbox,
+        signalInbox,
         maxAggregates,
         reportingInterval,
         reportingIntervalTimeUnit,
@@ -63,7 +95,8 @@ final class Aggregator implements Runnable {
 
   Aggregator(
       MetricWriter writer,
-      MessagePassingQueue<InboxItem> inbox,
+      MpscRingBuffer<SpanSnapshot> dataInbox,
+      MessagePassingQueue<SignalItem> signalInbox,
       int maxAggregates,
       long reportingInterval,
       TimeUnit reportingIntervalTimeUnit,
@@ -71,7 +104,8 @@ final class Aggregator implements Runnable {
       HealthMetrics healthMetrics,
       Runnable onReportCycle) {
     this.writer = writer;
-    this.inbox = inbox;
+    this.dataInbox = dataInbox;
+    this.signalInbox = signalInbox;
     this.aggregates = new AggregateTable(maxAggregates);
     this.reportingIntervalNanos = reportingIntervalTimeUnit.toNanos(reportingInterval);
     this.sleepMillis = sleepMillis;
@@ -81,17 +115,34 @@ final class Aggregator implements Runnable {
 
   @Override
   public void run() {
-    Thread currentThread = Thread.currentThread();
-    Drainer drainer = new Drainer();
-    while (!currentThread.isInterrupted() && !drainer.stopped) {
+    final Thread currentThread = Thread.currentThread();
+    aggregatorThread = currentThread;
+    final long parkNanos = TimeUnit.MILLISECONDS.toNanos(sleepMillis);
+    while (!currentThread.isInterrupted() && !stopped) {
       try {
-        if (!inbox.isEmpty()) {
-          inbox.drain(drainer);
-        } else {
-          Thread.sleep(sleepMillis);
+        int drainedData = dataInbox.drain(this, SNAPSHOT_HANDLER);
+        // Signals are processed after the data drain. REPORT/STOP additionally drain any
+        // snapshots that arrived between the drain above and the signal's enqueue -- the
+        // signal-channel offer happens-before the poll here, so any publish that completed
+        // before report()/stop() is guaranteed visible to the inline drain in handleSignal.
+        SignalItem signal;
+        while (!stopped && (signal = signalInbox.poll()) != null) {
+          handleSignal(signal);
+        }
+        if (stopped) break;
+        if (drainedData == 0 && signalInbox.isEmpty()) {
+          waiting = true;
+          try {
+            // Re-check after publishing the waiting flag. A producer that published just before
+            // we set waiting=true won't have unparked us (they read waiting=false); the re-check
+            // catches that work via the volatile producerCursor.
+            if (dataInbox.isEmpty() && signalInbox.isEmpty()) {
+              LockSupport.parkNanos(this, parkNanos);
+            }
+          } finally {
+            waiting = false;
+          }
         }
-      } catch (InterruptedException e) {
-        currentThread.interrupt();
       } catch (Throwable error) {
         log.debug("error aggregating metrics", error);
       }
@@ -99,57 +150,68 @@ public void run() {
     log.debug("metrics aggregator exited");
   }
 
-  private final class Drainer implements MessagePassingQueue.Consumer<InboxItem> {
-
-    boolean stopped = false;
-
-    @Override
-    public void accept(InboxItem item) {
-      if (item == ClearSignal.CLEAR) {
-        // ClearSignal is routed through the inbox (rather than letting the caller mutate
-        // AggregateTable directly) so the aggregator thread stays the sole writer. AggregateTable
-        // is not thread-safe; a direct clear() from e.g. the OkHttpSink callback thread would
-        // race with Drainer.accept on this thread.
-        //
-        // We deliberately do NOT call inbox.clear() here. Doing so would erase any queued STOP
-        // (or REPORT) signals that happen to sit behind CLEAR -- a real concern when a
-        // downgrade is followed quickly by close(), where the trampled STOP leaves the
-        // aggregator thread spinning until thread.join times out. features.supportsMetrics() is
-        // already false by the time CLEAR was offered, so producers have stopped publishing;
-        // any in-flight snapshots will drain naturally into the just-cleared table, get
-        // re-aggregated, and flushed on the next report -- where the agent rejects them again,
-        // triggering another DOWNGRADED -> disable() -> CLEAR cycle. Worst case: one extra
-        // reporting cycle of wasted work, which we accept for the safety of preserving STOP.
-        if (!stopped) {
-          aggregates.clear();
-          // Clear dirty too -- without this, the next report() would see dirty=true, run
-          // expungeStaleAggregates against the (now-empty) table, find isEmpty()=true, and skip
-          // the flush anyway. Same observable outcome, but resetting here keeps the invariant
-          // "dirty implies there's data to flush" honest.
-          dirty = false;
-        }
-        ((SignalItem) item).complete();
-      } else if (item instanceof SignalItem) {
-        SignalItem signal = (SignalItem) item;
-        if (!stopped) {
-          report(wallClockTime(), signal);
-          stopped = item instanceof StopSignal;
-          if (stopped) {
-            signal.complete();
-          }
-        } else {
-          signal.ignore();
-        }
-      } else if (item instanceof SpanSnapshot && !stopped) {
-        SpanSnapshot snapshot = (SpanSnapshot) item;
-        AggregateEntry entry = aggregates.findOrInsert(snapshot);
-        if (entry != null) {
-          entry.recordOneDuration(snapshot.tagAndDuration);
-          dirty = true;
-        } else {
-          // table at cap with no stale entry available to evict
-          healthMetrics.onStatsAggregateDropped();
-        }
+  /**
+   * Wake the aggregator if it's currently parked. Called from producer threads after publishing.
+   *
+   * <p>Cost on the producer hot path is one volatile read; only on a {@code true} read does the
+   * producer pay an {@code unpark} (cheap idempotent atomic). At saturating workloads where the
+   * aggregator is never parked, the volatile read is the only cost.
+   */
+  void unparkIfWaiting() {
+    if (waiting) {
+      final Thread t = aggregatorThread;
+      if (t != null) {
+        LockSupport.unpark(t);
+      }
+    }
+  }
+
+  private static void handleSnapshot(Aggregator agg, SpanSnapshot snapshot) {
+    // Skip-sentinel from the producer's overclaim path: the producer claimed this slot
+    // speculatively (one slot per span in the trace) but the span turned out to be non-eligible
+    // or the trace hit an ignored-resource. shouldComputeMetric guarantees duration > 0 for any
+    // real publish, so tagAndDuration == 0 uniquely identifies a skip.
+    if (snapshot.tagAndDuration == 0L) {
+      return;
+    }
+    AggregateEntry entry = agg.aggregates.findOrInsert(snapshot);
+    if (entry != null) {
+      entry.recordOneDuration(snapshot.tagAndDuration);
+      agg.dirty = true;
+    } else {
+      // table at cap with no stale entry available to evict
+      agg.healthMetrics.onStatsAggregateDropped();
+    }
+  }
+
+  private void handleSignal(SignalItem signal) {
+    if (signal == ClearSignal.CLEAR) {
+      // ClearSignal is routed through the signal channel (rather than letting the caller mutate
+      // AggregateTable directly) so the aggregator thread stays the sole writer. AggregateTable
+      // is not thread-safe; a direct clear() from e.g. the OkHttpSink callback thread would race
+      // with the data-drain on this thread.
+      //
+      // We do NOT clear the snapshot data ring here. Any in-flight snapshots will drain
+      // naturally into the just-cleared table, get re-aggregated, and flushed on the next
+      // report -- where the agent rejects them again, triggering another DOWNGRADED -> disable()
+      // -> CLEAR cycle. Worst case: one extra reporting cycle of wasted work.
+      aggregates.clear();
+      // Clear dirty too -- without this, the next report() would see dirty=true, run
+      // expungeStaleAggregates against the (now-empty) table, find isEmpty()=true, and skip
+      // the flush anyway. Same observable outcome, but resetting here keeps the invariant
+      // "dirty implies there's data to flush" honest.
+      dirty = false;
+      signal.complete();
+    } else {
+      // STOP or REPORT: catch up on any data that arrived between the loop's main drain and the
+      // signal being enqueued. Producer's signalInbox.offer happens-after its dataInbox publish,
+      // so by the time we observe the signal, every snapshot the producer had published is
+      // visible to drain() here. This is what gives report() bucket-boundary determinism.
+      dataInbox.drain(this, SNAPSHOT_HANDLER);
+      report(wallClockTime(), signal);
+      if (signal instanceof StopSignal) {
+        stopped = true;
+        signal.complete();
       }
     }
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java
index 895ee434854..dc6ea86bac7 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java
@@ -28,6 +28,7 @@
 import datadog.trace.core.SpanKindFilter;
 import datadog.trace.core.monitor.HealthMetrics;
 import datadog.trace.util.AgentTaskScheduler;
+import datadog.trace.util.concurrent.MpscRingBuffer;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -63,9 +64,27 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve
   private static final SpanKindFilter INTERNAL_KIND =
       SpanKindFilter.builder().includeInternal().build();
 
+  /** Capacity of the small MPSC queue carrying control signals (REPORT / STOP / CLEAR). */
+  private static final int SIGNAL_INBOX_CAPACITY = 16;
+
   private final Set<String> ignoredResources;
   private final Thread thread;
-  private final MessagePassingQueue<InboxItem> inbox;
+
+  /**
+   * Producer/consumer data channel: a {@link MpscRingBuffer} of pre-allocated, recyclable {@link
+   * SpanSnapshot} slots. {@link #publish(List)} claims a range of slots in one CAS via {@link
+   * MpscRingBuffer#tryClaimRange} and fills each in place via {@link #fillSlot} -- no per-publish
+   * allocation.
+   */
+  private final MpscRingBuffer<SpanSnapshot> dataInbox;
+
+  /**
+   * Separate small queue for control signals. Kept off the data ring because the ring only holds
+   * one element type (the slot), and routing signals through a side channel lets the aggregator
+   * service them ahead of the data backlog each loop iteration.
+   */
+  private final MessagePassingQueue<SignalItem> signalInbox;
+
   private final Sink sink;
   private final Aggregator aggregator;
   private final long reportingInterval;
@@ -173,14 +192,16 @@ public ConflatingMetricsAggregator(
       boolean includeEndpointInMetrics) {
     this.ignoredResources = ignoredResources;
     this.includeEndpointInMetrics = includeEndpointInMetrics;
-    this.inbox = Queues.mpscArrayQueue(queueSize);
+    this.dataInbox = new MpscRingBuffer<>(SpanSnapshot::new, queueSize);
+    this.signalInbox = Queues.mpscArrayQueue(SIGNAL_INBOX_CAPACITY);
     this.features = features;
     this.healthMetrics = healthMetric;
     this.sink = sink;
     this.aggregator =
         new Aggregator(
             metricWriter,
-            inbox,
+            dataInbox,
+            signalInbox,
             maxAggregates,
             reportingInterval,
             timeUnit,
@@ -218,11 +239,11 @@ public boolean report() {
     boolean published;
     int attempts = 0;
     do {
-      published = inbox.offer(REPORT);
+      published = signalInbox.offer(REPORT);
       ++attempts;
     } while (!published && attempts < 10);
     if (!published) {
-      log.debug("Skipped metrics reporting because the queue is full");
+      log.debug("Skipped metrics reporting because the signal queue is full");
     }
     return published;
   }
@@ -245,7 +266,7 @@ public Future<Boolean> forceReport() {
     ReportSignal reportSignal = new ReportSignal();
     boolean published = false;
     while (thread.isAlive() && !published) {
-      published = inbox.offer(reportSignal);
+      published = signalInbox.offer(reportSignal);
       if (!published) {
         try {
           Thread.sleep(10);
@@ -264,27 +285,81 @@ public Future<Boolean> forceReport() {
 
   @Override
   public boolean publish(List<? extends CoreSpan<?>> trace) {
+    if (!features.supportsMetrics()) {
+      return false;
+    }
+    final int traceSize = trace.size();
+    if (traceSize == 0) {
+      return false;
+    }
+
+    // Overclaim the worst case: assume every span in the trace is metrics-eligible and claim
+    // that many ring slots in a single CAS. Non-eligible spans get a "skip" sentinel
+    // (tagAndDuration = 0L); the aggregator recognises this and bypasses findOrInsert for those
+    // slots. We expect most spans in a typical trace to be eligible (top-level / measured /
+    // server-client-producer-consumer kinds), so the slot waste is small in exchange for a
+    // single-pass producer loop. Indexed iteration (vs. enhanced-for) avoids per-call Iterator
+    // allocation -- trace is typically a SpanList, but even Collections.singletonList's
+    // iterator() allocates.
+    final long startSeq = dataInbox.tryClaimRange(traceSize);
+    if (startSeq < 0L) {
+      healthMetrics.onStatsInboxFull();
+      return false;
+    }
+
     boolean forceKeep = false;
-    int counted = 0;
-    if (features.supportsMetrics()) {
-      for (CoreSpan<?> span : trace) {
-        boolean isTopLevel = span.isTopLevel();
-        if (shouldComputeMetric(span, isTopLevel)) {
-          final CharSequence resourceName = span.getResourceName();
-          if (resourceName != null && ignoredResources.contains(resourceName.toString())) {
-            // skip publishing all children
-            forceKeep = false;
-            break;
+    int filled = 0;
+
+    for (int i = 0; i < traceSize; i++) {
+      final CoreSpan<?> span = trace.get(i);
+      final long seq = startSeq + i;
+      final SpanSnapshot slot = dataInbox.slotAt(seq);
+      final boolean isTopLevel = span.isTopLevel();
+
+      if (shouldComputeMetric(span, isTopLevel)) {
+        final CharSequence resourceName = span.getResourceName();
+        if (resourceName != null && ignoredResources.contains(resourceName.toString())) {
+          // Ignored resource: drop metrics for the whole trace. Mark this slot AND every
+          // remaining slot in the claimed range as skip-sentinel so the aggregator advances past
+          // them.
+          slot.tagAndDuration = 0L;
+          dataInbox.publish(seq);
+          for (int j = i + 1; j < traceSize; j++) {
+            final long skipSeq = startSeq + j;
+            dataInbox.slotAt(skipSeq).tagAndDuration = 0L;
+            dataInbox.publish(skipSeq);
           }
-          counted++;
-          forceKeep |= publish(span, isTopLevel);
+          healthMetrics.onClientStatTraceComputed(0, traceSize, true);
+          aggregator.unparkIfWaiting();
+          return false;
+        }
+        try {
+          fillSlot(span, isTopLevel, slot);
+        } finally {
+          // Publish in finally so a throwing fillSlot can't strand the consumer at this
+          // sequence -- matches the publish-on-throw guarantee in MpscRingBuffer.tryWrite.
+          dataInbox.publish(seq);
         }
+        forceKeep |= span.getError() > 0;
+        filled++;
+      } else {
+        // Non-eligible span: mark slot as skip-sentinel and publish so the aggregator advances.
+        slot.tagAndDuration = 0L;
+        dataInbox.publish(seq);
       }
-      healthMetrics.onClientStatTraceComputed(counted, trace.size(), !forceKeep);
     }
+
+    healthMetrics.onClientStatTraceComputed(filled, traceSize, !forceKeep);
+    // Wake the aggregator if it parked while we were filling. Producer-side cost is one volatile
+    // read when the aggregator isn't parked.
+    aggregator.unparkIfWaiting();
     return forceKeep;
   }
 
+  // Other site that posts work the aggregator needs to see: ignored-resource short-circuit
+  // returns above before reaching unparkIfWaiting. Mirror that wake-up there too so the
+  // aggregator picks up the skip-sentinel slots quickly.
+
   private boolean shouldComputeMetric(CoreSpan<?> span, boolean isTopLevel) {
     return (span.isMeasured() || isTopLevel || span.isKind(METRICS_ELIGIBLE_KINDS))
         && span.getLongRunningVersion()
@@ -292,21 +367,16 @@ private boolean shouldComputeMetric(CoreSpan<?> span, boolean isTopLevel) {
         && span.getDurationNano() > 0;
   }
 
-  private boolean publish(CoreSpan<?> span, boolean isTopLevel) {
-    // Error decision drives force-keep sampling regardless of whether the snapshot gets queued.
-    boolean error = span.getError() > 0;
-
-    // Fast-path the inbox-full case before any tag extraction or snapshot allocation. size() is
-    // approximate on jctools' MPSC queue but that's fine: if we under-estimate, we fall through
-    // and let inbox.offer be the source of truth (existing behavior); if we over-estimate, we
-    // drop a snapshot that would have fit -- acceptable, onStatsInboxFull was going to fire
-    // imminently anyway.
-    if (inbox.size() >= inbox.capacity()) {
-      healthMetrics.onStatsInboxFull();
-      return error;
-    }
+  /**
+   * Producer-side slot fill. Called between a {@link MpscRingBuffer#tryClaimRange} and the matching
+   * {@link MpscRingBuffer#publish}: reads from {@code span} and instance state, writes every field
+   * on {@code slot} (including {@code null} where applicable) so stale values from a prior occupant
+   * of this slot don't bleed through.
+   */
+  private void fillSlot(CoreSpan<?> span, boolean isTopLevel, SpanSnapshot slot) {
+    final boolean error = span.getError() > 0;
 
-    // Extract HTTP method and endpoint only if the feature is enabled
+    // Extract HTTP method and endpoint only if the feature is enabled.
     String httpMethod = null;
     String httpEndpoint = null;
     if (includeEndpointInMetrics) {
@@ -329,37 +399,63 @@ private boolean publish(CoreSpan<?> span, boolean isTopLevel) {
     long tagAndDuration =
         span.getDurationNano() | (error ? ERROR_TAG : 0L) | (isTopLevel ? TOP_LEVEL_TAG : 0L);
 
+    // Fill the slot's reusable peerTagValues scratch in place. The slot owns its array across
+    // publishes; we only allocate when the slot has none or the schema's length changed. When
+    // no tags fire, leave the scratch in place and clear peerTagSchema -- consumers gate on
+    // peerTagSchema (see AggregateEntry.matches / hashOf), so stale scratch contents are inert.
     PeerTagSchema peerTagSchema = peerTagSchemaFor(span);
-    String[] peerTagValues =
-        peerTagSchema == null ? null : capturePeerTagValues(span, peerTagSchema);
-    if (peerTagValues == null) {
-      // No tags fired -- drop the schema reference so the consumer doesn't bother iterating an
-      // all-null array.
-      peerTagSchema = null;
+    if (peerTagSchema != null) {
+      final String[] names = peerTagSchema.names;
+      final int n = names.length;
+      String[] valuesArr = slot.peerTagValues;
+      if (valuesArr == null || valuesArr.length != n) {
+        valuesArr = new String[n];
+        slot.peerTagValues = valuesArr;
+      }
+      boolean anyHit = false;
+      for (int i = 0; i < n; i++) {
+        Object v = span.unsafeGetTag(names[i]);
+        String tag = v == null ? null : v.toString();
+        valuesArr[i] = tag;
+        anyHit |= (tag != null);
+      }
+      if (!anyHit) {
+        // Schema fired no tags -- treat like the no-peer-tag case. Keep valuesArr alive on the
+        // slot for the next publish; nullifying peerTagSchema makes downstream gates skip it.
+        peerTagSchema = null;
+      }
     }
 
-    SpanSnapshot snapshot =
-        new SpanSnapshot(
-            span.getResourceName(),
-            span.getServiceName(),
-            span.getOperationName(),
-            span.getServiceNameSource(),
-            spanType,
-            span.getHttpStatusCode(),
-            isSynthetic(span),
-            span.getParentId() == 0,
-            spanKind,
-            peerTagSchema,
-            peerTagValues,
-            httpMethod,
-            httpEndpoint,
-            grpcStatusCode,
-            tagAndDuration);
-    if (!inbox.offer(snapshot)) {
-      healthMetrics.onStatsInboxFull();
-    }
-    // force keep keys if there are errors
-    return error;
+    // Canonicalize every string field to UTF8BytesString via the per-field DDCaches on
+    // AggregateEntry. This work used to happen on the aggregator thread; doing it here
+    // distributes the cost across producer threads and gives the aggregator's matches() identity
+    // comparisons instead of contentEquals chains.
+    slot.resourceName =
+        AggregateEntry.canonicalize(AggregateEntry.RESOURCE_CACHE, span.getResourceName());
+    slot.serviceName =
+        AggregateEntry.canonicalize(AggregateEntry.SERVICE_CACHE, span.getServiceName());
+    slot.operationName =
+        AggregateEntry.canonicalize(AggregateEntry.OPERATION_CACHE, span.getOperationName());
+    slot.serviceNameSource =
+        AggregateEntry.canonicalizeOptional(
+            AggregateEntry.SERVICE_SOURCE_CACHE, span.getServiceNameSource());
+    slot.spanType = AggregateEntry.canonicalize(AggregateEntry.TYPE_CACHE, spanType);
+    slot.httpStatusCode = span.getHttpStatusCode();
+    slot.synthetic = isSynthetic(span);
+    slot.traceRoot = span.getParentId() == 0;
+    slot.spanKind = AggregateEntry.canonicalize(AggregateEntry.SPAN_KIND_CACHE, spanKind);
+    slot.peerTagSchema = peerTagSchema;
+    slot.httpMethod =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_METHOD_CACHE, httpMethod);
+    slot.httpEndpoint =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_ENDPOINT_CACHE, httpEndpoint);
+    slot.grpcStatusCode =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.GRPC_STATUS_CODE_CACHE, grpcStatusCode);
+    slot.tagAndDuration = tagAndDuration;
+
+    // Precompute the table-lookup key hash on the producer side. AggregateTable#findOrInsert
+    // reads this directly instead of running the chained hashOf on the aggregator thread.
+    SpanSnapshot.computeAndSetKeyHash(slot);
   }
 
   /**
@@ -444,28 +540,6 @@ private void reconcilePeerTagSchema() {
     }
   }
 
-  /**
-   * Captures the span's peer-tag values into a {@code String[]} parallel to {@code schema.names}.
-   * Slots remain {@code null} for tags the span didn't set; the array itself is lazily allocated on
-   * the first hit so spans that fire no peer tags pay zero allocation. Returns {@code null} when
-   * none of the configured peer tags are set on the span.
-   */
-  private static String[] capturePeerTagValues(CoreSpan<?> span, PeerTagSchema schema) {
-    String[] names = schema.names;
-    int n = names.length;
-    String[] values = null;
-    for (int i = 0; i < n; i++) {
-      Object v = span.unsafeGetTag(names[i]);
-      if (v != null) {
-        if (values == null) {
-          values = new String[n];
-        }
-        values[i] = v.toString();
-      }
-    }
-    return values;
-  }
-
   private static boolean isSynthetic(CoreSpan<?> span) {
     return span.getOrigin() != null && SYNTHETICS_ORIGIN.equals(span.getOrigin().toString());
   }
@@ -474,7 +548,7 @@ public void stop() {
     if (null != cancellation) {
       cancellation.cancel();
     }
-    inbox.offer(STOP);
+    signalInbox.offer(STOP);
   }
 
   @Override
@@ -512,17 +586,18 @@ private void disable() {
     features.discover();
     if (!features.supportsMetrics()) {
       log.debug("Disabling metric reporting because an agent downgrade was detected");
-      // Route the clear through the inbox so the aggregator thread is the only writer.
+      // Route the clear through the signal channel so the aggregator thread is the only writer.
       // AggregateTable is not thread-safe; mutating it directly from this thread would race
-      // with Drainer.accept on the aggregator thread.
+      // with snapshot processing on the aggregator thread.
       //
-      // Best-effort single offer rather than the retry-loop pattern in report(). If the inbox is
-      // full at downgrade time the clear is dropped, but the system self-heals: features.discover()
-      // already flipped supportsMetrics() false, so producer publish() calls now skip the inbox;
-      // the aggregator drains existing snapshots and ships them on the next report cycle; the
-      // sink rejects that payload and fires DOWNGRADED again, which retries disable() against a
-      // now-empty inbox. Worst case: one extra reporting cycle of stale data.
-      inbox.offer(CLEAR);
+      // Best-effort single offer rather than the retry-loop pattern in report(). If the signal
+      // queue is full at downgrade time the clear is dropped, but the system self-heals:
+      // features.discover() already flipped supportsMetrics() false, so producer publish() calls
+      // now skip the data ring; the aggregator drains existing snapshots and ships them on the
+      // next report cycle; the sink rejects that payload and fires DOWNGRADED again, which
+      // retries disable() against a now-empty signal queue. Worst case: one extra reporting
+      // cycle of stale data.
+      signalInbox.offer(CLEAR);
     }
   }
 
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java
index 4821d1b33a4..d3a3d47d65a 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/PeerTagSchema.java
@@ -43,6 +43,14 @@ final class PeerTagSchema {
 
   final String[] names;
 
+  /**
+   * Precomputed {@code Arrays.hashCode(names)}. The schema is shared across many publishes so
+   * recomputing it on the aggregator hot path (per-publish call to {@code AggregateEntry.hashOf})
+   * was waste -- it showed up as a top aggregator-thread sample. Cached here, computed once at
+   * construction.
+   */
+  final int namesHash;
+
   /**
    * The {@code DDAgentFeaturesDiscovery.state()} hash this schema was built from. The aggregator
    * thread reads and updates this once per reporting cycle when reconciling against the latest
@@ -54,6 +62,7 @@ final class PeerTagSchema {
 
   private PeerTagSchema(String[] names, String state) {
     this.names = names;
+    this.namesHash = java.util.Arrays.hashCode(names);
     this.state = state;
   }
 
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java
index 152ac42bb55..f10d60167cc 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/SpanSnapshot.java
@@ -1,76 +1,118 @@
 package datadog.trace.common.metrics;
 
+import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
+import datadog.trace.util.LongHashingUtils;
+import java.util.Arrays;
+import javax.annotation.Nullable;
+
 /**
- * Immutable per-span value posted from the producer to the aggregator thread. Carries the raw
- * inputs the aggregator needs to look up or build an {@link AggregateEntry} and update its
- * counters.
+ * Per-span value posted from the producer to the aggregator thread. Carries the canonical inputs
+ * the aggregator needs to look up or build an {@link AggregateEntry} and update its counters.
+ *
+ * <p>Fields are mutable so this class can serve as a slot in {@link
+ * datadog.trace.util.concurrent.MpscRingBuffer}: the producer claims a slot, writes its fields, and
+ * publishes; the aggregator reads them. There is exactly one writer per outstanding sequence (the
+ * producer that claimed it) and one reader (the aggregator), so no synchronization on fields is
+ * required.
  *
- * <p>All cache-canonicalization (service-name, span-kind, peer-tag string interning) happens on the
- * aggregator thread; the producer just shuffles references.
+ * <p><b>Producer-side canonicalization and hashing.</b> String fields are canonicalized to {@link
+ * UTF8BytesString} on the producer side (see {@link AggregateEntry#canonicalize}); same-content
+ * strings collapse to the same canonical reference through the per-field {@code DDCache} instances
+ * on {@link AggregateEntry}. The producer also precomputes {@link #keyHash} via the same chain the
+ * consumer used to use. The aggregator then does identity comparisons in {@link
+ * AggregateEntry#matches} and a precomputed-hash table lookup, with all the per-field work
+ * distributed across producer threads where there's CPU headroom.
  */
-final class SpanSnapshot implements InboxItem {
-
-  final CharSequence resourceName;
-  final String serviceName;
-  final CharSequence operationName;
-  final CharSequence serviceNameSource;
-  final CharSequence spanType;
-  final short httpStatusCode;
-  final boolean synthetic;
-  final boolean traceRoot;
-  final String spanKind;
+final class SpanSnapshot {
+
+  /** Canonical UTF-8 form of the span's resource name. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString resourceName;
+
+  /** Canonical UTF-8 form of the span's service. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString serviceName;
+
+  /** Canonical UTF-8 form of the span's operation name. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString operationName;
+
+  /** Canonical UTF-8 form, or {@code null} when the span had no service-name source set. */
+  @Nullable UTF8BytesString serviceNameSource;
+
+  /** Canonical UTF-8 form of the span's type. {@link UTF8BytesString#EMPTY} when null. */
+  UTF8BytesString spanType;
+
+  short httpStatusCode;
+  boolean synthetic;
+  boolean traceRoot;
+
+  /** Canonical UTF-8 form of the span kind. {@link UTF8BytesString#EMPTY} when not set. */
+  UTF8BytesString spanKind;
 
   /**
    * Schema for {@link #peerTagValues}. {@code null} when the span has no peer tags. The schema
    * carries the names in parallel-array form; {@code peerTagValues} holds the per-span tag values
    * at the same indices.
    */
-  final PeerTagSchema peerTagSchema;
+  @Nullable PeerTagSchema peerTagSchema;
 
   /**
    * Peer tag values captured from the span, parallel to {@code peerTagSchema.names}. A {@code null}
-   * entry means the span didn't have that peer tag set. {@code null} (the whole array) when {@link
-   * #peerTagSchema} is {@code null}.
+   * entry means the span didn't have that peer tag set. These are per-publish strings (typically
+   * not shared across calls), so they stay as {@link String} -- canonicalizing them is more
+   * expensive than the aggregator's content-comparison on a 1-3 element array.
+   *
+   * <p>The slot owns this array as a reusable scratch buffer; see {@code
+   * ConflatingMetricsAggregator#fillSlot}.
    */
-  final String[] peerTagValues;
+  @Nullable String[] peerTagValues;
 
-  final String httpMethod;
-  final String httpEndpoint;
-  final String grpcStatusCode;
+  @Nullable UTF8BytesString httpMethod;
+  @Nullable UTF8BytesString httpEndpoint;
+  @Nullable UTF8BytesString grpcStatusCode;
 
   /** Duration in nanoseconds, OR-ed with {@code ERROR_TAG} / {@code TOP_LEVEL_TAG} as needed. */
-  final long tagAndDuration;
-
-  SpanSnapshot(
-      CharSequence resourceName,
-      String serviceName,
-      CharSequence operationName,
-      CharSequence serviceNameSource,
-      CharSequence spanType,
-      short httpStatusCode,
-      boolean synthetic,
-      boolean traceRoot,
-      String spanKind,
-      PeerTagSchema peerTagSchema,
-      String[] peerTagValues,
-      String httpMethod,
-      String httpEndpoint,
-      String grpcStatusCode,
-      long tagAndDuration) {
-    this.resourceName = resourceName;
-    this.serviceName = serviceName;
-    this.operationName = operationName;
-    this.serviceNameSource = serviceNameSource;
-    this.spanType = spanType;
-    this.httpStatusCode = httpStatusCode;
-    this.synthetic = synthetic;
-    this.traceRoot = traceRoot;
-    this.spanKind = spanKind;
-    this.peerTagSchema = peerTagSchema;
-    this.peerTagValues = peerTagValues;
-    this.httpMethod = httpMethod;
-    this.httpEndpoint = httpEndpoint;
-    this.grpcStatusCode = grpcStatusCode;
-    this.tagAndDuration = tagAndDuration;
+  long tagAndDuration;
+
+  /**
+   * Hashtable bucket key, computed by the producer at the end of {@code fillSlot} via {@link
+   * #computeAndSetKeyHash}. {@code AggregateTable#findOrInsert} uses this directly instead of
+   * re-running the chained hash on the aggregator thread.
+   */
+  long keyHash;
+
+  /** No-arg constructor used by {@link datadog.trace.util.concurrent.MpscRingBuffer}. */
+  SpanSnapshot() {}
+
+  /**
+   * Computes and stores {@link #keyHash} from the snapshot's current canonical fields. The producer
+   * calls this at the end of {@code fillSlot}; the aggregator then uses {@code keyHash} directly
+   * for bucket lookup in {@code AggregateTable}.
+   *
+   * <p>Mixing identical to the prior {@code AggregateEntry.hashOf} so the bucket distribution is
+   * unchanged: {@code UTF8BytesString.hashCode} returns the underlying {@code String} hash, so
+   * snapshots built from {@code String} and {@code UTF8BytesString} of the same content fold to the
+   * same long.
+   */
+  static void computeAndSetKeyHash(SpanSnapshot s) {
+    long h = 0;
+    h = LongHashingUtils.addToHash(h, s.resourceName);
+    h = LongHashingUtils.addToHash(h, s.serviceName);
+    h = LongHashingUtils.addToHash(h, s.operationName);
+    h = LongHashingUtils.addToHash(h, s.serviceNameSource);
+    h = LongHashingUtils.addToHash(h, s.spanType);
+    h = LongHashingUtils.addToHash(h, s.httpStatusCode);
+    h = LongHashingUtils.addToHash(h, s.synthetic);
+    h = LongHashingUtils.addToHash(h, s.traceRoot);
+    h = LongHashingUtils.addToHash(h, s.spanKind);
+    // peerTagValues is gated by peerTagSchema -- the slot's array is a reusable scratch buffer
+    // that may carry stale contents from a prior tag-firing publish when this publish had no
+    // peer tags. Hash it only when the schema says it's meaningful.
+    h = LongHashingUtils.addToHash(h, s.peerTagSchema == null ? 0 : s.peerTagSchema.namesHash);
+    h =
+        LongHashingUtils.addToHash(
+            h, s.peerTagSchema == null ? 0 : Arrays.hashCode(s.peerTagValues));
+    h = LongHashingUtils.addToHash(h, s.httpMethod);
+    h = LongHashingUtils.addToHash(h, s.httpEndpoint);
+    h = LongHashingUtils.addToHash(h, s.grpcStatusCode);
+    s.keyHash = h;
   }
 }
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceStructureWriter.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceStructureWriter.java
index afeaebf2772..f5cb0d2a978 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceStructureWriter.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceStructureWriter.java
@@ -3,6 +3,7 @@
 import datadog.environment.OperatingSystem;
 import datadog.trace.api.DDSpanId;
 import datadog.trace.api.DDTraceId;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.core.DDSpan;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
 import java.io.FileOutputStream;
@@ -82,7 +83,7 @@ private static String[] parseArgs(String outputFile) {
     return parseArgs(outputFile, OperatingSystem.isWindows());
   }
 
-  // package visibility for testing
+  @VisibleForTesting
   static String[] parseArgs(String outputFile, boolean windows) {
     String[] args = ARGS_DELIMITER.split(outputFile);
     // Check Windows absolute paths (<drive>:<path>) as column is used as arg delimiter
diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_5.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_5.java
index b8d1c77afdc..a993fdb3c26 100644
--- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_5.java
+++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ddagent/TraceMapperV0_5.java
@@ -9,6 +9,7 @@
 import datadog.communication.serialization.msgpack.MsgPackWriter;
 import datadog.trace.api.TagMap;
 import datadog.trace.api.TagMap.EntryReader;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.instrumentation.api.InstrumentationTags;
 import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
 import datadog.trace.common.writer.Payload;
@@ -125,12 +126,12 @@ public String endpoint() {
     return "v0.5";
   }
 
-  // Visible for tests
+  @VisibleForTesting
   Map<Object, Integer> getEncoding() {
     return encoding;
   }
 
-  // Visible for tests
+  @VisibleForTesting
   GrowableBuffer getDictionary() {
     return dictionary;
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java
index 55db6962976..73e3c2063e0 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java
@@ -53,6 +53,7 @@
 import datadog.trace.api.interceptor.MutableSpan;
 import datadog.trace.api.interceptor.TraceInterceptor;
 import datadog.trace.api.internal.TraceSegment;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.metrics.SpanMetricRegistry;
 import datadog.trace.api.naming.SpanNaming;
 import datadog.trace.api.remoteconfig.ServiceNameCollector;
@@ -1391,7 +1392,7 @@ public String getSpanId(AgentSpan span) {
     return "0";
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   TraceInterceptors getInterceptors() {
     return interceptors;
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java
index d404343bb96..8ffcc77b49c 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpan.java
@@ -19,6 +19,7 @@
 import datadog.trace.api.debugger.DebuggerConfigBridge;
 import datadog.trace.api.gateway.Flow;
 import datadog.trace.api.gateway.RequestContext;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.metrics.SpanMetricRegistry;
 import datadog.trace.api.metrics.SpanMetrics;
 import datadog.trace.api.sampling.PrioritySampling;
@@ -125,7 +126,7 @@ static DDSpan create(
    * @param timestampMicro if greater than zero, use this time instead of the current time
    * @param context the context used for the span
    */
-  // @VisibleForTesting
+  @VisibleForTesting
   DDSpan(
       @Nonnull String instrumentationName,
       final long timestampMicro,
@@ -681,7 +682,7 @@ public long getDurationNano() {
     return durationNano;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   void setDurationNano(long duration) {
     DURATION_NANO_UPDATER.set(this, duration);
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/LongRunningTracesTracker.java b/dd-trace-core/src/main/java/datadog/trace/core/LongRunningTracesTracker.java
index dbbd57b0c48..6002fce5415 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/LongRunningTracesTracker.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/LongRunningTracesTracker.java
@@ -3,6 +3,7 @@
 import datadog.communication.ddagent.DDAgentFeaturesDiscovery;
 import datadog.communication.ddagent.SharedCommunicationObjects;
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.core.monitor.HealthMetrics;
 import java.util.ArrayList;
 import java.util.List;
@@ -140,12 +141,12 @@ private void flushStats() {
     expired = 0;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int trackedCount() {
     return traceArray.size();
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int getDropped() {
     return dropped;
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/PendingTrace.java b/dd-trace-core/src/main/java/datadog/trace/core/PendingTrace.java
index 5a0d1e992a4..11f8566cc48 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/PendingTrace.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/PendingTrace.java
@@ -2,6 +2,7 @@
 
 import datadog.metrics.api.Recording;
 import datadog.trace.api.DDTraceId;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.time.TimeSource;
 import datadog.trace.bootstrap.instrumentation.api.AgentScope;
 import datadog.trace.core.CoreTracer.ConfigSnapshot;
@@ -139,7 +140,7 @@ public PendingTrace create(@Nonnull DDTraceId traceId, ConfigSnapshot traceConfi
   private static final AtomicLongFieldUpdater<PendingTrace> LAST_REFERENCED =
       AtomicLongFieldUpdater.newUpdater(PendingTrace.class, "lastReferenced");
 
-  // @VisibleForTesting
+  @VisibleForTesting
   PendingTrace(
       @Nonnull CoreTracer tracer,
       @Nonnull DDTraceId traceId,
@@ -224,37 +225,37 @@ boolean compareAndSetLongRunningState(int expected, int newState) {
     return LONG_RUNNING_STATE.compareAndSet(this, expected, newState);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int getLongRunningTrackedState() {
     return longRunningTrackedState;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   void setLongRunningTrackedState(int state) {
     LONG_RUNNING_STATE.set(this, state);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int getPendingReferenceCount() {
     return pendingReferenceCount;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   PendingTraceBuffer getPendingTraceBuffer() {
     return pendingTraceBuffer;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   DDTraceId getTraceId() {
     return traceId;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   boolean isRootSpanWritten() {
     return rootSpanWritten;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int getIsEnqueued() {
     return isEnqueued;
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java
index 9cd7f73eb2e..c0e8de95a5c 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java
@@ -11,6 +11,7 @@
 import datadog.communication.ddagent.SharedCommunicationObjects;
 import datadog.trace.api.Config;
 import datadog.trace.api.flare.TracerFlare;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.time.TimeSource;
 import datadog.trace.common.writer.TraceDumpJsonExporter;
 import datadog.trace.core.monitor.HealthMetrics;
@@ -304,17 +305,17 @@ public DelayingPendingTraceBuffer(
               : null;
     }
 
-    // @VisibleForTesting
+    @VisibleForTesting
     LongRunningTracesTracker getRunningTracesTracker() {
       return runningTracesTracker;
     }
 
-    // @VisibleForTesting
+    @VisibleForTesting
     Thread getWorker() {
       return worker;
     }
 
-    // @VisibleForTesting
+    @VisibleForTesting
     MessagePassingBlockingQueue<Element> getQueue() {
       return queue;
     }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/propagation/W3CHttpCodec.java b/dd-trace-core/src/main/java/datadog/trace/core/propagation/W3CHttpCodec.java
index 52037859cb2..f8372f0d76c 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/propagation/W3CHttpCodec.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/propagation/W3CHttpCodec.java
@@ -16,6 +16,7 @@
 import datadog.trace.api.DDTraceId;
 import datadog.trace.api.TraceConfig;
 import datadog.trace.api.TracePropagationStyle;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.internal.util.LongStringUtils;
 import datadog.trace.api.propagation.W3CTraceParent;
 import datadog.trace.api.sampling.PrioritySampling;
@@ -40,10 +41,9 @@ class W3CHttpCodec {
   private static final int TRACE_PARENT_FLAGS_SAMPLED = 1;
   private static final int TRACE_PARENT_LENGTH = TRACE_PARENT_FLAGS_START + 2;
 
-  // Package-protected for testing
-  static final String TRACE_PARENT_KEY = "traceparent";
-  static final String TRACE_STATE_KEY = "tracestate";
-  static final String OT_BAGGAGE_PREFIX = "ot-baggage-";
+  @VisibleForTesting static final String TRACE_PARENT_KEY = "traceparent";
+  @VisibleForTesting static final String TRACE_STATE_KEY = "tracestate";
+  @VisibleForTesting static final String OT_BAGGAGE_PREFIX = "ot-baggage-";
   static final String E2E_START_KEY = OT_BAGGAGE_PREFIX + DDTags.TRACE_START_TIME;
 
   private W3CHttpCodec() {
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java
index 340a5d82649..c2e0dd72761 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/HttpEndpointPostProcessor.java
@@ -6,6 +6,7 @@
 
 import datadog.trace.api.TagMap;
 import datadog.trace.api.endpoint.EndpointResolver;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks;
 import datadog.trace.core.DDSpanContext;
 import org.slf4j.Logger;
@@ -45,10 +46,9 @@ public HttpEndpointPostProcessor() {
   /**
    * Creates a new HttpEndpointPostProcessor with the given endpoint resolver.
    *
-   * <p>Visible for testing.
-   *
    * @param endpointResolver the resolver to use for endpoint inference
    */
+  @VisibleForTesting
   HttpEndpointPostProcessor(EndpointResolver endpointResolver) {
     this.endpointResolver = endpointResolver;
   }
diff --git a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java
index ed1fee8b2d6..198e2c78f1c 100644
--- a/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java
+++ b/dd-trace-core/src/main/java/datadog/trace/core/tagprocessor/PeerServiceCalculator.java
@@ -3,6 +3,7 @@
 import datadog.trace.api.Config;
 import datadog.trace.api.DDTags;
 import datadog.trace.api.TagMap;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.naming.NamingSchema;
 import datadog.trace.api.naming.SpanNaming;
 import datadog.trace.bootstrap.instrumentation.api.AppendableSpanLinks;
@@ -22,7 +23,7 @@ public PeerServiceCalculator() {
     this(SpanNaming.instance().namingSchema().peerService(), Config.get().getPeerServiceMapping());
   }
 
-  // Visible for testing
+  @VisibleForTesting
   PeerServiceCalculator(
       @Nonnull final NamingSchema.ForPeerService peerServiceNaming,
       @Nonnull final Map<String, String> peerServiceMapping) {
diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy
index 00bd706b8fb..193d46b18bd 100644
--- a/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy
+++ b/dd-trace-core/src/test/groovy/datadog/trace/common/metrics/ConflatingMetricAggregatorTest.groovy
@@ -27,7 +27,12 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
   @Shared
   long reportingInterval = 1
   @Shared
-  int queueSize = 256
+  // The CSS overclaim path claims traceSize slots per publish (not just the eligible count),
+  // so tests with multi-span traces and tight ring sizes can spuriously hit capacity. 1024 leaves
+  // plenty of headroom for the largest test traces (~10 spans) across hundreds of iterations.
+  // ConflatingMetricsAggregatorInboxFullTest deliberately keeps a tiny ring to exercise full-ring
+  // behaviour.
+  int queueSize = 1024
 
   def "should ignore traces with no measured spans"() {
     setup:
@@ -1718,7 +1723,7 @@ class ConflatingMetricAggregatorTest extends DDSpecification {
 
   def waitUntilEmpty(ConflatingMetricsAggregator aggregator) {
     int i = 0
-    while (!aggregator.inbox.isEmpty() && i++ < 100) {
+    while ((!aggregator.dataInbox.isEmpty() || !aggregator.signalInbox.isEmpty()) && i++ < 100) {
       Thread.sleep(10)
     }
   }
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java
index 7fd767533c7..b35dd4cd938 100644
--- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTest.java
@@ -124,7 +124,7 @@ void testUtilsEqualEntriesHaveEqualHashCodes() {
   }
 
   private static SpanSnapshot snapshotWithPeerTags(String[] names, String[] values) {
-    return new SpanSnapshot(
+    return AggregateEntryTestUtils.buildSnapshot(
         "resource",
         "svc",
         "op",
@@ -144,7 +144,7 @@ private static SpanSnapshot snapshotWithPeerTags(String[] names, String[] values
 
   private static AggregateEntry newEntry() {
     SpanSnapshot snapshot =
-        new SpanSnapshot(
+        AggregateEntryTestUtils.buildSnapshot(
             "resource",
             "svc",
             "op",
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java
index ed6fd5a3a7e..acc298fd436 100644
--- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateEntryTestUtils.java
@@ -69,33 +69,76 @@ public static AggregateEntry of(
       schema = PeerTagSchema.testSchema(names);
     }
     SpanSnapshot syntheticSnapshot =
-        new SpanSnapshot(
+        buildSnapshot(
             resource,
-            service == null ? null : service.toString(),
+            service,
             operationName,
             serviceSource,
             type,
             (short) httpStatusCode,
             synthetic,
             traceRoot,
-            spanKind == null ? null : spanKind.toString(),
+            spanKind,
             schema,
             values,
-            httpMethod == null ? null : httpMethod.toString(),
-            httpEndpoint == null ? null : httpEndpoint.toString(),
-            grpcStatusCode == null ? null : grpcStatusCode.toString(),
+            httpMethod,
+            httpEndpoint,
+            grpcStatusCode,
             0L);
     return forSnapshot(syntheticSnapshot);
   }
 
   /**
-   * Builds an {@link AggregateEntry} from {@code s} by computing its lookup hash via {@link
-   * AggregateEntry#hashOf(SpanSnapshot)} and calling the package-private constructor directly.
-   * Production callers route through {@link AggregateTable#findOrInsert} which already has the
-   * {@code keyHash} on hand; tests rarely do, so this helper hides the second argument.
+   * Test-only snapshot builder. Production fillSlot canonicalizes each field via the per-field
+   * DDCaches on {@link AggregateEntry} and precomputes {@link SpanSnapshot#keyHash}; this helper
+   * does the same so tests exercise the production matches/hash path.
+   */
+  static SpanSnapshot buildSnapshot(
+      CharSequence resource,
+      CharSequence service,
+      CharSequence operationName,
+      @Nullable CharSequence serviceSource,
+      CharSequence type,
+      short httpStatusCode,
+      boolean synthetic,
+      boolean traceRoot,
+      CharSequence spanKind,
+      @Nullable PeerTagSchema peerTagSchema,
+      @Nullable String[] peerTagValues,
+      @Nullable CharSequence httpMethod,
+      @Nullable CharSequence httpEndpoint,
+      @Nullable CharSequence grpcStatusCode,
+      long tagAndDuration) {
+    SpanSnapshot s = new SpanSnapshot();
+    s.resourceName = AggregateEntry.canonicalize(AggregateEntry.RESOURCE_CACHE, resource);
+    s.serviceName = AggregateEntry.canonicalize(AggregateEntry.SERVICE_CACHE, service);
+    s.operationName = AggregateEntry.canonicalize(AggregateEntry.OPERATION_CACHE, operationName);
+    s.serviceNameSource =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.SERVICE_SOURCE_CACHE, serviceSource);
+    s.spanType = AggregateEntry.canonicalize(AggregateEntry.TYPE_CACHE, type);
+    s.httpStatusCode = httpStatusCode;
+    s.synthetic = synthetic;
+    s.traceRoot = traceRoot;
+    s.spanKind = AggregateEntry.canonicalize(AggregateEntry.SPAN_KIND_CACHE, spanKind);
+    s.peerTagSchema = peerTagSchema;
+    s.peerTagValues = peerTagValues;
+    s.httpMethod =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_METHOD_CACHE, httpMethod);
+    s.httpEndpoint =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.HTTP_ENDPOINT_CACHE, httpEndpoint);
+    s.grpcStatusCode =
+        AggregateEntry.canonicalizeOptional(AggregateEntry.GRPC_STATUS_CODE_CACHE, grpcStatusCode);
+    s.tagAndDuration = tagAndDuration;
+    SpanSnapshot.computeAndSetKeyHash(s);
+    return s;
+  }
+
+  /**
+   * Builds an {@link AggregateEntry} from {@code s}. Production callers route through {@link
+   * AggregateTable#findOrInsert}; tests use this helper for direct construction.
    */
   public static AggregateEntry forSnapshot(SpanSnapshot s) {
-    return new AggregateEntry(s, AggregateEntry.hashOf(s));
+    return new AggregateEntry(s);
   }
 
   /**
diff --git a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java
index 618ead2ab43..e807dfdd338 100644
--- a/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java
+++ b/dd-trace-core/src/test/java/datadog/trace/common/metrics/AggregateTableTest.java
@@ -266,8 +266,73 @@ void nullServiceAndSpanKindDoNotNpeAndCollapseWithEmpty() {
     assertEquals(0, first.getSpanKind().length(), "null spanKind should canonicalize to EMPTY");
   }
 
+  @Test
+  void noPeerTagSnapshotMatchesEntryEvenWithStaleScratchPeerTagValues() {
+    // Models the ring-buffer slot-reuse pattern: a slot whose previous publish fired peer tags
+    // ends up reused for a span with no peer tags. The producer clears peerTagSchema but leaves
+    // peerTagValues populated (the array is the slot's reusable scratch). The consumer must
+    // gate on peerTagSchema and ignore stale scratch values for both hashOf and matches --
+    // otherwise the slot's stale state would steer the snapshot to a different bucket or fail
+    // the field-by-field comparison, producing a duplicate aggregate.
+    AggregateTable table = new AggregateTable(8);
+    SpanSnapshot cleanNoPeerTags = snapshot("svc", "op", "internal");
+    AggregateEntry inserted = table.findOrInsert(cleanNoPeerTags);
+
+    SpanSnapshot withStaleScratch = snapshot("svc", "op", "internal");
+    // Simulate stale scratch: same logical contents (no peer tags -> schema null), but the
+    // values array carries leftover data the slot's prior tag-firing publish wrote into it.
+    withStaleScratch.peerTagSchema = null;
+    withStaleScratch.peerTagValues = new String[] {"leftover.host", "leftover.service"};
+
+    AggregateEntry hit = table.findOrInsert(withStaleScratch);
+    assertSame(inserted, hit, "stale peerTagValues must be ignored when peerTagSchema is null");
+    assertEquals(1, table.size());
+  }
+
+  @Test
+  void aggregateEntryCopiesPeerTagValuesSoSlotReuseDoesntCorruptIt() {
+    // The AggregateEntry must snapshot peerTagValues by value at insert time -- otherwise
+    // when the producer reuses the slot's array on the next publish, the entry's identity
+    // would silently change. Verify by mutating the source array after insert and confirming
+    // the entry still matches its original-content snapshot.
+    AggregateTable table = new AggregateTable(8);
+    String[] values = new String[] {"host1", "svc1"};
+    SpanSnapshot s1 = snapshotWithPeerTags("svc", "op", "client", values);
+    AggregateEntry inserted = table.findOrInsert(s1);
+
+    // Producer "reuses" the array: same reference, new contents.
+    values[0] = "host2";
+    values[1] = "svc2";
+
+    // A FRESH snapshot with the original peer tag values should still match the entry.
+    SpanSnapshot s2 = snapshotWithPeerTags("svc", "op", "client", new String[] {"host1", "svc1"});
+    AggregateEntry hit = table.findOrInsert(s2);
+    assertSame(inserted, hit, "entry's peer tag values must be a snapshot, not a live ref");
+  }
+
+  private static SpanSnapshot snapshotWithPeerTags(
+      String service, String operation, String spanKind, String[] peerTagValues) {
+    PeerTagSchema schema = PeerTagSchema.testSchema(new String[] {"peer.host", "peer.service"});
+    return AggregateEntryTestUtils.buildSnapshot(
+        "resource",
+        service,
+        operation,
+        null,
+        "web",
+        (short) 200,
+        false,
+        true,
+        spanKind,
+        schema,
+        peerTagValues,
+        null,
+        null,
+        null,
+        0L);
+  }
+
   private static SpanSnapshot nullServiceKindSnapshot(String service, String spanKind) {
-    return new SpanSnapshot(
+    return AggregateEntryTestUtils.buildSnapshot(
         "resource",
         service,
         "op",
@@ -287,7 +352,7 @@ private static SpanSnapshot nullServiceKindSnapshot(String service, String spanK
 
   private static SpanSnapshot nullableSnapshot(
       String resource, String operation, String type, String serviceNameSource) {
-    return new SpanSnapshot(
+    return AggregateEntryTestUtils.buildSnapshot(
         resource,
         "svc",
         operation,
@@ -343,7 +408,7 @@ SnapshotBuilder peerTags(String... namesAndValues) {
     }
 
     SpanSnapshot build() {
-      return new SpanSnapshot(
+      return AggregateEntryTestUtils.buildSnapshot(
           "resource",
           service,
           operation,
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 6abe7896aab..2014db87f13 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -8,7 +8,7 @@ gradle-tooling-api = "8.14.5"
 spotbugs_annotations = "4.9.8"
 
 # DataDog libs and forks
-ddprof = "1.42.0"
+ddprof = "1.43.0"
 dogstatsd = "4.4.5"
 okhttp = "3.12.15" # Datadog fork to support Java 7
 
diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts
index b78e5b38b14..fc95dd9e1f1 100644
--- a/internal-api/build.gradle.kts
+++ b/internal-api/build.gradle.kts
@@ -260,6 +260,7 @@ dependencies {
   // references TraceScope and Continuation from public api
   api(project(":dd-trace-api"))
   api(libs.slf4j)
+  compileOnlyApi(project(":components:annotations"))
   api(project(":components:context"))
   api(project(":components:environment"))
   api(project(":components:json"))
diff --git a/internal-api/src/jmh/java/datadog/trace/util/concurrent/MpscRingBufferBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/concurrent/MpscRingBufferBenchmark.java
new file mode 100644
index 00000000000..e9d084f3334
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/concurrent/MpscRingBufferBenchmark.java
@@ -0,0 +1,146 @@
+package datadog.trace.util.concurrent;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+import java.util.function.BiConsumer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Group;
+import org.openjdk.jmh.annotations.GroupThreads;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Throughput benchmarks for {@link MpscRingBuffer}.
+ *
+ * <ul>
+ *   <li>{@code write_1p / write_8p / write_16p} — producer-side throughput with a background
+ *       drainer consuming what's published. Measures the cost of one {@code tryWrite} including CAS
+ *       contention on the producer cursor at the given thread count.
+ *   <li>{@code e2e_8p} — JMH {@code @Group} pairing 8 producers with 1 consumer. Aggregate
+ *       throughput reflects whichever side is the bottleneck under realistic pressure.
+ * </ul>
+ *
+ * <p>Run with {@code -p capacity=...} to override the default ring capacity.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 2, time = 15, timeUnit = SECONDS)
+@Measurement(iterations = 5, time = 15, timeUnit = SECONDS)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(SECONDS)
+@Fork(value = 1)
+public class MpscRingBufferBenchmark {
+
+  /**
+   * Static filler so the lambda is non-capturing and the JIT can hoist it past the {@code tryWrite}
+   * call. Context arg comes first, slot last — matches {@code TagMap.forEach} convention.
+   */
+  private static final BiConsumer<Long, Slot> FILLER = (v, slot) -> slot.value = v;
+
+  /** Mutable slot. Replicates the per-publish allocation a real producer wants to avoid. */
+  public static final class Slot {
+    long value;
+  }
+
+  @Param({"1024"})
+  public int capacity;
+
+  /**
+   * Shared ring for the {@code write_*} benches. A background drainer keeps space available so
+   * producer benchmarks measure write throughput rather than full-ring drop throughput.
+   */
+  MpscRingBuffer<Slot> ring;
+
+  private volatile boolean stopDrainer;
+  private Thread drainerThread;
+
+  /**
+   * Separate ring for the {@code e2e_*} group benches. JMH drives both sides directly so we don't
+   * want our own background drainer for those.
+   */
+  MpscRingBuffer<Slot> e2eRing;
+
+  @Setup(Level.Trial)
+  public void setup() {
+    ring = new MpscRingBuffer<>(Slot::new, capacity);
+    e2eRing = new MpscRingBuffer<>(Slot::new, capacity);
+    stopDrainer = false;
+    drainerThread =
+        new Thread(
+            () -> {
+              while (!stopDrainer) {
+                if (ring.drain((Slot s) -> {}) == 0) Thread.yield();
+              }
+            },
+            "MpscRingBufferBenchmark-drainer");
+    drainerThread.setDaemon(true);
+    drainerThread.start();
+  }
+
+  @TearDown(Level.Trial)
+  public void teardown() throws InterruptedException {
+    stopDrainer = true;
+    drainerThread.join(5_000);
+  }
+
+  @State(Scope.Thread)
+  public static class ThreadState {
+    long counter;
+  }
+
+  // ============ Write throughput with background drainer ============
+
+  @Threads(1)
+  @Benchmark
+  public boolean write_1p(ThreadState ts) {
+    return ring.tryWrite(ts.counter++, FILLER);
+  }
+
+  @Threads(8)
+  @Benchmark
+  public boolean write_8p(ThreadState ts) {
+    return ring.tryWrite(ts.counter++, FILLER);
+  }
+
+  @Threads(16)
+  @Benchmark
+  public boolean write_16p(ThreadState ts) {
+    return ring.tryWrite(ts.counter++, FILLER);
+  }
+
+  // ============ End-to-end producer/consumer pairing ============
+  //
+  // JMH runs both methods in the same trial: 8 producer threads + 1 consumer thread. Throughput
+  // is reported as ops/sec aggregated across all 9 threads, but the consumer's drain count
+  // dwarfs the producer ops since one call processes many slots -- in practice the bottleneck
+  // is the producer side (CAS contention), and that's what the number reflects.
+
+  private static final BiConsumer<Blackhole, Slot> CONSUMER = (bh, slot) -> bh.consume(slot.value);
+
+  @Group("e2e_8p")
+  @GroupThreads(8)
+  @Benchmark
+  public boolean e2e_producer(ThreadState ts) {
+    return e2eRing.tryWrite(ts.counter++, FILLER);
+  }
+
+  @Group("e2e_8p")
+  @GroupThreads(1)
+  @Benchmark
+  public int e2e_consumer(Blackhole bh) {
+    int drained = e2eRing.drain(bh, CONSUMER);
+    if (drained == 0) Thread.yield();
+    return drained;
+  }
+}
diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java
index 07f10672273..b90fcb0fd87 100644
--- a/internal-api/src/main/java/datadog/trace/api/Config.java
+++ b/internal-api/src/main/java/datadog/trace/api/Config.java
@@ -2184,7 +2184,23 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
         configProvider.getBoolean(TRACE_STATS_COMPUTATION_IGNORE_AGENT_VERSION, false);
     tracerMetricsBufferingEnabled =
         configProvider.getBoolean(TRACER_METRICS_BUFFERING_ENABLED, false);
-    tracerMetricsMaxAggregates = configProvider.getInteger(TRACER_METRICS_MAX_AGGREGATES, 2048);
+    // The MpscRingBuffer pre-allocates one SpanSnapshot per slot at construction, so capacity
+    // becomes a resident-RSS cost. The old TRACER_METRICS_MAX_PENDING default of 2048 (logical)
+    // was sized for the previous on-demand batch-allocation model; with the ring it would pin
+    // ~15 MB upfront for what only needs to absorb sub-second consumer stalls. Cut the default
+    // accordingly:
+    //   - normal heap: 128 logical * 64 LEGACY_BATCH_SIZE = 8192 slots, ~1 MB upfront, ~0.8 s
+    //     of buffer at 10K spans/s. Plenty of margin for GC pauses.
+    //   - tight heap (<128 MB): 64 logical * 64 = 4096 slots, ~500 KB upfront. Observed
+    //     catastrophic at Xmx64m with the prior 131072-slot default (Full-GC death spiral).
+    // Customers who explicitly configured TRACER_METRICS_MAX_PENDING keep their value (the
+    // LEGACY_BATCH_SIZE multiplier still applies to it) -- only the implicit default shrinks.
+    final boolean tightHeap = Runtime.getRuntime().maxMemory() < 128L * 1024 * 1024;
+    final int defaultMaxAggregates = tightHeap ? 256 : 2048;
+    final int defaultMaxPending = tightHeap ? 64 : 128;
+
+    tracerMetricsMaxAggregates =
+        configProvider.getInteger(TRACER_METRICS_MAX_AGGREGATES, defaultMaxAggregates);
     /*
      * TRACER_METRICS_MAX_PENDING historically counted conflating Batch slots (~64 spans per batch
      * via Batch.MAX_BATCH_SIZE). The inbox now holds 1 SpanSnapshot per metrics-eligible span, so
@@ -2199,7 +2215,8 @@ private Config(final ConfigProvider configProvider, final InstrumenterConfig ins
      * see java.util.ArraysSupport.SOFT_MAX_ARRAY_LENGTH for the same convention.
      */
     long requestedMaxPending =
-        (long) configProvider.getInteger(TRACER_METRICS_MAX_PENDING, 2048) * LEGACY_BATCH_SIZE;
+        (long) configProvider.getInteger(TRACER_METRICS_MAX_PENDING, defaultMaxPending)
+            * LEGACY_BATCH_SIZE;
     tracerMetricsMaxPending = (int) Math.min(requestedMaxPending, MAX_SAFE_ARRAY_SIZE);
 
     reportHostName =
diff --git a/internal-api/src/main/java/datadog/trace/api/ProcessTags.java b/internal-api/src/main/java/datadog/trace/api/ProcessTags.java
index 932ec19c565..40b23faa45c 100644
--- a/internal-api/src/main/java/datadog/trace/api/ProcessTags.java
+++ b/internal-api/src/main/java/datadog/trace/api/ProcessTags.java
@@ -3,6 +3,7 @@
 import datadog.environment.EnvironmentVariables;
 import datadog.environment.SystemProperties;
 import datadog.trace.api.env.CapturedEnvironment;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
 import datadog.trace.util.TraceUtils;
 import java.nio.file.Path;
@@ -30,8 +31,7 @@ public class ProcessTags {
   public static final String ENTRYPOINT_BASEDIR = "entrypoint.basedir";
   public static final String ENTRYPOINT_WORKDIR = "entrypoint.workdir";
 
-  // visible for testing
-  static Function<String, String> envGetter = EnvironmentVariables::get;
+  @VisibleForTesting static Function<String, String> envGetter = EnvironmentVariables::get;
 
   private static class Lazy {
     // the tags are used to compute a hash for dsm hence that map must be sorted.
@@ -269,7 +269,7 @@ public static UTF8BytesString getTagsForSerialization() {
     return Lazy.serializedForm;
   }
 
-  /** Visible for testing. */
+  @VisibleForTesting
   static void empty() {
     synchronized (Lazy.TAGS) {
       Lazy.TAGS.clear();
@@ -279,12 +279,12 @@ static void empty() {
     }
   }
 
-  /** Visible for testing. */
+  @VisibleForTesting
   static void reset() {
     reset(Config.get());
   }
 
-  /** Visible for testing. */
+  @VisibleForTesting
   public static void reset(Config config) {
     synchronized (Lazy.TAGS) {
       empty();
diff --git a/internal-api/src/main/java/datadog/trace/api/civisibility/config/BazelMode.java b/internal-api/src/main/java/datadog/trace/api/civisibility/config/BazelMode.java
index 898dd76129b..56698fe7085 100644
--- a/internal-api/src/main/java/datadog/trace/api/civisibility/config/BazelMode.java
+++ b/internal-api/src/main/java/datadog/trace/api/civisibility/config/BazelMode.java
@@ -3,6 +3,7 @@
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.config.inversion.ConfigHelper;
 import datadog.trace.util.Strings;
 import java.io.BufferedReader;
@@ -51,7 +52,7 @@ public static BazelMode get() {
     return INSTANCE;
   }
 
-  // visible for testing
+  @VisibleForTesting
   BazelMode(Config config) {
     String manifestRloc = config.getTestOptimizationManifestFile();
     if (Strings.isNotBlank(manifestRloc)) {
@@ -331,7 +332,7 @@ private static String lookupInRunfilesManifest(String manifestFile, String rloca
     return null;
   }
 
-  // visible for testing
+  @VisibleForTesting
   static void reset() {
     INSTANCE = null;
   }
diff --git a/internal-api/src/main/java/datadog/trace/api/datastreams/TransactionInfo.java b/internal-api/src/main/java/datadog/trace/api/datastreams/TransactionInfo.java
index 9f09186d30d..b4755d58342 100644
--- a/internal-api/src/main/java/datadog/trace/api/datastreams/TransactionInfo.java
+++ b/internal-api/src/main/java/datadog/trace/api/datastreams/TransactionInfo.java
@@ -1,5 +1,6 @@
 package datadog.trace.api.datastreams;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.charset.StandardCharsets;
@@ -71,7 +72,7 @@ public byte[] getBytes() {
     return buffer.array();
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   static synchronized void resetCache() {
     CACHE.clear();
     CACHE_BYTES = new byte[0];
diff --git a/internal-api/src/main/java/datadog/trace/api/profiling/ProfilerFlareLogger.java b/internal-api/src/main/java/datadog/trace/api/profiling/ProfilerFlareLogger.java
index 2672d8e1567..6fc57ae641d 100644
--- a/internal-api/src/main/java/datadog/trace/api/profiling/ProfilerFlareLogger.java
+++ b/internal-api/src/main/java/datadog/trace/api/profiling/ProfilerFlareLogger.java
@@ -1,6 +1,7 @@
 package datadog.trace.api.profiling;
 
 import datadog.trace.api.flare.TracerFlare;
+import datadog.trace.api.internal.VisibleForTesting;
 import java.io.IOException;
 import java.time.Instant;
 import java.time.ZoneOffset;
@@ -23,7 +24,7 @@ private static final class Singleton {
   private final List<String> flareReportLines = new ArrayList<>();
   private int usedReportCapacity = 0;
 
-  // @VisibleForTesting
+  @VisibleForTesting
   ProfilerFlareLogger() {
     TracerFlare.addReporter(this);
   }
@@ -76,17 +77,17 @@ public void cleanupAfterFlare() {
     cleanup();
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int getUsedReportCapacity() {
     return usedReportCapacity;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int getMaxReportCapacity() {
     return REPORT_CAPACITY;
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   int linesSize() {
     synchronized (flareReportLines) {
       return flareReportLines.size();
diff --git a/internal-api/src/main/java/datadog/trace/api/remoteconfig/ServiceNameCollector.java b/internal-api/src/main/java/datadog/trace/api/remoteconfig/ServiceNameCollector.java
index b7c0a52a63f..664e1f1fe6f 100644
--- a/internal-api/src/main/java/datadog/trace/api/remoteconfig/ServiceNameCollector.java
+++ b/internal-api/src/main/java/datadog/trace/api/remoteconfig/ServiceNameCollector.java
@@ -3,6 +3,7 @@
 import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
 
 import datadog.trace.api.Config;
+import datadog.trace.api.internal.VisibleForTesting;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
@@ -73,7 +74,7 @@ public void clear() {
     services.clear();
   }
 
-  // Visible for testing
+  @VisibleForTesting
   static void setInstance(ServiceNameCollector instance) {
     INSTANCE = instance;
   }
diff --git a/internal-api/src/main/java/datadog/trace/util/ProcessSupervisor.java b/internal-api/src/main/java/datadog/trace/util/ProcessSupervisor.java
index 686c35ce65b..006d921bec9 100644
--- a/internal-api/src/main/java/datadog/trace/util/ProcessSupervisor.java
+++ b/internal-api/src/main/java/datadog/trace/util/ProcessSupervisor.java
@@ -7,6 +7,7 @@
 import static datadog.trace.util.ProcessSupervisor.Health.INTERRUPTED;
 import static datadog.trace.util.ProcessSupervisor.Health.READY_TO_START;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
 import datadog.trace.context.TraceScope;
 import java.io.Closeable;
@@ -146,7 +147,7 @@ public void close() {
     }
   }
 
-  // Package reachable for testing
+  @VisibleForTesting
   Process getCurrentProcess() {
     return currentProcess;
   }
diff --git a/internal-api/src/main/java/datadog/trace/util/TempLocationManager.java b/internal-api/src/main/java/datadog/trace/util/TempLocationManager.java
index 7574c9c5edc..94c74c9f5f8 100644
--- a/internal-api/src/main/java/datadog/trace/util/TempLocationManager.java
+++ b/internal-api/src/main/java/datadog/trace/util/TempLocationManager.java
@@ -2,6 +2,7 @@
 
 import datadog.environment.SystemProperties;
 import datadog.trace.api.config.ProfilingConfig;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.profiling.ProfilerFlareLogger;
 import datadog.trace.api.time.SystemTimeSource;
 import datadog.trace.api.time.TimeSource;
@@ -307,7 +308,7 @@ private TempLocationManager() {
     createTempDir(tempDir);
   }
 
-  // @VisibleForTesting
+  @VisibleForTesting
   static String getBaseTempDirName() {
     String userName = SystemProperties.get("user.name");
     // unlikely, but fall-back to system env based user name
@@ -399,7 +400,7 @@ boolean cleanup(long timeout, TimeUnit unit) {
     return false;
   }
 
-  // accessible for tests
+  @VisibleForTesting
   boolean waitForCleanup(long timeout, TimeUnit unit) {
     try {
       return cleanupTask.await(timeout, unit);
@@ -416,7 +417,7 @@ boolean waitForCleanup(long timeout, TimeUnit unit) {
     return false;
   }
 
-  // accessible for tests
+  @VisibleForTesting
   void createDirStructure() throws IOException {
     Files.createDirectories(baseTempDir);
   }
diff --git a/internal-api/src/main/java/datadog/trace/util/concurrent/MpscRingBuffer.java b/internal-api/src/main/java/datadog/trace/util/concurrent/MpscRingBuffer.java
new file mode 100644
index 00000000000..4cefa43cac0
--- /dev/null
+++ b/internal-api/src/main/java/datadog/trace/util/concurrent/MpscRingBuffer.java
@@ -0,0 +1,434 @@
+package datadog.trace.util.concurrent;
+
+import datadog.trace.api.function.TriConsumer;
+import java.util.concurrent.atomic.AtomicLongArray;
+import java.util.concurrent.atomic.AtomicLongFieldUpdater;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+// Disruptor-style cache-line padding around the cursors. The two cursors live on different cache
+// lines so consumer-side writes to consumerCursor don't invalidate the line producers read for
+// producerCursor (and vice versa). Each padding class declares 7 longs (56 bytes); combined with
+// the cursor's own 8 bytes plus the JVM object header, each cursor + its surrounding pad fills
+// a 64-byte cache line. The HotSpot field-layout strategy preserves the declaration order across
+// the class hierarchy, so this pattern is reliable on all production JVMs we target.
+
+abstract class MpscRingBufferPad0 {
+  long p01, p02, p03, p04, p05, p06, p07;
+}
+
+abstract class MpscRingBufferProducerCursor extends MpscRingBufferPad0 {
+  /** Next sequence to claim. Producers increment via CAS through {@code PRODUCER_CURSOR}. */
+  volatile long producerCursor = -1L;
+}
+
+abstract class MpscRingBufferPad1 extends MpscRingBufferProducerCursor {
+  long p11, p12, p13, p14, p15, p16, p17;
+}
+
+abstract class MpscRingBufferConsumerCursor extends MpscRingBufferPad1 {
+  /** Highest sequence consumed. Only the consumer thread writes; producers read volatile. */
+  volatile long consumerCursor = -1L;
+}
+
+abstract class MpscRingBufferPad2 extends MpscRingBufferConsumerCursor {
+  long p21, p22, p23, p24, p25, p26, p27;
+}
+
+/**
+ * Bounded multi-producer / single-consumer ring buffer of pre-allocated {@code T} instances.
+ *
+ * <p>Each slot is a long-lived {@code T} instance created at construction time via the supplied
+ * factory and recycled forever. Producers mutate slots in place via callbacks; the consumer reads
+ * them the same way. No allocation occurs per write/read after construction, which is the entire
+ * point of this class over a queue of references.
+ *
+ * <p>The {@code BiConsumer} and {@link TriConsumer} variants take their context object(s)
+ * <i>before</i> the slot, matching the convention of {@code TagMap.forEach} and {@code
+ * Hashtable.forEach}. That ordering lets callers declare the callback as a {@code static final}
+ * non-capturing lambda and pass per-call context at the call site without allocating a closure.
+ *
+ * <h2>Thread safety contract</h2>
+ *
+ * <p>The ring buffer is thread-safe for any number of producer threads plus exactly one consumer
+ * thread. Calling {@code drain} from multiple threads concurrently is <b>not</b> supported and will
+ * corrupt state.
+ *
+ * <p>For the slot type {@code T}:
+ *
+ * <ul>
+ *   <li><b>Slot fields can be plain</b> ({@code int}, {@code long}, object references) -- they do
+ *       <i>not</i> need to be {@code volatile} or guarded by synchronization. Happens-before
+ *       between the producer's slot mutation and the consumer's slot read is provided by the ring's
+ *       internal publication-sequence machinery: a release write on the per-slot sequence inside
+ *       {@code tryWrite}, paired with an acquire read inside {@code drain}.
+ *   <li><b>Don't retain slot references past your handler's return.</b> Once a {@code tryWrite}
+ *       filler returns, the slot becomes visible to the consumer; once a {@code drain} handler
+ *       returns, the slot may be reclaimed by another producer and its fields overwritten. If the
+ *       consumer needs to keep any state from a slot, it must extract by value (or copy references)
+ *       before returning.
+ *   <li><b>Don't expose slot references outside the ring.</b> Treat {@code T} as ring-buffer-owned;
+ *       sharing a slot reference with code that doesn't follow the same discipline breaks the
+ *       happens-before story.
+ * </ul>
+ *
+ * <p>For producer fillers:
+ *
+ * <ul>
+ *   <li>Filler invocations on the same slot are serialized (one producer wins the sequence CAS), so
+ *       the filler can write fields without synchronization.
+ *   <li><b>If a filler throws, the slot is published anyway</b> (with whatever the filler had
+ *       written so far) and the exception propagates to the caller. This prevents the consumer from
+ *       getting stuck waiting for an unfinished slot; the cost is that the consumer may observe a
+ *       partially-filled or stale-fielded slot. Fillers should be written to either not throw or to
+ *       leave the slot in a state the consumer can recognize and skip.
+ * </ul>
+ *
+ * <h2>Implementation</h2>
+ *
+ * <p>Producer cursor is CAS-claimed; visibility of a claimed slot to the consumer is gated by a
+ * per-slot publication-sequence array. Consumer cursor is updated with a volatile write so
+ * producers observe space being freed. Cursors are cache-line-padded against each other (see the
+ * {@code MpscRingBufferPad*} hierarchy at the top of this file) and the publication-sequence array
+ * is strided so each logical entry occupies a distinct cache line.
+ */
+public final class MpscRingBuffer<T> extends MpscRingBufferPad2 {
+
+  /**
+   * Cache line size in {@code long}-units. 64-byte cache lines on every common CPU we ship to (x86,
+   * ARM); 8 bytes per long. Each logical slot in {@link #publishedSequences} is spread out by this
+   * stride so adjacent logical sequences don't share a cache line and don't ping-pong between
+   * producer cores under heavy contention.
+   */
+  private static final int CACHE_LINE_LONGS = 8;
+
+  @SuppressWarnings("rawtypes") // AtomicLongFieldUpdater can't take a parameterized class
+  private static final AtomicLongFieldUpdater<MpscRingBufferProducerCursor> PRODUCER_CURSOR =
+      AtomicLongFieldUpdater.newUpdater(MpscRingBufferProducerCursor.class, "producerCursor");
+
+  private final T[] slots;
+
+  /**
+   * Per-slot publication sequence, strided by {@link #CACHE_LINE_LONGS} to avoid false sharing.
+   * Producers write the claimed sequence here as the last step of a publish (release write via
+   * {@link AtomicLongArray#set}); the consumer reads it (acquire read) to determine whether the
+   * slot at the next position is ready. A slot is considered published for sequence {@code s} iff
+   * {@code publishedSequences[(s & mask) * CACHE_LINE_LONGS] == s}.
+   */
+  private final AtomicLongArray publishedSequences;
+
+  private final int capacity;
+  private final int mask;
+
+  @SuppressWarnings("unchecked")
+  public MpscRingBuffer(final Supplier<T> factory, final int capacityHint) {
+    if (capacityHint < 1) {
+      throw new IllegalArgumentException("capacityHint must be >= 1, got " + capacityHint);
+    }
+    this.capacity = nextPowerOfTwo(capacityHint);
+    if (this.capacity < 1) {
+      throw new IllegalArgumentException("capacity overflow for hint " + capacityHint);
+    }
+    this.mask = capacity - 1;
+    this.slots = (T[]) new Object[capacity];
+    this.publishedSequences = new AtomicLongArray(capacity * CACHE_LINE_LONGS);
+    for (int i = 0; i < capacity; i++) {
+      slots[i] = factory.get();
+      // Initial: sentinel "no sequence published here yet" -- anything < 0 works since
+      // sequences are 0-based and monotonically increasing.
+      publishedSequences.set(i * CACHE_LINE_LONGS, Long.MIN_VALUE);
+    }
+  }
+
+  public int capacity() {
+    return capacity;
+  }
+
+  /** Approximate count of slots holding unread items. May briefly exceed capacity under race. */
+  public int size() {
+    final long p = producerCursor;
+    final long c = consumerCursor;
+    final long diff = p - c;
+    if (diff <= 0) return 0;
+    if (diff > capacity) return capacity;
+    return (int) diff;
+  }
+
+  public boolean isEmpty() {
+    return producerCursor == consumerCursor;
+  }
+
+  /** {@code true} if the slot was filled and published; {@code false} if the ring is full. */
+  public boolean tryWrite(final Consumer<? super T> filler) {
+    final long seq = claim();
+    if (seq < 0L) return false;
+    // publish in finally so a throwing filler doesn't leave the slot un-published -- the
+    // consumer would otherwise wait at that sequence forever. See class javadoc.
+    try {
+      filler.accept(slots[(int) (seq & mask)]);
+    } finally {
+      publish(seq);
+    }
+    return true;
+  }
+
+  public <C> boolean tryWrite(final C context, final BiConsumer<? super C, ? super T> filler) {
+    final long seq = claim();
+    if (seq < 0L) return false;
+    try {
+      filler.accept(context, slots[(int) (seq & mask)]);
+    } finally {
+      publish(seq);
+    }
+    return true;
+  }
+
+  public <C1, C2> boolean tryWrite(
+      final C1 context1,
+      final C2 context2,
+      final TriConsumer<? super C1, ? super C2, ? super T> filler) {
+    final long seq = claim();
+    if (seq < 0L) return false;
+    try {
+      filler.accept(context1, context2, slots[(int) (seq & mask)]);
+    } finally {
+      publish(seq);
+    }
+    return true;
+  }
+
+  /** Drains all currently-available slots. Returns the count processed. */
+  public int drain(final Consumer<? super T> handler) {
+    long cursor = consumerCursor;
+    int count = 0;
+    while (true) {
+      final long nextSeq = cursor + 1L;
+      final int idx = (int) (nextSeq & mask);
+      if (publishedSequences.get(idx * CACHE_LINE_LONGS) != nextSeq) break;
+      handler.accept(slots[idx]);
+      cursor = nextSeq;
+      count++;
+    }
+    if (count > 0) consumerCursor = cursor;
+    return count;
+  }
+
+  public <C> int drain(final C context, final BiConsumer<? super C, ? super T> handler) {
+    long cursor = consumerCursor;
+    int count = 0;
+    while (true) {
+      final long nextSeq = cursor + 1L;
+      final int idx = (int) (nextSeq & mask);
+      if (publishedSequences.get(idx * CACHE_LINE_LONGS) != nextSeq) break;
+      handler.accept(context, slots[idx]);
+      cursor = nextSeq;
+      count++;
+    }
+    if (count > 0) consumerCursor = cursor;
+    return count;
+  }
+
+  public <C1, C2> int drain(
+      final C1 context1,
+      final C2 context2,
+      final TriConsumer<? super C1, ? super C2, ? super T> handler) {
+    long cursor = consumerCursor;
+    int count = 0;
+    while (true) {
+      final long nextSeq = cursor + 1L;
+      final int idx = (int) (nextSeq & mask);
+      if (publishedSequences.get(idx * CACHE_LINE_LONGS) != nextSeq) break;
+      handler.accept(context1, context2, slots[idx]);
+      cursor = nextSeq;
+      count++;
+    }
+    if (count > 0) consumerCursor = cursor;
+    return count;
+  }
+
+  /**
+   * Try to claim a contiguous range of {@code n} sequences in a single CAS. Returns {@code null} if
+   * the ring doesn't have room for the whole batch -- the caller treats that as "drop all {@code
+   * n}", which is the natural shape for callers that batch by a higher-level unit (e.g. one CSS
+   * publish per completed trace). When the caller has a list of N items to write, this amortizes
+   * producer-cursor contention from O(N) CASes to O(1) per call.
+   *
+   * <p>The returned {@link Batch} must be filled via {@link Batch#fillAndPublish} exactly {@code n}
+   * times. Under-publishing leaves the ring stuck at the unfilled sequence -- the consumer waits
+   * there forever. Over-publishing throws {@link IllegalStateException}.
+   *
+   * @throws IllegalArgumentException if {@code n &lt; 1}
+   */
+  public Batch tryClaim(final int n) {
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be >= 1, got " + n);
+    }
+    while (true) {
+      final long current = producerCursor;
+      // Stale read of consumerCursor is fine: a false "full" reading just causes a drop, and a
+      // real one is correctly identified because consumerCursor only advances.
+      final long consumed = consumerCursor;
+      final long next = current + n;
+      if (next - consumed > capacity) {
+        return null;
+      }
+      if (PRODUCER_CURSOR.compareAndSet(this, current, next)) {
+        // Claimed sequences [current + 1, next] inclusive (== n sequences total).
+        return new Batch(current + 1L, n);
+      }
+      // CAS failure -> another producer claimed; retry.
+    }
+  }
+
+  /** CAS-claim the next sequence, or return {@code -1} if the ring is full. */
+  private long claim() {
+    while (true) {
+      final long current = producerCursor;
+      // Stale read of consumerCursor is fine: a false "full" reading just causes a drop, and
+      // a real "full" reading is correctly identified because consumerCursor only advances.
+      final long consumed = consumerCursor;
+      if (current - consumed >= capacity) {
+        return -1L;
+      }
+      final long next = current + 1L;
+      if (PRODUCER_CURSOR.compareAndSet(this, current, next)) {
+        return next;
+      }
+      // CAS failure -> another producer claimed; retry.
+    }
+  }
+
+  /**
+   * Handle returned by {@link MpscRingBuffer#tryClaim}. Holds a contiguous range of pre-claimed
+   * sequences belonging to the producer thread that called {@code tryClaim}; the caller must fill
+   * and publish each via {@link #fillAndPublish}.
+   *
+   * <p><b>Not thread-safe</b> -- the producer thread owns it for the lifetime of the call. Do not
+   * share across threads.
+   */
+  public final class Batch {
+    private final long startSeq;
+    private final int size;
+    private int published;
+
+    Batch(final long startSeq, final int size) {
+      this.startSeq = startSeq;
+      this.size = size;
+    }
+
+    /** Total slots in this batch (the {@code n} passed to {@code tryClaim}). */
+    public int size() {
+      return size;
+    }
+
+    /** Slots not yet filled. */
+    public int remaining() {
+      return size - published;
+    }
+
+    public void fillAndPublish(final Consumer<? super T> filler) {
+      final long seq = nextSeq();
+      final int idx = (int) (seq & mask);
+      try {
+        filler.accept(slots[idx]);
+      } finally {
+        publishedSequences.set(idx * CACHE_LINE_LONGS, seq);
+      }
+    }
+
+    public <C> void fillAndPublish(final C context, final BiConsumer<? super C, ? super T> filler) {
+      final long seq = nextSeq();
+      final int idx = (int) (seq & mask);
+      try {
+        filler.accept(context, slots[idx]);
+      } finally {
+        publishedSequences.set(idx * CACHE_LINE_LONGS, seq);
+      }
+    }
+
+    public <C1, C2> void fillAndPublish(
+        final C1 context1,
+        final C2 context2,
+        final TriConsumer<? super C1, ? super C2, ? super T> filler) {
+      final long seq = nextSeq();
+      final int idx = (int) (seq & mask);
+      try {
+        filler.accept(context1, context2, slots[idx]);
+      } finally {
+        publishedSequences.set(idx * CACHE_LINE_LONGS, seq);
+      }
+    }
+
+    private long nextSeq() {
+      if (published >= size) {
+        throw new IllegalStateException(
+            "Batch over-published: size=" + size + " published=" + published);
+      }
+      final long seq = startSeq + published;
+      published++;
+      return seq;
+    }
+  }
+
+  // ============ Low-level primitives ============
+  //
+  // These three methods (tryClaimRange / slotAt / publish) expose the producer-side machinery
+  // directly. Callers manage the claimed sequence range themselves -- no per-call handle
+  // allocated, no callback dispatched. This is intended for hot paths where the higher-level
+  // tryWrite/tryClaim+Batch APIs would allocate per call (Iterator on the iterable being
+  // batched, or the Batch object itself) that escape analysis doesn't eliminate.
+  //
+  // Misuse hazards: every claimed sequence MUST be published exactly once; sequences must be
+  // published in [start, start+n) range only; the slot returned by slotAt() must not escape past
+  // the next publish() of any sequence (the producer "owns" the slot until publish).
+
+  /**
+   * Try to claim a contiguous range of {@code n} sequences in a single CAS. Returns the
+   * <i>start</i> sequence on success (a non-negative long) or {@code -1L} if the ring doesn't have
+   * room for the whole batch. The caller must publish each sequence in {@code [start, start + n)}
+   * exactly once via {@link #publish(long)}.
+   *
+   * @throws IllegalArgumentException if {@code n &lt; 1}
+   */
+  public long tryClaimRange(final int n) {
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be >= 1, got " + n);
+    }
+    while (true) {
+      final long current = producerCursor;
+      final long consumed = consumerCursor;
+      final long next = current + n;
+      if (next - consumed > capacity) {
+        return -1L;
+      }
+      if (PRODUCER_CURSOR.compareAndSet(this, current, next)) {
+        return current + 1L;
+      }
+    }
+  }
+
+  /**
+   * Slot for sequence {@code seq}. Only safe when {@code seq} is in a range currently owned by the
+   * caller (claimed via {@link #tryClaimRange} and not yet published).
+   */
+  public T slotAt(final long seq) {
+    return slots[(int) (seq & mask)];
+  }
+
+  /**
+   * Publish sequence {@code seq}, making the slot at {@code slotAt(seq)} visible to the consumer.
+   * Release semantics via {@link AtomicLongArray#set}.
+   */
+  public void publish(final long seq) {
+    publishedSequences.set(((int) (seq & mask)) * CACHE_LINE_LONGS, seq);
+  }
+
+  private static int nextPowerOfTwo(final int n) {
+    if (n <= 1) return 1;
+    // 32 - leadingZeros(n-1) gives the number of bits needed to represent n-1; shifting 1 by that
+    // gives the smallest power of two >= n.
+    final int bits = 32 - Integer.numberOfLeadingZeros(n - 1);
+    return 1 << bits;
+  }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/concurrent/MpscRingBufferTest.java b/internal-api/src/test/java/datadog/trace/util/concurrent/MpscRingBufferTest.java
new file mode 100644
index 00000000000..6aeb635abd0
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/concurrent/MpscRingBufferTest.java
@@ -0,0 +1,498 @@
+package datadog.trace.util.concurrent;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.trace.api.function.TriConsumer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;
+import org.junit.jupiter.api.Test;
+
+class MpscRingBufferTest {
+
+  /** Mutable slot used by the tests; replaces the per-publish allocation a real consumer avoids. */
+  static final class Slot {
+    int value;
+    String tag;
+  }
+
+  // ============ Construction ============
+
+  @Test
+  void capacityRoundsUpToPowerOfTwo() {
+    assertEquals(1, new MpscRingBuffer<>(Slot::new, 1).capacity());
+    assertEquals(2, new MpscRingBuffer<>(Slot::new, 2).capacity());
+    assertEquals(4, new MpscRingBuffer<>(Slot::new, 3).capacity());
+    assertEquals(16, new MpscRingBuffer<>(Slot::new, 10).capacity());
+    assertEquals(1024, new MpscRingBuffer<>(Slot::new, 1024).capacity());
+    assertEquals(2048, new MpscRingBuffer<>(Slot::new, 1025).capacity());
+  }
+
+  @Test
+  void rejectsNonPositiveCapacityHint() {
+    assertThrows(IllegalArgumentException.class, () -> new MpscRingBuffer<>(Slot::new, 0));
+    assertThrows(IllegalArgumentException.class, () -> new MpscRingBuffer<>(Slot::new, -1));
+  }
+
+  @Test
+  void slotsArePreAllocatedFromFactory() {
+    AtomicInteger calls = new AtomicInteger();
+    MpscRingBuffer<Slot> ring =
+        new MpscRingBuffer<>(
+            () -> {
+              calls.incrementAndGet();
+              return new Slot();
+            },
+            8);
+    assertEquals(8, ring.capacity());
+    assertEquals(8, calls.get(), "factory must run capacity times during construction");
+  }
+
+  // ============ Basic produce / consume ============
+
+  @Test
+  void emptyRingIsEmpty() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertTrue(ring.isEmpty());
+    assertEquals(0, ring.size());
+    assertEquals(0, ring.drain(s -> {}));
+  }
+
+  @Test
+  void singleWriteThenDrain() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertTrue(
+        ring.tryWrite(
+            s -> {
+              s.value = 42;
+              s.tag = "hello";
+            }));
+    assertEquals(1, ring.size());
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(1, drained);
+    assertEquals(Arrays.asList(42), seen);
+    assertTrue(ring.isEmpty());
+  }
+
+  @Test
+  void writesPreserveFIFOOrder() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    for (int i = 0; i < 6; i++) {
+      final int v = i;
+      assertTrue(ring.tryWrite(s -> s.value = v));
+    }
+    List<Integer> seen = new ArrayList<>();
+    ring.drain(s -> seen.add(s.value));
+    assertEquals(Arrays.asList(0, 1, 2, 3, 4, 5), seen);
+  }
+
+  // ============ Full / drop behavior ============
+
+  @Test
+  void writesBeyondCapacityFail() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    for (int i = 0; i < 4; i++) {
+      assertTrue(ring.tryWrite(s -> s.value = 0));
+    }
+    assertFalse(ring.tryWrite(s -> s.value = 0), "5th write must fail when capacity == 4");
+    assertEquals(4, ring.size());
+  }
+
+  @Test
+  void writesResumeAfterDrain() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    for (int i = 0; i < 4; i++) {
+      final int v = i;
+      assertTrue(ring.tryWrite(s -> s.value = v));
+    }
+    assertFalse(ring.tryWrite(s -> {}));
+
+    AtomicInteger sum = new AtomicInteger();
+    ring.drain(s -> sum.addAndGet(s.value));
+    assertEquals(0 + 1 + 2 + 3, sum.get());
+
+    // Now should accept another full round.
+    for (int i = 100; i < 104; i++) {
+      final int v = i;
+      assertTrue(ring.tryWrite(s -> s.value = v));
+    }
+    assertFalse(ring.tryWrite(s -> {}));
+  }
+
+  // ============ Context-passing variants (context first, slot last) ============
+
+  @Test
+  void writeAndDrainWithOneContext() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    BiConsumer<Integer, Slot> filler = (ctx, s) -> s.value = ctx;
+    assertTrue(ring.tryWrite(7, filler));
+    assertTrue(ring.tryWrite(8, filler));
+
+    List<Integer> seen = new ArrayList<>();
+    BiConsumer<List<Integer>, Slot> reader = (sink, s) -> sink.add(s.value);
+    assertEquals(2, ring.drain(seen, reader));
+    assertEquals(Arrays.asList(7, 8), seen);
+  }
+
+  @Test
+  void writeAndDrainWithTwoContexts() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    TriConsumer<Integer, String, Slot> filler =
+        (v, t, s) -> {
+          s.value = v;
+          s.tag = t;
+        };
+    assertTrue(ring.tryWrite(1, "a", filler));
+    assertTrue(ring.tryWrite(2, "b", filler));
+
+    List<String> tags = new ArrayList<>();
+    List<Integer> vals = new ArrayList<>();
+    TriConsumer<List<String>, List<Integer>, Slot> reader =
+        (ts, vs, s) -> {
+          ts.add(s.tag);
+          vs.add(s.value);
+        };
+    assertEquals(2, ring.drain(tags, vals, reader));
+    assertEquals(Arrays.asList("a", "b"), tags);
+    assertEquals(Arrays.asList(1, 2), vals);
+  }
+
+  // ============ Concurrency ============
+
+  @Test
+  void manyProducersSingleConsumerSeesEveryWrittenValue() throws InterruptedException {
+    final int producers = 8;
+    final int perProducer = 50_000;
+    final int total = producers * perProducer;
+
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 1024);
+
+    ExecutorService producerPool = Executors.newFixedThreadPool(producers);
+    AtomicInteger dropped = new AtomicInteger();
+    AtomicInteger written = new AtomicInteger();
+    CountDownLatch start = new CountDownLatch(1);
+
+    BiConsumer<Integer, Slot> filler = (v, s) -> s.value = v;
+
+    for (int p = 0; p < producers; p++) {
+      final int base = p * perProducer;
+      producerPool.submit(
+          () -> {
+            try {
+              start.await();
+            } catch (InterruptedException ie) {
+              Thread.currentThread().interrupt();
+              return;
+            }
+            for (int i = 0; i < perProducer; i++) {
+              int v = base + i;
+              // Spin until the consumer makes room. We're testing correctness, not throughput.
+              while (!ring.tryWrite(v, filler)) {
+                dropped.incrementAndGet();
+                Thread.yield();
+              }
+              written.incrementAndGet();
+            }
+          });
+    }
+
+    Set<Integer> seen = new HashSet<>(total);
+    BiConsumer<Set<Integer>, Slot> reader = (sink, s) -> sink.add(s.value);
+
+    Thread consumer =
+        new Thread(
+            () -> {
+              while (seen.size() < total) {
+                if (ring.drain(seen, reader) == 0) Thread.yield();
+              }
+            },
+            "ring-consumer");
+    consumer.start();
+
+    start.countDown();
+    producerPool.shutdown();
+    assertTrue(producerPool.awaitTermination(30, TimeUnit.SECONDS), "producers timed out");
+    assertTrue(consumer.isAlive() || seen.size() == total);
+    consumer.join(30_000);
+    assertFalse(consumer.isAlive(), "consumer timed out");
+
+    assertEquals(total, written.get(), "every producer call should eventually succeed");
+    assertEquals(total, seen.size(), "consumer must see every value exactly once");
+  }
+
+  @Test
+  void sizeReflectsOutstandingWrites() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    assertEquals(0, ring.size());
+    ring.tryWrite(s -> {});
+    assertEquals(1, ring.size());
+    ring.tryWrite(s -> {});
+    assertEquals(2, ring.size());
+    ring.drain(s -> {});
+    assertEquals(0, ring.size());
+  }
+
+  @Test
+  void throwingFillerStillPublishesSoConsumerDoesntHang() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+
+    // First write succeeds.
+    assertTrue(ring.tryWrite(s -> s.value = 1));
+
+    // Second write throws midway through filling. Slot must still be published so the consumer's
+    // drain can advance past it.
+    RuntimeException boom = new RuntimeException("boom");
+    RuntimeException thrown =
+        assertThrows(
+            RuntimeException.class,
+            () ->
+                ring.tryWrite(
+                    s -> {
+                      s.value = 2;
+                      throw boom;
+                    }));
+    assertSame(boom, thrown);
+
+    // Third write proves the ring's cursors are still healthy after the throw.
+    assertTrue(ring.tryWrite(s -> s.value = 3));
+
+    // Consumer drains all three: it must not hang on the partially-filled slot.
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(3, drained, "consumer must advance past the throwing slot");
+    assertEquals(Arrays.asList(1, 2, 3), seen, "throwing slot keeps whatever filler had written");
+  }
+
+  // ============ Batch claim (tryClaim) ============
+
+  @Test
+  void tryClaimReturnsBatchOfRequestedSize() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(3);
+
+    assertNotNull(batch);
+    assertEquals(3, batch.size());
+    assertEquals(3, batch.remaining());
+  }
+
+  @Test
+  void tryClaimRejectsZeroOrNegative() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaim(0));
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaim(-1));
+  }
+
+  @Test
+  void tryClaimReturnsNullWhenRingCantFitBatch() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertNotNull(ring.tryClaim(3));
+    // Only 1 slot left; claiming 2 must fail wholesale.
+    assertNull(ring.tryClaim(2), "all-or-nothing: partial batches are not allowed");
+    // But one more slot does fit.
+    assertNotNull(ring.tryClaim(1));
+    // Now full.
+    assertNull(ring.tryClaim(1));
+  }
+
+  @Test
+  void tryClaimFillAndPublishDeliversAllToDrain() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(5);
+
+    for (int i = 0; i < 5; i++) {
+      final int v = i;
+      batch.fillAndPublish(s -> s.value = v);
+    }
+    assertEquals(0, batch.remaining());
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(5, drained);
+    assertEquals(Arrays.asList(0, 1, 2, 3, 4), seen, "batch publishes in order");
+  }
+
+  @Test
+  void overPublishingBatchThrows() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(2);
+
+    batch.fillAndPublish(s -> s.value = 1);
+    batch.fillAndPublish(s -> s.value = 2);
+    assertThrows(IllegalStateException.class, () -> batch.fillAndPublish(s -> s.value = 3));
+  }
+
+  @Test
+  void batchSupportsContextFillers() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(3);
+
+    BiConsumer<Integer, Slot> oneCtx = (v, s) -> s.value = v;
+    TriConsumer<Integer, String, Slot> twoCtx =
+        (v, t, s) -> {
+          s.value = v;
+          s.tag = t;
+        };
+
+    batch.fillAndPublish(s -> s.value = 1);
+    batch.fillAndPublish(2, oneCtx);
+    batch.fillAndPublish(3, "three", twoCtx);
+
+    List<String> seen = new ArrayList<>();
+    ring.drain(s -> seen.add(s.value + "/" + s.tag));
+    assertEquals(Arrays.asList("1/null", "2/null", "3/three"), seen);
+  }
+
+  @Test
+  void batchFillerThrowStillPublishesAndAdvances() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    MpscRingBuffer<Slot>.Batch batch = ring.tryClaim(3);
+
+    batch.fillAndPublish(s -> s.value = 1);
+    RuntimeException boom = new RuntimeException("boom");
+    assertThrows(
+        RuntimeException.class,
+        () ->
+            batch.fillAndPublish(
+                s -> {
+                  s.value = 2;
+                  throw boom;
+                }));
+    // The throwing slot's sequence has already been consumed; published counter advanced.
+    assertEquals(1, batch.remaining(), "throwing slot still counts as published");
+    batch.fillAndPublish(s -> s.value = 3);
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(3, drained);
+    assertEquals(Arrays.asList(1, 2, 3), seen);
+  }
+
+  // ============ Low-level primitives (tryClaimRange / slotAt / publish) ============
+
+  @Test
+  void tryClaimRangeReturnsStartSequence() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    long start1 = ring.tryClaimRange(3);
+    long start2 = ring.tryClaimRange(2);
+
+    assertEquals(0L, start1, "first range starts at sequence 0");
+    assertEquals(3L, start2, "second range begins immediately after the first");
+  }
+
+  @Test
+  void tryClaimRangeRejectsZeroOrNegative() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaimRange(0));
+    assertThrows(IllegalArgumentException.class, () -> ring.tryClaimRange(-1));
+  }
+
+  @Test
+  void tryClaimRangeReturnsMinusOneWhenFull() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    assertTrue(ring.tryClaimRange(3) >= 0);
+    assertEquals(-1L, ring.tryClaimRange(2), "all-or-nothing");
+    assertTrue(ring.tryClaimRange(1) >= 0);
+    assertEquals(-1L, ring.tryClaimRange(1));
+  }
+
+  @Test
+  void slotAtAndPublishRoundTrip() {
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 8);
+    long start = ring.tryClaimRange(3);
+    assertTrue(start >= 0);
+
+    for (int i = 0; i < 3; i++) {
+      long seq = start + i;
+      Slot slot = ring.slotAt(seq);
+      slot.value = (int) (seq + 100);
+      ring.publish(seq);
+    }
+
+    List<Integer> seen = new ArrayList<>();
+    int drained = ring.drain(s -> seen.add(s.value));
+    assertEquals(3, drained);
+    assertEquals(Arrays.asList(100, 101, 102), seen);
+  }
+
+  @Test
+  void slotAtReturnsSameInstanceForSameModuloPosition() {
+    // After publish+drain wraps around, the slot at sequence N and sequence N+capacity are the
+    // same physical object (this is the whole point of the ring).
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 4);
+    Slot firstSlot = ring.slotAt(0L);
+    Slot wrappedSlot = ring.slotAt(4L); // 4 & mask(3) == 0
+    assertSame(firstSlot, wrappedSlot);
+  }
+
+  @Test
+  void concurrentBatchClaimsAreOrderedAndDontInterleave() throws InterruptedException {
+    final int producers = 8;
+    final int batchesPerProducer = 200;
+    final int batchSize = 16;
+    final int total = producers * batchesPerProducer * batchSize;
+
+    MpscRingBuffer<Slot> ring = new MpscRingBuffer<>(Slot::new, 256);
+    ExecutorService pool = Executors.newFixedThreadPool(producers);
+    AtomicInteger writes = new AtomicInteger();
+    CountDownLatch start = new CountDownLatch(1);
+
+    for (int p = 0; p < producers; p++) {
+      final int base = p * batchesPerProducer * batchSize;
+      pool.submit(
+          () -> {
+            try {
+              start.await();
+            } catch (InterruptedException ie) {
+              Thread.currentThread().interrupt();
+              return;
+            }
+            for (int b = 0; b < batchesPerProducer; b++) {
+              MpscRingBuffer<Slot>.Batch batch;
+              while ((batch = ring.tryClaim(batchSize)) == null) {
+                Thread.yield();
+              }
+              for (int i = 0; i < batchSize; i++) {
+                final int v = base + b * batchSize + i;
+                batch.fillAndPublish(s -> s.value = v);
+              }
+              writes.addAndGet(batchSize);
+            }
+          });
+    }
+
+    Set<Integer> seen = new HashSet<>(total);
+    Thread consumer =
+        new Thread(
+            () -> {
+              while (seen.size() < total) {
+                if (ring.drain((Slot s) -> seen.add(s.value)) == 0) Thread.yield();
+              }
+            },
+            "ring-batch-consumer");
+    consumer.start();
+
+    start.countDown();
+    pool.shutdown();
+    assertTrue(pool.awaitTermination(30, TimeUnit.SECONDS), "producers timed out");
+    consumer.join(30_000);
+    assertFalse(consumer.isAlive(), "consumer timed out");
+    assertEquals(total, writes.get());
+    assertEquals(total, seen.size(), "consumer must see every value exactly once");
+  }
+}
diff --git a/settings.gradle.kts b/settings.gradle.kts
index e6c9469b9c6..dda1f432e6f 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -112,6 +112,7 @@ include(
 
 include(
   ":communication",
+  ":components:annotations",
   ":components:context",
   ":components:environment",
   ":components:http:http-api",
diff --git a/utils/config-utils/build.gradle.kts b/utils/config-utils/build.gradle.kts
index 84f494596d8..b1bb146231e 100644
--- a/utils/config-utils/build.gradle.kts
+++ b/utils/config-utils/build.gradle.kts
@@ -55,6 +55,7 @@ val excludedClassesInstructionCoverage by extra(
 )
 
 dependencies {
+  compileOnly(project(":components:annotations"))
   implementation(project(":components:environment"))
   implementation(project(":dd-trace-api"))
   api(project(":utils:filesystem-utils"))
diff --git a/utils/config-utils/src/main/java/datadog/trace/api/env/CapturedEnvironment.java b/utils/config-utils/src/main/java/datadog/trace/api/env/CapturedEnvironment.java
index 44e55b6fd18..2db02edd88a 100644
--- a/utils/config-utils/src/main/java/datadog/trace/api/env/CapturedEnvironment.java
+++ b/utils/config-utils/src/main/java/datadog/trace/api/env/CapturedEnvironment.java
@@ -2,6 +2,7 @@
 
 import datadog.environment.JavaVirtualMachine;
 import datadog.trace.api.config.GeneralConfig;
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.config.inversion.ConfigHelper;
 import java.io.File;
 import java.util.HashMap;
@@ -24,12 +25,7 @@ public ProcessInfo() {
       mainClass = JavaVirtualMachine.getMainClass();
     }
 
-    /**
-     * Visible for testing
-     *
-     * @param mainClass
-     * @param jarFile
-     */
+    @VisibleForTesting
     ProcessInfo(String mainClass, File jarFile) {
       this.mainClass = mainClass;
       this.jarFile = jarFile;
@@ -55,17 +51,13 @@ public ProcessInfo getProcessInfo() {
     return processInfo;
   }
 
-  // Testing purposes
+  @VisibleForTesting
   static void useFixedEnv(final Map<String, String> props) {
     INSTANCE.properties.clear();
     INSTANCE.properties.putAll(props);
   }
 
-  /**
-   * For testing purposes.
-   *
-   * @param processInfo
-   */
+  @VisibleForTesting
   static void useFixedProcessInfo(final ProcessInfo processInfo) {
     INSTANCE.processInfo = processInfo;
   }
diff --git a/utils/logging-utils/src/main/java/datadog/logging/IOLogger.java b/utils/logging-utils/src/main/java/datadog/logging/IOLogger.java
index 407f7509454..af21f4a0c57 100644
--- a/utils/logging-utils/src/main/java/datadog/logging/IOLogger.java
+++ b/utils/logging-utils/src/main/java/datadog/logging/IOLogger.java
@@ -2,6 +2,7 @@
 
 import static datadog.trace.api.telemetry.LogCollector.EXCLUDE_TELEMETRY;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import java.util.concurrent.TimeUnit;
 import org.slf4j.Logger;
 
@@ -15,7 +16,7 @@ public IOLogger(final Logger log) {
     this(log, new RatelimitedLogger(log, 5, TimeUnit.MINUTES));
   }
 
-  // Visible for testing
+  @VisibleForTesting
   IOLogger(final Logger log, final RatelimitedLogger ratelimitedLogger) {
     this.log = log;
     this.ratelimitedLogger = ratelimitedLogger;
diff --git a/utils/logging-utils/src/main/java/datadog/logging/RatelimitedLogger.java b/utils/logging-utils/src/main/java/datadog/logging/RatelimitedLogger.java
index 3e5728b6bb7..b88505c1e59 100644
--- a/utils/logging-utils/src/main/java/datadog/logging/RatelimitedLogger.java
+++ b/utils/logging-utils/src/main/java/datadog/logging/RatelimitedLogger.java
@@ -1,5 +1,6 @@
 package datadog.logging;
 
+import datadog.trace.api.internal.VisibleForTesting;
 import datadog.trace.api.time.SystemTimeSource;
 import datadog.trace.api.time.TimeSource;
 import java.util.Locale;
@@ -25,7 +26,7 @@ public RatelimitedLogger(final Logger log, final int delay, final TimeUnit timeU
     this(log, delay, timeUnit, SystemTimeSource.INSTANCE);
   }
 
-  // Visible for testing
+  @VisibleForTesting
   RatelimitedLogger(
       final Logger log, final int delay, final TimeUnit timeUnit, final TimeSource timeSource) {
     this.log = log;