From dc7642a7ea3fe8a722d358849f41e11e772df413 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Mon, 15 Jun 2026 12:04:38 -0400 Subject: [PATCH 1/3] TestDistributedTracing goldfile approach Goal: make it easy & insightful to maintain TestDistributedTracing Risk: determinism --- .../tracing/TestDistributedTracing/test.json | 111 ++++ .../TestDistributedTracing/testAdminApi.json | 21 + .../TestDistributedTracing/testV2Api.json | 143 ++++++ .../opentelemetry/GoldFileTraceVerifier.java | 477 ++++++++++++++++++ .../opentelemetry/TestDistributedTracing.java | 155 +++--- 5 files changed, 812 insertions(+), 95 deletions(-) create mode 100644 solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/test.json create mode 100644 solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testAdminApi.json create mode 100644 solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testV2Api.json create mode 100644 solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java diff --git a/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/test.json b/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/test.json new file mode 100644 index 000000000000..8cd2d28437af --- /dev/null +++ b/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/test.json @@ -0,0 +1,111 @@ +{"phases":[ + { + "description":"phase 0", + "spans":[{ + "name":"post:/{collection}/update", + "kind":"SERVER", + "db.instance":"collection1", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1/update", + "http.params":"commit=true&wt=javabin", + "children":[ + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + 
"http.url":"http://NORMALIZED/solr/collection1_shard1_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER&wt=javabin"}, + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard1_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER"}, + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=TOLEADER&wt=javabin", + "children":[{ + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER&wt=javabin"}]}, + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=TOLEADER", + "children":[{ + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER"}]}]}]}, + { + "description":"phase 1", + 
"spans":[{ + "name":"get:/{collection}/select", + "kind":"SERVER", + "db.instance":"collection1", + "db.type":"solr", + "http.request.method":"GET", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1/select", + "http.params":"q=*%3A*&wt=javabin", + "children":[ + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard1_replica_nN/select"}, + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard1_replica_nN/select"}, + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/select"}, + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/select"}]}]}]} \ No newline at end of file diff --git a/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testAdminApi.json b/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testAdminApi.json new file mode 100644 index 000000000000..479f3cebb789 --- /dev/null +++ b/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testAdminApi.json @@ -0,0 +1,21 @@ +{"phases":[ + { + "description":"phase 0", + "spans":[{ + "name":"get:/admin/metrics", + "kind":"SERVER", + "db.type":"solr", + "http.request.method":"GET", + "http.response.status_code":200, + 
"http.url":"http://NORMALIZED/solr/admin/metrics", + "http.params":"wt=prometheus"}]}, + { + "description":"phase 1", + "spans":[{ + "name":"list:/admin/collections", + "kind":"SERVER", + "db.type":"solr", + "http.request.method":"GET", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/admin/collections", + "http.params":"action=LIST&wt=javabin"}]}]} \ No newline at end of file diff --git a/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testV2Api.json b/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testV2Api.json new file mode 100644 index 000000000000..975aac135329 --- /dev/null +++ b/solr/modules/opentelemetry/src/test-files/solr/tracing/TestDistributedTracing/testV2Api.json @@ -0,0 +1,143 @@ +{"phases":[ + { + "description":"phase 0", + "spans":[ + { + "name":"ReloadCollectionCmd", + "kind":"CLIENT", + "db.instance":"collection1", + "children":[ + { + "name":"post:/admin/cores", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/admin/cores"}, + { + "name":"post:/admin/cores", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/admin/cores"}, + { + "name":"post:/admin/cores", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/admin/cores"}, + { + "name":"post:/admin/cores", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/admin/cores"}]}, + { + "name":"post:/collections/{collection}/reload", + "kind":"SERVER", + 
"db.instance":"collection1", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/____v2/collections/collection1/reload", + "http.params":"wt=javabin"}]}, + { + "description":"phase 1", + "spans":[{ + "name":"post:/c/{collection}/update/json", + "kind":"SERVER", + "db.instance":"collection1", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/____v2/c/collection1/update/json", + "http.params":"commit=true&wt=javabin", + "children":[ + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard1_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER"}, + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=TOLEADER&wt=javabin", + "children":[{ + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER&wt=javabin"}]}, + { + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + 
"http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=TOLEADER", + "children":[{ + "name":"post:/{core}/update", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/update", + "http.params":"distrib.from=http%3A%2F%2FNORMALIZED&update.distrib=FROMLEADER"}]}]}]}, + { + "description":"phase 2", + "spans":[{ + "name":"get:/c/{collection}/select", + "kind":"SERVER", + "db.instance":"collection1", + "db.type":"solr", + "http.request.method":"GET", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/____v2/c/collection1/select", + "http.params":"q=id%3A9&wt=javabin", + "children":[ + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard1_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard1_replica_nN/select"}, + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/select"}, + { + "name":"post:/{core}/select", + "kind":"SERVER", + "db.instance":"collection1_shard2_replica_nN", + "db.type":"solr", + "http.request.method":"POST", + "http.response.status_code":200, + "http.url":"http://NORMALIZED/solr/collection1_shard2_replica_nN/select"}]}]}]} \ No newline at end of file diff --git a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java new file mode 100644 index 000000000000..6f94117d1d78 --- /dev/null +++ b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java @@ -0,0 
+1,477 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.opentelemetry; + +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; +import io.opentelemetry.sdk.trace.data.SpanData; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.util.RetryUtil; +import org.apache.solr.common.util.Utils; +import org.apache.solr.util.tracing.TraceUtils; +import org.junit.Assert; + +/** + * Verifies collected OpenTelemetry spans against a committed gold file. + * + *

Spans are normalized (IDs/timestamps stripped, URLs scrubbed) and arranged into a tree by + * parent-child relationships. The resulting JSON is compared against a gold file committed at + * {@code src/test-files/solr/tracing/{TestClassName}/{testMethod}.json}. + * + *

Usage

+ * + *
+ * var verifier = new GoldFileTraceVerifier(getClass(), "testV2Api");
+ * // ... perform operations ...
+ * verifier.verifyPhase(); // waits for spans, normalizes, compares
+ * // ... more operations ...
+ * verifier.verifyPhase();
+ * verifier.done();
+ * 
+ * + *

Regenerating Gold Files

+ * + *

Requires {@code -Ptests.useSecurityManager=false} since it writes to the source tree. + * + *

+ * gradlew :solr:modules:opentelemetry:test --tests TestDistributedTracing.testV2Api \
+ *   "-Ptests.jvmargs=-Dregenerate.golden.files=true" -Ptests.useSecurityManager=false
+ * 
+ * + *

Dumping Raw Traces (no normalization, all attributes)

+ * + *

Also requires {@code -Ptests.useSecurityManager=false} (writes to source tree). + * Dump files are written alongside gold files as {@code {testMethod}-phase{N}-dump.json}. + * Output paths are logged to stdout (visible in {@code build/test-results/test/outputs/OUTPUT-*.txt}). + * + *

+ * gradlew :solr:modules:opentelemetry:test --tests TestDistributedTracing.testV2Api \
+ *   "-Ptests.jvmargs=-Ddump.traces=true" -Ptests.useSecurityManager=false
+ * 
+ */ +public class GoldFileTraceVerifier { + + private static final boolean REGENERATE = Boolean.getBoolean("regenerate.golden.files"); + private static final boolean DUMP_TRACES = Boolean.getBoolean("dump.traces"); + private static final Pattern URL_HOST_PORT = Pattern.compile("https?://[^/]+/"); + private static final Pattern REPLICA_SUFFIX = Pattern.compile("_replica_n\\d+"); + private static final Pattern ENCODED_URL_IN_PARAMS = Pattern.compile("https?%3A%2F%2F[^&=]+"); + + private static final Comparator> SPAN_COMPARATOR = + Comparator.comparing((Map s) -> (String) s.get("name")) + .thenComparing(s -> s.getOrDefault(TraceUtils.TAG_DB.getKey(), "").toString()); + + private final String testName; + private final Path goldFilePath; + private final Map goldFile; // parsed gold file (phases list) + private final List>> recordedPhases = new ArrayList<>(); + private int currentPhaseIndex = 0; + + public GoldFileTraceVerifier(Class testClass, String testMethodName) { + this.testName = testMethodName; + this.goldFilePath = resolveGoldFilePath(testClass, testMethodName); + if (REGENERATE) { + this.goldFile = null; + } else { + this.goldFile = loadGoldFile(goldFilePath); + } + } + + /** + * Waits for the expected number of spans (from the gold file), collects them, normalizes into a + * tree, and compares against the gold file's current phase. 
+ */ + @SuppressWarnings("unchecked") + public void verifyPhase() { + InMemorySpanExporter exporter = CustomTestOtelTracerConfigurator.getInMemorySpanExporter(); + + List spans; + if (REGENERATE) { + spans = waitForStableSpans(exporter); + } else { + List> phases = (List>) goldFile.get("phases"); + Map phase = phases.get(currentPhaseIndex); + List> expectedSpans = (List>) phase.get("spans"); + int expectedCount = countSpansRecursive(expectedSpans); + spans = waitForSpans(exporter, expectedCount); + } + + exporter.reset(); + + if (DUMP_TRACES) { + dumpRawSpans(spans); + } + + List> tree = buildTree(spans); + recordedPhases.add(tree); + + if (!REGENERATE) { + comparePhase(currentPhaseIndex, tree); + } + currentPhaseIndex++; + } + + /** Call after all phases are verified. In regenerate mode, writes the gold file. */ + @SuppressWarnings("unchecked") + public void done() { + if (REGENERATE) { + writeGoldFile(); + } else { + List> phases = (List>) goldFile.get("phases"); + Assert.assertEquals( + "Unverified phases remain in gold file for " + testName, + phases.size(), + currentPhaseIndex); + } + } + + // --- Span collection --- + + private List waitForSpans(InMemorySpanExporter exporter, int expectedCount) { + try { + RetryUtil.retryUntil( + "Timed out waiting for " + expectedCount + " span(s) in phase " + currentPhaseIndex, + 500, + 20, + TimeUnit.MILLISECONDS, + () -> exporter.getFinishedSpanItems().size() >= expectedCount); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return new ArrayList<>(exporter.getFinishedSpanItems()); + } + + private List waitForStableSpans(InMemorySpanExporter exporter) { + try { + RetryUtil.retryUntil( + "Timed out waiting for any spans in phase " + currentPhaseIndex, + 500, + 20, + TimeUnit.MILLISECONDS, + () -> !exporter.getFinishedSpanItems().isEmpty()); + int lastCount = -1; + for (int i = 0; i < 10; i++) { + TimeUnit.MILLISECONDS.sleep(500); + int current = exporter.getFinishedSpanItems().size(); + if 
(current == lastCount) { + break; + } + lastCount = current; + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return new ArrayList<>(exporter.getFinishedSpanItems()); + } + + // --- Raw dump (for debugging, no normalization) --- + + private void dumpRawSpans(List spans) { + // Sort by start timestamp for a chronological view + List sorted = new ArrayList<>(spans); + sorted.sort(Comparator.comparingLong(SpanData::getStartEpochNanos)); + + // Build tree from raw data (no normalization) + Map> nodesBySpanId = new HashMap<>(); + List> roots = new ArrayList<>(); + + for (SpanData span : sorted) { + Map node = new LinkedHashMap<>(); + node.put("name", span.getName()); + node.put("kind", span.getKind().name()); + node.put("traceId", span.getSpanContext().getTraceId()); + node.put("spanId", span.getSpanContext().getSpanId()); + if (span.getParentSpanContext().isValid()) { + node.put("parentSpanId", span.getParentSpanContext().getSpanId()); + } + node.put("startEpochNanos", span.getStartEpochNanos()); + node.put("endEpochNanos", span.getEndEpochNanos()); + // All attributes + Map attrs = new LinkedHashMap<>(); + span.getAttributes() + .forEach((key, value) -> attrs.put(key.getKey(), value)); + if (!attrs.isEmpty()) { + node.put("attributes", attrs); + } + nodesBySpanId.put(span.getSpanContext().getSpanId(), node); + } + + for (SpanData span : sorted) { + Map node = nodesBySpanId.get(span.getSpanContext().getSpanId()); + String parentId = span.getParentSpanContext().getSpanId(); + if (span.getParentSpanContext().isValid() && nodesBySpanId.containsKey(parentId)) { + @SuppressWarnings("unchecked") + List> children = + (List>) + nodesBySpanId.get(parentId).computeIfAbsent("children", k -> new ArrayList<>()); + children.add(node); + } else { + roots.add(node); + } + } + + String json = Utils.toJSONString(roots, 2); + Path dumpDir = goldFilePath.getParent(); + Path dumpFile = dumpDir.resolve(testName + "-phase" + currentPhaseIndex + 
"-dump.json"); + try { + Files.createDirectories(dumpDir); + Files.writeString(dumpFile, json); + System.out.println("Dumped raw traces: " + dumpFile.toAbsolutePath()); + } catch (IOException e) { + throw new UncheckedIOException("Failed to write trace dump", e); + } + } + + // --- Tree building --- + + private List> buildTree(List spans) { + Map> nodesBySpanId = new HashMap<>(); + List> roots = new ArrayList<>(); + + for (SpanData span : spans) { + nodesBySpanId.put(span.getSpanContext().getSpanId(), normalize(span)); + } + + for (SpanData span : spans) { + Map node = nodesBySpanId.get(span.getSpanContext().getSpanId()); + String parentId = span.getParentSpanContext().getSpanId(); + if (span.getParentSpanContext().isValid() && nodesBySpanId.containsKey(parentId)) { + @SuppressWarnings("unchecked") + List> children = + (List>) nodesBySpanId.get(parentId).get("children"); + if (children == null) { + children = new ArrayList<>(); + nodesBySpanId.get(parentId).put("children", children); + } + children.add(node); + } else { + roots.add(node); + } + } + + sortTree(roots); + return roots; + } + + @SuppressWarnings("unchecked") + private void sortTree(List> nodes) { + nodes.sort(SPAN_COMPARATOR); + for (Map node : nodes) { + List> children = (List>) node.get("children"); + if (children != null && !children.isEmpty()) { + sortTree(children); + } + } + } + + private Map normalize(SpanData span) { + // Use LinkedHashMap for consistent key ordering in JSON output + Map node = new LinkedHashMap<>(); + node.put("name", span.getName()); + if (span.getKind() != SpanKind.INTERNAL) { + node.put("kind", span.getKind().name()); + } + + String dbInstance = normalizeDbInstance(span.getAttributes().get(TraceUtils.TAG_DB)); + if (dbInstance != null) { + node.put(TraceUtils.TAG_DB.getKey(), dbInstance); + } + + String dbType = span.getAttributes().get(TraceUtils.TAG_DB_TYPE); + if (dbType != null) { + node.put(TraceUtils.TAG_DB_TYPE.getKey(), dbType); + } + + String httpMethod = 
span.getAttributes().get(TraceUtils.TAG_HTTP_METHOD); + if (httpMethod != null) { + node.put(TraceUtils.TAG_HTTP_METHOD.getKey(), httpMethod); + } + + Long statusCode = span.getAttributes().get(TraceUtils.TAG_HTTP_STATUS); + if (statusCode != null) { + node.put(TraceUtils.TAG_HTTP_STATUS.getKey(), statusCode.intValue()); + } + + String httpUrl = span.getAttributes().get(TraceUtils.TAG_HTTP_URL); + if (httpUrl != null) { + node.put(TraceUtils.TAG_HTTP_URL.getKey(), normalizeUrl(httpUrl)); + } + + String httpParams = span.getAttributes().get(TraceUtils.TAG_HTTP_PARAMS); + if (httpParams != null) { + node.put(TraceUtils.TAG_HTTP_PARAMS.getKey(), normalizeParams(httpParams)); + } + + // children will be added later during tree building if needed + return node; + } + + private String normalizeUrl(String url) { + url = URL_HOST_PORT.matcher(url).replaceFirst("http://NORMALIZED/"); + url = REPLICA_SUFFIX.matcher(url).replaceAll("_replica_nN"); + return url; + } + + private String normalizeParams(String params) { + // Scrub embedded URLs (distrib.from=http%3A%2F%2F...) 
that contain ports + params = ENCODED_URL_IN_PARAMS.matcher(params).replaceAll("http%3A%2F%2FNORMALIZED"); + String[] parts = params.split("&"); + Arrays.sort(parts); + return String.join("&", parts); + } + + private String normalizeDbInstance(String dbInstance) { + if (dbInstance == null) return null; + return REPLICA_SUFFIX.matcher(dbInstance).replaceAll("_replica_nN"); + } + + // --- Comparison --- + + @SuppressWarnings("unchecked") + private void comparePhase(int phaseIndex, List> actual) { + List> phases = (List>) goldFile.get("phases"); + Map phase = phases.get(phaseIndex); + List> expectedSpans = (List>) phase.get("spans"); + String description = (String) phase.get("description"); + + String expectedJson = Utils.toJSONString(expectedSpans, 2); + String actualJson = Utils.toJSONString(actual, 2); + + if (!expectedJson.equals(actualJson)) { + Path tempFile = writeTempJson(Utils.toJSONString(actual, 2), phaseIndex); + String message = + String.format( + """ + + Trace spans mismatch in phase %d%s + + Expected (gold file): + %s + + Actual output (written to temp file): + %s + + To compare: + diff %s %s + + If intentional, regenerate: + gradlew :solr:modules:opentelemetry:test --tests TestDistributedTracing.%s "-Ptests.jvmargs=-Dregenerate.golden.files=true" -Ptests.useSecurityManager=false + """, + phaseIndex, + description != null ? 
" \"" + description + "\"" : "", + goldFilePath.toAbsolutePath(), + tempFile.toAbsolutePath(), + goldFilePath.toAbsolutePath(), + tempFile.toAbsolutePath(), + testName); + Assert.assertEquals(message, expectedJson, actualJson); + } + } + + @SuppressWarnings("unchecked") + private int countSpansRecursive(List> nodes) { + int count = 0; + for (Map node : nodes) { + count++; + List> children = (List>) node.get("children"); + if (children != null && !children.isEmpty()) { + count += countSpansRecursive(children); + } + } + return count; + } + + // --- Gold file I/O --- + + private static Path resolveGoldFilePath(Class testClass, String testMethodName) { + // Gold files live at: {test-files}/solr/tracing/{TestClassName}/{testMethod}.json + // SolrTestCaseJ4.getFile() may resolve to the build output; walk up to find src/test-files. + Path solrDir = SolrTestCaseJ4.getFile("solr"); + Path testFilesDir = solrDir.getParent(); + // If we're in build output (e.g. build/resources/test/solr), find the source equivalent + String testFilesStr = testFilesDir.toString(); + if (testFilesStr.contains("/build/")) { + testFilesDir = + Path.of(testFilesStr.substring(0, testFilesStr.indexOf("/build/"))) + .resolve("src/test-files"); + } + return testFilesDir + .resolve("solr/tracing") + .resolve(testClass.getSimpleName()) + .resolve(testMethodName + ".json"); + } + + @SuppressWarnings("unchecked") + private static Map loadGoldFile(Path path) { + if (!Files.exists(path)) { + Assert.fail( + "Gold file not found: " + + path.toAbsolutePath() + + "\nRegenerate with: gradlew ... 
\"-Ptests.jvmargs=-Dregenerate.golden.files=true\" -Ptests.useSecurityManager=false"); + } + try { + byte[] bytes = Files.readAllBytes(path); + return (Map) Utils.fromJSON(bytes); + } catch (IOException e) { + throw new UncheckedIOException("Failed to read gold file: " + path, e); + } + } + + private void writeGoldFile() { + Map output = new LinkedHashMap<>(); + List> phases = new ArrayList<>(); + for (int i = 0; i < recordedPhases.size(); i++) { + Map phase = new LinkedHashMap<>(); + phase.put("description", "phase " + i); + phase.put("spans", recordedPhases.get(i)); + phases.add(phase); + } + output.put("phases", phases); + try { + Files.createDirectories(goldFilePath.getParent()); + String json = Utils.toJSONString(output, 2); + Files.writeString(goldFilePath, json); + System.out.println("Regenerated gold file: " + goldFilePath.toAbsolutePath()); + } catch (IOException e) { + throw new UncheckedIOException("Failed to write gold file: " + goldFilePath, e); + } + } + + private Path writeTempJson(String json, int phaseIndex) { + try { + Path tempFile = + Path.of(System.getProperty("java.io.tmpdir"), testName + "-phase" + phaseIndex + ".json"); + Files.writeString(tempFile, json); + return tempFile; + } catch (IOException e) { + throw new UncheckedIOException("Failed to write temp file", e); + } + } +} diff --git a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java index 5433c11fd1d8..44ff21ce6f9d 100644 --- a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java +++ b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java @@ -17,11 +17,11 @@ package org.apache.solr.opentelemetry; +import com.carrotsearch.randomizedtesting.annotations.Seed; import io.opentelemetry.api.GlobalOpenTelemetry; import io.opentelemetry.api.trace.TracerProvider; import 
io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; import io.opentelemetry.sdk.trace.data.SpanData; -import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; @@ -30,11 +30,10 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.solr.client.solrj.SolrRequest; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.MetricsRequest; import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.V2Request; import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.InputStreamResponseParser; @@ -50,6 +49,7 @@ import org.junit.BeforeClass; import org.junit.Test; +@Seed("0") // don't want randomization when testing observability public class TestDistributedTracing extends SolrCloudTestCase { private static final String COLLECTION = "collection1"; @@ -71,7 +71,28 @@ public static void setupCluster() throws Exception { TracerProvider.noop(), GlobalOpenTelemetry.get().getTracerProvider()); - CollectionAdminRequest.createCollection(COLLECTION, "config", 2, 2) + // Create collection with explicit replica placement for deterministic leader assignment. + // First replica on each shard becomes the leader. 
+ String node0 = cluster.getJettySolrRunner(0).getNodeName(); + String node1 = cluster.getJettySolrRunner(1).getNodeName(); + String node2 = cluster.getJettySolrRunner(2).getNodeName(); + String node3 = cluster.getJettySolrRunner(3).getNodeName(); + CollectionAdminRequest.createCollection(COLLECTION, "config", 2, 1) + .setCreateNodeSet("EMPTY") + .process(cluster.getSolrClient()); + // shard1: node0 = leader, node1 = follower + CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1") + .setNode(node0) + .process(cluster.getSolrClient()); + CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1") + .setNode(node1) + .process(cluster.getSolrClient()); + // shard2: node2 = leader, node3 = follower + CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard2") + .setNode(node2) + .process(cluster.getSolrClient()); + CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard2") + .setNode(node3) .process(cluster.getSolrClient()); cluster.waitForActiveCollection(COLLECTION, 2, 4); } @@ -87,121 +108,76 @@ private void resetSpanData() { } @Test - public void test() throws IOException, SolrServerException { - // TODO it would be clearer if we could compare the complete Span tree between reality - // and what we assert it looks like in a structured visual way. - CloudSolrClient cloudClient = cluster.getSolrClient(); + public void test() throws Exception { + var verifier = new GoldFileTraceVerifier(getClass(), "test"); + // TODO use a CloudSolrClient. However it's not yet deterministic due to use of random not + // aligned to the test seed. 
+ var client = cluster.getJettySolrRunner(0).getSolrClient(); // Indexing - cloudClient.add(COLLECTION, sdoc("id", "1")); - var finishedSpans = getAndClearSpans(1); - finishedSpans.removeIf( - span -> - span.getAttributes().get(TraceUtils.TAG_HTTP_URL) == null - || !span.getAttributes().get(TraceUtils.TAG_HTTP_URL).endsWith("/update")); - assertEquals(2, finishedSpans.size()); - assertOneSpanIsChildOfAnother(finishedSpans); - // core because cloudClient routes to core - assertEquals("post:/{core}/update", finishedSpans.get(0).getName()); - assertCoreName(finishedSpans.get(0), COLLECTION); - - cloudClient.add(COLLECTION, sdoc("id", "2")); - cloudClient.add(COLLECTION, sdoc("id", "3")); - cloudClient.add(COLLECTION, sdoc("id", "4")); - cloudClient.commit(COLLECTION); - getAndClearSpans(); + new UpdateRequest() + .add(List.of(sdoc("id", "1"), sdoc("id", "2"), sdoc("id", "3"), sdoc("id", "4"))) + .commit(client, COLLECTION); + verifier.verifyPhase(); // Searching - cloudClient.query(COLLECTION, new SolrQuery("*:*")); - finishedSpans = getAndClearSpans(1); - finishedSpans.removeIf( - span -> - span.getAttributes().get(TraceUtils.TAG_HTTP_URL) == null - || !span.getAttributes().get(TraceUtils.TAG_HTTP_URL).endsWith("/select")); - // one from client to server, 2 for execute query, 2 for fetching documents - assertEquals(5, finishedSpans.size()); - var parentTraceId = getRootTraceId(finishedSpans); - for (var span : finishedSpans) { - if (isRootSpan(span)) { - continue; - } - assertEquals(span.getParentSpanContext().getTraceId(), parentTraceId); - assertEquals(span.getTraceId(), parentTraceId); - } - assertEquals("get:/{core}/select", finishedSpans.get(0).getName()); - assertCoreName(finishedSpans.get(0), COLLECTION); + client.query(COLLECTION, new SolrQuery("*:*")); + verifier.verifyPhase(); + + verifier.done(); } @Test public void testAdminApi() throws Exception { - CloudSolrClient cloudClient = cluster.getSolrClient(); + var verifier = new 
GoldFileTraceVerifier(getClass(), "testAdminApi"); + // TODO use a CloudSolrClient. However it's not yet deterministic due to use of random not + // aligned to the test seed. + var client = cluster.getJettySolrRunner(0).getSolrClient(); MetricsRequest request = new MetricsRequest(); request.setResponseParser(new InputStreamResponseParser(MetricUtils.PROMETHEUS_METRICS_WT)); - NamedList rsp = cloudClient.request(request); + NamedList rsp = client.request(request); ((InputStream) rsp.get("stream")).close(); - var finishedSpans = getAndClearSpans(1); - assertEquals("get:/admin/metrics", finishedSpans.get(0).getName()); + verifier.verifyPhase(); - CollectionAdminRequest.listCollections(cloudClient); - finishedSpans = getAndClearSpans(1); - assertEquals("list:/admin/collections", finishedSpans.get(0).getName()); + CollectionAdminRequest.listCollections(client); + verifier.verifyPhase(); + + verifier.done(); } @Test public void testV2Api() throws Exception { - CloudSolrClient cloudClient = cluster.getSolrClient(); + var verifier = new GoldFileTraceVerifier(getClass(), "testV2Api"); + // TODO use a CloudSolrClient. However it's not yet deterministic due to use of random not + // aligned to the test seed. 
+ var client = cluster.getJettySolrRunner(0).getSolrClient(); new V2Request.Builder("/collections/" + COLLECTION + "/reload") .withMethod(SolrRequest.METHOD.POST) .withPayload("{}") .build() - .process(cloudClient); - var finishedSpans = getAndClearSpans(1); - assertEquals("post:/collections/{collection}/reload", finishedSpans.get(0).getName()); - assertCollectionName(finishedSpans.get(0), COLLECTION); + .process(client); + verifier.verifyPhase(); new V2Request.Builder("/c/" + COLLECTION + "/update/json") .withMethod(SolrRequest.METHOD.POST) - .withPayload("{\n" + " \"id\" : \"9\"\n" + "}") + .withPayload("{\"id\":\"9\"}") .withParams(params("commit", "true")) .build() - .process(cloudClient); - finishedSpans = getAndClearSpans(1); - assertEquals("post:/c/{collection}/update/json", finishedSpans.get(0).getName()); - assertCollectionName(finishedSpans.get(0), COLLECTION); + .process(client); + verifier.verifyPhase(); final V2Response v2Response = new V2Request.Builder("/c/" + COLLECTION + "/select") .withMethod(SolrRequest.METHOD.GET) .withParams(params("q", "id:9")) .build() - .process(cloudClient); - finishedSpans = getAndClearSpans(1); - assertEquals("get:/c/{collection}/select", finishedSpans.get(0).getName()); - assertCollectionName(finishedSpans.get(0), COLLECTION); + .process(client); + verifier.verifyPhase(); assertEquals(1, ((SolrDocumentList) v2Response.getResponse().get("response")).getNumFound()); - } - /** - * Best effort test of the apache http client tracing. 
the test assumes the request uses the http - * client but there is no way to enforce it, so when the api will be rewritten this test will - * become obsolete - */ - @Test - public void testApacheClient() throws Exception { - CollectionAdminRequest.ColStatus a1 = CollectionAdminRequest.collectionStatus(COLLECTION); - CollectionAdminResponse r1 = a1.process(cluster.getSolrClient()); - assertEquals(0, r1.getStatus()); - var finishedSpans = getAndClearSpans(1); - var parentTraceId = getRootTraceId(finishedSpans); - for (var span : finishedSpans) { - if (isRootSpan(span)) { - continue; - } - assertEquals(span.getParentSpanContext().getTraceId(), parentTraceId); - assertEquals(span.getTraceId(), parentTraceId); - } + verifier.done(); } @Test @@ -316,17 +292,6 @@ private static void assertCoreName(SpanData span, String collection) { assertTrue(span.getAttributes().get(TraceUtils.TAG_DB).startsWith(collection + "_")); } - private void assertOneSpanIsChildOfAnother(List finishedSpans) { - SpanData child = finishedSpans.get(0); - SpanData parent = finishedSpans.get(1); - if (isRootSpan(child)) { - var temp = parent; - parent = child; - child = temp; - } - assertEquals(child.getParentSpanContext().getTraceId(), parent.getTraceId()); - assertEquals(child.getTraceId(), parent.getTraceId()); - } static List getAndClearSpans() { return getAndClearSpans(0); From 324ac6f08d1cafed8be572b91d960bfa9a769db1 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Tue, 16 Jun 2026 17:11:33 -0400 Subject: [PATCH 2/3] HTTP 2 clients can do things more asynchronously, leading to less determinism in what we test --- .../org/apache/solr/opentelemetry/TestDistributedTracing.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java index 44ff21ce6f9d..f02ad9475f04 100644 --- 
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java +++ b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java @@ -58,6 +58,8 @@ public class TestDistributedTracing extends SolrCloudTestCase { public static void setupCluster() throws Exception { // force early init CustomTestOtelTracerConfigurator.prepareForTest(); + // HTTP 2 clients can do things more asynchronously, leading to less determinism in what we test + System.setProperty("solr.http1", "true"); configureCluster(4) .addConfig("config", TEST_PATH().resolve("collection1").resolve("conf")) From ec7a0f87d27bcf95804828193b1f549b68ec0565 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Tue, 16 Jun 2026 18:20:37 -0400 Subject: [PATCH 3/3] tidy --- .../apache/solr/opentelemetry/GoldFileTraceVerifier.java | 9 ++++----- .../solr/opentelemetry/TestDistributedTracing.java | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java index 6f94117d1d78..adb32ac8bed1 100644 --- a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java +++ b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/GoldFileTraceVerifier.java @@ -67,9 +67,9 @@ * *

Dumping Raw Traces (no normalization, all attributes)

  *
- * <p>Also requires {@code -Ptests.useSecurityManager=false} (writes to source tree).
- * Dump files are written alongside gold files as {@code {testMethod}-phase{N}-dump.json}.
- * Output paths are logged to stdout (visible in {@code build/test-results/test/outputs/OUTPUT-*.txt}).
+ * <p>Also requires {@code -Ptests.useSecurityManager=false} (writes to source tree). Dump files are
+ * written alongside gold files as {@code {testMethod}-phase{N}-dump.json}. Output paths are logged
+ * to stdout (visible in {@code build/test-results/test/outputs/OUTPUT-*.txt}).
  *
  * <pre>
  * gradlew :solr:modules:opentelemetry:test --tests TestDistributedTracing.testV2Api \
@@ -215,8 +215,7 @@ private void dumpRawSpans(List<SpanData> spans) {
       node.put("endEpochNanos", span.getEndEpochNanos());
       // All attributes
       Map<String, Object> attrs = new LinkedHashMap<>();
-      span.getAttributes()
-          .forEach((key, value) -> attrs.put(key.getKey(), value));
+      span.getAttributes().forEach((key, value) -> attrs.put(key.getKey(), value));
       if (!attrs.isEmpty()) {
         node.put("attributes", attrs);
       }
diff --git a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
index f02ad9475f04..f73d47060032 100644
--- a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
+++ b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
@@ -294,7 +294,6 @@ private static void assertCoreName(SpanData span, String collection) {
     assertTrue(span.getAttributes().get(TraceUtils.TAG_DB).startsWith(collection + "_"));
   }
 
-
   static List<SpanData> getAndClearSpans() {
     return getAndClearSpans(0);
   }