-
Notifications
You must be signed in to change notification settings - Fork 336
Add Additional OTel JVM Runtime Metrics and Gate "Developmental" Metrics #11411
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,9 +2,12 @@ | |
|
|
||
| import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.COUNTER; | ||
| import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.GAUGE; | ||
| import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.HISTOGRAM; | ||
| import static datadog.trace.bootstrap.otel.metrics.OtelInstrumentType.UP_DOWN_COUNTER; | ||
|
|
||
| import com.sun.management.GarbageCollectionNotificationInfo; | ||
| import com.sun.management.OperatingSystemMXBean; | ||
| import com.sun.management.UnixOperatingSystemMXBean; | ||
| import datadog.trace.bootstrap.otel.api.common.AttributeKey; | ||
| import datadog.trace.bootstrap.otel.api.common.Attributes; | ||
| import datadog.trace.bootstrap.otel.common.OtelInstrumentationScope; | ||
|
|
@@ -16,17 +19,24 @@ | |
| import datadog.trace.bootstrap.otel.metrics.data.OtelRunnableObservable; | ||
| import java.lang.management.BufferPoolMXBean; | ||
| import java.lang.management.ClassLoadingMXBean; | ||
| import java.lang.management.GarbageCollectorMXBean; | ||
| import java.lang.management.ManagementFactory; | ||
| import java.lang.management.MemoryMXBean; | ||
| import java.lang.management.MemoryPoolMXBean; | ||
| import java.lang.management.MemoryUsage; | ||
| import java.lang.management.ThreadMXBean; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.Locale; | ||
| import java.util.concurrent.atomic.AtomicBoolean; | ||
| import java.util.function.Consumer; | ||
| import java.util.function.Function; | ||
| import java.util.function.ToLongFunction; | ||
| import javax.management.Notification; | ||
| import javax.management.NotificationEmitter; | ||
| import javax.management.NotificationFilter; | ||
| import javax.management.NotificationListener; | ||
| import javax.management.openmbean.CompositeData; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
|
|
@@ -45,13 +55,27 @@ public final class JvmOtlpRuntimeMetrics { | |
| AttributeKey.stringKey("jvm.memory.pool.name"); | ||
| /** Attribute key carrying the buffer pool name on jvm.buffer.* data points. */ | ||
| private static final AttributeKey<String> BUFFER_POOL = | ||
| AttributeKey.stringKey("jvm.buffer.pool.name"); | ||
| /** Attribute keys attached to jvm.gc.duration data points: collector name, action, and cause. */ | ||
| private static final AttributeKey<String> GC_NAME = AttributeKey.stringKey("jvm.gc.name"); | ||
| private static final AttributeKey<String> GC_ACTION = AttributeKey.stringKey("jvm.gc.action"); | ||
| private static final AttributeKey<String> GC_CAUSE = AttributeKey.stringKey("jvm.gc.cause"); | ||
| /** Pre-built attribute sets that tag a data point as heap or non-heap memory. */ | ||
| private static final Attributes HEAP_ATTRS = Attributes.of(MEMORY_TYPE, "heap"); | ||
| private static final Attributes NON_HEAP_ATTRS = Attributes.of(MEMORY_TYPE, "non_heap"); | ||
|
|
||
| /** Explicit bucket advice for jvm.gc.duration in seconds (matches OTel runtime-telemetry). */ | ||
| private static final List<Double> GC_DURATION_BUCKETS = Arrays.asList(0.01, 0.1, 1.0, 10.0); | ||
|
|
||
| /** Notification type string that the GC listener's filter compares each notification against. */ | ||
| private static final String GC_NOTIFICATION_TYPE = "com.sun.management.gc.notification"; | ||
|
|
||
| /** Flipped from false to true by the first start call so that later calls return early. */ | ||
| private static final AtomicBoolean started = new AtomicBoolean(false); | ||
|
|
||
| /** Registers all JVM runtime metric instruments on the bootstrap-level metric registry. */ | ||
| public static void start() { | ||
| /** | ||
| * Registers all JVM runtime metric instruments on the bootstrap-level metric registry. | ||
| * | ||
| * @param emitExperimentalMetrics when {@code true} (the spec-aligned default), metrics marked as | ||
| * <em>Development</em> in the OTel semantic conventions are also registered. When {@code | ||
| * false}, only metrics with stable status are emitted. | ||
| */ | ||
| public static void start(boolean emitExperimentalMetrics) { | ||
| if (!started.compareAndSet(false, true)) { | ||
| return; | ||
| } | ||
|
|
@@ -66,20 +90,31 @@ public static void start() { | |
| ((Attributes) attributes) | ||
| .forEach((a, v) -> visitor.visitAttribute(a.getType().ordinal(), a.getKey(), v))); | ||
|
|
||
| // Stable metrics — always registered. | ||
| registerMemoryMetrics(); | ||
| registerBufferMetrics(); | ||
| registerThreadMetrics(); | ||
| registerClassLoadingMetrics(); | ||
| registerCpuMetrics(); | ||
| log.debug("Started OTLP runtime metrics with OTel-native naming (jvm.*)"); | ||
| registerGcDurationMetric(emitExperimentalMetrics); | ||
|
|
||
| // Development-status metrics — gated by the experimental flag. | ||
| if (emitExperimentalMetrics) { | ||
| registerMemoryInitMetric(); | ||
| registerBufferMetrics(); | ||
| registerSystemCpuMetrics(); | ||
| registerFileDescriptorMetrics(); | ||
| } | ||
| log.debug( | ||
| "Started OTLP runtime metrics with OTel-native naming (jvm.*), experimental={}", | ||
| emitExperimentalMetrics); | ||
| } catch (Exception e) { | ||
| log.error("Failed to start JVM OTLP runtime metrics", e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.init, | ||
| * jvm.memory.used_after_last_gc — all UpDownCounter per spec. | ||
| * jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.used_after_last_gc — all | ||
| * Stable per spec. All UpDownCounter. | ||
| */ | ||
| private static void registerMemoryMetrics() { | ||
| MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean(); | ||
|
|
@@ -118,37 +153,21 @@ private static void registerMemoryMetrics() { | |
| UP_DOWN_COUNTER, | ||
| storage -> { | ||
| long heapMax = memoryBean.getHeapMemoryUsage().getMax(); | ||
| if (heapMax > 0) { | ||
| if (heapMax != -1) { | ||
| storage.recordLong(heapMax, HEAP_ATTRS); | ||
| } | ||
| long nonHeapMax = memoryBean.getNonHeapMemoryUsage().getMax(); | ||
| if (nonHeapMax > 0) { | ||
| if (nonHeapMax != -1) { | ||
| storage.recordLong(nonHeapMax, NON_HEAP_ATTRS); | ||
| } | ||
| for (MemoryPoolMXBean pool : pools) { | ||
| long max = pool.getUsage().getMax(); | ||
| if (max > 0) { | ||
| if (max != -1) { | ||
| storage.recordLong(max, poolAttributes(pool)); | ||
| } | ||
| } | ||
| }); | ||
|
|
||
| registerLongObservable( | ||
| "jvm.memory.init", | ||
| "Measure of initial memory requested.", | ||
| "By", | ||
| UP_DOWN_COUNTER, | ||
| storage -> { | ||
| long heapInit = memoryBean.getHeapMemoryUsage().getInit(); | ||
| if (heapInit > 0) { | ||
| storage.recordLong(heapInit, HEAP_ATTRS); | ||
| } | ||
| long nonHeapInit = memoryBean.getNonHeapMemoryUsage().getInit(); | ||
| if (nonHeapInit > 0) { | ||
| storage.recordLong(nonHeapInit, NON_HEAP_ATTRS); | ||
| } | ||
| }); | ||
|
|
||
| registerLongObservable( | ||
| "jvm.memory.used_after_last_gc", | ||
| "Measure of memory used after the most recent garbage collection event.", | ||
|
|
@@ -164,6 +183,26 @@ private static void registerMemoryMetrics() { | |
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Registers jvm.memory.init (UpDownCounter, Development): the initial memory size reported for | ||
| * heap and for non-heap memory. A reading of -1 is not recorded. | ||
| */ | ||
| private static void registerMemoryInitMetric() { | ||
| MemoryMXBean memory = ManagementFactory.getMemoryMXBean(); | ||
| registerLongObservable( | ||
| "jvm.memory.init", | ||
| "Measure of initial memory requested.", | ||
| "By", | ||
| UP_DOWN_COUNTER, | ||
| sink -> { | ||
| // Heap first, then non-heap; each value is read fresh on every collection. | ||
| long initialHeap = memory.getHeapMemoryUsage().getInit(); | ||
| if (initialHeap != -1) { | ||
| sink.recordLong(initialHeap, HEAP_ATTRS); | ||
| } | ||
| long initialNonHeap = memory.getNonHeapMemoryUsage().getInit(); | ||
| if (initialNonHeap != -1) { | ||
| sink.recordLong(initialNonHeap, NON_HEAP_ATTRS); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| /** jvm.buffer.* (UpDownCounter, Development) — direct + mapped pool metrics. */ | ||
| private static void registerBufferMetrics() { | ||
| List<BufferPoolMXBean> bufferPools = | ||
|
|
@@ -234,10 +273,7 @@ private static void registerClassLoadingMetrics() { | |
| * Stable per spec. | ||
| */ | ||
| private static void registerCpuMetrics() { | ||
| java.lang.management.OperatingSystemMXBean rawOsBean = | ||
| ManagementFactory.getOperatingSystemMXBean(); | ||
| OperatingSystemMXBean osBean = | ||
| rawOsBean instanceof OperatingSystemMXBean ? (OperatingSystemMXBean) rawOsBean : null; | ||
| OperatingSystemMXBean osBean = sunOsBean(); | ||
|
|
||
| if (osBean != null) { | ||
| registerDoubleObservable( | ||
|
|
@@ -263,6 +299,9 @@ private static void registerCpuMetrics() { | |
| storage.recordDouble(cpuLoad, Attributes.empty()); | ||
| } | ||
| }); | ||
| } else { | ||
| log.debug( | ||
| "com.sun.management.OperatingSystemMXBean not available; skipping jvm.cpu.time and jvm.cpu.recent_utilization"); | ||
| } | ||
|
|
||
| registerLongObservable( | ||
|
|
@@ -274,6 +313,164 @@ private static void registerCpuMetrics() { | |
| storage.recordLong(Runtime.getRuntime().availableProcessors(), Attributes.empty())); | ||
| } | ||
|
|
||
| /** | ||
| * Registers jvm.gc.duration (Histogram, Stable). Values are recorded synchronously by a JMX | ||
| * notification listener added to every {@link GarbageCollectorMXBean} that is also a {@link | ||
| * NotificationEmitter}. | ||
| * | ||
| * <p>When {@code GarbageCollectionNotificationInfo} is not available the metric is skipped and | ||
| * only a debug message is logged. | ||
| * | ||
| * @param captureGcCause whether recorded points also carry {@code jvm.gc.cause}; gated because | ||
| * cause is not part of the stable attribute set in the OTel semantic conventions | ||
| */ | ||
| private static void registerGcDurationMetric(boolean captureGcCause) { | ||
| if (!isGcNotificationInfoAvailable()) { | ||
| log.debug( | ||
| "com.sun.management.GarbageCollectionNotificationInfo not available; skipping jvm.gc.duration"); | ||
| return; | ||
| } | ||
| OtelMetricStorage histogram = | ||
| registerDoubleHistogramStorage( | ||
| "jvm.gc.duration", "Duration of JVM garbage collection actions.", "s", GC_DURATION_BUCKETS); | ||
| GcNotificationListener gcListener = new GcNotificationListener(histogram, captureGcCause); | ||
| NotificationFilter gcOnly = candidate -> GC_NOTIFICATION_TYPE.equals(candidate.getType()); | ||
| for (GarbageCollectorMXBean collector : ManagementFactory.getGarbageCollectorMXBeans()) { | ||
| if (!(collector instanceof NotificationEmitter)) { | ||
| continue; | ||
| } | ||
| NotificationEmitter emitter = (NotificationEmitter) collector; | ||
| emitter.addNotificationListener(gcListener, gcOnly, null); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Checks whether {@code com.sun.management.GarbageCollectionNotificationInfo} can be loaded on | ||
| * this JVM. The class is looked up by name, without initializing it, through the class loader of | ||
| * {@link GarbageCollectorMXBean}. | ||
| * | ||
| * @return {@code true} if the class loads; {@code false} if it is missing or fails to link | ||
| */ | ||
| private static boolean isGcNotificationInfoAvailable() { | ||
| try { | ||
| Class.forName( | ||
| "com.sun.management.GarbageCollectionNotificationInfo", | ||
| false, | ||
| GarbageCollectorMXBean.class.getClassLoader()); | ||
| return true; | ||
| } catch (ClassNotFoundException | LinkageError e) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd widen this to catch |
||
| // Class.forName may also fail with a LinkageError; report that as unavailable rather than | ||
| // letting an Error escape from an availability probe. | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| private static void recordGcDuration( | ||
| OtelMetricStorage storage, GarbageCollectionNotificationInfo info, boolean captureGcCause) { | ||
| double durationSeconds = info.getGcInfo().getDuration() / 1000d; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably add a null check in case
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd suggesting adding that check in |
||
| Attributes attrs = | ||
| captureGcCause | ||
| ? Attributes.of( | ||
| GC_NAME, info.getGcName(), | ||
| GC_ACTION, info.getGcAction(), | ||
| GC_CAUSE, info.getGcCause()) | ||
| : Attributes.of( | ||
| GC_NAME, info.getGcName(), | ||
| GC_ACTION, info.getGcAction()); | ||
| storage.recordDouble(durationSeconds, attrs); | ||
| } | ||
|
|
||
| /** | ||
| * Listener fired by the JVM on the JMX notification thread when a GC completes. | ||
| * | ||
| * <p>Each notification is decoded into a {@link GarbageCollectionNotificationInfo} and handed to | ||
| * {@code recordGcDuration}. Notifications whose user data is not a {@link CompositeData}, or | ||
| * whose decoded payload carries no GC info, are ignored instead of failing with a | ||
| * {@link ClassCastException} or {@link NullPointerException}. | ||
| */ | ||
| static final class GcNotificationListener implements NotificationListener { | ||
| private final OtelMetricStorage storage; | ||
| private final boolean captureGcCause; | ||
|
|
||
| /** | ||
| * @param storage histogram storage that receives one sample per GC notification | ||
| * @param captureGcCause whether samples also carry the {@code jvm.gc.cause} attribute | ||
| */ | ||
| GcNotificationListener(OtelMetricStorage storage, boolean captureGcCause) { | ||
| this.storage = storage; | ||
| this.captureGcCause = captureGcCause; | ||
| } | ||
|
|
||
| @Override | ||
| public void handleNotification(Notification notification, Object handback) { | ||
| Object userData = notification.getUserData(); | ||
| // Also covers null user data, for which from(...) would return null. | ||
| if (!(userData instanceof CompositeData)) { | ||
| return; | ||
| } | ||
| GarbageCollectionNotificationInfo info = | ||
| GarbageCollectionNotificationInfo.from((CompositeData) userData); | ||
| // recordGcDuration dereferences getGcInfo() directly, so skip payloads without it. | ||
| if (info.getGcInfo() == null) { | ||
| return; | ||
| } | ||
| recordGcDuration(storage, info, captureGcCause); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Registers the two system CPU gauges, both Development per spec: jvm.system.cpu.utilization, | ||
| * which needs the {@code com.sun.management} OS bean and is skipped with a debug log when that | ||
| * bean is absent, and jvm.system.cpu.load_1m, which reads the standard OS bean. Negative | ||
| * readings are not recorded. | ||
| */ | ||
| private static void registerSystemCpuMetrics() { | ||
| OperatingSystemMXBean extendedOsBean = sunOsBean(); | ||
| if (extendedOsBean == null) { | ||
| log.debug( | ||
| "com.sun.management.OperatingSystemMXBean not available; skipping jvm.system.cpu.utilization"); | ||
| } else { | ||
| registerDoubleObservable( | ||
| "jvm.system.cpu.utilization", | ||
| "Recent CPU utilization for the whole system as reported by the JVM.", | ||
| "1", | ||
| GAUGE, | ||
| sink -> { | ||
| double systemCpuLoad = extendedOsBean.getSystemCpuLoad(); | ||
| if (systemCpuLoad >= 0) { | ||
| sink.recordDouble(systemCpuLoad, Attributes.empty()); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| java.lang.management.OperatingSystemMXBean platformOsBean = | ||
| ManagementFactory.getOperatingSystemMXBean(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's quite a few calls to It might actually be more readable and consistent to just get the MBean with |
||
| registerDoubleObservable( | ||
| "jvm.system.cpu.load_1m", | ||
| "Average CPU load of the whole system for the last minute as reported by the JVM.", | ||
| "{run_queue_item}", | ||
| GAUGE, | ||
| sink -> { | ||
| double loadAverage = platformOsBean.getSystemLoadAverage(); | ||
| if (loadAverage >= 0) { | ||
| sink.recordDouble(loadAverage, Attributes.empty()); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Registers jvm.file_descriptor.count and jvm.file_descriptor.limit, both UpDownCounters with | ||
| * Development status per spec. When the platform OS bean is not a {@link | ||
| * UnixOperatingSystemMXBean} nothing is registered and a debug message is logged. Negative | ||
| * readings are not recorded. | ||
| */ | ||
| private static void registerFileDescriptorMetrics() { | ||
| java.lang.management.OperatingSystemMXBean platformOsBean = | ||
| ManagementFactory.getOperatingSystemMXBean(); | ||
| boolean unixBeanPresent = platformOsBean instanceof UnixOperatingSystemMXBean; | ||
| if (!unixBeanPresent) { | ||
| log.debug( | ||
| "com.sun.management.UnixOperatingSystemMXBean not available (non-Unix JVM); skipping jvm.file_descriptor.count and jvm.file_descriptor.limit"); | ||
| return; | ||
| } | ||
| UnixOperatingSystemMXBean descriptorSource = (UnixOperatingSystemMXBean) platformOsBean; | ||
|
|
||
| registerLongObservable( | ||
| "jvm.file_descriptor.count", | ||
| "Number of open file descriptors as reported by the JVM.", | ||
| "{file_descriptor}", | ||
| UP_DOWN_COUNTER, | ||
| sink -> { | ||
| long openDescriptors = descriptorSource.getOpenFileDescriptorCount(); | ||
| if (openDescriptors >= 0) { | ||
| sink.recordLong(openDescriptors, Attributes.empty()); | ||
| } | ||
| }); | ||
|
|
||
| registerLongObservable( | ||
| "jvm.file_descriptor.limit", | ||
| "Measure of max open file descriptors as reported by the JVM.", | ||
| "{file_descriptor}", | ||
| UP_DOWN_COUNTER, | ||
| sink -> { | ||
| long maxDescriptors = descriptorSource.getMaxFileDescriptorCount(); | ||
| if (maxDescriptors >= 0) { | ||
| sink.recordLong(maxDescriptors, Attributes.empty()); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Looks up the platform OS bean and exposes it as the {@code com.sun.management} variant. | ||
| * | ||
| * @return the platform OS bean cast to the {@code com.sun.management} type, or {@code null} when | ||
| * the bean on this JVM does not implement it | ||
| */ | ||
| private static OperatingSystemMXBean sunOsBean() { | ||
| java.lang.management.OperatingSystemMXBean platformBean = | ||
| ManagementFactory.getOperatingSystemMXBean(); | ||
| if (platformBean instanceof OperatingSystemMXBean) { | ||
| return (OperatingSystemMXBean) platformBean; | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
| /** | ||
| * Registers an UpDownCounter that iterates each platform buffer pool and records {@code getter} | ||
| * with the {@code jvm.buffer.pool.name} attribute. Skips negative readings. | ||
|
|
@@ -332,6 +529,21 @@ private static void registerObservable( | |
| JVM_SCOPE, new OtelRunnableObservable(() -> callback.accept(storage))); | ||
| } | ||
|
|
||
| /** | ||
| * Registers a synchronous double histogram against the bootstrap registry under the JVM scope. | ||
| * The storage is handed back so the caller can record values itself, e.g. from a JMX | ||
| * notification listener. | ||
| * | ||
| * @param name instrument name | ||
| * @param description instrument description | ||
| * @param unit instrument unit | ||
| * @param bucketBoundaries bucket boundaries passed to the histogram storage | ||
| * @return the storage registered for the histogram | ||
| */ | ||
| private static OtelMetricStorage registerDoubleHistogramStorage( | ||
| String name, String description, String unit, List<Double> bucketBoundaries) { | ||
| OtelInstrumentBuilder histogramBuilder = OtelInstrumentBuilder.ofDoubles(name, HISTOGRAM); | ||
| histogramBuilder.setDescription(description); | ||
| histogramBuilder.setUnit(unit); | ||
| return OtelMetricRegistry.INSTANCE.registerStorage( | ||
| JVM_SCOPE, | ||
| histogramBuilder.descriptor(), | ||
| built -> OtelMetricStorage.newHistogramStorage(built, bucketBoundaries)); | ||
| } | ||
|
|
||
| /** Registers metric storage for the instrument against the bootstrap registry. */ | ||
| private static OtelMetricStorage registerStorage(OtelInstrumentDescriptor descriptor) { | ||
| Function<OtelInstrumentDescriptor, OtelMetricStorage> storageFactory; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe `findOsBean()`?