@@ -3,17 +3,44 @@ import { _INTERNAL_safeDateNow, defineIntegration, flushIfServerless, metrics }
33
44const INTEGRATION_NAME = 'NodeRuntimeMetrics' ; // used as the integration's `name`
55const DEFAULT_INTERVAL_MS = 30_000 ; // fallback when `options.collectionIntervalMs` is not provided
6+ const EVENT_LOOP_DELAY_RESOLUTION_MS = 10 ; // resolution passed to `monitorEventLoopDelay`; also subtracted from each reported delay value
67
78export interface NodeRuntimeMetricsOptions {
89 /**
9- * Which metric groups to collect. All groups are enabled by default.
10+ * Which metrics to collect.
11+ *
12+ * Default on (8 metrics):
13+ * - `cpuUtilization` — ratio of CPU time to wall-clock time (can exceed 1.0 on multi-core systems)
14+ * - `memRss` — Resident Set Size (actual memory footprint)
15+ * - `memHeapUsed` — V8 heap currently in use
16+ * - `memHeapTotal` — total V8 heap allocated (headroom paired with `memHeapUsed`)
17+ * - `eventLoopDelayP50` — median event loop delay (baseline latency)
18+ * - `eventLoopDelayP99` — 99th percentile event loop delay (tail latency / spikes)
19+ * - `eventLoopUtilization` — fraction of time the event loop was active
20+ * - `uptime` — process uptime (detect restarts/crashes)
21+ *
22+ * Default off (opt-in):
23+ * - `cpuTime` — raw user and system CPU time, in seconds, consumed since the previous collection
24+ * - `memExternal` — external and ArrayBuffer memory, reported as two separate metrics (relevant for native addons)
25+ * - `eventLoopDelayMin` / `eventLoopDelayMax` / `eventLoopDelayMean` / `eventLoopDelayP90` — additional event loop delay statistics (minimum, maximum, mean, 90th percentile)
1026 */
1127 collect ?: {
12- cpu ?: boolean ;
13- memory ?: boolean ;
14- eventLoopDelay ?: boolean ;
28+ // Default on
29+ cpuUtilization ?: boolean ;
30+ memHeapUsed ?: boolean ;
31+ memRss ?: boolean ;
32+ eventLoopDelayP99 ?: boolean ;
1533 eventLoopUtilization ?: boolean ;
1634 uptime ?: boolean ;
35+ // Default off (except `memHeapTotal` and `eventLoopDelayP50`, which are on by default)
36+ cpuTime ?: boolean ;
37+ memHeapTotal ?: boolean ;
38+ memExternal ?: boolean ;
39+ eventLoopDelayMin ?: boolean ;
40+ eventLoopDelayMax ?: boolean ;
41+ eventLoopDelayMean ?: boolean ;
42+ eventLoopDelayP50 ?: boolean ;
43+ eventLoopDelayP90 ?: boolean ;
1744 } ;
1845 /**
1946 * How often to collect metrics, in milliseconds.
@@ -37,62 +64,94 @@ export interface NodeRuntimeMetricsOptions {
3764export const nodeRuntimeMetricsIntegration = defineIntegration ( ( options : NodeRuntimeMetricsOptions = { } ) => {
3865 const collectionIntervalMs = options . collectionIntervalMs ?? DEFAULT_INTERVAL_MS ;
3966 const collect = {
40- cpu : true ,
41- memory : true ,
42- eventLoopDelay : true ,
67+ // Default on
68+ cpuUtilization : true ,
69+ memHeapUsed : true ,
70+ memHeapTotal : true ,
71+ memRss : true ,
72+ eventLoopDelayP50 : true ,
73+ eventLoopDelayP99 : true ,
4374 eventLoopUtilization : true ,
4475 uptime : true ,
76+ // Default off
77+ cpuTime : false ,
78+ memExternal : false ,
79+ eventLoopDelayMin : false ,
80+ eventLoopDelayMax : false ,
81+ eventLoopDelayMean : false ,
82+ eventLoopDelayP90 : false ,
4583 ...options . collect ,
4684 } ;
4785
86+ const needsEventLoopDelay =
87+ collect . eventLoopDelayP99 ||
88+ collect . eventLoopDelayMin ||
89+ collect . eventLoopDelayMax ||
90+ collect . eventLoopDelayMean ||
91+ collect . eventLoopDelayP50 ||
92+ collect . eventLoopDelayP90 ;
93+
94+ const needsCpu = collect . cpuUtilization || collect . cpuTime ;
95+
4896 let intervalId : ReturnType < typeof setInterval > | undefined ;
4997 let prevCpuUsage : NodeJS . CpuUsage | undefined ;
5098 let prevElu : ReturnType < typeof performance . eventLoopUtilization > | undefined ;
51- let prevFlushTime : number ;
99+ let prevFlushTime : number = 0 ;
52100 let eventLoopDelayHistogram : ReturnType < typeof monitorEventLoopDelay > | undefined ;
53101
102+ const resolutionNs = EVENT_LOOP_DELAY_RESOLUTION_MS * 1e6 ; // configured histogram resolution, converted from milliseconds to nanoseconds
103+ const nsToS = ( ns : number ) : number => Math . max ( 0 , ( ns - resolutionNs ) / 1e9 ) ; // nanoseconds to seconds; subtracts the resolution to normalize out sampling overhead, clamped at 0
104+
54105 function collectMetrics ( ) : void {
55106 const now = _INTERNAL_safeDateNow ( ) ;
56107 const elapsed = now - prevFlushTime ;
57108
58- if ( collect . cpu && prevCpuUsage !== undefined ) {
109+ if ( needsCpu && prevCpuUsage !== undefined ) {
59110 const delta = process . cpuUsage ( prevCpuUsage ) ;
60- metrics . gauge ( 'node.runtime.cpu.user' , delta . user / 1e6 , { unit : 'second' } ) ;
61- metrics . gauge ( 'node.runtime.cpu.system' , delta . system / 1e6 , { unit : 'second' } ) ;
62- if ( elapsed > 0 ) {
111+
112+ if ( collect . cpuTime ) {
113+ metrics . gauge ( 'node.runtime.cpu.user' , delta . user / 1e6 , { unit : 'second' } ) ;
114+ metrics . gauge ( 'node.runtime.cpu.system' , delta . system / 1e6 , { unit : 'second' } ) ;
115+ }
116+ if ( collect . cpuUtilization && elapsed > 0 ) {
63117 // Ratio of CPU time to wall-clock time. Can exceed 1.0 on multi-core systems.
64118 // TODO: In cluster mode, add a runtime_id/process_id attribute to disambiguate per-worker metrics.
65119 metrics . gauge ( 'node.runtime.cpu.utilization' , ( delta . user + delta . system ) / ( elapsed * 1000 ) ) ;
66120 }
121+
67122 prevCpuUsage = process . cpuUsage ( ) ;
68123 }
69124
70- if ( collect . memory ) {
125+ if ( collect . memRss || collect . memHeapUsed || collect . memHeapTotal || collect . memExternal ) {
71126 const mem = process . memoryUsage ( ) ;
72- metrics . gauge ( 'node.runtime.mem.rss' , mem . rss , { unit : 'byte' } ) ;
73- metrics . gauge ( 'node.runtime.mem.heap_total' , mem . heapTotal , { unit : 'byte' } ) ;
74- metrics . gauge ( 'node.runtime.mem.heap_used' , mem . heapUsed , { unit : 'byte' } ) ;
75- metrics . gauge ( 'node.runtime.mem.external' , mem . external , { unit : 'byte' } ) ;
76- metrics . gauge ( 'node.runtime.mem.array_buffers' , mem . arrayBuffers , { unit : 'byte' } ) ;
127+ if ( collect . memRss ) metrics . gauge ( 'node.runtime.mem.rss' , mem . rss , { unit : 'byte' } ) ;
128+ if ( collect . memHeapUsed ) metrics . gauge ( 'node.runtime.mem.heap_used' , mem . heapUsed , { unit : 'byte' } ) ;
129+ if ( collect . memHeapTotal ) metrics . gauge ( 'node.runtime.mem.heap_total' , mem . heapTotal , { unit : 'byte' } ) ;
130+ if ( collect . memExternal ) {
131+ metrics . gauge ( 'node.runtime.mem.external' , mem . external , { unit : 'byte' } ) ;
132+ metrics . gauge ( 'node.runtime.mem.array_buffers' , mem . arrayBuffers , { unit : 'byte' } ) ;
133+ }
77134 }
78135
79- if ( collect . eventLoopDelay && eventLoopDelayHistogram ) {
80- // Resolution is 10ms (10_000_000 ns) as configured below. Subtract it to normalize out sampling overhead.
81- const resolutionNs = 10_000_000 ;
82- const nsToS = ( ns : number ) : number => Math . max ( 0 , ( ns - resolutionNs ) / 1e9 ) ;
83-
84- metrics . gauge ( 'node.runtime.event_loop.delay.min' , nsToS ( eventLoopDelayHistogram . min ) , { unit : 'second' } ) ;
85- metrics . gauge ( 'node.runtime.event_loop.delay.max' , nsToS ( eventLoopDelayHistogram . max ) , { unit : 'second' } ) ;
86- metrics . gauge ( 'node.runtime.event_loop.delay.mean' , nsToS ( eventLoopDelayHistogram . mean ) , { unit : 'second' } ) ;
87- metrics . gauge ( 'node.runtime.event_loop.delay.p50' , nsToS ( eventLoopDelayHistogram . percentile ( 50 ) ) , {
88- unit : 'second' ,
89- } ) ;
90- metrics . gauge ( 'node.runtime.event_loop.delay.p90' , nsToS ( eventLoopDelayHistogram . percentile ( 90 ) ) , {
91- unit : 'second' ,
92- } ) ;
93- metrics . gauge ( 'node.runtime.event_loop.delay.p99' , nsToS ( eventLoopDelayHistogram . percentile ( 99 ) ) , {
94- unit : 'second' ,
95- } ) ;
136+ if ( needsEventLoopDelay && eventLoopDelayHistogram ) {
137+ if ( collect . eventLoopDelayMin )
138+ metrics . gauge ( 'node.runtime.event_loop.delay.min' , nsToS ( eventLoopDelayHistogram . min ) , { unit : 'second' } ) ;
139+ if ( collect . eventLoopDelayMax )
140+ metrics . gauge ( 'node.runtime.event_loop.delay.max' , nsToS ( eventLoopDelayHistogram . max ) , { unit : 'second' } ) ;
141+ if ( collect . eventLoopDelayMean )
142+ metrics . gauge ( 'node.runtime.event_loop.delay.mean' , nsToS ( eventLoopDelayHistogram . mean ) , { unit : 'second' } ) ;
143+ if ( collect . eventLoopDelayP50 )
144+ metrics . gauge ( 'node.runtime.event_loop.delay.p50' , nsToS ( eventLoopDelayHistogram . percentile ( 50 ) ) , {
145+ unit : 'second' ,
146+ } ) ;
147+ if ( collect . eventLoopDelayP90 )
148+ metrics . gauge ( 'node.runtime.event_loop.delay.p90' , nsToS ( eventLoopDelayHistogram . percentile ( 90 ) ) , {
149+ unit : 'second' ,
150+ } ) ;
151+ if ( collect . eventLoopDelayP99 )
152+ metrics . gauge ( 'node.runtime.event_loop.delay.p99' , nsToS ( eventLoopDelayHistogram . percentile ( 99 ) ) , {
153+ unit : 'second' ,
154+ } ) ;
96155
97156 eventLoopDelayHistogram . reset ( ) ;
98157 }
@@ -115,17 +174,17 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun
115174 name : INTEGRATION_NAME ,
116175
117176 setup ( ) : void {
118- if ( collect . eventLoopDelay ) {
177+ if ( needsEventLoopDelay ) {
119178 try {
120- eventLoopDelayHistogram = monitorEventLoopDelay ( { resolution : 10 } ) ;
179+ eventLoopDelayHistogram = monitorEventLoopDelay ( { resolution : EVENT_LOOP_DELAY_RESOLUTION_MS } ) ;
121180 eventLoopDelayHistogram . enable ( ) ;
122181 } catch {
123182 // Not available in all runtimes (e.g. Bun throws NotImplementedError).
124183 }
125184 }
126185
127186 // Prime baselines before the first collection interval.
128- if ( collect . cpu ) {
187+ if ( needsCpu ) {
129188 prevCpuUsage = process . cpuUsage ( ) ;
130189 }
131190 if ( collect . eventLoopUtilization ) {
0 commit comments