From b3a752a394f2fb3034b36960253664d52189d464 Mon Sep 17 00:00:00 2001 From: obchain Date: Sun, 26 Apr 2026 16:10:32 +0530 Subject: [PATCH] fix(scanner+metrics): route listener block counter through names module (closes #328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `crates/charon-scanner/src/listener.rs::publish` emitted the counter via a raw `metrics::counter!("charon_blocks_received_total", ...)` string literal, bypassing `charon_metrics::names::*`. The dashboard and `chain` template variable both queried `charon_scanner_blocks_total` (bumped per pipeline tick from `record_block_scanned`), so the listener counter was a write-only metric that consumed cardinality without ever appearing on a panel — and worse, the chain template variable wouldn't populate until the pipeline's first tick landed, leaving fresh dashboard imports stuck on "No Data" for several minutes after a cold boot. Add `names::LISTENER_BLOCKS_RECEIVED_TOTAL = "charon_listener_blocks_received_total"` plus a typed `record_block_received(chain)` helper, route the listener call site through it, register it in `describe_all`, and swap the dashboard's `chain` template variable to query the listener counter so panels populate the moment the websocket subscription delivers `new_heads` rather than waiting for the first pipeline tick. Note that this changes the emitted series name from `charon_blocks_received_total` to `charon_listener_blocks_received_total`. `describe_counter!` for `SCANNER_BLOCKS_TOTAL` updated to clarify "per pipeline tick" so operators don't conflate the two counters again. 
--- crates/charon-metrics/src/lib.rs | 23 ++++++++++++++++++++++- crates/charon-scanner/src/listener.rs | 7 +++++-- deploy/grafana/charon.json | 6 +++--- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/crates/charon-metrics/src/lib.rs b/crates/charon-metrics/src/lib.rs index ccc0b66..1405c1d 100644 --- a/crates/charon-metrics/src/lib.rs +++ b/crates/charon-metrics/src/lib.rs @@ -123,6 +123,15 @@ pub mod names { pub const SCANNER_BLOCKS_TOTAL: &str = "charon_scanner_blocks_total"; pub const SCANNER_POSITIONS: &str = "charon_scanner_positions"; + // Listener — counts every `new_heads` arrival the moment the + // websocket subscription delivers it, before the pipeline runs. + // Distinct from `SCANNER_BLOCKS_TOTAL` (which advances per + // pipeline tick): if the pipeline stalls or the per-block work + // unit panics, the listener counter still climbs and the + // dashboard can distinguish "no blocks arriving" from "blocks + // arriving but pipeline wedged" (#328). + pub const LISTENER_BLOCKS_RECEIVED_TOTAL: &str = "charon_listener_blocks_received_total"; + // Pipeline pub const PIPELINE_BLOCK_DURATION_SECONDS: &str = "charon_pipeline_block_duration_seconds"; @@ -358,7 +367,11 @@ pub fn install(bind: SocketAddr) -> Result> { fn describe_all() { describe_counter!( names::SCANNER_BLOCKS_TOTAL, - "Total blocks drained from chain listeners." + "Total blocks processed by the scanner pipeline (one increment per per-block tick)." + ); + describe_counter!( + names::LISTENER_BLOCKS_RECEIVED_TOTAL, + "Total `new_heads` events delivered by the chain websocket. Climbs whether or not the pipeline ticks." ); describe_gauge!( names::SCANNER_POSITIONS, @@ -443,6 +456,14 @@ pub fn record_block_scanned(chain: &str) { counter!(names::SCANNER_BLOCKS_TOTAL, "chain" => chain.to_owned()).increment(1); } +/// Increment the per-chain listener block-ingress counter (#328). 
+/// Bumped from the websocket `new_heads` handler before the pipeline +/// runs, so a flat listener counter unambiguously means "no blocks +/// arriving" rather than "pipeline stalled". +pub fn record_block_received(chain: &str) { + counter!(names::LISTENER_BLOCKS_RECEIVED_TOTAL, "chain" => chain.to_owned()).increment(1); +} + /// Set the gauge for one health bucket on one chain. pub fn set_position_bucket(chain: &str, bucket: &str, count: u64) { gauge!(names::SCANNER_POSITIONS, "chain" => chain.to_owned(), "bucket" => bucket.to_owned()) diff --git a/crates/charon-scanner/src/listener.rs b/crates/charon-scanner/src/listener.rs index 5e2fe82..dc1a073 100644 --- a/crates/charon-scanner/src/listener.rs +++ b/crates/charon-scanner/src/listener.rs @@ -178,8 +178,11 @@ impl BlockListener { /// stalled consumer cannot stall the WebSocket drain loop; full channel /// drops the event with a warning (back-pressure visible to ops). fn publish(&mut self, number: u64, timestamp: u64, block_hash: B256, backfill: bool) { - metrics::counter!("charon_blocks_received_total", "chain" => self.name.clone()) - .increment(1); + // Route through the typed helper so this counter shares the + // same name constant the dashboard and alert rules read; a + // raw string here used to drift away from `names::*` and + // never showed up on any panel (#328). + charon_metrics::record_block_received(&self.name); debug!( chain = %self.name, block = number, diff --git a/deploy/grafana/charon.json b/deploy/grafana/charon.json index 3afaa03..23a8d7c 100644 --- a/deploy/grafana/charon.json +++ b/deploy/grafana/charon.json @@ -447,15 +447,15 @@ "allValue": ".*", "current": { "selected": true, "text": "All", "value": "$__all" }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(charon_scanner_blocks_total, chain)", - "description": "Chain label. 
Defaults to All (.*) so panels render data even before label_values is populated (fresh import or cold start).", + "definition": "label_values(charon_listener_blocks_received_total, chain)", + "description": "Chain label. Sourced from the listener counter (#328) — climbs the moment the websocket subscription delivers `new_heads`, so panels populate immediately on connect rather than waiting for the first pipeline tick.", "hide": 0, "includeAll": true, "label": "Chain", "multi": true, "name": "chain", "options": [], - "query": { "query": "label_values(charon_scanner_blocks_total, chain)", "refId": "StandardVariableQuery" }, + "query": { "query": "label_values(charon_listener_blocks_received_total, chain)", "refId": "StandardVariableQuery" }, "refresh": 2, "regex": "", "skipUrlSync": false,