diff --git a/README.md b/README.md index 24f1895161..3b8c2d811e 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ filesystem | Exposes filesystem statistics, such as disk space used. | Darwin, D hwmon | Expose hardware monitoring and sensor data from `/sys/class/hwmon/`. | Linux infiniband | Exposes network statistics specific to InfiniBand and Intel OmniPath configurations. | Linux ipvs | Exposes IPVS status from `/proc/net/ip_vs` and stats from `/proc/net/ip_vs_stats`. | Linux +kernel_hung | Exposes number of tasks that have been detected as hung from `/proc/sys/kernel/hung_task_detect_count`. | Linux loadavg | Exposes load average. | Darwin, Dragonfly, FreeBSD, Linux, NetBSD, OpenBSD, Solaris mdadm | Exposes statistics about devices in `/proc/mdstat` (does nothing if no `/proc/mdstat` present). | Linux meminfo | Exposes memory statistics. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD @@ -195,7 +196,6 @@ drm | Expose GPU metrics using sysfs / DRM, `amdgpu` is the only driver which ex drbd | Exposes Distributed Replicated Block Device statistics (to version 8.4) | Linux ethtool | Exposes network interface information and network driver statistics equivalent to `ethtool`, `ethtool -S`, and `ethtool -i`. | Linux interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD -kernel_hung | Exposes number of tasks that have been detected as hung from `/proc/sys/kernel/hung_task_detect_count`. | Linux ksmd | Exposes kernel and system statistics from `/sys/kernel/mm/ksm`. | Linux lnstat | Exposes stats from `/proc/net/stat/`. | Linux logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/Software/systemd/logind/). | Linux diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 8065c5fee7..3be158f783 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -1924,6 +1924,9 @@ node_ipvs_outgoing_bytes_total 0 # HELP node_ipvs_outgoing_packets_total The total number of outgoing packets. # TYPE node_ipvs_outgoing_packets_total counter node_ipvs_outgoing_packets_total 0 +# HELP node_kernel_hung_tasks_total Total number of tasks that have been detected as hung since the system booted. +# TYPE node_kernel_hung_tasks_total counter +node_kernel_hung_tasks_total 42 # HELP node_ksmd_full_scans_total ksmd 'full_scans' file. # TYPE node_ksmd_full_scans_total counter node_ksmd_full_scans_total 323 @@ -3660,6 +3663,7 @@ node_scrape_collector_success{collector="hwmon"} 1 node_scrape_collector_success{collector="infiniband"} 1 node_scrape_collector_success{collector="interrupts"} 1 node_scrape_collector_success{collector="ipvs"} 1 +node_scrape_collector_success{collector="kernel_hung"} 1 node_scrape_collector_success{collector="ksmd"} 1 node_scrape_collector_success{collector="lnstat"} 1 node_scrape_collector_success{collector="loadavg"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 09a58cdc52..380e812c98 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1956,6 +1956,9 @@ node_ipvs_outgoing_bytes_total 0 # HELP node_ipvs_outgoing_packets_total The total number of outgoing packets. # TYPE node_ipvs_outgoing_packets_total counter node_ipvs_outgoing_packets_total 0 +# HELP node_kernel_hung_tasks_total Total number of tasks that have been detected as hung since the system booted. +# TYPE node_kernel_hung_tasks_total counter +node_kernel_hung_tasks_total 42 # HELP node_ksmd_full_scans_total ksmd 'full_scans' file. # TYPE node_ksmd_full_scans_total counter node_ksmd_full_scans_total 323 @@ -3692,6 +3695,7 @@ node_scrape_collector_success{collector="hwmon"} 1 node_scrape_collector_success{collector="infiniband"} 1 node_scrape_collector_success{collector="interrupts"} 1 node_scrape_collector_success{collector="ipvs"} 1 +node_scrape_collector_success{collector="kernel_hung"} 1 node_scrape_collector_success{collector="ksmd"} 1 node_scrape_collector_success{collector="lnstat"} 1 node_scrape_collector_success{collector="loadavg"} 1 diff --git a/collector/fixtures/proc/sys/kernel/hung_task_detect_count b/collector/fixtures/proc/sys/kernel/hung_task_detect_count new file mode 100644 index 0000000000..d81cc0710e --- /dev/null +++ b/collector/fixtures/proc/sys/kernel/hung_task_detect_count @@ -0,0 +1 @@ +42 diff --git a/collector/kernel_hung_linux.go b/collector/kernel_hung_linux.go index fe8a07baf6..8403977ded 100644 --- a/collector/kernel_hung_linux.go +++ b/collector/kernel_hung_linux.go @@ -29,7 +29,7 @@ type kernelHungCollector struct { } func init() { - registerCollector("kernel_hung", defaultDisabled, NewKernelHungCollector) + registerCollector("kernel_hung", defaultEnabled, NewKernelHungCollector) } func NewKernelHungCollector(logger *slog.Logger) (Collector, error) { @@ -44,8 +44,8 @@ func NewKernelHungCollector(logger *slog.Logger) (Collector, error) { } var ( - taskDetectCount = prometheus.NewDesc( - prometheus.BuildFQName(namespace, "kernel_hung", "task_detect_count"), + kernelHungTasks = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "kernel_hung", "tasks_total"), "Total number of tasks that have been detected as hung since the system booted.", nil, nil, ) @@ -57,7 +57,7 @@ func (c *kernelHungCollector) Update(ch chan<- prometheus.Metric) error { return err } - ch <- prometheus.MustNewConstMetric(taskDetectCount, prometheus.CounterValue, float64(*kernelHung.HungTaskDetectCount)) + ch <- prometheus.MustNewConstMetric(kernelHungTasks, prometheus.CounterValue, float64(*kernelHung.HungTaskDetectCount)) return nil } diff --git a/end-to-end-test.sh b/end-to-end-test.sh index bd9679560b..e8357ea847 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -57,6 +57,7 @@ enabled_collectors=$(cat << COLLECTORS infiniband interrupts ipvs + kernel_hung ksmd lnstat loadavg