2424#include < fstream>
2525#include < utility>
2626#include < numeric>
27+ #ifdef O2_MONITORING_OS_LINUX
28+ #include < linux/perf_event.h>
29+ #include < sys/syscall.h>
30+ #include < cstring>
31+ #endif
2732
2833namespace o2
2934{
@@ -37,6 +42,22 @@ static constexpr auto SMAPS_FILE = "/proc/self/smaps";
3742static constexpr auto SMAPS_FILE = " /proc/self/smaps_rollup" ;
3843#endif
3944
45+ #ifdef O2_MONITORING_OS_LINUX
46+ namespace
47+ {
/// Buffer filled by read() on the instruction-counter descriptor.
/// The field order mirrors what the kernel writes when read_format is
/// PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, so it must not be reordered.
struct InstrReadFormat {
  uint64_t value;       ///< raw counter value
  uint64_t timeEnabled; ///< time the event was enabled
  uint64_t timeRunning; ///< time the event was actually scheduled on the PMU
};
/// Thin wrapper around the raw perf_event_open system call (glibc ships no wrapper for it).
/// Arguments are forwarded unchanged; the return value is whatever syscall() returns,
/// i.e. a new file descriptor on success or -1 on failure.
inline long perfEventOpen(perf_event_attr* attr, pid_t pid, int cpu,
                          int group, unsigned long flags)
{
  const long result = syscall(__NR_perf_event_open, attr, pid, cpu, group, flags);
  return result;
}
58+ } // namespace
59+ #endif
60+
4061ProcessMonitor::ProcessMonitor ()
4162{
4263 mPid = static_cast <unsigned int >(::getpid ());
@@ -46,6 +67,35 @@ ProcessMonitor::ProcessMonitor()
4667 setTotalMemory ();
4768#endif
4869 mEnabledMeasurements .fill (false );
70+ openInstructionCounter ();
71+ }
72+
73+ void ProcessMonitor::openInstructionCounter ()
74+ {
75+ #ifdef O2_MONITORING_OS_LINUX
76+ struct perf_event_attr attr;
77+ std::memset (&attr, 0 , sizeof (attr));
78+ attr.size = sizeof (attr);
79+ attr.type = PERF_TYPE_HARDWARE ;
80+ attr.config = PERF_COUNT_HW_INSTRUCTIONS ;
81+ attr.disabled = 0 ; // count from the moment it is opened
82+ attr.exclude_kernel = 1 ; // user-space only: the relevant signal, and works at perf_event_paranoid <= 2
83+ attr.exclude_hv = 1 ;
84+ attr.inherit = 1 ; // also count threads spawned afterwards (validated: live + exited threads aggregate)
85+ attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING ;
86+ // pid 0 = this process (and inherited threads), cpu -1 = any. Best effort: a high
87+ // perf_event_paranoid, container seccomp, or a missing PMU simply leaves it disabled.
88+ mInstructionsFd = static_cast <int >(perfEventOpen (&attr, 0 , -1 , -1 , 0 ));
89+ #endif
90+ }
91+
92+ ProcessMonitor::~ProcessMonitor ()
93+ {
94+ #ifdef O2_MONITORING_OS_LINUX
95+ if (mInstructionsFd >= 0 ) {
96+ ::close (mInstructionsFd );
97+ }
98+ #endif
4999}
50100
51101void ProcessMonitor::init ()
@@ -145,6 +195,22 @@ std::vector<Metric> ProcessMonitor::getCpuAndContexts()
145195 static_cast <uint64_t >(currentUsage.ru_nvcsw - mPreviousGetrUsage .ru_nvcsw ), metricsNames[VOLUNTARY_CONTEXT_SWITCHES ]});
146196 metrics.emplace_back (cpuUsedInMicroSeconds, metricsNames[CPU_USED_ABSOLUTE ]);
147197
198+ #ifdef O2_MONITORING_OS_LINUX
199+ if (mInstructionsFd >= 0 ) {
200+ InstrReadFormat rf;
201+ if (::read (mInstructionsFd , &rf, sizeof (rf)) == static_cast <ssize_t >(sizeof (rf))) {
202+ // Correct for PMU multiplexing: when the counter is not always scheduled,
203+ // timeEnabled > timeRunning, so scale the raw value back up to a full-time estimate.
204+ double scale = rf.timeRunning ? static_cast <double >(rf.timeEnabled ) / rf.timeRunning : 1.0 ;
205+ uint64_t total = static_cast <uint64_t >(rf.value * scale);
206+ uint64_t delta = (total >= mPreviousInstructions ) ? (total - mPreviousInstructions ) : total;
207+ mPreviousInstructions = total;
208+ // Per-interval retired instructions; summed over the run = total instructions (cf. cpuUsedAbsolute).
209+ metrics.emplace_back (delta, metricsNames[CPU_INSTRUCTIONS ]);
210+ }
211+ }
212+ #endif
213+
148214 mTimeLastRun = timeNow;
149215 mPreviousGetrUsage = currentUsage;
150216 return metrics;
0 commit comments