From fc123c04281acd6ccf897fbe2ae8277e5d207100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Thu, 11 Sep 2025 17:23:42 +0200 Subject: [PATCH] Add python profiler - use by changing the value of ENABLE_PROFILING env variable - Use signal to stop / start the profiler too # Find the operator PID kubectl exec -- pgrep -f "python.*operator" # Start profiling kubectl exec -- kill -USR1 # Stop profiling kubectl exec -- kill -USR2 # Get results kubectl cp :/tmp/poolboy_profile.prof ./poolboy_profile.prof kubectl cp :/tmp/poolboy_profile_stats.txt ./poolboy_profile_stats.txt Use signal to stop / start the profiler too --- operator/operator.py | 8 ++++ operator/profiling.py | 89 +++++++++++++++++++++++++++++++++++++++++ util/analyze_profile.py | 56 ++++++++++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 operator/profiling.py create mode 100755 util/analyze_profile.py diff --git a/operator/operator.py b/operator/operator.py index fbdd366..d4117f0 100755 --- a/operator/operator.py +++ b/operator/operator.py @@ -10,6 +10,7 @@ from poolboy import Poolboy from configure_kopf_logging import configure_kopf_logging from infinite_relative_backoff import InfiniteRelativeBackoff +from profiling import profiler from resourceclaim import ResourceClaim from resourcehandle import ResourceHandle @@ -47,6 +48,10 @@ async def startup(logger: kopf.ObjectLogger, settings: kopf.OperatorSettings, ** # Configure logging configure_kopf_logging() + # Initialize profiler (sets up signal handlers) + # Use SIGUSR1 to start profiling, SIGUSR2 to stop profiling + profiler # Just reference it to initialize signal handlers + await Poolboy.on_startup(logger=logger) # Preload configuration from ResourceProviders @@ -64,6 +69,9 @@ async def cleanup(logger: kopf.ObjectLogger, **_): ResourceHandle.stop_watch_other() await ResourceWatch.stop_all() await Poolboy.on_cleanup() + + # Cleanup profiler + profiler.cleanup() @kopf.on.event(Poolboy.operator_domain, Poolboy.operator_version, 'resourceproviders') async def resource_provider_event(event: Mapping, logger: kopf.ObjectLogger, **_) -> None: diff --git a/operator/profiling.py b/operator/profiling.py new file mode 100644 index 0000000..01da265 --- /dev/null +++ b/operator/profiling.py @@ -0,0 +1,89 @@ +import os +import cProfile +import pstats +import atexit +import logging +import signal +from datetime import datetime + +class PoolboyProfiler: + """Simple profiler for Poolboy operator controlled via signals.""" + + def __init__(self): + self.profiler = None + self.profile_file = '/tmp/poolboy_profile.prof' + self.stats_file = '/tmp/poolboy_profile_stats.txt' + self.logger = logging.getLogger('poolboy.profiling') + self.is_profiling = False + + # Setup signal handlers + signal.signal(signal.SIGUSR1, self._signal_start_profiling) + signal.signal(signal.SIGUSR2, self._signal_stop_profiling) + + def _signal_start_profiling(self, signum, frame): + """Signal handler to start profiling (SIGUSR1).""" + self.start_profiling() + + def _signal_stop_profiling(self, signum, frame): + """Signal handler to stop profiling (SIGUSR2).""" + self.stop_profiling() + + def start_profiling(self): + """Start profiling.""" + if self.is_profiling: + self.logger.info("Profiling already active") + return + + self.logger.info(f"Starting profiling - output will be saved to {self.profile_file}") + self.profiler = cProfile.Profile() + self.profiler.enable() + self.is_profiling = True + + def stop_profiling(self): + """Stop profiling and save results.""" + if not self.is_profiling or not self.profiler: + self.logger.info("Profiling not active") + return + + self.profiler.disable() + self.is_profiling = False + + # Save raw profile data + self.profiler.dump_stats(self.profile_file) + + # Generate human-readable stats + try: + with open(self.stats_file, 'w') as f: + f.write(f"Poolboy Profiling Results - Generated at {datetime.now()}\n") + f.write("=" * 80 + "\n\n") + + stats = pstats.Stats(self.profiler) + + # Top functions by cumulative time + f.write("TOP 50 FUNCTIONS BY CUMULATIVE TIME:\n") + f.write("-" * 40 + "\n") + stats.sort_stats('cumulative') + stats.print_stats(50, file=f) + + f.write("\n\nTOP 50 FUNCTIONS BY TOTAL TIME:\n") + f.write("-" * 40 + "\n") + stats.sort_stats('tottime') + stats.print_stats(50, file=f) + + f.write("\n\nFUNCTIONS WITH MOST CALLS:\n") + f.write("-" * 40 + "\n") + stats.sort_stats('ncalls') + stats.print_stats(50, file=f) + + self.logger.info(f"Profiling stopped - results saved to {self.profile_file} and {self.stats_file}") + + except Exception as e: + self.logger.error(f"Error saving profiling stats: {e}") + + def cleanup(self): + """Cleanup on shutdown - stop profiling if active.""" + if self.is_profiling: + self.stop_profiling() + +# Global profiler instance +profiler = PoolboyProfiler() \ No newline at end of file diff --git a/util/analyze_profile.py b/util/analyze_profile.py new file mode 100755 index 0000000..284d761 --- /dev/null +++ b/util/analyze_profile.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +""" +Simple script to analyze Poolboy profiling results. +Usage: python analyze_profile.py [profile_file] +""" + +import sys +import pstats +import os + +def analyze_profile(profile_file='/tmp/poolboy_profile.prof'): + """Analyze a cProfile output file and display key metrics.""" + + if not os.path.exists(profile_file): + print(f"Profile file {profile_file} not found!") + print("Make sure ENABLE_PROFILING=true is set and the operator has been running.") + return + + print(f"Analyzing profile data from: {profile_file}") + print("=" * 80) + + stats = pstats.Stats(profile_file) + + print("\nPROFILE SUMMARY:") + print("-" * 40) + stats.print_stats(0) # Just show summary + + print("\nTOP 20 FUNCTIONS BY CUMULATIVE TIME:") + print("-" * 40) + stats.sort_stats('cumulative').print_stats(20) + + print("\nTOP 20 FUNCTIONS BY TOTAL TIME:") + print("-" * 40) + stats.sort_stats('tottime').print_stats(20) + + print("\nTOP 20 MOST CALLED FUNCTIONS:") + print("-" * 40) + stats.sort_stats('ncalls').print_stats(20) + + # Find potential hotspots - functions with high total time and many calls + print("\nPOTENTIAL HOTSPOTS (high time + many calls):") + print("-" * 40) + stats.sort_stats('tottime') + hotspots = [] + for func, (cc, nc, tt, ct, callers) in stats.stats.items(): + if tt > 0.1 and nc > 100: # Functions taking >0.1s total time with >100 calls + hotspots.append((func, tt, nc, tt/nc if nc > 0 else 0)) + + hotspots.sort(key=lambda x: x[1], reverse=True) # Sort by total time + for func, total_time, calls, avg_time in hotspots[:15]: + filename, line, funcname = func + print(f"{filename}:{line}({funcname}) - {total_time:.3f}s total, {calls} calls, {avg_time:.6f}s avg") + +if __name__ == '__main__': + profile_file = sys.argv[1] if len(sys.argv) > 1 else '/tmp/poolboy_profile.prof' + analyze_profile(profile_file) \ No newline at end of file