diff --git a/tests/profiling/plot_memory_timeline.py b/tests/profiling/plot_memory_timeline.py index f7cbeec4..0f792be1 100644 --- a/tests/profiling/plot_memory_timeline.py +++ b/tests/profiling/plot_memory_timeline.py @@ -8,6 +8,9 @@ from dataclasses import dataclass from pathlib import Path +import matplotlib + +matplotlib.use("Agg") # Use non-GUI backend to avoid tkinter dependency import matplotlib.pyplot as plt import numpy as np from paths import TRACES_DIR @@ -17,6 +20,8 @@ class MemoryFrame: timestamp: int total_allocated: int # in bytes + device_type: int # 0 for CPU, 1 for CUDA + device_id: int # -1 for CPU, 0+ for CUDA devices @staticmethod def from_event(event: dict): @@ -24,10 +29,12 @@ def from_event(event: dict): return MemoryFrame( timestamp=event["ts"], total_allocated=args.get("Total Allocated"), + device_type=args.get("Device Type"), + device_id=args.get("Device Id"), ) -def extract_memory_timeline(path: Path) -> np.ndarray: +def extract_memory_timelines(path: Path) -> tuple[np.ndarray, np.ndarray]: with open(path) as f: data = json.load(f) @@ -36,34 +43,65 @@ def extract_memory_timeline(path: Path) -> np.ndarray: print("Extracting memory frames...") frames = [MemoryFrame.from_event(e) for e in events if e["name"] == "[memory]"] - frames.sort(key=lambda frame: frame.timestamp) - print(f"Found {len(frames):,} memory frames") + # Separate CPU (device_type=0) and CUDA (device_type=1) frames + cpu_frames = [f for f in frames if f.device_type == 0] + cuda_frames = [f for f in frames if f.device_type == 1] + + cpu_frames.sort(key=lambda frame: frame.timestamp) + cuda_frames.sort(key=lambda frame: frame.timestamp) - timestamp_list = [frame.timestamp for frame in frames] - total_allocated_list = [frame.total_allocated for frame in frames] + print(f"Found {len(cpu_frames)} CPU memory frames and {len(cuda_frames)} CUDA memory frames") - return np.array([timestamp_list, total_allocated_list]).T + cpu_timeline = np.array([[f.timestamp, f.total_allocated] for f in cpu_frames]) + cuda_timeline = np.array([[f.timestamp, f.total_allocated] for f in cuda_frames]) + + return cpu_timeline, cuda_timeline def plot_memory_timelines(experiment: str, folders: list[str]) -> None: - timelines = list[np.ndarray]() + cpu_timelines = [] + cuda_timelines = [] for folder in folders: path = TRACES_DIR / folder / f"{experiment}.json" - timelines.append(extract_memory_timeline(path)) - - fig, ax = plt.subplots(figsize=(12, 6)) - for folder, timeline in zip(folders, timelines, strict=True): - time = (timeline[:, 0] - timeline[0, 0]) // 1000 # Make time start at 0 and convert to ms. - memory = timeline[:, 1] - ax.plot(time, memory, label=folder, linewidth=1.5) - - ax.set_xlabel("Time (ms)", fontsize=12) - ax.set_ylabel("Total Allocated (bytes)", fontsize=12) - ax.set_title(f"Memory Timeline: {experiment}", fontsize=14, fontweight="bold") - ax.legend(loc="best", fontsize=11) - ax.grid(True, alpha=0.3) - ax.set_ylim(bottom=0) + cpu_timeline, cuda_timeline = extract_memory_timelines(path) + cpu_timelines.append(cpu_timeline) + cuda_timelines.append(cuda_timeline) + + fig, (ax_cuda, ax_cpu) = plt.subplots(2, 1, figsize=(12, 10), sharex=True) + + start_times = [ + min(cpu_tl[0, 0], cuda_tl[0, 0]) if len(cuda_tl) > 0 else cpu_tl[0, 0] + for cpu_tl, cuda_tl in zip(cpu_timelines, cuda_timelines, strict=True) + ] + + # Plot CUDA memory (top subplot) + for folder, cuda_timeline, start_time in zip(folders, cuda_timelines, start_times, strict=True): + if len(cuda_timeline) > 0: + time = (cuda_timeline[:, 0] - start_time) // 1000 # Convert to ms starting at 0 + memory = cuda_timeline[:, 1] + ax_cuda.plot(time, memory, label=folder, linewidth=1.5) + + ax_cuda.set_xlabel("Time (ms)", fontsize=12) + ax_cuda.set_ylabel("CUDA Memory (bytes)", fontsize=12) + ax_cuda.set_title(f"CUDA Memory Timeline: {experiment}", fontsize=14, fontweight="bold") + ax_cuda.legend(loc="best", fontsize=11) + ax_cuda.grid(True, alpha=0.3) + ax_cuda.set_ylim(bottom=0) + + # Plot CPU memory (bottom subplot) + for folder, cpu_timeline, start_time in zip(folders, cpu_timelines, start_times, strict=True): + time = (cpu_timeline[:, 0] - start_time) // 1000 # Convert to ms starting at 0 + memory = cpu_timeline[:, 1] + ax_cpu.plot(time, memory, label=folder, linewidth=1.5) + + ax_cpu.set_xlabel("Time (ms)", fontsize=12) + ax_cpu.set_ylabel("CPU Memory (bytes)", fontsize=12) + ax_cpu.set_title(f"CPU Memory Timeline: {experiment}", fontsize=14, fontweight="bold") + ax_cpu.legend(loc="best", fontsize=11) + ax_cpu.grid(True, alpha=0.3) + ax_cpu.set_ylim(bottom=0) + fig.tight_layout() output_dir = Path(TRACES_DIR / "memory_timelines")