From 06f4bd8221f377f449a4f7756e39f02305389e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Val=C3=A9rian=20Rey?= Date: Sun, 1 Feb 2026 01:36:55 +0100 Subject: [PATCH 1/3] test: Add --- pyproject.toml | 1 + tests/paths.py | 4 + tests/profiling/plot_memory_timeline.py | 100 ++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 tests/paths.py create mode 100644 tests/profiling/plot_memory_timeline.py diff --git a/pyproject.toml b/pyproject.toml index 6c7eb165..c91e8ed5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,6 +90,7 @@ plot = [ "plotly>=5.19.0", # Recent version to avoid problems, could be relaxed "dash>=2.16.0", # Recent version to avoid problems, could be relaxed "kaleido==0.2.1", # Only works with locked version + "matplotlib>=3.10.0", # Recent version to avoid problems, could be relaxed ] [project.optional-dependencies] diff --git a/tests/paths.py b/tests/paths.py new file mode 100644 index 00000000..0ed56e2d --- /dev/null +++ b/tests/paths.py @@ -0,0 +1,4 @@ +from pathlib import Path + +TORCHJD_DIR = Path(__file__).parent.parent +TRACES_DIR = TORCHJD_DIR / "traces" diff --git a/tests/profiling/plot_memory_timeline.py b/tests/profiling/plot_memory_timeline.py new file mode 100644 index 00000000..1466c1d8 --- /dev/null +++ b/tests/profiling/plot_memory_timeline.py @@ -0,0 +1,100 @@ +""" +Script to plot memory timeline evolution from profiling traces. +Reads memory traces from CSV files and plots them on a single graph. +""" + +import argparse +import json +from dataclasses import dataclass +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +from paths import TRACES_DIR + + +@dataclass +class MemoryFrame: + timestamp: int + total_allocated: int # in bytes + current_allocation: int # in bytes + + @staticmethod + def from_event(event: dict): + args = event["args"] + return MemoryFrame( + timestamp=event["ts"], + total_allocated=args.get("Total Allocated"), + current_allocation=args.get("Bytes"), + ) + + +def extract_memory_timeline(path: Path) -> np.ndarray: + with open(path, "r") as f: + data = json.load(f) + + events = data["traceEvents"] + print(f"Total events in trace: {len(events):,}") + print("Extracting memory frames...") + + frames = [MemoryFrame.from_event(e) for e in events if e["name"] == "[memory]"] + frames.sort(key=lambda frame: frame.timestamp) + + print(f"Found {len(frames):,} memory frames") + + timestamp_list = [frame.timestamp for frame in frames] + total_allocated_list = [frame.total_allocated for frame in frames] + current_allocation_list = [frame.current_allocation for frame in frames] + + return np.array([timestamp_list, total_allocated_list, current_allocation_list]).T + + +def plot_memory_timeline(experiment: str, folders: list[str]) -> None: + timelines = list[np.ndarray]() + for folder in folders: + path = TRACES_DIR / folder / f"{experiment}.json" + timelines.append(extract_memory_timeline(path)) + + fig, ax = plt.subplots(figsize=(12, 6)) + for folder, timeline in zip(folders, timelines): + time = (timeline[:, 0] - timeline[0, 0]) // 1000 # Make time start at 0 and convert to ms. + memory = timeline[:, 1] + ax.plot(time, memory, label=folder, linewidth=1.5) + + ax.set_xlabel("Time (ms)", fontsize=12) + ax.set_ylabel("Total Allocated (bytes)", fontsize=12) + ax.set_title(f"Memory Timeline: {experiment}", fontsize=14, fontweight="bold") + ax.legend(loc="best", fontsize=11) + ax.grid(True, alpha=0.3) + ax.set_ylim(bottom=0) + fig.tight_layout() + + output_dir = Path(TRACES_DIR / "memory_timelines") + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / f"{experiment}.png" + print(f"\nSaving plot to: {output_path}") + fig.savefig(output_path, dpi=300, bbox_inches="tight") + print("Plot saved successfully!") + + +def main(): + parser = argparse.ArgumentParser(description="Plot memory timeline from profiling traces.") + parser.add_argument( + "experiment", + type=str, + help="Name of the experiment under profiling (e.g., 'WithTransformerLarge()-bs4-cpu')", + ) + parser.add_argument( + "folders", + nargs="+", + type=str, + help="Folder names containing the traces (e.g., autojac_old autojac_new)", + ) + + args = parser.parse_args() + + return plot_memory_timeline(args.experiment, args.folders) + + +if __name__ == "__main__": + main() From 261d968adb81a919f671cdba979ad17c1187f68a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Val=C3=A9rian=20Rey?= Date: Sun, 1 Feb 2026 01:46:46 +0100 Subject: [PATCH 2/3] Use new path in run_profiler.py --- tests/profiling/run_profiler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/profiling/run_profiler.py b/tests/profiling/run_profiler.py index e041f994..ebab7849 100644 --- a/tests/profiling/run_profiler.py +++ b/tests/profiling/run_profiler.py @@ -1,5 +1,4 @@ import gc -from pathlib import Path from typing import Callable import torch @@ -22,6 +21,7 @@ ) from utils.tensors import make_inputs_and_targets +from tests.paths import TRACES_DIR from torchjd.aggregation import UPGrad, UPGradWeighting from torchjd.autogram import Engine @@ -93,10 +93,9 @@ def _save_and_print_trace( prof: profile, method_name: str, factory: ModuleFactory, batch_size: int ) -> None: filename = f"{factory}-bs{batch_size}-{DEVICE.type}.json" - torchjd_dir = Path(__file__).parent.parent.parent - traces_dir = torchjd_dir / "traces" / method_name - traces_dir.mkdir(parents=True, exist_ok=True) - trace_path = traces_dir / filename + output_dir = TRACES_DIR / method_name + output_dir.mkdir(parents=True, exist_ok=True) + trace_path = output_dir / filename prof.export_chrome_trace(str(trace_path)) print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=20)) From 0d37abb82d468994f27320713a4ac82f60bdc1c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Val=C3=A9rian=20Rey?= Date: Sun, 1 Feb 2026 01:49:43 +0100 Subject: [PATCH 3/3] Simplify --- tests/profiling/plot_memory_timeline.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/profiling/plot_memory_timeline.py b/tests/profiling/plot_memory_timeline.py index 1466c1d8..f9197101 100644 --- a/tests/profiling/plot_memory_timeline.py +++ b/tests/profiling/plot_memory_timeline.py @@ -1,6 +1,6 @@ """ Script to plot memory timeline evolution from profiling traces. -Reads memory traces from CSV files and plots them on a single graph. +Reads memory traces from json files and plots them on a single graph. """ import argparse @@ -17,7 +17,6 @@ class MemoryFrame: timestamp: int total_allocated: int # in bytes - current_allocation: int # in bytes @staticmethod def from_event(event: dict): @@ -25,7 +24,6 @@ def from_event(event: dict): return MemoryFrame( timestamp=event["ts"], total_allocated=args.get("Total Allocated"), - current_allocation=args.get("Bytes"), ) @@ -44,12 +42,11 @@ def extract_memory_timeline(path: Path) -> np.ndarray: timestamp_list = [frame.timestamp for frame in frames] total_allocated_list = [frame.total_allocated for frame in frames] - current_allocation_list = [frame.current_allocation for frame in frames] - return np.array([timestamp_list, total_allocated_list, current_allocation_list]).T + return np.array([timestamp_list, total_allocated_list]).T -def plot_memory_timeline(experiment: str, folders: list[str]) -> None: +def plot_memory_timelines(experiment: str, folders: list[str]) -> None: timelines = list[np.ndarray]() for folder in folders: path = TRACES_DIR / folder / f"{experiment}.json" @@ -93,7 +90,7 @@ def main(): args = parser.parse_args() - return plot_memory_timeline(args.experiment, args.folders) + return plot_memory_timelines(args.experiment, args.folders) if __name__ == "__main__":