From 82123812d84d3abbfe2af53b19080419279097f7 Mon Sep 17 00:00:00 2001
From: zhixiangli
Date: Mon, 29 Jun 2026 14:05:16 +0000
Subject: [PATCH 1/2] fix: seed timer at training start to avoid AttributeError

TAG=agy
CONV=3319ee1f-f74b-46be-8ac8-2282151c2ff3
---
 .../helm_chart/llama_3_1_8b_cpu_sim.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py b/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py
index 79bab470..ad71b055 100644
--- a/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py
+++ b/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py
@@ -284,6 +284,10 @@ def __init__(self):
         super().__init__()
         self.ckpt_time = 0.0
 
+    def on_train_start(self, trainer, pl_module):
+        self.start_time = time.perf_counter()
+        self.ckpt_time = 0.0
+
     def on_train_epoch_start(self, trainer, pl_module):
         # Start timer at the beginning of the epoch to capture the first batch's data loading time
         self.start_time = time.perf_counter()

From 5dd0b53a7cd6c6c10547b3697ea0a3e0cddf97eb Mon Sep 17 00:00:00 2001
From: Zhixiang Li
Date: Mon, 29 Jun 2026 22:23:07 +0800
Subject: [PATCH 2/2] Update gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 .../hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py b/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py
index ad71b055..feb7c5b9 100644
--- a/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py
+++ b/gcsfs/tests/perf/macrobenchmarks/workloads/hf-pytorch-lightning-cpu/helm_chart/llama_3_1_8b_cpu_sim.py
@@ -285,6 +285,8 @@ def __init__(self):
         self.ckpt_time = 0.0
 
     def on_train_start(self, trainer, pl_module):
+        # Initialize timer at training start to avoid AttributeError when resuming mid-epoch
+        # (where on_train_epoch_start is skipped).
         self.start_time = time.perf_counter()
         self.ckpt_time = 0.0
 