From e3506be439db031269370af8a7e21e9e358a4d78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Trahay?= Date: Mon, 10 Nov 2025 15:24:20 +0100 Subject: [PATCH 1/7] an otf2 trace may not be named traces.otf2 --- pipit/readers/otf2_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipit/readers/otf2_reader.py b/pipit/readers/otf2_reader.py index b685c3a7..83dd01f8 100644 --- a/pipit/readers/otf2_reader.py +++ b/pipit/readers/otf2_reader.py @@ -8,14 +8,14 @@ import pandas as pd import multiprocessing as mp import pipit.trace - +import glob class OTF2Reader: """Reader for OTF2 trace files""" def __init__(self, dir_name, num_processes=None, create_cct=False): self.dir_name = dir_name # directory of otf2 file being read - self.file_name = self.dir_name + "/traces.otf2" + self.file_name = glob.glob(self.dir_name + "/*.otf2")[0] self.create_cct = create_cct num_cpus = mp.cpu_count() From be798388fe7959a6bfa6446fd05d83af7f0788c9 Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Thu, 5 Mar 2026 14:51:03 +0100 Subject: [PATCH 2/7] Changed the way a region (or any definition, really) is printed. The type is known from the key in the dict, and adding it to the ID in a string only complicates things. --- pipit/readers/otf2_reader.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pipit/readers/otf2_reader.py b/pipit/readers/otf2_reader.py index 83dd01f8..ed1892c5 100644 --- a/pipit/readers/otf2_reader.py +++ b/pipit/readers/otf2_reader.py @@ -56,12 +56,10 @@ def field_to_val(self, field): if "otf2.definitions" in field_type: """ Example: An event can have an attribute called region which corresponds - to a definition. We strip the string and extract only the relevant - information, which is the type of definition such as Region and also - append its id (like Region 6) so that this definition can be accessed - in the Definitions DataFrame + to a definition. 
This region has an ID, and can be retrieved in the + Definitions DataFrame. """ - return field_type[25:-2] + " " + str(getattr(field, "_ref")) + return int(getattr(field, "_ref")) elif "_otf2" in field_type or "otf2" in field_type: """ Example: A measurement event has an attribute called measurement mode From 8c29959ec3af67d32ac9493b566e6d6c8a5901bb Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Thu, 5 Mar 2026 14:53:34 +0100 Subject: [PATCH 3/7] Changed the ID column from floats to ints. IDs are ints. --- pipit/readers/otf2_reader.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pipit/readers/otf2_reader.py b/pipit/readers/otf2_reader.py index ed1892c5..aa12040f 100644 --- a/pipit/readers/otf2_reader.py +++ b/pipit/readers/otf2_reader.py @@ -371,7 +371,7 @@ def read_definitions(self, trace): # only definition type that is not a registry if key == "clock_properties": # clock properties doesn't have an ID - def_id.append(float("NaN")) + def_id.append(-1) def_name.append(str(type(def_attribute))[25:-2]) attributes.append(self.fields_to_dict(def_attribute)) @@ -408,8 +408,7 @@ def read_definitions(self, trace): # only add ids for those definitions that have it def_id.append(def_object._ref) else: - # ID column is of float64 dtype - def_id.append(float("NaN")) + def_id.append(-1) # name of the definition def_name.append(str(type(def_object))[25:-2]) @@ -426,7 +425,7 @@ def read_definitions(self, trace): # Definition column is of categorical dtype definitions_dataframe = definitions_dataframe.astype( - {"Definition Type": "category"} + {"Definition Type": "category", "ID": "int"} ) return definitions_dataframe From 21bbddb89069c7d528597c006974fd69e59fe252 Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Thu, 5 Mar 2026 15:46:59 +0100 Subject: [PATCH 4/7] Added the Leave data back. 
--- pipit/readers/otf2_reader.py | 39 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pipit/readers/otf2_reader.py b/pipit/readers/otf2_reader.py index aa12040f..0777d95a 100644 --- a/pipit/readers/otf2_reader.py +++ b/pipit/readers/otf2_reader.py @@ -303,26 +303,25 @@ def events_reader(self, rank_size): # only add attributes for non-leave rows so that # there aren't duplicate attributes for a single event - if event_type != "Leave": - attributes_dict = {} - - # iterates through the event's attributes - # (ex: region, bytes sent, etc) - for key, value in vars(event).items(): - # only adds non-empty attributes - # and ignores time so there isn't a duplicate time - if value is not None and key != "time": - # uses field_to_val to convert all data types - # and ensure that there are no pickling errors - attributes_dict[self.field_to_val(key)] = ( - self.handle_data(value) - ) - event_attributes.append(attributes_dict) - else: - # nan attributes for leave rows - # attributes column is of object dtype - event_attributes.append(None) - + # ↑ + # This is genuinely baffling ???? + # Like + # Why would you not want this information in the case of a Leave Event + # You need to know what you just left ???? 
+ # Anyways I removed the "if event_type != "Leave" + attributes_dict = {} + # iterates through the event's attributes + # (ex: region, bytes sent, etc) + for key, value in vars(event).items(): + # only adds non-empty attributes + # and ignores time so there isn't a duplicate time + if value is not None and key != "time": + # uses field_to_val to convert all data types + # and ensure that there are no pickling errors + attributes_dict[self.field_to_val(key)] = ( + self.handle_data(value) + ) + event_attributes.append(attributes_dict) trace.close() # close event files # returns dataframe with all events and their fields From 37d3ac5492e74baa70168fcbf9b9dd714b3d335c Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Fri, 24 Apr 2026 17:05:37 +0200 Subject: [PATCH 5/7] Fixed idle_time. --- pipit/trace.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipit/trace.py b/pipit/trace.py index 48735379..92cd833f 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -621,6 +621,9 @@ def idle_time(self, idle_functions=["Idle"], mpi_events=False): if "time.inc" not in self.events.columns: self.calc_inc_metrics() + if "time.exc" not in self.events.columns: + self.calc_exc_metrics() + if mpi_events: idle_functions += ["MPI_Wait", "MPI_Waitall", "MPI_Recv"] From e4e010116f43b0c325daec8d5a47853de6bba182 Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Fri, 24 Apr 2026 17:21:25 +0200 Subject: [PATCH 6/7] Fixed idle_time. AGAIN. 
--- pipit/trace.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index 92cd833f..365ac132 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -616,8 +616,10 @@ def load_imbalance(self, metric="time.exc", num_processes=1): return imbalance_df - def idle_time(self, idle_functions=["Idle"], mpi_events=False): + def idle_time(self, idle_functions=None, mpi_events=False, include_blank_spaces=False): # calculate inclusive metrics + if idle_functions is None: + idle_functions = ["Idle"] if "time.inc" not in self.events.columns: self.calc_inc_metrics() @@ -630,14 +632,15 @@ def idle_time(self, idle_functions=["Idle"], mpi_events=False): def calc_idle_time(events): # assumes events is sorted by time - # Calculate idle time due to gaps in between events - # This is the total time minus exclusive time spent in functions - total_time = events["Timestamp (ns)"].max() - events["Timestamp (ns)"].min() + # Calculate idle time due to idle_functions + idle_time = events[events["Name"].isin(idle_functions)]["time.inc"].sum() + if include_blank_spaces: + # Calculate idle time due to gaps in between events + # This is the total time minus exclusive time spent in functions + total_time = events["Timestamp (ns)"].max() - events["Timestamp (ns)"].min() - idle_time = total_time - events["time.exc"].sum() + idle_time = total_time - events["time.exc"].sum() - # Calculate idle time due to idle_functions - idle_time += events[events["Name"].isin(idle_functions)]["time.inc"].sum() return idle_time return ( From 970364943f99c242f3dd7daa2725b4ca89e51b58 Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Fri, 24 Apr 2026 17:29:28 +0200 Subject: [PATCH 7/7] Fixed idle_time and added warning. --- pipit/trace.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pipit/trace.py b/pipit/trace.py index 365ac132..9e29df23 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -2,6 +2,7 @@ # Maryland. 
See the top-level LICENSE file for details. # # SPDX-License-Identifier: MIT +import sys import numpy as np import pandas as pd @@ -325,6 +326,8 @@ def calc_inc_metrics(self, columns=None): def calc_exc_metrics(self, columns=None): # calculate exc metrics for all numeric columns if not specified + # Fixme This function doesn't work properly. + print("Warning: using calc_exc_metrics but the function doesn't work properly.", file=sys.stderr) columns = self.numeric_cols if columns is None else columns # match caller and callee rows @@ -623,7 +626,7 @@ def idle_time(self, idle_functions=None, mpi_events=False, include_blank_spaces= if "time.inc" not in self.events.columns: self.calc_inc_metrics() - if "time.exc" not in self.events.columns: + if "time.exc" not in self.events.columns and include_blank_spaces: self.calc_exc_metrics() if mpi_events: