From 277d1b6de76d34188656a1a4fa662405fda10320 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 2 Mar 2026 14:41:02 -0500 Subject: [PATCH 01/58] Calorimeter pid bic notebook --- benchmarks/calo_pid/calo_pid_bic.org | 490 +++++++++++++++++++++++++++ benchmarks/calo_pid/config.yml | 6 +- 2 files changed, 495 insertions(+), 1 deletion(-) create mode 100644 benchmarks/calo_pid/calo_pid_bic.org diff --git a/benchmarks/calo_pid/calo_pid_bic.org b/benchmarks/calo_pid/calo_pid_bic.org new file mode 100644 index 00000000..1c4959c5 --- /dev/null +++ b/benchmarks/calo_pid/calo_pid_bic.org @@ -0,0 +1,490 @@ +#+begin_src jupyter-python + import os + import math + from math import floor + + import pandas as pd + import numpy as np + + ## dangerous: silence annoying TF warnings , remove when running on new systems or debugging + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # this MUST come before any tf call. + import tensorflow as tf + from tensorflow import keras + from tensorflow.keras import layers + + import matplotlib.pyplot as plt + from collections import OrderedDict + import json + import re +#+end_src + +#+begin_src jupyter-python + print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))) +#+end_src + +#+begin_src jupyter-python + # Simulate argparse in Kaggle + class Args: + def __init__(self): + self.angle = ['45to135deg'] # ✅ choose your angles + self.energy = '1GeV' + self.cap_train_sample = 0 + self.epochs = 30 + self.target_imbalance = 1.0 + self.model = 'vgg-v2' + self.workdir = '/kaggle/working/output' # 🔧 all outputs go here + + args = Args() +#+end_src + +#+begin_src jupyter-python + ## Global efficiencies we want to optimize for (electron efficiencies) + kTargetEfficiency = .95 + ## other efficiency scenarios to cut the ML on + kAlternativeEfficiencies = np.arange(.5, 1., .05) + + ## setting + #angle_settings=['eta0.0', 'eta0.5n', 'eta0.5p', 'eta1.0n', 'eta1.0p'] + #angle_settings=['eta0.0', 'eta1.0p'] + #angle_settings=['eta0.0'] + 
angle_settings=args.angle + energy_setting= args.energy + ## MeV or GeV + energy_GeV = float(energy_setting[:-3]) * (1 if energy_setting[-3:] == 'GeV' else 1/1000.) + def eta_from_angle(angle_label): + match = re.match(r"(\d+)to(\d+)deg", angle_label) + if match: + theta1 = float(match.group(1)) + theta2 = float(match.group(2)) + mean_theta_deg = (theta1 + theta2) / 2.0 + mean_theta_rad = np.deg2rad(mean_theta_deg) + eta = -np.log(np.tan(mean_theta_rad / 2)) + return eta + else: + raise ValueError(f"Cannot parse eta from angle label: {angle_label}") + + etas = {} + for setting in angle_settings: + if setting.startswith("eta"): + val = float(setting[3:-1]) + sign = -1. if setting[-1] == 'n' else 1. + etas[setting] = val * sign + elif "deg" in setting: + etas[setting] = eta_from_angle(setting) + else: + etas[setting] = 0.0 + + print(f'E/p scan for {energy_setting}') + print(f' - detected energy: {energy_GeV} GeV') + print(f' - eta ranges: {angle_settings}') +#+end_src + +#+begin_src jupyter-python + ## set ML configuration + kTrainSampleCap = args.cap_train_sample + kEpochs = args.epochs + kTestSize = .2 + kValidateSize = .1 + kTargetImbalance = args.target_imbalance + kPionWeightCap = 1.00 + kElectronLabel = 1 + kPionLabel = 0 + kModel = args.model + + print('ML configuration:') + print(f' - Number of epochs: {kEpochs}') + if kTrainSampleCap > 0: + print(f' - Training sample cap: {kTrainSampleCap}') + print(f' - Validation fraction: {kValidateSize}') + print(f' - Test fraction: {kTestSize}') + print(f' - Target pi:E imbalance: {kTargetImbalance}') + print(f' - Upper cap on pion weights: {kPionWeightCap}') + print(f' - Model: {kModel}') +#+end_src + +#+begin_src jupyter-python + def get_dimensions(df): + max_idx = df.index.max() + min_idx = df.index.min() + max_idx = np.array([v if type(v) != str else 0 for v in max_idx]) + min_idx = np.array([v if type(v) != str else 0 for v in min_idx]) + return {k: v for (k, v) in zip(('event', '_', 'layer', 'hit'), (max_idx - 
min_idx + 1))} + + ## boiler-plate for in-memory datasets + def make_dataset(fields): + dataset = tf.data.Dataset.from_tensor_slices(fields) + ## do magic to avoid shard warnings of operating on DATA instead of FILE + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA + return dataset.with_options(options) +#+end_src + +#+begin_src jupyter-python + ## Chaos CNN model + def build_old(input_shape, n_labels=2): + my_model = keras.Sequential([ + keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape), + keras.layers.MaxPooling2D((2, 2), strides=2), + keras.layers.Dropout(0.25), + keras.layers.Conv2D(128, (2, 2), padding='same', activation='relu'), + keras.layers.MaxPooling2D((2, 2), strides=2), + keras.layers.Conv2D(64, (2, 2), padding='same', activation='relu'), + keras.layers.MaxPooling2D((2, 2), strides=2), + keras.layers.Dropout(0.25), + + keras.layers.Flatten(), + keras.layers.Dense(128, activation='relu'), + #keras.layers.Dropout(0.25), + keras.layers.Dense(32, activation='relu'), + keras.layers.Dense(n_labels, activation='softmax') + ]) + return my_model + + ## Slightly beefier VGG-style CNN + def build_vgg_v1(input_shape, n_labels=2): + my_model = keras.Sequential([ + keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape), + keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same'), + keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), + keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), + keras.layers.Flatten(), + keras.layers.Dense(1024, activation='relu'), + keras.layers.Dense(512, activation='relu'), + keras.layers.Dense(n_labels, activation='softmax') + ]) + + return my_model + + 
def build_vgg_v2(input_shape, n_labels=2): + my_model = keras.Sequential([ + keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape), + keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same'), + keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), + keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), + keras.layers.Flatten(), + keras.layers.Dense(1024, activation='relu'), + keras.layers.Dense(1024, activation='relu'), + keras.layers.Dense(n_labels, activation='softmax') + ]) + + return my_model +#+end_src + +#+begin_src jupyter-python + def build_model(input_shape, n_labels=2): + if kModel == 'old': + print(f'Building old') + return build_old(input_shape, n_labels) + elif kModel == 'vgg-v1': + print(f'Building vgg-v1') + return build_vgg_v1(input_shape, n_labels) + elif kModel == 'vgg-v2': + print(f'Building vgg-v2') + return build_vgg_v2(input_shape, n_labels) + print('Building default') + return build_vgg_v2(input_shape, n_labels) +#+end_src + +#+begin_src jupyter-python + angle_label=angle_settings[0] + print(angle_label) +#+end_src + +#+begin_src jupyter-python + datadir = f'/kaggle/input/results-45to135deg-1gev-data' + plotdir = f'/kaggle/working/plots/{angle_label}' + output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}' +#+end_src + +#+begin_src jupyter-python + print('\nprocessing angle setting:', angle_label) + print(f' - eta: {etas[angle_label]}') + + ## output directories + #output_directory = f'{args.workdir}/{angle_label}/{energy_setting}' + #plotdir = f'{output_directory}/plots' + #datadir = f'{output_directory}/data' + os.makedirs(plotdir, exist_ok=True) + os.makedirs(datadir, exist_ok=True) + print(f' - output data directory: {datadir}') + print(f' - output 
plot directory: {plotdir}') +#+end_src + +#+begin_src jupyter-python + print('Loading datasets: ') + print(f' - Loading {datadir}/hits.snappy.parquet') + df_data = pd.read_parquet(f'{datadir}/hits.snappy.parquet') + print(f' - Loading {datadir}/labels.snappy.parquet') + df_mc = pd.read_parquet(f'{datadir}/labels.snappy.parquet') +#+end_src + +#+begin_src jupyter-python + ## calculate weight to achieve target imbalance + n_electrons = np.sum(df_mc['PDG'] == 11) + n_pions = np.sum(df_mc['PDG'] == -211) + imbalance = n_pions/n_electrons + kSuggestedWeight = min(n_electrons/n_pions*kTargetImbalance, kPionWeightCap) + print(f'Data set has relative class imbalance of {n_electrons} : {n_pions} = {imbalance}') + print(f' - target imbalance: {kTargetImbalance}') + print(f' - pion weight upper limit: {kPionWeightCap:.2f}') + print(f' - suggested pion weight {kSuggestedWeight:.2f}') +#+end_src + +#+begin_src jupyter-python + ## Load E/P data again for aggregate statistics, and to calculate the target efficiency + print(f'Loading E/P data from {datadir}/EoverP_results.csv') + cutdf = pd.read_csv(f'{datadir}/EoverP_results.csv').sort_values('rejection', ascending=False) + results_EoverP = {key: cutdf[key][0] for key in cutdf.keys()} + results_EoverP['max_layer'] = int(results_EoverP['max_layer']) ## get rid of the int64 which causes trouble with json + kTargetEfficiencyML = kTargetEfficiency / results_EoverP['efficiency'] + print(results_EoverP) + print(f'Deduced target efficiency for ML: {kTargetEfficiencyML:.3f}') +#+end_src + +#+begin_src jupyter-python + print('Formatting data objects') + dim = get_dimensions(df_data) + xdata_both = df_data.values.reshape(dim['event'], + dim['layer'], + dim['hit'], + len(df_data.columns)).astype(np.float32) + + ldata = df_mc['PDG'].map(lambda pdg: kElectronLabel if (pdg == 11) else kPionLabel).values + wdata = df_mc['PDG'].map(lambda pdg: 1 if (pdg == 11) else kSuggestedWeight).values +#+end_src + +#+begin_src jupyter-python + 
print('Shuffling data and separating samples') + ## shuffle data + index = np.arange(len(ldata)) + np.random.shuffle(index) + tot_len = len(index) + + n_valid = floor(tot_len * kValidateSize) + n_test = floor(tot_len * kTestSize) + n_train = tot_len - n_valid - n_test + if kTrainSampleCap > 0 and n_train > kTrainSampleCap: + print(f'Capping training sample size to {kTrainSampleCap}') + valid_over_train = n_valid / n_train + test_over_train = n_test / n_train + n_train = kTrainSampleCap + n_valid = floor(valid_over_train * n_train) + n_test = floor(test_over_train * n_train) + tot_len = n_train + n_valid + n_test + print(f'Sample sizes: {{n_train: {n_train}, n_valid: {n_valid}, n_test: {n_test}}}') +#+end_src + +#+begin_src jupyter-python + id_valid = index[:n_valid] + id_test = index[n_valid:n_valid + n_test] + id_train = index[n_valid + n_test:tot_len] + xtrain, xvalid, xtest = xdata_both[id_train], xdata_both[id_valid], xdata_both[id_test] + ltrain, lvalid, ltest = ldata[id_train], ldata[id_valid], ldata[id_test] + wtrain, wvalid = wdata[id_train], wdata[id_valid] +#+end_src + +#+begin_src jupyter-python + print('Start training, using GPU resources') + gpu = tf.config.list_logical_devices('GPU') + strategy = tf.distribute.MirroredStrategy(gpu) if len(gpu) == 1 else tf.distribute.MirroredStrategy([gpu[0]]) + history = None + with strategy.scope(): + train_dataset = make_dataset((xtrain, ltrain, wtrain)) + valid_dataset = make_dataset((xvalid, lvalid, wvalid)) + + ## avoid warning that we are operating on DATA instead of FILE + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA + train_dataset = train_dataset.with_options(options) + valid_dataset = valid_dataset.with_options(options) + + model = build_model(input_shape=xtrain.shape[1:]) + model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), + 
weighted_metrics=['accuracy']) + history = model.fit(train_dataset.batch(2000), validation_data=valid_dataset.batch(1000), epochs=kEpochs) + os.makedirs(output_directory, exist_ok=True) +#+end_src + +#+begin_src jupyter-python + import keras.backend as K + # Monkey-patch the missing function to avoid the crash + K.set_learning_phase = lambda flag: None + + import tensorflow as tf + import tf2onnx + + # Load your Keras model + #model = tf.keras.models.load_model("/epi_separation/results/45to135deg/1GeV/data/cnn_model_30epochs.h5") + + # Define a function to capture the input signature + @tf.function(input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)]) + def model_fn(input_tensor): + return model(input_tensor) + + # Convert to ONNX format + onnx_model, _ = tf2onnx.convert.from_function( + model_fn, + input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)], # This is important + opset=13, + output_path=f"{output_directory}/EcalBarrel_pi_rejection.onnx" + ) + + print("Model converted successfully to ONNX format!") +#+end_src + +#+begin_src jupyter-python + print('Summarizing metrics') + fig, ax = plt.subplots(1, 2, figsize=(12,6)) + + ax[0].plot(history.history['loss']) + ax[0].plot(history.history['val_loss']) + ax[0].set_title('model loss') + ax[0].set_ylabel('loss') + ax[0].set_xlabel('epoch') + ax[0].legend(['train', 'validate'], loc='upper left') + + ax[1].plot(history.history['accuracy']) + ax[1].plot(history.history['val_accuracy']) + ax[1].set_title('accuracy') + ax[1].set_ylabel('accuracy') + ax[1].set_xlabel('epoch') + ax[1].legend(['train', 'validate'], loc='upper left') + ax[1].set_ylim(0, 1.1) + + fig.savefig(f'{plotdir}/ML_learning.pdf') +#+end_src + +#+begin_src jupyter-python + print('Bencmarking test data') + # benchmark + test_dataset = make_dataset((xtest,)) + prediction = model.predict(test_dataset.batch(1000)) +#+end_src + +#+begin_src jupyter-python + print('Calculate aggregate 
e-pi rejection metrics') + + def calculate_metrics(target_efficiency=kTargetEfficiencyML, export_prediction=True): + ## find the target efficiency cut point and weight the electron results + ## to move the cross-over point into pions to fit this efficiency + ## this code is specific to two particles where (P_e + P_pi = 1) + efficiency_cut = np.percentile(prediction[ltest == kElectronLabel].T[kElectronLabel], + (1 - target_efficiency)*100) + target_weight = (1 - efficiency_cut) / efficiency_cut + + prediction_weights = np.ones(2) + prediction_weights[kElectronLabel] = target_weight + prediction_labels = np.argmax(prediction * prediction_weights, axis=1) + + + electron_predicted = [None, None] + probabilities = np.zeros(shape=(2,2)) + for i in [kPionLabel, kElectronLabel]: + mask = (ltest == i) + probabilities[i] = np.bincount(prediction_labels[mask])/float(np.sum(mask)) + electron_predicted[i] = prediction[mask].T[kElectronLabel] + + binomial_error = lambda eff, n: np.sqrt(n * eff * (1 - eff)) / n + inverse_error = lambda val, err: err / val**2 + + n_electron_test = np.sum(ltest == kElectronLabel) + n_pion_test = np.sum(ltest == kPionLabel) + + results_ML = OrderedDict({'target_particle': 'e-', + 'target_weight': target_weight, + 'target_efficiency': target_efficiency, + 'target_cut': efficiency_cut, + 'n_electrons': int(n_electron_test), + 'n_pions': int(n_pion_test), + 'probabilities': probabilities.tolist(), + 'efficiency': probabilities[kElectronLabel, kElectronLabel], + 'efficiency_error': binomial_error(probabilities[kElectronLabel, kElectronLabel], n_electron_test), + 'rejection': 1 / probabilities[kPionLabel, kElectronLabel], + 'rejection_error': inverse_error(probabilities[kPionLabel, kElectronLabel], binomial_error(probabilities[kPionLabel, kElectronLabel], n_pion_test))}) + + ## calculate aggregate results from E/P + ML + results = OrderedDict({ + 'energy': energy_GeV, + 'eta': etas[angle_label], + 'angle': angle_label, + 'efficiency': 
results_EoverP['efficiency'] * results_ML['efficiency'], + 'efficiency_error': np.sqrt(results_EoverP['efficiency']**2 * results_ML['efficiency_error']**2 + + results_ML['efficiency']**2 * results_EoverP['efficiency_error']**2), + 'rejection': results_EoverP['rejection'] * results_ML['rejection'], + 'rejection_error': np.sqrt(results_EoverP['rejection']**2 * results_ML['rejection_error']**2 + + results_ML['rejection']**2 * results_EoverP['rejection_error']**2), + 'prob_cut': efficiency_cut, + 'EoverP': results_EoverP, + 'ML': results_ML}) + if export_prediction: + return results, electron_predicted + return results +#+end_src + +#+begin_src jupyter-python + results, electron_predicted = calculate_metrics() + results_ML = results['ML'] + test = electron_predicted + print(f'Calculating alternative target efficiency scenarios: {kAlternativeEfficiencies}') + results['scenarios'] = {} + for alternative_eff in kAlternativeEfficiencies: + target_eff_ml = alternative_eff / results_EoverP['efficiency'] + tmp_res = calculate_metrics(target_efficiency=target_eff_ml, export_prediction=False) + results['scenarios'][alternative_eff] = tmp_res +#+end_src + +#+begin_src jupyter-python + assert test is electron_predicted + + with open(f'{output_directory}/results.json', 'w') as f: + f.write(json.dumps(results, indent=2)) + print(f' - Found overal rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency') + print(f' - Results written to {datadir}/results.json') +#+end_src + +#+begin_src jupyter-python + print('Plotting ML results') + # default color cycle of matplotlib + prop_cycle = plt.rcParams['axes.prop_cycle'] + colors = prop_cycle.by_key()['color'] + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + + parts = {kElectronLabel: r'e^-', kPionLabel: r'\pi^-'} + + fig, ax = plt.subplots(figsize=(12, 9), dpi=160) + effs = [] + for i in parts.keys(): + ax.hist(electron_predicted[i], bins=np.linspace(0, 1, 101), label='${}$'.format(parts[i]), + 
color=colors[i], ec=colors[i], alpha=0.5) + ax.axvline(x=results['prob_cut'], lw=2, color='k', ls='--') + eff_text = '\n'.join([r'$\epsilon_{{ML}}^{{e^-}} = {:.2f}$%'.format(results_ML['efficiency'] * 100.), + r'$R_{{ML}}^{{\pi^-}} = {:.1f}$'.format(results_ML['rejection']), + r'$\epsilon_{{E/p}}^{{e^-}} = {:.2f}$%'.format(results_EoverP['efficiency'] * 100.), + r'$R_{{E/p}}^{{\pi^-}} = {:.1f}$'.format(results_EoverP['rejection']) + ]) + data_to_axis = (ax.transAxes + ax.transData.inverted()).inverted() + ax.text(data_to_axis.transform((results['prob_cut'], 1))[0] + 0.01, 0.99, eff_text, fontsize=24, + transform=ax.transAxes, ha='left', va='top') + ax.set_yscale('log') + ax.set_ylabel('Counts', fontsize=24) + ax.set_xlabel(r'$P_{{{}}}$'.format(r'e^-'), fontsize=24) + ax.tick_params(direction='in', which='both', labelsize=24) + ax.legend(fontsize=24, ncol=4, loc='upper center', bbox_to_anchor=(0.5, 1.12),) + ax.text(0.05, .99, '\n'.join( + [r'{energy} at ${loc}$'.format(energy='1GeV', + loc=f'eta = {etas[angle_label]}'), + r'$R_{{\pi}} = {rejection:.1f}$ at $\epsilon_{{e^-}} = {efficiency:.2f}$%'.format( + rejection=results_EoverP['rejection'] * results_ML['rejection'], + efficiency=results_EoverP['efficiency'] * results_ML['efficiency'] * 100.)]), + ha='left', va='top', fontsize=24, transform=ax.transAxes) + fig.savefig(f'{plotdir}/ML_rejection.pdf') + + print('Done with this eta bin') +#+end_src \ No newline at end of file diff --git a/benchmarks/calo_pid/config.yml b/benchmarks/calo_pid/config.yml index 88410a49..c781cb40 100644 --- a/benchmarks/calo_pid/config.yml +++ b/benchmarks/calo_pid/config.yml @@ -4,6 +4,10 @@ sim:calo_pid: parallel: matrix: - PARTICLE: ["e-", "pi-"] + ANGLE: [ + "45to135deg", + "130to177deg" + ] INDEX_RANGE: [ "0 9", "10 19", @@ -19,7 +23,7 @@ sim:calo_pid: script: - | snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \ - $(seq 
--format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/130to177deg/${PARTICLE}_100MeVto20GeV_130to177deg.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) + $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/${ANGLE}/${PARTICLE}_100MeVto20GeV_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) bench:calo_pid: extends: .det_benchmark From b81a623dd8caf811649e3d009c5b0a330a1dd30c Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 8 Mar 2026 21:07:29 -0500 Subject: [PATCH 02/58] Add bic pid benchmark --- benchmarks/bic_pid/Snakefile | 106 ++++++++++++++++++ .../calo_pid_bic.org => bic_pid/bic_pid.org} | 0 benchmarks/bic_pid/config.yml | 57 ++++++++++ benchmarks/bic_pid/requirements.txt | 7 ++ benchmarks/calo_pid/config.yml | 6 +- 5 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 benchmarks/bic_pid/Snakefile rename benchmarks/{calo_pid/calo_pid_bic.org => bic_pid/bic_pid.org} (100%) create mode 100644 benchmarks/bic_pid/config.yml create mode 100644 benchmarks/bic_pid/requirements.txt diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile new file mode 100644 index 00000000..c734be29 --- /dev/null +++ b/benchmarks/bic_pid/Snakefile @@ -0,0 +1,106 @@ +def format_energy_for_dd4hep(s): + return s.rstrip("kMGeV") + "*" + s.lstrip("0123456789") + + +rule bic_pid_sim: + input: + warmup="warmup.edm4hep.root", + geometry_lib=find_epic_libraries(), + output: + "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root", + log: + "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root.log", + wildcard_constraints: + PARTICLE="(e-|pi-)", + ENERGY="[0-9]+[kMG]eV", + THETA_MIN="[0-9]+", + THETA_MAX="[0-9]+", + INDEX=r"\d{4}", + params: + N_EVENTS=1000, + SEED=lambda wildcards: "1" + wildcards.INDEX, + 
DETECTOR_PATH=os.environ["DETECTOR_PATH"], + DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG, + ENERGY=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY), + THETA_MIN=lambda wildcards: wildcards.THETA_MIN, + THETA_MAX=lambda wildcards: wildcards.THETA_MAX, + DD4HEP_HASH=get_spack_package_hash("dd4hep"), + NPSIM_HASH=get_spack_package_hash("npsim"), + cache: True + shell: + """ +set -m # monitor mode to prevent lingering processes +exec npsim \ + --runType batch \ + --enableGun \ + --gun.momentumMin "{params.ENERGY}" \ + --gun.momentumMax "{params.ENERGY}" \ + --gun.thetaMin "{wildcards.THETA_MIN}*deg" \ + --gun.thetaMax "{wildcards.THETA_MAX}*deg" \ + --gun.particle {wildcards.PARTICLE} \ + --gun.distribution eta \ + --random.seed {params.SEED} \ + --filter.tracker edep0 \ + -v WARNING \ + --numberOfEvents {params.N_EVENTS} \ + --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \ + --outputFile {output} +""" + + +rule bic_pid_recon: + input: + sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", + warmup="warmup.edm4hep.root", + output: + "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root", + log: + "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root.log", + wildcard_constraints: + INDEX=r"\d{4}", + params: + DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG, + EICRECON_HASH=get_spack_package_hash("eicrecon"), + cache: True + shell: + """ +DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ +exec eicrecon {input.sim} -Ppodio:output_file={output} \ + -Ppodio:output_collections=MCParticles,EcalBarrelScFiRecHits,EcalBarrelImagingRecHits +""" + + +rule bic_pid_input_list: + input: + electrons=expand( + 
"sim_output/bic_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.edm4eic.root", + ENERGY=["1GeV"], + PHASE_SPACE=["45to135deg"], + INDEX=range(100), + ), + output: + "listing/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst", + run: + with open(output[0], "wt") as fp: + fp.write("\n".join(input)) + + +rule bic_pid: + input: + electrons="listing/bic_pid/{DETECTOR_CONFIG}/e-.lst", + pions="listing/bic_pid/{DETECTOR_CONFIG}/pi-.lst", + matplotlibrc=".matplotlibrc", + script="benchmarks/bic_pid/bic_pid.py", + output: + directory("results/{DETECTOR_CONFIG}/bic_pid") + shell: + """ +env \ +MATPLOTLIBRC={input.matplotlibrc} \ +DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ +PLOT_TITLE={wildcards.DETECTOR_CONFIG} \ +INPUT_ELECTRONS="{input.electrons}" \ +INPUT_PIONS="{input.pions}" \ +OUTPUT_DIR={output} \ +python {input.script} +""" \ No newline at end of file diff --git a/benchmarks/calo_pid/calo_pid_bic.org b/benchmarks/bic_pid/bic_pid.org similarity index 100% rename from benchmarks/calo_pid/calo_pid_bic.org rename to benchmarks/bic_pid/bic_pid.org diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml new file mode 100644 index 00000000..cda68261 --- /dev/null +++ b/benchmarks/bic_pid/config.yml @@ -0,0 +1,57 @@ +sim:bic_pid: + extends: .det_benchmark + stage: simulate + image: $(BENCHMARKS_REGISTRY)$/eic_ci$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG) + parallel: + matrix: + - PARTICLE: ["e-", "pi-"] + ANGLE: [ + "45to135deg" + ] + ENERGY: [ + "1GeV" + ] + INDEX_RANGE: [ + "0 9", + "10 19", + "20 29", + "30 39", + "40 49", + "50 59", + "60 69", + "70 79", + "80 89", + "90 99", + ] + script: + - | + snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \ + $(seq --format="sim_output/bic_pid/epic_inner_detector/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) + +bench:bic_pid: + extends: .det_benchmark + stage: benchmarks + needs: + - 
["sim:bic_pid"] + image: $(BENCHMARKS_REGISTRY)$/eic_tf$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG) + variables: + CUDA_VISIBLE_DEVICES: "" + script: + - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps + - export PATH=$PYTHONUSERBASE/bin:$PATH + - python -m pip install --user snakemake + - python -m pip install --user -r benchmarks/bic_pid/requirements.txt + - snakemake $SNAKEMAKE_FLAGS --cores 1 results/epic_inner_detector/bic_pid + +collect_results:bic_pid: + extends: .det_benchmark + stage: collect + needs: + - "bench:bic_pid" + when: always + image: $(BENCHMARKS_REGISTRY)$/eic_ci$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG) + script: + - ls -lrht + - mv results{,_save}/ # move results directory out of the way to preserve it + - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/epic_inner_detector/bic_pid + - mv results{_save,}/ \ No newline at end of file diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt new file mode 100644 index 00000000..d32037a3 --- /dev/null +++ b/benchmarks/bic_pid/requirements.txt @@ -0,0 +1,7 @@ +awkward >= 2.4.0 +catboost +onnx +scikit-learn +uproot >= 5.2.0 +vector +tf2onnx \ No newline at end of file diff --git a/benchmarks/calo_pid/config.yml b/benchmarks/calo_pid/config.yml index c781cb40..88410a49 100644 --- a/benchmarks/calo_pid/config.yml +++ b/benchmarks/calo_pid/config.yml @@ -4,10 +4,6 @@ sim:calo_pid: parallel: matrix: - PARTICLE: ["e-", "pi-"] - ANGLE: [ - "45to135deg", - "130to177deg" - ] INDEX_RANGE: [ "0 9", "10 19", @@ -23,7 +19,7 @@ sim:calo_pid: script: - | snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \ - $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/${ANGLE}/${PARTICLE}_100MeVto20GeV_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) + $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/130to177deg/${PARTICLE}_100MeVto20GeV_130to177deg.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) bench:calo_pid: extends: 
.det_benchmark From cbccb2a16fe829a39f1a2326df09d2c9212ad0ed Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 9 Mar 2026 13:56:49 -0500 Subject: [PATCH 03/58] Fix some things in bic_pid --- benchmarks/bic_pid/bic_pid.org | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org index 1c4959c5..dd975c49 100644 --- a/benchmarks/bic_pid/bic_pid.org +++ b/benchmarks/bic_pid/bic_pid.org @@ -197,9 +197,9 @@ #+end_src #+begin_src jupyter-python - datadir = f'/kaggle/input/results-45to135deg-1gev-data' - plotdir = f'/kaggle/working/plots/{angle_label}' - output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}' + #datadir = f'/kaggle/input/results-45to135deg-1gev-data' + #plotdir = f'/kaggle/working/plots/{angle_label}' + #output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}' #+end_src #+begin_src jupyter-python @@ -207,9 +207,9 @@ print(f' - eta: {etas[angle_label]}') ## output directories - #output_directory = f'{args.workdir}/{angle_label}/{energy_setting}' - #plotdir = f'{output_directory}/plots' - #datadir = f'{output_directory}/data' + output_directory = f'{args.workdir}/{angle_label}/{energy_setting}' + plotdir = f'{output_directory}/plots' + datadir = f'{output_directory}/data' os.makedirs(plotdir, exist_ok=True) os.makedirs(datadir, exist_ok=True) print(f' - output data directory: {datadir}') @@ -362,7 +362,7 @@ #+end_src #+begin_src jupyter-python - print('Bencmarking test data') + print('Benchmarking test data') # benchmark test_dataset = make_dataset((xtest,)) prediction = model.predict(test_dataset.batch(1000)) @@ -445,8 +445,8 @@ with open(f'{output_directory}/results.json', 'w') as f: f.write(json.dumps(results, indent=2)) - print(f' - Found overal rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency') - print(f' - Results written to {datadir}/results.json') + print(f' - Found overall rejection 
{results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency') + print(f' - Results written to {output_directory}/results.json') #+end_src #+begin_src jupyter-python From 1d75227e534cdbf3e4bce3b75404b8e0b46ae963 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 29 Mar 2026 18:44:48 -0500 Subject: [PATCH 04/58] Include bic_pid in execution --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index abb866c7..42ab0af1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -128,6 +128,7 @@ include: - local: 'benchmarks/backwards_ecal/config.yml' - local: 'benchmarks/beamline/config.yml' - local: 'benchmarks/calo_pid/config.yml' + - local: 'benchmarks/bic_pid/config.yml' - local: 'benchmarks/campaign/config.yml' - local: 'benchmarks/ecal_gaps/config.yml' - local: 'benchmarks/far_forward_dvcs/config.yml' From 95242e965c9bfdf57d706b5083b5c5423a8967ac Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 29 Mar 2026 22:10:21 -0500 Subject: [PATCH 05/58] Fix the shell command substitution --- benchmarks/bic_pid/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index cda68261..d7e91459 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -1,7 +1,7 @@ sim:bic_pid: extends: .det_benchmark stage: simulate - image: $(BENCHMARKS_REGISTRY)$/eic_ci$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG) + image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG parallel: matrix: - PARTICLE: ["e-", "pi-"] @@ -33,7 +33,7 @@ bench:bic_pid: stage: benchmarks needs: - ["sim:bic_pid"] - image: $(BENCHMARKS_REGISTRY)$/eic_tf$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG) + image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG variables: CUDA_VISIBLE_DEVICES: "" script: @@ -49,7 +49,7 @@ collect_results:bic_pid: needs: - "bench:bic_pid" when: always - image: 
$(BENCHMARKS_REGISTRY)$/eic_ci$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG) + image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG script: - ls -lrht - mv results{,_save}/ # move results directory out of the way to preserve it From 35989a06639b3a5e55a266f62db33b3dad8e85c3 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 29 Mar 2026 22:28:39 -0500 Subject: [PATCH 06/58] Add new change in config --- benchmarks/bic_pid/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index d7e91459..0bf4b5f1 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -26,7 +26,7 @@ sim:bic_pid: script: - | snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \ - $(seq --format="sim_output/bic_pid/epic_inner_detector/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) + $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) bench:bic_pid: extends: .det_benchmark @@ -41,7 +41,7 @@ bench:bic_pid: - export PATH=$PYTHONUSERBASE/bin:$PATH - python -m pip install --user snakemake - python -m pip install --user -r benchmarks/bic_pid/requirements.txt - - snakemake $SNAKEMAKE_FLAGS --cores 1 results/epic_inner_detector/bic_pid + - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: extends: .det_benchmark @@ -52,6 +52,6 @@ collect_results:bic_pid: image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG script: - ls -lrht - - mv results{,_save}/ # move results directory out of the way to preserve it - - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/epic_inner_detector/bic_pid + - mv results{,_save}/ + - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid - mv results{_save,}/ \ No newline at end 
of file From 5ca3769ac180f20fcdb0332c8e484fae0f61bf2b Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 29 Mar 2026 22:46:46 -0500 Subject: [PATCH 07/58] Add new correction to the code --- benchmarks/bic_pid/Snakefile | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index c734be29..587982a4 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -2,19 +2,26 @@ def format_energy_for_dd4hep(s): return s.rstrip("kMGeV") + "*" + s.lstrip("0123456789") +def theta_min_from_phase_space(s): + return s.replace("deg", "").split("to")[0] + + +def theta_max_from_phase_space(s): + return s.replace("deg", "").split("to")[1] + + rule bic_pid_sim: input: warmup="warmup.edm4hep.root", geometry_lib=find_epic_libraries(), output: - "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root", + "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", log: - "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root.log", + "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log", wildcard_constraints: PARTICLE="(e-|pi-)", ENERGY="[0-9]+[kMG]eV", - THETA_MIN="[0-9]+", - THETA_MAX="[0-9]+", + PHASE_SPACE="[0-9]+to[0-9]+deg", INDEX=r"\d{4}", params: N_EVENTS=1000, @@ -22,8 +29,8 @@ rule bic_pid_sim: DETECTOR_PATH=os.environ["DETECTOR_PATH"], DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG, ENERGY=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY), - THETA_MIN=lambda wildcards: wildcards.THETA_MIN, - THETA_MAX=lambda wildcards: wildcards.THETA_MAX, + THETA_MIN=lambda wildcards: 
theta_min_from_phase_space(wildcards.PHASE_SPACE), + THETA_MAX=lambda wildcards: theta_max_from_phase_space(wildcards.PHASE_SPACE), DD4HEP_HASH=get_spack_package_hash("dd4hep"), NPSIM_HASH=get_spack_package_hash("npsim"), cache: True @@ -35,8 +42,8 @@ exec npsim \ --enableGun \ --gun.momentumMin "{params.ENERGY}" \ --gun.momentumMax "{params.ENERGY}" \ - --gun.thetaMin "{wildcards.THETA_MIN}*deg" \ - --gun.thetaMax "{wildcards.THETA_MAX}*deg" \ + --gun.thetaMin "{params.THETA_MIN}*deg" \ + --gun.thetaMax "{params.THETA_MAX}*deg" \ --gun.particle {wildcards.PARTICLE} \ --gun.distribution eta \ --random.seed {params.SEED} \ @@ -57,6 +64,9 @@ rule bic_pid_recon: log: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root.log", wildcard_constraints: + PARTICLE="(e-|pi-)", + ENERGY="[0-9]+[kMG]eV", + PHASE_SPACE="[0-9]+to[0-9]+deg", INDEX=r"\d{4}", params: DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG, @@ -72,7 +82,7 @@ exec eicrecon {input.sim} -Ppodio:output_file={output} \ rule bic_pid_input_list: input: - electrons=expand( + files=expand( "sim_output/bic_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.edm4eic.root", ENERGY=["1GeV"], PHASE_SPACE=["45to135deg"], @@ -82,7 +92,7 @@ rule bic_pid_input_list: "listing/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst", run: with open(output[0], "wt") as fp: - fp.write("\n".join(input)) + fp.write("\n".join(input.files)) rule bic_pid: From 859a327bb08cf2e16f125d86c48ba10ec975563d Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 5 Apr 2026 09:16:51 -0500 Subject: [PATCH 08/58] A correction in the Snakefile of the repo (add bic_pid snakefile) --- Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Snakefile b/Snakefile index 59443fe0..16e2fbe6 100644 --- a/Snakefile +++ b/Snakefile @@ -48,6 +48,7 @@ include: "benchmarks/backwards_ecal/Snakefile" include: 
"benchmarks/barrel_ecal/Snakefile" include: "benchmarks/beamline/Snakefile" include: "benchmarks/calo_pid/Snakefile" +include: "benchmarks/bic_pid/Snakefile" include: "benchmarks/campaign/Snakefile" include: "benchmarks/ecal_gaps/Snakefile" include: "benchmarks/far_forward_dvcs/Snakefile" From d440bbeb8443f6f61b9f767d98e143d837d89ad8 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 5 Apr 2026 13:03:45 -0500 Subject: [PATCH 09/58] Add some corrections in the Snakefile and config.yml for the errors in the job --- benchmarks/bic_pid/Snakefile | 8 ++++---- benchmarks/bic_pid/config.yml | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 587982a4..2afa3496 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -35,9 +35,9 @@ rule bic_pid_sim: NPSIM_HASH=get_spack_package_hash("npsim"), cache: True shell: - """ -set -m # monitor mode to prevent lingering processes -exec npsim \ + r""" +set -m +npsim \ --runType batch \ --enableGun \ --gun.momentumMin "{params.ENERGY}" \ @@ -51,7 +51,7 @@ exec npsim \ -v WARNING \ --numberOfEvents {params.N_EVENTS} \ --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \ - --outputFile {output} + --outputFile {output} > {log} 2>&1 """ diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 0bf4b5f1..5660c546 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -24,8 +24,9 @@ sim:bic_pid: "90 99", ] script: + - export DETECTOR_CONFIG=epic_craterlake - | - snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \ + snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) bench:bic_pid: @@ -37,6 +38,7 @@ bench:bic_pid: variables: CUDA_VISIBLE_DEVICES: "" script: + - export DETECTOR_CONFIG=epic_craterlake - export 
PYTHONUSERBASE=$LOCAL_DATA_PATH/deps - export PATH=$PYTHONUSERBASE/bin:$PATH - python -m pip install --user snakemake @@ -51,6 +53,7 @@ collect_results:bic_pid: when: always image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG script: + - export DETECTOR_CONFIG=epic_craterlake - ls -lrht - mv results{,_save}/ - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid From e4588b5029c5282c086fc4ce25730e872b3b495f Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 5 Apr 2026 22:24:18 -0500 Subject: [PATCH 10/58] Fix config.yml to solve the problem of the bench:bic_pid --- benchmarks/bic_pid/config.yml | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 5660c546..cf9e746d 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -2,15 +2,13 @@ sim:bic_pid: extends: .det_benchmark stage: simulate image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG + variables: + DETECTOR_CONFIG: epic_craterlake parallel: matrix: - PARTICLE: ["e-", "pi-"] - ANGLE: [ - "45to135deg" - ] - ENERGY: [ - "1GeV" - ] + ANGLE: ["45to135deg"] + ENERGY: ["1GeV"] INDEX_RANGE: [ "0 9", "10 19", @@ -24,7 +22,6 @@ sim:bic_pid: "90 99", ] script: - - export DETECTOR_CONFIG=epic_craterlake - | snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) @@ -37,8 +34,17 @@ bench:bic_pid: image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG variables: CUDA_VISIBLE_DEVICES: "" + DETECTOR_CONFIG: epic_craterlake + before_script: + - source .local/bin/env.sh + - ls -lrtha + - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output + - mkdir -p "${DETECTOR_CONFIG}" + - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" + - ln -s "../results" 
"${DETECTOR_CONFIG}/results" + - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" + - ls -lrtha script: - - export DETECTOR_CONFIG=epic_craterlake - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps - export PATH=$PYTHONUSERBASE/bin:$PATH - python -m pip install --user snakemake @@ -52,8 +58,9 @@ collect_results:bic_pid: - "bench:bic_pid" when: always image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG + variables: + DETECTOR_CONFIG: epic_craterlake script: - - export DETECTOR_CONFIG=epic_craterlake - ls -lrht - mv results{,_save}/ - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid From 6af00e145cbb91572f848d91c063b099891321b0 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 5 Apr 2026 23:48:46 -0500 Subject: [PATCH 11/58] Fix bench:bic_pid to solve the user authorization problem --- benchmarks/bic_pid/config.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index cf9e746d..e53aff61 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -45,10 +45,8 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - ls -lrtha script: - - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps - - export PATH=$PYTHONUSERBASE/bin:$PATH - - python -m pip install --user snakemake - - python -m pip install --user -r benchmarks/bic_pid/requirements.txt + - python -m pip install snakemake + - python -m pip install -r benchmarks/bic_pid/requirements.txt - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: From ac283e7fa72f88bc682fc1113404bfd65bc5fe4b Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Thu, 16 Apr 2026 12:27:20 -0500 Subject: [PATCH 12/58] warmup problem --- benchmarks/bic_pid/Snakefile | 4 ++-- benchmarks/bic_pid/config.yml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 2afa3496..5338caa1 100644
--- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -12,7 +12,7 @@ def theta_max_from_phase_space(s): rule bic_pid_sim: input: - warmup="warmup.edm4hep.root", + warmup=ancient("warmup.edm4hep.root"), geometry_lib=find_epic_libraries(), output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", @@ -58,7 +58,7 @@ npsim \ rule bic_pid_recon: input: sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", - warmup="warmup.edm4hep.root", + warmup=ancient("warmup.edm4hep.root"), output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root", log: diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index e53aff61..ef7616f0 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -43,6 +43,7 @@ bench:bic_pid: - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" - ln -s "../results" "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" + - touch warmup.edm4hep.root - ls -lrtha script: - python -m pip install snakemake From bdada3b9ed6548fc401043f87162c184ba45c9e0 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Thu, 16 Apr 2026 17:29:52 -0500 Subject: [PATCH 13/58] Go back to the changes --- benchmarks/bic_pid/Snakefile | 4 ++-- benchmarks/bic_pid/config.yml | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 5338caa1..2afa3496 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -12,7 +12,7 @@ def theta_max_from_phase_space(s): rule bic_pid_sim: input: - warmup=ancient("warmup.edm4hep.root"), + warmup="warmup.edm4hep.root", geometry_lib=find_epic_libraries(), output: 
"sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", @@ -58,7 +58,7 @@ npsim \ rule bic_pid_recon: input: sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", - warmup=ancient("warmup.edm4hep.root"), + warmup="warmup.edm4hep.root", output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root", log: diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index ef7616f0..e53aff61 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -43,7 +43,6 @@ bench:bic_pid: - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" - ln -s "../results" "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - - touch warmup.edm4hep.root - ls -lrtha script: - python -m pip install snakemake From c7d507ab8ff658c79188def1c3f075f44267b902 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Fri, 17 Apr 2026 00:36:47 -0500 Subject: [PATCH 14/58] Test warmup --- benchmarks/bic_pid/Snakefile | 4 ++-- benchmarks/bic_pid/config.yml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 2afa3496..5338caa1 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -12,7 +12,7 @@ def theta_max_from_phase_space(s): rule bic_pid_sim: input: - warmup="warmup.edm4hep.root", + warmup=ancient("warmup.edm4hep.root"), geometry_lib=find_epic_libraries(), output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", @@ -58,7 +58,7 @@ npsim \ rule bic_pid_recon: input: sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", - warmup="warmup.edm4hep.root", 
+ warmup=ancient("warmup.edm4hep.root"), output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root", log: diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index e53aff61..ef7616f0 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -43,6 +43,7 @@ bench:bic_pid: - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" - ln -s "../results" "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" + - touch warmup.edm4hep.root - ls -lrtha script: - python -m pip install snakemake From 9dad473938b2745afa97b8a9712478854eecf346 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Fri, 17 Apr 2026 08:45:26 -0500 Subject: [PATCH 15/58] Solve new problem in bench:bic_pid --- benchmarks/bic_pid/Snakefile | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 5338caa1..1b327344 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -1,3 +1,6 @@ +from glob import glob +import os + def format_energy_for_dd4hep(s): return s.rstrip("kMGeV") + "*" + s.lstrip("0123456789") @@ -81,18 +84,28 @@ exec eicrecon {input.sim} -Ppodio:output_file={output} \ rule bic_pid_input_list: - input: - files=expand( - "sim_output/bic_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.edm4eic.root", - ENERGY=["1GeV"], - PHASE_SPACE=["45to135deg"], - INDEX=range(100), - ), output: "listing/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst", + params: + energy="1GeV", + phase_space="45to135deg", run: + pattern = ( + f"sim_output/bic_pid/{wildcards.DETECTOR_CONFIG}/{wildcards.PARTICLE}/" + f"{params.energy}/{params.phase_space}/" + f"{wildcards.PARTICLE}_{params.energy}_{params.phase_space}.*.eicrecon.edm4eic.root" + ) + files = sorted(glob(pattern)) + + if 
len(files) != 100: + raise ValueError( + f"Expected 100 files for {wildcards.PARTICLE}, found {len(files)}.\n" + f"Pattern used: {pattern}" + ) + + os.makedirs(os.path.dirname(output[0]), exist_ok=True) with open(output[0], "wt") as fp: - fp.write("\n".join(input.files)) + fp.write("\n".join(files)) rule bic_pid: From 632077b7ce24adcb6f634569bca9bbdfedf2dbde Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Fri, 17 Apr 2026 09:38:14 -0500 Subject: [PATCH 16/58] changes in sim and bench --- benchmarks/bic_pid/config.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index ef7616f0..356cd12e 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -25,6 +25,14 @@ sim:bic_pid: - | snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) + - mkdir -p transferred_sim_output + - cp -aL sim_output/bic_pid transferred_sim_output/ + artifacts: + when: always + paths: + - transferred_sim_output/ + - .snakemake/log/ + bench:bic_pid: extends: .det_benchmark @@ -38,17 +46,19 @@ bench:bic_pid: before_script: - source .local/bin/env.sh - ls -lrtha - - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output + - mkdir -p sim_output + - cp -a transferred_sim_output/bic_pid sim_output/ - mkdir -p "${DETECTOR_CONFIG}" - - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" - - ln -s "../results" "${DETECTOR_CONFIG}/results" + - ln -s ../results "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - touch warmup.edm4hep.root + - find sim_output/bic_pid | head -50 - ls -lrtha script: - python -m pip install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + collect_results:bic_pid: extends: .det_benchmark 
From 0c70b7ed2fed5f69ab72b244c006709c89a2594c Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Fri, 17 Apr 2026 11:43:50 -0500 Subject: [PATCH 17/58] Remove warmup and transferred things --- benchmarks/bic_pid/Snakefile | 4 ++-- benchmarks/bic_pid/config.yml | 16 ++++------------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 1b327344..379667a2 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -15,7 +15,7 @@ def theta_max_from_phase_space(s): rule bic_pid_sim: input: - warmup=ancient("warmup.edm4hep.root"), + warmup="warmup.edm4hep.root", geometry_lib=find_epic_libraries(), output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", @@ -61,7 +61,7 @@ npsim \ rule bic_pid_recon: input: sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", - warmup=ancient("warmup.edm4hep.root"), + warmup="warmup.edm4hep.root", output: "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root", log: diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 356cd12e..e834e2bf 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -25,13 +25,6 @@ sim:bic_pid: - | snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) - - mkdir -p transferred_sim_output - - cp -aL sim_output/bic_pid transferred_sim_output/ - artifacts: - when: always - paths: - - transferred_sim_output/ - - .snakemake/log/ bench:bic_pid: @@ -46,13 +39,12 @@ bench:bic_pid: before_script: - source .local/bin/env.sh - ls -lrtha - - mkdir -p sim_output - - cp -a transferred_sim_output/bic_pid
sim_output/ + - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output - mkdir -p "${DETECTOR_CONFIG}" - - ln -s ../results "${DETECTOR_CONFIG}/results" + - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" + - ln -s "../results" "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - - touch warmup.edm4hep.root - - find sim_output/bic_pid | head -50 + - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: - python -m pip install snakemake From 7cd8385daf2b43505ed0faeb10c1884fd4675cde Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 19 Apr 2026 16:45:26 -0500 Subject: [PATCH 18/58] Check again the timeout --- benchmarks/bic_pid/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index e834e2bf..88767036 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -50,7 +50,7 @@ bench:bic_pid: - python -m pip install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid - + collect_results:bic_pid: extends: .det_benchmark From 1eef1f957e1284d58662715b9698c87f607b8aeb Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 19 Apr 2026 18:30:15 -0500 Subject: [PATCH 19/58] Solve situation with files --- benchmarks/bic_pid/config.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 88767036..267f1eb2 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -25,13 +25,21 @@ sim:bic_pid: - | snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) + - mkdir -p transferred_sim_output + - cp -aL sim_output/bic_pid transferred_sim_output/ + artifacts: + 
when: always + paths: + - transferred_sim_output/ + - .snakemake/log/ bench:bic_pid: extends: .det_benchmark stage: benchmarks needs: - - ["sim:bic_pid"] + - "common:setup" + - "sim:bic_pid" image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG variables: CUDA_VISIBLE_DEVICES: "" @@ -39,10 +47,11 @@ bench:bic_pid: before_script: - source .local/bin/env.sh - ls -lrtha - - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output + - mkdir -p sim_output + - cp -a transferred_sim_output/bic_pid sim_output/ - mkdir -p "${DETECTOR_CONFIG}" - - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" - - ln -s "../results" "${DETECTOR_CONFIG}/results" + - ln -s ../sim_output "${DETECTOR_CONFIG}/sim_output" + - ln -s ../results "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha @@ -56,6 +65,7 @@ collect_results:bic_pid: extends: .det_benchmark stage: collect needs: + - "common:setup" - "bench:bic_pid" when: always image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG From 79c70de21e2b48be2432e9b4114dcb58f52bdf22 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 19 Apr 2026 20:01:13 -0500 Subject: [PATCH 20/58] Solve new error --- benchmarks/bic_pid/config.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 267f1eb2..a0e44016 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -22,6 +22,7 @@ sim:bic_pid: "90 99", ] script: + - export DETECTOR_CONFIG=epic_craterlake - | snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) @@ -47,13 +48,14 @@ bench:bic_pid: before_script: - source .local/bin/env.sh - ls -lrtha + - find transferred_sim_output/bic_pid | head -50 || true - mkdir -p sim_output - 
cp -a transferred_sim_output/bic_pid sim_output/ - mkdir -p "${DETECTOR_CONFIG}" - ln -s ../sim_output "${DETECTOR_CONFIG}/sim_output" - ln -s ../results "${DETECTOR_CONFIG}/results" - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true + - find sim_output/bic_pid | head -50 || true - ls -lrtha script: - python -m pip install snakemake @@ -72,6 +74,7 @@ collect_results:bic_pid: variables: DETECTOR_CONFIG: epic_craterlake script: + - export DETECTOR_CONFIG=epic_craterlake - ls -lrht - mv results{,_save}/ - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid From d8fb95e565e3955e8bcfe9a6542db0306f60fe4b Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 19 Apr 2026 22:38:09 -0500 Subject: [PATCH 21/58] Change errors in bic_pid.org file --- benchmarks/bic_pid/bic_pid.org | 942 ++++++++++++++++++--------------- 1 file changed, 519 insertions(+), 423 deletions(-) diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org index dd975c49..35412310 100644 --- a/benchmarks/bic_pid/bic_pid.org +++ b/benchmarks/bic_pid/bic_pid.org @@ -1,490 +1,586 @@ -#+begin_src jupyter-python - import os - import math - from math import floor - - import pandas as pd - import numpy as np - - ## dangerous: silence annoying TF warnings , remove when running on new systems or debugging - os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # this MUST come before any tf call. 
- import tensorflow as tf - from tensorflow import keras - from tensorflow.keras import layers - - import matplotlib.pyplot as plt - from collections import OrderedDict - import json - import re +#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:benchmark :async yes :results drawer :exports both + +#+TITLE: ePIC BIC e/\pi separation benchmark +#+AUTHOR: Tomas Sosa +#+OPTIONS: d:t + +#+LATEX_CLASS_OPTIONS: [9pt,letter] +#+BIND: org-latex-image-default-width "" +#+BIND: org-latex-image-default-option "scale=0.3" +#+BIND: org-latex-images-centered nil +#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single")) +#+LATEX_HEADER: \usepackage[margin=1in]{geometry} +#+LATEX_HEADER: \setlength{\parindent}{0pt} +#+LATEX: \sloppy + +#+begin_src jupyter-python :results silent +import os +import math +from math import floor +from pathlib import Path +from collections import OrderedDict +import json +import re + +import pandas as pd +import numpy as np + +# Must be set before importing TensorFlow +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" + +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers + +import matplotlib.pyplot as plt #+end_src -#+begin_src jupyter-python - print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))) +* Parameters + +#+begin_src jupyter-python :results silent +DETECTOR_CONFIG = os.environ.get("DETECTOR_CONFIG") +PLOT_TITLE = os.environ.get("PLOT_TITLE", DETECTOR_CONFIG or "bic_pid") +INPUT_ELECTRONS = os.environ.get("INPUT_ELECTRONS") +INPUT_PIONS = os.environ.get("INPUT_PIONS") +OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./") + +ANGLE = os.environ.get("ANGLE", "45to135deg") +ENERGY = os.environ.get("ENERGY", "1GeV") +EPOCHS = int(os.environ.get("EPOCHS", "30")) +TARGET_IMBALANCE = float(os.environ.get("TARGET_IMBALANCE", "1.0")) +MODEL_NAME = os.environ.get("MODEL", "vgg-v2") +TRAIN_SAMPLE_CAP = int(os.environ.get("CAP_TRAIN_SAMPLE", "0")) + 
+output_root = Path(OUTPUT_DIR) +output_root.mkdir(parents=True, exist_ok=True) #+end_src #+begin_src jupyter-python - # Simulate argparse in Kaggle - class Args: - def __init__(self): - self.angle = ['45to135deg'] # ✅ choose your angles - self.energy = '1GeV' - self.cap_train_sample = 0 - self.epochs = 30 - self.target_imbalance = 1.0 - self.model = 'vgg-v2' - self.workdir = '/kaggle/working/output' # 🔧 all outputs go here - - args = Args() +print("Num GPUs Available:", len(tf.config.list_physical_devices("GPU"))) +print("DETECTOR_CONFIG =", DETECTOR_CONFIG) +print("PLOT_TITLE =", PLOT_TITLE) +print("INPUT_ELECTRONS =", INPUT_ELECTRONS) +print("INPUT_PIONS =", INPUT_PIONS) +print("OUTPUT_DIR =", OUTPUT_DIR) +print("ANGLE =", ANGLE) +print("ENERGY =", ENERGY) +print("EPOCHS =", EPOCHS) +print("TARGET_IMBALANCE=", TARGET_IMBALANCE) +print("MODEL_NAME =", MODEL_NAME) +print("TRAIN_SAMPLE_CAP=", TRAIN_SAMPLE_CAP) #+end_src -#+begin_src jupyter-python - ## Global efficiencies we want to optimize for (electron efficiencies) - kTargetEfficiency = .95 - ## other efficiency scenarios to cut the ML on - kAlternativeEfficiencies = np.arange(.5, 1., .05) - - ## setting - #angle_settings=['eta0.0', 'eta0.5n', 'eta0.5p', 'eta1.0n', 'eta1.0p'] - #angle_settings=['eta0.0', 'eta1.0p'] - #angle_settings=['eta0.0'] - angle_settings=args.angle - energy_setting= args.energy - ## MeV or GeV - energy_GeV = float(energy_setting[:-3]) * (1 if energy_setting[-3:] == 'GeV' else 1/1000.) - def eta_from_angle(angle_label): - match = re.match(r"(\d+)to(\d+)deg", angle_label) - if match: - theta1 = float(match.group(1)) - theta2 = float(match.group(2)) - mean_theta_deg = (theta1 + theta2) / 2.0 - mean_theta_rad = np.deg2rad(mean_theta_deg) - eta = -np.log(np.tan(mean_theta_rad / 2)) - return eta - else: - raise ValueError(f"Cannot parse eta from angle label: {angle_label}") - - etas = {} - for setting in angle_settings: - if setting.startswith("eta"): - val = float(setting[3:-1]) - sign = -1. 
if setting[-1] == 'n' else 1. - etas[setting] = val * sign - elif "deg" in setting: - etas[setting] = eta_from_angle(setting) - else: - etas[setting] = 0.0 - - print(f'E/p scan for {energy_setting}') - print(f' - detected energy: {energy_GeV} GeV') - print(f' - eta ranges: {angle_settings}') +* Plotting setup + +#+begin_src jupyter-python :results silent +import matplotlib as mpl + +def setup_presentation_style(): + mpl.rcParams.update(mpl.rcParamsDefault) + plt.style.use("ggplot") + plt.rcParams.update({ + "axes.labelsize": 12, + "axes.titlesize": 13, + "figure.titlesize": 13, + "figure.figsize": (8, 6), + "legend.fontsize": 11, + "xtick.labelsize": 11, + "ytick.labelsize": 11, + "pgf.rcfonts": False, + }) + +setup_presentation_style() #+end_src -#+begin_src jupyter-python - ## set ML configuration - kTrainSampleCap = args.cap_train_sample - kEpochs = args.epochs - kTestSize = .2 - kValidateSize = .1 - kTargetImbalance = args.target_imbalance - kPionWeightCap = 1.00 - kElectronLabel = 1 - kPionLabel = 0 - kModel = args.model - - print('ML configuration:') - print(f' - Number of epochs: {kEpochs}') - if kTrainSampleCap > 0: - print(f' - Training sample cap: {kTrainSampleCap}') - print(f' - Validation fraction: {kValidateSize}') - print(f' - Test fraction: {kTestSize}') - print(f' - Target pi:E imbalance: {kTargetImbalance}') - print(f' - Upper cap on pion weights: {kPionWeightCap}') - print(f' - Model: {kModel}') +* Analysis setup + +#+begin_src jupyter-python :results silent +kTargetEfficiency = 0.95 +kAlternativeEfficiencies = np.arange(0.5, 1.0, 0.05) + +angle_settings = [ANGLE] +energy_setting = ENERGY +energy_GeV = float(energy_setting[:-3]) * (1.0 if energy_setting.endswith("GeV") else 1.0 / 1000.0) + +def eta_from_angle(angle_label): + match = re.match(r"(\d+)to(\d+)deg", angle_label) + if match: + theta1 = float(match.group(1)) + theta2 = float(match.group(2)) + mean_theta_deg = (theta1 + theta2) / 2.0 + mean_theta_rad = np.deg2rad(mean_theta_deg) + eta = 
-np.log(np.tan(mean_theta_rad / 2.0)) + return eta + raise ValueError(f"Cannot parse eta from angle label: {angle_label}") + +etas = {} +for setting in angle_settings: + if setting.startswith("eta"): + val = float(setting[3:-1]) + sign = -1.0 if setting[-1] == "n" else 1.0 + etas[setting] = val * sign + elif "deg" in setting: + etas[setting] = eta_from_angle(setting) + else: + etas[setting] = 0.0 #+end_src #+begin_src jupyter-python - def get_dimensions(df): - max_idx = df.index.max() - min_idx = df.index.min() - max_idx = np.array([v if type(v) != str else 0 for v in max_idx]) - min_idx = np.array([v if type(v) != str else 0 for v in min_idx]) - return {k: v for (k, v) in zip(('event', '_', 'layer', 'hit'), (max_idx - min_idx + 1))} - - ## boiler-plate for in-memory datasets - def make_dataset(fields): - dataset = tf.data.Dataset.from_tensor_slices(fields) - ## do magic to avoid shard warnings of operating on DATA instead of FILE - options = tf.data.Options() - options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA - return dataset.with_options(options) +print(f"E/p scan for {energy_setting}") +print(f" - detected energy: {energy_GeV} GeV") +print(f" - eta ranges: {angle_settings}") #+end_src -#+begin_src jupyter-python - ## Chaos CNN model - def build_old(input_shape, n_labels=2): - my_model = keras.Sequential([ - keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape), - keras.layers.MaxPooling2D((2, 2), strides=2), - keras.layers.Dropout(0.25), - keras.layers.Conv2D(128, (2, 2), padding='same', activation='relu'), - keras.layers.MaxPooling2D((2, 2), strides=2), - keras.layers.Conv2D(64, (2, 2), padding='same', activation='relu'), - keras.layers.MaxPooling2D((2, 2), strides=2), - keras.layers.Dropout(0.25), - - keras.layers.Flatten(), - keras.layers.Dense(128, activation='relu'), - #keras.layers.Dropout(0.25), - keras.layers.Dense(32, activation='relu'), - keras.layers.Dense(n_labels, 
activation='softmax') - ]) - return my_model - - ## Slightly beefier VGG-style CNN - def build_vgg_v1(input_shape, n_labels=2): - my_model = keras.Sequential([ - keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape), - keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same'), - keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), - keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), - keras.layers.Flatten(), - keras.layers.Dense(1024, activation='relu'), - keras.layers.Dense(512, activation='relu'), - keras.layers.Dense(n_labels, activation='softmax') - ]) - - return my_model - - def build_vgg_v2(input_shape, n_labels=2): - my_model = keras.Sequential([ - keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape), - keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same'), - keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), - keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2), - keras.layers.Flatten(), - keras.layers.Dense(1024, activation='relu'), - keras.layers.Dense(1024, activation='relu'), - keras.layers.Dense(n_labels, activation='softmax') - ]) - - return my_model +#+begin_src jupyter-python :results silent +kTrainSampleCap = TRAIN_SAMPLE_CAP +kEpochs = EPOCHS +kTestSize = 0.2 +kValidateSize = 0.1 +kTargetImbalance = TARGET_IMBALANCE +kPionWeightCap = 1.0 +kElectronLabel = 1 +kPionLabel = 0 +kModel = MODEL_NAME #+end_src #+begin_src jupyter-python - def build_model(input_shape, n_labels=2): - if kModel == 'old': - 
print(f'Building old') - return build_old(input_shape, n_labels) - elif kModel == 'vgg-v1': - print(f'Building vgg-v1') - return build_vgg_v1(input_shape, n_labels) - elif kModel == 'vgg-v2': - print(f'Building vgg-v2') - return build_vgg_v2(input_shape, n_labels) - print('Building default') - return build_vgg_v2(input_shape, n_labels) +print("ML configuration:") +print(f" - Number of epochs: {kEpochs}") +if kTrainSampleCap > 0: + print(f" - Training sample cap: {kTrainSampleCap}") +print(f" - Validation fraction: {kValidateSize}") +print(f" - Test fraction: {kTestSize}") +print(f" - Target pi:E imbalance: {kTargetImbalance}") +print(f" - Upper cap on pion weights: {kPionWeightCap:.2f}") +print(f" - Model: {kModel}") #+end_src -#+begin_src jupyter-python - angle_label=angle_settings[0] - print(angle_label) +* Helper functions + +#+begin_src jupyter-python :results silent +def get_dimensions(df): + max_idx = df.index.max() + min_idx = df.index.min() + max_idx = np.array([v if type(v) != str else 0 for v in max_idx]) + min_idx = np.array([v if type(v) != str else 0 for v in min_idx]) + return {k: v for (k, v) in zip(("event", "_", "layer", "hit"), (max_idx - min_idx + 1))} + +def make_dataset(fields): + dataset = tf.data.Dataset.from_tensor_slices(fields) + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA + return dataset.with_options(options) #+end_src -#+begin_src jupyter-python - #datadir = f'/kaggle/input/results-45to135deg-1gev-data' - #plotdir = f'/kaggle/working/plots/{angle_label}' - #output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}' +* Models + +#+begin_src jupyter-python :results silent +def build_old(input_shape, n_labels=2): + my_model = keras.Sequential([ + keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu", input_shape=input_shape), + keras.layers.MaxPooling2D((2, 2), strides=2), + keras.layers.Dropout(0.25), + keras.layers.Conv2D(128, (2, 
2), padding="same", activation="relu"), + keras.layers.MaxPooling2D((2, 2), strides=2), + keras.layers.Conv2D(64, (2, 2), padding="same", activation="relu"), + keras.layers.MaxPooling2D((2, 2), strides=2), + keras.layers.Dropout(0.25), + keras.layers.Flatten(), + keras.layers.Dense(128, activation="relu"), + keras.layers.Dense(32, activation="relu"), + keras.layers.Dense(n_labels, activation="softmax"), + ]) + return my_model + +def build_vgg_v1(input_shape, n_labels=2): + my_model = keras.Sequential([ + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same", input_shape=input_shape), + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"), + keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2), + keras.layers.Flatten(), + keras.layers.Dense(1024, activation="relu"), + keras.layers.Dense(512, activation="relu"), + keras.layers.Dense(n_labels, activation="softmax"), + ]) + return my_model + +def build_vgg_v2(input_shape, n_labels=2): + my_model = keras.Sequential([ + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same", input_shape=input_shape), + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"), + keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2), + keras.layers.Flatten(), + keras.layers.Dense(1024, activation="relu"), + keras.layers.Dense(1024, activation="relu"), + keras.layers.Dense(n_labels, activation="softmax"), + ]) + return my_model + +def 
build_model(input_shape, n_labels=2): + if kModel == "old": + print("Building old") + return build_old(input_shape, n_labels) + elif kModel == "vgg-v1": + print("Building vgg-v1") + return build_vgg_v1(input_shape, n_labels) + elif kModel == "vgg-v2": + print("Building vgg-v2") + return build_vgg_v2(input_shape, n_labels) + print("Building default") + return build_vgg_v2(input_shape, n_labels) #+end_src -#+begin_src jupyter-python - print('\nprocessing angle setting:', angle_label) - print(f' - eta: {etas[angle_label]}') - - ## output directories - output_directory = f'{args.workdir}/{angle_label}/{energy_setting}' - plotdir = f'{output_directory}/plots' - datadir = f'{output_directory}/data' - os.makedirs(plotdir, exist_ok=True) - os.makedirs(datadir, exist_ok=True) - print(f' - output data directory: {datadir}') - print(f' - output plot directory: {plotdir}') +* Output layout + +#+begin_src jupyter-python :results silent +angle_label = angle_settings[0] + +output_directory = f"{OUTPUT_DIR}/{angle_label}/{energy_setting}" +plotdir = f"{output_directory}/plots" +datadir = f"{output_directory}/data" + +os.makedirs(plotdir, exist_ok=True) +os.makedirs(datadir, exist_ok=True) #+end_src #+begin_src jupyter-python - print('Loading datasets: ') - print(f' - Loading {datadir}/hits.snappy.parquet') - df_data = pd.read_parquet(f'{datadir}/hits.snappy.parquet') - print(f' - Loading {datadir}/labels.snappy.parquet') - df_mc = pd.read_parquet(f'{datadir}/labels.snappy.parquet') +print("\nprocessing angle setting:", angle_label) +print(f" - eta: {etas[angle_label]}") +print(f" - output data directory: {datadir}") +print(f" - output plot directory: {plotdir}") #+end_src +* Load datasets + #+begin_src jupyter-python - ## calculate weight to achieve target imbalance - n_electrons = np.sum(df_mc['PDG'] == 11) - n_pions = np.sum(df_mc['PDG'] == -211) - imbalance = n_pions/n_electrons - kSuggestedWeight = min(n_electrons/n_pions*kTargetImbalance, kPionWeightCap) - print(f'Data set 
has relative class imbalance of {n_electrons} : {n_pions} = {imbalance}') - print(f' - target imbalance: {kTargetImbalance}') - print(f' - pion weight upper limit: {kPionWeightCap:.2f}') - print(f' - suggested pion weight {kSuggestedWeight:.2f}') +print("Loading datasets:") +print(f" - Loading {datadir}/hits.snappy.parquet") +df_data = pd.read_parquet(f"{datadir}/hits.snappy.parquet") + +print(f" - Loading {datadir}/labels.snappy.parquet") +df_mc = pd.read_parquet(f"{datadir}/labels.snappy.parquet") #+end_src #+begin_src jupyter-python - ## Load E/P data again for aggregate statistics, and to calculate the target efficiency - print(f'Loading E/P data from {datadir}/EoverP_results.csv') - cutdf = pd.read_csv(f'{datadir}/EoverP_results.csv').sort_values('rejection', ascending=False) - results_EoverP = {key: cutdf[key][0] for key in cutdf.keys()} - results_EoverP['max_layer'] = int(results_EoverP['max_layer']) ## get rid of the int64 which causes trouble with json - kTargetEfficiencyML = kTargetEfficiency / results_EoverP['efficiency'] - print(results_EoverP) - print(f'Deduced target efficiency for ML: {kTargetEfficiencyML:.3f}') +n_electrons = np.sum(df_mc["PDG"] == 11) +n_pions = np.sum(df_mc["PDG"] == -211) +imbalance = n_pions / n_electrons +kSuggestedWeight = min(n_electrons / n_pions * kTargetImbalance, kPionWeightCap) + +print(f"Data set has relative class imbalance of {n_electrons} : {n_pions} = {imbalance}") +print(f" - target imbalance: {kTargetImbalance}") +print(f" - pion weight upper limit: {kPionWeightCap:.2f}") +print(f" - suggested pion weight {kSuggestedWeight:.2f}") #+end_src #+begin_src jupyter-python - print('Formatting data objects') - dim = get_dimensions(df_data) - xdata_both = df_data.values.reshape(dim['event'], - dim['layer'], - dim['hit'], - len(df_data.columns)).astype(np.float32) - - ldata = df_mc['PDG'].map(lambda pdg: kElectronLabel if (pdg == 11) else kPionLabel).values - wdata = df_mc['PDG'].map(lambda pdg: 1 if (pdg == 11) else 
kSuggestedWeight).values +print(f"Loading E/P data from {datadir}/EoverP_results.csv") +cutdf = pd.read_csv(f"{datadir}/EoverP_results.csv").sort_values("rejection", ascending=False) +results_EoverP = {key: cutdf[key].iloc[0] for key in cutdf.columns} +results_EoverP["max_layer"] = int(results_EoverP["max_layer"]) +kTargetEfficiencyML = kTargetEfficiency / results_EoverP["efficiency"] + +print(results_EoverP) +print(f"Deduced target efficiency for ML: {kTargetEfficiencyML:.3f}") #+end_src +* Format tensors + #+begin_src jupyter-python - print('Shuffling data and separating samples') - ## shuffle data - index = np.arange(len(ldata)) - np.random.shuffle(index) - tot_len = len(index) - - n_valid = floor(tot_len * kValidateSize) - n_test = floor(tot_len * kTestSize) - n_train = tot_len - n_valid - n_test - if kTrainSampleCap > 0 and n_train > kTrainSampleCap: - print(f'Capping training sample size to {kTrainSampleCap}') - valid_over_train = n_valid / n_train - test_over_train = n_test / n_train - n_train = kTrainSampleCap - n_valid = floor(valid_over_train * n_train) - n_test = floor(test_over_train * n_train) - tot_len = n_train + n_valid + n_test - print(f'Sample sizes: {{n_train: {n_train}, n_valid: {n_valid}, n_test: {n_test}}}') +print("Formatting data objects") +dim = get_dimensions(df_data) + +xdata_both = df_data.values.reshape( + dim["event"], + dim["layer"], + dim["hit"], + len(df_data.columns) +).astype(np.float32) + +ldata = df_mc["PDG"].map(lambda pdg: kElectronLabel if pdg == 11 else kPionLabel).values +wdata = df_mc["PDG"].map(lambda pdg: 1 if pdg == 11 else kSuggestedWeight).values #+end_src #+begin_src jupyter-python - id_valid = index[:n_valid] - id_test = index[n_valid:n_valid + n_test] - id_train = index[n_valid + n_test:tot_len] - xtrain, xvalid, xtest = xdata_both[id_train], xdata_both[id_valid], xdata_both[id_test] - ltrain, lvalid, ltest = ldata[id_train], ldata[id_valid], ldata[id_test] - wtrain, wvalid = wdata[id_train], wdata[id_valid] 
+print("Shuffling data and separating samples") +index = np.arange(len(ldata)) +np.random.shuffle(index) +tot_len = len(index) + +n_valid = floor(tot_len * kValidateSize) +n_test = floor(tot_len * kTestSize) +n_train = tot_len - n_valid - n_test + +if kTrainSampleCap > 0 and n_train > kTrainSampleCap: + print(f"Capping training sample size to {kTrainSampleCap}") + valid_over_train = n_valid / n_train + test_over_train = n_test / n_train + n_train = kTrainSampleCap + n_valid = floor(valid_over_train * n_train) + n_test = floor(test_over_train * n_train) + tot_len = n_train + n_valid + n_test + +print(f"Sample sizes: {{n_train: {n_train}, n_valid: {n_valid}, n_test: {n_test}}}") +#+end_src + +#+begin_src jupyter-python :results silent +id_valid = index[:n_valid] +id_test = index[n_valid:n_valid + n_test] +id_train = index[n_valid + n_test:tot_len] + +xtrain, xvalid, xtest = xdata_both[id_train], xdata_both[id_valid], xdata_both[id_test] +ltrain, lvalid, ltest = ldata[id_train], ldata[id_valid], ldata[id_test] +wtrain, wvalid = wdata[id_train], wdata[id_valid] #+end_src +* Training + #+begin_src jupyter-python - print('Start training, using GPU resources') - gpu = tf.config.list_logical_devices('GPU') - strategy = tf.distribute.MirroredStrategy(gpu) if len(gpu) == 1 else tf.distribute.MirroredStrategy([gpu[0]]) - history = None - with strategy.scope(): - train_dataset = make_dataset((xtrain, ltrain, wtrain)) - valid_dataset = make_dataset((xvalid, lvalid, wvalid)) - - ## avoid warning that we are operating on DATA instead of FILE - options = tf.data.Options() - options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA - train_dataset = train_dataset.with_options(options) - valid_dataset = valid_dataset.with_options(options) - - model = build_model(input_shape=xtrain.shape[1:]) - model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3), - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), - 
weighted_metrics=['accuracy']) - history = model.fit(train_dataset.batch(2000), validation_data=valid_dataset.batch(1000), epochs=kEpochs) - os.makedirs(output_directory, exist_ok=True) +print("Start training") +gpus = tf.config.list_logical_devices("GPU") +if gpus: + strategy = tf.distribute.MirroredStrategy(devices=[d.name for d in gpus]) +else: + strategy = tf.distribute.get_strategy() + +history = None +with strategy.scope(): + train_dataset = make_dataset((xtrain, ltrain, wtrain)) + valid_dataset = make_dataset((xvalid, lvalid, wvalid)) + + options = tf.data.Options() + options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA + train_dataset = train_dataset.with_options(options) + valid_dataset = valid_dataset.with_options(options) + + model = build_model(input_shape=xtrain.shape[1:]) + model.compile( + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), + weighted_metrics=["accuracy"], + ) + history = model.fit( + train_dataset.batch(2000), + validation_data=valid_dataset.batch(1000), + epochs=kEpochs + ) + os.makedirs(output_directory, exist_ok=True) #+end_src +* Export ONNX + #+begin_src jupyter-python - import keras.backend as K - # Monkey-patch the missing function to avoid the crash - K.set_learning_phase = lambda flag: None - - import tensorflow as tf - import tf2onnx - - # Load your Keras model - #model = tf.keras.models.load_model("/epi_separation/results/45to135deg/1GeV/data/cnn_model_30epochs.h5") - - # Define a function to capture the input signature - @tf.function(input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)]) - def model_fn(input_tensor): - return model(input_tensor) - - # Convert to ONNX format - onnx_model, _ = tf2onnx.convert.from_function( - model_fn, - input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)], # This is important - opset=13, - 
output_path=f"{output_directory}/EcalBarrel_pi_rejection.onnx" - ) - - print("Model converted successfully to ONNX format!") +import keras.backend as K +K.set_learning_phase = lambda flag: None + +import tf2onnx + +@tf.function(input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)]) +def model_fn(input_tensor): + return model(input_tensor) + +onnx_model, _ = tf2onnx.convert.from_function( + model_fn, + input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)], + opset=13, + output_path=f"{output_directory}/EcalBarrel_pi_rejection.onnx" +) + +print("Model converted successfully to ONNX format!") #+end_src +* Learning curves + #+begin_src jupyter-python - print('Summarizing metrics') - fig, ax = plt.subplots(1, 2, figsize=(12,6)) - - ax[0].plot(history.history['loss']) - ax[0].plot(history.history['val_loss']) - ax[0].set_title('model loss') - ax[0].set_ylabel('loss') - ax[0].set_xlabel('epoch') - ax[0].legend(['train', 'validate'], loc='upper left') - - ax[1].plot(history.history['accuracy']) - ax[1].plot(history.history['val_accuracy']) - ax[1].set_title('accuracy') - ax[1].set_ylabel('accuracy') - ax[1].set_xlabel('epoch') - ax[1].legend(['train', 'validate'], loc='upper left') - ax[1].set_ylim(0, 1.1) - - fig.savefig(f'{plotdir}/ML_learning.pdf') +print("Summarizing metrics") +fig, ax = plt.subplots(1, 2, figsize=(12, 6)) + +ax[0].plot(history.history["loss"]) +ax[0].plot(history.history["val_loss"]) +ax[0].set_title("model loss") +ax[0].set_ylabel("loss") +ax[0].set_xlabel("epoch") +ax[0].legend(["train", "validate"], loc="upper left") + +ax[1].plot(history.history["accuracy"]) +ax[1].plot(history.history["val_accuracy"]) +ax[1].set_title("accuracy") +ax[1].set_ylabel("accuracy") +ax[1].set_xlabel("epoch") +ax[1].legend(["train", "validate"], loc="upper left") +ax[1].set_ylim(0, 1.1) + +fig.savefig(f"{plotdir}/ML_learning.pdf") +plt.close(fig) #+end_src +* Evaluation + #+begin_src jupyter-python - 
print('Benchmarking test data') - # benchmark - test_dataset = make_dataset((xtest,)) - prediction = model.predict(test_dataset.batch(1000)) +print("Benchmarking test data") +test_dataset = make_dataset((xtest,)) +prediction = model.predict(test_dataset.batch(1000)) #+end_src #+begin_src jupyter-python - print('Calculate aggregate e-pi rejection metrics') - - def calculate_metrics(target_efficiency=kTargetEfficiencyML, export_prediction=True): - ## find the target efficiency cut point and weight the electron results - ## to move the cross-over point into pions to fit this efficiency - ## this code is specific to two particles where (P_e + P_pi = 1) - efficiency_cut = np.percentile(prediction[ltest == kElectronLabel].T[kElectronLabel], - (1 - target_efficiency)*100) - target_weight = (1 - efficiency_cut) / efficiency_cut - - prediction_weights = np.ones(2) - prediction_weights[kElectronLabel] = target_weight - prediction_labels = np.argmax(prediction * prediction_weights, axis=1) - - - electron_predicted = [None, None] - probabilities = np.zeros(shape=(2,2)) - for i in [kPionLabel, kElectronLabel]: - mask = (ltest == i) - probabilities[i] = np.bincount(prediction_labels[mask])/float(np.sum(mask)) - electron_predicted[i] = prediction[mask].T[kElectronLabel] - - binomial_error = lambda eff, n: np.sqrt(n * eff * (1 - eff)) / n - inverse_error = lambda val, err: err / val**2 - - n_electron_test = np.sum(ltest == kElectronLabel) - n_pion_test = np.sum(ltest == kPionLabel) - - results_ML = OrderedDict({'target_particle': 'e-', - 'target_weight': target_weight, - 'target_efficiency': target_efficiency, - 'target_cut': efficiency_cut, - 'n_electrons': int(n_electron_test), - 'n_pions': int(n_pion_test), - 'probabilities': probabilities.tolist(), - 'efficiency': probabilities[kElectronLabel, kElectronLabel], - 'efficiency_error': binomial_error(probabilities[kElectronLabel, kElectronLabel], n_electron_test), - 'rejection': 1 / probabilities[kPionLabel, kElectronLabel], - 
'rejection_error': inverse_error(probabilities[kPionLabel, kElectronLabel], binomial_error(probabilities[kPionLabel, kElectronLabel], n_pion_test))}) - - ## calculate aggregate results from E/P + ML - results = OrderedDict({ - 'energy': energy_GeV, - 'eta': etas[angle_label], - 'angle': angle_label, - 'efficiency': results_EoverP['efficiency'] * results_ML['efficiency'], - 'efficiency_error': np.sqrt(results_EoverP['efficiency']**2 * results_ML['efficiency_error']**2 - + results_ML['efficiency']**2 * results_EoverP['efficiency_error']**2), - 'rejection': results_EoverP['rejection'] * results_ML['rejection'], - 'rejection_error': np.sqrt(results_EoverP['rejection']**2 * results_ML['rejection_error']**2 - + results_ML['rejection']**2 * results_EoverP['rejection_error']**2), - 'prob_cut': efficiency_cut, - 'EoverP': results_EoverP, - 'ML': results_ML}) - if export_prediction: - return results, electron_predicted - return results +print("Calculate aggregate e-pi rejection metrics") + +def calculate_metrics(target_efficiency=kTargetEfficiencyML, export_prediction=True): + efficiency_cut = np.percentile( + prediction[ltest == kElectronLabel].T[kElectronLabel], + (1 - target_efficiency) * 100 + ) + target_weight = (1 - efficiency_cut) / efficiency_cut + + prediction_weights = np.ones(2) + prediction_weights[kElectronLabel] = target_weight + prediction_labels = np.argmax(prediction * prediction_weights, axis=1) + + electron_predicted = [None, None] + probabilities = np.zeros(shape=(2, 2)) + for i in [kPionLabel, kElectronLabel]: + mask = (ltest == i) + probabilities[i] = np.bincount(prediction_labels[mask], minlength=2) / float(np.sum(mask)) + electron_predicted[i] = prediction[mask].T[kElectronLabel] + + binomial_error = lambda eff, n: np.sqrt(n * eff * (1 - eff)) / n + inverse_error = lambda val, err: err / val**2 + + n_electron_test = np.sum(ltest == kElectronLabel) + n_pion_test = np.sum(ltest == kPionLabel) + + results_ML = OrderedDict({ + "target_particle": "e-", + 
"target_weight": float(target_weight), + "target_efficiency": float(target_efficiency), + "target_cut": float(efficiency_cut), + "n_electrons": int(n_electron_test), + "n_pions": int(n_pion_test), + "probabilities": probabilities.tolist(), + "efficiency": float(probabilities[kElectronLabel, kElectronLabel]), + "efficiency_error": float(binomial_error(probabilities[kElectronLabel, kElectronLabel], n_electron_test)), + "rejection": float(1 / probabilities[kPionLabel, kElectronLabel]), + "rejection_error": float(inverse_error( + probabilities[kPionLabel, kElectronLabel], + binomial_error(probabilities[kPionLabel, kElectronLabel], n_pion_test) + )), + }) + + results = OrderedDict({ + "energy": float(energy_GeV), + "eta": float(etas[angle_label]), + "angle": angle_label, + "efficiency": float(results_EoverP["efficiency"] * results_ML["efficiency"]), + "efficiency_error": float(np.sqrt( + results_EoverP["efficiency"]**2 * results_ML["efficiency_error"]**2 + + results_ML["efficiency"]**2 * results_EoverP["efficiency_error"]**2 + )), + "rejection": float(results_EoverP["rejection"] * results_ML["rejection"]), + "rejection_error": float(np.sqrt( + results_EoverP["rejection"]**2 * results_ML["rejection_error"]**2 + + results_ML["rejection"]**2 * results_EoverP["rejection_error"]**2 + )), + "prob_cut": float(efficiency_cut), + "EoverP": results_EoverP, + "ML": results_ML, + }) + + if export_prediction: + return results, electron_predicted + return results #+end_src #+begin_src jupyter-python - results, electron_predicted = calculate_metrics() - results_ML = results['ML'] - test = electron_predicted - print(f'Calculating alternative target efficiency scenarios: {kAlternativeEfficiencies}') - results['scenarios'] = {} - for alternative_eff in kAlternativeEfficiencies: - target_eff_ml = alternative_eff / results_EoverP['efficiency'] - tmp_res = calculate_metrics(target_efficiency=target_eff_ml, export_prediction=False) - results['scenarios'][alternative_eff] = tmp_res +results, 
electron_predicted = calculate_metrics() +results_ML = results["ML"] +test = electron_predicted + +print(f"Calculating alternative target efficiency scenarios: {kAlternativeEfficiencies}") +results["scenarios"] = {} +for alternative_eff in kAlternativeEfficiencies: + target_eff_ml = alternative_eff / results_EoverP["efficiency"] + tmp_res = calculate_metrics(target_efficiency=target_eff_ml, export_prediction=False) + results["scenarios"][float(alternative_eff)] = tmp_res #+end_src #+begin_src jupyter-python - assert test is electron_predicted +assert test is electron_predicted + +with open(f"{output_directory}/results.json", "w") as f: + f.write(json.dumps(results, indent=2)) - with open(f'{output_directory}/results.json', 'w') as f: - f.write(json.dumps(results, indent=2)) - print(f' - Found overall rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency') - print(f' - Results written to {output_directory}/results.json') +print(f' - Found overall rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency') +print(f" - Results written to {output_directory}/results.json") #+end_src +* Rejection plot + #+begin_src jupyter-python - print('Plotting ML results') - # default color cycle of matplotlib - prop_cycle = plt.rcParams['axes.prop_cycle'] - colors = prop_cycle.by_key()['color'] - box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) - - parts = {kElectronLabel: r'e^-', kPionLabel: r'\pi^-'} - - fig, ax = plt.subplots(figsize=(12, 9), dpi=160) - effs = [] - for i in parts.keys(): - ax.hist(electron_predicted[i], bins=np.linspace(0, 1, 101), label='${}$'.format(parts[i]), - color=colors[i], ec=colors[i], alpha=0.5) - ax.axvline(x=results['prob_cut'], lw=2, color='k', ls='--') - eff_text = '\n'.join([r'$\epsilon_{{ML}}^{{e^-}} = {:.2f}$%'.format(results_ML['efficiency'] * 100.), - r'$R_{{ML}}^{{\pi^-}} = {:.1f}$'.format(results_ML['rejection']), - r'$\epsilon_{{E/p}}^{{e^-}} = 
{:.2f}$%'.format(results_EoverP['efficiency'] * 100.), - r'$R_{{E/p}}^{{\pi^-}} = {:.1f}$'.format(results_EoverP['rejection']) - ]) - data_to_axis = (ax.transAxes + ax.transData.inverted()).inverted() - ax.text(data_to_axis.transform((results['prob_cut'], 1))[0] + 0.01, 0.99, eff_text, fontsize=24, - transform=ax.transAxes, ha='left', va='top') - ax.set_yscale('log') - ax.set_ylabel('Counts', fontsize=24) - ax.set_xlabel(r'$P_{{{}}}$'.format(r'e^-'), fontsize=24) - ax.tick_params(direction='in', which='both', labelsize=24) - ax.legend(fontsize=24, ncol=4, loc='upper center', bbox_to_anchor=(0.5, 1.12),) - ax.text(0.05, .99, '\n'.join( - [r'{energy} at ${loc}$'.format(energy='1GeV', - loc=f'eta = {etas[angle_label]}'), - r'$R_{{\pi}} = {rejection:.1f}$ at $\epsilon_{{e^-}} = {efficiency:.2f}$%'.format( - rejection=results_EoverP['rejection'] * results_ML['rejection'], - efficiency=results_EoverP['efficiency'] * results_ML['efficiency'] * 100.)]), - ha='left', va='top', fontsize=24, transform=ax.transAxes) - fig.savefig(f'{plotdir}/ML_rejection.pdf') - - print('Done with this eta bin') +print("Plotting ML results") +prop_cycle = plt.rcParams["axes.prop_cycle"] +colors = prop_cycle.by_key()["color"] +parts = {kElectronLabel: r"e^-", kPionLabel: r"\pi^-"} + +fig, ax = plt.subplots(figsize=(12, 9), dpi=160) +for i in parts.keys(): + ax.hist( + electron_predicted[i], + bins=np.linspace(0, 1, 101), + label=f'${parts[i]}$', + color=colors[i], + ec=colors[i], + alpha=0.5 + ) + +ax.axvline(x=results["prob_cut"], lw=2, color="k", ls="--") + +eff_text = "\n".join([ + rf'$\epsilon_{{ML}}^{{e^-}} = {results_ML["efficiency"] * 100.:.2f}$%', + rf'$R_{{ML}}^{{\pi^-}} = {results_ML["rejection"]:.1f}$', + rf'$\epsilon_{{E/p}}^{{e^-}} = {results_EoverP["efficiency"] * 100.:.2f}$%', + rf'$R_{{E/p}}^{{\pi^-}} = {results_EoverP["rejection"]:.1f}$', +]) + +data_to_axis = (ax.transAxes + ax.transData.inverted()).inverted() +ax.text( + data_to_axis.transform((results["prob_cut"], 1))[0] + 
0.01, + 0.99, + eff_text, + fontsize=24, + transform=ax.transAxes, + ha="left", + va="top" +) + +ax.set_yscale("log") +ax.set_ylabel("Counts", fontsize=24) +ax.set_xlabel(r"$P_{e^-}$", fontsize=24) +ax.tick_params(direction="in", which="both", labelsize=24) +ax.legend(fontsize=24, ncol=4, loc="upper center", bbox_to_anchor=(0.5, 1.12)) + +ax.text( + 0.05, + 0.99, + "\n".join([ + rf"{energy_setting} at $\eta = {etas[angle_label]:.3f}$", + rf'$R_{{\pi}} = {results["rejection"]:.1f}$ at $\epsilon_{{e^-}} = {results["efficiency"] * 100.:.2f}$%', + ]), + ha="left", + va="top", + fontsize=24, + transform=ax.transAxes +) + +fig.savefig(f"{plotdir}/ML_rejection.pdf") +plt.close(fig) + +print("Done with this eta bin") #+end_src \ No newline at end of file From abf19f75026917c0089a0cf39b373b819365692f Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 20 Apr 2026 00:06:50 -0500 Subject: [PATCH 22/58] Changes in config and requirements --- benchmarks/bic_pid/config.yml | 4 ++++ benchmarks/bic_pid/requirements.txt | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index a0e44016..d7cb216d 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -58,8 +58,12 @@ bench:bic_pid: - find sim_output/bic_pid | head -50 || true - ls -lrtha script: + - python -m pip install --upgrade pip + - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras keras || true + - python -m pip install "tensorflow==2.18.0" - python -m pip install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt + - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index d32037a3..94afc652 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -1,7 +1,7 @@ 
-awkward >= 2.4.0 -catboost +pandas +numpy +matplotlib +tensorflow==2.18.0 +tf2onnx onnx -scikit-learn -uproot >= 5.2.0 -vector -tf2onnx \ No newline at end of file +pyarrow \ No newline at end of file From 2153f438ab1bc044c2782db54ccac27abf1cf3c8 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 20 Apr 2026 08:57:54 -0500 Subject: [PATCH 23/58] Solve problem with requirements of tf --- benchmarks/bic_pid/config.yml | 17 +++++++++-------- benchmarks/bic_pid/requirements.txt | 13 +++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index d7cb216d..787b69eb 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -57,14 +57,15 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find sim_output/bic_pid | head -50 || true - ls -lrtha - script: - - python -m pip install --upgrade pip - - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras keras || true - - python -m pip install "tensorflow==2.18.0" - - python -m pip install snakemake - - python -m pip install -r benchmarks/bic_pid/requirements.txt - - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" - - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid +script: + - python -m pip install snakemake + - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true + - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt + - python - <<'PY' +import tensorflow as tf +print("TF OK:", tf.__version__) +PY + - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 94afc652..3cc2b61a 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -1,7 +1,8 @@ -pandas -numpy -matplotlib -tensorflow==2.18.0 -tf2onnx +awkward>=2.4.0 +catboost onnx 
-pyarrow \ No newline at end of file +scikit-learn +uproot>=5.2.0 +vector +tf2onnx +tensorflow==2.20.0 \ No newline at end of file From 2759b0e258595e54077606f93a5eb688f462fb65 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 20 Apr 2026 09:04:40 -0500 Subject: [PATCH 24/58] SOlve problem with config file --- benchmarks/bic_pid/config.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 787b69eb..67bf2e64 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -61,10 +61,7 @@ script: - python -m pip install snakemake - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt - - python - <<'PY' -import tensorflow as tf -print("TF OK:", tf.__version__) -PY + - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From 9749d8f9ef80e20ddb69358f3cc14484e348321c Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 20 Apr 2026 09:11:14 -0500 Subject: [PATCH 25/58] Solve problem with config file --- benchmarks/bic_pid/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 67bf2e64..8021155e 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -57,12 +57,12 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find sim_output/bic_pid | head -50 || true - ls -lrtha -script: - - python -m pip install snakemake - - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true - - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt - - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" - - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + 
script: + - python -m pip install snakemake + - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true + - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt + - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" + - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: From 3447243953510a3607c208a62befcb03431dc4b9 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 20 Apr 2026 12:50:30 -0500 Subject: [PATCH 26/58] Solve problem with bench:bic_pid script --- benchmarks/bic_pid/config.yml | 4 ++-- benchmarks/bic_pid/requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 8021155e..27c99780 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -59,8 +59,8 @@ bench:bic_pid: - ls -lrtha script: - python -m pip install snakemake - - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true - - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt + - python -m pip install -r benchmarks/bic_pid/requirements.txt + - python -m pip install --upgrade --force-reinstall "protobuf==5.28.3" "python-dateutil==2.9.0.post0" - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 3cc2b61a..81d10e68 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -5,4 +5,5 @@ scikit-learn uproot>=5.2.0 vector tf2onnx -tensorflow==2.20.0 \ No newline at end of file +protobuf==5.28.3 +python-dateutil==2.9.0.post0 \ No newline at end of file From 627146bf9b97d230ff833faad344ca0c496e1868 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 26 Apr 2026 14:06:13 -0500 Subject: [PATCH 
27/58] Solve the problem with transferred_sim_output --- benchmarks/bic_pid/config.yml | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 27c99780..f336beab 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -26,37 +26,17 @@ sim:bic_pid: - | snakemake $SNAKEMAKE_FLAGS --cores 1 \ $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE}) - - mkdir -p transferred_sim_output - - cp -aL sim_output/bic_pid transferred_sim_output/ - artifacts: - when: always - paths: - - transferred_sim_output/ - - .snakemake/log/ bench:bic_pid: extends: .det_benchmark stage: benchmarks needs: - - "common:setup" - - "sim:bic_pid" + - ["sim:bic_pid"] image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG variables: CUDA_VISIBLE_DEVICES: "" DETECTOR_CONFIG: epic_craterlake - before_script: - - source .local/bin/env.sh - - ls -lrtha - - find transferred_sim_output/bic_pid | head -50 || true - - mkdir -p sim_output - - cp -a transferred_sim_output/bic_pid sim_output/ - - mkdir -p "${DETECTOR_CONFIG}" - - ln -s ../sim_output "${DETECTOR_CONFIG}/sim_output" - - ln -s ../results "${DETECTOR_CONFIG}/results" - - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - - find sim_output/bic_pid | head -50 || true - - ls -lrtha script: - python -m pip install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt @@ -69,7 +49,6 @@ collect_results:bic_pid: extends: .det_benchmark stage: collect needs: - - "common:setup" - "bench:bic_pid" when: always image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG From 3c47eab73b03670ad875fc9fcd25e8574bcaa8b7 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 26 Apr 2026 18:04:06 -0500 Subject: [PATCH 28/58] Restore bench:bic_pid --- benchmarks/bic_pid/config.yml | 10 ++++++++++ 1 file changed, 10 
insertions(+) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index f336beab..c8c02fa1 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -37,6 +37,16 @@ bench:bic_pid: variables: CUDA_VISIBLE_DEVICES: "" DETECTOR_CONFIG: epic_craterlake + before_script: + - source .local/bin/env.sh + - ls -lrtha + - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output + - mkdir -p "${DETECTOR_CONFIG}" + - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output" + - ln -s "../results" "${DETECTOR_CONFIG}/results" + - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" + - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true + - ls -lrtha script: - python -m pip install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt From b20e2cfbdfa9856cd5fa646f0a68da4b3fada0a7 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 26 Apr 2026 19:16:03 -0500 Subject: [PATCH 29/58] Solve problem with the reinstalled Tensorflow --- benchmarks/bic_pid/requirements.txt | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 81d10e68..88e48536 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -1,9 +1 @@ -awkward>=2.4.0 -catboost -onnx -scikit-learn -uproot>=5.2.0 -vector -tf2onnx -protobuf==5.28.3 -python-dateutil==2.9.0.post0 \ No newline at end of file +tf2onnx==1.17.0 \ No newline at end of file From 0ff7adb84707b99315c413dd3921b334a59b180a Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 26 Apr 2026 19:38:21 -0500 Subject: [PATCH 30/58] Delete line in config.yml file --- benchmarks/bic_pid/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index c8c02fa1..ff198a1f 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -50,7 +50,6 @@ bench:bic_pid: script: - python -m pip 
install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt - - python -m pip install --upgrade --force-reinstall "protobuf==5.28.3" "python-dateutil==2.9.0.post0" - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From db336474b6bb5a0ca86a30b66bd9163ca9a80ab7 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 26 Apr 2026 20:41:28 -0500 Subject: [PATCH 31/58] Solve protobuf mismatch in config.yml file --- benchmarks/bic_pid/config.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index ff198a1f..3c4155aa 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,9 +48,12 @@ bench:bic_pid: - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: - - python -m pip install snakemake - - python -m pip install -r benchmarks/bic_pid/requirements.txt - - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)" + - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps + - export PATH=$PYTHONUSERBASE/bin:$PATH + - python -m pip install --user snakemake + - python -m pip install --user -r benchmarks/bic_pid/requirements.txt + - python -m pip install --user --ignore-installed --no-deps "protobuf==5.28.3" + - python -c "import google.protobuf; print('protobuf', google.protobuf.__version__, google.protobuf.__file__); import tensorflow as tf; print('TF OK:', tf.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From 337f49734d3d74a4ce8a04b2ab04c16dd51ee3da Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 3 May 2026 11:10:31 -0500 Subject: [PATCH 32/58] Test new eic_tf --- benchmarks/bic_pid/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 3c4155aa..d58fb67e 100644 --- 
a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -71,4 +71,4 @@ collect_results:bic_pid: - ls -lrht - mv results{,_save}/ - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid - - mv results{_save,}/ \ No newline at end of file + - mv results{_save,}/ From 1fbe7bde94d04ebf1e4865937bc184687667e1ec Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 3 May 2026 13:37:34 -0500 Subject: [PATCH 33/58] Solve problem with --user installation --- benchmarks/bic_pid/config.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index d58fb67e..e529c2ee 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,12 +48,8 @@ bench:bic_pid: - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: - - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps - - export PATH=$PYTHONUSERBASE/bin:$PATH - - python -m pip install --user snakemake - - python -m pip install --user -r benchmarks/bic_pid/requirements.txt - - python -m pip install --user --ignore-installed --no-deps "protobuf==5.28.3" - - python -c "import google.protobuf; print('protobuf', google.protobuf.__version__, google.protobuf.__file__); import tensorflow as tf; print('TF OK:', tf.__version__)" + - python -m pip install snakemake + - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From aec9db4a105dae8c14029d9165009a85a36266d2 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 3 May 2026 17:50:23 -0500 Subject: [PATCH 34/58] Use temporary solution for protobuf --- benchmarks/bic_pid/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 
e529c2ee..c350714c 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,6 +48,7 @@ bench:bic_pid: - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: + - export TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK=true - python -m pip install snakemake - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From c6ad32bbd7ac7f47e75537ae31bac3f008b37492 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 3 May 2026 18:26:41 -0500 Subject: [PATCH 35/58] Inspect mismatch error --- benchmarks/bic_pid/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index c350714c..b6ccbaa0 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -50,6 +50,8 @@ bench:bic_pid: script: - export TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK=true - python -m pip install snakemake + - python -c "import google.protobuf.runtime_version as rv, pathlib; print(rv.__file__); text = pathlib.Path(rv.__file__).read_text(); print('flag_present=', 'TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK' in text)" + - python -c "import os; print('TEMP FLAG =', os.getenv('TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK'))" - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From cc3a60e84c323657a351ddfa4dbd494cd7e0ce00 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Wed, 13 May 2026 12:54:23 -0500 Subject: [PATCH 36/58] Test new protobuf --- benchmarks/bic_pid/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index b6ccbaa0..ff8009d2 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -20,7 +20,7 @@ sim:bic_pid: "70 79", "80 89", "90 99", - ] + ] script: - export DETECTOR_CONFIG=epic_craterlake - | From 4e26ec786698613d8afb59cc0f6e6b168cf0e7f3 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 09:05:53 -0500 Subject: [PATCH 37/58] Test new environment solution --- benchmarks/bic_pid/config.yml | 18 +++++++++++------- benchmarks/bic_pid/requirements.txt | 7 ++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index ff8009d2..3bef79b4 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -33,7 +33,7 @@ bench:bic_pid: stage: benchmarks needs: - ["sim:bic_pid"] - image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG + image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG variables: CUDA_VISIBLE_DEVICES: "" DETECTOR_CONFIG: epic_craterlake @@ -48,13 +48,17 @@ bench:bic_pid: - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: - - export TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK=true - - python -m pip install snakemake - - python -c "import google.protobuf.runtime_version as rv, pathlib; print(rv.__file__); text = pathlib.Path(rv.__file__).read_text(); print('flag_present=', 'TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK' in text)" - - python -c "import os; print('TEMP FLAG =', os.getenv('TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK'))" - - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" - - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + - | + python -m venv .venv + source .venv/bin/activate + + python -m pip install --upgrade pip setuptools wheel + 
python -m pip install snakemake + python -m pip install -r benchmarks/bic_pid/requirements.txt + + python -c "import tensorflow as tf, tf2onnx, pandas as pd, numpy as np, matplotlib, pyarrow; print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" + snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: extends: .det_benchmark diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 88e48536..edf56c11 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -1 +1,6 @@ -tf2onnx==1.17.0 \ No newline at end of file +tensorflow-cpu==2.20.0 +tf2onnx==1.17.0 +numpy +pandas +matplotlib +pyarrow \ No newline at end of file From 6891692e3d72abe0c9b87fc8e7f018c23c3c3e38 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 14:07:40 +0000 Subject: [PATCH 38/58] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmarks/bic_pid/Snakefile | 2 +- benchmarks/bic_pid/bic_pid.org | 2 +- benchmarks/bic_pid/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile index 379667a2..eaeec6d0 100644 --- a/benchmarks/bic_pid/Snakefile +++ b/benchmarks/bic_pid/Snakefile @@ -126,4 +126,4 @@ INPUT_ELECTRONS="{input.electrons}" \ INPUT_PIONS="{input.pions}" \ OUTPUT_DIR={output} \ python {input.script} -""" \ No newline at end of file +""" diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org index 35412310..6ef1e0c1 100644 --- a/benchmarks/bic_pid/bic_pid.org +++ b/benchmarks/bic_pid/bic_pid.org @@ -583,4 +583,4 @@ fig.savefig(f"{plotdir}/ML_rejection.pdf") plt.close(fig) print("Done with this eta bin") -#+end_src \ No newline at end of file +#+end_src diff --git a/benchmarks/bic_pid/requirements.txt 
b/benchmarks/bic_pid/requirements.txt index edf56c11..985e8845 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -3,4 +3,4 @@ tf2onnx==1.17.0 numpy pandas matplotlib -pyarrow \ No newline at end of file +pyarrow From 36cce02ba859275d1be3be7f80ecdae07454f722 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 13:44:13 -0500 Subject: [PATCH 39/58] Add code to generate inputs --- benchmarks/bic_pid/bic_pid.org | 672 ++++++++++++++++++++++++++++ benchmarks/bic_pid/requirements.txt | 9 +- 2 files changed, 680 insertions(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org index 35412310..f7d1f159 100644 --- a/benchmarks/bic_pid/bic_pid.org +++ b/benchmarks/bic_pid/bic_pid.org @@ -172,6 +172,208 @@ def make_dataset(fields): return dataset.with_options(options) #+end_src +* Podio helpers + +#+begin_src jupyter-python :results silent +class PodioData: + def __init__(self, events, branch, cut=None, default_vector="momentum"): + self.events = events + self.data = events[branch] + self.branch = branch + self.cut = cut + self.default_vector = default_vector + + def __getattr__(self, var): + return self.get(var) + + def filter(self, new_cut): + if self.cut is not None: + new_cut = np.logical_and(new_cut, self.cut) + return PodioData( + self.events, + self.branch, + cut=new_cut, + default_vector=self.default_vector, + ) + + def get(self, var, subvars=None, extra_cut=None): + if subvars is None: + subvars = [] + + if len(subvars): + result = [] + for v in subvars: + fullvar = f"{var}.{v}" + result.append(self.get(fullvar, extra_cut=extra_cut)) + return result + + result = self.data[f"{self.branch}.{var}"] + + if self.cut is not None or extra_cut is not None: + if self.cut is not None and extra_cut is not None: + cut = np.logical_and(self.cut, extra_cut) + elif self.cut is not None: + cut = self.cut + else: + cut = extra_cut + return result[cut] + + return result + + def 
get_vector(self, var=None, dim=None, extra_cut=None): + if var is None: + var = self.default_vector + if dim is None: + dim = ["x", "y", "z"] + return [x for x in self.get(var, subvars=dim, extra_cut=extra_cut)] + + def hypot(self, var=None, dim=None, extra_cut=None): + if dim is None: + dim = ["x", "y", "z"] + if not len(dim): + return 0.0 + return hypot(self, var=var, dim=dim, extra_cut=extra_cut) + + def azimuthal_angle(self, var=None, extra_cut=None): + return azimuthal_angle(self, var=var, extra_cut=extra_cut) + + def polar_angle(self, var=None, extra_cut=None): + return polar_angle(self, var=var, extra_cut=extra_cut) + + def eta(self, var=None, extra_cut=None): + return eta(self, var=var, extra_cut=extra_cut) + + def momentum(self, extra_cut=None): + return self.hypot("momentum", extra_cut=extra_cut) + + def transverse(self, var=None, extra_cut=None): + return self.hypot(var, dim=["x", "y"], extra_cut=extra_cut) + + +def _get_components(vector, **kwargs): + if hasattr(vector, "get_vector"): + return vector.get_vector(**kwargs) + return vector + + +def hypot(vector, **kwargs): + components = _get_components(vector, **kwargs) + res = components[0] ** 2 + for i in range(1, len(components)): + res = res + components[i] ** 2 + return np.sqrt(res) + + +def azimuthal_angle(vector, **kwargs): + if "dim" not in kwargs: + kwargs["dim"] = ["x", "y"] + components = _get_components(vector, **kwargs) + x = components[0] + y = components[1] + return np.arctan2(y, x) + + +def polar_angle(vector, **kwargs): + components = _get_components(vector, **kwargs) + r = hypot(components) + z = components[2] + return np.arccos(z / r) + + +def eta(vector, **kwargs): + theta = polar_angle(vector, **kwargs) + return -np.log(np.tan(theta / 2.0)) +#+end_src + +* Array and window helpers + +#+begin_src jupyter-python :results silent +import dask +import awkward as ak +import dask_awkward as dak +import numpy as np +from dask_awkward.lib.core import map_partitions + +## numpy-style clip array 
between min-max +def _clip(a, a_min, a_max): + ret = a + if a_min is not None: + is_outside = (a < a_min) + ret = ret * np.logical_not(is_outside) + a_min * is_outside + if a_max is not None: + is_outside = (a > a_max) + ret = ret * np.logical_not(is_outside) + a_max * is_outside + return ret + + +class _ClipFn: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, array): + return _clip(array, self.kwargs["a_min"], self.kwargs["a_max"]) + + +def clip(array, a_min, a_max): + fn = _ClipFn(a_min=a_min, a_max=a_max) + return map_partitions(fn, array, label="clip", output_divisions=1, meta=array._meta) + + +class _ArgsortFn: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, array): + return ak.argsort(array, **self.kwargs) + + +def argsort(array, axis=-1, ascending=True, stable=True, highlevel=True, behavior=None): + if axis == 0: + raise NotImplementedError("axis=0 not implemented here") + fn = _ArgsortFn( + axis=axis, + ascending=ascending, + stable=stable, + behavior=behavior, + ) + return map_partitions(fn, array, label="argsort", output_divisions=1) + + +class Window: + def __init__(self, name, interval, unit=None, tolerance=0.02): + self.name = name + self.interval = interval + self.step = (interval[0] / 2.0, interval[1] / 2.0) + self.unit = unit + self.tolerance = tolerance + + def linear_norm(self, values): + norm = (values - self.interval[0]) / (self.interval[1] - self.interval[0]) + + count = dak.sum(dak.num(norm)) + underflow = dak.sum(dak.num(norm[norm < 0])) + overflow = dak.sum(dak.num(norm[norm > 1])) + count, underflow, overflow = dask.compute(count, underflow, overflow) + + if underflow / count > self.tolerance: + self.interval[0] += self.step[0] + print( + f"Warning: large UNDERFLOW count in normalization window {self.name}: " + f"{underflow/count*100:.2f}%, growing the window to {self.interval} and trying again" + ) + return self.linear_norm(values) + + if overflow / count > self.tolerance: + 
self.interval[1] += self.step[1] + print( + f"Warning: large OVERFLOW count in normalization window {self.name}: " + f"{overflow/count*100:.2f}%, growing the window to {self.interval} and trying again" + ) + return self.linear_norm(values) + + return clip(norm, 0, 1) +#+end_src + * Models #+begin_src jupyter-python :results silent @@ -258,6 +460,475 @@ print(f" - output data directory: {datadir}") print(f" - output plot directory: {plotdir}") #+end_src +* E/p preprocessing + +#+begin_src jupyter-python :results silent +import os + +import hist +import dask_histogram as dh +import boost_histogram as bh +from matplotlib.ticker import MultipleLocator + +import uproot +import awkward as ak +import dask_awkward as dak +import dask +import pandas as pd + +## I/O bound so limit threads on large CPU linux nodes +if "arm64" not in os.uname(): + from multiprocessing.pool import ThreadPool + dask.config.set(pool=ThreadPool(6)) + +kTargetEfficiencyEOverP = 0.97 + +def read_input_list(path): + with open(path) as f: + return [line.strip() for line in f if line.strip()] + +electron_files = read_input_list(INPUT_ELECTRONS) +pion_files = read_input_list(INPUT_PIONS) + +all_input_files = electron_files + pion_files +#+end_src + +#+begin_src jupyter-python +print("Loading ROOT files for E/p preprocessing:") +for file in all_input_files[:10]: + print(" -", file) +if len(all_input_files) > 10: + print(f" ... 
and {len(all_input_files)-10} more files") +#+end_src + +#+begin_src jupyter-python :results silent +class ParticleData: + def __init__(self, h, efficiency=None, cut_idx=None): + if efficiency is None and cut_idx is None: + raise ValueError("Need either efficiency or cut index") + primary = cut_idx is None + self.count = h.sum() + self.norm_hist = hist.Hist(h / self.count) + if primary: + self.idx = self.find_ecut(efficiency) + else: + self.idx = cut_idx + self.e_cut = self.norm_hist.axes.centers[0][self.idx] + self.efficiency = np.sum(self.norm_hist.values()[self.idx:]) + self.efficiency_error = np.sqrt( + self.count * self.efficiency * (1 - self.efficiency) + ) / self.count + + def find_ecut(self, efficiency): + perc = np.cumsum(self.norm_hist.values()) + idx = len(perc[perc < 1.0 - efficiency]) + return idx + + +class EcutSeparationData: + def __init__(self, max_layer, ehist, pihist, efficiency=kTargetEfficiencyEOverP): + self.max_layer = max_layer + self.electron = ParticleData(ehist, efficiency=efficiency) + self.pion = ParticleData(pihist, cut_idx=self.electron.idx) + self.count_e = self.electron.count + self.count_pi = self.pion.count + self.efficiency = self.electron.efficiency + self.efficiency_error = self.electron.efficiency_error + self.rejection = 1.0 / self.pion.efficiency + self.rejection_error = self.rejection**2 * self.pion.efficiency_error + self.e_cut = self.electron.e_cut + + +class EcutSeparationResults: + def __init__(self): + self.raw = [] + self.fields = [ + "max_layer", + "count_e", + "count_pi", + "efficiency", + "efficiency_error", + "rejection", + "rejection_error", + "e_cut", + ] + + def append(self, rejection): + for field in self.fields: + if not hasattr(self, field): + setattr(self, field, []) + getattr(self, field).append(getattr(rejection, field)) + self.raw.append(rejection) + + def to_pandas(self): + data = [getattr(self, field) for field in self.fields] + return pd.DataFrame({k: v for (k, v) in zip(self.fields, data)}) 
+#+end_src + +#+begin_src jupyter-python +print("Building E/p preprocessing inputs") + +events = uproot.dask([f"{file}:events" for file in all_input_files]) + +gen = PodioData(events, "MCParticles") +scifi = PodioData(events, "EcalBarrelScFiRecHits") +astropix = PodioData(events, "EcalBarrelImagingRecHits") + +hits_in_calo = ((dak.num(scifi.layer, axis=1) > 0) & (dak.num(astropix.layer, axis=1) > 0)) +is_electron = (gen.PDG[:, 0] == 11) +is_pion = (gen.PDG[:, 0] == -211) + +gen_cut = gen.filter(hits_in_calo) +scifi_e = scifi.filter(hits_in_calo & is_electron) +scifi_pi = scifi.filter(hits_in_calo & is_pion) +gen_e = gen_cut.filter(is_electron) +gen_pi = gen_cut.filter(is_pion) +#+end_src + +#+begin_src jupyter-python +print("Making input diagnostic plots") + +fig, ax = plt.subplots(1, 3, figsize=(12, 4)) +hists = dask.compute({ + r"$P$ (GeV)": dh.histogram(gen_cut.momentum()[:, 0], bins=200, range=(0, 11), histogram=bh.Histogram), + r"$\eta$": dh.histogram(gen_cut.eta()[:, 0], bins=200, range=(-2, 2), histogram=bh.Histogram), + r"$\phi$ (deg.)": dh.histogram(gen_cut.azimuthal_angle()[:, 0] / 3.1415 * 180, bins=200, range=(-180, 180), histogram=bh.Histogram), +})[0] + +hists = {key: hist.Hist(hists[key]) for key in hists} +for i, key in enumerate(hists): + hists[key].plot1d(ax=ax[i], ls="-", color="darkblue") + ax[i].set_xlabel(key) + +fig.savefig(f"{plotdir}/diagnostic_input.pdf") +plt.close(fig) +#+end_src + +#+begin_src jupyter-python +print("Computing E/p by layer") + +edep_e = scifi_e.energy +layer_e = scifi_e.layer + +edep_pi = scifi_pi.energy +layer_pi = scifi_pi.layer + +max_layer = dak.max(layer_pi).compute() +print(f"Max ScFi layer = {max_layer}") + +mom_e = gen_e.momentum()[:, 0] +ratio_e = [dak.sum(edep_e[layer_e <= x] / mom_e, axis=1) for x in range(1, max_layer + 1)] + +mom_pi = gen_pi.momentum()[:, 0] +ratio_pi = [dak.sum(edep_pi[layer_pi <= x] / mom_pi, axis=1) for x in range(1, max_layer + 1)] +#+end_src + +#+begin_src jupyter-python 
+print("Building histograms for E/p scan") + +e_histo = [] +pi_histo = [] + +for x in range(max_layer): + e_histo.append( + dh.histogram(ratio_e[x], bins=1000, range=(0, 1.5), histogram=bh.Histogram) + ) + pi_histo.append( + dh.histogram(ratio_pi[x], bins=1000, range=(0, 1.5), histogram=bh.Histogram) + ) + +res_e = dask.compute(*e_histo) +res_pi = dask.compute(*pi_histo) +#+end_src + +#+begin_src jupyter-python +print("Plotting E/p scan") + +fig, ax = plt.subplots(int((max_layer + 2) / 3), 3, figsize=(12, 18)) + +results_eop = EcutSeparationResults() + +for idx in range(len(res_e)): + current_layer = idx + 1 + layer_result = EcutSeparationData( + current_layer, res_e[idx], res_pi[idx], efficiency=kTargetEfficiencyEOverP + ) + results_eop.append(layer_result) + + subax = ax[int(idx / 3), idx % 3] + stack = hist.Stack.from_dict({ + "$e$": layer_result.electron.norm_hist, + "$\\pi^-$": layer_result.pion.norm_hist, + }) + stack.plot(ax=subax, alpha=0.6, histtype="fill") + subax.axvline(x=layer_result.e_cut, color="k", ls="--", lw=2) + subax.set_xlabel("E/P") + subax.legend() + subax.text( + 0.4, kTargetEfficiencyEOverP, + "\n".join([ + rf"$layer \leq {current_layer}$", + rf"$\epsilon_e = {layer_result.efficiency:.2f} \pm {layer_result.efficiency_error:.2e}$", + rf"$R_\pi = {layer_result.rejection:.2f} \pm {layer_result.rejection_error:.2e}$", + ]), + transform=subax.transAxes, + fontsize=10, + va="top", + ha="center", + ) + +fig.savefig(f"{plotdir}/EoverP_scan.pdf") +plt.close(fig) +#+end_src + +#+begin_src jupyter-python +print(f"Saving E/p results to {datadir}/EoverP_results.csv") + +df_eop = results_eop.to_pandas() +df_eop_sorted = df_eop.sort_values("rejection", ascending=False) +df_eop_sorted.to_csv(f"{datadir}/EoverP_results.csv", index=False) +#+end_src + +#+begin_src jupyter-python +print("Making E/p optimization overview plot") + +prop_cycle = plt.rcParams["axes.prop_cycle"] +colors = prop_cycle.by_key()["color"] +box_props = dict(boxstyle="round", 
facecolor="white", alpha=0.5) + +fig, ax_cut = plt.subplots(figsize=(8, 8)) +ax_rejection = ax_cut.twinx() +ax_rejection.set_yscale("log") + +ax_cut.plot(df_eop.max_layer, df_eop.e_cut, ls="-", color=colors[0]) +ax_rejection.errorbar( + df_eop.max_layer, + df_eop.rejection, + yerr=df_eop.rejection_error, + fmt="o", + capsize=3, + color=colors[1], + label="$R_\\pi$", +) + +ax_cut.set_xlabel("Max ScFi Layer", fontsize=20) +ax_cut.set_ylabel("E/p Cut Position", color=colors[0], fontsize=22) + +ax_rejection.grid(axis="both", which="both", ls=":") +ax_rejection.xaxis.set_major_locator(MultipleLocator(5)) +ax_rejection.xaxis.set_minor_locator(MultipleLocator(1)) +ax_rejection.set_ylabel("Rejection Factor $R_\\pi$", color=colors[1], fontsize=20) + +ax_cut.set_title("Optimal $E/p$ cut versus max ScFi layer", fontsize=20) +ax_cut.tick_params(labelsize=15) +ax_rejection.tick_params(labelsize=15) +ax_cut.text( + 0.5, + 0.03, + rf"$\epsilon_{{e}}\geq {kTargetEfficiencyEOverP*100.:.2f}\%$", + transform=ax_cut.transAxes, + fontsize=20, + va="bottom", + ha="center", + bbox=box_props, +) + +fig.subplots_adjust(left=0.15, right=0.85) +fig.savefig(f"{plotdir}/EoverP_optimization.pdf") +plt.close(fig) + +print("Finished E/p preprocessing") +#+end_src + +* Feature preprocessing + +#+begin_src jupyter-python :results silent +pd.set_option("display.min_rows", 50) + +def data_features(data, n_hits=50, ltype="img", lval="0", loffset=0): + # raw hit r, eta, phi + r_h = data.hypot() + eta_h = data.eta() + phi_h = data.azimuthal_angle() + + # raw hit normalized energy + e_tot = dak.sum(data.energy, axis=1) + e_norm = data.energy / e_tot + + # logarithmic weighting based on hit energy + weights = clip(np.log(e_norm) + 5.6, 0, None) + tot_weight = dak.sum(weights, axis=1) + weights = weights / tot_weight + + # calculate central xyz hit position based on the weight + x, y, z = data.get_vector("position") + xc = dak.sum(x * weights, axis=1) + yc = dak.sum(y * weights, axis=1) + zc = dak.sum(z * 
weights, axis=1) + + # calculate central hit r, eta, phi + r_c = hypot([xc, yc, zc]) + eta_c = eta([xc, yc, zc], r=r_c) + phi_c = azimuthal_angle([xc, yc, zc]) + + dphi = phi_h - phi_c + dphi_low = (dphi < -math.pi) * 2.0 * math.pi + dphi_high = (dphi > math.pi) * 2.0 * math.pi + dphi_corr = dphi + dphi_low - dphi_high + dsphi = np.sin(dphi_corr * 0.5) + + # normalize and bind to window + r_norm = kWinR0.linear_norm(r_h) + eta_norm = kWinEta.linear_norm(eta_h - eta_c) + phi_norm = kWinPhi.linear_norm(dsphi) + + norm_data = { + "eh": e_norm, + "r0": r_norm, + "eta": eta_norm, + "phi": phi_norm, + } + + min_layer, max_layer = dask.compute(dak.min(data.layer), dak.max(data.layer)) + n_events = len(e_norm) + + # sort hits by descending hit energy + sort_idx = argsort(e_norm, ascending=False) + + sorted_data = { + key: [ + dak.pad_none( + norm_data[key][sort_idx][data.layer[sort_idx] == layer], + n_hits, + clip=True, + ) + for layer in range(min_layer, max_layer + 1) + ] + for key in norm_data + } + + computed_data = dask.compute(sorted_data)[0] + + raw_df = ak.to_dataframe( + ak.Array({ + key: ak.flatten(ak.concatenate(computed_data[key], axis=1)) + for key in computed_data + }) + ).astype(np.float32).fillna(0) + + index = [ + [ev for ev in range(1, n_events + 1)], + [ltype], + [layer for layer in range(min_layer + loffset, max_layer + loffset + 1)], + [hit for hit in range(1, n_hits + 1)], + ] + index = pd.MultiIndex.from_product(index, names=["event", "ltype", "layer", "hit"]) + + indexed_df = pd.DataFrame( + {key: raw_df[key].values for key in raw_df.keys()}, + index=index, + ) + indexed_df.loc[:, "lval"] = np.int32(lval) + + return indexed_df +#+end_src + +#+begin_src jupyter-python +print("Preparing feature-generation inputs") + +# Rebuild PodioData objects with position as default vector for hit features +gen = PodioData(events, "MCParticles") +scifi = PodioData(events, "EcalBarrelScFiRecHits", default_vector="position") +astropix = PodioData(events, 
"EcalBarrelImagingRecHits", default_vector="position") + +hits_in_calo = ((dak.num(scifi.layer, axis=1) > 0) & (dak.num(astropix.layer, axis=1) > 0)) +electron_or_pion = (gen.PDG[:, 0] == 11) | (gen.PDG[:, 0] == -211) + +print("Loading E/p cut results") +cutdf = pd.read_csv(f"{datadir}/EoverP_results.csv").sort_values("rejection", ascending=False) +results_EoverP = OrderedDict({key: cutdf[key].iloc[0] for key in cutdf.columns}) +print(results_EoverP) +#+end_src + +#+begin_src jupyter-python +print("Defining normalization windows") +kWinEta = Window("eta", [-0.3, 0.3]) +kWinPhi = Window("phi", [-0.4, 0.4], unit="rad") +kWinR0 = Window("R0", [500, 2000], unit="mm") +#+end_src + +#+begin_src jupyter-python +print("Applying E/p cut before feature generation") + +mom = gen.momentum()[:, 0] +passes_eoverp_cut = ( + dak.sum(scifi.energy[scifi.layer <= results_EoverP["max_layer"]] / mom, axis=1) + > results_EoverP["e_cut"] +) + +gen_good = gen.filter(hits_in_calo & electron_or_pion & passes_eoverp_cut) +scifi_good = scifi.filter(hits_in_calo & electron_or_pion & passes_eoverp_cut) +astropix_good = astropix.filter(hits_in_calo & electron_or_pion & passes_eoverp_cut) +#+end_src + +#+begin_src jupyter-python +print("Creating feature data structures (this may take a while)") + +print(" --> creating Astropix feature table") +df_astropix = data_features(astropix_good, n_hits=50, ltype="img", lval=0) + +print(" --> creating SciFi feature table") +df_scifi = data_features( + scifi_good, + n_hits=50, + ltype="scfi", + lval=1, + loffset=dak.max(astropix_good.layer).compute(), +) + +# keep the same behavior as the original script +df_scifi.eta = np.float32(0.0) + +print(" --> merging feature tables") +df_both = ( + pd.concat([df_astropix.reset_index(), df_scifi.reset_index()], ignore_index=True) + .set_index(["event", "ltype", "layer", "hit"]) + .sort_index() +) + +print(f"Saving feature table to {datadir}/hits.snappy.parquet") +df_both.to_parquet(f"{datadir}/hits.snappy.parquet", 
compression="snappy") +#+end_src + +#+begin_src jupyter-python +print("Formatting labels") + +padded_PDG = dak.map_partitions(ak.pad_none, gen_good.PDG, 1, axis=1) +padded_mom = dak.map_partitions(ak.pad_none, gen_good.momentum(), 1, axis=1) +padded_mass = dak.map_partitions(ak.pad_none, gen_good.mass, 1, axis=1) + +pdg0 = padded_PDG[:, 0] +moment0 = padded_mom[:, 0] +mass0 = padded_mass[:, 0] + +pdg0_filled = dak.map_partitions(ak.fill_none, pdg0, 0) +moment0_filled = dak.map_partitions(ak.fill_none, moment0, 0.0) +mass0_filled = dak.map_partitions(ak.fill_none, mass0, 0.0) + +mc_pdg, mc_p, mc_mass = dask.compute(pdg0_filled, moment0_filled, mass0_filled) + +df_mc = ak.to_dataframe( + ak.Array({ + "PDG": mc_pdg, + "P": mc_p, + "mass": mc_mass, + }) +).fillna(0) + +print(f"Saving labels to {datadir}/labels.snappy.parquet") +df_mc.to_parquet(f"{datadir}/labels.snappy.parquet", compression="snappy") +#+end_src + * Load datasets #+begin_src jupyter-python @@ -365,6 +1036,7 @@ with strategy.scope(): model.compile( optimizer=keras.optimizers.Adam(learning_rate=1e-3), loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), + metrics=["accuracy"], weighted_metrics=["accuracy"], ) history = model.fit( diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index edf56c11..697a6c44 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -3,4 +3,11 @@ tf2onnx==1.17.0 numpy pandas matplotlib -pyarrow \ No newline at end of file +pyarrow +uproot>=5.2.0 +awkward>=2.4.0 +dask +dask-awkward +hist +dask-histogram +boost-histogram \ No newline at end of file From 218219cb42e0811c68e8531f2df5e128cd795024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 18:46:26 +0000 Subject: [PATCH 40/58] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
benchmarks/bic_pid/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 697a6c44..975df6b7 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -10,4 +10,4 @@ dask dask-awkward hist dask-histogram -boost-histogram \ No newline at end of file +boost-histogram From 0ee50f8764d37468d51a08374ee45586ed095bc2 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 14:13:43 -0500 Subject: [PATCH 41/58] Test again --- benchmarks/bic_pid/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 3bef79b4..62f90e39 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -55,7 +55,6 @@ bench:bic_pid: python -m pip install --upgrade pip setuptools wheel python -m pip install snakemake python -m pip install -r benchmarks/bic_pid/requirements.txt - python -c "import tensorflow as tf, tf2onnx, pandas as pd, numpy as np, matplotlib, pyarrow; print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid From a4cac715c17da5faa229778441f0dd3a66cb204f Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 19:27:30 -0500 Subject: [PATCH 42/58] Use the correct environment --- benchmarks/bic_pid/config.yml | 8 ++++++-- benchmarks/bic_pid/requirements.txt | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 62f90e39..2fab7e17 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -49,16 +49,20 @@ bench:bic_pid: - ls -lrtha script: - | - python -m venv .venv + command -v python3.10 + python3.10 -V + + python3.10 -m venv .venv source .venv/bin/activate python -m pip install --upgrade pip setuptools wheel python -m 
pip install snakemake python -m pip install -r benchmarks/bic_pid/requirements.txt - python -c "import tensorflow as tf, tf2onnx, pandas as pd, numpy as np, matplotlib, pyarrow; print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)" + python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + collect_results:bic_pid: extends: .det_benchmark stage: collect diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 975df6b7..6cb9b27b 100644 --- a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -1,13 +1,13 @@ tensorflow-cpu==2.20.0 tf2onnx==1.17.0 -numpy -pandas -matplotlib -pyarrow -uproot>=5.2.0 -awkward>=2.4.0 -dask -dask-awkward -hist -dask-histogram -boost-histogram +numpy==1.23.2 +pandas==2.2.3 +matplotlib==3.10.3 +pyarrow==20.0.0 +uproot==5.0.3 +awkward==2.0.8 +dask==2023.2.1 +dask-awkward==2023.2.0 +dask-histogram==2023.2.0 +boost-histogram==1.5.2 +hist==2.8.1 \ No newline at end of file From 97506ac973cee0a9737510302f60dcd7622f40be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 May 2026 00:27:41 +0000 Subject: [PATCH 43/58] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmarks/bic_pid/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt index 6cb9b27b..a6c261e6 100644 --- 
a/benchmarks/bic_pid/requirements.txt +++ b/benchmarks/bic_pid/requirements.txt @@ -10,4 +10,4 @@ dask==2023.2.1 dask-awkward==2023.2.0 dask-histogram==2023.2.0 boost-histogram==1.5.2 -hist==2.8.1 \ No newline at end of file +hist==2.8.1 From c01ce13d1a9f9568244a2bc07d98f6cb41510196 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 20:12:25 -0500 Subject: [PATCH 44/58] Install environment --- benchmarks/bic_pid/config.yml | 21 ++++++++------------- benchmarks/bic_pid/environment.yml | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 benchmarks/bic_pid/environment.yml diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 2fab7e17..ecbd247a 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -37,6 +37,7 @@ bench:bic_pid: variables: CUDA_VISIBLE_DEVICES: "" DETECTOR_CONFIG: epic_craterlake + MAMBA_ROOT_PREFIX: "$LOCAL_DATA_PATH/micromamba" before_script: - source .local/bin/env.sh - ls -lrtha @@ -48,19 +49,13 @@ bench:bic_pid: - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: - - | - command -v python3.10 - python3.10 -V - - python3.10 -m venv .venv - source .venv/bin/activate - - python -m pip install --upgrade pip setuptools wheel - python -m pip install snakemake - python -m pip install -r benchmarks/bic_pid/requirements.txt - - python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + - mkdir -p mm + - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm + - 
mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml + - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install --upgrade pip + - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 + - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml new file mode 100644 index 00000000..1615d62e --- /dev/null +++ b/benchmarks/bic_pid/environment.yml @@ -0,0 +1,16 @@ +channels: + - conda-forge +dependencies: + - python=3.10 + - pip + - numpy=1.23.2 + - pandas=2.2.3 + - matplotlib=3.10.3 + - pyarrow=20.0.0 + - uproot=5.0.3 + - awkward=2.0.8 + - dask=2023.2.1 + - dask-awkward=2023.2.0 + - dask-histogram=2023.2.0 + - boost-histogram=1.5.2 + - hist=2.8.1 \ No newline at end of file From 23e1d6c48ef77ef199f76caf0759b58ee0d8eb18 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 May 2026 01:13:36 +0000 Subject: [PATCH 45/58] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmarks/bic_pid/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml index 
1615d62e..e787b06a 100644 --- a/benchmarks/bic_pid/environment.yml +++ b/benchmarks/bic_pid/environment.yml @@ -13,4 +13,4 @@ dependencies: - dask-awkward=2023.2.0 - dask-histogram=2023.2.0 - boost-histogram=1.5.2 - - hist=2.8.1 \ No newline at end of file + - hist=2.8.1 From b4706c1402fd96fa9aa32eab67eab4a4711f1aee Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 20:15:31 -0500 Subject: [PATCH 46/58] Create environment again --- benchmarks/bic_pid/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index ecbd247a..9bad34b4 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -57,7 +57,6 @@ bench:bic_pid: - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid - collect_results:bic_pid: extends: .det_benchmark stage: collect From 348b159b8ba2bb5718c02b012d94b28e58fa06a7 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 22:11:54 -0500 Subject: [PATCH 47/58] Solve python interpreter problem --- benchmarks/bic_pid/config.yml | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 9bad34b4..5c5b8644 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,14 +48,23 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find 
sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha - script: - - mkdir -p mm - - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm - - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml - - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install --upgrade pip - - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 - - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" - - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid +script: + - mkdir -p mm + - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm + - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml + + - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" + - export PYTHONNOUSERSITE=1 + - unset PYTHONPATH + + - "$BICPY" -V + - "$BICPY" -m pip install --upgrade pip + - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 + + - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); 
print('tf2onnx', tf2onnx.__version__)" + + - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + collect_results:bic_pid: extends: .det_benchmark From 0d7d32154063e68deb9f4501e8976a9d5028507d Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 22:13:41 -0500 Subject: [PATCH 48/58] Solve syntax problem --- benchmarks/bic_pid/config.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 5c5b8644..2732d7d1 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,22 +48,22 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha -script: - - mkdir -p mm - - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm - - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml + script: + - mkdir -p mm + - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm + - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml - - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" - - export PYTHONNOUSERSITE=1 - - unset PYTHONPATH + - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" + - export PYTHONNOUSERSITE=1 + - unset PYTHONPATH - - "$BICPY" -V - - "$BICPY" -m pip install --upgrade pip - - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 + - "$BICPY" -V + - "$BICPY" -m pip install --upgrade pip + - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 - - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', 
dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" - - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: From 170d5defdcf3704acc8995c85174734e81ef34b2 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 17 May 2026 22:18:38 -0500 Subject: [PATCH 49/58] Solve syntax problem again --- benchmarks/bic_pid/config.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 2732d7d1..56cf1c8f 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -49,22 +49,22 @@ bench:bic_pid: - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha script: - - mkdir -p mm - - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm - - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml - - - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" - - export PYTHONNOUSERSITE=1 - - unset PYTHONPATH + - | + mkdir -p mm + curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm + mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml - - "$BICPY" -V - - "$BICPY" -m pip install --upgrade pip - - "$BICPY" -m pip install snakemake 
tensorflow-cpu==2.20.0 tf2onnx==1.17.0 + export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" + export PYTHONNOUSERSITE=1 + unset PYTHONPATH - - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + "$BICPY" -V + "$BICPY" -m pip install --upgrade pip + "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 - - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: extends: .det_benchmark From 51d312a5f0ffcf4f716d9df334fc215dfae97e98 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 24 May 2026 22:48:40 -0500 Subject: [PATCH 50/58] Solve tensorflow compability problem --- benchmarks/bic_pid/config.yml | 9 +++++---- benchmarks/bic_pid/environment.yml | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 56cf1c8f..52457b47 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,8 +48,8 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - 
ls -lrtha - script: - - | +script: + - | mkdir -p mm curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml @@ -60,12 +60,13 @@ bench:bic_pid: "$BICPY" -V "$BICPY" -m pip install --upgrade pip - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0 + "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0" - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + collect_results:bic_pid: extends: .det_benchmark stage: collect diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml index e787b06a..a7abbbe7 100644 --- a/benchmarks/bic_pid/environment.yml +++ b/benchmarks/bic_pid/environment.yml @@ -3,7 +3,8 @@ channels: dependencies: - python=3.10 - pip - - numpy=1.23.2 + - snakemake=7.32.4 + - numpy=1.24.3 - pandas=2.2.3 - matplotlib=3.10.3 - pyarrow=20.0.0 @@ -13,4 +14,4 @@ dependencies: - dask-awkward=2023.2.0 - dask-histogram=2023.2.0 - boost-histogram=1.5.2 - - hist=2.8.1 + - hist=2.8.1 \ No newline at end of file From a6f880bfa1de1a1d75d78bdc3bfcf0f88d6f7cb0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 03:48:53 +0000 Subject: [PATCH 51/58] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmarks/bic_pid/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml index a7abbbe7..f35c2199 100644 --- a/benchmarks/bic_pid/environment.yml +++ b/benchmarks/bic_pid/environment.yml @@ -14,4 +14,4 @@ dependencies: - dask-awkward=2023.2.0 - dask-histogram=2023.2.0 - boost-histogram=1.5.2 - - hist=2.8.1 \ No newline at end of file + - hist=2.8.1 From ff926b20b560ae198b7946524369e6ffe695a6fa Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Sun, 24 May 2026 22:49:57 -0500 Subject: [PATCH 52/58] Solve problme of indexation --- benchmarks/bic_pid/config.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 52457b47..63e59863 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -48,23 +48,23 @@ bench:bic_pid: - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE" - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true - ls -lrtha -script: - - | - mkdir -p mm - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml + script: + - | + mkdir -p mm + curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm + mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" - export PYTHONNOUSERSITE=1 - unset PYTHONPATH + export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" + export PYTHONNOUSERSITE=1 + unset PYTHONPATH - "$BICPY" -V - "$BICPY" -m pip install --upgrade pip - "$BICPY" 
-m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0" + "$BICPY" -V + "$BICPY" -m pip install --upgrade pip + "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0" - "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: From d225eda98e4c369827709fc15710a1de69823fa9 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 25 May 2026 00:18:05 -0500 Subject: [PATCH 53/58] Solve snakemake problem --- benchmarks/bic_pid/config.yml | 2 +- benchmarks/bic_pid/environment.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 63e59863..022f0a80 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -60,7 +60,7 @@ bench:bic_pid: "$BICPY" -V "$BICPY" -m pip install --upgrade pip - "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0" + "$BICPY" -m pip install snakemake tensorflow-cpu==2.13.0 tf2onnx==1.17.0 "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" diff --git a/benchmarks/bic_pid/environment.yml 
b/benchmarks/bic_pid/environment.yml index f35c2199..bcef2315 100644 --- a/benchmarks/bic_pid/environment.yml +++ b/benchmarks/bic_pid/environment.yml @@ -3,7 +3,6 @@ channels: dependencies: - python=3.10 - pip - - snakemake=7.32.4 - numpy=1.24.3 - pandas=2.2.3 - matplotlib=3.10.3 @@ -14,4 +13,4 @@ dependencies: - dask-awkward=2023.2.0 - dask-histogram=2023.2.0 - boost-histogram=1.5.2 - - hist=2.8.1 + - hist=2.8.1 \ No newline at end of file From 2c0b237328af841cbe5f18951a55fb3106e9727c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 05:18:14 +0000 Subject: [PATCH 54/58] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmarks/bic_pid/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml index bcef2315..5eadce25 100644 --- a/benchmarks/bic_pid/environment.yml +++ b/benchmarks/bic_pid/environment.yml @@ -13,4 +13,4 @@ dependencies: - dask-awkward=2023.2.0 - dask-histogram=2023.2.0 - boost-histogram=1.5.2 - - hist=2.8.1 \ No newline at end of file + - hist=2.8.1 From b92b42f17876aeba330042c049125e87f65cf211 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 25 May 2026 08:42:39 -0500 Subject: [PATCH 55/58] Solve PuLP compability --- benchmarks/bic_pid/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 022f0a80..6b7c6b7d 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -60,7 +60,7 @@ bench:bic_pid: "$BICPY" -V "$BICPY" -m pip install --upgrade pip - "$BICPY" -m pip install snakemake tensorflow-cpu==2.13.0 tf2onnx==1.17.0 + "$BICPY" -m pip install "snakemake==7.32.4" "pulp==2.7.0" "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0" "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, 
tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" From cf610b4ad85b98b4562334b4a70d4333c17bdaf5 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 25 May 2026 09:40:18 -0500 Subject: [PATCH 56/58] Continue solving PuLP installation --- benchmarks/bic_pid/config.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index 6b7c6b7d..fd5f1261 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -52,7 +52,10 @@ bench:bic_pid: - | mkdir -p mm curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml + + mm/bin/micromamba create -y \ + -p "$MAMBA_ROOT_PREFIX/envs/bicpid" \ + -f benchmarks/bic_pid/environment.yml export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python" export PYTHONNOUSERSITE=1 @@ -60,12 +63,17 @@ bench:bic_pid: "$BICPY" -V "$BICPY" -m pip install --upgrade pip - "$BICPY" -m pip install "snakemake==7.32.4" "pulp==2.7.0" "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0" - "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)" + "$BICPY" -m pip install \ + "snakemake==7.32.4" \ + "tensorflow-cpu==2.13.0" \ + "tf2onnx==1.17.0" - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + "$BICPY" -m pip install --force-reinstall --no-deps "pulp==2.7.0" + "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx, pulp; print(sys.version); print('numpy', np.__version__); print('awkward', 
ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__); print('PuLP version =', getattr(pulp, '__version__', 'unknown')); print('has list_solvers =', hasattr(pulp, 'list_solvers')); print('has listSolvers =', hasattr(pulp, 'listSolvers')); print('module =', pulp.__file__)" + + "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid collect_results:bic_pid: extends: .det_benchmark From 3805f5be9797888f78971869a2f4971e1a490e75 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 25 May 2026 10:10:44 -0500 Subject: [PATCH 57/58] Add subprocess in snakefile root repo --- Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Snakefile b/Snakefile index 16e2fbe6..2f7cf8c3 100644 --- a/Snakefile +++ b/Snakefile @@ -2,6 +2,7 @@ configfile: "snakemake.yml" import functools import os +import subprocess from snakemake.logging import logger From 578748deea7e50d9206d6bff9c7ab5d9a2470150 Mon Sep 17 00:00:00 2001 From: Tomas Sosa Date: Mon, 25 May 2026 12:18:16 -0500 Subject: [PATCH 58/58] Add the line to use the correct environment --- benchmarks/bic_pid/config.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml index fd5f1261..36c34929 100644 --- a/benchmarks/bic_pid/config.yml +++ b/benchmarks/bic_pid/config.yml @@ -73,8 +73,12 @@ bench:bic_pid: "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx, pulp; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__); print('PuLP version =', getattr(pulp, '__version__', 'unknown')); print('has list_solvers =', hasattr(pulp, 'list_solvers')); print('has listSolvers =', hasattr(pulp, 'listSolvers')); print('module =', pulp.__file__)" + export PATH="$MAMBA_ROOT_PREFIX/envs/bicpid/bin:$PATH" + hash -r + 
"$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid + collect_results:bic_pid: extends: .det_benchmark stage: collect