From 277d1b6de76d34188656a1a4fa662405fda10320 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 2 Mar 2026 14:41:02 -0500
Subject: [PATCH 01/58] Calorimeter pid bic notebook

---
 benchmarks/calo_pid/calo_pid_bic.org | 490 +++++++++++++++++++++++++++
 benchmarks/calo_pid/config.yml       |   6 +-
 2 files changed, 495 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/calo_pid/calo_pid_bic.org

diff --git a/benchmarks/calo_pid/calo_pid_bic.org b/benchmarks/calo_pid/calo_pid_bic.org
new file mode 100644
index 00000000..1c4959c5
--- /dev/null
+++ b/benchmarks/calo_pid/calo_pid_bic.org
@@ -0,0 +1,490 @@
+#+begin_src jupyter-python
+  import os
+  import math
+  from math import floor
+
+  import pandas as pd
+  import numpy as np
+
+  ## dangerous: silences TF warnings; remove when running on new systems or when debugging
+  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # this MUST come before any tf call.
+  import tensorflow as tf
+  from tensorflow import keras
+  from tensorflow.keras import layers
+
+  import matplotlib.pyplot as plt
+  from collections import OrderedDict
+  import json
+  import re
+#+end_src
+
+#+begin_src jupyter-python
+  print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
+#+end_src
+
+#+begin_src jupyter-python
+  # Simulate argparse in Kaggle
+  class Args:  # stand-in for parsed command-line options; every field is a fixed default
+      def __init__(self):
+          self.angle = ['45to135deg']  # angle-setting labels ("<min>to<max>deg" or "eta<val><n|p>")
+          self.energy = '1GeV'  # "<number><unit>" label; any unit other than GeV is read as MeV
+          self.cap_train_sample = 0  # upper limit on training events; 0 disables the cap
+          self.epochs = 30  # number of training epochs
+          self.target_imbalance = 1.0  # desired weighted pion:electron ratio
+          self.model = 'vgg-v2'  # architecture name: 'old', 'vgg-v1' or 'vgg-v2'
+          self.workdir = '/kaggle/working/output'  # intended root directory for all outputs
+
+  args = Args()
+#+end_src
+
+#+begin_src jupyter-python
+  ## Global efficiencies we want to optimize for (electron efficiencies)
+  kTargetEfficiency = .95
+  ## other efficiency scenarios to cut the ML on
+  kAlternativeEfficiencies = np.arange(.5, 1., .05)
+
+  ## setting
+  #angle_settings=['eta0.0', 'eta0.5n', 'eta0.5p', 'eta1.0n', 'eta1.0p']
+  #angle_settings=['eta0.0', 'eta1.0p']
+  #angle_settings=['eta0.0']
+  angle_settings=args.angle
+  energy_setting= args.energy
+  ## MeV or GeV
+  energy_GeV = float(energy_setting[:-3]) * (1 if energy_setting[-3:] == 'GeV' else 1/1000.)
+  def eta_from_angle(angle_label):
+      ## Pseudorapidity at the centre of a polar-angle range given as "<min>to<max>deg".
+      ## Raises ValueError when the label does not start with that pattern.
+      parsed = re.match(r"(\d+)to(\d+)deg", angle_label)
+      if parsed is None:
+          raise ValueError(f"Cannot parse eta from angle label: {angle_label}")
+      theta_lo, theta_hi = (float(parsed.group(k)) for k in (1, 2))
+      ## midpoint of the range, converted from degrees to radians
+      theta_mid = np.deg2rad((theta_lo + theta_hi) / 2.0)
+      ## eta = -ln(tan(theta / 2))
+      return -np.log(np.tan(theta_mid / 2))
+
+  etas = {}
+  for setting in angle_settings:
+      if setting.startswith("eta"):
+          val = float(setting[3:-1])
+          sign = -1. if setting[-1] == 'n' else 1.
+          etas[setting] = val * sign
+      elif "deg" in setting:
+          etas[setting] = eta_from_angle(setting)
+      else:
+          etas[setting] = 0.0
+
+  print(f'E/p scan for {energy_setting}')
+  print(f'   - detected energy: {energy_GeV} GeV')
+  print(f'   - eta ranges: {angle_settings}')
+#+end_src
+
+#+begin_src jupyter-python
+  ## set ML configuration
+  kTrainSampleCap = args.cap_train_sample
+  kEpochs = args.epochs
+  kTestSize = .2
+  kValidateSize = .1
+  kTargetImbalance = args.target_imbalance
+  kPionWeightCap = 1.00
+  kElectronLabel = 1
+  kPionLabel = 0
+  kModel = args.model
+
+  print('ML configuration:')
+  print(f'   - Number of epochs: {kEpochs}')
+  if kTrainSampleCap > 0:
+      print(f'   - Training sample cap: {kTrainSampleCap}')
+  print(f'   - Validation fraction: {kValidateSize}')
+  print(f'   - Test fraction: {kTestSize}')
+  print(f'   - Target pi:E imbalance: {kTargetImbalance}')
+  print(f'   - Upper cap on pion weights: {kPionWeightCap}')
+  print(f'   - Model: {kModel}')
+#+end_src
+
+#+begin_src jupyter-python
+  def get_dimensions(df):
+      ## extent (max - min + 1) of every index level; string-valued entries are replaced by 0
+      numeric = lambda idx: np.array([0 if type(v) == str else v for v in idx])
+      extent = numeric(df.index.max()) - numeric(df.index.min()) + 1
+      ## assumes a 4-level index ordered (event, <unused>, layer, hit) -- TODO confirm
+      return dict(zip(('event', '_', 'layer', 'hit'), extent))
+
+  ## boiler-plate for in-memory datasets
+  def make_dataset(fields):
+      ## wrap in-memory arrays in a tf.data.Dataset via from_tensor_slices
+      opts = tf.data.Options()
+      ## the data are not file-based, so shard by DATA (avoids the auto-shard warning)
+      opts.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
+      return tf.data.Dataset.from_tensor_slices(fields).with_options(opts)
+#+end_src
+
+#+begin_src jupyter-python
+  ## Chaos CNN model
+  def build_old(input_shape, n_labels=2):
+      ## Compact CNN: three conv/max-pool stages (64, 128, 64 filters) followed by
+      ## two dense layers and a softmax over n_labels classes.
+      conv_stages = [
+          layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape),
+          layers.MaxPooling2D((2, 2), strides=2),
+          layers.Dropout(0.25),
+          layers.Conv2D(128, (2, 2), padding='same', activation='relu'),
+          layers.MaxPooling2D((2, 2), strides=2),
+          layers.Conv2D(64, (2, 2), padding='same', activation='relu'),
+          layers.MaxPooling2D((2, 2), strides=2),
+          layers.Dropout(0.25)]
+      classifier_head = [
+          layers.Flatten(),
+          layers.Dense(128, activation='relu'),
+          layers.Dense(32, activation='relu'),
+          layers.Dense(n_labels, activation='softmax')]
+      return keras.Sequential(conv_stages + classifier_head)
+   
+  ## Slightly beefier VGG-style CNN
+  def build_vgg_v1(input_shape, n_labels=2):
+      ## VGG-style CNN: two padded 64-filter convolutions, then three unpadded 128-filter
+      ## ones, each group followed by 2x2 max-pooling; dense 1024 -> 512 -> softmax head.
+      conv3x3 = dict(kernel_size=(3, 3), activation='relu')
+      return keras.Sequential([
+          layers.Conv2D(64, padding='same', input_shape=input_shape, **conv3x3),
+          layers.Conv2D(64, padding='same', **conv3x3),
+          layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+          layers.Conv2D(128, **conv3x3),
+          layers.Conv2D(128, **conv3x3),
+          layers.Conv2D(128, **conv3x3),
+          layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+          layers.Flatten(),
+          layers.Dense(1024, activation='relu'),
+          layers.Dense(512, activation='relu'),
+          layers.Dense(n_labels, activation='softmax')])
+
+  def build_vgg_v2(input_shape, n_labels=2):
+      ## VGG-style CNN with the same convolutional stack as build_vgg_v1, but with two
+      ## 1024-unit dense layers ahead of the softmax output over n_labels classes.
+      relu3x3 = dict(kernel_size=(3, 3), activation='relu')
+      return keras.Sequential([
+          layers.Conv2D(64, padding='same', input_shape=input_shape, **relu3x3),
+          layers.Conv2D(64, padding='same', **relu3x3),
+          layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+          layers.Conv2D(128, **relu3x3),
+          layers.Conv2D(128, **relu3x3),
+          layers.Conv2D(128, **relu3x3),
+          layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+          layers.Flatten(),
+          layers.Dense(1024, activation='relu'),
+          layers.Dense(1024, activation='relu'),
+          layers.Dense(n_labels, activation='softmax')])
+#+end_src
+
+#+begin_src jupyter-python
+  def build_model(input_shape, n_labels=2):
+      ## select the architecture named by the global kModel; unknown names fall back to vgg-v2
+      builders = {
+          'old': build_old,
+          'vgg-v1': build_vgg_v1,
+          'vgg-v2': build_vgg_v2,
+      }
+      builder = builders.get(kModel)
+      ## same log wording as before: the model name when recognised, 'default' otherwise
+      print(f'Building {kModel}' if builder is not None else 'Building default')
+      builder = builder or build_vgg_v2
+      return builder(input_shape, n_labels)
+#+end_src
+
+#+begin_src jupyter-python
+  angle_label=angle_settings[0]
+  print(angle_label)
+#+end_src
+
+#+begin_src jupyter-python
+  datadir = f'/kaggle/input/results-45to135deg-1gev-data'
+  plotdir = f'/kaggle/working/plots/{angle_label}'
+  output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}'
+#+end_src
+
+#+begin_src jupyter-python
+  print('\nprocessing angle setting:', angle_label)
+  print(f'  - eta: {etas[angle_label]}')
+
+  ## output directories
+  #output_directory = f'{args.workdir}/{angle_label}/{energy_setting}'
+  #plotdir = f'{output_directory}/plots'
+  #datadir = f'{output_directory}/data'
+  os.makedirs(plotdir, exist_ok=True)
+  os.makedirs(datadir, exist_ok=True)
+  print(f'   - output data directory: {datadir}')
+  print(f'   - output plot directory: {plotdir}')
+#+end_src
+
+#+begin_src jupyter-python
+  print('Loading datasets: ')
+  print(f'   - Loading {datadir}/hits.snappy.parquet')
+  df_data = pd.read_parquet(f'{datadir}/hits.snappy.parquet')
+  print(f'   - Loading {datadir}/labels.snappy.parquet')
+  df_mc = pd.read_parquet(f'{datadir}/labels.snappy.parquet')
+#+end_src
+
+#+begin_src jupyter-python
+  ## calculate weight to achieve target imbalance
+  n_electrons = np.sum(df_mc['PDG'] == 11)
+  n_pions = np.sum(df_mc['PDG'] == -211)
+  imbalance = n_pions/n_electrons
+  kSuggestedWeight = min(n_electrons/n_pions*kTargetImbalance, kPionWeightCap)
+  print(f'Data set has relative class imbalance of {n_electrons} : {n_pions} = {imbalance}')
+  print(f'  - target imbalance: {kTargetImbalance}')
+  print(f'  - pion weight upper limit: {kPionWeightCap:.2f}')
+  print(f'  - suggested pion weight {kSuggestedWeight:.2f}')
+#+end_src
+
+#+begin_src jupyter-python
+  ## Load the E/P cut scan and keep its best (highest-rejection) row, used for the aggregate statistics and the ML target efficiency
+  print(f'Loading E/P data from {datadir}/EoverP_results.csv')
+  cutdf = pd.read_csv(f'{datadir}/EoverP_results.csv').sort_values('rejection', ascending=False)
+  results_EoverP = {key: cutdf[key].iloc[0] for key in cutdf.keys()}  ## .iloc is positional: first row *after* sorting ([0] would look up index label 0)
+  results_EoverP['max_layer'] = int(results_EoverP['max_layer']) ## get rid of the int64 which causes trouble with json
+  kTargetEfficiencyML = kTargetEfficiency / results_EoverP['efficiency']
+  print(results_EoverP)
+  print(f'Deduced target efficiency for ML: {kTargetEfficiencyML:.3f}')
+#+end_src
+
+#+begin_src jupyter-python
+  print('Formatting data objects')
+  dim = get_dimensions(df_data)
+  xdata_both = df_data.values.reshape(dim['event'], 
+                                      dim['layer'], 
+                                      dim['hit'], 
+                                      len(df_data.columns)).astype(np.float32)
+
+  ldata = df_mc['PDG'].map(lambda pdg: kElectronLabel if (pdg == 11) else kPionLabel).values
+  wdata = df_mc['PDG'].map(lambda pdg: 1 if (pdg == 11) else kSuggestedWeight).values
+#+end_src
+
+#+begin_src jupyter-python
+  print('Shuffling data and separating samples')
+  ## shuffle data
+  index = np.arange(len(ldata))
+  np.random.shuffle(index)
+  tot_len = len(index)
+
+  n_valid = floor(tot_len * kValidateSize)
+  n_test = floor(tot_len * kTestSize)
+  n_train = tot_len - n_valid - n_test
+  if kTrainSampleCap > 0 and n_train > kTrainSampleCap:
+      print(f'Capping training sample size to {kTrainSampleCap}')
+      valid_over_train = n_valid / n_train
+      test_over_train = n_test / n_train
+      n_train = kTrainSampleCap
+      n_valid = floor(valid_over_train * n_train)
+      n_test = floor(test_over_train * n_train)
+      tot_len = n_train + n_valid + n_test
+  print(f'Sample sizes: {{n_train: {n_train}, n_valid: {n_valid}, n_test: {n_test}}}')
+#+end_src
+
+#+begin_src jupyter-python
+  id_valid = index[:n_valid]
+  id_test = index[n_valid:n_valid + n_test]
+  id_train = index[n_valid + n_test:tot_len]
+  xtrain, xvalid, xtest = xdata_both[id_train], xdata_both[id_valid], xdata_both[id_test]
+  ltrain, lvalid, ltest = ldata[id_train], ldata[id_valid], ldata[id_test]
+  wtrain, wvalid = wdata[id_train], wdata[id_valid]
+#+end_src
+
+#+begin_src jupyter-python
+  print('Start training, using GPU resources')
+  gpu = tf.config.list_logical_devices('GPU')
+  ## Mirror onto the first GPU when one is visible; fall back to the default
+  ## strategy otherwise so that a CPU-only run does not fail on gpu[0].
+  strategy = tf.distribute.MirroredStrategy([gpu[0]]) if gpu else tf.distribute.get_strategy()
+  history = None
+  with strategy.scope():
+      ## make_dataset() already sets the DATA auto-shard policy, so the datasets
+      ## are used as returned instead of applying the same option a second time
+      train_dataset = make_dataset((xtrain, ltrain, wtrain))
+      valid_dataset = make_dataset((xvalid, lvalid, wvalid))
+
+      model = build_model(input_shape=xtrain.shape[1:])
+      ## weighted_metrics: accuracy is evaluated with the per-event sample weights
+      model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+                    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
+                    weighted_metrics=['accuracy'])
+      history = model.fit(train_dataset.batch(2000), validation_data=valid_dataset.batch(1000), epochs=kEpochs)
+      ## later cells write the ONNX model and results.json into this directory
+      os.makedirs(output_directory, exist_ok=True)
+#+end_src
+
+#+begin_src jupyter-python
+  import keras.backend as K
+  # Monkey-patch the missing function to avoid the crash
+  K.set_learning_phase = lambda flag: None
+
+  import tensorflow as tf
+  import tf2onnx
+
+  # Load your Keras model
+  #model = tf.keras.models.load_model("/epi_separation/results/45to135deg/1GeV/data/cnn_model_30epochs.h5")
+
+  # Define a function to capture the input signature
+  @tf.function(input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)])
+  def model_fn(input_tensor):
+      return model(input_tensor)
+
+  # Convert to ONNX format
+  onnx_model, _ = tf2onnx.convert.from_function(
+      model_fn,
+      input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)],  # This is important
+      opset=13,
+      output_path=f"{output_directory}/EcalBarrel_pi_rejection.onnx"
+  )
+
+  print("Model converted successfully to ONNX format!")
+#+end_src
+
+#+begin_src jupyter-python
+  print('Summarizing metrics')
+  fig, ax = plt.subplots(1, 2, figsize=(12,6))
+
+  ax[0].plot(history.history['loss'])
+  ax[0].plot(history.history['val_loss'])
+  ax[0].set_title('model loss')
+  ax[0].set_ylabel('loss')
+  ax[0].set_xlabel('epoch')
+  ax[0].legend(['train', 'validate'], loc='upper left')
+
+  ax[1].plot(history.history['accuracy'])
+  ax[1].plot(history.history['val_accuracy'])
+  ax[1].set_title('accuracy')
+  ax[1].set_ylabel('accuracy')
+  ax[1].set_xlabel('epoch')
+  ax[1].legend(['train', 'validate'], loc='upper left')
+  ax[1].set_ylim(0, 1.1)
+
+  fig.savefig(f'{plotdir}/ML_learning.pdf')
+#+end_src
+
+#+begin_src jupyter-python
+  print('Bencmarking test data')
+  # benchmark
+  test_dataset = make_dataset((xtest,))
+  prediction = model.predict(test_dataset.batch(1000))
+#+end_src
+
+#+begin_src jupyter-python
+  print('Calculate aggregate e-pi rejection metrics')
+
+  def calculate_metrics(target_efficiency=kTargetEfficiencyML, export_prediction=True):
+      ## Cut the ML electron probability so that target_efficiency of the test electrons pass,
+      ## then return an OrderedDict with the ML-only and combined (E/P x ML) efficiency and
+      ## rejection; with export_prediction also return the per-class electron probabilities.
+      efficiency_cut = np.percentile(prediction[ltest == kElectronLabel].T[kElectronLabel],
+                                     (1 - target_efficiency)*100)
+      target_weight = (1 - efficiency_cut) / efficiency_cut
+
+      prediction_weights = np.ones(2)
+      prediction_weights[kElectronLabel] = target_weight
+      prediction_labels = np.argmax(prediction * prediction_weights, axis=1)
+      ## weighting P_e by (1 - cut) / cut moves the argmax cross-over to P_e = cut;
+      ## this is specific to two particles where (P_e + P_pi = 1)
+      electron_predicted = [None, None]
+      probabilities = np.zeros(shape=(2,2))
+      for i in [kPionLabel, kElectronLabel]:
+          mask = (ltest == i)
+          ## minlength=2 keeps a zero entry for a class that is never predicted
+          probabilities[i] = np.bincount(prediction_labels[mask], minlength=2)/float(np.sum(mask))
+          electron_predicted[i] = prediction[mask].T[kElectronLabel]
+      binomial_error = lambda eff, n:  np.sqrt(n * eff * (1 - eff)) / n
+      inverse_error = lambda val, err: err / val**2
+
+      n_electron_test = np.sum(ltest == kElectronLabel)
+      n_pion_test = np.sum(ltest == kPionLabel)
+
+      results_ML = OrderedDict({'target_particle': 'e-',
+                                'target_weight': float(target_weight),  ## plain floats keep json.dumps working
+                                'target_efficiency': target_efficiency,
+                                'target_cut': float(efficiency_cut),    ## for any NumPy float width
+                                'n_electrons': int(n_electron_test),
+                                'n_pions': int(n_pion_test),
+                                'probabilities': probabilities.tolist(),
+                                'efficiency': probabilities[kElectronLabel, kElectronLabel],
+                                'efficiency_error': binomial_error(probabilities[kElectronLabel, kElectronLabel], n_electron_test),
+                                'rejection': 1 / probabilities[kPionLabel, kElectronLabel],
+                                'rejection_error': inverse_error(probabilities[kPionLabel, kElectronLabel], binomial_error(probabilities[kPionLabel, kElectronLabel], n_pion_test))})
+
+      ## calculate aggregate results from E/P + ML
+      results = OrderedDict({
+          'energy': energy_GeV,
+          'eta': etas[angle_label],
+          'angle': angle_label,
+          'efficiency': results_EoverP['efficiency'] * results_ML['efficiency'],
+          'efficiency_error': np.sqrt(results_EoverP['efficiency']**2 * results_ML['efficiency_error']**2
+                                      + results_ML['efficiency']**2 * results_EoverP['efficiency_error']**2),
+          'rejection': results_EoverP['rejection'] * results_ML['rejection'],
+          'rejection_error': np.sqrt(results_EoverP['rejection']**2 * results_ML['rejection_error']**2
+                                      + results_ML['rejection']**2 * results_EoverP['rejection_error']**2),
+          'prob_cut': float(efficiency_cut),
+          'EoverP': results_EoverP,
+          'ML': results_ML})
+      if export_prediction:
+          return results, electron_predicted
+      return results
+#+end_src
+
+#+begin_src jupyter-python
+  results, electron_predicted = calculate_metrics()
+  results_ML = results['ML']
+  test = electron_predicted
+  print(f'Calculating alternative target efficiency scenarios: {kAlternativeEfficiencies}')
+  results['scenarios'] = {}
+  for alternative_eff in kAlternativeEfficiencies:
+      target_eff_ml = alternative_eff / results_EoverP['efficiency']
+      tmp_res = calculate_metrics(target_efficiency=target_eff_ml, export_prediction=False)
+      results['scenarios'][alternative_eff] = tmp_res
+#+end_src
+
+#+begin_src jupyter-python
+  assert test is electron_predicted
+
+  with open(f'{output_directory}/results.json', 'w') as f:
+      f.write(json.dumps(results, indent=2))
+  print(f' - Found overal rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency')
+  print(f' - Results written to {datadir}/results.json')
+#+end_src
+
+#+begin_src jupyter-python
+  print('Plotting ML results')
+  # default color cycle of matplotlib
+  prop_cycle = plt.rcParams['axes.prop_cycle']
+  colors = prop_cycle.by_key()['color']
+  box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
+
+  parts = {kElectronLabel: r'e^-', kPionLabel: r'\pi^-'}
+
+  fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
+  effs = []
+  for i in parts.keys():
+      ax.hist(electron_predicted[i], bins=np.linspace(0, 1, 101), label='${}$'.format(parts[i]),
+                  color=colors[i], ec=colors[i], alpha=0.5)
+  ax.axvline(x=results['prob_cut'], lw=2, color='k', ls='--')
+  eff_text = '\n'.join([r'$\epsilon_{{ML}}^{{e^-}} = {:.2f}$%'.format(results_ML['efficiency'] * 100.),
+                            r'$R_{{ML}}^{{\pi^-}} = {:.1f}$'.format(results_ML['rejection']),
+                            r'$\epsilon_{{E/p}}^{{e^-}} = {:.2f}$%'.format(results_EoverP['efficiency'] * 100.),
+                            r'$R_{{E/p}}^{{\pi^-}} = {:.1f}$'.format(results_EoverP['rejection'])
+                           ])
+  data_to_axis = (ax.transAxes + ax.transData.inverted()).inverted()
+  ax.text(data_to_axis.transform((results['prob_cut'], 1))[0] + 0.01, 0.99, eff_text, fontsize=24,
+          transform=ax.transAxes, ha='left', va='top')
+  ax.set_yscale('log')
+  ax.set_ylabel('Counts', fontsize=24)
+  ax.set_xlabel(r'$P_{{{}}}$'.format(r'e^-'), fontsize=24)
+  ax.tick_params(direction='in', which='both', labelsize=24)
+  ax.legend(fontsize=24, ncol=4, loc='upper center', bbox_to_anchor=(0.5, 1.12),)
+  ax.text(0.05, .99, '\n'.join(  ## header: configured energy and eta, then the combined E/p x ML performance
+      [r'{energy} at ${loc}$'.format(energy=energy_setting,
+                                     loc=rf'\eta = {etas[angle_label]:.2f}'),
+       r'$R_{{\pi}} = {rejection:.1f}$ at $\epsilon_{{e^-}} = {efficiency:.2f}$%'.format(
+           rejection=results_EoverP['rejection'] * results_ML['rejection'],
+           efficiency=results_EoverP['efficiency'] * results_ML['efficiency'] * 100.)]),
+      ha='left', va='top', fontsize=24, transform=ax.transAxes)
+  fig.savefig(f'{plotdir}/ML_rejection.pdf')
+
+  print('Done with this eta bin')
+#+end_src
\ No newline at end of file
diff --git a/benchmarks/calo_pid/config.yml b/benchmarks/calo_pid/config.yml
index 88410a49..c781cb40 100644
--- a/benchmarks/calo_pid/config.yml
+++ b/benchmarks/calo_pid/config.yml
@@ -4,6 +4,10 @@ sim:calo_pid:
   parallel:
     matrix:
       - PARTICLE: ["e-", "pi-"]
+        ANGLE: [
+          "45to135deg",
+          "130to177deg"
+        ]
         INDEX_RANGE: [
           "0 9",
           "10 19",
@@ -19,7 +23,7 @@ sim:calo_pid:
   script:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \
-        $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/130to177deg/${PARTICLE}_100MeVto20GeV_130to177deg.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
+        $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/${ANGLE}/${PARTICLE}_100MeVto20GeV_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
 
 bench:calo_pid:
   extends: .det_benchmark

From b81a623dd8caf811649e3d009c5b0a330a1dd30c Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 8 Mar 2026 21:07:29 -0500
Subject: [PATCH 02/58] Add bic pid benchmark

---
 benchmarks/bic_pid/Snakefile                  | 106 ++++++++++++++++++
 .../calo_pid_bic.org => bic_pid/bic_pid.org}  |   0
 benchmarks/bic_pid/config.yml                 |  57 ++++++++++
 benchmarks/bic_pid/requirements.txt           |   7 ++
 benchmarks/calo_pid/config.yml                |   6 +-
 5 files changed, 171 insertions(+), 5 deletions(-)
 create mode 100644 benchmarks/bic_pid/Snakefile
 rename benchmarks/{calo_pid/calo_pid_bic.org => bic_pid/bic_pid.org} (100%)
 create mode 100644 benchmarks/bic_pid/config.yml
 create mode 100644 benchmarks/bic_pid/requirements.txt

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
new file mode 100644
index 00000000..c734be29
--- /dev/null
+++ b/benchmarks/bic_pid/Snakefile
@@ -0,0 +1,106 @@
+def format_energy_for_dd4hep(s):
+    return f'{s.rstrip("kMGeV")}*{s.lstrip("0123456789")}'  # e.g. "1GeV" -> "1*GeV"
+
+
+rule bic_pid_sim:
+    input:
+        warmup="warmup.edm4hep.root",
+        geometry_lib=find_epic_libraries(),
+    output:
+        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root",
+    log:
+        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root.log",
+    wildcard_constraints:
+        PARTICLE="(e-|pi-)",
+        ENERGY="[0-9]+[kMG]eV",
+        THETA_MIN="[0-9]+",
+        THETA_MAX="[0-9]+",
+        INDEX=r"\d{4}",
+    params:
+        N_EVENTS=1000,
+        SEED=lambda wildcards: "1" + wildcards.INDEX,
+        DETECTOR_PATH=os.environ["DETECTOR_PATH"],
+        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
+        ENERGY=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY),
+        THETA_MIN=lambda wildcards: wildcards.THETA_MIN,
+        THETA_MAX=lambda wildcards: wildcards.THETA_MAX,
+        DD4HEP_HASH=get_spack_package_hash("dd4hep"),
+        NPSIM_HASH=get_spack_package_hash("npsim"),
+    cache: True
+    shell:
+        """
+set -m # monitor mode to prevent lingering processes
+exec npsim \
+  --runType batch \
+  --enableGun \
+  --gun.momentumMin "{params.ENERGY}" \
+  --gun.momentumMax "{params.ENERGY}" \
+  --gun.thetaMin "{wildcards.THETA_MIN}*deg" \
+  --gun.thetaMax "{wildcards.THETA_MAX}*deg" \
+  --gun.particle {wildcards.PARTICLE} \
+  --gun.distribution eta \
+  --random.seed {params.SEED} \
+  --filter.tracker edep0 \
+  -v WARNING \
+  --numberOfEvents {params.N_EVENTS} \
+  --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \
+  --outputFile {output}
+"""
+
+
+rule bic_pid_recon:
+    input:
+        sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
+        warmup="warmup.edm4hep.root",
+    output:
+        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root",
+    log:
+        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root.log",
+    wildcard_constraints:
+        INDEX=r"\d{4}",
+    params:
+        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
+        EICRECON_HASH=get_spack_package_hash("eicrecon"),
+    cache: True
+    shell:
+        """
+DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
+exec eicrecon {input.sim} -Ppodio:output_file={output} \
+  -Ppodio:output_collections=MCParticles,EcalBarrelScFiRecHits,EcalBarrelImagingRecHits
+"""
+
+
+rule bic_pid_input_list:
+    input:
+        electrons=expand(
+            "sim_output/bic_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.edm4eic.root",
+            ENERGY=["1GeV"],
+            PHASE_SPACE=["45to135deg"],
+            INDEX=range(100),
+        ),
+    output:
+        "listing/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
+    run:
+        with open(output[0], "wt") as fp:
+            fp.write("\n".join(input))
+
+
+rule bic_pid:
+    input:
+        electrons="listing/bic_pid/{DETECTOR_CONFIG}/e-.lst",
+        pions="listing/bic_pid/{DETECTOR_CONFIG}/pi-.lst",
+        matplotlibrc=".matplotlibrc",
+        script="benchmarks/bic_pid/bic_pid.py",
+    output:
+        directory("results/{DETECTOR_CONFIG}/bic_pid")
+    shell:
+        """
+env \
+MATPLOTLIBRC={input.matplotlibrc} \
+DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
+PLOT_TITLE={wildcards.DETECTOR_CONFIG} \
+INPUT_ELECTRONS="{input.electrons}" \
+INPUT_PIONS="{input.pions}" \
+OUTPUT_DIR={output} \
+python {input.script}
+"""
\ No newline at end of file
diff --git a/benchmarks/calo_pid/calo_pid_bic.org b/benchmarks/bic_pid/bic_pid.org
similarity index 100%
rename from benchmarks/calo_pid/calo_pid_bic.org
rename to benchmarks/bic_pid/bic_pid.org
diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
new file mode 100644
index 00000000..cda68261
--- /dev/null
+++ b/benchmarks/bic_pid/config.yml
@@ -0,0 +1,57 @@
+sim:bic_pid:
+  extends: .det_benchmark
+  stage: simulate
+  image: ${BENCHMARKS_REGISTRY}/eic_ci${BENCHMARKS_SIGIL}${BENCHMARKS_TAG}  # GitLab expands ${VAR}, not $(VAR)$
+  parallel:
+    matrix:
+      - PARTICLE: ["e-", "pi-"]
+        ANGLE: [
+          "45to135deg"
+        ]
+        ENERGY: [
+          "1GeV"
+        ]
+        INDEX_RANGE: [  # "first last" file indices passed to seq for each parallel job
+          "0 9",
+          "10 19",
+          "20 29",
+          "30 39",
+          "40 49",
+          "50 59",
+          "60 69",
+          "70 79",
+          "80 89",
+          "90 99",
+        ]
+  script:
+    - |
+      snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \
+        $(seq --format="sim_output/bic_pid/epic_inner_detector/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
+
+bench:bic_pid:
+  extends: .det_benchmark
+  stage: benchmarks
+  needs:
+    - ["sim:bic_pid"]
+  image: ${BENCHMARKS_REGISTRY}/eic_tf${BENCHMARKS_SIGIL}${BENCHMARKS_TAG}  # GitLab expands ${VAR}, not $(VAR)$
+  variables:
+    CUDA_VISIBLE_DEVICES: ""  # hide all GPUs from the training job
+  script:
+    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
+    - export PATH=$PYTHONUSERBASE/bin:$PATH
+    - python -m pip install --user snakemake
+    - python -m pip install --user -r benchmarks/bic_pid/requirements.txt
+    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/epic_inner_detector/bic_pid
+
+collect_results:bic_pid:
+  extends: .det_benchmark
+  stage: collect
+  needs:
+    - "bench:bic_pid"
+  when: always  # run regardless of the status of earlier jobs
+  image: ${BENCHMARKS_REGISTRY}/eic_ci${BENCHMARKS_SIGIL}${BENCHMARKS_TAG}  # GitLab expands ${VAR}, not $(VAR)$
+  script:
+    - ls -lrht
+    - mv results{,_save}/ # move results directory out of the way to preserve it
+    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/epic_inner_detector/bic_pid
+    - mv results{_save,}/
\ No newline at end of file
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
new file mode 100644
index 00000000..d32037a3
--- /dev/null
+++ b/benchmarks/bic_pid/requirements.txt
@@ -0,0 +1,7 @@
+awkward >= 2.4.0
+catboost
+onnx
+scikit-learn
+uproot >= 5.2.0
+vector
+tf2onnx
\ No newline at end of file
diff --git a/benchmarks/calo_pid/config.yml b/benchmarks/calo_pid/config.yml
index c781cb40..88410a49 100644
--- a/benchmarks/calo_pid/config.yml
+++ b/benchmarks/calo_pid/config.yml
@@ -4,10 +4,6 @@ sim:calo_pid:
   parallel:
     matrix:
       - PARTICLE: ["e-", "pi-"]
-        ANGLE: [
-          "45to135deg",
-          "130to177deg"
-        ]
         INDEX_RANGE: [
           "0 9",
           "10 19",
@@ -23,7 +19,7 @@ sim:calo_pid:
   script:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \
-        $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/${ANGLE}/${PARTICLE}_100MeVto20GeV_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
+        $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/130to177deg/${PARTICLE}_100MeVto20GeV_130to177deg.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
 
 bench:calo_pid:
   extends: .det_benchmark

From cbccb2a16fe829a39f1a2326df09d2c9212ad0ed Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 9 Mar 2026 13:56:49 -0500
Subject: [PATCH 03/58] Fix some things in bic_pid

---
 benchmarks/bic_pid/bic_pid.org | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org
index 1c4959c5..dd975c49 100644
--- a/benchmarks/bic_pid/bic_pid.org
+++ b/benchmarks/bic_pid/bic_pid.org
@@ -197,9 +197,9 @@
 #+end_src
 
 #+begin_src jupyter-python
-  datadir = f'/kaggle/input/results-45to135deg-1gev-data'
-  plotdir = f'/kaggle/working/plots/{angle_label}'
-  output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}'
+  #datadir = f'/kaggle/input/results-45to135deg-1gev-data'
+  #plotdir = f'/kaggle/working/plots/{angle_label}'
+  #output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}'
 #+end_src
 
 #+begin_src jupyter-python
@@ -207,9 +207,9 @@
   print(f'  - eta: {etas[angle_label]}')
 
   ## output directories
-  #output_directory = f'{args.workdir}/{angle_label}/{energy_setting}'
-  #plotdir = f'{output_directory}/plots'
-  #datadir = f'{output_directory}/data'
+  output_directory = f'{args.workdir}/{angle_label}/{energy_setting}'
+  plotdir = f'{output_directory}/plots'
+  datadir = f'{output_directory}/data'
   os.makedirs(plotdir, exist_ok=True)
   os.makedirs(datadir, exist_ok=True)
   print(f'   - output data directory: {datadir}')
@@ -362,7 +362,7 @@
 #+end_src
 
 #+begin_src jupyter-python
-  print('Bencmarking test data')
+  print('Benchmarking test data')
   # benchmark
   test_dataset = make_dataset((xtest,))
   prediction = model.predict(test_dataset.batch(1000))
@@ -445,8 +445,8 @@
 
   with open(f'{output_directory}/results.json', 'w') as f:
       f.write(json.dumps(results, indent=2))
-  print(f' - Found overal rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency')
-  print(f' - Results written to {datadir}/results.json')
+  print(f' - Found overall rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency')
+  print(f' - Results written to {output_directory}/results.json')
 #+end_src
 
 #+begin_src jupyter-python

From 1d75227e534cdbf3e4bce3b75404b8e0b46ae963 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 29 Mar 2026 18:44:48 -0500
Subject: [PATCH 04/58] Include bic_pid in execution

---
 .gitlab-ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index abb866c7..42ab0af1 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -128,6 +128,7 @@ include:
   - local: 'benchmarks/backwards_ecal/config.yml'
   - local: 'benchmarks/beamline/config.yml'
   - local: 'benchmarks/calo_pid/config.yml'
+  - local: 'benchmarks/bic_pid/config.yml'
   - local: 'benchmarks/campaign/config.yml'
   - local: 'benchmarks/ecal_gaps/config.yml'
   - local: 'benchmarks/far_forward_dvcs/config.yml'

From 95242e965c9bfdf57d706b5083b5c5423a8967ac Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 29 Mar 2026 22:10:21 -0500
Subject: [PATCH 05/58] Fix the shell command substitution

---
 benchmarks/bic_pid/config.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index cda68261..d7e91459 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -1,7 +1,7 @@
 sim:bic_pid:
   extends: .det_benchmark
   stage: simulate
-  image: $(BENCHMARKS_REGISTRY)$/eic_ci$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG)
+  image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   parallel:
     matrix:
       - PARTICLE: ["e-", "pi-"]
@@ -33,7 +33,7 @@ bench:bic_pid:
   stage: benchmarks
   needs:
     - ["sim:bic_pid"]
-  image: $(BENCHMARKS_REGISTRY)$/eic_tf$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG)
+  image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   variables:
     CUDA_VISIBLE_DEVICES: ""
   script:
@@ -49,7 +49,7 @@ collect_results:bic_pid:
   needs:
     - "bench:bic_pid"
   when: always
-  image: $(BENCHMARKS_REGISTRY)$/eic_ci$(BENCHMARKS_SIGIL)$(BENCHMARKS_TAG)
+  image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   script:
     - ls -lrht
     - mv results{,_save}/ # move results directory out of the way to preserve it

From 35989a06639b3a5e55a266f62db33b3dad8e85c3 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 29 Mar 2026 22:28:39 -0500
Subject: [PATCH 06/58] Add new change in config

---
 benchmarks/bic_pid/config.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index d7e91459..0bf4b5f1 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -26,7 +26,7 @@ sim:bic_pid:
   script:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \
-        $(seq --format="sim_output/bic_pid/epic_inner_detector/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
+        $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
 
 bench:bic_pid:
   extends: .det_benchmark
@@ -41,7 +41,7 @@ bench:bic_pid:
     - export PATH=$PYTHONUSERBASE/bin:$PATH
     - python -m pip install --user snakemake
     - python -m pip install --user -r benchmarks/bic_pid/requirements.txt
-    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/epic_inner_detector/bic_pid
+    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 collect_results:bic_pid:
   extends: .det_benchmark
@@ -52,6 +52,6 @@ collect_results:bic_pid:
   image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   script:
     - ls -lrht
-    - mv results{,_save}/ # move results directory out of the way to preserve it
-    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/epic_inner_detector/bic_pid
+    - mv results{,_save}/
+    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid
     - mv results{_save,}/
\ No newline at end of file

From 5ca3769ac180f20fcdb0332c8e484fae0f61bf2b Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 29 Mar 2026 22:46:46 -0500
Subject: [PATCH 07/58] Add new correction to the code

---
 benchmarks/bic_pid/Snakefile | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index c734be29..587982a4 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -2,19 +2,26 @@ def format_energy_for_dd4hep(s):
     return s.rstrip("kMGeV") + "*" + s.lstrip("0123456789")
 
 
+def theta_min_from_phase_space(s):
+    return s.replace("deg", "").split("to")[0]
+
+
+def theta_max_from_phase_space(s):
+    return s.replace("deg", "").split("to")[1]
+
+
 rule bic_pid_sim:
     input:
         warmup="warmup.edm4hep.root",
         geometry_lib=find_epic_libraries(),
     output:
-        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root",
+        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
     log:
-        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root.log",
+        "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log",
     wildcard_constraints:
         PARTICLE="(e-|pi-)",
         ENERGY="[0-9]+[kMG]eV",
-        THETA_MIN="[0-9]+",
-        THETA_MAX="[0-9]+",
+        PHASE_SPACE="[0-9]+to[0-9]+deg",
         INDEX=r"\d{4}",
     params:
         N_EVENTS=1000,
@@ -22,8 +29,8 @@ rule bic_pid_sim:
         DETECTOR_PATH=os.environ["DETECTOR_PATH"],
         DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
         ENERGY=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY),
-        THETA_MIN=lambda wildcards: wildcards.THETA_MIN,
-        THETA_MAX=lambda wildcards: wildcards.THETA_MAX,
+        THETA_MIN=lambda wildcards: theta_min_from_phase_space(wildcards.PHASE_SPACE),
+        THETA_MAX=lambda wildcards: theta_max_from_phase_space(wildcards.PHASE_SPACE),
         DD4HEP_HASH=get_spack_package_hash("dd4hep"),
         NPSIM_HASH=get_spack_package_hash("npsim"),
     cache: True
@@ -35,8 +42,8 @@ exec npsim \
   --enableGun \
   --gun.momentumMin "{params.ENERGY}" \
   --gun.momentumMax "{params.ENERGY}" \
-  --gun.thetaMin "{wildcards.THETA_MIN}*deg" \
-  --gun.thetaMax "{wildcards.THETA_MAX}*deg" \
+  --gun.thetaMin "{params.THETA_MIN}*deg" \
+  --gun.thetaMax "{params.THETA_MAX}*deg" \
   --gun.particle {wildcards.PARTICLE} \
   --gun.distribution eta \
   --random.seed {params.SEED} \
@@ -57,6 +64,9 @@ rule bic_pid_recon:
     log:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root.log",
     wildcard_constraints:
+        PARTICLE="(e-|pi-)",
+        ENERGY="[0-9]+[kMG]eV",
+        PHASE_SPACE="[0-9]+to[0-9]+deg",
         INDEX=r"\d{4}",
     params:
         DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
@@ -72,7 +82,7 @@ exec eicrecon {input.sim} -Ppodio:output_file={output} \
 
 rule bic_pid_input_list:
     input:
-        electrons=expand(
+        files=expand(
             "sim_output/bic_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.edm4eic.root",
             ENERGY=["1GeV"],
             PHASE_SPACE=["45to135deg"],
@@ -82,7 +92,7 @@ rule bic_pid_input_list:
         "listing/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
     run:
         with open(output[0], "wt") as fp:
-            fp.write("\n".join(input))
+            fp.write("\n".join(input.files))
 
 
 rule bic_pid:

From 859a327bb08cf2e16f125d86c48ba10ec975563d Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 5 Apr 2026 09:16:51 -0500
Subject: [PATCH 08/58] A correction in the Snakefile of the repo (add bic_pid
 snakefile)

---
 Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Snakefile b/Snakefile
index 59443fe0..16e2fbe6 100644
--- a/Snakefile
+++ b/Snakefile
@@ -48,6 +48,7 @@ include: "benchmarks/backwards_ecal/Snakefile"
 include: "benchmarks/barrel_ecal/Snakefile"
 include: "benchmarks/beamline/Snakefile"
 include: "benchmarks/calo_pid/Snakefile"
+include: "benchmarks/bic_pid/Snakefile"
 include: "benchmarks/campaign/Snakefile"
 include: "benchmarks/ecal_gaps/Snakefile"
 include: "benchmarks/far_forward_dvcs/Snakefile"

From d440bbeb8443f6f61b9f767d98e143d837d89ad8 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 5 Apr 2026 13:03:45 -0500
Subject: [PATCH 09/58] Add some corrections in the Snakefile and config.yml
 for the errors in the job

---
 benchmarks/bic_pid/Snakefile  | 8 ++++----
 benchmarks/bic_pid/config.yml | 5 ++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 587982a4..2afa3496 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -35,9 +35,9 @@ rule bic_pid_sim:
         NPSIM_HASH=get_spack_package_hash("npsim"),
     cache: True
     shell:
-        """
-set -m # monitor mode to prevent lingering processes
-exec npsim \
+        r"""
+set -m 
+npsim \
   --runType batch \
   --enableGun \
   --gun.momentumMin "{params.ENERGY}" \
@@ -51,7 +51,7 @@ exec npsim \
   -v WARNING \
   --numberOfEvents {params.N_EVENTS} \
   --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \
-  --outputFile {output}
+  --outputFile {output} > {log} 2>&1
 """
 
 
diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 0bf4b5f1..5660c546 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -24,8 +24,9 @@ sim:bic_pid:
           "90 99",
         ]
   script:
+    - export DETECTOR_CONFIG=epic_craterlake
     - |
-      snakemake $SNAKEMAKE_FLAGS --cores $MAX_CORES_PER_JOB \
+      snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
 
 bench:bic_pid:
@@ -37,6 +38,7 @@ bench:bic_pid:
   variables:
     CUDA_VISIBLE_DEVICES: ""
   script:
+    - export DETECTOR_CONFIG=epic_craterlake
     - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
     - export PATH=$PYTHONUSERBASE/bin:$PATH
     - python -m pip install --user snakemake
@@ -51,6 +53,7 @@ collect_results:bic_pid:
   when: always
   image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   script:
+    - export DETECTOR_CONFIG=epic_craterlake
     - ls -lrht
     - mv results{,_save}/
     - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid

From e4588b5029c5282c086fc4ce25730e872b3b495f Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 5 Apr 2026 22:24:18 -0500
Subject: [PATCH 10/58] Fix config.yml to solve the problem of the
 bench:bic_pid

---
 benchmarks/bic_pid/config.yml | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 5660c546..cf9e746d 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -2,15 +2,13 @@ sim:bic_pid:
   extends: .det_benchmark
   stage: simulate
   image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
+  variables:
+    DETECTOR_CONFIG: epic_craterlake
   parallel:
     matrix:
       - PARTICLE: ["e-", "pi-"]
-        ANGLE: [
-          "45to135deg"
-        ]
-        ENERGY: [
-          "1GeV"
-        ]
+        ANGLE: ["45to135deg"]
+        ENERGY: ["1GeV"]
         INDEX_RANGE: [
           "0 9",
           "10 19",
@@ -24,7 +22,6 @@ sim:bic_pid:
           "90 99",
         ]
   script:
-    - export DETECTOR_CONFIG=epic_craterlake
     - |
       snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
@@ -37,8 +34,17 @@ bench:bic_pid:
   image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   variables:
     CUDA_VISIBLE_DEVICES: ""
+    DETECTOR_CONFIG: epic_craterlake
+  before_script:
+    - source .local/bin/env.sh
+    - ls -lrtha
+    - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
+    - mkdir -p "${DETECTOR_CONFIG}"
+    - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
+    - ln -s "../results" "${DETECTOR_CONFIG}/results"
+    - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
+    - ls -lrtha
   script:
-    - export DETECTOR_CONFIG=epic_craterlake
     - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
     - export PATH=$PYTHONUSERBASE/bin:$PATH
     - python -m pip install --user snakemake
@@ -52,8 +58,9 @@ collect_results:bic_pid:
     - "bench:bic_pid"
   when: always
   image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
+  variables:
+    DETECTOR_CONFIG: epic_craterlake
   script:
-    - export DETECTOR_CONFIG=epic_craterlake
     - ls -lrht
     - mv results{,_save}/
     - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid

From 6af00e145cbb91572f848d91c063b099891321b0 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 5 Apr 2026 23:48:46 -0500
Subject: [PATCH 11/58] Fix bench:bic_pid to solve the user authorization
 problem

---
 benchmarks/bic_pid/config.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index cf9e746d..e53aff61 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -45,10 +45,8 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - ls -lrtha
   script:
-    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
-    - export PATH=$PYTHONUSERBASE/bin:$PATH
-    - python -m pip install --user snakemake
-    - python -m pip install --user -r benchmarks/bic_pid/requirements.txt
+    - python -m pip install snakemake
+    - python -m pip install -r benchmarks/bic_pid/requirements.txt
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 collect_results:bic_pid:

From ac283e7fa72f88bc682fc1113404bfd65bc5fe4b Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Thu, 16 Apr 2026 12:27:20 -0500
Subject: [PATCH 12/58] warmup problem

---
 benchmarks/bic_pid/Snakefile  | 4 ++--
 benchmarks/bic_pid/config.yml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 2afa3496..5338caa1 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -12,7 +12,7 @@ def theta_max_from_phase_space(s):
 
 rule bic_pid_sim:
     input:
-        warmup="warmup.edm4hep.root",
+        warmup=ancient("warmup.edm4hep.root"),
         geometry_lib=find_epic_libraries(),
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
@@ -58,7 +58,7 @@ npsim \
 rule bic_pid_recon:
     input:
         sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
-        warmup="warmup.edm4hep.root",
+        warmup=ancient("warmup.edm4hep.root"),
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root",
     log:
diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index e53aff61..ef7616f0 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -43,6 +43,7 @@ bench:bic_pid:
     - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
     - ln -s "../results" "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
+    - touch warmup.edm4hep.root
     - ls -lrtha
   script:
     - python -m pip install snakemake

From bdada3b9ed6548fc401043f87162c184ba45c9e0 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Thu, 16 Apr 2026 17:29:52 -0500
Subject: [PATCH 13/58] Go back to the changes

---
 benchmarks/bic_pid/Snakefile  | 4 ++--
 benchmarks/bic_pid/config.yml | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 5338caa1..2afa3496 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -12,7 +12,7 @@ def theta_max_from_phase_space(s):
 
 rule bic_pid_sim:
     input:
-        warmup=ancient("warmup.edm4hep.root"),
+        warmup="warmup.edm4hep.root",
         geometry_lib=find_epic_libraries(),
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
@@ -58,7 +58,7 @@ npsim \
 rule bic_pid_recon:
     input:
         sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
-        warmup=ancient("warmup.edm4hep.root"),
+        warmup="warmup.edm4hep.root",
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root",
     log:
diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index ef7616f0..e53aff61 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -43,7 +43,6 @@ bench:bic_pid:
     - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
     - ln -s "../results" "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
-    - touch warmup.edm4hep.root
     - ls -lrtha
   script:
     - python -m pip install snakemake

From c7d507ab8ff658c79188def1c3f075f44267b902 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Fri, 17 Apr 2026 00:36:47 -0500
Subject: [PATCH 14/58] Test warmup

---
 benchmarks/bic_pid/Snakefile  | 4 ++--
 benchmarks/bic_pid/config.yml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 2afa3496..5338caa1 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -12,7 +12,7 @@ def theta_max_from_phase_space(s):
 
 rule bic_pid_sim:
     input:
-        warmup="warmup.edm4hep.root",
+        warmup=ancient("warmup.edm4hep.root"),
         geometry_lib=find_epic_libraries(),
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
@@ -58,7 +58,7 @@ npsim \
 rule bic_pid_recon:
     input:
         sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
-        warmup="warmup.edm4hep.root",
+        warmup=ancient("warmup.edm4hep.root"),
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root",
     log:
diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index e53aff61..ef7616f0 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -43,6 +43,7 @@ bench:bic_pid:
     - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
     - ln -s "../results" "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
+    - touch warmup.edm4hep.root
     - ls -lrtha
   script:
     - python -m pip install snakemake

From 9dad473938b2745afa97b8a9712478854eecf346 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Fri, 17 Apr 2026 08:45:26 -0500
Subject: [PATCH 15/58] Solve new problem in bench:bic_pid

---
 benchmarks/bic_pid/Snakefile | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 5338caa1..1b327344 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -1,3 +1,6 @@
+from glob import glob
+import os
+
 def format_energy_for_dd4hep(s):
     return s.rstrip("kMGeV") + "*" + s.lstrip("0123456789")
 
@@ -81,18 +84,28 @@ exec eicrecon {input.sim} -Ppodio:output_file={output} \
 
 
 rule bic_pid_input_list:
-    input:
-        files=expand(
-            "sim_output/bic_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.edm4eic.root",
-            ENERGY=["1GeV"],
-            PHASE_SPACE=["45to135deg"],
-            INDEX=range(100),
-        ),
     output:
         "listing/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
+    params:
+        energy="1GeV",
+        phase_space="45to135deg",
     run:
+        pattern = (
+            f"sim_output/bic_pid/{wildcards.DETECTOR_CONFIG}/{wildcards.PARTICLE}/"
+            f"{params.energy}/{params.phase_space}/"
+            f"{wildcards.PARTICLE}_{params.energy}_{params.phase_space}.*.eicrecon.edm4eic.root"
+        )
+        files = sorted(glob(pattern))
+
+        if len(files) != 100:
+            raise ValueError(
+                f"Expected 100 files for {wildcards.PARTICLE}, found {len(files)}.\n"
+                f"Pattern used: {pattern}"
+            )
+
+        os.makedirs(os.path.dirname(output[0]), exist_ok=True)
         with open(output[0], "wt") as fp:
-            fp.write("\n".join(input.files))
+            fp.write("\n".join(files))
 
 
 rule bic_pid:

From 632077b7ce24adcb6f634569bca9bbdfedf2dbde Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Fri, 17 Apr 2026 09:38:14 -0500
Subject: [PATCH 16/58] changes in sim and bench

---
 benchmarks/bic_pid/config.yml | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index ef7616f0..356cd12e 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -25,6 +25,14 @@ sim:bic_pid:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
+    - mkdir -p transferred_sim_output
+    - cp -aL sim_output/bic_pid transferred_sim_output/
+  artifacts:
+    when: always
+    paths:
+      - transferred_sim_output/
+      - .snakemake/log/
+
 
 bench:bic_pid:
   extends: .det_benchmark
@@ -38,17 +46,19 @@ bench:bic_pid:
   before_script:
     - source .local/bin/env.sh
     - ls -lrtha
-    - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
+    - mkdir -p sim_output
+    - cp -a transferred_sim_output/bic_pid sim_output/
     - mkdir -p "${DETECTOR_CONFIG}"
-    - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
-    - ln -s "../results" "${DETECTOR_CONFIG}/results"
+    - ln -s ../results "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - touch warmup.edm4hep.root
+    - find sim_output/bic_pid | head -50
     - ls -lrtha
   script:
     - python -m pip install snakemake
     - python -m pip install -r benchmarks/bic_pid/requirements.txt
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+    
 
 collect_results:bic_pid:
   extends: .det_benchmark

From 0c70b7ed2fed5f69ab72b244c006709c89a2594c Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Fri, 17 Apr 2026 11:43:50 -0500
Subject: [PATCH 17/58] Remove warmup and transferred things

---
 benchmarks/bic_pid/Snakefile  |  4 ++--
 benchmarks/bic_pid/config.yml | 16 ++++------------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 1b327344..379667a2 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -15,7 +15,7 @@ def theta_max_from_phase_space(s):
 
 rule bic_pid_sim:
     input:
-        warmup=ancient("warmup.edm4hep.root"),
+        warmup="warmup.edm4hep.root",
         geometry_lib=find_epic_libraries(),
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
@@ -61,7 +61,7 @@ npsim \
 rule bic_pid_recon:
     input:
         sim="sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
-        warmup=ancient("warmup.edm4hep.root"),
+        warmup="warmup.edm4hep.root",
     output:
         "sim_output/bic_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.edm4eic.root",
     log:
diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 356cd12e..e834e2bf 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -25,13 +25,6 @@ sim:bic_pid:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
-    - mkdir -p transferred_sim_output
-    - cp -aL sim_output/bic_pid transferred_sim_output/
-  artifacts:
-    when: always
-    paths:
-      - transferred_sim_output/
-      - .snakemake/log/
 
 
 bench:bic_pid:
@@ -46,13 +39,12 @@ bench:bic_pid:
   before_script:
     - source .local/bin/env.sh
     - ls -lrtha
-    - mkdir -p sim_output
-    - cp -a transferred_sim_output/bic_pid sim_output/
+    - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
     - mkdir -p "${DETECTOR_CONFIG}"
-    - ln -s ../results "${DETECTOR_CONFIG}/results"
+    - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
+    - ln -s "../results" "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
-    - touch warmup.edm4hep.root
-    - find sim_output/bic_pid | head -50
+    - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
     - python -m pip install snakemake

From 7cd8385daf2b43505ed0faeb10c1884fd4675cde Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 19 Apr 2026 16:45:26 -0500
Subject: [PATCH 18/58] Check again the timeout

---
 benchmarks/bic_pid/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index e834e2bf..88767036 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -50,7 +50,7 @@ bench:bic_pid:
     - python -m pip install snakemake
     - python -m pip install -r benchmarks/bic_pid/requirements.txt
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
-    
+
 
 collect_results:bic_pid:
   extends: .det_benchmark

From 1eef1f957e1284d58662715b9698c87f607b8aeb Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 19 Apr 2026 18:30:15 -0500
Subject: [PATCH 19/58] Solve situation with files

---
 benchmarks/bic_pid/config.yml | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 88767036..267f1eb2 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -25,13 +25,21 @@ sim:bic_pid:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
+    - mkdir -p transferred_sim_output
+    - cp -aL sim_output/bic_pid transferred_sim_output/
+  artifacts:
+    when: always
+    paths:
+      - transferred_sim_output/
+      - .snakemake/log/
 
 
 bench:bic_pid:
   extends: .det_benchmark
   stage: benchmarks
   needs:
-    - ["sim:bic_pid"]
+    - "common:setup"
+    - "sim:bic_pid"
   image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   variables:
     CUDA_VISIBLE_DEVICES: ""
@@ -39,10 +47,11 @@ bench:bic_pid:
   before_script:
     - source .local/bin/env.sh
     - ls -lrtha
-    - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
+    - mkdir -p sim_output
+    - cp -a transferred_sim_output/bic_pid sim_output/
     - mkdir -p "${DETECTOR_CONFIG}"
-    - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
-    - ln -s "../results" "${DETECTOR_CONFIG}/results"
+    - ln -s ../sim_output "${DETECTOR_CONFIG}/sim_output"
+    - ln -s ../results "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
@@ -56,6 +65,7 @@ collect_results:bic_pid:
   extends: .det_benchmark
   stage: collect
   needs:
+    - "common:setup"
     - "bench:bic_pid"
   when: always
   image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG

From 79c70de21e2b48be2432e9b4114dcb58f52bdf22 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 19 Apr 2026 20:01:13 -0500
Subject: [PATCH 20/58] Solve new error

---
 benchmarks/bic_pid/config.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 267f1eb2..a0e44016 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -22,6 +22,7 @@ sim:bic_pid:
           "90 99",
         ]
   script:
+    - export DETECTOR_CONFIG=epic_craterlake
     - |
       snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
@@ -47,13 +48,14 @@ bench:bic_pid:
   before_script:
     - source .local/bin/env.sh
     - ls -lrtha
+    - find transferred_sim_output/bic_pid | head -50 || true
     - mkdir -p sim_output
     - cp -a transferred_sim_output/bic_pid sim_output/
     - mkdir -p "${DETECTOR_CONFIG}"
     - ln -s ../sim_output "${DETECTOR_CONFIG}/sim_output"
     - ln -s ../results "${DETECTOR_CONFIG}/results"
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
-    - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
+    - find sim_output/bic_pid | head -50 || true
     - ls -lrtha
   script:
     - python -m pip install snakemake
@@ -72,6 +74,7 @@ collect_results:bic_pid:
   variables:
     DETECTOR_CONFIG: epic_craterlake
   script:
+    - export DETECTOR_CONFIG=epic_craterlake
     - ls -lrht
     - mv results{,_save}/
     - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid

From d8fb95e565e3955e8bcfe9a6542db0306f60fe4b Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 19 Apr 2026 22:38:09 -0500
Subject: [PATCH 21/58] Fix errors in bic_pid.org file

---
 benchmarks/bic_pid/bic_pid.org | 942 ++++++++++++++++++---------------
 1 file changed, 519 insertions(+), 423 deletions(-)

diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org
index dd975c49..35412310 100644
--- a/benchmarks/bic_pid/bic_pid.org
+++ b/benchmarks/bic_pid/bic_pid.org
@@ -1,490 +1,586 @@
-#+begin_src jupyter-python
-  import os
-  import math
-  from math import floor
-
-  import pandas as pd
-  import numpy as np
-
-  ## dangerous: silence annoying TF warnings , remove when running on new systems or debugging
-  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # this MUST come before any tf call.
-  import tensorflow as tf
-  from tensorflow import keras
-  from tensorflow.keras import layers
-
-  import matplotlib.pyplot as plt
-  from collections import OrderedDict
-  import json
-  import re
+#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:benchmark :async yes :results drawer :exports both
+
+#+TITLE: ePIC BIC e/\pi separation benchmark
+#+AUTHOR: Tomas Sosa
+#+OPTIONS: d:t
+
+#+LATEX_CLASS_OPTIONS: [9pt,letter]
+#+BIND: org-latex-image-default-width ""
+#+BIND: org-latex-image-default-option "scale=0.3"
+#+BIND: org-latex-images-centered nil
+#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single"))
+#+LATEX_HEADER: \usepackage[margin=1in]{geometry}
+#+LATEX_HEADER: \setlength{\parindent}{0pt}
+#+LATEX: \sloppy
+
+#+begin_src jupyter-python :results silent
+import os
+import math
+from math import floor
+from pathlib import Path
+from collections import OrderedDict
+import json
+import re
+
+import pandas as pd
+import numpy as np
+
+# Must be set before importing TensorFlow
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+
+import matplotlib.pyplot as plt
 #+end_src
 
-#+begin_src jupyter-python
-  print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
+* Parameters
+
+#+begin_src jupyter-python :results silent
+DETECTOR_CONFIG = os.environ.get("DETECTOR_CONFIG")
+PLOT_TITLE = os.environ.get("PLOT_TITLE", DETECTOR_CONFIG or "bic_pid")
+INPUT_ELECTRONS = os.environ.get("INPUT_ELECTRONS")
+INPUT_PIONS = os.environ.get("INPUT_PIONS")
+OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./")
+
+ANGLE = os.environ.get("ANGLE", "45to135deg")
+ENERGY = os.environ.get("ENERGY", "1GeV")
+EPOCHS = int(os.environ.get("EPOCHS", "30"))
+TARGET_IMBALANCE = float(os.environ.get("TARGET_IMBALANCE", "1.0"))
+MODEL_NAME = os.environ.get("MODEL", "vgg-v2")
+TRAIN_SAMPLE_CAP = int(os.environ.get("CAP_TRAIN_SAMPLE", "0"))
+
+output_root = Path(OUTPUT_DIR)
+output_root.mkdir(parents=True, exist_ok=True)
 #+end_src
 
 #+begin_src jupyter-python
-  # Simulate argparse in Kaggle
-  class Args:
-      def __init__(self):
-          self.angle = ['45to135deg']  # ✅ choose your angles
-          self.energy = '1GeV'
-          self.cap_train_sample = 0
-          self.epochs = 30
-          self.target_imbalance = 1.0
-          self.model = 'vgg-v2'
-          self.workdir = '/kaggle/working/output'  # 🔧 all outputs go here
-
-  args = Args()
+print("Num GPUs Available:", len(tf.config.list_physical_devices("GPU")))
+print("DETECTOR_CONFIG =", DETECTOR_CONFIG)
+print("PLOT_TITLE      =", PLOT_TITLE)
+print("INPUT_ELECTRONS =", INPUT_ELECTRONS)
+print("INPUT_PIONS     =", INPUT_PIONS)
+print("OUTPUT_DIR      =", OUTPUT_DIR)
+print("ANGLE           =", ANGLE)
+print("ENERGY          =", ENERGY)
+print("EPOCHS          =", EPOCHS)
+print("TARGET_IMBALANCE=", TARGET_IMBALANCE)
+print("MODEL_NAME      =", MODEL_NAME)
+print("TRAIN_SAMPLE_CAP=", TRAIN_SAMPLE_CAP)
 #+end_src
 
-#+begin_src jupyter-python
-  ## Global efficiencies we want to optimize for (electron efficiencies)
-  kTargetEfficiency = .95
-  ## other efficiency scenarios to cut the ML on
-  kAlternativeEfficiencies = np.arange(.5, 1., .05)
-
-  ## setting
-  #angle_settings=['eta0.0', 'eta0.5n', 'eta0.5p', 'eta1.0n', 'eta1.0p']
-  #angle_settings=['eta0.0', 'eta1.0p']
-  #angle_settings=['eta0.0']
-  angle_settings=args.angle
-  energy_setting= args.energy
-  ## MeV or GeV
-  energy_GeV = float(energy_setting[:-3]) * (1 if energy_setting[-3:] == 'GeV' else 1/1000.)
-  def eta_from_angle(angle_label):
-      match = re.match(r"(\d+)to(\d+)deg", angle_label)
-      if match:
-          theta1 = float(match.group(1))
-          theta2 = float(match.group(2))
-          mean_theta_deg = (theta1 + theta2) / 2.0
-          mean_theta_rad = np.deg2rad(mean_theta_deg)
-          eta = -np.log(np.tan(mean_theta_rad / 2))
-          return eta
-      else:
-          raise ValueError(f"Cannot parse eta from angle label: {angle_label}")
-
-  etas = {}
-  for setting in angle_settings:
-      if setting.startswith("eta"):
-          val = float(setting[3:-1])
-          sign = -1. if setting[-1] == 'n' else 1.
-          etas[setting] = val * sign
-      elif "deg" in setting:
-          etas[setting] = eta_from_angle(setting)
-      else:
-          etas[setting] = 0.0
-
-  print(f'E/p scan for {energy_setting}')
-  print(f'   - detected energy: {energy_GeV} GeV')
-  print(f'   - eta ranges: {angle_settings}')
+* Plotting setup
+
+#+begin_src jupyter-python :results silent
+import matplotlib as mpl
+
+def setup_presentation_style():
+    mpl.rcParams.update(mpl.rcParamsDefault)
+    plt.style.use("ggplot")
+    plt.rcParams.update({
+        "axes.labelsize": 12,
+        "axes.titlesize": 13,
+        "figure.titlesize": 13,
+        "figure.figsize": (8, 6),
+        "legend.fontsize": 11,
+        "xtick.labelsize": 11,
+        "ytick.labelsize": 11,
+        "pgf.rcfonts": False,
+    })
+
+setup_presentation_style()
 #+end_src
 
-#+begin_src jupyter-python
-  ## set ML configuration
-  kTrainSampleCap = args.cap_train_sample
-  kEpochs = args.epochs
-  kTestSize = .2
-  kValidateSize = .1
-  kTargetImbalance = args.target_imbalance
-  kPionWeightCap = 1.00
-  kElectronLabel = 1
-  kPionLabel = 0
-  kModel = args.model
-
-  print('ML configuration:')
-  print(f'   - Number of epochs: {kEpochs}')
-  if kTrainSampleCap > 0:
-      print(f'   - Training sample cap: {kTrainSampleCap}')
-  print(f'   - Validation fraction: {kValidateSize}')
-  print(f'   - Test fraction: {kTestSize}')
-  print(f'   - Target pi:E imbalance: {kTargetImbalance}')
-  print(f'   - Upper cap on pion weights: {kPionWeightCap}')
-  print(f'   - Model: {kModel}')
+* Analysis setup
+
+#+begin_src jupyter-python :results silent
+kTargetEfficiency = 0.95
+kAlternativeEfficiencies = np.arange(0.5, 1.0, 0.05)
+
+angle_settings = [ANGLE]
+energy_setting = ENERGY
+energy_GeV = float(energy_setting[:-3]) * (1.0 if energy_setting.endswith("GeV") else 1.0 / 1000.0)
+
+def eta_from_angle(angle_label):
+    match = re.match(r"(\d+)to(\d+)deg", angle_label)
+    if match:
+        theta1 = float(match.group(1))
+        theta2 = float(match.group(2))
+        mean_theta_deg = (theta1 + theta2) / 2.0
+        mean_theta_rad = np.deg2rad(mean_theta_deg)
+        eta = -np.log(np.tan(mean_theta_rad / 2.0))
+        return eta
+    raise ValueError(f"Cannot parse eta from angle label: {angle_label}")
+
+etas = {}  # angle label -> pseudorapidity: "eta<val>[n|p]" labels, "<a>to<b>deg" ranges, anything else 0.0
+for setting in angle_settings:
+    if setting.startswith("eta"):
+        val = float(setting[3:].rstrip("np"))  # drop only a trailing n/p sign marker, never a digit
+        sign = -1.0 if setting[-1] == "n" else 1.0  # "n" suffix marks negative eta
+        etas[setting] = val * sign
+    elif "deg" in setting:
+        etas[setting] = eta_from_angle(setting)
+    else:
+        etas[setting] = 0.0
 #+end_src
 
 #+begin_src jupyter-python
-  def get_dimensions(df):
-      max_idx = df.index.max()
-      min_idx = df.index.min()
-      max_idx = np.array([v if type(v) != str else 0 for v in max_idx])
-      min_idx = np.array([v if type(v) != str else 0 for v in min_idx])
-      return {k: v for (k, v) in zip(('event', '_', 'layer', 'hit'), (max_idx - min_idx + 1))}
-
-  ## boiler-plate for in-memory datasets
-  def make_dataset(fields):
-      dataset = tf.data.Dataset.from_tensor_slices(fields)
-      ## do magic to avoid shard warnings of operating on DATA instead of FILE
-      options = tf.data.Options()
-      options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
-      return dataset.with_options(options)
+print(f"E/p scan for {energy_setting}")
+print(f"   - detected energy: {energy_GeV} GeV")
+print(f"   - eta ranges: {angle_settings}")
 #+end_src
 
-#+begin_src jupyter-python
-  ## Chaos CNN model
-  def build_old(input_shape, n_labels=2):
-      my_model = keras.Sequential([
-          keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape),
-          keras.layers.MaxPooling2D((2, 2), strides=2),
-          keras.layers.Dropout(0.25),
-          keras.layers.Conv2D(128, (2, 2), padding='same', activation='relu'),
-          keras.layers.MaxPooling2D((2, 2), strides=2),
-          keras.layers.Conv2D(64, (2, 2), padding='same', activation='relu'),
-          keras.layers.MaxPooling2D((2, 2), strides=2),
-          keras.layers.Dropout(0.25),
-
-          keras.layers.Flatten(),
-          keras.layers.Dense(128, activation='relu'),
-          #keras.layers.Dropout(0.25),
-          keras.layers.Dense(32, activation='relu'),
-          keras.layers.Dense(n_labels, activation='softmax')
-          ])
-      return my_model
-   
-  ## Slightly beefier VGG-style CNN
-  def build_vgg_v1(input_shape, n_labels=2):
-      my_model = keras.Sequential([
-          keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape),
-          keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same'),
-          keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2),
-          keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
-          keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
-          keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
-          keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2),
-          keras.layers.Flatten(),
-          keras.layers.Dense(1024, activation='relu'),
-          keras.layers.Dense(512, activation='relu'),
-          keras.layers.Dense(n_labels, activation='softmax')
-          ])
-
-      return my_model
-
-  def build_vgg_v2(input_shape, n_labels=2):
-      my_model = keras.Sequential([
-          keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same',input_shape=input_shape),
-          keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',padding='same'),
-          keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2),
-          keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
-          keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
-          keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
-          keras.layers.MaxPooling2D(pool_size=(2, 2),strides=2),
-          keras.layers.Flatten(),
-          keras.layers.Dense(1024, activation='relu'),
-          keras.layers.Dense(1024, activation='relu'),
-          keras.layers.Dense(n_labels, activation='softmax')
-          ])
-
-      return my_model
+#+begin_src jupyter-python :results silent
+kTrainSampleCap = TRAIN_SAMPLE_CAP
+kEpochs = EPOCHS
+kTestSize = 0.2
+kValidateSize = 0.1
+kTargetImbalance = TARGET_IMBALANCE
+kPionWeightCap = 1.0
+kElectronLabel = 1
+kPionLabel = 0
+kModel = MODEL_NAME
 #+end_src
 
 #+begin_src jupyter-python
-  def build_model(input_shape, n_labels=2):
-      if kModel == 'old':
-          print(f'Building old')
-          return build_old(input_shape, n_labels)
-      elif kModel == 'vgg-v1':
-          print(f'Building vgg-v1')
-          return build_vgg_v1(input_shape, n_labels)
-      elif kModel == 'vgg-v2':
-          print(f'Building vgg-v2')
-          return build_vgg_v2(input_shape, n_labels)
-      print('Building default')
-      return build_vgg_v2(input_shape, n_labels)
+print("ML configuration:")
+print(f"   - Number of epochs: {kEpochs}")
+if kTrainSampleCap > 0:
+    print(f"   - Training sample cap: {kTrainSampleCap}")
+print(f"   - Validation fraction: {kValidateSize}")
+print(f"   - Test fraction: {kTestSize}")
+print(f"   - Target pi:E imbalance: {kTargetImbalance}")
+print(f"   - Upper cap on pion weights: {kPionWeightCap:.2f}")
+print(f"   - Model: {kModel}")
 #+end_src
 
-#+begin_src jupyter-python
-  angle_label=angle_settings[0]
-  print(angle_label)
+* Helper functions
+
+#+begin_src jupyter-python :results silent
+def get_dimensions(df):  # per-level extent (max - min + 1) of df's index, keyed event/_/layer/hit
+    max_idx = df.index.max()
+    min_idx = df.index.min()
+    max_idx = np.array([0 if isinstance(v, str) else v for v in max_idx])  # string levels (incl. str subclasses) -> 0
+    min_idx = np.array([0 if isinstance(v, str) else v for v in min_idx])  # so a string level always has extent 1
+    return {k: v for (k, v) in zip(("event", "_", "layer", "hit"), (max_idx - min_idx + 1))}
+
+def make_dataset(fields):
+    dataset = tf.data.Dataset.from_tensor_slices(fields)
+    options = tf.data.Options()
+    options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
+    return dataset.with_options(options)
 #+end_src
 
-#+begin_src jupyter-python
-  #datadir = f'/kaggle/input/results-45to135deg-1gev-data'
-  #plotdir = f'/kaggle/working/plots/{angle_label}'
-  #output_directory = f'/kaggle/working/output/{angle_label}/{energy_setting}'
+* Models
+
+#+begin_src jupyter-python :results silent
+def build_old(input_shape, n_labels=2):
+    my_model = keras.Sequential([
+        keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu", input_shape=input_shape),
+        keras.layers.MaxPooling2D((2, 2), strides=2),
+        keras.layers.Dropout(0.25),
+        keras.layers.Conv2D(128, (2, 2), padding="same", activation="relu"),
+        keras.layers.MaxPooling2D((2, 2), strides=2),
+        keras.layers.Conv2D(64, (2, 2), padding="same", activation="relu"),
+        keras.layers.MaxPooling2D((2, 2), strides=2),
+        keras.layers.Dropout(0.25),
+        keras.layers.Flatten(),
+        keras.layers.Dense(128, activation="relu"),
+        keras.layers.Dense(32, activation="relu"),
+        keras.layers.Dense(n_labels, activation="softmax"),
+    ])
+    return my_model
+
+def build_vgg_v1(input_shape, n_labels=2):
+    my_model = keras.Sequential([
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same", input_shape=input_shape),
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"),
+        keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+        keras.layers.Flatten(),
+        keras.layers.Dense(1024, activation="relu"),
+        keras.layers.Dense(512, activation="relu"),
+        keras.layers.Dense(n_labels, activation="softmax"),
+    ])
+    return my_model
+
+def build_vgg_v2(input_shape, n_labels=2):
+    my_model = keras.Sequential([
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same", input_shape=input_shape),
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"),
+        keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
+        keras.layers.Flatten(),
+        keras.layers.Dense(1024, activation="relu"),
+        keras.layers.Dense(1024, activation="relu"),
+        keras.layers.Dense(n_labels, activation="softmax"),
+    ])
+    return my_model
+
+def build_model(input_shape, n_labels=2):
+    if kModel == "old":
+        print("Building old")
+        return build_old(input_shape, n_labels)
+    elif kModel == "vgg-v1":
+        print("Building vgg-v1")
+        return build_vgg_v1(input_shape, n_labels)
+    elif kModel == "vgg-v2":
+        print("Building vgg-v2")
+        return build_vgg_v2(input_shape, n_labels)
+    print("Building default")
+    return build_vgg_v2(input_shape, n_labels)
 #+end_src
 
-#+begin_src jupyter-python
-  print('\nprocessing angle setting:', angle_label)
-  print(f'  - eta: {etas[angle_label]}')
-
-  ## output directories
-  output_directory = f'{args.workdir}/{angle_label}/{energy_setting}'
-  plotdir = f'{output_directory}/plots'
-  datadir = f'{output_directory}/data'
-  os.makedirs(plotdir, exist_ok=True)
-  os.makedirs(datadir, exist_ok=True)
-  print(f'   - output data directory: {datadir}')
-  print(f'   - output plot directory: {plotdir}')
+* Output layout
+
+#+begin_src jupyter-python :results silent
+angle_label = angle_settings[0]
+
+output_directory = f"{OUTPUT_DIR}/{angle_label}/{energy_setting}"
+plotdir = f"{output_directory}/plots"
+datadir = f"{output_directory}/data"
+
+os.makedirs(plotdir, exist_ok=True)
+os.makedirs(datadir, exist_ok=True)
 #+end_src
 
 #+begin_src jupyter-python
-  print('Loading datasets: ')
-  print(f'   - Loading {datadir}/hits.snappy.parquet')
-  df_data = pd.read_parquet(f'{datadir}/hits.snappy.parquet')
-  print(f'   - Loading {datadir}/labels.snappy.parquet')
-  df_mc = pd.read_parquet(f'{datadir}/labels.snappy.parquet')
+print("\nprocessing angle setting:", angle_label)
+print(f"  - eta: {etas[angle_label]}")
+print(f"  - output data directory: {datadir}")
+print(f"  - output plot directory: {plotdir}")
 #+end_src
 
+* Load datasets
+
 #+begin_src jupyter-python
-  ## calculate weight to achieve target imbalance
-  n_electrons = np.sum(df_mc['PDG'] == 11)
-  n_pions = np.sum(df_mc['PDG'] == -211)
-  imbalance = n_pions/n_electrons
-  kSuggestedWeight = min(n_electrons/n_pions*kTargetImbalance, kPionWeightCap)
-  print(f'Data set has relative class imbalance of {n_electrons} : {n_pions} = {imbalance}')
-  print(f'  - target imbalance: {kTargetImbalance}')
-  print(f'  - pion weight upper limit: {kPionWeightCap:.2f}')
-  print(f'  - suggested pion weight {kSuggestedWeight:.2f}')
+print("Loading datasets:")
+print(f"   - Loading {datadir}/hits.snappy.parquet")
+df_data = pd.read_parquet(f"{datadir}/hits.snappy.parquet")
+
+print(f"   - Loading {datadir}/labels.snappy.parquet")
+df_mc = pd.read_parquet(f"{datadir}/labels.snappy.parquet")
 #+end_src
 
 #+begin_src jupyter-python
-  ## Load E/P data again for aggregate statistics, and to calculate the target efficiency
-  print(f'Loading E/P data from {datadir}/EoverP_results.csv')
-  cutdf = pd.read_csv(f'{datadir}/EoverP_results.csv').sort_values('rejection', ascending=False)
-  results_EoverP = {key: cutdf[key][0] for key in cutdf.keys()}
-  results_EoverP['max_layer'] = int(results_EoverP['max_layer']) ## get rid of the int64 which causes trouble with json
-  kTargetEfficiencyML = kTargetEfficiency / results_EoverP['efficiency']
-  print(results_EoverP)
-  print(f'Deduced target efficiency for ML: {kTargetEfficiencyML:.3f}')
+n_electrons = np.sum(df_mc["PDG"] == 11)
+n_pions = np.sum(df_mc["PDG"] == -211)
+imbalance = n_pions / n_electrons
+kSuggestedWeight = min(n_electrons / n_pions * kTargetImbalance, kPionWeightCap)
+
+print(f"Data set has relative class imbalance of {n_electrons} : {n_pions} = {imbalance}")
+print(f"  - target imbalance: {kTargetImbalance}")
+print(f"  - pion weight upper limit: {kPionWeightCap:.2f}")
+print(f"  - suggested pion weight {kSuggestedWeight:.2f}")
 #+end_src
 
 #+begin_src jupyter-python
-  print('Formatting data objects')
-  dim = get_dimensions(df_data)
-  xdata_both = df_data.values.reshape(dim['event'], 
-                                      dim['layer'], 
-                                      dim['hit'], 
-                                      len(df_data.columns)).astype(np.float32)
-
-  ldata = df_mc['PDG'].map(lambda pdg: kElectronLabel if (pdg == 11) else kPionLabel).values
-  wdata = df_mc['PDG'].map(lambda pdg: 1 if (pdg == 11) else kSuggestedWeight).values
+print(f"Loading E/P data from {datadir}/EoverP_results.csv")
+cutdf = pd.read_csv(f"{datadir}/EoverP_results.csv").sort_values("rejection", ascending=False)
+results_EoverP = {key: cutdf[key].iloc[0] for key in cutdf.columns}
+results_EoverP["max_layer"] = int(results_EoverP["max_layer"])
+kTargetEfficiencyML = kTargetEfficiency / results_EoverP["efficiency"]
+
+print(results_EoverP)
+print(f"Deduced target efficiency for ML: {kTargetEfficiencyML:.3f}")
 #+end_src
 
+* Format tensors
+
 #+begin_src jupyter-python
-  print('Shuffling data and separating samples')
-  ## shuffle data
-  index = np.arange(len(ldata))
-  np.random.shuffle(index)
-  tot_len = len(index)
-
-  n_valid = floor(tot_len * kValidateSize)
-  n_test = floor(tot_len * kTestSize)
-  n_train = tot_len - n_valid - n_test
-  if kTrainSampleCap > 0 and n_train > kTrainSampleCap:
-      print(f'Capping training sample size to {kTrainSampleCap}')
-      valid_over_train = n_valid / n_train
-      test_over_train = n_test / n_train
-      n_train = kTrainSampleCap
-      n_valid = floor(valid_over_train * n_train)
-      n_test = floor(test_over_train * n_train)
-      tot_len = n_train + n_valid + n_test
-  print(f'Sample sizes: {{n_train: {n_train}, n_valid: {n_valid}, n_test: {n_test}}}')
+print("Formatting data objects")
+dim = get_dimensions(df_data)
+
+xdata_both = df_data.values.reshape(
+    dim["event"],
+    dim["layer"],
+    dim["hit"],
+    len(df_data.columns)
+).astype(np.float32)
+
+ldata = df_mc["PDG"].map(lambda pdg: kElectronLabel if pdg == 11 else kPionLabel).values
+wdata = df_mc["PDG"].map(lambda pdg: 1 if pdg == 11 else kSuggestedWeight).values
 #+end_src
 
 #+begin_src jupyter-python
-  id_valid = index[:n_valid]
-  id_test = index[n_valid:n_valid + n_test]
-  id_train = index[n_valid + n_test:tot_len]
-  xtrain, xvalid, xtest = xdata_both[id_train], xdata_both[id_valid], xdata_both[id_test]
-  ltrain, lvalid, ltest = ldata[id_train], ldata[id_valid], ldata[id_test]
-  wtrain, wvalid = wdata[id_train], wdata[id_valid]
+print("Shuffling data and separating samples")
+index = np.arange(len(ldata))
+np.random.shuffle(index)
+tot_len = len(index)
+
+n_valid = floor(tot_len * kValidateSize)
+n_test = floor(tot_len * kTestSize)
+n_train = tot_len - n_valid - n_test
+
+if kTrainSampleCap > 0 and n_train > kTrainSampleCap:
+    print(f"Capping training sample size to {kTrainSampleCap}")
+    valid_over_train = n_valid / n_train
+    test_over_train = n_test / n_train
+    n_train = kTrainSampleCap
+    n_valid = floor(valid_over_train * n_train)
+    n_test = floor(test_over_train * n_train)
+    tot_len = n_train + n_valid + n_test
+
+print(f"Sample sizes: {{n_train: {n_train}, n_valid: {n_valid}, n_test: {n_test}}}")
+#+end_src
+
+#+begin_src jupyter-python :results silent
+id_valid = index[:n_valid]
+id_test = index[n_valid:n_valid + n_test]
+id_train = index[n_valid + n_test:tot_len]
+
+xtrain, xvalid, xtest = xdata_both[id_train], xdata_both[id_valid], xdata_both[id_test]
+ltrain, lvalid, ltest = ldata[id_train], ldata[id_valid], ldata[id_test]
+wtrain, wvalid = wdata[id_train], wdata[id_valid]
 #+end_src
 
+* Training
+
 #+begin_src jupyter-python
-  print('Start training, using GPU resources')
-  gpu = tf.config.list_logical_devices('GPU')
-  strategy = tf.distribute.MirroredStrategy(gpu) if len(gpu) == 1 else tf.distribute.MirroredStrategy([gpu[0]])
-  history = None
-  with strategy.scope():
-      train_dataset = make_dataset((xtrain, ltrain, wtrain))
-      valid_dataset = make_dataset((xvalid, lvalid, wvalid))
-
-      ## avoid warning that we are operating on DATA instead of FILE
-      options = tf.data.Options()
-      options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
-      train_dataset = train_dataset.with_options(options)
-      valid_dataset = valid_dataset.with_options(options)
-      
-      model = build_model(input_shape=xtrain.shape[1:])
-      model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
-                    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
-                    weighted_metrics=['accuracy'])
-      history = model.fit(train_dataset.batch(2000), validation_data=valid_dataset.batch(1000), epochs=kEpochs)
-      os.makedirs(output_directory, exist_ok=True)
+print("Start training")
+gpus = tf.config.list_logical_devices("GPU")
+if gpus:
+    strategy = tf.distribute.MirroredStrategy(devices=[d.name for d in gpus])
+else:
+    strategy = tf.distribute.get_strategy()
+
+history = None
+with strategy.scope():
+    train_dataset = make_dataset((xtrain, ltrain, wtrain))
+    valid_dataset = make_dataset((xvalid, lvalid, wvalid))
+
+    options = tf.data.Options()
+    options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
+    train_dataset = train_dataset.with_options(options)
+    valid_dataset = valid_dataset.with_options(options)
+
+    model = build_model(input_shape=xtrain.shape[1:])
+    model.compile(
+        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
+        weighted_metrics=["accuracy"],
+    )
+    history = model.fit(
+        train_dataset.batch(2000),
+        validation_data=valid_dataset.batch(1000),
+        epochs=kEpochs
+    )
+    os.makedirs(output_directory, exist_ok=True)
 #+end_src
 
+* Export ONNX
+
 #+begin_src jupyter-python
-  import keras.backend as K
-  # Monkey-patch the missing function to avoid the crash
-  K.set_learning_phase = lambda flag: None
-
-  import tensorflow as tf
-  import tf2onnx
-
-  # Load your Keras model
-  #model = tf.keras.models.load_model("/epi_separation/results/45to135deg/1GeV/data/cnn_model_30epochs.h5")
-
-  # Define a function to capture the input signature
-  @tf.function(input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)])
-  def model_fn(input_tensor):
-      return model(input_tensor)
-
-  # Convert to ONNX format
-  onnx_model, _ = tf2onnx.convert.from_function(
-      model_fn,
-      input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)],  # This is important
-      opset=13,
-      output_path=f"{output_directory}/EcalBarrel_pi_rejection.onnx"
-  )
-
-  print("Model converted successfully to ONNX format!")
+import keras.backend as K
+K.set_learning_phase = lambda flag: None
+
+import tf2onnx
+
+@tf.function(input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)])
+def model_fn(input_tensor):
+    return model(input_tensor)
+
+onnx_model, _ = tf2onnx.convert.from_function(
+    model_fn,
+    input_signature=[tf.TensorSpec(shape=[None, *model.input_shape[1:]], dtype=tf.float32)],
+    opset=13,
+    output_path=f"{output_directory}/EcalBarrel_pi_rejection.onnx"
+)
+
+print("Model converted successfully to ONNX format!")
 #+end_src
 
+* Learning curves
+
 #+begin_src jupyter-python
-  print('Summarizing metrics')
-  fig, ax = plt.subplots(1, 2, figsize=(12,6))
-
-  ax[0].plot(history.history['loss'])
-  ax[0].plot(history.history['val_loss'])
-  ax[0].set_title('model loss')
-  ax[0].set_ylabel('loss')
-  ax[0].set_xlabel('epoch')
-  ax[0].legend(['train', 'validate'], loc='upper left')
-
-  ax[1].plot(history.history['accuracy'])
-  ax[1].plot(history.history['val_accuracy'])
-  ax[1].set_title('accuracy')
-  ax[1].set_ylabel('accuracy')
-  ax[1].set_xlabel('epoch')
-  ax[1].legend(['train', 'validate'], loc='upper left')
-  ax[1].set_ylim(0, 1.1)
-
-  fig.savefig(f'{plotdir}/ML_learning.pdf')
+print("Summarizing metrics")
+fig, ax = plt.subplots(1, 2, figsize=(12, 6))
+
+ax[0].plot(history.history["loss"])
+ax[0].plot(history.history["val_loss"])
+ax[0].set_title("model loss")
+ax[0].set_ylabel("loss")
+ax[0].set_xlabel("epoch")
+ax[0].legend(["train", "validate"], loc="upper left")
+
+ax[1].plot(history.history["accuracy"])
+ax[1].plot(history.history["val_accuracy"])
+ax[1].set_title("accuracy")
+ax[1].set_ylabel("accuracy")
+ax[1].set_xlabel("epoch")
+ax[1].legend(["train", "validate"], loc="upper left")
+ax[1].set_ylim(0, 1.1)
+
+fig.savefig(f"{plotdir}/ML_learning.pdf")
+plt.close(fig)
 #+end_src
 
+* Evaluation
+
 #+begin_src jupyter-python
-  print('Benchmarking test data')
-  # benchmark
-  test_dataset = make_dataset((xtest,))
-  prediction = model.predict(test_dataset.batch(1000))
+print("Benchmarking test data")
+test_dataset = make_dataset((xtest,))
+prediction = model.predict(test_dataset.batch(1000))
 #+end_src
 
 #+begin_src jupyter-python
-  print('Calculate aggregate e-pi rejection metrics')
-
-  def calculate_metrics(target_efficiency=kTargetEfficiencyML, export_prediction=True):
-      ## find the target efficiency cut point and weight the electron results
-      ## to move the cross-over point into pions to fit this efficiency
-      ## this code is specific to two particles where (P_e + P_pi = 1)
-      efficiency_cut = np.percentile(prediction[ltest == kElectronLabel].T[kElectronLabel], 
-                                     (1 - target_efficiency)*100)
-      target_weight = (1 - efficiency_cut) / efficiency_cut
-
-      prediction_weights = np.ones(2)
-      prediction_weights[kElectronLabel] = target_weight
-      prediction_labels = np.argmax(prediction * prediction_weights, axis=1)
-
-
-      electron_predicted = [None, None]
-      probabilities = np.zeros(shape=(2,2))
-      for i in [kPionLabel, kElectronLabel]:
-          mask = (ltest == i)
-          probabilities[i] = np.bincount(prediction_labels[mask])/float(np.sum(mask))
-          electron_predicted[i] = prediction[mask].T[kElectronLabel]
-          
-      binomial_error = lambda eff, n:  np.sqrt(n * eff * (1 - eff)) / n
-      inverse_error = lambda val, err: err / val**2 
-
-      n_electron_test = np.sum(ltest == kElectronLabel)
-      n_pion_test = np.sum(ltest == kPionLabel)
-
-      results_ML = OrderedDict({'target_particle': 'e-',
-                                'target_weight': target_weight,
-                                'target_efficiency': target_efficiency,
-                                'target_cut': efficiency_cut,
-                                'n_electrons': int(n_electron_test),
-                                'n_pions': int(n_pion_test),
-                                'probabilities': probabilities.tolist(),
-                                'efficiency': probabilities[kElectronLabel, kElectronLabel],
-                                'efficiency_error': binomial_error(probabilities[kElectronLabel, kElectronLabel], n_electron_test),
-                                'rejection': 1 / probabilities[kPionLabel, kElectronLabel],
-                                'rejection_error': inverse_error(probabilities[kPionLabel, kElectronLabel], binomial_error(probabilities[kPionLabel, kElectronLabel], n_pion_test))})
-
-      ## calculate aggregate results from E/P + ML
-      results = OrderedDict({
-          'energy': energy_GeV,
-          'eta': etas[angle_label],
-          'angle': angle_label,
-          'efficiency': results_EoverP['efficiency'] * results_ML['efficiency'],
-          'efficiency_error': np.sqrt(results_EoverP['efficiency']**2 * results_ML['efficiency_error']**2 
-                                      + results_ML['efficiency']**2 * results_EoverP['efficiency_error']**2),
-          'rejection': results_EoverP['rejection'] * results_ML['rejection'],
-          'rejection_error': np.sqrt(results_EoverP['rejection']**2 * results_ML['rejection_error']**2 
-                                      + results_ML['rejection']**2 * results_EoverP['rejection_error']**2),
-          'prob_cut': efficiency_cut,
-          'EoverP': results_EoverP,
-          'ML': results_ML})
-      if export_prediction:
-          return results, electron_predicted
-      return results
+print("Calculate aggregate e-pi rejection metrics")
+
+def calculate_metrics(target_efficiency=kTargetEfficiencyML, export_prediction=True):
+    efficiency_cut = np.percentile(
+        prediction[ltest == kElectronLabel].T[kElectronLabel],
+        (1 - target_efficiency) * 100
+    )
+    target_weight = (1 - efficiency_cut) / efficiency_cut
+
+    prediction_weights = np.ones(2)
+    prediction_weights[kElectronLabel] = target_weight
+    prediction_labels = np.argmax(prediction * prediction_weights, axis=1)
+
+    electron_predicted = [None, None]
+    probabilities = np.zeros(shape=(2, 2))
+    for i in [kPionLabel, kElectronLabel]:
+        mask = (ltest == i)
+        probabilities[i] = np.bincount(prediction_labels[mask], minlength=2) / float(np.sum(mask))
+        electron_predicted[i] = prediction[mask].T[kElectronLabel]
+
+    binomial_error = lambda eff, n: np.sqrt(n * eff * (1 - eff)) / n
+    inverse_error = lambda val, err: err / val**2
+
+    n_electron_test = np.sum(ltest == kElectronLabel)
+    n_pion_test = np.sum(ltest == kPionLabel)
+
+    results_ML = OrderedDict({
+        "target_particle": "e-",
+        "target_weight": float(target_weight),
+        "target_efficiency": float(target_efficiency),
+        "target_cut": float(efficiency_cut),
+        "n_electrons": int(n_electron_test),
+        "n_pions": int(n_pion_test),
+        "probabilities": probabilities.tolist(),
+        "efficiency": float(probabilities[kElectronLabel, kElectronLabel]),
+        "efficiency_error": float(binomial_error(probabilities[kElectronLabel, kElectronLabel], n_electron_test)),
+        "rejection": float(1 / probabilities[kPionLabel, kElectronLabel]),
+        "rejection_error": float(inverse_error(
+            probabilities[kPionLabel, kElectronLabel],
+            binomial_error(probabilities[kPionLabel, kElectronLabel], n_pion_test)
+        )),
+    })
+
+    results = OrderedDict({
+        "energy": float(energy_GeV),
+        "eta": float(etas[angle_label]),
+        "angle": angle_label,
+        "efficiency": float(results_EoverP["efficiency"] * results_ML["efficiency"]),
+        "efficiency_error": float(np.sqrt(
+            results_EoverP["efficiency"]**2 * results_ML["efficiency_error"]**2 +
+            results_ML["efficiency"]**2 * results_EoverP["efficiency_error"]**2
+        )),
+        "rejection": float(results_EoverP["rejection"] * results_ML["rejection"]),
+        "rejection_error": float(np.sqrt(
+            results_EoverP["rejection"]**2 * results_ML["rejection_error"]**2 +
+            results_ML["rejection"]**2 * results_EoverP["rejection_error"]**2
+        )),
+        "prob_cut": float(efficiency_cut),
+        "EoverP": results_EoverP,
+        "ML": results_ML,
+    })
+
+    if export_prediction:
+        return results, electron_predicted
+    return results
 #+end_src
 
 #+begin_src jupyter-python
-  results, electron_predicted = calculate_metrics()
-  results_ML = results['ML']
-  test = electron_predicted
-  print(f'Calculating alternative target efficiency scenarios: {kAlternativeEfficiencies}')
-  results['scenarios'] = {}
-  for alternative_eff in kAlternativeEfficiencies:
-      target_eff_ml = alternative_eff / results_EoverP['efficiency']
-      tmp_res = calculate_metrics(target_efficiency=target_eff_ml, export_prediction=False)
-      results['scenarios'][alternative_eff] = tmp_res
+results, electron_predicted = calculate_metrics()
+results_ML = results["ML"]
+test = electron_predicted
+
+print(f"Calculating alternative target efficiency scenarios: {kAlternativeEfficiencies}")
+results["scenarios"] = {}
+for alternative_eff in kAlternativeEfficiencies:
+    target_eff_ml = alternative_eff / results_EoverP["efficiency"]
+    tmp_res = calculate_metrics(target_efficiency=target_eff_ml, export_prediction=False)
+    results["scenarios"][float(alternative_eff)] = tmp_res
 #+end_src
 
 #+begin_src jupyter-python
-  assert test is electron_predicted
+assert test is electron_predicted
+
+with open(f"{output_directory}/results.json", "w") as f:
+    f.write(json.dumps(results, indent=2))
 
-  with open(f'{output_directory}/results.json', 'w') as f:
-      f.write(json.dumps(results, indent=2))
-  print(f' - Found overall rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency')
-  print(f' - Results written to {output_directory}/results.json')
+print(f' - Found overall rejection {results["rejection"]:.2f} at {results["efficiency"]:.2f} efficiency')
+print(f" - Results written to {output_directory}/results.json")
 #+end_src
 
+* Rejection plot
+
 #+begin_src jupyter-python
-  print('Plotting ML results')
-  # default color cycle of matplotlib
-  prop_cycle = plt.rcParams['axes.prop_cycle']
-  colors = prop_cycle.by_key()['color']
-  box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
-
-  parts = {kElectronLabel: r'e^-', kPionLabel: r'\pi^-'}
-
-  fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
-  effs = []
-  for i in parts.keys():
-      ax.hist(electron_predicted[i], bins=np.linspace(0, 1, 101), label='${}$'.format(parts[i]),
-                  color=colors[i], ec=colors[i], alpha=0.5)
-  ax.axvline(x=results['prob_cut'], lw=2, color='k', ls='--')
-  eff_text = '\n'.join([r'$\epsilon_{{ML}}^{{e^-}} = {:.2f}$%'.format(results_ML['efficiency'] * 100.),
-                            r'$R_{{ML}}^{{\pi^-}} = {:.1f}$'.format(results_ML['rejection']),
-                            r'$\epsilon_{{E/p}}^{{e^-}} = {:.2f}$%'.format(results_EoverP['efficiency'] * 100.),
-                            r'$R_{{E/p}}^{{\pi^-}} = {:.1f}$'.format(results_EoverP['rejection'])
-                           ])
-  data_to_axis = (ax.transAxes + ax.transData.inverted()).inverted()
-  ax.text(data_to_axis.transform((results['prob_cut'], 1))[0] + 0.01, 0.99, eff_text, fontsize=24,
-          transform=ax.transAxes, ha='left', va='top')
-  ax.set_yscale('log')
-  ax.set_ylabel('Counts', fontsize=24)
-  ax.set_xlabel(r'$P_{{{}}}$'.format(r'e^-'), fontsize=24)
-  ax.tick_params(direction='in', which='both', labelsize=24)
-  ax.legend(fontsize=24, ncol=4, loc='upper center', bbox_to_anchor=(0.5, 1.12),)
-  ax.text(0.05, .99, '\n'.join(
-      [r'{energy} at ${loc}$'.format(energy='1GeV', 
-                                     loc=f'eta = {etas[angle_label]}'),
-       r'$R_{{\pi}} = {rejection:.1f}$ at $\epsilon_{{e^-}} = {efficiency:.2f}$%'.format(
-           rejection=results_EoverP['rejection'] * results_ML['rejection'],
-           efficiency=results_EoverP['efficiency'] * results_ML['efficiency'] * 100.)]),
-      ha='left', va='top', fontsize=24, transform=ax.transAxes)
-  fig.savefig(f'{plotdir}/ML_rejection.pdf')
-
-  print('Done with this eta bin')
+print("Plotting ML results")
+prop_cycle = plt.rcParams["axes.prop_cycle"]
+colors = prop_cycle.by_key()["color"]
+parts = {kElectronLabel: r"e^-", kPionLabel: r"\pi^-"}
+
+fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
+for i in parts.keys():
+    ax.hist(
+        electron_predicted[i],
+        bins=np.linspace(0, 1, 101),
+        label=f'${parts[i]}$',
+        color=colors[i],
+        ec=colors[i],
+        alpha=0.5
+    )
+
+ax.axvline(x=results["prob_cut"], lw=2, color="k", ls="--")
+
+eff_text = "\n".join([
+    rf'$\epsilon_{{ML}}^{{e^-}} = {results_ML["efficiency"] * 100.:.2f}$%',
+    rf'$R_{{ML}}^{{\pi^-}} = {results_ML["rejection"]:.1f}$',
+    rf'$\epsilon_{{E/p}}^{{e^-}} = {results_EoverP["efficiency"] * 100.:.2f}$%',
+    rf'$R_{{E/p}}^{{\pi^-}} = {results_EoverP["rejection"]:.1f}$',
+])
+
+data_to_axis = (ax.transAxes + ax.transData.inverted()).inverted()
+ax.text(
+    data_to_axis.transform((results["prob_cut"], 1))[0] + 0.01,
+    0.99,
+    eff_text,
+    fontsize=24,
+    transform=ax.transAxes,
+    ha="left",
+    va="top"
+)
+
+ax.set_yscale("log")
+ax.set_ylabel("Counts", fontsize=24)
+ax.set_xlabel(r"$P_{e^-}$", fontsize=24)
+ax.tick_params(direction="in", which="both", labelsize=24)
+ax.legend(fontsize=24, ncol=4, loc="upper center", bbox_to_anchor=(0.5, 1.12))
+
+ax.text(
+    0.05,
+    0.99,
+    "\n".join([
+        rf"{energy_setting} at $\eta = {etas[angle_label]:.3f}$",
+        rf'$R_{{\pi}} = {results["rejection"]:.1f}$ at $\epsilon_{{e^-}} = {results["efficiency"] * 100.:.2f}$%',
+    ]),
+    ha="left",
+    va="top",
+    fontsize=24,
+    transform=ax.transAxes
+)
+
+fig.savefig(f"{plotdir}/ML_rejection.pdf")
+plt.close(fig)
+
+print("Done with this eta bin")
 #+end_src
\ No newline at end of file

From abf19f75026917c0089a0cf39b373b819365692f Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 20 Apr 2026 00:06:50 -0500
Subject: [PATCH 22/58] Changes in config and requirements

---
 benchmarks/bic_pid/config.yml       |  4 ++++
 benchmarks/bic_pid/requirements.txt | 12 ++++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index a0e44016..d7cb216d 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -58,8 +58,12 @@ bench:bic_pid:
     - find sim_output/bic_pid | head -50 || true
     - ls -lrtha
   script:
+    - python -m pip install --upgrade pip
+    - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras keras || true
+    - python -m pip install "tensorflow==2.18.0"
     - python -m pip install snakemake
     - python -m pip install -r benchmarks/bic_pid/requirements.txt
+    - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index d32037a3..94afc652 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -1,7 +1,7 @@
-awkward >= 2.4.0
-catboost
+pandas
+numpy
+matplotlib
+tensorflow==2.18.0
+tf2onnx
 onnx
-scikit-learn
-uproot >= 5.2.0
-vector
-tf2onnx
\ No newline at end of file
+pyarrow
\ No newline at end of file

From 2153f438ab1bc044c2782db54ccac27abf1cf3c8 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 20 Apr 2026 08:57:54 -0500
Subject: [PATCH 23/58] Solve problem with requirements of tf

---
 benchmarks/bic_pid/config.yml       | 17 +++++++++--------
 benchmarks/bic_pid/requirements.txt | 13 +++++++------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index d7cb216d..787b69eb 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -57,14 +57,15 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid | head -50 || true
     - ls -lrtha
-  script:
-    - python -m pip install --upgrade pip
-    - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras keras || true
-    - python -m pip install "tensorflow==2.18.0"
-    - python -m pip install snakemake
-    - python -m pip install -r benchmarks/bic_pid/requirements.txt
-    - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
-    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+script:
+  - python -m pip install snakemake
+  - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true
+  - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt
+  - python - <<'PY'
+import tensorflow as tf
+print("TF OK:", tf.__version__)
+PY
+  - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 
 collect_results:bic_pid:
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 94afc652..3cc2b61a 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -1,7 +1,8 @@
-pandas
-numpy
-matplotlib
-tensorflow==2.18.0
-tf2onnx
+awkward>=2.4.0
+catboost
 onnx
-pyarrow
\ No newline at end of file
+scikit-learn
+uproot>=5.2.0
+vector
+tf2onnx
+tensorflow==2.20.0
\ No newline at end of file

From 2759b0e258595e54077606f93a5eb688f462fb65 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 20 Apr 2026 09:04:40 -0500
Subject: [PATCH 24/58] Solve problem with config file

---
 benchmarks/bic_pid/config.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 787b69eb..67bf2e64 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -61,10 +61,7 @@ script:
   - python -m pip install snakemake
   - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true
   - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt
-  - python - <<'PY'
-import tensorflow as tf
-print("TF OK:", tf.__version__)
-PY
+  - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
   - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 

From 9749d8f9ef80e20ddb69358f3cc14484e348321c Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 20 Apr 2026 09:11:14 -0500
Subject: [PATCH 25/58] Solve problem with config file

---
 benchmarks/bic_pid/config.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 67bf2e64..8021155e 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -57,12 +57,12 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid | head -50 || true
     - ls -lrtha
-script:
-  - python -m pip install snakemake
-  - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true
-  - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt
-  - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
-  - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+  script:
+    - python -m pip install snakemake
+    - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true
+    - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt
+    - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
+    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 
 collect_results:bic_pid:

From 3447243953510a3607c208a62befcb03431dc4b9 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 20 Apr 2026 12:50:30 -0500
Subject: [PATCH 26/58] Solve problem with bench:bic_pid script

---
 benchmarks/bic_pid/config.yml       | 4 ++--
 benchmarks/bic_pid/requirements.txt | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 8021155e..27c99780 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -59,8 +59,8 @@ bench:bic_pid:
     - ls -lrtha
   script:
     - python -m pip install snakemake
-    - python -m pip uninstall -y tensorflow tensorflow-cpu tf-keras || true
-    - python -m pip install --no-cache-dir --force-reinstall -r benchmarks/bic_pid/requirements.txt
+    - python -m pip install -r benchmarks/bic_pid/requirements.txt
+    - python -m pip install --upgrade --force-reinstall "protobuf==5.28.3" "python-dateutil==2.9.0.post0"
     - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 3cc2b61a..81d10e68 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -5,4 +5,5 @@ scikit-learn
 uproot>=5.2.0
 vector
 tf2onnx
-tensorflow==2.20.0
\ No newline at end of file
+protobuf==5.28.3
+python-dateutil==2.9.0.post0
\ No newline at end of file

From 627146bf9b97d230ff833faad344ca0c496e1868 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 26 Apr 2026 14:06:13 -0500
Subject: [PATCH 27/58] Solve the problem with transferred_sim_output

---
 benchmarks/bic_pid/config.yml | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 27c99780..f336beab 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -26,37 +26,17 @@ sim:bic_pid:
     - |
       snakemake $SNAKEMAKE_FLAGS --cores 1 \
         $(seq --format="sim_output/bic_pid/${DETECTOR_CONFIG}/${PARTICLE}/${ENERGY}/${ANGLE}/${PARTICLE}_${ENERGY}_${ANGLE}.%04.f.eicrecon.edm4eic.root" ${INDEX_RANGE})
-    - mkdir -p transferred_sim_output
-    - cp -aL sim_output/bic_pid transferred_sim_output/
-  artifacts:
-    when: always
-    paths:
-      - transferred_sim_output/
-      - .snakemake/log/
 
 
 bench:bic_pid:
   extends: .det_benchmark
   stage: benchmarks
   needs:
-    - "common:setup"
-    - "sim:bic_pid"
+    - ["sim:bic_pid"]
   image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   variables:
     CUDA_VISIBLE_DEVICES: ""
     DETECTOR_CONFIG: epic_craterlake
-  before_script:
-    - source .local/bin/env.sh
-    - ls -lrtha
-    - find transferred_sim_output/bic_pid | head -50 || true
-    - mkdir -p sim_output
-    - cp -a transferred_sim_output/bic_pid sim_output/
-    - mkdir -p "${DETECTOR_CONFIG}"
-    - ln -s ../sim_output "${DETECTOR_CONFIG}/sim_output"
-    - ln -s ../results "${DETECTOR_CONFIG}/results"
-    - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
-    - find sim_output/bic_pid | head -50 || true
-    - ls -lrtha
   script:
     - python -m pip install snakemake
     - python -m pip install -r benchmarks/bic_pid/requirements.txt
@@ -69,7 +49,6 @@ collect_results:bic_pid:
   extends: .det_benchmark
   stage: collect
   needs:
-    - "common:setup"
     - "bench:bic_pid"
   when: always
   image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG

From 3c47eab73b03670ad875fc9fcd25e8574bcaa8b7 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 26 Apr 2026 18:04:06 -0500
Subject: [PATCH 28/58] Restore bench:bic_pid

---
 benchmarks/bic_pid/config.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index f336beab..c8c02fa1 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -37,6 +37,16 @@ bench:bic_pid:
   variables:
     CUDA_VISIBLE_DEVICES: ""
     DETECTOR_CONFIG: epic_craterlake
+  before_script:
+    - source .local/bin/env.sh
+    - ls -lrtha
+    - ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
+    - mkdir -p "${DETECTOR_CONFIG}"
+    - ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
+    - ln -s "../results" "${DETECTOR_CONFIG}/results"
+    - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
+    - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
+    - ls -lrtha
   script:
     - python -m pip install snakemake
     - python -m pip install -r benchmarks/bic_pid/requirements.txt

From b20e2cfbdfa9856cd5fa646f0a68da4b3fada0a7 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 26 Apr 2026 19:16:03 -0500
Subject: [PATCH 29/58] Solve problem with the reinstalled TensorFlow

---
 benchmarks/bic_pid/requirements.txt | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 81d10e68..88e48536 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -1,9 +1 @@
-awkward>=2.4.0
-catboost
-onnx
-scikit-learn
-uproot>=5.2.0
-vector
-tf2onnx
-protobuf==5.28.3
-python-dateutil==2.9.0.post0
\ No newline at end of file
+tf2onnx==1.17.0
\ No newline at end of file

From 0ff7adb84707b99315c413dd3921b334a59b180a Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 26 Apr 2026 19:38:21 -0500
Subject: [PATCH 30/58] Delete line in config.yml file

---
 benchmarks/bic_pid/config.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index c8c02fa1..ff198a1f 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -50,7 +50,6 @@ bench:bic_pid:
   script:
     - python -m pip install snakemake
     - python -m pip install -r benchmarks/bic_pid/requirements.txt
-    - python -m pip install --upgrade --force-reinstall "protobuf==5.28.3" "python-dateutil==2.9.0.post0"
     - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 

From db336474b6bb5a0ca86a30b66bd9163ca9a80ab7 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 26 Apr 2026 20:41:28 -0500
Subject: [PATCH 31/58] Solve protobuf mismatch in config.yml file

---
 benchmarks/bic_pid/config.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index ff198a1f..3c4155aa 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,9 +48,12 @@ bench:bic_pid:
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
-    - python -m pip install snakemake
-    - python -m pip install -r benchmarks/bic_pid/requirements.txt
-    - python -c "import tensorflow as tf; print('TF OK:', tf.__version__)"
+    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
+    - export PATH=$PYTHONUSERBASE/bin:$PATH
+    - python -m pip install --user snakemake
+    - python -m pip install --user -r benchmarks/bic_pid/requirements.txt
+    - python -m pip install --user --ignore-installed --no-deps "protobuf==5.28.3"
+    - python -c "import google.protobuf; print('protobuf', google.protobuf.__version__, google.protobuf.__file__); import tensorflow as tf; print('TF OK:', tf.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 

From 337f49734d3d74a4ce8a04b2ab04c16dd51ee3da Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 3 May 2026 11:10:31 -0500
Subject: [PATCH 32/58] Test new eic_tf

---
 benchmarks/bic_pid/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 3c4155aa..d58fb67e 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -71,4 +71,4 @@ collect_results:bic_pid:
     - ls -lrht
     - mv results{,_save}/
     - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/${DETECTOR_CONFIG}/bic_pid
-    - mv results{_save,}/
\ No newline at end of file
+    - mv results{_save,}/

From 1fbe7bde94d04ebf1e4865937bc184687667e1ec Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 3 May 2026 13:37:34 -0500
Subject: [PATCH 33/58] Solve problem with --user installation

---
 benchmarks/bic_pid/config.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index d58fb67e..e529c2ee 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,12 +48,8 @@ bench:bic_pid:
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
-    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
-    - export PATH=$PYTHONUSERBASE/bin:$PATH
-    - python -m pip install --user snakemake
-    - python -m pip install --user -r benchmarks/bic_pid/requirements.txt
-    - python -m pip install --user --ignore-installed --no-deps "protobuf==5.28.3"
-    - python -c "import google.protobuf; print('protobuf', google.protobuf.__version__, google.protobuf.__file__); import tensorflow as tf; print('TF OK:', tf.__version__)"
+    - python -m pip install snakemake
+    - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 

From aec9db4a105dae8c14029d9165009a85a36266d2 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 3 May 2026 17:50:23 -0500
Subject: [PATCH 34/58] Use temporary solution for protobuf

---
 benchmarks/bic_pid/config.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index e529c2ee..c350714c 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,6 +48,7 @@ bench:bic_pid:
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
+    - export TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK=true
     - python -m pip install snakemake
     - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid

From c6ad32bbd7ac7f47e75537ae31bac3f008b37492 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 3 May 2026 18:26:41 -0500
Subject: [PATCH 35/58] Inspect mismatch error

---
 benchmarks/bic_pid/config.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index c350714c..b6ccbaa0 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -50,6 +50,8 @@ bench:bic_pid:
   script:
     - export TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK=true
     - python -m pip install snakemake
+    - python -c "import google.protobuf.runtime_version as rv, pathlib; print(rv.__file__); text = pathlib.Path(rv.__file__).read_text(); print('flag_present=', 'TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK' in text)"
+    - python -c "import os; print('TEMP FLAG =', os.getenv('TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK'))"
     - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 

From cc3a60e84c323657a351ddfa4dbd494cd7e0ce00 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Wed, 13 May 2026 12:54:23 -0500
Subject: [PATCH 36/58] Test new protobuf

---
 benchmarks/bic_pid/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index b6ccbaa0..ff8009d2 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -20,7 +20,7 @@ sim:bic_pid:
           "70 79",
           "80 89",
           "90 99",
-        ]
+        ] 
   script:
     - export DETECTOR_CONFIG=epic_craterlake
     - |

From 4e26ec786698613d8afb59cc0f6e6b168cf0e7f3 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 09:05:53 -0500
Subject: [PATCH 37/58] Test new environment solution

---
 benchmarks/bic_pid/config.yml       | 18 +++++++++++-------
 benchmarks/bic_pid/requirements.txt |  7 ++++++-
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index ff8009d2..3bef79b4 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -33,7 +33,7 @@ bench:bic_pid:
   stage: benchmarks
   needs:
     - ["sim:bic_pid"]
-  image: $BENCHMARKS_REGISTRY/eic_tf$BENCHMARKS_SIGIL$BENCHMARKS_TAG
+  image: $BENCHMARKS_REGISTRY/eic_ci$BENCHMARKS_SIGIL$BENCHMARKS_TAG
   variables:
     CUDA_VISIBLE_DEVICES: ""
     DETECTOR_CONFIG: epic_craterlake
@@ -48,13 +48,17 @@ bench:bic_pid:
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
-    - export TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK=true
-    - python -m pip install snakemake
-    - python -c "import google.protobuf.runtime_version as rv, pathlib; print(rv.__file__); text = pathlib.Path(rv.__file__).read_text(); print('flag_present=', 'TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK' in text)"
-    - python -c "import os; print('TEMP FLAG =', os.getenv('TEMPORARILY_DISABLE_PROTOBUF_VERSION_CHECK'))"
-    - python -c "import google.protobuf, tensorflow as tf, tf2onnx; print('protobuf', google.protobuf.__version__); print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
-    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+    - |
+      python -m venv .venv
+      source .venv/bin/activate
+
+      python -m pip install --upgrade pip setuptools wheel
+      python -m pip install snakemake
+      python -m pip install -r benchmarks/bic_pid/requirements.txt
+
+      python -c "import tensorflow as tf, tf2onnx, pandas as pd, numpy as np, matplotlib, pyarrow; print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
 
+      snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 collect_results:bic_pid:
   extends: .det_benchmark
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 88e48536..edf56c11 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -1 +1,6 @@
-tf2onnx==1.17.0
\ No newline at end of file
+tensorflow-cpu==2.20.0
+tf2onnx==1.17.0
+numpy
+pandas
+matplotlib
+pyarrow
\ No newline at end of file

From 6891692e3d72abe0c9b87fc8e7f018c23c3c3e38 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 17 May 2026 14:07:40 +0000
Subject: [PATCH 38/58] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/bic_pid/Snakefile        | 2 +-
 benchmarks/bic_pid/bic_pid.org      | 2 +-
 benchmarks/bic_pid/requirements.txt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/Snakefile b/benchmarks/bic_pid/Snakefile
index 379667a2..eaeec6d0 100644
--- a/benchmarks/bic_pid/Snakefile
+++ b/benchmarks/bic_pid/Snakefile
@@ -126,4 +126,4 @@ INPUT_ELECTRONS="{input.electrons}" \
 INPUT_PIONS="{input.pions}" \
 OUTPUT_DIR={output} \
 python {input.script}
-"""
\ No newline at end of file
+"""
diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org
index 35412310..6ef1e0c1 100644
--- a/benchmarks/bic_pid/bic_pid.org
+++ b/benchmarks/bic_pid/bic_pid.org
@@ -583,4 +583,4 @@ fig.savefig(f"{plotdir}/ML_rejection.pdf")
 plt.close(fig)
 
 print("Done with this eta bin")
-#+end_src
\ No newline at end of file
+#+end_src
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index edf56c11..985e8845 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -3,4 +3,4 @@ tf2onnx==1.17.0
 numpy
 pandas
 matplotlib
-pyarrow
\ No newline at end of file
+pyarrow

From 36cce02ba859275d1be3be7f80ecdae07454f722 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 13:44:13 -0500
Subject: [PATCH 39/58] Add code to generate inputs

---
 benchmarks/bic_pid/bic_pid.org      | 672 ++++++++++++++++++++++++++++
 benchmarks/bic_pid/requirements.txt |   9 +-
 2 files changed, 680 insertions(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/bic_pid.org b/benchmarks/bic_pid/bic_pid.org
index 35412310..f7d1f159 100644
--- a/benchmarks/bic_pid/bic_pid.org
+++ b/benchmarks/bic_pid/bic_pid.org
@@ -172,6 +172,208 @@ def make_dataset(fields):
     return dataset.with_options(options)
 #+end_src
 
+* Podio helpers
+
+#+begin_src jupyter-python :results silent
+class PodioData:
+    """View on one podio collection: columns are read as "<branch>.<var>", optionally under a cut."""
+
+    def __init__(self, events, branch, cut=None, default_vector="momentum"):
+        self.events = events
+        # events[branch]; individual columns are then looked up as "<branch>.<var>" inside it
+        self.data = events[branch]
+        self.branch = branch
+        self.cut = cut
+        self.default_vector = default_vector
+
+    def __getattr__(self, var):
+        # private/dunder probes (copy, pickle, hasattr) must fail fast, not hit the column lookup
+        if var.startswith("_"):
+            raise AttributeError(var)
+        return self.get(var)
+
+    def filter(self, new_cut):
+        """Return a new PodioData whose cut is `new_cut` AND-ed with the current cut (if any)."""
+        if self.cut is not None:
+            new_cut = np.logical_and(new_cut, self.cut)
+        return PodioData(self.events, self.branch, cut=new_cut, default_vector=self.default_vector)
+
+    def get(self, var, subvars=None, extra_cut=None):
+        """Return column `var` (or a list of `var.<sub>` columns) with self.cut/extra_cut applied."""
+        if subvars is not None and len(subvars):
+            return [self.get(f"{var}.{v}", extra_cut=extra_cut) for v in subvars]
+
+        result = self.data[f"{self.branch}.{var}"]
+
+        # apply whichever of the instance cut and the per-call cut are set
+        cuts = [c for c in (self.cut, extra_cut) if c is not None]
+        if len(cuts) == 2:
+            return result[np.logical_and(cuts[0], cuts[1])]
+        if cuts:
+            return result[cuts[0]]
+        return result
+
+    def get_vector(self, var=None, dim=None, extra_cut=None):
+        """Return the `dim` components (default x, y, z) of `var` (default: default_vector) as a list."""
+        if var is None:
+            var = self.default_vector
+        if dim is None:
+            dim = ["x", "y", "z"]
+        return list(self.get(var, subvars=dim, extra_cut=extra_cut))
+
+    def hypot(self, var=None, dim=None, extra_cut=None):
+        """Return the norm over the `dim` components of `var`; 0.0 when `dim` is empty."""
+        if dim is None:
+            dim = ["x", "y", "z"]
+        if not len(dim):
+            return 0.0
+        return hypot(self, var=var, dim=dim, extra_cut=extra_cut)
+
+    def azimuthal_angle(self, var=None, extra_cut=None):
+        """Return the azimuthal angle of `var` via the module-level azimuthal_angle()."""
+        return azimuthal_angle(self, var=var, extra_cut=extra_cut)
+
+    def polar_angle(self, var=None, extra_cut=None):
+        """Return the polar angle of `var` via the module-level polar_angle()."""
+        return polar_angle(self, var=var, extra_cut=extra_cut)
+
+    def eta(self, var=None, extra_cut=None):
+        """Return the pseudorapidity of `var` via the module-level eta()."""
+        return eta(self, var=var, extra_cut=extra_cut)
+
+    def momentum(self, extra_cut=None):
+        """Return the magnitude of the "momentum" vector."""
+        return self.hypot("momentum", extra_cut=extra_cut)
+
+    def transverse(self, var=None, extra_cut=None):
+        """Return the norm over the x and y components of `var`."""
+        return self.hypot(var, dim=["x", "y"], extra_cut=extra_cut)
+
+
+def _get_components(vector, **kwargs):
+    # objects exposing get_vector() are asked for their components; anything else is used as-is
+    getter = getattr(vector, "get_vector", None)
+    return vector if getter is None else getter(**kwargs)
+
+
+def hypot(vector, **kwargs):
+    """Return the square root of the summed squares of all components of `vector`."""
+    parts = _get_components(vector, **kwargs)
+    total = parts[0] ** 2
+    total = sum((part ** 2 for part in parts[1:]), total)
+    return np.sqrt(total)
+
+
+def azimuthal_angle(vector, **kwargs):
+    """Return arctan2(y, x) of the first two components of `vector`."""
+    # only x and y are requested unless the caller chose `dim` explicitly
+    kwargs.setdefault("dim", ["x", "y"])
+    components = _get_components(vector, **kwargs)
+    x, y = components[0], components[1]
+    return np.arctan2(y, x)
+
+
+def polar_angle(vector, **kwargs):
+    """Return arccos(z / |v|), taking z from the third component of `vector`."""
+    components = _get_components(vector, **kwargs)
+    # hypot() receives the already-resolved component list, so no kwargs are forwarded again
+    return np.arccos(components[2] / hypot(components))
+
+
+def eta(vector, **kwargs):
+    # pseudorapidity from the polar angle: eta = -ln(tan(theta / 2))
+    return -np.log(np.tan(polar_angle(vector, **kwargs) / 2.0))
+#+end_src
+
+* Array and window helpers
+
+#+begin_src jupyter-python :results silent
+import dask
+import awkward as ak
+import dask_awkward as dak
+import numpy as np
+from dask_awkward.lib.core import map_partitions
+
+## numpy-style clip array between min-max
+def _clip(a, a_min, a_max):
+    """Clip `a` to [a_min, a_max]; a bound of None leaves that side open."""
+    # select instead of multiplying by a mask: (-inf) * 0 is NaN, so log(0) entries became NaN
+    ret = a
+    if a_min is not None:
+        ret = ak.where(a < a_min, a_min, ret)
+    if a_max is not None:
+        ret = ak.where(a > a_max, a_max, ret)
+    return ret
+
+
+class _ClipFn:
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+
+    def __call__(self, array):
+        return _clip(array, self.kwargs["a_min"], self.kwargs["a_max"])
+
+
+def clip(array, a_min, a_max):
+    fn = _ClipFn(a_min=a_min, a_max=a_max)
+    return map_partitions(fn, array, label="clip", output_divisions=1, meta=array._meta)
+
+
+class _ArgsortFn:
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+
+    def __call__(self, array):
+        return ak.argsort(array, **self.kwargs)
+
+
+def argsort(array, axis=-1, ascending=True, stable=True, highlevel=True, behavior=None):
+    if axis == 0:
+        raise NotImplementedError("axis=0 not implemented here")
+    fn = _ArgsortFn(
+        axis=axis,
+        ascending=ascending,
+        stable=stable,
+        behavior=behavior,
+    )
+    return map_partitions(fn, array, label="argsort", output_divisions=1)
+
+
+class Window:
+    """Normalization window [lo, hi] that widens itself when too many values fall outside it."""
+    def __init__(self, name, interval, unit=None, tolerance=0.02):
+        self.name = name
+        self.interval = list(interval)  # private copy: linear_norm() widens it in place
+        self.step = (interval[0] / 2.0, interval[1] / 2.0)
+        self.unit = unit
+        self.tolerance = tolerance
+
+    def linear_norm(self, values):
+        """Map `values` linearly onto [0, 1] over the window, widening it until the tails fit."""
+        norm = (values - self.interval[0]) / (self.interval[1] - self.interval[0])
+        count = dak.sum(dak.num(norm))
+        underflow = dak.sum(dak.num(norm[norm < 0]))
+        overflow = dak.sum(dak.num(norm[norm > 1]))
+        count, underflow, overflow = dask.compute(count, underflow, overflow)
+
+        if underflow / count > self.tolerance:
+            self.interval[0] -= abs(self.step[0])  # always move down; a positive edge used to move up
+            print(
+                f"Warning: large UNDERFLOW count in normalization window {self.name}: "
+                f"{underflow/count*100:.2f}%, growing the window to {self.interval} and trying again"
+            )
+            return self.linear_norm(values)
+
+        if overflow / count > self.tolerance:
+            self.interval[1] += abs(self.step[1])  # always move up, whatever the sign of the edge
+            print(
+                f"Warning: large OVERFLOW count in normalization window {self.name}: "
+                f"{overflow/count*100:.2f}%, growing the window to {self.interval} and trying again"
+            )
+            return self.linear_norm(values)
+
+        return clip(norm, 0, 1)
+#+end_src
+
 * Models
 
 #+begin_src jupyter-python :results silent
@@ -258,6 +460,475 @@ print(f"  - output data directory: {datadir}")
 print(f"  - output plot directory: {plotdir}")
 #+end_src
 
+* E/p preprocessing
+
+#+begin_src jupyter-python :results silent
+import os
+
+import hist
+import dask_histogram as dh
+import boost_histogram as bh
+from matplotlib.ticker import MultipleLocator
+
+import uproot
+import awkward as ak
+import dask_awkward as dak
+import dask
+import pandas as pd
+
+## I/O bound so limit threads on large CPU linux nodes
+if "arm64" not in os.uname():
+    from multiprocessing.pool import ThreadPool
+    dask.config.set(pool=ThreadPool(6))
+
+kTargetEfficiencyEOverP = 0.97
+
+def read_input_list(path):
+    with open(path) as f:
+        return [line.strip() for line in f if line.strip()]
+
+electron_files = read_input_list(INPUT_ELECTRONS)
+pion_files = read_input_list(INPUT_PIONS)
+
+all_input_files = electron_files + pion_files
+#+end_src
+
+#+begin_src jupyter-python
+print("Loading ROOT files for E/p preprocessing:")
+for file in all_input_files[:10]:
+    print("  -", file)
+if len(all_input_files) > 10:
+    print(f"  ... and {len(all_input_files)-10} more files")
+#+end_src
+
+#+begin_src jupyter-python :results silent
+class ParticleData:
+    def __init__(self, h, efficiency=None, cut_idx=None):
+        if efficiency is None and cut_idx is None:
+            raise ValueError("Need either efficiency or cut index")
+        primary = cut_idx is None
+        self.count = h.sum()
+        self.norm_hist = hist.Hist(h / self.count)
+        if primary:
+            self.idx = self.find_ecut(efficiency)
+        else:
+            self.idx = cut_idx
+        self.e_cut = self.norm_hist.axes.centers[0][self.idx]
+        self.efficiency = np.sum(self.norm_hist.values()[self.idx:])
+        self.efficiency_error = np.sqrt(
+            self.count * self.efficiency * (1 - self.efficiency)
+        ) / self.count
+
+    def find_ecut(self, efficiency):
+        perc = np.cumsum(self.norm_hist.values())
+        idx = len(perc[perc < 1.0 - efficiency])
+        return idx
+
+
+class EcutSeparationData:
+    def __init__(self, max_layer, ehist, pihist, efficiency=kTargetEfficiencyEOverP):
+        self.max_layer = max_layer
+        self.electron = ParticleData(ehist, efficiency=efficiency)
+        self.pion = ParticleData(pihist, cut_idx=self.electron.idx)
+        self.count_e = self.electron.count
+        self.count_pi = self.pion.count
+        self.efficiency = self.electron.efficiency
+        self.efficiency_error = self.electron.efficiency_error
+        self.rejection = 1.0 / self.pion.efficiency
+        self.rejection_error = self.rejection**2 * self.pion.efficiency_error
+        self.e_cut = self.electron.e_cut
+
+
+class EcutSeparationResults:
+    """Column-wise accumulator of per-layer scan results, convertible to a pandas DataFrame."""
+    def __init__(self):
+        self.raw = []
+        self.fields = [
+            "max_layer",
+            "count_e",
+            "count_pi",
+            "efficiency",
+            "efficiency_error",
+            "rejection",
+            "rejection_error",
+            "e_cut",
+        ]
+        for field in self.fields:  # pre-create columns so to_pandas() works before any append()
+            setattr(self, field, [])
+
+    def append(self, rejection):
+        for field in self.fields:
+            getattr(self, field).append(getattr(rejection, field))
+        self.raw.append(rejection)
+
+    def to_pandas(self):
+        return pd.DataFrame({field: getattr(self, field) for field in self.fields})
+#+end_src
+
+#+begin_src jupyter-python
+print("Building E/p preprocessing inputs")
+
+events = uproot.dask([f"{file}:events" for file in all_input_files])
+
+gen = PodioData(events, "MCParticles")
+scifi = PodioData(events, "EcalBarrelScFiRecHits")
+astropix = PodioData(events, "EcalBarrelImagingRecHits")
+
+hits_in_calo = ((dak.num(scifi.layer, axis=1) > 0) & (dak.num(astropix.layer, axis=1) > 0))
+is_electron = (gen.PDG[:, 0] == 11)
+is_pion = (gen.PDG[:, 0] == -211)
+
+gen_cut = gen.filter(hits_in_calo)
+scifi_e = scifi.filter(hits_in_calo & is_electron)
+scifi_pi = scifi.filter(hits_in_calo & is_pion)
+gen_e = gen_cut.filter(is_electron)
+gen_pi = gen_cut.filter(is_pion)
+#+end_src
+
+#+begin_src jupyter-python
+print("Making input diagnostic plots")
+
+fig, ax = plt.subplots(1, 3, figsize=(12, 4))
+hists = dask.compute({
+    r"$P$ (GeV)": dh.histogram(gen_cut.momentum()[:, 0], bins=200, range=(0, 11), histogram=bh.Histogram),
+    r"$\eta$": dh.histogram(gen_cut.eta()[:, 0], bins=200, range=(-2, 2), histogram=bh.Histogram),
+    r"$\phi$ (deg.)": dh.histogram(gen_cut.azimuthal_angle()[:, 0] / np.pi * 180, bins=200, range=(-180, 180), histogram=bh.Histogram),
+})[0]
+
+hists = {key: hist.Hist(hists[key]) for key in hists}
+for i, key in enumerate(hists):
+    hists[key].plot1d(ax=ax[i], ls="-", color="darkblue")
+    ax[i].set_xlabel(key)
+
+fig.savefig(f"{plotdir}/diagnostic_input.pdf")
+plt.close(fig)
+#+end_src
+
+#+begin_src jupyter-python
+print("Computing E/p by layer")
+
+edep_e = scifi_e.energy
+layer_e = scifi_e.layer
+
+edep_pi = scifi_pi.energy
+layer_pi = scifi_pi.layer
+
+max_layer = dak.max(layer_pi).compute()
+print(f"Max ScFi layer = {max_layer}")
+
+mom_e = gen_e.momentum()[:, 0]
+ratio_e = [dak.sum(edep_e[layer_e <= x] / mom_e, axis=1) for x in range(1, max_layer + 1)]
+
+mom_pi = gen_pi.momentum()[:, 0]
+ratio_pi = [dak.sum(edep_pi[layer_pi <= x] / mom_pi, axis=1) for x in range(1, max_layer + 1)]
+#+end_src
+
+#+begin_src jupyter-python
+print("Building histograms for E/p scan")
+
+e_histo = []
+pi_histo = []
+
+for x in range(max_layer):
+    e_histo.append(
+        dh.histogram(ratio_e[x], bins=1000, range=(0, 1.5), histogram=bh.Histogram)
+    )
+    pi_histo.append(
+        dh.histogram(ratio_pi[x], bins=1000, range=(0, 1.5), histogram=bh.Histogram)
+    )
+
+# one dask.compute call so tasks shared by the electron and pion scans are evaluated only once
+res_e, res_pi = dask.compute(e_histo, pi_histo)
+#+end_src
+
+#+begin_src jupyter-python
+print("Plotting E/p scan")
+
+fig, ax = plt.subplots(int((max_layer + 2) // 3), 3, figsize=(12, 18), squeeze=False)  # squeeze=False: ax stays 2-D with a single row
+
+results_eop = EcutSeparationResults()
+
+for idx in range(len(res_e)):
+    current_layer = idx + 1
+    layer_result = EcutSeparationData(
+        current_layer, res_e[idx], res_pi[idx], efficiency=kTargetEfficiencyEOverP
+    )
+    results_eop.append(layer_result)
+
+    subax = ax[idx // 3, idx % 3]
+    stack = hist.Stack.from_dict({
+        "$e$": layer_result.electron.norm_hist,
+        "$\\pi^-$": layer_result.pion.norm_hist,
+    })
+    stack.plot(ax=subax, alpha=0.6, histtype="fill")
+    subax.axvline(x=layer_result.e_cut, color="k", ls="--", lw=2)
+    subax.set_xlabel("E/P")
+    subax.legend()
+    subax.text(
+        0.4, kTargetEfficiencyEOverP,
+        "\n".join([
+            rf"$layer \leq {current_layer}$",
+            rf"$\epsilon_e = {layer_result.efficiency:.2f} \pm {layer_result.efficiency_error:.2e}$",
+            rf"$R_\pi = {layer_result.rejection:.2f} \pm {layer_result.rejection_error:.2e}$",
+        ]),
+        transform=subax.transAxes,
+        fontsize=10,
+        va="top",
+        ha="center",
+    )
+
+fig.savefig(f"{plotdir}/EoverP_scan.pdf")
+plt.close(fig)
+#+end_src
+
+#+begin_src jupyter-python
+print(f"Saving E/p results to {datadir}/EoverP_results.csv")
+
+df_eop = results_eop.to_pandas()
+df_eop_sorted = df_eop.sort_values("rejection", ascending=False)
+df_eop_sorted.to_csv(f"{datadir}/EoverP_results.csv", index=False)
+#+end_src
+
+#+begin_src jupyter-python
+print("Making E/p optimization overview plot")
+
+prop_cycle = plt.rcParams["axes.prop_cycle"]
+colors = prop_cycle.by_key()["color"]
+box_props = dict(boxstyle="round", facecolor="white", alpha=0.5)
+
+fig, ax_cut = plt.subplots(figsize=(8, 8))
+ax_rejection = ax_cut.twinx()
+ax_rejection.set_yscale("log")
+
+ax_cut.plot(df_eop.max_layer, df_eop.e_cut, ls="-", color=colors[0])
+ax_rejection.errorbar(
+    df_eop.max_layer,
+    df_eop.rejection,
+    yerr=df_eop.rejection_error,
+    fmt="o",
+    capsize=3,
+    color=colors[1],
+    label="$R_\\pi$",
+)
+
+ax_cut.set_xlabel("Max ScFi Layer", fontsize=20)
+ax_cut.set_ylabel("E/p Cut Position", color=colors[0], fontsize=22)
+
+ax_rejection.grid(axis="both", which="both", ls=":")
+ax_rejection.xaxis.set_major_locator(MultipleLocator(5))
+ax_rejection.xaxis.set_minor_locator(MultipleLocator(1))
+ax_rejection.set_ylabel("Rejection Factor $R_\\pi$", color=colors[1], fontsize=20)
+
+ax_cut.set_title("Optimal $E/p$ cut versus max ScFi layer", fontsize=20)
+ax_cut.tick_params(labelsize=15)
+ax_rejection.tick_params(labelsize=15)
+ax_cut.text(
+    0.5,
+    0.03,
+    rf"$\epsilon_{{e}}\geq {kTargetEfficiencyEOverP*100.:.2f}\%$",
+    transform=ax_cut.transAxes,
+    fontsize=20,
+    va="bottom",
+    ha="center",
+    bbox=box_props,
+)
+
+fig.subplots_adjust(left=0.15, right=0.85)
+fig.savefig(f"{plotdir}/EoverP_optimization.pdf")
+plt.close(fig)
+
+print("Finished E/p preprocessing")
+#+end_src
+
+* Feature preprocessing
+
+#+begin_src jupyter-python :results silent
+pd.set_option("display.min_rows", 50)
+
+def data_features(data, n_hits=50, ltype="img", lval="0", loffset=0):
+    # raw hit r, eta, phi
+    r_h = data.hypot()
+    eta_h = data.eta()
+    phi_h = data.azimuthal_angle()
+
+    # raw hit normalized energy
+    e_tot = dak.sum(data.energy, axis=1)
+    e_norm = data.energy / e_tot
+
+    # logarithmic weighting based on hit energy
+    weights = clip(np.log(e_norm) + 5.6, 0, None)
+    tot_weight = dak.sum(weights, axis=1)
+    weights = weights / tot_weight
+
+    # calculate central xyz hit position based on the weight
+    x, y, z = data.get_vector("position")
+    xc = dak.sum(x * weights, axis=1)
+    yc = dak.sum(y * weights, axis=1)
+    zc = dak.sum(z * weights, axis=1)
+
+    # calculate central hit r, eta, phi
+    r_c = hypot([xc, yc, zc])
+    eta_c = eta([xc, yc, zc], r=r_c)
+    phi_c = azimuthal_angle([xc, yc, zc])
+
+    dphi = phi_h - phi_c
+    dphi_low = (dphi < -math.pi) * 2.0 * math.pi
+    dphi_high = (dphi > math.pi) * 2.0 * math.pi
+    dphi_corr = dphi + dphi_low - dphi_high
+    dsphi = np.sin(dphi_corr * 0.5)
+
+    # normalize and bind to window
+    r_norm = kWinR0.linear_norm(r_h)
+    eta_norm = kWinEta.linear_norm(eta_h - eta_c)
+    phi_norm = kWinPhi.linear_norm(dsphi)
+
+    norm_data = {
+        "eh": e_norm,
+        "r0": r_norm,
+        "eta": eta_norm,
+        "phi": phi_norm,
+    }
+
+    min_layer, max_layer = dask.compute(dak.min(data.layer), dak.max(data.layer))
+    n_events = len(e_norm)
+
+    # sort hits by descending hit energy
+    sort_idx = argsort(e_norm, ascending=False)
+
+    sorted_data = {
+        key: [
+            dak.pad_none(
+                norm_data[key][sort_idx][data.layer[sort_idx] == layer],
+                n_hits,
+                clip=True,
+            )
+            for layer in range(min_layer, max_layer + 1)
+        ]
+        for key in norm_data
+    }
+
+    computed_data = dask.compute(sorted_data)[0]
+
+    raw_df = ak.to_dataframe(
+        ak.Array({
+            key: ak.flatten(ak.concatenate(computed_data[key], axis=1))
+            for key in computed_data
+        })
+    ).astype(np.float32).fillna(0)
+
+    index = [
+        [ev for ev in range(1, n_events + 1)],
+        [ltype],
+        [layer for layer in range(min_layer + loffset, max_layer + loffset + 1)],
+        [hit for hit in range(1, n_hits + 1)],
+    ]
+    index = pd.MultiIndex.from_product(index, names=["event", "ltype", "layer", "hit"])
+
+    indexed_df = pd.DataFrame(
+        {key: raw_df[key].values for key in raw_df.keys()},
+        index=index,
+    )
+    indexed_df.loc[:, "lval"] = np.int32(lval)
+
+    return indexed_df
+#+end_src
+
+#+begin_src jupyter-python
+print("Preparing feature-generation inputs")
+
+# Rebuild PodioData objects with position as default vector for hit features
+gen = PodioData(events, "MCParticles")
+scifi = PodioData(events, "EcalBarrelScFiRecHits", default_vector="position")
+astropix = PodioData(events, "EcalBarrelImagingRecHits", default_vector="position")
+
+hits_in_calo = ((dak.num(scifi.layer, axis=1) > 0) & (dak.num(astropix.layer, axis=1) > 0))
+electron_or_pion = (gen.PDG[:, 0] == 11) | (gen.PDG[:, 0] == -211)
+
+print("Loading E/p cut results")
+cutdf = pd.read_csv(f"{datadir}/EoverP_results.csv").sort_values("rejection", ascending=False)
+results_EoverP = OrderedDict({key: cutdf[key].iloc[0] for key in cutdf.columns})
+print(results_EoverP)
+#+end_src
+
+#+begin_src jupyter-python
+print("Defining normalization windows")
+kWinEta = Window("eta", [-0.3, 0.3])
+kWinPhi = Window("phi", [-0.4, 0.4], unit="rad")
+kWinR0 = Window("R0", [500, 2000], unit="mm")
+#+end_src
+
+#+begin_src jupyter-python
+print("Applying E/p cut before feature generation")
+
+mom = gen.momentum()[:, 0]
+passes_eoverp_cut = (
+    dak.sum(scifi.energy[scifi.layer <= results_EoverP["max_layer"]] / mom, axis=1)
+    > results_EoverP["e_cut"]
+)
+
+gen_good = gen.filter(hits_in_calo & electron_or_pion & passes_eoverp_cut)
+scifi_good = scifi.filter(hits_in_calo & electron_or_pion & passes_eoverp_cut)
+astropix_good = astropix.filter(hits_in_calo & electron_or_pion & passes_eoverp_cut)
+#+end_src
+
+#+begin_src jupyter-python
+print("Creating feature data structures (this may take a while)")
+
+print(" --> creating Astropix feature table")
+df_astropix = data_features(astropix_good, n_hits=50, ltype="img", lval=0)
+
+print(" --> creating SciFi feature table")
+df_scifi = data_features(
+    scifi_good,
+    n_hits=50,
+    ltype="scfi",
+    lval=1,
+    loffset=dak.max(astropix_good.layer).compute(),
+)
+
+# keep the same behavior as the original script
+df_scifi.eta = np.float32(0.0)
+
+print(" --> merging feature tables")
+df_both = (
+    pd.concat([df_astropix.reset_index(), df_scifi.reset_index()], ignore_index=True)
+      .set_index(["event", "ltype", "layer", "hit"])
+      .sort_index()
+)
+
+print(f"Saving feature table to {datadir}/hits.snappy.parquet")
+df_both.to_parquet(f"{datadir}/hits.snappy.parquet", compression="snappy")
+#+end_src
+
+#+begin_src jupyter-python
+print("Formatting labels")
+
+padded_PDG = dak.map_partitions(ak.pad_none, gen_good.PDG, 1, axis=1)
+padded_mom = dak.map_partitions(ak.pad_none, gen_good.momentum(), 1, axis=1)
+padded_mass = dak.map_partitions(ak.pad_none, gen_good.mass, 1, axis=1)
+
+pdg0 = padded_PDG[:, 0]
+moment0 = padded_mom[:, 0]
+mass0 = padded_mass[:, 0]
+
+pdg0_filled = dak.map_partitions(ak.fill_none, pdg0, 0)
+moment0_filled = dak.map_partitions(ak.fill_none, moment0, 0.0)
+mass0_filled = dak.map_partitions(ak.fill_none, mass0, 0.0)
+
+mc_pdg, mc_p, mc_mass = dask.compute(pdg0_filled, moment0_filled, mass0_filled)
+
+df_mc = ak.to_dataframe(
+    ak.Array({
+        "PDG": mc_pdg,
+        "P": mc_p,
+        "mass": mc_mass,
+    })
+).fillna(0)
+
+print(f"Saving labels to {datadir}/labels.snappy.parquet")
+df_mc.to_parquet(f"{datadir}/labels.snappy.parquet", compression="snappy")
+#+end_src
+
 * Load datasets
 
 #+begin_src jupyter-python
@@ -365,6 +1036,7 @@ with strategy.scope():
     model.compile(
         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
+        metrics=["accuracy"],
         weighted_metrics=["accuracy"],
     )
     history = model.fit(
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index edf56c11..697a6c44 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -3,4 +3,11 @@ tf2onnx==1.17.0
 numpy
 pandas
 matplotlib
-pyarrow
\ No newline at end of file
+pyarrow
+uproot>=5.2.0
+awkward>=2.4.0
+dask
+dask-awkward
+hist
+dask-histogram
+boost-histogram
\ No newline at end of file

From 218219cb42e0811c68e8531f2df5e128cd795024 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 17 May 2026 18:46:26 +0000
Subject: [PATCH 40/58] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/bic_pid/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 697a6c44..975df6b7 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -10,4 +10,4 @@ dask
 dask-awkward
 hist
 dask-histogram
-boost-histogram
\ No newline at end of file
+boost-histogram

From 0ee50f8764d37468d51a08374ee45586ed095bc2 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 14:13:43 -0500
Subject: [PATCH 41/58] Test again

---
 benchmarks/bic_pid/config.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 3bef79b4..62f90e39 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -55,7 +55,6 @@ bench:bic_pid:
       python -m pip install --upgrade pip setuptools wheel
       python -m pip install snakemake
       python -m pip install -r benchmarks/bic_pid/requirements.txt
-
       python -c "import tensorflow as tf, tf2onnx, pandas as pd, numpy as np, matplotlib, pyarrow; print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
 
       snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid

From a4cac715c17da5faa229778441f0dd3a66cb204f Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 19:27:30 -0500
Subject: [PATCH 42/58] Use the correct environment

---
 benchmarks/bic_pid/config.yml       |  8 ++++++--
 benchmarks/bic_pid/requirements.txt | 22 +++++++++++-----------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 62f90e39..2fab7e17 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -49,16 +49,20 @@ bench:bic_pid:
     - ls -lrtha
   script:
     - |
-      python -m venv .venv
+      command -v python3.10
+      python3.10 -V
+
+      python3.10 -m venv .venv
       source .venv/bin/activate
 
       python -m pip install --upgrade pip setuptools wheel
       python -m pip install snakemake
       python -m pip install -r benchmarks/bic_pid/requirements.txt
-      python -c "import tensorflow as tf, tf2onnx, pandas as pd, numpy as np, matplotlib, pyarrow; print('TF OK:', tf.__version__); print('tf2onnx OK:', tf2onnx.__version__)"
 
+      python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
       snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
+
 collect_results:bic_pid:
   extends: .det_benchmark
   stage: collect
diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 975df6b7..6cb9b27b 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -1,13 +1,13 @@
 tensorflow-cpu==2.20.0
 tf2onnx==1.17.0
-numpy
-pandas
-matplotlib
-pyarrow
-uproot>=5.2.0
-awkward>=2.4.0
-dask
-dask-awkward
-hist
-dask-histogram
-boost-histogram
+numpy==1.23.2
+pandas==2.2.3
+matplotlib==3.10.3
+pyarrow==20.0.0
+uproot==5.0.3
+awkward==2.0.8
+dask==2023.2.1
+dask-awkward==2023.2.0
+dask-histogram==2023.2.0
+boost-histogram==1.5.2
+hist==2.8.1
\ No newline at end of file

From 97506ac973cee0a9737510302f60dcd7622f40be Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 18 May 2026 00:27:41 +0000
Subject: [PATCH 43/58] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/bic_pid/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/requirements.txt b/benchmarks/bic_pid/requirements.txt
index 6cb9b27b..a6c261e6 100644
--- a/benchmarks/bic_pid/requirements.txt
+++ b/benchmarks/bic_pid/requirements.txt
@@ -10,4 +10,4 @@ dask==2023.2.1
 dask-awkward==2023.2.0
 dask-histogram==2023.2.0
 boost-histogram==1.5.2
-hist==2.8.1
\ No newline at end of file
+hist==2.8.1

From c01ce13d1a9f9568244a2bc07d98f6cb41510196 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 20:12:25 -0500
Subject: [PATCH 44/58] Install environment

---
 benchmarks/bic_pid/config.yml      | 21 ++++++++-------------
 benchmarks/bic_pid/environment.yml | 16 ++++++++++++++++
 2 files changed, 24 insertions(+), 13 deletions(-)
 create mode 100644 benchmarks/bic_pid/environment.yml

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 2fab7e17..ecbd247a 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -37,6 +37,7 @@ bench:bic_pid:
   variables:
     CUDA_VISIBLE_DEVICES: ""
     DETECTOR_CONFIG: epic_craterlake
+    MAMBA_ROOT_PREFIX: "$LOCAL_DATA_PATH/micromamba"
   before_script:
     - source .local/bin/env.sh
     - ls -lrtha
@@ -48,19 +49,13 @@ bench:bic_pid:
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
-    - |
-      command -v python3.10
-      python3.10 -V
-
-      python3.10 -m venv .venv
-      source .venv/bin/activate
-
-      python -m pip install --upgrade pip setuptools wheel
-      python -m pip install snakemake
-      python -m pip install -r benchmarks/bic_pid/requirements.txt
-
-      python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
-      snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+    - mkdir -p mm
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
+    - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
+    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install --upgrade pip
+    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
+    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 
 collect_results:bic_pid:
diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml
new file mode 100644
index 00000000..1615d62e
--- /dev/null
+++ b/benchmarks/bic_pid/environment.yml
@@ -0,0 +1,16 @@
+channels:
+  - conda-forge
+dependencies:
+  - python=3.10
+  - pip
+  - numpy=1.23.2
+  - pandas=2.2.3
+  - matplotlib=3.10.3
+  - pyarrow=20.0.0
+  - uproot=5.0.3
+  - awkward=2.0.8
+  - dask=2023.2.1
+  - dask-awkward=2023.2.0
+  - dask-histogram=2023.2.0
+  - boost-histogram=1.5.2
+  - hist=2.8.1
\ No newline at end of file

From 23e1d6c48ef77ef199f76caf0759b58ee0d8eb18 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 18 May 2026 01:13:36 +0000
Subject: [PATCH 45/58] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/bic_pid/environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml
index 1615d62e..e787b06a 100644
--- a/benchmarks/bic_pid/environment.yml
+++ b/benchmarks/bic_pid/environment.yml
@@ -13,4 +13,4 @@ dependencies:
   - dask-awkward=2023.2.0
   - dask-histogram=2023.2.0
   - boost-histogram=1.5.2
-  - hist=2.8.1
\ No newline at end of file
+  - hist=2.8.1

From b4706c1402fd96fa9aa32eab67eab4a4711f1aee Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 20:15:31 -0500
Subject: [PATCH 46/58] Create environment again

---
 benchmarks/bic_pid/config.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index ecbd247a..9bad34b4 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -57,7 +57,6 @@ bench:bic_pid:
     - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
     - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
-
 collect_results:bic_pid:
   extends: .det_benchmark
   stage: collect

From 348b159b8ba2bb5718c02b012d94b28e58fa06a7 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 22:11:54 -0500
Subject: [PATCH 47/58] Solve python interpreter problem

---
 benchmarks/bic_pid/config.yml | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 9bad34b4..5c5b8644 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,14 +48,23 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
-  script:
-    - mkdir -p mm
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
-    - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
-    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install --upgrade pip
-    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
-    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" python -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
-    - mm/bin/micromamba run -p "$MAMBA_ROOT_PREFIX/envs/bicpid" snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+script:
+  - mkdir -p mm
+  - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
+  - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
+
+  - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
+  - export PYTHONNOUSERSITE=1
+  - unset PYTHONPATH
+
+  - "$BICPY" -V
+  - "$BICPY" -m pip install --upgrade pip
+  - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
+
+  - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+
+  - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+
 
 collect_results:bic_pid:
   extends: .det_benchmark

From 0d7d32154063e68deb9f4501e8976a9d5028507d Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 22:13:41 -0500
Subject: [PATCH 48/58] Solve syntax problem

---
 benchmarks/bic_pid/config.yml | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 5c5b8644..2732d7d1 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,22 +48,22 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
-script:
-  - mkdir -p mm
-  - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
-  - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
+  script:
+    - mkdir -p mm
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
+    - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
 
-  - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
-  - export PYTHONNOUSERSITE=1
-  - unset PYTHONPATH
+    - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
+    - export PYTHONNOUSERSITE=1
+    - unset PYTHONPATH
 
-  - "$BICPY" -V
-  - "$BICPY" -m pip install --upgrade pip
-  - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
+    - "$BICPY" -V
+    - "$BICPY" -m pip install --upgrade pip
+    - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
 
-  - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+    - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
 
-  - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+    - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 
 collect_results:bic_pid:

From 170d5defdcf3704acc8995c85174734e81ef34b2 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 17 May 2026 22:18:38 -0500
Subject: [PATCH 49/58] Solve syntax problem again

---
 benchmarks/bic_pid/config.yml | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 2732d7d1..56cf1c8f 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -49,22 +49,22 @@ bench:bic_pid:
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
   script:
-    - mkdir -p mm
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
-    - mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
-
-    - export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
-    - export PYTHONNOUSERSITE=1
-    - unset PYTHONPATH
+    - |
+      mkdir -p mm
+      curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
+      mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
 
-    - "$BICPY" -V
-    - "$BICPY" -m pip install --upgrade pip
-    - "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
+      export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
+      export PYTHONNOUSERSITE=1
+      unset PYTHONPATH
 
-    - "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+      "$BICPY" -V
+      "$BICPY" -m pip install --upgrade pip
+      "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
 
-    - "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+      "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
 
+      "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 collect_results:bic_pid:
   extends: .det_benchmark

From 51d312a5f0ffcf4f716d9df334fc215dfae97e98 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 24 May 2026 22:48:40 -0500
Subject: [PATCH 50/58] Solve TensorFlow compatibility problem

---
 benchmarks/bic_pid/config.yml      | 9 +++++----
 benchmarks/bic_pid/environment.yml | 5 +++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 56cf1c8f..52457b47 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,8 +48,8 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
-  script:
-    - |
+script:
+  - |
       mkdir -p mm
       curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
       mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
@@ -60,12 +60,13 @@ bench:bic_pid:
 
       "$BICPY" -V
       "$BICPY" -m pip install --upgrade pip
-      "$BICPY" -m pip install snakemake tensorflow-cpu==2.20.0 tf2onnx==1.17.0
+      "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0"
 
-      "$BICPY" -c "import sys, uproot, awkward as ak, dask, dask_awkward as dak, numpy as np, pandas as pd, tensorflow as tf, tf2onnx; print(sys.version); print('uproot', uproot.__version__); print('awkward', ak.__version__); print('dask', dask.__version__); print('dask_awkward', dak.__version__); print('numpy', np.__version__); print('pandas', pd.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+      "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
 
       "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
+
 collect_results:bic_pid:
   extends: .det_benchmark
   stage: collect
diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml
index e787b06a..a7abbbe7 100644
--- a/benchmarks/bic_pid/environment.yml
+++ b/benchmarks/bic_pid/environment.yml
@@ -3,7 +3,8 @@ channels:
 dependencies:
   - python=3.10
   - pip
-  - numpy=1.23.2
+  - snakemake=7.32.4
+  - numpy=1.24.3
   - pandas=2.2.3
   - matplotlib=3.10.3
   - pyarrow=20.0.0
@@ -13,4 +14,4 @@ dependencies:
   - dask-awkward=2023.2.0
   - dask-histogram=2023.2.0
   - boost-histogram=1.5.2
-  - hist=2.8.1
+  - hist=2.8.1
\ No newline at end of file

From a6f880bfa1de1a1d75d78bdc3bfcf0f88d6f7cb0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 25 May 2026 03:48:53 +0000
Subject: [PATCH 51/58] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/bic_pid/environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml
index a7abbbe7..f35c2199 100644
--- a/benchmarks/bic_pid/environment.yml
+++ b/benchmarks/bic_pid/environment.yml
@@ -14,4 +14,4 @@ dependencies:
   - dask-awkward=2023.2.0
   - dask-histogram=2023.2.0
   - boost-histogram=1.5.2
-  - hist=2.8.1
\ No newline at end of file
+  - hist=2.8.1

From ff926b20b560ae198b7946524369e6ffe695a6fa Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Sun, 24 May 2026 22:49:57 -0500
Subject: [PATCH 52/58] Solve problem of indentation

---
 benchmarks/bic_pid/config.yml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 52457b47..63e59863 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -48,23 +48,23 @@ bench:bic_pid:
     - mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
     - find sim_output/bic_pid/${DETECTOR_CONFIG} | head -50 || true
     - ls -lrtha
-script:
-  - |
-      mkdir -p mm
-      curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
-      mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
+  script:
+    - |
+        mkdir -p mm
+        curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
+        mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
 
-      export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
-      export PYTHONNOUSERSITE=1
-      unset PYTHONPATH
+        export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
+        export PYTHONNOUSERSITE=1
+        unset PYTHONPATH
 
-      "$BICPY" -V
-      "$BICPY" -m pip install --upgrade pip
-      "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0"
+        "$BICPY" -V
+        "$BICPY" -m pip install --upgrade pip
+        "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0"
 
-      "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+        "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
 
-      "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+        "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 
 collect_results:bic_pid:

From d225eda98e4c369827709fc15710a1de69823fa9 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 25 May 2026 00:18:05 -0500
Subject: [PATCH 53/58] Solve snakemake problem

---
 benchmarks/bic_pid/config.yml      | 2 +-
 benchmarks/bic_pid/environment.yml | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 63e59863..022f0a80 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -60,7 +60,7 @@ bench:bic_pid:
 
         "$BICPY" -V
         "$BICPY" -m pip install --upgrade pip
-        "$BICPY" -m pip install "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0"
+        "$BICPY" -m pip install snakemake tensorflow-cpu==2.13.0 tf2onnx==1.17.0
 
         "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
 
diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml
index f35c2199..bcef2315 100644
--- a/benchmarks/bic_pid/environment.yml
+++ b/benchmarks/bic_pid/environment.yml
@@ -3,7 +3,6 @@ channels:
 dependencies:
   - python=3.10
   - pip
-  - snakemake=7.32.4
   - numpy=1.24.3
   - pandas=2.2.3
   - matplotlib=3.10.3
@@ -14,4 +13,4 @@ dependencies:
   - dask-awkward=2023.2.0
   - dask-histogram=2023.2.0
   - boost-histogram=1.5.2
-  - hist=2.8.1
+  - hist=2.8.1
\ No newline at end of file

From 2c0b237328af841cbe5f18951a55fb3106e9727c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 25 May 2026 05:18:14 +0000
Subject: [PATCH 54/58] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/bic_pid/environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/environment.yml b/benchmarks/bic_pid/environment.yml
index bcef2315..5eadce25 100644
--- a/benchmarks/bic_pid/environment.yml
+++ b/benchmarks/bic_pid/environment.yml
@@ -13,4 +13,4 @@ dependencies:
   - dask-awkward=2023.2.0
   - dask-histogram=2023.2.0
   - boost-histogram=1.5.2
-  - hist=2.8.1
\ No newline at end of file
+  - hist=2.8.1

From b92b42f17876aeba330042c049125e87f65cf211 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 25 May 2026 08:42:39 -0500
Subject: [PATCH 55/58] Solve PuLP compatibility

---
 benchmarks/bic_pid/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 022f0a80..6b7c6b7d 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -60,7 +60,7 @@ bench:bic_pid:
 
         "$BICPY" -V
         "$BICPY" -m pip install --upgrade pip
-        "$BICPY" -m pip install snakemake tensorflow-cpu==2.13.0 tf2onnx==1.17.0
+        "$BICPY" -m pip install "snakemake==7.32.4" "pulp==2.7.0" "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0"
 
         "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
 

From cf610b4ad85b98b4562334b4a70d4333c17bdaf5 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 25 May 2026 09:40:18 -0500
Subject: [PATCH 56/58] Continue solving PuLP installation

---
 benchmarks/bic_pid/config.yml | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index 6b7c6b7d..fd5f1261 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -52,7 +52,10 @@ bench:bic_pid:
     - |
         mkdir -p mm
         curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C mm
-        mm/bin/micromamba create -y -p "$MAMBA_ROOT_PREFIX/envs/bicpid" -f benchmarks/bic_pid/environment.yml
+
+        mm/bin/micromamba create -y \
+          -p "$MAMBA_ROOT_PREFIX/envs/bicpid" \
+          -f benchmarks/bic_pid/environment.yml
 
         export BICPY="$MAMBA_ROOT_PREFIX/envs/bicpid/bin/python"
         export PYTHONNOUSERSITE=1
@@ -60,12 +63,17 @@ bench:bic_pid:
 
         "$BICPY" -V
         "$BICPY" -m pip install --upgrade pip
-        "$BICPY" -m pip install "snakemake==7.32.4" "pulp==2.7.0" "tensorflow-cpu==2.13.0" "tf2onnx==1.17.0"
 
-        "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__)"
+        "$BICPY" -m pip install \
+          "snakemake==7.32.4" \
+          "tensorflow-cpu==2.13.0" \
+          "tf2onnx==1.17.0"
 
-        "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
+        "$BICPY" -m pip install --force-reinstall --no-deps "pulp==2.7.0"
 
+        "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx, pulp; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__); print('PuLP version =', getattr(pulp, '__version__', 'unknown')); print('has list_solvers =', hasattr(pulp, 'list_solvers')); print('has listSolvers =', hasattr(pulp, 'listSolvers')); print('module =', pulp.__file__)"
+
+        "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
 collect_results:bic_pid:
   extends: .det_benchmark

From 3805f5be9797888f78971869a2f4971e1a490e75 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 25 May 2026 10:10:44 -0500
Subject: [PATCH 57/58] Add subprocess import to the repo-root Snakefile

---
 Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Snakefile b/Snakefile
index 16e2fbe6..2f7cf8c3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -2,6 +2,7 @@ configfile: "snakemake.yml"
 
 import functools
 import os
+import subprocess
 from snakemake.logging import logger
 
 

From 578748deea7e50d9206d6bff9c7ab5d9a2470150 Mon Sep 17 00:00:00 2001
From: Tomas Sosa <tomassosa.23@gmail.com>
Date: Mon, 25 May 2026 12:18:16 -0500
Subject: [PATCH 58/58] Add the line to use the correct environment

---
 benchmarks/bic_pid/config.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/benchmarks/bic_pid/config.yml b/benchmarks/bic_pid/config.yml
index fd5f1261..36c34929 100644
--- a/benchmarks/bic_pid/config.yml
+++ b/benchmarks/bic_pid/config.yml
@@ -73,8 +73,12 @@ bench:bic_pid:
 
         "$BICPY" -c "import sys, numpy as np, awkward as ak, uproot, tensorflow as tf, tf2onnx, pulp; print(sys.version); print('numpy', np.__version__); print('awkward', ak.__version__); print('uproot', uproot.__version__); print('TF', tf.__version__); print('tf2onnx', tf2onnx.__version__); print('PuLP version =', getattr(pulp, '__version__', 'unknown')); print('has list_solvers =', hasattr(pulp, 'list_solvers')); print('has listSolvers =', hasattr(pulp, 'listSolvers')); print('module =', pulp.__file__)"
 
+        export PATH="$MAMBA_ROOT_PREFIX/envs/bicpid/bin:$PATH"
+        hash -r
+
         "$BICPY" -m snakemake $SNAKEMAKE_FLAGS --cores 1 results/${DETECTOR_CONFIG}/bic_pid
 
+
 collect_results:bic_pid:
   extends: .det_benchmark
   stage: collect