From 49cdd8ba667840a6ad4a6acf048cb0dca9501cd6 Mon Sep 17 00:00:00 2001
From: Student2 <student2@mdessolevictus.cern.ch>
Date: Tue, 9 Jun 2026 09:33:25 +0200
Subject: [PATCH 1/3] testing new print statement

---
 .../pythonizations/python/ROOT/_pythonization/_ml_dataloader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py
index 3ab53859622e4..05f4a55a3ddca 100644
--- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py
+++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py
@@ -4,6 +4,7 @@
 # Author: Vincenzo Eduardo Padulano, CERN 10/2024
 # Author: Martin Føll, University of Oslo (UiO) & CERN 01/2026
 # Author: Silia Taider, CERN 02/2026
+# Author: Jonah Ascoli, CERN 06/2026
 
 ################################################################################
 # Copyright (C) 1995-2026, Rene Brun and Fons Rademakers.                      #
@@ -161,6 +162,7 @@ def __init__(
         """
 
         from ROOT import RDF
+        print("This is jonah's testing branch")
 
         if rdataframes is None:
             rdataframes = []

From d875a11293457add97decf7ff141029d033a438f Mon Sep 17 00:00:00 2001
From: Student2 <student2@mdessolevictus.cern.ch>
Date: Thu, 11 Jun 2026 15:53:21 +0200
Subject: [PATCH 2/3] [tutorials][ML] Add resampling tutorial

Add a tutorial demonstrating the resampling syntax of RDataLoader (sampling_type together with load_eager) on an imbalanced two-class dataset.
---
 .../ml_dataloader_resampling.py               | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 tutorials/machine_learning/ml_dataloader_resampling.py

diff --git a/tutorials/machine_learning/ml_dataloader_resampling.py b/tutorials/machine_learning/ml_dataloader_resampling.py
new file mode 100644
index 0000000000000..bceee538d1549
--- /dev/null
+++ b/tutorials/machine_learning/ml_dataloader_resampling.py
@@ -0,0 +1,84 @@
+### \file
+### \ingroup tutorial_ml
+### \notebook -nodraw
+### Example of resampling when one class is underrepresented in the dataset.
+###
+### \macro_code
+### \macro_output
+### \author Jonah Ascoli
+
+import os
+
+import ROOT
+import torch
+
+torch.manual_seed(42)
+
+# Create an imbalanced two-class dataset in which one class is underrepresented:
+# "major.root" holds 100000 even numbers (b2 == 0) and "minor.root" holds 100 odd
+# numbers (b2 == 1). Both dataframes are later passed together to the dataloader.
+ROOT.RDataFrame(100000).Define("b1", "(int) 2 * rdfentry_").Define("b2", "(int) b1%2").Snapshot("tree", "major.root")
+ROOT.RDataFrame(100).Define("b1", "(int) 2 * rdfentry_ + 1").Define("b2", "(int) b1%2").Snapshot("tree", "minor.root")
+df_major = ROOT.RDataFrame("tree", "major.root")
+df_minor = ROOT.RDataFrame("tree", "minor.root")
+
+batch_size = 16
+batches_in_memory = 10
+num_epochs = 10
+
+loss_fn = torch.nn.BCEWithLogitsLoss()
+
+
+def train_model(model, optimizer, dataloader):
+    """Train `model` for `num_epochs` epochs, then print its mean validation loss."""
+    train, val = dataloader.train_test_split(test_size=0.2)
+    model.train()
+    for _ in range(num_epochs):
+        for X, y in train.as_torch():
+            optimizer.zero_grad()
+            loss_fn(model(X), y).backward()
+            optimizer.step()
+    model.eval()  # Leave training mode before evaluating on the validation split
+    with torch.no_grad():
+        losses = [loss_fn(model(X), y).item() for X, y in val.as_torch()]
+    # An empty validation split would otherwise raise ZeroDivisionError
+    mean_loss = sum(losses) / len(losses) if losses else float("nan")
+    print(f"Validation Loss: {mean_loss}")
+
+
+# First, create a dataloader without resampling and see how training handles the underrepresented class.
+dl = ROOT.Experimental.ML.RDataLoader(
+    [df_major, df_minor],
+    batch_size=batch_size,
+    batches_in_memory=batches_in_memory,
+    target="b2",
+    set_seed=42,
+)
+
+basic_model = torch.nn.Linear(1, 1)  # Simple linear model for binary classification
+basic_optimizer = torch.optim.Adam(basic_model.parameters())
+
+print("Training without resampling:")
+train_model(basic_model, basic_optimizer, dl)
+
+# Now, let's repeat the same training with oversampling enabled.
+# Strategy: produce more batches of the underrepresented class.
+# This takes more time per epoch, but each epoch is more effective.
+dl_oversampled = ROOT.Experimental.ML.RDataLoader(
+    [df_major, df_minor],
+    batch_size=batch_size,
+    batches_in_memory=batches_in_memory,
+    target="b2",
+    set_seed=42,
+    load_eager=True,  # Must be enabled for resampling
+    sampling_type="oversampling",  # Can also be "undersampling"
+)
+
+oversampling_model = torch.nn.Linear(1, 1)
+oversampling_optimizer = torch.optim.Adam(oversampling_model.parameters())
+
+print("Training with oversampling:")
+train_model(oversampling_model, oversampling_optimizer, dl_oversampled)
+
+os.remove("major.root")
+os.remove("minor.root")

From 15141b5f97b0297b2e10d5014b97a0440f0e6ae4 Mon Sep 17 00:00:00 2001
From: Student2 <student2@mdessolevictus.cern.ch>
Date: Thu, 11 Jun 2026 16:05:02 +0200
Subject: [PATCH 3/3] Remove debugging print statement

---
 .../pythonizations/python/ROOT/_pythonization/_ml_dataloader.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py
index 05f4a55a3ddca..3ab53859622e4 100644
--- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py
+++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_ml_dataloader.py
@@ -4,7 +4,6 @@
 # Author: Vincenzo Eduardo Padulano, CERN 10/2024
 # Author: Martin Føll, University of Oslo (UiO) & CERN 01/2026
 # Author: Silia Taider, CERN 02/2026
-# Author: Jonah Ascoli, CERN 06/2026
 
 ################################################################################
 # Copyright (C) 1995-2026, Rene Brun and Fons Rademakers.                      #
@@ -162,7 +161,6 @@ def __init__(
         """
 
         from ROOT import RDF
-        print("This is jonah's testing branch")
 
         if rdataframes is None:
             rdataframes = []