From 590bd02167e8a82609a96e25e52173ac367dabe5 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 21:23:39 -0400
Subject: [PATCH 01/28] Create README.md

---
 .../README.md                                 | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
new file mode 100644
index 000000000..00592bf95
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
@@ -0,0 +1,54 @@
+# Benchmarking Data Science Agents: A Comparative Study
+
+## Team Members
+- Venkata Sripada
+- Amulya Grace Bandlamudi
+
+## Project Overview
+This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning.
+
+We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report.
+
+## Objectives
+- Collect benchmark leaderboard data
+- Analyze benchmark structure and evaluation metrics
+- Compare performance across different AI agents
+- Perform clustering and correlation analysis on benchmark results
+- Visualize benchmark differences and capability gaps
+- Produce a final research report
+
+## Tools and Technologies
+- Python
+- Pandas
+- NumPy
+- Matplotlib / Seaborn
+- Scikit-learn
+- Jupyter Notebook
+- (Optional) PySpark for large-scale data processing
+
+## Methodology
+1. Data collection from benchmark leaderboards
+2. Data cleaning and dataset integration
+3. Exploratory data analysis
+4. Statistical analysis and correlation analysis      
+5. Clustering benchmarks and agent performance
+6. Visualization and interpretation of results
+7. Final report and documentation
+
+## Expected Outcome
+The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks.
+
+## Repository Structure
+## Repository Structure
+
+```
+Benchmarking_Data_Science_Agents/
+│
+├── README.md          # Project overview and instructions
+├── data/              # Collected benchmark datasets
+├── notebooks/         # Jupyter notebooks for analysis
+├── src/               # Python scripts for data processing and analysis
+├── results/           # Output files, plots, and tables
+├── report/            # Final report and documentation
+└── references/        # Papers and benchmark documentation
+```

From 8192bc991d3c8cc7fc49cbc4ad79c252951ab803 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 21:24:17 -0400
Subject: [PATCH 02/28] Update README.md

---
 .../Spring2026/Benchmarking_Data_Science_Agents/README.md        | 1 -
 1 file changed, 1 deletion(-)

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
index 00592bf95..f2f9aa91f 100644
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
@@ -38,7 +38,6 @@ We will collect leaderboard data from these benchmarks, analyze benchmark design
 ## Expected Outcome
 The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks.
 
-## Repository Structure
 ## Repository Structure
 
 ```

From ba2bd8d68488dddf57c435f2071c54b2a59c92c0 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 21:55:55 -0400
Subject: [PATCH 03/28] Add requirements.txt from project template

---
 .../Benchmarking_Data_Science_Agents/requirements.txt         | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt
new file mode 100644
index 000000000..49aca3901
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt
@@ -0,0 +1,4 @@
+matplotlib
+numpy
+pandas
+seaborn

From 91276c05f99a70e86043a2fbaec46b2da78ec1fa Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 21:57:50 -0400
Subject: [PATCH 04/28] Add Dockerfile from project template

---
 .../Dockerfile                                | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile
new file mode 100644
index 000000000..f5c02c562
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile
@@ -0,0 +1,30 @@
+# Use Python 3.12 slim (already has Python and pip).
+FROM python:3.12-slim
+
+# Avoid interactive prompts during apt operations.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install CA certificates (needed for HTTPS).
+RUN apt-get update && apt-get install -y \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install project specific packages.
+RUN mkdir -p /install
+COPY requirements.txt /install/requirements.txt
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
+
+# Config.
+COPY etc_sudoers /install/
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Report package versions.
+COPY version.sh /install/
+RUN /install/version.sh 2>&1 | tee version.log
+
+# Jupyter.
+EXPOSE 8888
+
+CMD ["/bin/bash"]

From e5f40625a99bcf71d4b7260b96607a6b75ead5ea Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:03:02 -0400
Subject: [PATCH 05/28] Add docker_clean.sh from project template

---
 .../docker_clean.sh                           | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh
new file mode 100644
index 000000000..7e40839ae
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# """
+# Remove Docker container image for the project.
+#
+# This script cleans up Docker images by removing the container image
+# matching the project configuration. Useful for freeing disk space or
+# ensuring a fresh build.
+# """
+
+# Exit immediately if any command exits with a non-zero status.
+set -e
+
+# Import the utility functions.
+GIT_ROOT=$(git rev-parse --show-toplevel)
+source $GIT_ROOT/class_project/project_template/utils.sh
+
+# Parse default args (-h, -v) and enable set -x if -v is passed.
+parse_default_args "$@"
+
+# Load Docker configuration variables for this script.
+get_docker_vars_script ${BASH_SOURCE[0]}
+source $DOCKER_NAME
+print_docker_vars
+
+# Remove the container image.
+remove_container_image

From d737a7832c49034b25518e2d693992e25d1393ab Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:05:18 -0400
Subject: [PATCH 06/28] Add template_utils.py from project template

---
 .../template_utils.py                         | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py
new file mode 100644
index 000000000..f8916102e
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py
@@ -0,0 +1,72 @@
+"""
+template_utils.py
+
+This file contains utility functions that support the tutorial notebooks.
+
+- Notebooks should call these functions instead of writing raw logic inline.
+- This helps keep the notebooks clean, modular, and easier to debug.
+- Students should implement functions here for data preprocessing,
+  model setup, evaluation, or any reusable logic.
+
+Import as:
+
+import class_project.project_template.template_utils as cpptteut
+"""
+
+import pandas as pd
+import logging
+from sklearn.model_selection import train_test_split
+from pycaret.classification import compare_models
+
+# -----------------------------------------------------------------------------
+# Logging
+# -----------------------------------------------------------------------------
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# -----------------------------------------------------------------------------
+# Example 1: Split the dataset into train and test sets
+# -----------------------------------------------------------------------------
+
+
+def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2):
+    """
+    Split the dataset into training and testing sets.
+
+    :param df: full dataset
+    :param target_column: name of the target column
+    :param test_size: proportion of test data (default = 0.2)
+
+    :return: X_train, X_test, y_train, y_test
+    """
+    logger.info("Splitting data into train and test sets")
+    X = df.drop(columns=[target_column])
+    y = df[target_column]
+    return train_test_split(X, y, test_size=test_size, random_state=42)
+
+
+# -----------------------------------------------------------------------------
+# Example 2: PyCaret classification pipeline
+# -----------------------------------------------------------------------------
+
+
+def run_pycaret_classification(
+    df: pd.DataFrame, target_column: str
+) -> pd.DataFrame:
+    """
+    Run a basic PyCaret classification experiment.
+
+    :param df: dataset containing features and target
+    :param target_column: name of the target column
+
+    :return: comparison of top-performing models
+    """
+    logger.info("Initializing PyCaret classification setup")
+    ...
+
+    logger.info("Comparing models")
+    results = compare_models()
+    ...
+
+    return results

From 1ebefc665e8e54d352c07475a6fd0702fdbc26cb Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:06:23 -0400
Subject: [PATCH 07/28] Add template.example.py from project template

---
 .../template.example.py                       | 125 ++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py
new file mode 100644
index 000000000..b091b1369
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py
@@ -0,0 +1,125 @@
+---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.19.0
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Template Example Notebook
+#
+# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`.
+#
+# - Add description of what the notebook does.
+# - Point to references, e.g. (neo4j.example.md)
+# - Add citations.
+# - Keep the notebook flow clear.
+# - Comments should be imperative and have a period at the end.
+# - Your code should be well commented.
+#
+# The name of this notebook should in the following format:
+# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`
+#
+# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md
+
+# %%
+# %load_ext autoreload
+# %autoreload 2
+# %matplotlib inline
+
+# %%
+import logging
+# Import libraries in this section.
+# Avoid imports like import *, from ... import ..., from ... import *, etc.
+
+import helpers.hdbg as hdbg
+import helpers.hnotebook as hnotebo
+
+# %%
+hdbg.init_logger(verbosity=logging.INFO)
+
+_LOG = logging.getLogger(__name__)
+
+hnotebo.config_notebook()
+
+
+# %% [markdown]
+# ## Make the notebook flow clear
+# Each notebook needs to follow a clear and logical flow, e.g:
+# - Load data
+# - Compute stats
+# - Clean data
+# - Compute stats
+# - Do analysis
+# - Show results
+#
+#
+#
+#
+
+
+# #############################################################################
+# Template
+# #############################################################################
+
+
+# %%
+class Template:
+    """
+    Brief imperative description of what the class does in one line, if needed.
+    """
+
+    def __init__(self):
+        pass
+
+    def method1(self, arg1: int) -> None:
+        """
+        Brief imperative description of what the method does in one line.
+
+        You can elaborate more in the method docstring in this section, for e.g. explaining
+        the formula/algorithm. Every method/function should have a docstring, typehints and include the
+        parameters and return as follows:
+
+        :param arg1: description of arg1
+        :return: description of return
+        """
+        # Code bloks go here.
+        # Make sure to include comments to explain what the code is doing.
+        # No empty lines between code blocks.
+        pass
+
+
+def template_function(arg1: int) -> None:
+    """
+    Brief imperative description of what the function does in one line.
+
+    You can elaborate more in the function docstring in this section, for e.g. explaining
+    the formula/algorithm. Every function should have a docstring, typehints and include the
+    parameters and return as follows:
+
+    :param arg1: description of arg1
+    :return: description of return
+    """
+    # Code bloks go here.
+    # Make sure to include comments to explain what the code is doing.
+    # No empty lines between code blocks.
+    pass
+
+
+# %% [markdown]
+# ## The flow should be highlighted using headings in markdown
+# ```
+# # Level 1
+# ## Level 2
+# ### Level 3
+# ```
+
+# %%

From 94765523ca330e5474f024c66476d95efcabb40c Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:08:34 -0400
Subject: [PATCH 08/28] Add docker_jupyter.sh from project template

---
 .../docker_jupyter.sh                         | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh
new file mode 100644
index 000000000..1a60dfd3a
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# """
+# Execute Jupyter Lab in a Docker container.
+#
+# This script launches a Docker container running Jupyter Lab with
+# configurable port, directory mounting, and vim bindings. It passes
+# command-line options to the run_jupyter.sh script inside the container.
+#
+# Usage:
+# > docker_jupyter.sh [options]
+# """
+
+# Exit immediately if any command exits with a non-zero status.
+set -e
+
+# Import the utility functions.
+GIT_ROOT=$(git rev-parse --show-toplevel)
+source $GIT_ROOT/class_project/project_template/utils.sh
+
+# Parse command-line options and set Jupyter configuration variables.
+parse_docker_jupyter_args "$@"
+
+# Load Docker configuration variables for this script.
+get_docker_vars_script ${BASH_SOURCE[0]}
+source $DOCKER_NAME
+print_docker_vars
+
+# List available Docker images and inspect architecture.
+list_and_inspect_docker_image
+
+# Run the Docker container with Jupyter Lab.
+CMD=$(get_run_jupyter_cmd "${BASH_SOURCE[0]}" "$OLD_CMD_OPTS")
+CONTAINER_NAME=$IMAGE_NAME
+# Kill existing container if -f flag is set.
+kill_existing_container_if_forced
+
+DOCKER_CMD=$(get_docker_jupyter_command)
+DOCKER_CMD_OPTS=$(get_docker_jupyter_options $CONTAINER_NAME $JUPYTER_HOST_PORT $JUPYTER_USE_VIM)
+run "$DOCKER_CMD $DOCKER_CMD_OPTS $FULL_IMAGE_NAME $CMD"

From 39ebe17f6112f02df9ddd2b4e4ef05ac8fe544e6 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:12:41 -0400
Subject: [PATCH 09/28] Add version.sh from project template

---
 .../version.sh                                | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh
new file mode 100644
index 000000000..c46ed254c
--- /dev/null
+++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# """
+# Display versions of installed tools and packages.
+#
+# This script prints version information for Python, pip, Jupyter, and all
+# installed Python packages. Used for debugging and documentation purposes
+# to verify the Docker container environment setup.
+# """
+
+# Display Python 3 version.
+echo "# Python3"
+python3 --version
+
+# Display pip version.
+echo "# pip3"
+pip3 --version
+
+# Display Jupyter version.
+echo "# jupyter"
+jupyter --version
+
+# List all installed Python packages and their versions.
+echo "# Python packages"
+pip3 list
+
+# Template for adding additional tool versions.
+# echo "# mongo"
+# mongod --version

From 99ad2c5ceba4eecc3bfe7c96fae37880da2741c0 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:20:41 -0400
Subject: [PATCH 10/28] Create README.md

---
 .../data605/Spring2026/projects/README.md     | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/README.md

diff --git a/class_project/data605/Spring2026/projects/README.md b/class_project/data605/Spring2026/projects/README.md
new file mode 100644
index 000000000..f2f9aa91f
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/README.md
@@ -0,0 +1,53 @@
+# Benchmarking Data Science Agents: A Comparative Study
+
+## Team Members
+- Venkata Sripada
+- Amulya Grace Bandlamudi
+
+## Project Overview
+This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning.
+
+We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report.
+
+## Objectives
+- Collect benchmark leaderboard data
+- Analyze benchmark structure and evaluation metrics
+- Compare performance across different AI agents
+- Perform clustering and correlation analysis on benchmark results
+- Visualize benchmark differences and capability gaps
+- Produce a final research report
+
+## Tools and Technologies
+- Python
+- Pandas
+- NumPy
+- Matplotlib / Seaborn
+- Scikit-learn
+- Jupyter Notebook
+- (Optional) PySpark for large-scale data processing
+
+## Methodology
+1. Data collection from benchmark leaderboards
+2. Data cleaning and dataset integration
+3. Exploratory data analysis
+4. Statistical analysis and correlation analysis
+5. Clustering benchmarks and agent performance
+6. Visualization and interpretation of results
+7. Final report and documentation
+
+## Expected Outcome
+The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks.
+
+## Repository Structure
+
+```
+Benchmarking_Data_Science_Agents/
+│
+├── README.md          # Project overview and instructions
+├── data/              # Collected benchmark datasets
+├── notebooks/         # Jupyter notebooks for analysis
+├── src/               # Python scripts for data processing and analysis
+├── results/           # Output files, plots, and tables
+├── report/            # Final report and documentation
+└── references/        # Papers and benchmark documentation
+```

From 4e45939cdec97e6efecd9feb9ce8b6c40c190215 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:22:19 -0400
Subject: [PATCH 11/28] Add Dockerfile from project template

---
 .../data605/Spring2026/projects/Dockerfile    | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Dockerfile

diff --git a/class_project/data605/Spring2026/projects/Dockerfile b/class_project/data605/Spring2026/projects/Dockerfile
new file mode 100644
index 000000000..f5c02c562
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Dockerfile
@@ -0,0 +1,30 @@
+# Use Python 3.12 slim (already has Python and pip).
+FROM python:3.12-slim
+
+# Avoid interactive prompts during apt operations.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install CA certificates (needed for HTTPS).
+RUN apt-get update && apt-get install -y \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install project specific packages.
+RUN mkdir -p /install
+COPY requirements.txt /install/requirements.txt
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
+
+# Config.
+COPY etc_sudoers /install/
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Report package versions.
+COPY version.sh /install/
+RUN /install/version.sh 2>&1 | tee version.log
+
+# Jupyter.
+EXPOSE 8888
+
+CMD ["/bin/bash"]

From 4fa06e7493ce765cf47249339b864110e89f5892 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:25:06 -0400
Subject: [PATCH 12/28] Add template_utils.py from project template

---
 .../Spring2026/projects/template_utils.py     | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/template_utils.py

diff --git a/class_project/data605/Spring2026/projects/template_utils.py b/class_project/data605/Spring2026/projects/template_utils.py
new file mode 100644
index 000000000..f8916102e
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/template_utils.py
@@ -0,0 +1,72 @@
+"""
+template_utils.py
+
+This file contains utility functions that support the tutorial notebooks.
+
+- Notebooks should call these functions instead of writing raw logic inline.
+- This helps keep the notebooks clean, modular, and easier to debug.
+- Students should implement functions here for data preprocessing,
+  model setup, evaluation, or any reusable logic.
+
+Import as:
+
+import class_project.project_template.template_utils as cpptteut
+"""
+
+import pandas as pd
+import logging
+from sklearn.model_selection import train_test_split
+from pycaret.classification import compare_models
+
+# -----------------------------------------------------------------------------
+# Logging
+# -----------------------------------------------------------------------------
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# -----------------------------------------------------------------------------
+# Example 1: Split the dataset into train and test sets
+# -----------------------------------------------------------------------------
+
+
+def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2):
+    """
+    Split the dataset into training and testing sets.
+
+    :param df: full dataset
+    :param target_column: name of the target column
+    :param test_size: proportion of test data (default = 0.2)
+
+    :return: X_train, X_test, y_train, y_test
+    """
+    logger.info("Splitting data into train and test sets")
+    X = df.drop(columns=[target_column])
+    y = df[target_column]
+    return train_test_split(X, y, test_size=test_size, random_state=42)
+
+
+# -----------------------------------------------------------------------------
+# Example 2: PyCaret classification pipeline
+# -----------------------------------------------------------------------------
+
+
+def run_pycaret_classification(
+    df: pd.DataFrame, target_column: str
+) -> pd.DataFrame:
+    """
+    Run a basic PyCaret classification experiment.
+
+    :param df: dataset containing features and target
+    :param target_column: name of the target column
+
+    :return: comparison of top-performing models
+    """
+    logger.info("Initializing PyCaret classification setup")
+    ...
+
+    logger.info("Comparing models")
+    results = compare_models()
+    ...
+
+    return results

From d3dfad93c4dfb3a2378f2a3d47c6fff11d8f50f4 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:26:04 -0400
Subject: [PATCH 13/28] Add template.example.py from project template

---
 .../Spring2026/projects/template.example.py   | 125 ++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/template.example.py

diff --git a/class_project/data605/Spring2026/projects/template.example.py b/class_project/data605/Spring2026/projects/template.example.py
new file mode 100644
index 000000000..b091b1369
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/template.example.py
@@ -0,0 +1,125 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.19.0
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Template Example Notebook
+#
+# This is a template notebook. The first heading should be the title of what the notebook is about. For example, if it is a project on a neo4j tutorial, the heading should be `Project Title`.
+#
+# - Add description of what the notebook does.
+# - Point to references, e.g. (neo4j.example.md)
+# - Add citations.
+# - Keep the notebook flow clear.
+# - Comments should be imperative and have a period at the end.
+# - Your code should be well commented.
+#
+# The name of this notebook should be in the following format:
+# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`
+#
+# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md
+
+# %%
+# %load_ext autoreload
+# %autoreload 2
+# %matplotlib inline
+
+# %%
+import logging
+# Import libraries in this section.
+# Avoid imports like import *, from ... import ..., from ... import *, etc.
+
+import helpers.hdbg as hdbg
+import helpers.hnotebook as hnotebo
+
+# %%
+hdbg.init_logger(verbosity=logging.INFO)
+
+_LOG = logging.getLogger(__name__)
+
+hnotebo.config_notebook()
+
+
+# %% [markdown]
+# ## Make the notebook flow clear
+# Each notebook needs to follow a clear and logical flow, e.g:
+# - Load data
+# - Compute stats
+# - Clean data
+# - Compute stats
+# - Do analysis
+# - Show results
+#
+#
+#
+#
+
+
+# #############################################################################
+# Template
+# #############################################################################
+
+
+# %%
+class Template:
+    """
+    Brief imperative description of what the class does in one line, if needed.
+    """
+
+    def __init__(self):
+        pass
+
+    def method1(self, arg1: int) -> None:
+        """
+        Brief imperative description of what the method does in one line.
+
+        You can elaborate more in the method docstring in this section, for e.g. explaining
+        the formula/algorithm. Every method/function should have a docstring, typehints and include the
+        parameters and return as follows:
+
+        :param arg1: description of arg1
+        :return: description of return
+        """
+        # Code blocks go here.
+        # Make sure to include comments to explain what the code is doing.
+        # No empty lines between code blocks.
+        pass
+
+
+def template_function(arg1: int) -> None:
+    """
+    Brief imperative description of what the function does in one line.
+
+    You can elaborate more in the function docstring in this section, for e.g. explaining
+    the formula/algorithm. Every function should have a docstring, typehints and include the
+    parameters and return as follows:
+
+    :param arg1: description of arg1
+    :return: description of return
+    """
+    # Code blocks go here.
+    # Make sure to include comments to explain what the code is doing.
+    # No empty lines between code blocks.
+    pass
+
+
+# %% [markdown]
+# ## The flow should be highlighted using headings in markdown
+# ```
+# # Level 1
+# ## Level 2
+# ### Level 3
+# ```
+
+# %%

From ef715f73d4079a9d33cd1013eae59e5704e56a21 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:26:40 -0400
Subject: [PATCH 14/28] Add version.sh from project template

---
 .../data605/Spring2026/projects/version.sh    | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/version.sh

diff --git a/class_project/data605/Spring2026/projects/version.sh b/class_project/data605/Spring2026/projects/version.sh
new file mode 100644
index 000000000..c46ed254c
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/version.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# """
+# Display versions of installed tools and packages.
+#
+# This script prints version information for Python, pip, Jupyter, and all
+# installed Python packages. Used for debugging and documentation purposes
+# to verify the Docker container environment setup.
+# """
+
+# Display Python 3 version.
+echo "# Python3"
+python3 --version
+
+# Display pip version.
+echo "# pip3"
+pip3 --version
+
+# Display Jupyter version.
+echo "# jupyter"
+jupyter --version
+
+# List all installed Python packages and their versions.
+echo "# Python packages"
+pip3 list
+
+# Template for adding additional tool versions.
+# echo "# mongo"
+# mongod --version

From 80b605fbb89fc269035cb0b52a595765fd53403f Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:30:27 -0400
Subject: [PATCH 15/28] Add requirements.txt from project template

---
 class_project/data605/Spring2026/projects/requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/requirements.txt

diff --git a/class_project/data605/Spring2026/projects/requirements.txt b/class_project/data605/Spring2026/projects/requirements.txt
new file mode 100644
index 000000000..49aca3901
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/requirements.txt
@@ -0,0 +1,4 @@
+matplotlib
+numpy
+pandas
+seaborn

From 3e9436c5650786b5f6d4eb755b67c910c9f22b88 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:37:39 -0400
Subject: [PATCH 16/28] Delete
 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents directory

---
 .../Dockerfile                                |  30 -----
 .../README.md                                 |  53 --------
 .../docker_clean.sh                           |  26 ----
 .../docker_jupyter.sh                         |  39 ------
 .../requirements.txt                          |   4 -
 .../template.example.py                       | 125 ------------------
 .../template_utils.py                         |  72 ----------
 .../version.sh                                |  28 ----
 8 files changed, 377 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py
 delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh

diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile
deleted file mode 100644
index f5c02c562..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-# Use Python 3.12 slim (already has Python and pip).
-FROM python:3.12-slim
-
-# Avoid interactive prompts during apt operations.
-ENV DEBIAN_FRONTEND=noninteractive
-
-# Install CA certificates (needed for HTTPS).
-RUN apt-get update && apt-get install -y \
-    ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install project specific packages.
-RUN mkdir -p /install
-COPY requirements.txt /install/requirements.txt
-RUN pip install --upgrade pip && \
-    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
-
-# Config.
-COPY etc_sudoers /install/
-COPY etc_sudoers /etc/sudoers
-COPY bashrc /root/.bashrc
-
-# Report package versions.
-COPY version.sh /install/
-RUN /install/version.sh 2>&1 | tee version.log
-
-# Jupyter.
-EXPOSE 8888
-
-CMD ["/bin/bash"]
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
deleted file mode 100644
index f2f9aa91f..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# Benchmarking Data Science Agents: A Comparative Study
-
-## Team Members
-- Venkata Sripada
-- Amulya Grace Bandlamudi
-
-## Project Overview
-This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning.
-
-We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report.
-
-## Objectives
-- Collect benchmark leaderboard data
-- Analyze benchmark structure and evaluation metrics
-- Compare performance across different AI agents
-- Perform clustering and correlation analysis on benchmark results
-- Visualize benchmark differences and capability gaps
-- Produce a final research report
-
-## Tools and Technologies
-- Python
-- Pandas
-- NumPy
-- Matplotlib / Seaborn
-- Scikit-learn
-- Jupyter Notebook
-- (Optional) PySpark for large-scale data processing
-
-## Methodology
-1. Data collection from benchmark leaderboards
-2. Data cleaning and dataset integration
-3. Exploratory data analysis
-4. Statistical analysis and correlation analysis      
-5. Clustering benchmarks and agent performance
-6. Visualization and interpretation of results
-7. Final report and documentation
-
-## Expected Outcome
-The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks.
-
-## Repository Structure
-
-```
-Benchmarking_Data_Science_Agents/
-│
-├── README.md          # Project overview and instructions
-├── data/              # Collected benchmark datasets
-├── notebooks/         # Jupyter notebooks for analysis
-├── src/               # Python scripts for data processing and analysis
-├── results/           # Output files, plots, and tables
-├── report/            # Final report and documentation
-└── references/        # Papers and benchmark documentation
-```
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh
deleted file mode 100644
index 7e40839ae..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-# """
-# Remove Docker container image for the project.
-#
-# This script cleans up Docker images by removing the container image
-# matching the project configuration. Useful for freeing disk space or
-# ensuring a fresh build.
-# """
-
-# Exit immediately if any command exits with a non-zero status.
-set -e
-
-# Import the utility functions.
-GIT_ROOT=$(git rev-parse --show-toplevel)
-source $GIT_ROOT/class_project/project_template/utils.sh
-
-# Parse default args (-h, -v) and enable set -x if -v is passed.
-parse_default_args "$@"
-
-# Load Docker configuration variables for this script.
-get_docker_vars_script ${BASH_SOURCE[0]}
-source $DOCKER_NAME
-print_docker_vars
-
-# Remove the container image.
-remove_container_image
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh
deleted file mode 100644
index 1a60dfd3a..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# """
-# Execute Jupyter Lab in a Docker container.
-#
-# This script launches a Docker container running Jupyter Lab with
-# configurable port, directory mounting, and vim bindings. It passes
-# command-line options to the run_jupyter.sh script inside the container.
-#
-# Usage:
-# > docker_jupyter.sh [options]
-# """
-
-# Exit immediately if any command exits with a non-zero status.
-set -e
-
-# Import the utility functions.
-GIT_ROOT=$(git rev-parse --show-toplevel)
-source $GIT_ROOT/class_project/project_template/utils.sh
-
-# Parse command-line options and set Jupyter configuration variables.
-parse_docker_jupyter_args "$@"
-
-# Load Docker configuration variables for this script.
-get_docker_vars_script ${BASH_SOURCE[0]}
-source $DOCKER_NAME
-print_docker_vars
-
-# List available Docker images and inspect architecture.
-list_and_inspect_docker_image
-
-# Run the Docker container with Jupyter Lab.
-CMD=$(get_run_jupyter_cmd "${BASH_SOURCE[0]}" "$OLD_CMD_OPTS")
-CONTAINER_NAME=$IMAGE_NAME
-# Kill existing container if -f flag is set.
-kill_existing_container_if_forced
-
-DOCKER_CMD=$(get_docker_jupyter_command)
-DOCKER_CMD_OPTS=$(get_docker_jupyter_options $CONTAINER_NAME $JUPYTER_HOST_PORT $JUPYTER_USE_VIM)
-run "$DOCKER_CMD $DOCKER_CMD_OPTS $FULL_IMAGE_NAME $CMD"
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt
deleted file mode 100644
index 49aca3901..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-matplotlib
-numpy
-pandas
-seaborn
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py
deleted file mode 100644
index b091b1369..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py
+++ /dev/null
@@ -1,125 +0,0 @@
----
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py:percent
-#     text_representation:
-#       extension: .py
-#       format_name: percent
-#       format_version: '1.3'
-#       jupytext_version: 1.19.0
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# %% [markdown]
-# # Template Example Notebook
-#
-# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`.
-#
-# - Add description of what the notebook does.
-# - Point to references, e.g. (neo4j.example.md)
-# - Add citations.
-# - Keep the notebook flow clear.
-# - Comments should be imperative and have a period at the end.
-# - Your code should be well commented.
-#
-# The name of this notebook should in the following format:
-# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`
-#
-# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md
-
-# %%
-# %load_ext autoreload
-# %autoreload 2
-# %matplotlib inline
-
-# %%
-import logging
-# Import libraries in this section.
-# Avoid imports like import *, from ... import ..., from ... import *, etc.
-
-import helpers.hdbg as hdbg
-import helpers.hnotebook as hnotebo
-
-# %%
-hdbg.init_logger(verbosity=logging.INFO)
-
-_LOG = logging.getLogger(__name__)
-
-hnotebo.config_notebook()
-
-
-# %% [markdown]
-# ## Make the notebook flow clear
-# Each notebook needs to follow a clear and logical flow, e.g:
-# - Load data
-# - Compute stats
-# - Clean data
-# - Compute stats
-# - Do analysis
-# - Show results
-#
-#
-#
-#
-
-
-# #############################################################################
-# Template
-# #############################################################################
-
-
-# %%
-class Template:
-    """
-    Brief imperative description of what the class does in one line, if needed.
-    """
-
-    def __init__(self):
-        pass
-
-    def method1(self, arg1: int) -> None:
-        """
-        Brief imperative description of what the method does in one line.
-
-        You can elaborate more in the method docstring in this section, for e.g. explaining
-        the formula/algorithm. Every method/function should have a docstring, typehints and include the
-        parameters and return as follows:
-
-        :param arg1: description of arg1
-        :return: description of return
-        """
-        # Code bloks go here.
-        # Make sure to include comments to explain what the code is doing.
-        # No empty lines between code blocks.
-        pass
-
-
-def template_function(arg1: int) -> None:
-    """
-    Brief imperative description of what the function does in one line.
-
-    You can elaborate more in the function docstring in this section, for e.g. explaining
-    the formula/algorithm. Every function should have a docstring, typehints and include the
-    parameters and return as follows:
-
-    :param arg1: description of arg1
-    :return: description of return
-    """
-    # Code bloks go here.
-    # Make sure to include comments to explain what the code is doing.
-    # No empty lines between code blocks.
-    pass
-
-
-# %% [markdown]
-# ## The flow should be highlighted using headings in markdown
-# ```
-# # Level 1
-# ## Level 2
-# ### Level 3
-# ```
-
-# %%
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py
deleted file mode 100644
index f8916102e..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-template_utils.py
-
-This file contains utility functions that support the tutorial notebooks.
-
-- Notebooks should call these functions instead of writing raw logic inline.
-- This helps keep the notebooks clean, modular, and easier to debug.
-- Students should implement functions here for data preprocessing,
-  model setup, evaluation, or any reusable logic.
-
-Import as:
-
-import class_project.project_template.template_utils as cpptteut
-"""
-
-import pandas as pd
-import logging
-from sklearn.model_selection import train_test_split
-from pycaret.classification import compare_models
-
-# -----------------------------------------------------------------------------
-# Logging
-# -----------------------------------------------------------------------------
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# -----------------------------------------------------------------------------
-# Example 1: Split the dataset into train and test sets
-# -----------------------------------------------------------------------------
-
-
-def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2):
-    """
-    Split the dataset into training and testing sets.
-
-    :param df: full dataset
-    :param target_column: name of the target column
-    :param test_size: proportion of test data (default = 0.2)
-
-    :return: X_train, X_test, y_train, y_test
-    """
-    logger.info("Splitting data into train and test sets")
-    X = df.drop(columns=[target_column])
-    y = df[target_column]
-    return train_test_split(X, y, test_size=test_size, random_state=42)
-
-
-# -----------------------------------------------------------------------------
-# Example 2: PyCaret classification pipeline
-# -----------------------------------------------------------------------------
-
-
-def run_pycaret_classification(
-    df: pd.DataFrame, target_column: str
-) -> pd.DataFrame:
-    """
-    Run a basic PyCaret classification experiment.
-
-    :param df: dataset containing features and target
-    :param target_column: name of the target column
-
-    :return: comparison of top-performing models
-    """
-    logger.info("Initializing PyCaret classification setup")
-    ...
-
-    logger.info("Comparing models")
-    results = compare_models()
-    ...
-
-    return results
diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh
deleted file mode 100644
index c46ed254c..000000000
--- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# """
-# Display versions of installed tools and packages.
-#
-# This script prints version information for Python, pip, Jupyter, and all
-# installed Python packages. Used for debugging and documentation purposes
-# to verify the Docker container environment setup.
-# """
-
-# Display Python 3 version.
-echo "# Python3"
-python3 --version
-
-# Display pip version.
-echo "# pip3"
-pip3 --version
-
-# Display Jupyter version.
-echo "# jupyter"
-jupyter --version
-
-# List all installed Python packages and their versions.
-echo "# Python packages"
-pip3 list
-
-# Template for adding additional tool versions.
-# echo "# mongo"
-# mongod --version

From 72fca1bdd67ccf8a2db2bf7e965a302d1db6113e Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 22:53:43 -0400
Subject: [PATCH 17/28] Create README.md

---
 .../README.md                                 | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md

diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md
new file mode 100644
index 000000000..f2f9aa91f
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md
@@ -0,0 +1,53 @@
+# Benchmarking Data Science Agents: A Comparative Study
+
+## Team Members
+- Venkata Sripada
+- Amulya Grace Bandlamudi
+
+## Project Overview
+This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning.
+
+We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report.
+
+## Objectives
+- Collect benchmark leaderboard data
+- Analyze benchmark structure and evaluation metrics
+- Compare performance across different AI agents
+- Perform clustering and correlation analysis on benchmark results
+- Visualize benchmark differences and capability gaps
+- Produce a final research report
+
+## Tools and Technologies
+- Python
+- Pandas
+- NumPy
+- Matplotlib / Seaborn
+- Scikit-learn
+- Jupyter Notebook
+- (Optional) PySpark for large-scale data processing
+
+## Methodology
+1. Data collection from benchmark leaderboards
+2. Data cleaning and dataset integration
+3. Exploratory data analysis
+4. Statistical analysis and correlation analysis      
+5. Clustering benchmarks and agent performance
+6. Visualization and interpretation of results
+7. Final report and documentation
+
+## Expected Outcome
+The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks.
+
+## Repository Structure
+
+```
+Benchmarking_Data_Science_Agents/
+│
+├── README.md          # Project overview and instructions
+├── data/              # Collected benchmark datasets
+├── notebooks/         # Jupyter notebooks for analysis
+├── src/               # Python scripts for data processing and analysis
+├── results/           # Output files, plots, and tables
+├── report/            # Final report and documentation
+└── references/        # Papers and benchmark documentation
+```

From c85a761aed473648a8d3ca58dcc9c98ae2d6eabc Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:25:26 -0400
Subject: [PATCH 18/28] Add requirements.txt from project template

---
 .../Benchmarking_Data_Science_Agents/requirements.txt         | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt

diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt
new file mode 100644
index 000000000..49aca3901
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt
@@ -0,0 +1,4 @@
+matplotlib
+numpy
+pandas
+seaborn

From 14bf1413b0a0b6d515127a6e688379d60b8834b2 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:26:39 -0400
Subject: [PATCH 19/28] Add Dockerfile from project template

---
 .../Dockerfile                                | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile

diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile
new file mode 100644
index 000000000..f5c02c562
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile
@@ -0,0 +1,30 @@
+# Use Python 3.12 slim (already has Python and pip).
+FROM python:3.12-slim
+
+# Avoid interactive prompts during apt operations.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install CA certificates (needed for HTTPS).
+RUN apt-get update && apt-get install -y \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install project specific packages.
+RUN mkdir -p /install
+COPY requirements.txt /install/requirements.txt
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
+
+# Config. NOTE(review): etc_sudoers and bashrc must exist in the build context - confirm they are added.
+COPY etc_sudoers /install/
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Report package versions. Run through bash: version.sh is committed as mode 100644 (not executable).
+COPY version.sh /install/
+RUN bash /install/version.sh 2>&1 | tee version.log
+
+# Jupyter.
+EXPOSE 8888
+
+CMD ["/bin/bash"]

From f8456132829bb721f374bf131d050d3c8e4ea19f Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:28:13 -0400
Subject: [PATCH 20/28]  Add template_utils.py from project template

---
 .../template_utils.py                         | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py

diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py
new file mode 100644
index 000000000..f8916102e
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py
@@ -0,0 +1,72 @@
+"""
+template_utils.py
+
+This file contains utility functions that support the tutorial notebooks.
+
+- Notebooks should call these functions instead of writing raw logic inline.
+- This helps keep the notebooks clean, modular, and easier to debug.
+- Students should implement functions here for data preprocessing,
+  model setup, evaluation, or any reusable logic.
+
+Import as:
+
+import class_project.project_template.template_utils as cpptteut
+"""
+
+import pandas as pd
+import logging
+from sklearn.model_selection import train_test_split
+from pycaret.classification import compare_models
+
+# -----------------------------------------------------------------------------
+# Logging
+# -----------------------------------------------------------------------------
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# -----------------------------------------------------------------------------
+# Example 1: Split the dataset into train and test sets
+# -----------------------------------------------------------------------------
+
+
+def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2):
+    """
+    Split the dataset into training and testing sets.
+
+    :param df: full dataset, including the target column
+    :param target_column: name of the target column; it is dropped from the features
+    :param test_size: passed to `train_test_split` as `test_size` (default = 0.2)
+
+    :return: output of `train_test_split` with `random_state=42`: X_train, X_test, y_train, y_test
+    """
+    logger.info("Splitting data into train and test sets")
+    X = df.drop(columns=[target_column])
+    y = df[target_column]
+    return train_test_split(X, y, test_size=test_size, random_state=42)
+
+
+# -----------------------------------------------------------------------------
+# Example 2: PyCaret classification pipeline
+# -----------------------------------------------------------------------------
+
+
+def run_pycaret_classification(
+    df: pd.DataFrame, target_column: str
+) -> pd.DataFrame:
+    """
+    Run a basic PyCaret classification experiment (template skeleton).
+
+    :param df: dataset containing features and target (not read yet; setup is a `...` placeholder)
+    :param target_column: name of the target column (not read yet, same reason)
+
+    :return: value returned by `compare_models()`; NOTE(review): confirm it is a DataFrame
+    """
+    logger.info("Initializing PyCaret classification setup")
+    ...
+
+    logger.info("Comparing models")
+    results = compare_models()
+    ...
+
+    return results

From 487e0f5755d3bde6b96825dddcaed0bed1cf06f9 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:30:13 -0400
Subject: [PATCH 21/28] Add version.sh from project template

---
 .../version.sh                                | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh

diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh
new file mode 100644
index 000000000..c46ed254c
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# """
+# Display versions of installed tools and packages.
+#
+# This script prints version information for Python, pip, Jupyter, and all
+# installed Python packages. Used for debugging and documentation purposes
+# to verify the Docker container environment setup.
+# """
+
+# Display Python 3 version.
+echo "# Python3"
+python3 --version
+
+# Display pip version.
+echo "# pip3"
+pip3 --version
+
+# Display Jupyter version.
+echo "# jupyter"
+jupyter --version
+
+# List all installed Python packages and their versions.
+echo "# Python packages"
+pip3 list
+
+# Template for adding additional tool versions.
+# echo "# mongo"
+# mongod --version

From 6adc237ba482f17b5ce669724d4bd854d3ea0481 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:34:30 -0400
Subject: [PATCH 22/28] Add template.example.py from project template

---
 .../template.example.py                       | 125 ++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py

diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py
new file mode 100644
index 000000000..b091b1369
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py
@@ -0,0 +1,125 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.19.0
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Template Example Notebook
+#
+# This is a template notebook. The first heading should be the title of what the notebook is about. For example, if it is a project on a neo4j tutorial, the heading should be `Project Title`.
+#
+# - Add description of what the notebook does.
+# - Point to references, e.g. (neo4j.example.md)
+# - Add citations.
+# - Keep the notebook flow clear.
+# - Comments should be imperative and have a period at the end.
+# - Your code should be well commented.
+#
+# The name of this notebook should be in the following format:
+# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`
+#
+# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md
+
+# %%
+# %load_ext autoreload
+# %autoreload 2
+# %matplotlib inline
+
+# %%
+import logging
+# Import libraries in this section.
+# Avoid imports like import *, from ... import ..., from ... import *, etc.
+
+import helpers.hdbg as hdbg
+import helpers.hnotebook as hnotebo
+
+# %%
+hdbg.init_logger(verbosity=logging.INFO)
+
+_LOG = logging.getLogger(__name__)
+
+hnotebo.config_notebook()
+
+
+# %% [markdown]
+# ## Make the notebook flow clear
+# Each notebook needs to follow a clear and logical flow, e.g:
+# - Load data
+# - Compute stats
+# - Clean data
+# - Compute stats
+# - Do analysis
+# - Show results
+#
+#
+#
+#
+
+
+# #############################################################################
+# Template
+# #############################################################################
+
+
+# %%
+class Template:
+    """
+    Brief imperative description of what the class does in one line, if needed.
+    """
+
+    def __init__(self):
+        pass
+
+    def method1(self, arg1: int) -> None:
+        """
+        Brief imperative description of what the method does in one line.
+
+        You can elaborate more in the method docstring in this section, e.g. explaining
+        the formula/algorithm. Every method/function should have a docstring, type hints and include the
+        parameters and return as follows:
+
+        :param arg1: description of arg1
+        :return: description of return
+        """
+        # Code blocks go here.
+        # Make sure to include comments to explain what the code is doing.
+        # No empty lines between code blocks.
+        pass
+
+
+def template_function(arg1: int) -> None:
+    """
+    Brief imperative description of what the function does in one line.
+
+    You can elaborate more in the function docstring in this section, e.g. explaining
+    the formula/algorithm. Every function should have a docstring, type hints and include the
+    parameters and return as follows:
+
+    :param arg1: description of arg1
+    :return: description of return
+    """
+    # Code blocks go here.
+    # Make sure to include comments to explain what the code is doing.
+    # No empty lines between code blocks.
+    pass
+
+
+# %% [markdown]
+# ## The flow should be highlighted using headings in markdown
+# ```
+# # Level 1
+# ## Level 2
+# ### Level 3
+# ```
+
+# %%

From 671ec45f5a66c026ae9d13f6806b4f3dcd7c614c Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:37:44 -0400
Subject: [PATCH 23/28] Delete
 class_project/data605/Spring2026/projects/version.sh

---
 .../data605/Spring2026/projects/version.sh    | 28 -------------------
 1 file changed, 28 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/projects/version.sh

diff --git a/class_project/data605/Spring2026/projects/version.sh b/class_project/data605/Spring2026/projects/version.sh
deleted file mode 100644
index c46ed254c..000000000
--- a/class_project/data605/Spring2026/projects/version.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# """
-# Display versions of installed tools and packages.
-#
-# This script prints version information for Python, pip, Jupyter, and all
-# installed Python packages. Used for debugging and documentation purposes
-# to verify the Docker container environment setup.
-# """
-
-# Display Python 3 version.
-echo "# Python3"
-python3 --version
-
-# Display pip version.
-echo "# pip3"
-pip3 --version
-
-# Display Jupyter version.
-echo "# jupyter"
-jupyter --version
-
-# List all installed Python packages and their versions.
-echo "# Python packages"
-pip3 list
-
-# Template for adding additional tool versions.
-# echo "# mongo"
-# mongod --version

From 88aa353f621b5c78070c76fadb39af3a363f2877 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:40:51 -0400
Subject: [PATCH 24/28] Delete
 class_project/data605/Spring2026/projects/Dockerfile

---
 .../data605/Spring2026/projects/Dockerfile    | 30 -------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/projects/Dockerfile

diff --git a/class_project/data605/Spring2026/projects/Dockerfile b/class_project/data605/Spring2026/projects/Dockerfile
deleted file mode 100644
index f5c02c562..000000000
--- a/class_project/data605/Spring2026/projects/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-# Use Python 3.12 slim (already has Python and pip).
-FROM python:3.12-slim
-
-# Avoid interactive prompts during apt operations.
-ENV DEBIAN_FRONTEND=noninteractive
-
-# Install CA certificates (needed for HTTPS).
-RUN apt-get update && apt-get install -y \
-    ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install project specific packages.
-RUN mkdir -p /install
-COPY requirements.txt /install/requirements.txt
-RUN pip install --upgrade pip && \
-    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
-
-# Config.
-COPY etc_sudoers /install/
-COPY etc_sudoers /etc/sudoers
-COPY bashrc /root/.bashrc
-
-# Report package versions.
-COPY version.sh /install/
-RUN /install/version.sh 2>&1 | tee version.log
-
-# Jupyter.
-EXPOSE 8888
-
-CMD ["/bin/bash"]

From bb1765227cb77ecd87c05655d6638b8893cb12c0 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:55:18 -0400
Subject: [PATCH 25/28] Delete
 class_project/data605/Spring2026/projects/README.md

---
 .../data605/Spring2026/projects/README.md     | 53 -------------------
 1 file changed, 53 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/projects/README.md

diff --git a/class_project/data605/Spring2026/projects/README.md b/class_project/data605/Spring2026/projects/README.md
deleted file mode 100644
index f2f9aa91f..000000000
--- a/class_project/data605/Spring2026/projects/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# Benchmarking Data Science Agents: A Comparative Study
-
-## Team Members
-- Venkata Sripada
-- Amulya Grace Bandlamudi
-
-## Project Overview
-This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning.
-
-We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report.
-
-## Objectives
-- Collect benchmark leaderboard data
-- Analyze benchmark structure and evaluation metrics
-- Compare performance across different AI agents
-- Perform clustering and correlation analysis on benchmark results
-- Visualize benchmark differences and capability gaps
-- Produce a final research report
-
-## Tools and Technologies
-- Python
-- Pandas
-- NumPy
-- Matplotlib / Seaborn
-- Scikit-learn
-- Jupyter Notebook
-- (Optional) PySpark for large-scale data processing
-
-## Methodology
-1. Data collection from benchmark leaderboards
-2. Data cleaning and dataset integration
-3. Exploratory data analysis
-4. Statistical analysis and correlation analysis      
-5. Clustering benchmarks and agent performance
-6. Visualization and interpretation of results
-7. Final report and documentation
-
-## Expected Outcome
-The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks.
-
-## Repository Structure
-
-```
-Benchmarking_Data_Science_Agents/
-│
-├── README.md          # Project overview and instructions
-├── data/              # Collected benchmark datasets
-├── notebooks/         # Jupyter notebooks for analysis
-├── src/               # Python scripts for data processing and analysis
-├── results/           # Output files, plots, and tables
-├── report/            # Final report and documentation
-└── references/        # Papers and benchmark documentation
-```

From 7384c224f3a1f649552148e302582d957cb43a51 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:55:55 -0400
Subject: [PATCH 26/28] Delete
 class_project/data605/Spring2026/projects/requirements.txt

---
 class_project/data605/Spring2026/projects/requirements.txt | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/projects/requirements.txt

diff --git a/class_project/data605/Spring2026/projects/requirements.txt b/class_project/data605/Spring2026/projects/requirements.txt
deleted file mode 100644
index 49aca3901..000000000
--- a/class_project/data605/Spring2026/projects/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-matplotlib
-numpy
-pandas
-seaborn

From 88a3e4c88a3cc971b58857e7564443268a280d0d Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Tue, 31 Mar 2026 23:56:17 -0400
Subject: [PATCH 27/28] Delete
 class_project/data605/Spring2026/projects/template.example.py

---
 .../Spring2026/projects/template.example.py   | 125 ------------------
 1 file changed, 125 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/projects/template.example.py

diff --git a/class_project/data605/Spring2026/projects/template.example.py b/class_project/data605/Spring2026/projects/template.example.py
deleted file mode 100644
index b091b1369..000000000
--- a/class_project/data605/Spring2026/projects/template.example.py
+++ /dev/null
@@ -1,125 +0,0 @@
----
-# jupyter:
-#   jupytext:
-#     formats: ipynb,py:percent
-#     text_representation:
-#       extension: .py
-#       format_name: percent
-#       format_version: '1.3'
-#       jupytext_version: 1.19.0
-#   kernelspec:
-#     display_name: Python 3 (ipykernel)
-#     language: python
-#     name: python3
-# ---
-
-# %% [markdown]
-# # Template Example Notebook
-#
-# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`.
-#
-# - Add description of what the notebook does.
-# - Point to references, e.g. (neo4j.example.md)
-# - Add citations.
-# - Keep the notebook flow clear.
-# - Comments should be imperative and have a period at the end.
-# - Your code should be well commented.
-#
-# The name of this notebook should in the following format:
-# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`
-#
-# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md
-
-# %%
-# %load_ext autoreload
-# %autoreload 2
-# %matplotlib inline
-
-# %%
-import logging
-# Import libraries in this section.
-# Avoid imports like import *, from ... import ..., from ... import *, etc.
-
-import helpers.hdbg as hdbg
-import helpers.hnotebook as hnotebo
-
-# %%
-hdbg.init_logger(verbosity=logging.INFO)
-
-_LOG = logging.getLogger(__name__)
-
-hnotebo.config_notebook()
-
-
-# %% [markdown]
-# ## Make the notebook flow clear
-# Each notebook needs to follow a clear and logical flow, e.g:
-# - Load data
-# - Compute stats
-# - Clean data
-# - Compute stats
-# - Do analysis
-# - Show results
-#
-#
-#
-#
-
-
-# #############################################################################
-# Template
-# #############################################################################
-
-
-# %%
-class Template:
-    """
-    Brief imperative description of what the class does in one line, if needed.
-    """
-
-    def __init__(self):
-        pass
-
-    def method1(self, arg1: int) -> None:
-        """
-        Brief imperative description of what the method does in one line.
-
-        You can elaborate more in the method docstring in this section, for e.g. explaining
-        the formula/algorithm. Every method/function should have a docstring, typehints and include the
-        parameters and return as follows:
-
-        :param arg1: description of arg1
-        :return: description of return
-        """
-        # Code bloks go here.
-        # Make sure to include comments to explain what the code is doing.
-        # No empty lines between code blocks.
-        pass
-
-
-def template_function(arg1: int) -> None:
-    """
-    Brief imperative description of what the function does in one line.
-
-    You can elaborate more in the function docstring in this section, for e.g. explaining
-    the formula/algorithm. Every function should have a docstring, typehints and include the
-    parameters and return as follows:
-
-    :param arg1: description of arg1
-    :return: description of return
-    """
-    # Code bloks go here.
-    # Make sure to include comments to explain what the code is doing.
-    # No empty lines between code blocks.
-    pass
-
-
-# %% [markdown]
-# ## The flow should be highlighted using headings in markdown
-# ```
-# # Level 1
-# ## Level 2
-# ### Level 3
-# ```
-
-# %%

From 4501962419e9aa6b8ca0d0ae25549d5d9f10bc14 Mon Sep 17 00:00:00 2001
From: vsripada2423 <vsripada@umd.edu>
Date: Wed, 1 Apr 2026 00:03:46 -0400
Subject: [PATCH 28/28] Delete
 class_project/data605/Spring2026/projects/template_utils.py

---
 .../Spring2026/projects/template_utils.py     | 72 -------------------
 1 file changed, 72 deletions(-)
 delete mode 100644 class_project/data605/Spring2026/projects/template_utils.py

diff --git a/class_project/data605/Spring2026/projects/template_utils.py b/class_project/data605/Spring2026/projects/template_utils.py
deleted file mode 100644
index f8916102e..000000000
--- a/class_project/data605/Spring2026/projects/template_utils.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-template_utils.py
-
-This file contains utility functions that support the tutorial notebooks.
-
-- Notebooks should call these functions instead of writing raw logic inline.
-- This helps keep the notebooks clean, modular, and easier to debug.
-- Students should implement functions here for data preprocessing,
-  model setup, evaluation, or any reusable logic.
-
-Import as:
-
-import class_project.project_template.template_utils as cpptteut
-"""
-
-import pandas as pd
-import logging
-from sklearn.model_selection import train_test_split
-from pycaret.classification import compare_models
-
-# -----------------------------------------------------------------------------
-# Logging
-# -----------------------------------------------------------------------------
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# -----------------------------------------------------------------------------
-# Example 1: Split the dataset into train and test sets
-# -----------------------------------------------------------------------------
-
-
-def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2):
-    """
-    Split the dataset into training and testing sets.
-
-    :param df: full dataset
-    :param target_column: name of the target column
-    :param test_size: proportion of test data (default = 0.2)
-
-    :return: X_train, X_test, y_train, y_test
-    """
-    logger.info("Splitting data into train and test sets")
-    X = df.drop(columns=[target_column])
-    y = df[target_column]
-    return train_test_split(X, y, test_size=test_size, random_state=42)
-
-
-# -----------------------------------------------------------------------------
-# Example 2: PyCaret classification pipeline
-# -----------------------------------------------------------------------------
-
-
-def run_pycaret_classification(
-    df: pd.DataFrame, target_column: str
-) -> pd.DataFrame:
-    """
-    Run a basic PyCaret classification experiment.
-
-    :param df: dataset containing features and target
-    :param target_column: name of the target column
-
-    :return: comparison of top-performing models
-    """
-    logger.info("Initializing PyCaret classification setup")
-    ...
-
-    logger.info("Comparing models")
-    results = compare_models()
-    ...
-
-    return results