From 590bd02167e8a82609a96e25e52173ac367dabe5 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 21:23:39 -0400 Subject: [PATCH 01/28] Create README.md --- .../README.md | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md new file mode 100644 index 000000000..00592bf95 --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md @@ -0,0 +1,54 @@ +# Benchmarking Data Science Agents: A Comparative Study + +## Team Members +- Venkata Sripada +- Amulya Grace Bandlamudi + +## Project Overview +This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning. + +We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report. + +## Objectives +- Collect benchmark leaderboard data +- Analyze benchmark structure and evaluation metrics +- Compare performance across different AI agents +- Perform clustering and correlation analysis on benchmark results +- Visualize benchmark differences and capability gaps +- Produce a final research report + +## Tools and Technologies +- Python +- Pandas +- NumPy +- Matplotlib / Seaborn +- Scikit-learn +- Jupyter Notebook +- (Optional) PySpark for large-scale data processing + +## Methodology +1. Data collection from benchmark leaderboards +2. Data cleaning and dataset integration +3. Exploratory data analysis +4. Statistical analysis and correlation analysis +5. Clustering benchmarks and agent performance +6. Visualization and interpretation of results +7. Final report and documentation + +## Expected Outcome +The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks. + +## Repository Structure +## Repository Structure + +``` +Benchmarking_Data_Science_Agents/ +│ +├── README.md # Project overview and instructions +├── data/ # Collected benchmark datasets +├── notebooks/ # Jupyter notebooks for analysis +├── src/ # Python scripts for data processing and analysis +├── results/ # Output files, plots, and tables +├── report/ # Final report and documentation +└── references/ # Papers and benchmark documentation +``` From 8192bc991d3c8cc7fc49cbc4ad79c252951ab803 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 21:24:17 -0400 Subject: [PATCH 02/28] Update README.md --- .../Spring2026/Benchmarking_Data_Science_Agents/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md index 00592bf95..f2f9aa91f 100644 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md @@ -38,7 +38,6 @@ We will collect leaderboard data from these benchmarks, analyze benchmark design ## Expected Outcome The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks. -## Repository Structure ## Repository Structure ``` From ba2bd8d68488dddf57c435f2071c54b2a59c92c0 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 21:55:55 -0400 Subject: [PATCH 03/28] Add requirements.txt from project template --- .../Benchmarking_Data_Science_Agents/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt new file mode 100644 index 000000000..49aca3901 --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +numpy +pandas +seaborn From 91276c05f99a70e86043a2fbaec46b2da78ec1fa Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 21:57:50 -0400 Subject: [PATCH 04/28] Add Dockerfile from project template --- .../Dockerfile | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile new file mode 100644 index 000000000..f5c02c562 --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile @@ -0,0 +1,30 @@ +# Use Python 3.12 slim (already has Python and pip). +FROM python:3.12-slim + +# Avoid interactive prompts during apt operations. +ENV DEBIAN_FRONTEND=noninteractive + +# Install CA certificates (needed for HTTPS). +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install project specific packages. +RUN mkdir -p /install +COPY requirements.txt /install/requirements.txt +RUN pip install --upgrade pip && \ + pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt + +# Config. +COPY etc_sudoers /install/ +COPY etc_sudoers /etc/sudoers +COPY bashrc /root/.bashrc + +# Report package versions. +COPY version.sh /install/ +RUN /install/version.sh 2>&1 | tee version.log + +# Jupyter. +EXPOSE 8888 + +CMD ["/bin/bash"] From e5f40625a99bcf71d4b7260b96607a6b75ead5ea Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:03:02 -0400 Subject: [PATCH 05/28] Add docker_clean.sh from project template --- .../docker_clean.sh | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh new file mode 100644 index 000000000..7e40839ae --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# """ +# Remove Docker container image for the project. +# +# This script cleans up Docker images by removing the container image +# matching the project configuration. Useful for freeing disk space or +# ensuring a fresh build. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Parse default args (-h, -v) and enable set -x if -v is passed. +parse_default_args "$@" + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# Remove the container image. +remove_container_image From d737a7832c49034b25518e2d693992e25d1393ab Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:05:18 -0400 Subject: [PATCH 06/28] Add template_utils.py from project template --- .../template_utils.py | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py new file mode 100644 index 000000000..f8916102e --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py @@ -0,0 +1,72 @@ +""" +template_utils.py + +This file contains utility functions that support the tutorial notebooks. + +- Notebooks should call these functions instead of writing raw logic inline. +- This helps keep the notebooks clean, modular, and easier to debug. +- Students should implement functions here for data preprocessing, + model setup, evaluation, or any reusable logic. + +Import as: + +import class_project.project_template.template_utils as cpptteut +""" + +import pandas as pd +import logging +from sklearn.model_selection import train_test_split +from pycaret.classification import compare_models + +# ----------------------------------------------------------------------------- +# Logging +# ----------------------------------------------------------------------------- + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# ----------------------------------------------------------------------------- +# Example 1: Split the dataset into train and test sets +# ----------------------------------------------------------------------------- + + +def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): + """ + Split the dataset into training and testing sets. + + :param df: full dataset + :param target_column: name of the target column + :param test_size: proportion of test data (default = 0.2) + + :return: X_train, X_test, y_train, y_test + """ + logger.info("Splitting data into train and test sets") + X = df.drop(columns=[target_column]) + y = df[target_column] + return train_test_split(X, y, test_size=test_size, random_state=42) + + +# ----------------------------------------------------------------------------- +# Example 2: PyCaret classification pipeline +# ----------------------------------------------------------------------------- + + +def run_pycaret_classification( + df: pd.DataFrame, target_column: str +) -> pd.DataFrame: + """ + Run a basic PyCaret classification experiment. + + :param df: dataset containing features and target + :param target_column: name of the target column + + :return: comparison of top-performing models + """ + logger.info("Initializing PyCaret classification setup") + ... + + logger.info("Comparing models") + results = compare_models() + ... + + return results From 1ebefc665e8e54d352c07475a6fd0702fdbc26cb Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:06:23 -0400 Subject: [PATCH 07/28] Add template.example.py from project template --- .../template.example.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py new file mode 100644 index 000000000..b091b1369 --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py @@ -0,0 +1,125 @@ +--- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template Example Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.example.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. + +import helpers.hdbg as hdbg +import helpers.hnotebook as hnotebo + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +hnotebo.config_notebook() + + +# %% [markdown] +# ## Make the notebook flow clear +# Each notebook needs to follow a clear and logical flow, e.g: +# - Load data +# - Compute stats +# - Clean data +# - Compute stats +# - Do analysis +# - Show results +# +# +# +# + + +# ############################################################################# +# Template +# ############################################################################# + + +# %% +class Template: + """ + Brief imperative description of what the class does in one line, if needed. + """ + + def __init__(self): + pass + + def method1(self, arg1: int) -> None: + """ + Brief imperative description of what the method does in one line. + + You can elaborate more in the method docstring in this section, for e.g. explaining + the formula/algorithm. Every method/function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code bloks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +def template_function(arg1: int) -> None: + """ + Brief imperative description of what the function does in one line. + + You can elaborate more in the function docstring in this section, for e.g. explaining + the formula/algorithm. Every function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code bloks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +# %% [markdown] +# ## The flow should be highlighted using headings in markdown +# ``` +# # Level 1 +# ## Level 2 +# ### Level 3 +# ``` + +# %% From 94765523ca330e5474f024c66476d95efcabb40c Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:08:34 -0400 Subject: [PATCH 08/28] Add docker_jupyter.sh from project template --- .../docker_jupyter.sh | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh new file mode 100644 index 000000000..1a60dfd3a --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# """ +# Execute Jupyter Lab in a Docker container. +# +# This script launches a Docker container running Jupyter Lab with +# configurable port, directory mounting, and vim bindings. It passes +# command-line options to the run_jupyter.sh script inside the container. +# +# Usage: +# > docker_jupyter.sh [options] +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Parse command-line options and set Jupyter configuration variables. +parse_docker_jupyter_args "$@" + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# List available Docker images and inspect architecture. +list_and_inspect_docker_image + +# Run the Docker container with Jupyter Lab. +CMD=$(get_run_jupyter_cmd "${BASH_SOURCE[0]}" "$OLD_CMD_OPTS") +CONTAINER_NAME=$IMAGE_NAME +# Kill existing container if -f flag is set. +kill_existing_container_if_forced + +DOCKER_CMD=$(get_docker_jupyter_command) +DOCKER_CMD_OPTS=$(get_docker_jupyter_options $CONTAINER_NAME $JUPYTER_HOST_PORT $JUPYTER_USE_VIM) +run "$DOCKER_CMD $DOCKER_CMD_OPTS $FULL_IMAGE_NAME $CMD" From 39ebe17f6112f02df9ddd2b4e4ef05ac8fe544e6 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:12:41 -0400 Subject: [PATCH 09/28] Add version.sh from project template --- .../version.sh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh new file mode 100644 index 000000000..c46ed254c --- /dev/null +++ b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# """ +# Display versions of installed tools and packages. +# +# This script prints version information for Python, pip, Jupyter, and all +# installed Python packages. Used for debugging and documentation purposes +# to verify the Docker container environment setup. +# """ + +# Display Python 3 version. +echo "# Python3" +python3 --version + +# Display pip version. +echo "# pip3" +pip3 --version + +# Display Jupyter version. +echo "# jupyter" +jupyter --version + +# List all installed Python packages and their versions. +echo "# Python packages" +pip3 list + +# Template for adding additional tool versions. +# echo "# mongo" +# mongod --version From 99ad2c5ceba4eecc3bfe7c96fae37880da2741c0 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:20:41 -0400 Subject: [PATCH 10/28] Create README.md --- .../data605/Spring2026/projects/README.md | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/README.md diff --git a/class_project/data605/Spring2026/projects/README.md b/class_project/data605/Spring2026/projects/README.md new file mode 100644 index 000000000..f2f9aa91f --- /dev/null +++ b/class_project/data605/Spring2026/projects/README.md @@ -0,0 +1,53 @@ +# Benchmarking Data Science Agents: A Comparative Study + +## Team Members +- Venkata Sripada +- Amulya Grace Bandlamudi + +## Project Overview +This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning. + +We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report. + +## Objectives +- Collect benchmark leaderboard data +- Analyze benchmark structure and evaluation metrics +- Compare performance across different AI agents +- Perform clustering and correlation analysis on benchmark results +- Visualize benchmark differences and capability gaps +- Produce a final research report + +## Tools and Technologies +- Python +- Pandas +- NumPy +- Matplotlib / Seaborn +- Scikit-learn +- Jupyter Notebook +- (Optional) PySpark for large-scale data processing + +## Methodology +1. Data collection from benchmark leaderboards +2. Data cleaning and dataset integration +3. Exploratory data analysis +4. Statistical analysis and correlation analysis +5. Clustering benchmarks and agent performance +6. Visualization and interpretation of results +7. Final report and documentation + +## Expected Outcome +The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks. + +## Repository Structure + +``` +Benchmarking_Data_Science_Agents/ +│ +├── README.md # Project overview and instructions +├── data/ # Collected benchmark datasets +├── notebooks/ # Jupyter notebooks for analysis +├── src/ # Python scripts for data processing and analysis +├── results/ # Output files, plots, and tables +├── report/ # Final report and documentation +└── references/ # Papers and benchmark documentation +``` From 4e45939cdec97e6efecd9feb9ce8b6c40c190215 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:22:19 -0400 Subject: [PATCH 11/28] Add Dockerfile from project template --- .../data605/Spring2026/projects/Dockerfile | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Dockerfile diff --git a/class_project/data605/Spring2026/projects/Dockerfile b/class_project/data605/Spring2026/projects/Dockerfile new file mode 100644 index 000000000..f5c02c562 --- /dev/null +++ b/class_project/data605/Spring2026/projects/Dockerfile @@ -0,0 +1,30 @@ +# Use Python 3.12 slim (already has Python and pip). +FROM python:3.12-slim + +# Avoid interactive prompts during apt operations. +ENV DEBIAN_FRONTEND=noninteractive + +# Install CA certificates (needed for HTTPS). +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install project specific packages. +RUN mkdir -p /install +COPY requirements.txt /install/requirements.txt +RUN pip install --upgrade pip && \ + pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt + +# Config. +COPY etc_sudoers /install/ +COPY etc_sudoers /etc/sudoers +COPY bashrc /root/.bashrc + +# Report package versions. +COPY version.sh /install/ +RUN /install/version.sh 2>&1 | tee version.log + +# Jupyter. +EXPOSE 8888 + +CMD ["/bin/bash"] From 4fa06e7493ce765cf47249339b864110e89f5892 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:25:06 -0400 Subject: [PATCH 12/28] Add template_utils.py from project template --- .../Spring2026/projects/template_utils.py | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/template_utils.py diff --git a/class_project/data605/Spring2026/projects/template_utils.py b/class_project/data605/Spring2026/projects/template_utils.py new file mode 100644 index 000000000..f8916102e --- /dev/null +++ b/class_project/data605/Spring2026/projects/template_utils.py @@ -0,0 +1,72 @@ +""" +template_utils.py + +This file contains utility functions that support the tutorial notebooks. + +- Notebooks should call these functions instead of writing raw logic inline. +- This helps keep the notebooks clean, modular, and easier to debug. +- Students should implement functions here for data preprocessing, + model setup, evaluation, or any reusable logic. + +Import as: + +import class_project.project_template.template_utils as cpptteut +""" + +import pandas as pd +import logging +from sklearn.model_selection import train_test_split +from pycaret.classification import compare_models + +# ----------------------------------------------------------------------------- +# Logging +# ----------------------------------------------------------------------------- + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# ----------------------------------------------------------------------------- +# Example 1: Split the dataset into train and test sets +# ----------------------------------------------------------------------------- + + +def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): + """ + Split the dataset into training and testing sets. + + :param df: full dataset + :param target_column: name of the target column + :param test_size: proportion of test data (default = 0.2) + + :return: X_train, X_test, y_train, y_test + """ + logger.info("Splitting data into train and test sets") + X = df.drop(columns=[target_column]) + y = df[target_column] + return train_test_split(X, y, test_size=test_size, random_state=42) + + +# ----------------------------------------------------------------------------- +# Example 2: PyCaret classification pipeline +# ----------------------------------------------------------------------------- + + +def run_pycaret_classification( + df: pd.DataFrame, target_column: str +) -> pd.DataFrame: + """ + Run a basic PyCaret classification experiment. + + :param df: dataset containing features and target + :param target_column: name of the target column + + :return: comparison of top-performing models + """ + logger.info("Initializing PyCaret classification setup") + ... + + logger.info("Comparing models") + results = compare_models() + ... + + return results From d3dfad93c4dfb3a2378f2a3d47c6fff11d8f50f4 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:26:04 -0400 Subject: [PATCH 13/28] Add template.example.py from project template --- .../Spring2026/projects/template.example.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/template.example.py diff --git a/class_project/data605/Spring2026/projects/template.example.py b/class_project/data605/Spring2026/projects/template.example.py new file mode 100644 index 000000000..b091b1369 --- /dev/null +++ b/class_project/data605/Spring2026/projects/template.example.py @@ -0,0 +1,125 @@ +--- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template Example Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.example.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. + +import helpers.hdbg as hdbg +import helpers.hnotebook as hnotebo + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +hnotebo.config_notebook() + + +# %% [markdown] +# ## Make the notebook flow clear +# Each notebook needs to follow a clear and logical flow, e.g: +# - Load data +# - Compute stats +# - Clean data +# - Compute stats +# - Do analysis +# - Show results +# +# +# +# + + +# ############################################################################# +# Template +# ############################################################################# + + +# %% +class Template: + """ + Brief imperative description of what the class does in one line, if needed. + """ + + def __init__(self): + pass + + def method1(self, arg1: int) -> None: + """ + Brief imperative description of what the method does in one line. + + You can elaborate more in the method docstring in this section, for e.g. explaining + the formula/algorithm. Every method/function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code bloks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +def template_function(arg1: int) -> None: + """ + Brief imperative description of what the function does in one line. + + You can elaborate more in the function docstring in this section, for e.g. explaining + the formula/algorithm. Every function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code bloks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +# %% [markdown] +# ## The flow should be highlighted using headings in markdown +# ``` +# # Level 1 +# ## Level 2 +# ### Level 3 +# ``` + +# %% From ef715f73d4079a9d33cd1013eae59e5704e56a21 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:26:40 -0400 Subject: [PATCH 14/28] Add version.sh from project template --- .../data605/Spring2026/projects/version.sh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/version.sh diff --git a/class_project/data605/Spring2026/projects/version.sh b/class_project/data605/Spring2026/projects/version.sh new file mode 100644 index 000000000..c46ed254c --- /dev/null +++ b/class_project/data605/Spring2026/projects/version.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# """ +# Display versions of installed tools and packages. +# +# This script prints version information for Python, pip, Jupyter, and all +# installed Python packages. Used for debugging and documentation purposes +# to verify the Docker container environment setup. +# """ + +# Display Python 3 version. +echo "# Python3" +python3 --version + +# Display pip version. +echo "# pip3" +pip3 --version + +# Display Jupyter version. +echo "# jupyter" +jupyter --version + +# List all installed Python packages and their versions. +echo "# Python packages" +pip3 list + +# Template for adding additional tool versions. +# echo "# mongo" +# mongod --version From 80b605fbb89fc269035cb0b52a595765fd53403f Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:30:27 -0400 Subject: [PATCH 15/28] Add requirements.txt from project template --- class_project/data605/Spring2026/projects/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/requirements.txt diff --git a/class_project/data605/Spring2026/projects/requirements.txt b/class_project/data605/Spring2026/projects/requirements.txt new file mode 100644 index 000000000..49aca3901 --- /dev/null +++ b/class_project/data605/Spring2026/projects/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +numpy +pandas +seaborn From 3e9436c5650786b5f6d4eb755b67c910c9f22b88 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:37:39 -0400 Subject: [PATCH 16/28] Delete class_project/data605/Spring2026/Benchmarking_Data_Science_Agents directory --- .../Dockerfile | 30 ----- .../README.md | 53 -------- .../docker_clean.sh | 26 ---- .../docker_jupyter.sh | 39 ------ .../requirements.txt | 4 - .../template.example.py | 125 ------------------ .../template_utils.py | 72 ---------- .../version.sh | 28 ---- 8 files changed, 377 deletions(-) delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py delete mode 100644 class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile deleted file mode 100644 index f5c02c562..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -# Use Python 3.12 slim (already has Python and pip). -FROM python:3.12-slim - -# Avoid interactive prompts during apt operations. -ENV DEBIAN_FRONTEND=noninteractive - -# Install CA certificates (needed for HTTPS). -RUN apt-get update && apt-get install -y \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -# Install project specific packages. -RUN mkdir -p /install -COPY requirements.txt /install/requirements.txt -RUN pip install --upgrade pip && \ - pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt - -# Config. -COPY etc_sudoers /install/ -COPY etc_sudoers /etc/sudoers -COPY bashrc /root/.bashrc - -# Report package versions. -COPY version.sh /install/ -RUN /install/version.sh 2>&1 | tee version.log - -# Jupyter. -EXPOSE 8888 - -CMD ["/bin/bash"] diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md deleted file mode 100644 index f2f9aa91f..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Benchmarking Data Science Agents: A Comparative Study - -## Team Members -- Venkata Sripada -- Amulya Grace Bandlamudi - -## Project Overview -This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning. - -We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report. - -## Objectives -- Collect benchmark leaderboard data -- Analyze benchmark structure and evaluation metrics -- Compare performance across different AI agents -- Perform clustering and correlation analysis on benchmark results -- Visualize benchmark differences and capability gaps -- Produce a final research report - -## Tools and Technologies -- Python -- Pandas -- NumPy -- Matplotlib / Seaborn -- Scikit-learn -- Jupyter Notebook -- (Optional) PySpark for large-scale data processing - -## Methodology -1. Data collection from benchmark leaderboards -2. Data cleaning and dataset integration -3. Exploratory data analysis -4. Statistical analysis and correlation analysis -5. Clustering benchmarks and agent performance -6. Visualization and interpretation of results -7. Final report and documentation - -## Expected Outcome -The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks. - -## Repository Structure - -``` -Benchmarking_Data_Science_Agents/ -│ -├── README.md # Project overview and instructions -├── data/ # Collected benchmark datasets -├── notebooks/ # Jupyter notebooks for analysis -├── src/ # Python scripts for data processing and analysis -├── results/ # Output files, plots, and tables -├── report/ # Final report and documentation -└── references/ # Papers and benchmark documentation -``` diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh deleted file mode 100644 index 7e40839ae..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_clean.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# """ -# Remove Docker container image for the project. -# -# This script cleans up Docker images by removing the container image -# matching the project configuration. Useful for freeing disk space or -# ensuring a fresh build. -# """ - -# Exit immediately if any command exits with a non-zero status. -set -e - -# Import the utility functions. -GIT_ROOT=$(git rev-parse --show-toplevel) -source $GIT_ROOT/class_project/project_template/utils.sh - -# Parse default args (-h, -v) and enable set -x if -v is passed. -parse_default_args "$@" - -# Load Docker configuration variables for this script. -get_docker_vars_script ${BASH_SOURCE[0]} -source $DOCKER_NAME -print_docker_vars - -# Remove the container image. -remove_container_image diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh deleted file mode 100644 index 1a60dfd3a..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/docker_jupyter.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# """ -# Execute Jupyter Lab in a Docker container. -# -# This script launches a Docker container running Jupyter Lab with -# configurable port, directory mounting, and vim bindings. It passes -# command-line options to the run_jupyter.sh script inside the container. -# -# Usage: -# > docker_jupyter.sh [options] -# """ - -# Exit immediately if any command exits with a non-zero status. -set -e - -# Import the utility functions. -GIT_ROOT=$(git rev-parse --show-toplevel) -source $GIT_ROOT/class_project/project_template/utils.sh - -# Parse command-line options and set Jupyter configuration variables. -parse_docker_jupyter_args "$@" - -# Load Docker configuration variables for this script. -get_docker_vars_script ${BASH_SOURCE[0]} -source $DOCKER_NAME -print_docker_vars - -# List available Docker images and inspect architecture. -list_and_inspect_docker_image - -# Run the Docker container with Jupyter Lab. -CMD=$(get_run_jupyter_cmd "${BASH_SOURCE[0]}" "$OLD_CMD_OPTS") -CONTAINER_NAME=$IMAGE_NAME -# Kill existing container if -f flag is set. -kill_existing_container_if_forced - -DOCKER_CMD=$(get_docker_jupyter_command) -DOCKER_CMD_OPTS=$(get_docker_jupyter_options $CONTAINER_NAME $JUPYTER_HOST_PORT $JUPYTER_USE_VIM) -run "$DOCKER_CMD $DOCKER_CMD_OPTS $FULL_IMAGE_NAME $CMD" diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt deleted file mode 100644 index 49aca3901..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -matplotlib -numpy -pandas -seaborn diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py deleted file mode 100644 index b091b1369..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template.example.py +++ /dev/null @@ -1,125 +0,0 @@ ---- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# # Template Example Notebook -# -# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. -# -# - Add description of what the notebook does. -# - Point to references, e.g. (neo4j.example.md) -# - Add citations. -# - Keep the notebook flow clear. -# - Comments should be imperative and have a period at the end. -# - Your code should be well commented. -# -# The name of this notebook should in the following format: -# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` -# -# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md - -# %% -# %load_ext autoreload -# %autoreload 2 -# %matplotlib inline - -# %% -import logging -# Import libraries in this section. -# Avoid imports like import *, from ... import ..., from ... import *, etc. - -import helpers.hdbg as hdbg -import helpers.hnotebook as hnotebo - -# %% -hdbg.init_logger(verbosity=logging.INFO) - -_LOG = logging.getLogger(__name__) - -hnotebo.config_notebook() - - -# %% [markdown] -# ## Make the notebook flow clear -# Each notebook needs to follow a clear and logical flow, e.g: -# - Load data -# - Compute stats -# - Clean data -# - Compute stats -# - Do analysis -# - Show results -# -# -# -# - - -# ############################################################################# -# Template -# ############################################################################# - - -# %% -class Template: - """ - Brief imperative description of what the class does in one line, if needed. - """ - - def __init__(self): - pass - - def method1(self, arg1: int) -> None: - """ - Brief imperative description of what the method does in one line. - - You can elaborate more in the method docstring in this section, for e.g. explaining - the formula/algorithm. Every method/function should have a docstring, typehints and include the - parameters and return as follows: - - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass - - -def template_function(arg1: int) -> None: - """ - Brief imperative description of what the function does in one line. - - You can elaborate more in the function docstring in this section, for e.g. explaining - the formula/algorithm. Every function should have a docstring, typehints and include the - parameters and return as follows: - - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass - - -# %% [markdown] -# ## The flow should be highlighted using headings in markdown -# ``` -# # Level 1 -# ## Level 2 -# ### Level 3 -# ``` - -# %% diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py deleted file mode 100644 index f8916102e..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/template_utils.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -template_utils.py - -This file contains utility functions that support the tutorial notebooks. - -- Notebooks should call these functions instead of writing raw logic inline. -- This helps keep the notebooks clean, modular, and easier to debug. -- Students should implement functions here for data preprocessing, - model setup, evaluation, or any reusable logic. - -Import as: - -import class_project.project_template.template_utils as cpptteut -""" - -import pandas as pd -import logging -from sklearn.model_selection import train_test_split -from pycaret.classification import compare_models - -# ----------------------------------------------------------------------------- -# Logging -# ----------------------------------------------------------------------------- - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# ----------------------------------------------------------------------------- -# Example 1: Split the dataset into train and test sets -# ----------------------------------------------------------------------------- - - -def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): - """ - Split the dataset into training and testing sets. - - :param df: full dataset - :param target_column: name of the target column - :param test_size: proportion of test data (default = 0.2) - - :return: X_train, X_test, y_train, y_test - """ - logger.info("Splitting data into train and test sets") - X = df.drop(columns=[target_column]) - y = df[target_column] - return train_test_split(X, y, test_size=test_size, random_state=42) - - -# ----------------------------------------------------------------------------- -# Example 2: PyCaret classification pipeline -# ----------------------------------------------------------------------------- - - -def run_pycaret_classification( - df: pd.DataFrame, target_column: str -) -> pd.DataFrame: - """ - Run a basic PyCaret classification experiment. - - :param df: dataset containing features and target - :param target_column: name of the target column - - :return: comparison of top-performing models - """ - logger.info("Initializing PyCaret classification setup") - ... - - logger.info("Comparing models") - results = compare_models() - ... - - return results diff --git a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh b/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh deleted file mode 100644 index c46ed254c..000000000 --- a/class_project/data605/Spring2026/Benchmarking_Data_Science_Agents/version.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# """ -# Display versions of installed tools and packages. -# -# This script prints version information for Python, pip, Jupyter, and all -# installed Python packages. Used for debugging and documentation purposes -# to verify the Docker container environment setup. -# """ - -# Display Python 3 version. -echo "# Python3" -python3 --version - -# Display pip version. -echo "# pip3" -pip3 --version - -# Display Jupyter version. -echo "# jupyter" -jupyter --version - -# List all installed Python packages and their versions. -echo "# Python packages" -pip3 list - -# Template for adding additional tool versions. -# echo "# mongo" -# mongod --version From 72fca1bdd67ccf8a2db2bf7e965a302d1db6113e Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 22:53:43 -0400 Subject: [PATCH 17/28] Create README.md --- .../README.md | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md new file mode 100644 index 000000000..f2f9aa91f --- /dev/null +++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/README.md @@ -0,0 +1,53 @@ +# Benchmarking Data Science Agents: A Comparative Study + +## Team Members +- Venkata Sripada +- Amulya Grace Bandlamudi + +## Project Overview +This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning. + +We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report. + +## Objectives +- Collect benchmark leaderboard data +- Analyze benchmark structure and evaluation metrics +- Compare performance across different AI agents +- Perform clustering and correlation analysis on benchmark results +- Visualize benchmark differences and capability gaps +- Produce a final research report + +## Tools and Technologies +- Python +- Pandas +- NumPy +- Matplotlib / Seaborn +- Scikit-learn +- Jupyter Notebook +- (Optional) PySpark for large-scale data processing + +## Methodology +1. Data collection from benchmark leaderboards +2. Data cleaning and dataset integration +3. Exploratory data analysis +4. Statistical analysis and correlation analysis +5. Clustering benchmarks and agent performance +6. Visualization and interpretation of results +7. Final report and documentation + +## Expected Outcome +The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks. + +## Repository Structure + +``` +Benchmarking_Data_Science_Agents/ +│ +├── README.md # Project overview and instructions +├── data/ # Collected benchmark datasets +├── notebooks/ # Jupyter notebooks for analysis +├── src/ # Python scripts for data processing and analysis +├── results/ # Output files, plots, and tables +├── report/ # Final report and documentation +└── references/ # Papers and benchmark documentation +``` From c85a761aed473648a8d3ca58dcc9c98ae2d6eabc Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:25:26 -0400 Subject: [PATCH 18/28] Add requirements.txt from project template --- .../Benchmarking_Data_Science_Agents/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt new file mode 100644 index 000000000..49aca3901 --- /dev/null +++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +numpy +pandas +seaborn From 14bf1413b0a0b6d515127a6e688379d60b8834b2 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:26:39 -0400 Subject: [PATCH 19/28] Add Dockerfile from project template --- .../Dockerfile | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile new file mode 100644 index 000000000..f5c02c562 --- /dev/null +++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/Dockerfile @@ -0,0 +1,30 @@ +# Use Python 3.12 slim (already has Python and pip). +FROM python:3.12-slim + +# Avoid interactive prompts during apt operations. +ENV DEBIAN_FRONTEND=noninteractive + +# Install CA certificates (needed for HTTPS). +RUN apt-get update && apt-get install -y \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install project specific packages. +RUN mkdir -p /install +COPY requirements.txt /install/requirements.txt +RUN pip install --upgrade pip && \ + pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt + +# Config. +COPY etc_sudoers /install/ +COPY etc_sudoers /etc/sudoers +COPY bashrc /root/.bashrc + +# Report package versions. +COPY version.sh /install/ +RUN /install/version.sh 2>&1 | tee version.log + +# Jupyter. +EXPOSE 8888 + +CMD ["/bin/bash"] From f8456132829bb721f374bf131d050d3c8e4ea19f Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:28:13 -0400 Subject: [PATCH 20/28] Add template_utils.py from project template --- .../template_utils.py | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py new file mode 100644 index 000000000..f8916102e --- /dev/null +++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template_utils.py @@ -0,0 +1,72 @@ +""" +template_utils.py + +This file contains utility functions that support the tutorial notebooks. + +- Notebooks should call these functions instead of writing raw logic inline. +- This helps keep the notebooks clean, modular, and easier to debug. +- Students should implement functions here for data preprocessing, + model setup, evaluation, or any reusable logic. + +Import as: + +import class_project.project_template.template_utils as cpptteut +""" + +import pandas as pd +import logging +from sklearn.model_selection import train_test_split +from pycaret.classification import compare_models + +# ----------------------------------------------------------------------------- +# Logging +# ----------------------------------------------------------------------------- + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# ----------------------------------------------------------------------------- +# Example 1: Split the dataset into train and test sets +# ----------------------------------------------------------------------------- + + +def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): + """ + Split the dataset into training and testing sets. + + :param df: full dataset + :param target_column: name of the target column + :param test_size: proportion of test data (default = 0.2) + + :return: X_train, X_test, y_train, y_test + """ + logger.info("Splitting data into train and test sets") + X = df.drop(columns=[target_column]) + y = df[target_column] + return train_test_split(X, y, test_size=test_size, random_state=42) + + +# ----------------------------------------------------------------------------- +# Example 2: PyCaret classification pipeline +# ----------------------------------------------------------------------------- + + +def run_pycaret_classification( + df: pd.DataFrame, target_column: str +) -> pd.DataFrame: + """ + Run a basic PyCaret classification experiment. + + :param df: dataset containing features and target + :param target_column: name of the target column + + :return: comparison of top-performing models + """ + logger.info("Initializing PyCaret classification setup") + ... + + logger.info("Comparing models") + results = compare_models() + ... + + return results From 487e0f5755d3bde6b96825dddcaed0bed1cf06f9 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:30:13 -0400 Subject: [PATCH 21/28] Add version.sh from project template --- .../version.sh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh new file mode 100644 index 000000000..c46ed254c --- /dev/null +++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/version.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# """ +# Display versions of installed tools and packages. +# +# This script prints version information for Python, pip, Jupyter, and all +# installed Python packages. Used for debugging and documentation purposes +# to verify the Docker container environment setup. +# """ + +# Display Python 3 version. +echo "# Python3" +python3 --version + +# Display pip version. +echo "# pip3" +pip3 --version + +# Display Jupyter version. +echo "# jupyter" +jupyter --version + +# List all installed Python packages and their versions. +echo "# Python packages" +pip3 list + +# Template for adding additional tool versions. +# echo "# mongo" +# mongod --version From 6adc237ba482f17b5ce669724d4bd854d3ea0481 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:34:30 -0400 Subject: [PATCH 22/28] Add template.example.py from project template --- .../template.example.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py diff --git a/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py new file mode 100644 index 000000000..b091b1369 --- /dev/null +++ b/class_project/data605/Spring2026/projects/Benchmarking_Data_Science_Agents/template.example.py @@ -0,0 +1,125 @@ +--- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template Example Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.example.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. + +import helpers.hdbg as hdbg +import helpers.hnotebook as hnotebo + +# %% +hdbg.init_logger(verbosity=logging.INFO) + +_LOG = logging.getLogger(__name__) + +hnotebo.config_notebook() + + +# %% [markdown] +# ## Make the notebook flow clear +# Each notebook needs to follow a clear and logical flow, e.g: +# - Load data +# - Compute stats +# - Clean data +# - Compute stats +# - Do analysis +# - Show results +# +# +# +# + + +# ############################################################################# +# Template +# ############################################################################# + + +# %% +class Template: + """ + Brief imperative description of what the class does in one line, if needed. + """ + + def __init__(self): + pass + + def method1(self, arg1: int) -> None: + """ + Brief imperative description of what the method does in one line. + + You can elaborate more in the method docstring in this section, for e.g. explaining + the formula/algorithm. Every method/function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code bloks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +def template_function(arg1: int) -> None: + """ + Brief imperative description of what the function does in one line. + + You can elaborate more in the function docstring in this section, for e.g. explaining + the formula/algorithm. Every function should have a docstring, typehints and include the + parameters and return as follows: + + :param arg1: description of arg1 + :return: description of return + """ + # Code bloks go here. + # Make sure to include comments to explain what the code is doing. + # No empty lines between code blocks. + pass + + +# %% [markdown] +# ## The flow should be highlighted using headings in markdown +# ``` +# # Level 1 +# ## Level 2 +# ### Level 3 +# ``` + +# %% From 671ec45f5a66c026ae9d13f6806b4f3dcd7c614c Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:37:44 -0400 Subject: [PATCH 23/28] Delete class_project/data605/Spring2026/projects/version.sh --- .../data605/Spring2026/projects/version.sh | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/version.sh diff --git a/class_project/data605/Spring2026/projects/version.sh b/class_project/data605/Spring2026/projects/version.sh deleted file mode 100644 index c46ed254c..000000000 --- a/class_project/data605/Spring2026/projects/version.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# """ -# Display versions of installed tools and packages. -# -# This script prints version information for Python, pip, Jupyter, and all -# installed Python packages. Used for debugging and documentation purposes -# to verify the Docker container environment setup. -# """ - -# Display Python 3 version. -echo "# Python3" -python3 --version - -# Display pip version. -echo "# pip3" -pip3 --version - -# Display Jupyter version. -echo "# jupyter" -jupyter --version - -# List all installed Python packages and their versions. -echo "# Python packages" -pip3 list - -# Template for adding additional tool versions. -# echo "# mongo" -# mongod --version From 88aa353f621b5c78070c76fadb39af3a363f2877 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:40:51 -0400 Subject: [PATCH 24/28] Delete class_project/data605/Spring2026/projects/Dockerfile --- .../data605/Spring2026/projects/Dockerfile | 30 ------------------- 1 file changed, 30 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/Dockerfile diff --git a/class_project/data605/Spring2026/projects/Dockerfile b/class_project/data605/Spring2026/projects/Dockerfile deleted file mode 100644 index f5c02c562..000000000 --- a/class_project/data605/Spring2026/projects/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -# Use Python 3.12 slim (already has Python and pip). -FROM python:3.12-slim - -# Avoid interactive prompts during apt operations. -ENV DEBIAN_FRONTEND=noninteractive - -# Install CA certificates (needed for HTTPS). -RUN apt-get update && apt-get install -y \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -# Install project specific packages. -RUN mkdir -p /install -COPY requirements.txt /install/requirements.txt -RUN pip install --upgrade pip && \ - pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt - -# Config. -COPY etc_sudoers /install/ -COPY etc_sudoers /etc/sudoers -COPY bashrc /root/.bashrc - -# Report package versions. -COPY version.sh /install/ -RUN /install/version.sh 2>&1 | tee version.log - -# Jupyter. -EXPOSE 8888 - -CMD ["/bin/bash"] From bb1765227cb77ecd87c05655d6638b8893cb12c0 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:55:18 -0400 Subject: [PATCH 25/28] Delete class_project/data605/Spring2026/projects/README.md --- .../data605/Spring2026/projects/README.md | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/README.md diff --git a/class_project/data605/Spring2026/projects/README.md b/class_project/data605/Spring2026/projects/README.md deleted file mode 100644 index f2f9aa91f..000000000 --- a/class_project/data605/Spring2026/projects/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Benchmarking Data Science Agents: A Comparative Study - -## Team Members -- Venkata Sripada -- Amulya Grace Bandlamudi - -## Project Overview -This project studies and compares publicly available data science benchmarks for AI agents, including DataSciBench, DSBench, MLE-Bench, GAIA, and SWE-bench. The goal is to understand how different benchmarks evaluate data science capabilities such as data analysis, machine learning engineering, and multi-step reasoning. - -We will collect leaderboard data from these benchmarks, analyze benchmark design (task type, evaluation metric, and difficulty), and compare agent performance across benchmarks. The project will produce a reproducible data analysis pipeline, visualizations, and a comparative research report. - -## Objectives -- Collect benchmark leaderboard data -- Analyze benchmark structure and evaluation metrics -- Compare performance across different AI agents -- Perform clustering and correlation analysis on benchmark results -- Visualize benchmark differences and capability gaps -- Produce a final research report - -## Tools and Technologies -- Python -- Pandas -- NumPy -- Matplotlib / Seaborn -- Scikit-learn -- Jupyter Notebook -- (Optional) PySpark for large-scale data processing - -## Methodology -1. Data collection from benchmark leaderboards -2. Data cleaning and dataset integration -3. Exploratory data analysis -4. Statistical analysis and correlation analysis -5. Clustering benchmarks and agent performance -6. Visualization and interpretation of results -7. Final report and documentation - -## Expected Outcome -The project aims to identify differences between data science benchmarks and determine which benchmarks better represent real-world data science tasks. We expect to find capability gaps where some agents perform well on coding tasks but struggle with multi-step reasoning and workflow-based tasks. - -## Repository Structure - -``` -Benchmarking_Data_Science_Agents/ -│ -├── README.md # Project overview and instructions -├── data/ # Collected benchmark datasets -├── notebooks/ # Jupyter notebooks for analysis -├── src/ # Python scripts for data processing and analysis -├── results/ # Output files, plots, and tables -├── report/ # Final report and documentation -└── references/ # Papers and benchmark documentation -``` From 7384c224f3a1f649552148e302582d957cb43a51 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:55:55 -0400 Subject: [PATCH 26/28] Delete class_project/data605/Spring2026/projects/requirements.txt --- class_project/data605/Spring2026/projects/requirements.txt | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/requirements.txt diff --git a/class_project/data605/Spring2026/projects/requirements.txt b/class_project/data605/Spring2026/projects/requirements.txt deleted file mode 100644 index 49aca3901..000000000 --- a/class_project/data605/Spring2026/projects/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -matplotlib -numpy -pandas -seaborn From 88a3e4c88a3cc971b58857e7564443268a280d0d Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Tue, 31 Mar 2026 23:56:17 -0400 Subject: [PATCH 27/28] Delete class_project/data605/Spring2026/projects/template.example.py --- .../Spring2026/projects/template.example.py | 125 ------------------ 1 file changed, 125 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/template.example.py diff --git a/class_project/data605/Spring2026/projects/template.example.py b/class_project/data605/Spring2026/projects/template.example.py deleted file mode 100644 index b091b1369..000000000 --- a/class_project/data605/Spring2026/projects/template.example.py +++ /dev/null @@ -1,125 +0,0 @@ ---- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.19.0 -# kernelspec: -# display_name: Python 3 (ipykernel) -# language: python -# name: python3 -# --- - -# %% [markdown] -# # Template Example Notebook -# -# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. -# -# - Add description of what the notebook does. -# - Point to references, e.g. (neo4j.example.md) -# - Add citations. -# - Keep the notebook flow clear. -# - Comments should be imperative and have a period at the end. -# - Your code should be well commented. -# -# The name of this notebook should in the following format: -# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` -# -# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md - -# %% -# %load_ext autoreload -# %autoreload 2 -# %matplotlib inline - -# %% -import logging -# Import libraries in this section. -# Avoid imports like import *, from ... import ..., from ... import *, etc. - -import helpers.hdbg as hdbg -import helpers.hnotebook as hnotebo - -# %% -hdbg.init_logger(verbosity=logging.INFO) - -_LOG = logging.getLogger(__name__) - -hnotebo.config_notebook() - - -# %% [markdown] -# ## Make the notebook flow clear -# Each notebook needs to follow a clear and logical flow, e.g: -# - Load data -# - Compute stats -# - Clean data -# - Compute stats -# - Do analysis -# - Show results -# -# -# -# - - -# ############################################################################# -# Template -# ############################################################################# - - -# %% -class Template: - """ - Brief imperative description of what the class does in one line, if needed. - """ - - def __init__(self): - pass - - def method1(self, arg1: int) -> None: - """ - Brief imperative description of what the method does in one line. - - You can elaborate more in the method docstring in this section, for e.g. explaining - the formula/algorithm. Every method/function should have a docstring, typehints and include the - parameters and return as follows: - - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass - - -def template_function(arg1: int) -> None: - """ - Brief imperative description of what the function does in one line. - - You can elaborate more in the function docstring in this section, for e.g. explaining - the formula/algorithm. Every function should have a docstring, typehints and include the - parameters and return as follows: - - :param arg1: description of arg1 - :return: description of return - """ - # Code bloks go here. - # Make sure to include comments to explain what the code is doing. - # No empty lines between code blocks. - pass - - -# %% [markdown] -# ## The flow should be highlighted using headings in markdown -# ``` -# # Level 1 -# ## Level 2 -# ### Level 3 -# ``` - -# %% From 4501962419e9aa6b8ca0d0ae25549d5d9f10bc14 Mon Sep 17 00:00:00 2001 From: vsripada2423 Date: Wed, 1 Apr 2026 00:03:46 -0400 Subject: [PATCH 28/28] Delete class_project/data605/Spring2026/projects/template_utils.py --- .../Spring2026/projects/template_utils.py | 72 ------------------- 1 file changed, 72 deletions(-) delete mode 100644 class_project/data605/Spring2026/projects/template_utils.py diff --git a/class_project/data605/Spring2026/projects/template_utils.py b/class_project/data605/Spring2026/projects/template_utils.py deleted file mode 100644 index f8916102e..000000000 --- a/class_project/data605/Spring2026/projects/template_utils.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -template_utils.py - -This file contains utility functions that support the tutorial notebooks. - -- Notebooks should call these functions instead of writing raw logic inline. -- This helps keep the notebooks clean, modular, and easier to debug. -- Students should implement functions here for data preprocessing, - model setup, evaluation, or any reusable logic. - -Import as: - -import class_project.project_template.template_utils as cpptteut -""" - -import pandas as pd -import logging -from sklearn.model_selection import train_test_split -from pycaret.classification import compare_models - -# ----------------------------------------------------------------------------- -# Logging -# ----------------------------------------------------------------------------- - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# ----------------------------------------------------------------------------- -# Example 1: Split the dataset into train and test sets -# ----------------------------------------------------------------------------- - - -def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2): - """ - Split the dataset into training and testing sets. - - :param df: full dataset - :param target_column: name of the target column - :param test_size: proportion of test data (default = 0.2) - - :return: X_train, X_test, y_train, y_test - """ - logger.info("Splitting data into train and test sets") - X = df.drop(columns=[target_column]) - y = df[target_column] - return train_test_split(X, y, test_size=test_size, random_state=42) - - -# ----------------------------------------------------------------------------- -# Example 2: PyCaret classification pipeline -# ----------------------------------------------------------------------------- - - -def run_pycaret_classification( - df: pd.DataFrame, target_column: str -) -> pd.DataFrame: - """ - Run a basic PyCaret classification experiment. - - :param df: dataset containing features and target - :param target_column: name of the target column - - :return: comparison of top-performing models - """ - logger.info("Initializing PyCaret classification setup") - ... - - logger.info("Comparing models") - results = compare_models() - ... - - return results