From 834d384b2d5390bc6467fed3205009f9772d48e7 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:21:34 -0300 Subject: [PATCH 01/10] intial repo setup (#1) --- .circleci/example_config.yml | 31 ------------------------------- Makefile | 2 -- README.md | 18 +++++++++++++----- 3 files changed, 13 insertions(+), 38 deletions(-) delete mode 100644 .circleci/example_config.yml diff --git a/.circleci/example_config.yml b/.circleci/example_config.yml deleted file mode 100644 index ada64a7..0000000 --- a/.circleci/example_config.yml +++ /dev/null @@ -1,31 +0,0 @@ -version: "2.1" - -orbs: - python: circleci/python@2.1.1 -jobs: - build_and_test: - docker: - - image: cimg/python:3.10 - executor: - name: python/default - tag: "3.10" - steps: - - checkout - - python/install-packages: - pip-dependency-file: requirements.txt - pkg-manager: pip - - run: - name: Run tests - command: | - python -m pytest - - run: - name: Check linting - command: | - pip install pre-commit - pre-commit install - pre-commit run -a - -workflows: - main: - jobs: - - build_and_test diff --git a/Makefile b/Makefile index 3ea5588..bf94dda 100644 --- a/Makefile +++ b/Makefile @@ -5,5 +5,3 @@ install-pre-commit: lint: pre-commit run -a -test: - python -m pytest diff --git a/README.md b/README.md index 98b5835..26bdfdf 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,20 @@ -[![Python](https://img.shields.io/badge/python-3.10-blue.svg)](https://github.com) +[![Python](https://img.shields.io/badge/python-3.12-blue.svg)](https://github.com) -# Title +# Data Science Examples -Description +Useful examples for common Data Science use cases: +1. Classification + - Binary + - Multiclass +2. Regression +3. Clustering +4. 
Dimensionality Reduction + - PCA + - UMAP -## Installation +## Setup -### Pre Commit Setup +### Pre Commit ```bash pip install -r requirements.txt From 8d984ba29131c0cde0a97f4be3f04de08257109f Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Sun, 19 Jan 2025 22:53:59 -0300 Subject: [PATCH 02/10] Clustering use case (#2) * clustering use case * refining README * refining README * adjusting PR template --- .github/pull_request_template.md | 4 - README.md | 17 +- src/clustering.ipynb | 306 +++++++++++++++++++++++++++++++ src/utils/__init__.py | 0 src/utils/clustering.py | 152 +++++++++++++++ src/utils/common.py | 28 +++ src/utils/constants.py | 10 + 7 files changed, 503 insertions(+), 14 deletions(-) create mode 100755 src/clustering.ipynb create mode 100644 src/utils/__init__.py create mode 100644 src/utils/clustering.py create mode 100644 src/utils/common.py create mode 100644 src/utils/constants.py diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 1994b87..1b4e6ca 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,10 +7,6 @@ - [ ] New feature (non-breaking change which adds functionality) - [ ] New documentation -## How Has This Been Tested? - -- [x] `kedro run --pipeline ` - ## ✅ Checks - [ ] I have commented my code, particularly in hard-to-understand areas diff --git a/README.md b/README.md index 26bdfdf..2e31afd 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,13 @@ [![Python](https://img.shields.io/badge/python-3.12-blue.svg)](https://github.com) -# Data Science Examples +# Simple Data Science -Useful examples for common Data Science use cases: -1. Classification - - Binary - - Multiclass -2. Regression -3. Clustering -4. Dimensionality Reduction - - PCA - - UMAP +This repository provides simple and practical examples for common data science tasks using tabular data: +1. Binary Classification +2. Multiclass Classification +3. 
Regression +4. Clustering +5. Dimensionality Reduction ## Setup diff --git a/src/clustering.ipynb b/src/clustering.ipynb new file mode 100755 index 0000000..0c1f44d --- /dev/null +++ b/src/clustering.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Clustering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "from kneed import KneeLocator\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.cluster import KMeans\n", + "\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.constants import RANDOM_SEED\n", + "from utils.common import get_data_folder_path, set_plot_font_sizes\n", + "from utils.clustering import search_kmeans, plot_kmeans_search, plot_cluster_boxplots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# plots configuration\n", + "sns.set_style(\"darkgrid\")\n", + "sns.set_palette(\"colorblind\")\n", + "set_plot_font_sizes()\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Preprocessing" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = get_data_folder_path()\n", + "\n", + "df_input = pd.read_csv(os.path.join(data_path, 'fetal_health.csv'))\n", + "df_input.columns = [col.replace(' ', '_') for col in df_input.columns]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + 
"outputs": [], + "source": [ + "# define columns for clustering\n", + "cluster_cols = [\n", + " col for col in df_input.columns\n", + " # remove the target column to simulate an unsupervised problem\n", + " if col != \"fetal_health\"\n", + " # remove all histogram-derived and variability features to simplify the clustering process\n", + " and not col.startswith(\"histogram_\")\n", + " and not col.endswith(\"_variability\")\n", + "]\n", + "df_cl = df_input[cluster_cols]" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "### Scale data (if necessary)\n", + "\n", + "If all features used for clustering have the same range (e.g. scores form 0 to 100) or the same unit (e.g. distances), there is no need to standardize the data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# Standardize X_train and X_test\n", + "stdscaler = StandardScaler()\n", + "df_cl_std = pd.DataFrame(stdscaler.fit_transform(df_cl), columns=df_cl.columns, index=df_cl.index)" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": { + "tags": [] + }, + "source": [ + "## K-means" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "### Find best number of clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df_kmeans = search_kmeans(df_cl_std, max_n_clusters=15)" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "Elbow Method implementation:\n", + "- Kneedle algorithm original paper: https://www1.icsi.berkeley.edu/~barath/papers/kneedle-simplex11.pdf\n", + "- `kneed` python package: https://github.com/arvkevi/kneed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# determine the ideal number of cluster using the \"Elbow 
Method\"\n", + "# using the kneed package which implements the Kneedle algorithm\n", + "kl = KneeLocator(\n", + " x=df_kmeans[\"n_clusters\"].values,\n", + " y=df_kmeans[\"wcss\"].values,\n", + " curve=\"convex\",\n", + " direction=\"decreasing\"\n", + ")\n", + "print(f'Elbow Method: best number of clusters is {kl.elbow}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_kmeans_search(df_kmeans=df_kmeans, elbow=kl.elbow)" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "### Fit final model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "# fit K-means with selected number of clusters\n", + "kmeans_model = KMeans(n_clusters=kl.elbow, verbose=0, random_state=RANDOM_SEED)\n", + "kmeans_model.fit(df_cl_std)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "s_clusters = pd.Series(data=kmeans_model.labels_, name=\"cluster\", index=df_cl_std.index)\n", + "s_clusters += 1 # set first cluster as 1 instead of 0\n", + "\n", + "with warnings.catch_warnings(action=\"ignore\"):\n", + " df_cl_std.loc[:, \"cluster\"] = s_clusters\n", + " df_cl.loc[:, 'cluster'] = s_clusters\n", + " df_input.loc[:, \"cluster\"] = s_clusters" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "### Describe clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cluster_boxplots(\n", + " df=df_cl,\n", + " cluster_col=\"cluster\",\n", + " plots_per_line=2,\n", + " title=\"Features used in K-means Clustering\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_cluster_boxplots(\n", + " 
df=df_input,\n", + " cluster_col=\"cluster\",\n", + " plots_per_line=2,\n", + " title=\"All features from input dataset\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ds", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/clustering.py b/src/utils/clustering.py new file mode 100644 index 0000000..b860b3d --- /dev/null +++ b/src/utils/clustering.py @@ -0,0 +1,152 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +from tqdm import tqdm +from sklearn.cluster import KMeans +from sklearn.metrics import silhouette_score + +from .constants import RANDOM_SEED + + +def search_kmeans(df_cl: pd.DataFrame, max_n_clusters: int) -> pd.DataFrame: + + kmeans_search_lst = [] + + for i in tqdm(range(1, max_n_clusters + 1)): + kmeans_dict = dict() + kmeans_dict["n_clusters"] = i + kmeans_model = KMeans(n_clusters=i, verbose=0, random_state=RANDOM_SEED) + kmeans_model.fit(df_cl) + # save within cluster sum of squares + kmeans_dict["wcss"] = kmeans_model.inertia_ + if i > 1: + # save silhouette score + kmeans_dict["silhouette_score"] = silhouette_score(df_cl, kmeans_model.labels_) + + kmeans_search_lst.append(kmeans_dict) + + # consolidate results in a dataframe + df_kmeans = pd.DataFrame(kmeans_search_lst) + + return df_kmeans + + +def 
plot_kmeans_search( + df_kmeans: pd.DataFrame, + elbow: int, + title: str = "K-means Clustering", + figsize: tuple[int] = (9, 7), +): + fig, axes = plt.subplots( + nrows=2, + ncols=1, + figsize=figsize, + sharex=True, + ) + + plt.suptitle(title) + color_lst = sns.color_palette() + + for ax, plot_col, plot_title in zip( + axes, + ["wcss", "silhouette_score"], + ["Within Cluster Sum of Squared Distances (WCSS)", "Silhouette Score"], + ): + + ax.plot( + df_kmeans["n_clusters"].values, + df_kmeans[plot_col].values, + color=color_lst[0], + marker="o", + linestyle="--", + zorder=2, + ) + ax.scatter( + elbow, + df_kmeans.loc[(df_kmeans["n_clusters"] == elbow), plot_col], + marker="o", + s=250, + color=color_lst[1], + label=f"Elbow Method's optimal\nnumber of clusters (n={elbow})", + alpha=0.75, + zorder=1, + ) + ax.set_title(plot_title) + ax.set_xticks(df_kmeans["n_clusters"].values) + ax.set_ylabel(None) + + axes[0].legend() + axes[1].set_xlabel("Number of clusters") + + fig.tight_layout() + + return fig + + +def plot_cluster_boxplots( + df: pd.DataFrame, + cluster_col: str, + plot_cols: list[str] = None, + plots_per_line: int = 2, + display_order: list[str] = None, + title: str = "Features by Cluster", + share_y_axis: bool = False, + y_lim: list[float | int] = None, + scale_factor: float = 1.5, +): + n_clusters = df[cluster_col].nunique() + + if plot_cols is None: + plot_cols = [col for col in df.columns if col != cluster_col] + num_lines = int(np.ceil(len(plot_cols) / plots_per_line)) + fig, axes = plt.subplots( + nrows=num_lines, + ncols=plots_per_line, + figsize=(n_clusters * plots_per_line * scale_factor, num_lines * scale_factor * 2), + sharey=share_y_axis, + ) + axes_flattend = axes.flatten() + + plt.suptitle(title, y=1) + color_lst = sns.color_palette() + + if display_order is None: + display_order = np.sort(df[cluster_col].unique()).tolist() + + for ax, col in zip(axes_flattend, plot_cols): + sns.boxplot( + x=df[cluster_col], + y=df[col], + order=display_order, 
+ ax=ax, + fliersize=2, + color=color_lst[0], + medianprops=dict(linewidth=2, alpha=1.0), + flierprops=dict(markerfacecolor="black", marker=".", alpha=0.33), + showmeans=True, + meanprops=dict( + marker=5, + markerfacecolor=color_lst[1], + markeredgecolor=color_lst[1], + markersize=10, + ), + ) + ax.set_title(col) + ax.set_ylabel("") + ax.set_xlabel("") + if y_lim is not None: + y_range = max(y_lim) - min(y_lim) + pct_margin = 0.01 + ax.set_ylim( + ymin=(min(y_lim) - y_range * pct_margin), ymax=(max(y_lim) + y_range * pct_margin) + ) + + # delete unused axes + for ax in axes_flattend[len(plot_cols) :]: + fig.delaxes(ax=ax) + + fig.tight_layout() + + return fig diff --git a/src/utils/common.py b/src/utils/common.py new file mode 100644 index 0000000..dcf83f0 --- /dev/null +++ b/src/utils/common.py @@ -0,0 +1,28 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE + + +def get_repo_root_path() -> str: + return os.path.normpath(os.getcwd().split(REPO_NAME, maxsplit=1)[0] + REPO_NAME) + + +def get_data_folder_path() -> str: + repo_path = get_repo_root_path() + data_path = os.path.normpath(os.path.join(repo_path, "data")) + return data_path + + +def convert_to_integer(s: pd.Series) -> pd.Series: + return pd.to_numeric(s, downcast="integer", errors="raise") + + +def set_plot_font_sizes() -> None: + plt.rc("font", size=SMALL_FONTSIZE) # default font size + plt.rc("figure", titlesize=BIG_FONTSIZE) # figure title + plt.rc("legend", fontsize=SMALL_FONTSIZE) # legend + plt.rc("axes", titlesize=MEDIUM_FONTSIZE) # axes title + plt.rc("axes", labelsize=SMALL_FONTSIZE) # axes labels + plt.rc("xtick", labelsize=SMALL_FONTSIZE) # x tick labels + plt.rc("ytick", labelsize=SMALL_FONTSIZE) # y tick labels diff --git a/src/utils/constants.py b/src/utils/constants.py new file mode 100644 index 0000000..836bda8 --- /dev/null +++ b/src/utils/constants.py @@ -0,0 +1,10 @@ +# repository name 
+REPO_NAME = "data-science" + +# random seed +RANDOM_SEED = 42 + +# font sizes for plots +SMALL_FONTSIZE = 12 +MEDIUM_FONTSIZE = 14 +BIG_FONTSIZE = 17 From e22ba0f660807bc6e9069898f376d2bf21535ce0 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Sun, 19 Jan 2025 23:04:57 -0300 Subject: [PATCH 03/10] chore: updating repository name (#3) --- data/.gitkeep | 0 src/utils/constants.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 data/.gitkeep diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/constants.py b/src/utils/constants.py index 836bda8..08fef0a 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -1,5 +1,5 @@ # repository name -REPO_NAME = "data-science" +REPO_NAME = "simple-data-science" # random seed RANDOM_SEED = 42 From 2b57d9443bbea819b0d71f5fd307f31f53ef9cff Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Sun, 26 Jan 2025 20:45:49 -0300 Subject: [PATCH 04/10] feat: binary classification use case (#4) * chore: fixing pre-commit install command * feat: generalizing plot boxplot function * feat: adding correlation matrix plotting function * feat: feature selection util * feat: adding L1-based regularization step for feature selection * feat: binary classification use case * feat: xgboost classifier * chore: removing WIP tag from binary classification --- README.md | 10 +- src/classification-binary.ipynb | 879 ++++++++++++++++++++++++++++++++ src/clustering.ipynb | 26 +- src/utils/classification.py | 575 +++++++++++++++++++++ src/utils/clustering.py | 67 --- src/utils/common.py | 104 ++++ src/utils/feature_selection.py | 294 +++++++++++ 7 files changed, 1870 insertions(+), 85 deletions(-) create mode 100644 src/classification-binary.ipynb create mode 100644 src/utils/classification.py create mode 100644 src/utils/feature_selection.py diff --git a/README.md 
b/README.md index 2e31afd..864237a 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,19 @@ # Simple Data Science -This repository provides simple and practical examples for common data science tasks using tabular data: +This repository compiles simple and practical examples for common data science tasks using tabular data: 1. Binary Classification -2. Multiclass Classification -3. Regression +2. (WIP) Multiclass Classification +3. (WIP) Regression 4. Clustering -5. Dimensionality Reduction +5. (WIP) Dimensionality Reduction ## Setup ### Pre Commit ```bash -pip install -r requirements.txt +pip install pre-commit pre-commit install ``` diff --git a/src/classification-binary.ipynb b/src/classification-binary.ipynb new file mode 100644 index 0000000..d1cb1d8 --- /dev/null +++ b/src/classification-binary.ipynb @@ -0,0 +1,879 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": { + "tags": [] + }, + "source": [ + "# Binary Classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import logging\n", + "import warnings\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import shap\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV\n", + "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", + "from xgboost import XGBClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from utils.constants import RANDOM_SEED\n", + "from utils.common import (\n", + " get_data_folder_path,\n", + " set_plot_font_sizes,\n", + " 
plot_boxplot_by_class,\n", + " plot_correlation_matrix,\n", + ")\n", + "from utils.classification import (\n", + " describe_input_features,\n", + " plot_confusion_matrix,\n", + " plot_target_rate,\n", + " compute_classification_metrics,\n", + " build_coefficients_table,\n", + " plot_coefficients_values,\n", + " plot_coefficients_significance,\n", + " plot_eval_metrics_xgb,\n", + " plot_gain_metric_xgb,\n", + " plot_shap_importance,\n", + " plot_shap_beeswarm,\n", + " build_ks_table,\n", + " beautify_ks_table,\n", + " plot_ks_table,\n", + " plot_roc_curve,\n", + ")\n", + "from utils.feature_selection import run_feature_selection_steps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "logging.basicConfig(\n", + " level=logging.INFO,\n", + " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", + " datefmt='%H:%M:%S',\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "pd.set_option('display.max_columns', None)\n", + "pd.options.display.float_format = \"{:.2f}\".format\n", + "\n", + "mpl.rcParams['font.sans-serif'] = \"Arial\"\n", + "plt.set_loglevel('WARNING')\n", + "\n", + "# plots configuration\n", + "sns.set_style(\"darkgrid\")\n", + "sns.set_palette(\"colorblind\")\n", + "set_plot_font_sizes()\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## 1. Define Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "target_col = \"is_normal\"\n", + "target_classes_dict = {\n", + " 0: 'Not Normal',\n", + " 1: 'Normal'\n", + "}\n", + "test_size = 0.20" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": { + "tags": [] + }, + "source": [ + "## 2. Load Data\n", + "\n", + "In this notebook we will use the Fetal Health Dataset.\n", + "\n", + "Sources:\n", + "1. 
Fetal Health Classification Dataset: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", + "2. Original article: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC68223152" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = get_data_folder_path()\n", + "\n", + "df_input = pd.read_csv(os.path.join(data_path, 'fetal_health.csv'))\n", + "df_input.columns = [col.replace(' ', '_') for col in df_input.columns]" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "### Target column\n", + "\n", + "Fetal health (target column) can have the following values:\n", + "- 1 - Normal\n", + "- 2 - Suspect\n", + "- 3 - Pathological\n", + "\n", + "For this notebook, we will consider the Normal/not Normal distinction for binary classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "df_input[target_col] = (df_input['fetal_health'] == 1).astype(np.int8)\n", + "df_input.drop(columns=[\"fetal_health\"], inplace=True)" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "### Train test split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "df_input_train, df_input_test = train_test_split(\n", + " df_input,\n", + " test_size=test_size,\n", + " stratify=df_input[target_col],\n", + " random_state=RANDOM_SEED,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "pd.concat([\n", + " df_input_train[target_col].value_counts(dropna=False, normalize=False).rename(\"train_target_count\"),\n", + " df_input_train[target_col].value_counts(dropna=False, normalize=True).rename(\"train_target_pct\"),\n", + " df_input_test[target_col].value_counts(dropna=False, 
normalize=False).rename(\"test_target_count\"),\n", + " df_input_test[target_col].value_counts(dropna=False, normalize=True).rename(\"test_target_pct\"),\n", + "], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "describe_input_features(df_input, df_input_train, df_input_test)" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## 3. Exploratory Data Analysis (EDA)" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "### Boxplots by Target Class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_boxplot_by_class(\n", + " df=df_input_train, # use only training data to avoid bias in test results\n", + " class_col=target_col,\n", + " plots_per_line=6,\n", + " title=\"Features in input dataset\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "### Pearson's Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_correlation_matrix(\n", + " df=df_input_train, # use only training data to avoid bias in test results\n", + " method=\"pearson\",\n", + " fig_height=10\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "## 4. 
Feature Selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "fs_steps = {\n", + " \"manual\": dict(\n", + " cols_to_exclude=[\n", + " \"histogram_variance\",\n", + " \"severe_decelerations\",\n", + " ]\n", + " ),\n", + " \"variance\": dict(threshold=0),\n", + " \"correlation\": dict(threshold=0.9),\n", + " \"l1_regularization\": dict(\n", + " problem=\"classification\",\n", + " train_test_split_params=dict(test_size=test_size),\n", + " logspace_search=dict(start=-5, stop=1, num=20, base=10),\n", + " # tolerance over minimum error with which to search for the best model\n", + " error_tolerance_pct=0.02,\n", + " # minimum features to keep in final selection\n", + " min_feats_to_keep=4,\n", + " random_seed=RANDOM_SEED,\n", + " ),\n", + " \"vif\": dict(threshold=5),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "selected_feats, df_fs = run_feature_selection_steps(\n", + " df_input=df_input_train, # use only training data to avoid bias in test results\n", + " target_col=target_col,\n", + " fs_steps=fs_steps\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "### Correlation check\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_correlation_matrix(\n", + " df=df_input_train[selected_feats + [target_col]], # use only training data to avoid bias in test results\n", + " method=\"pearson\",\n", + " fig_height=5\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "24", + "metadata": {}, + "source": [ + "### Multicollinearity check\n", + "\n", + "Multicollinearity is a problem because it undermines the statistical significance of an independent variable. 
[\\[source\\]](https://link.springer.com/chapter/10.1007/978-0-585-25657-3_37)\n", + "\n", + "Multicollinearity does not affect the accuracy of predictive models, including regression models. \\[...\\] Now, where multicollinearity becomes 'an issue' is when you want to 'interpret' the parameters learned by your model. In other words, you cannot say that the feature with the 'biggest weight' is 'the most important' when the features are correlated. Note that this is independent on the accuracy of the model, this is only the interpretation part [\\[source\\]](https://www.researchgate.net/post/Are-Random-Forests-affected-by-multi-collinearity-between-features)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "25", + "metadata": {}, + "source": [ + "**Variance Inflation Factor (VIF)**\n", + "\n", + "The variance inflation factor (VIF) is a statistical tool that measures the amount of multicollinearity in a regression model. As a general rule of thumb, \"VIF > 5 is cause for concern and VIF > 10 indicates a serious collinearity problem.\"\n", + "\n", + "The higher the VIF:\n", + "- The more correlated a predictor is with the other predictors\n", + "- The more the standard error is inflated\n", + "- The larger the confidence interval\n", + "- The less likely it is that a coefficient will be evaluated as statistically significant\n", + "[\\[source\\]](https://towardsdatascience.com/everything-you-need-to-know-about-multicollinearity-2f21f082d6dc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26", + "metadata": {}, + "outputs": [], + "source": [ + "df_vif = pd.DataFrame(\n", + " data=[variance_inflation_factor(df_input_train[selected_feats].values, i) for i in range(len(selected_feats))],\n", + " index=selected_feats,\n", + " columns=['VIF']\n", + ").sort_values('VIF', ascending=False)\n", + "\n", + "df_vif" + ] + }, + { + "cell_type": "markdown", + "id": "27", + "metadata": {}, + "source": [ + "### Model input datasets" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "# train datasets\n", + "X_train = df_input_train[selected_feats]\n", + "y_train = df_input_train[target_col]\n", + "# test datatsets\n", + "X_test = df_input_test[selected_feats]\n", + "y_test = df_input_test[target_col]" + ] + }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "### Scaling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "# Standardize X_train and X_test\n", + "stdscaler = StandardScaler()\n", + "X_train_std = pd.DataFrame(stdscaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)\n", + "X_test_std = pd.DataFrame(stdscaler.transform(X_test), columns=X_test.columns, index=X_test.index)" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "## 5. Classifier Model" + ] + }, + { + "cell_type": "markdown", + "id": "32", + "metadata": {}, + "source": [ + "### Select classifier: Logistic Regression or XGBoost" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_SELECTION = \"logistic_regression\"\n", + "# MODEL_SELECTION = \"xgboost\"" + ] + }, + { + "cell_type": "markdown", + "id": "34", + "metadata": {}, + "source": [ + "### Hyperparameter tuning with K-Fold Cross Validation\n", + "\n", + "For detailed explanation on XGBoost's parameters: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"logistic_regression\":\n", + " Estimator = LogisticRegression\n", + " cv_search_space = {\n", + " 'penalty': ['l1', 'l2', 'elasticnet'],\n", + " 'C': np.logspace(-3, 1, num=9, base=10.0),\n", + " 'class_weight': 
[None],\n", + " }\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " Estimator = XGBClassifier\n", + " cv_search_space = {\n", + " 'objective': ['binary:logistic'],\n", + " 'n_estimators': [30, 40, 50],\n", + " 'learning_rate': [0.1],\n", + " 'max_depth': [3, 4, 6],\n", + " 'min_child_weight': [2, 4],\n", + " 'gamma': [0, 0.5],\n", + " 'alpha':[0, 0.3],\n", + " 'scale_pos_weight': [1],\n", + " 'lambda':[1],\n", + " ## 'subsample': [0.8, 1.0],\n", + " ## 'colsample_bytree': [0.8, 1.0],\n", + " 'verbosity': [0],\n", + " }\n", + "else:\n", + " raise ValueError(\n", + " \"'MODEL_SELECTION' must be either 'logistic_regression' or 'xgboost'. \"\n", + " f\"Got {MODEL_SELECTION} instead.\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "cv_scoring_metrics = {\n", + " \"roc_auc\": \"ROC AUC\",\n", + " \"accuracy\": \"Accuracy\",\n", + " \"precision\": \"Precision\",\n", + " \"recall\": \"Recall\",\n", + " \"f1\": \"F1 Score\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# define evaluation\n", + "kfold_cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_SEED)\n", + "# define search\n", + "grid_search = GridSearchCV(\n", + " estimator=Estimator(),\n", + " param_grid=cv_search_space,\n", + " scoring=list(cv_scoring_metrics.keys()),\n", + " cv=kfold_cv,\n", + " refit=\"f1\",\n", + " verbose=1,\n", + ")\n", + "# execute search\n", + "with warnings.catch_warnings(action=\"ignore\"):\n", + " result_cv = grid_search.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Grid Search CV Best Model - Scoring Metrics:\")\n", + "for i, (metric_key, metric_name) in enumerate(cv_scoring_metrics.items(), start=1):\n", + " print(\n", + " f\" {i}. 
{(metric_name + \":\").ljust(10)} \"\n", + " f\"{result_cv.cv_results_[f\"mean_test_{metric_key}\"][result_cv.best_index_]:.3f}\"\n", + " )\n", + "print(f\"\\nBest Hyperparameters: {result_cv.best_params_}\")" + ] + }, + { + "cell_type": "markdown", + "id": "39", + "metadata": {}, + "source": [ + "### Final Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate model with best hyperparameters and additional kwargs\n", + "if MODEL_SELECTION == \"logistic_regression\":\n", + " model_kwargs = dict()\n", + " model_fit_kwargs = dict()\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " eval_metrics = dict(\n", + " logloss=\"Binary Cross-entropy Loss (Log-loss)\",\n", + " error=\"Binary Classification Error Rate\",\n", + " auc=\"ROC AUC\",\n", + " )\n", + " model_kwargs = dict(eval_metric=list(eval_metrics.keys()))\n", + " model_fit_kwargs = dict(\n", + " eval_set=[(X_train_std, y_train), (X_test_std, y_test)],\n", + " verbose=False\n", + " )\n", + " \n", + "model = Estimator(**result_cv.best_params_, **model_kwargs, random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [ + "# Fit model and make predictions\n", + "model.fit(X_train_std, y_train, **model_fit_kwargs)\n", + "# Make predictions\n", + "y_pred_proba_train = pd.Series(data=model.predict_proba(X_train_std)[:, 1], index=X_train_std.index)\n", + "y_pred_proba = pd.Series(data=model.predict_proba(X_test_std)[:, 1], index=X_test_std.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"xgboost\":\n", + " fig = plot_eval_metrics_xgb(model.evals_result(), eval_metrics)" + ] + }, + { + "cell_type": "markdown", + "id": "43", + "metadata": {}, + "source": [ + "**Plot target rate per group of predicted probability**\n", + "\n", 
+ "A good model should have increasing target rate for each group of predicted probability (e.g. quartiles, deciles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_target_rate(y_test, y_pred_proba)" + ] + }, + { + "cell_type": "markdown", + "id": "45", + "metadata": {}, + "source": [ + "**Define optimal threshold for separating classes using the ROC Curve**\n", + "\n", + "The optimal threshold is the one that maximizes the difference between the True Positive Rate (TPR) and False Positive Rate (FPR)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46", + "metadata": {}, + "outputs": [], + "source": [ + "fig, optimal_thresh = plot_roc_curve(\n", + " y_true=y_train, # use only training data to avoid bias in test results\n", + " y_pred_proba=y_pred_proba_train,\n", + " title=\"ROC Curve on Training Data\",\n", + " ret_optimal_thresh=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47", + "metadata": {}, + "outputs": [], + "source": [ + "# compute binary predictions\n", + "print(f\"Optimal Threshold for Classification: {100*optimal_thresh:.2f}%\")\n", + "y_pred_train = (y_pred_proba_train > optimal_thresh).astype(int)\n", + "y_pred = (y_pred_proba > optimal_thresh).astype(int)" + ] + }, + { + "cell_type": "markdown", + "id": "48", + "metadata": {}, + "source": [ + "### Feature Importance\n", + "\n", + "- For Logistic Regression: coefficients values and statistical significance\n", + "- For XGBoost: SHAP analysis and Gain Metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"logistic_regression\":\n", + " df_coefficients = build_coefficients_table(model, X_train_std)\n", + " fig = plot_coefficients_values(df_coefficients)\n", + " fig = plot_coefficients_significance(df_coefficients, log_scale=False)\n", + " 
\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " # compute SHAP values\n", + " explainer = shap.Explainer(model)\n", + " shap_values = explainer(X_test_std)\n", + " # shap plots\n", + " fig = plot_shap_importance(shap_values)\n", + " fig = plot_shap_beeswarm(shap_values)\n", + " fig = plot_gain_metric_xgb(model, X_test_std)" + ] + }, + { + "cell_type": "markdown", + "id": "50", + "metadata": {}, + "source": [ + "### Performance Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51", + "metadata": {}, + "outputs": [], + "source": [ + "df_train_metrics = pd.Series(\n", + " compute_classification_metrics(y_train, y_pred_train, y_pred_proba_train)\n", + ").to_frame(name=\"Train Metrics\")\n", + "df_test_metrics = pd.Series(\n", + " compute_classification_metrics(y_test, y_pred, y_pred_proba)\n", + ").to_frame(name=\"Test Metrics\")\n", + "\n", + "print(\"Final Model - Scoring Metrics on Train & Test Datasets:\")\n", + "df_metrics = df_train_metrics.join(df_test_metrics)\n", + "display(df_metrics)" + ] + }, + { + "cell_type": "markdown", + "id": "52", + "metadata": {}, + "source": [ + "#### Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53", + "metadata": {}, + "outputs": [], + "source": [ + "# Confusion Matrix\n", + "fig = plot_confusion_matrix(\n", + " y_test,\n", + " y_pred,\n", + " estimator=model,\n", + " target_classes_dict=target_classes_dict,\n", + " normalize=\"true\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "54", + "metadata": {}, + "source": [ + "#### ROC AUC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_roc_curve(y_test, y_pred_proba)" + ] + }, + { + "cell_type": "markdown", + "id": "56", + "metadata": {}, + "source": [ + "#### KS Gain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57", + "metadata": {}, + "outputs": [], + "source": [ + "df_ks, 
ks_score = build_ks_table(y_test, y_pred_proba, ret_ks=True)\n", + "print(f\"KS score: {ks_score * 100:.2f} p.p.\")\n", + "beautify_ks_table(df_ks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plot_ks_table(df_ks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ds", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/clustering.ipynb b/src/clustering.ipynb index 0c1f44d..a5ab1af 100755 --- a/src/clustering.ipynb +++ b/src/clustering.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "1", "metadata": {}, "outputs": [], @@ -29,19 +29,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "2", "metadata": {}, "outputs": [], "source": [ "from utils.constants import RANDOM_SEED\n", - "from utils.common import get_data_folder_path, set_plot_font_sizes\n", - "from utils.clustering import search_kmeans, plot_kmeans_search, plot_cluster_boxplots" + "from utils.common import get_data_folder_path, set_plot_font_sizes, plot_boxplot_by_class\n", + "from utils.clustering import 
search_kmeans, plot_kmeans_search" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "3", "metadata": {}, "outputs": [], @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "6", "metadata": {}, "outputs": [], @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "7", "metadata": {}, "outputs": [], @@ -113,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "9", "metadata": {}, "outputs": [], @@ -213,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "18", "metadata": {}, "outputs": [], @@ -242,9 +242,9 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_cluster_boxplots(\n", + "fig = plot_boxplot_by_class(\n", " df=df_cl,\n", - " cluster_col=\"cluster\",\n", + " class_col=\"cluster\",\n", " plots_per_line=2,\n", " title=\"Features used in K-means Clustering\",\n", ")" @@ -257,9 +257,9 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_cluster_boxplots(\n", + "fig = plot_boxplot_by_class(\n", " df=df_input,\n", - " cluster_col=\"cluster\",\n", + " class_col=\"cluster\",\n", " plots_per_line=2,\n", " title=\"All features from input dataset\",\n", ")" diff --git a/src/utils/classification.py b/src/utils/classification.py new file mode 100644 index 0000000..4f3601e --- /dev/null +++ b/src/utils/classification.py @@ -0,0 +1,575 @@ +import math +import numpy as np +import pandas as pd +import shap +import scipy +import seaborn as sns +import matplotlib.pyplot as plt + +from typing import Any +from matplotlib import patches as mpatches +from matplotlib import ticker as mticker + +from sklearn.metrics import ( + accuracy_score, + roc_auc_score, + roc_curve, + auc, + precision_score, + recall_score, + f1_score, + confusion_matrix, +) +from sklearn.linear_model import LogisticRegression, LinearRegression + +from .common import 
convert_to_integer + + +def describe_input_features( + df_input: pd.DataFrame, + df_input_train: pd.DataFrame, + df_input_test: pd.DataFrame, +) -> pd.DataFrame: + df_describe = df_input.describe().T + df_describe["count"] = convert_to_integer(df_describe["count"]) + df_describe["null_count"] = df_input.isna().sum() + df_describe["data_type"] = df_input.dtypes.astype(str).apply( + lambda x: "numeric" if any([tp in x for tp in ["int", "float"]]) else "categorical" + ) + # reorder columns + df_describe = df_describe[ + ["data_type", "count", "null_count", "min", "25%", "50%", "75%", "max", "std", "mean"] + ] + df_describe["mean_train"] = df_input_train.mean() + df_describe["mean_test"] = df_input_test.mean() + df_describe["train_test_pct_diff"] = ( + df_describe["mean_test"] - df_describe["mean_train"] + ) / df_describe["mean_train"] + + return df_describe + + +def plot_roc_curve( + y_true: pd.Series, + y_pred_proba: pd.Series, + title: str = "Receiver Operating Characteristic", + figsize: tuple[int, int] = (8, 5), + ret_optimal_thresh: bool = False, +) -> plt.Figure | tuple[plt.Figure, np.float64]: + fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba) + optimal_thresh = thresholds[np.argmax(tpr - fpr)] + roc_auc = auc(fpr, tpr) + + fig = plt.figure(figsize=figsize) + color_lst = sns.color_palette() + margin = 5 + plt.title(title) + plt.plot( + fpr * 100, + tpr * 100, + color=color_lst[0], + ls="-", + lw=1, + label=f"ROC Curve (AUC: {roc_auc:.3f})", + ) + plt.plot( + [0, 100], + [0, 100], + color=color_lst[1], + ls="--", + lw=0.5, + label="Random Classifier", + ) + plt.ylabel("True Positive Rate") + plt.xlabel("False Positive Rate") + if ret_optimal_thresh: + plt.vlines( + x=100 * optimal_thresh, + ymin=-margin, + ymax=100 + margin, + color=color_lst[2], + ls="--", + label=f"Optimal Threshold: {100*optimal_thresh:.1f}%", + ) + plt.legend(loc="lower right", framealpha=1) + ax = plt.gca() + ax.set_xlim([-margin, 100 + margin]) + ax.set_ylim([-margin, 100 + margin]) 
+ ax.yaxis.set_major_formatter(mticker.PercentFormatter()) + ax.xaxis.set_major_formatter(mticker.PercentFormatter()) + + if ret_optimal_thresh: + return fig, optimal_thresh + else: + return fig + + +def plot_target_rate( + y_test: pd.Series, + y_pred_proba: pd.Series, + title: str = "Target rate per group of predicted probability", +) -> plt.Figure: + + df_gh = pd.concat( + [ + y_test.rename("true_label"), + y_pred_proba.rename("pred_proba"), + # quratiles + pd.qcut( + y_pred_proba.rank(method="first"), + q=4, + labels=[f"Q{i}" for i in range(1, 4 + 1)], + duplicates="raise", + ).rename("pred_quartile"), + # deciles + pd.qcut( + y_pred_proba.rank(method="first"), + q=10, + labels=[f"D{i}" for i in range(1, 10 + 1)], + duplicates="raise", + ).rename("pred_decile"), + ], + axis=1, + ) + + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4)) + fig.suptitle(title, y=1.025) + + for ax, groupby_col, plot_title in zip( + axes, + ["pred_quartile", "pred_decile"], + ["Quartiles of model's predicted probability", "Deciles of model's predicted probability"], + ): + ax.set_title(plot_title) + ax = ( + 100 + * ( + df_gh.groupby(groupby_col, observed=True).agg( + taxa_pgto_parcela=("true_label", "mean") + ) + ) + ).plot(kind="bar", legend=False, rot=0, ax=ax) + ax.yaxis.set_major_formatter(mticker.PercentFormatter()) + ax.set_xlabel("") + + return fig + + +def compute_classification_metrics( + y_true: pd.Series, y_pred: pd.Series, y_pred_proba: pd.Series +) -> dict[str, float]: + metrics_dict = dict() + + if len(np.unique(y_true)) > 1: + metrics_dict["KS"] = compute_ks_score(y_true=y_true, y_pred_proba=y_pred_proba) + roc_auc = roc_auc_score(y_true=y_true, y_score=y_pred_proba) + metrics_dict["ROC AUC"] = roc_auc + metrics_dict["GINI"] = 2 * roc_auc - 1 + else: + metrics_dict["KS"] = np.nan + metrics_dict["ROC AUC"] = np.nan + metrics_dict["GINI"] = np.nan + metrics_dict["Accuracy"] = accuracy_score(y_true=y_true, y_pred=y_pred) + metrics_dict["Precision"] = 
precision_score(y_true=y_true, y_pred=y_pred) + metrics_dict["Recall"] = recall_score(y_true=y_true, y_pred=y_pred) + metrics_dict["F1 Score"] = f1_score(y_true=y_true, y_pred=y_pred) + + return metrics_dict + + +def _get_logit_stderror_pvalues( + model: LogisticRegression, x: pd.DataFrame | np.ndarray +) -> tuple[np.ndarray, np.ndarray]: + """Calculate z-scores for scikit-learn LogisticRegression. + This function uses asymtptics for maximum likelihood estimates. + + Source: https://stackoverflow.com/a/47079198 + + parameters: + model: fitted sklearn.linear_model.LogisticRegression with intercept and large C + x: matrix on which the model was fit + """ + p = model.predict_proba(x) + n = len(p) + m = len(model.coef_[0]) + 1 + coefs = np.concatenate([model.intercept_, model.coef_[0]]) + x_full = np.matrix(np.insert(np.array(x), 0, 1, axis=1)) + ans = np.zeros((m, m)) + for i in range(n): + ans = ans + np.dot(np.transpose(x_full[i, :]), x_full[i, :]) * p[i, 1] * p[i, 0] + vcov = np.linalg.inv(np.matrix(ans)) + se = np.sqrt(np.diag(vcov)) + t = coefs / se + p_values = (1 - scipy.stats.norm.cdf(abs(t))) * 2 + + return se, p_values + + +def build_coefficients_table( + model: LogisticRegression | LinearRegression, + X_train_std: pd.DataFrame, +) -> pd.DataFrame: + if not isinstance(model, (LogisticRegression, LinearRegression)): + raise ValueError( + "Model must be either sklearn's Linear Regression or Logistic Regression. " + f"Got {type(model)} instead." 
+ ) + + df_coef = pd.DataFrame( + np.transpose(model.coef_), columns=["Coefficients"], index=model.feature_names_in_ + ) + df_coef["Absolute Coefficients"] = df_coef["Coefficients"].abs() + # compute Standard Error and coefficients' p-values + stderr, pvalues = _get_logit_stderror_pvalues(model, X_train_std) + df_coef["Standard Error"] = stderr[1:] # [1:] to skip constant + df_coef["95% CI"] = df_coef["Standard Error"] * 1.96 + df_coef["p-values"] = pvalues[1:] # [1:] to skip constant + + df_coef = df_coef.sort_values(by="Absolute Coefficients", ascending=False) + + return df_coef + + +def _get_order_of_magnitude(number: float | int) -> float: + return math.floor(math.log(number, 10)) + + +def plot_coefficients_values( + df_coef: pd.DataFrame, +) -> plt.Figure: + + fig, ax = plt.subplots(nrows=1, ncols=1) + fig.suptitle("Coefficient Values with 95% CI (±1.96 Std Error)") + max_coeff, max_ci = df_coef[["Absolute Coefficients", "95% CI"]].max().tolist() + + if _get_order_of_magnitude(max_ci) > _get_order_of_magnitude(max_coeff): + # limit x axis range as CI is too large + ax.set_xlim([-1, max_coeff * 1.5]) + + colors_dict = {"Positive": "royalblue", "Negative": "crimson"} + df_plot = df_coef.sort_values(by="Absolute Coefficients", ascending=True) + ax = df_plot["Absolute Coefficients"].plot( + kind="barh", + color=df_plot.apply( + lambda row: ( + colors_dict["Negative"] if row["Coefficients"] < 0 else colors_dict["Positive"] + ), + axis=1, + ), + figsize=(10, max(df_plot.shape[0] / 2, 4)), + legend=False, + ax=ax, + xerr=df_plot["95% CI"], + ecolor="black", + error_kw={"label": "95% confidence interval", "capsize": 4, "capthick": 1}, + ) + + ax.xaxis.grid(True) + ax.set_axisbelow(True) + legend_patches = [ + mpatches.Patch(color=colors_dict["Positive"], label="Positive coefficient"), + mpatches.Patch(color=colors_dict["Negative"], label="Negative coefficient"), + ax.get_legend_handles_labels()[0][0], # confidence interval + ] + plt.legend(handles=legend_patches, 
loc="lower right", framealpha=1) + + return fig + + +def plot_coefficients_significance( + df_coef: pd.DataFrame, + alpha: float = 0.05, + log_scale: bool = False, +) -> plt.Figure: + + fig, ax = plt.subplots(nrows=1, ncols=1) + fig.suptitle( + "Coefficients' Statistical Significance " f"({100*(1 - alpha):.0f}% Confidence Level)" + ) + + colors_dict = {"fail": "orange", "pass": "limegreen", "threshold": "crimson"} + df_plot = df_coef.sort_values(by="Absolute Coefficients", ascending=True) + ax = df_plot["p-values"].plot( + kind="barh", + color=df_plot.apply( + lambda row: colors_dict["pass"] if row["p-values"] < alpha else colors_dict["fail"], + axis=1, + ), + figsize=(10, max(df_plot.shape[0] / 2, 4)), + legend=False, + ax=ax, + label=None, + ) + if log_scale: + ax.set_xscale("log") + # add vertical line at alpha + ax.vlines( + x=alpha, + ymin=-1, + ymax=len(df_plot), + colors=colors_dict["threshold"], + ls="--", + lw=2, + alpha=0.75, + label=f"{100*(1 - alpha):.0f}% Confidence Level", + ) + + ax.xaxis.grid(True) + ax.set_axisbelow(True) + legend_patches = [ + ax.get_legend_handles_labels()[0][0], # confidence level line + mpatches.Patch(color=colors_dict["pass"], label="Coefficient is statistically significant"), + mpatches.Patch( + color=colors_dict["fail"], label="Coefficient is not statistically significant" + ), + ] + plt.legend(handles=legend_patches, framealpha=0.75) + + return fig + + +def plot_eval_metrics_xgb(eval_results: dict, eval_metrics: dict) -> plt.Figure: + n_epochs = len(eval_results["validation_0"][list(eval_metrics.keys())[0]]) + + fig, axes = plt.subplots( + nrows=1, ncols=len(eval_metrics.keys()), figsize=(7 * len(eval_metrics.keys()), 5) + ) + for ax, (metric_code, metric) in zip(axes, eval_metrics.items()): + ax.plot(range(n_epochs), eval_results["validation_0"][metric_code], label="Train") + ax.plot(range(n_epochs), eval_results["validation_1"][metric_code], label="Test") + ax.set_title(metric) + ax.set_xlabel("Iterations") + ax.legend() 
+ plt.suptitle("Convergence during XGBoost Model Training", y=1.05) + + return fig + + +def plot_shap_importance( + shap_values: np.ndarray, title: str = "SHAP Feature Importance", **kwargs: dict +) -> plt.Figure: + fig, ax = plt.subplots(figsize=(5, max(shap_values.values.shape[1] / 2, 3))) + ax.set_title(title, pad=15) + shap.plots.bar(shap_values, ax=ax, **kwargs) + + return fig + + +def plot_shap_beeswarm( + shap_values: np.ndarray, title: str = "SHAP Summary Plot", **kwargs: dict +) -> plt.Figure: + ax = shap.plots.beeswarm( + shap_values, show=False, plot_size=(6, max(shap_values.values.shape[1] / 2, 3)), **kwargs + ) + ax.set_title(title, pad=15) + fig = plt.gcf() + + return fig + + +def plot_gain_metric_xgb( + xgb_estimator: Any, + X_test_: pd.DataFrame, + title: str = "XGBoost Feature Importance (Gain metric)", +) -> plt.Figure: + df_xgb_gain = pd.DataFrame( + xgb_estimator.feature_importances_, index=X_test_.columns, columns=["Feature Gain"] + ) + fig, ax = plt.subplots(figsize=(6, max(len(df_xgb_gain) / 2, 3))) + ax = df_xgb_gain.sort_values("Feature Gain", ascending=True).plot( + kind="barh", legend=False, ax=ax + ) + ax.xaxis.grid(True) + ax.set_axisbelow(True) + plt.title(title) + + return fig + + +def plot_confusion_matrix( + y_true: pd.Series, + y_pred: pd.Series, + estimator: Any, + target_classes_dict: dict, + title: str = "Confusion matrix", + normalize: str = None, + figsize: tuple[int, int] = (6, 4), +) -> plt.Figure: + + target_labels = [target_classes_dict[i] for i in estimator.classes_] + + cm = confusion_matrix(y_true=y_true, y_pred=y_pred) + if normalize is not None: + cm_pct = confusion_matrix(y_true=y_true, y_pred=y_pred, normalize=normalize) + + fig = plt.figure(figsize=figsize) + plt.imshow(cm, interpolation="nearest", cmap=plt.get_cmap("Blues")) + plt.title(title, pad=20) + plt.colorbar(format="{x:,.0f}") + + tick_marks = np.arange(len(target_labels)) + plt.xticks( + ticks=tick_marks, + labels=["\n".join(lb.rsplit(" ")) for lb in 
target_labels], + rotation=0, + ) + plt.yticks( + ticks=tick_marks, + labels=["\n".join(lb.rsplit(" ")) for lb in target_labels], + ) + + half_threshold = cm.sum() // 2 + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + s = f"{cm[i, j]:,.0f}" + if normalize is not None: + s += f"\n({(cm_pct[i, j] * 100):.1f}%)" + plt.text( + x=j, + y=i, + s=s, + horizontalalignment="center", + verticalalignment="center", + color="white" if cm[i, j] > half_threshold else "black", + ) + + plt.grid(False) + plt.tight_layout() + plt.ylabel("True label", labelpad=10) + plt.xlabel("Predicted label", labelpad=15) + + return fig + + +def build_ks_table( + y_true: pd.Series | np.ndarray, + y_pred_proba: pd.Series | np.ndarray, + n_bins: int = 10, + ret_ks: bool = False, +) -> pd.DataFrame | tuple[pd.DataFrame, np.float64]: + + if isinstance(y_true, pd.Series): + y_true = y_true.values + + if isinstance(y_pred_proba, pd.Series): + y_pred_proba = y_pred_proba.values + + df = pd.DataFrame() + df["score"] = y_pred_proba + df["positive"] = y_true + df["negative"] = 1 - y_true + + df["bucket"] = pd.qcut(df["score"].rank(method="first"), q=n_bins) + + ks_table = ( + df.groupby("bucket", as_index=True, observed=True) + .agg( + min_score=("score", "min"), + max_score=("score", "max"), + n_positives=("positive", "sum"), + n_negatives=("negative", "sum"), + ) + .reset_index(drop=True) + ) + + ks_table["n_all"] = ks_table["n_positives"] + ks_table["n_negatives"] + + ks_table["positive_rate"] = ks_table["n_positives"] / ks_table["n_all"] + ks_table["negative_rate"] = ks_table["n_negatives"] / ks_table["n_all"] + + ks_table["cum_positives"] = ks_table["n_positives"].cumsum() + ks_table["cum_negatives"] = ks_table["n_negatives"].cumsum() + ks_table["cumpct_positives"] = ks_table["cum_positives"] / ks_table["n_positives"].sum() + ks_table["cumpct_negatives"] = ks_table["cum_negatives"] / ks_table["n_negatives"].sum() + + ks_table["diff"] = np.abs(ks_table["cumpct_positives"] - 
ks_table["cumpct_negatives"]) + + ks = ks_table["diff"].max() + + if ret_ks: + return ks_table, ks + else: + return ks_table + + +def beautify_ks_table(ks_table: pd.DataFrame) -> pd.DataFrame: + ks_table = ks_table.copy() + + def flag(x): + return "<--" if x == ks_table["diff"].max() else "" + + ks_table["KS"] = ks_table["diff"].apply(flag) + + for pct_col in ["positive_rate", "negative_rate", "cumpct_positives", "cumpct_negatives"]: + ks_table[pct_col] = ks_table[pct_col].apply("{0:.2%}".format) + for pp_col in ["diff"]: + ks_table[pp_col] = ks_table[pp_col].apply(lambda x: f"{100*x:.2f} pp") + + ks_table.columns = [col.replace("_", " ") for col in ks_table.columns] + + return ks_table + + +def compute_ks_score( + y_true: pd.Series | np.ndarray, + y_pred_proba: pd.Series | np.ndarray, + n_bins: int = 10, +) -> np.float64: + + _, ks = build_ks_table(y_true=y_true, y_pred_proba=y_pred_proba, n_bins=n_bins, ret_ks=True) + + return ks + + +def plot_ks_table(ks_table: pd.DataFrame, figsize: tuple[int, int] = (7, 5)) -> plt.Figure: + + # Plot the KS Gain Chart + df_plot_ks = ( + ks_table[["max_score", "cumpct_negatives", "cumpct_positives", "diff"]] * 100 + ).set_index("max_score") + # add point zero + df_plot_ks = pd.concat( + [ + pd.DataFrame( + data=0, index=[0], columns=["cumpct_negatives", "cumpct_positives", "diff"] + ), + df_plot_ks, + ], + axis=0, + ) + df_plot_ks.sort_index(inplace=True) + + # Create a figure and axis instance + color_lst = sns.color_palette() + fig, ax = plt.subplots(figsize=figsize) + + # Plot the cumulative distributions + ax.plot(df_plot_ks["cumpct_negatives"], label="Cumulative Negative", color=color_lst[0]) + ax.plot(df_plot_ks["cumpct_positives"], label="Cumulative Positive", color=color_lst[1]) + + # set axis limites + margin = 5 + ax.set_xlim(-margin, 100 + margin) + ax.set_ylim(-margin, 100 + margin) + + ks_argmax = df_plot_ks["diff"].argmax() + ks_max = df_plot_ks["diff"].max() + ax.axvline( + df_plot_ks.index[ks_argmax], + 
ymin=(margin + df_plot_ks["cumpct_positives"].iloc[ks_argmax]) / (100 + 2 * margin), + ymax=(margin + df_plot_ks["cumpct_negatives"].iloc[ks_argmax]) / (100 + 2 * margin), + color=color_lst[2], + linestyle="--", + linewidth=1.5, + label=f"Max KS Statistic ({ks_max:.1f})", + ) + ax.xaxis.set_major_formatter(mticker.PercentFormatter()) + ax.yaxis.set_major_formatter(mticker.PercentFormatter()) + # Customize the plot + ax.set_xlabel("Predicted Probability") + ax.set_ylabel("Cumulative Percentage") + ax.set_title(f"KS Gain Plot (KS Statistic = {ks_max:.3f})") + ax.legend() + ax.grid(True) + + return fig diff --git a/src/utils/clustering.py b/src/utils/clustering.py index b860b3d..cc906e7 100644 --- a/src/utils/clustering.py +++ b/src/utils/clustering.py @@ -1,4 +1,3 @@ -import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt @@ -84,69 +83,3 @@ def plot_kmeans_search( return fig - -def plot_cluster_boxplots( - df: pd.DataFrame, - cluster_col: str, - plot_cols: list[str] = None, - plots_per_line: int = 2, - display_order: list[str] = None, - title: str = "Features by Cluster", - share_y_axis: bool = False, - y_lim: list[float | int] = None, - scale_factor: float = 1.5, -): - n_clusters = df[cluster_col].nunique() - - if plot_cols is None: - plot_cols = [col for col in df.columns if col != cluster_col] - num_lines = int(np.ceil(len(plot_cols) / plots_per_line)) - fig, axes = plt.subplots( - nrows=num_lines, - ncols=plots_per_line, - figsize=(n_clusters * plots_per_line * scale_factor, num_lines * scale_factor * 2), - sharey=share_y_axis, - ) - axes_flattend = axes.flatten() - - plt.suptitle(title, y=1) - color_lst = sns.color_palette() - - if display_order is None: - display_order = np.sort(df[cluster_col].unique()).tolist() - - for ax, col in zip(axes_flattend, plot_cols): - sns.boxplot( - x=df[cluster_col], - y=df[col], - order=display_order, - ax=ax, - fliersize=2, - color=color_lst[0], - medianprops=dict(linewidth=2, alpha=1.0), - 
flierprops=dict(markerfacecolor="black", marker=".", alpha=0.33), - showmeans=True, - meanprops=dict( - marker=5, - markerfacecolor=color_lst[1], - markeredgecolor=color_lst[1], - markersize=10, - ), - ) - ax.set_title(col) - ax.set_ylabel("") - ax.set_xlabel("") - if y_lim is not None: - y_range = max(y_lim) - min(y_lim) - pct_margin = 0.01 - ax.set_ylim( - ymin=(min(y_lim) - y_range * pct_margin), ymax=(max(y_lim) + y_range * pct_margin) - ) - - # delete unused axes - for ax in axes_flattend[len(plot_cols) :]: - fig.delaxes(ax=ax) - - fig.tight_layout() - - return fig diff --git a/src/utils/common.py b/src/utils/common.py index dcf83f0..3eafaa6 100644 --- a/src/utils/common.py +++ b/src/utils/common.py @@ -1,5 +1,7 @@ import os +import numpy as np import pandas as pd +import seaborn as sns import matplotlib.pyplot as plt from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE @@ -26,3 +28,105 @@ def set_plot_font_sizes() -> None: plt.rc("axes", labelsize=SMALL_FONTSIZE) # axes labels plt.rc("xtick", labelsize=SMALL_FONTSIZE) # x tick labels plt.rc("ytick", labelsize=SMALL_FONTSIZE) # y tick labels + + +def plot_boxplot_by_class( + df: pd.DataFrame, + class_col: str, + plot_cols: list[str] = None, + plots_per_line: int = 2, + display_order: list[str] = None, + title: str = "Features by Class", + share_y_axis: bool = False, + y_lim: list[float | int] = None, + scale_factor: float = 1.5, +) -> plt.Figure: + + n_classes = df[class_col].nunique() + + if plot_cols is None: + plot_cols = [col for col in df.columns if col != class_col] + num_lines = int(np.ceil(len(plot_cols) / plots_per_line)) + fig, axes = plt.subplots( + nrows=num_lines, + ncols=plots_per_line, + figsize=(n_classes * plots_per_line * scale_factor, num_lines * scale_factor * 2), + sharey=share_y_axis, + ) + axes_flattend = axes.flatten() + + plt.suptitle(title, y=1) + color_lst = sns.color_palette() + + if display_order is None: + display_order = 
np.sort(df[class_col].unique()).tolist() + + for ax, col in zip(axes_flattend, plot_cols): + sns.boxplot( + x=df[class_col], + y=df[col], + order=display_order, + ax=ax, + fliersize=2, + color=color_lst[0], + medianprops=dict(linewidth=2, alpha=1.0), + flierprops=dict(markerfacecolor="black", marker=".", alpha=0.33), + showmeans=True, + meanprops=dict( + marker=5, + markerfacecolor=color_lst[1], + markeredgecolor=color_lst[1], + markersize=10, + ), + ) + ax.set_title(col) + ax.set_ylabel("") + ax.set_xlabel("") + if y_lim is not None: + y_range = max(y_lim) - min(y_lim) + pct_margin = 0.01 + ax.set_ylim( + ymin=(min(y_lim) - y_range * pct_margin), ymax=(max(y_lim) + y_range * pct_margin) + ) + + # delete unused axes + for ax in axes_flattend[len(plot_cols) :]: + fig.delaxes(ax=ax) + + fig.tight_layout() + + return fig + + +def plot_correlation_matrix( + df: pd.DataFrame, + title: str = "Features' Correlation", + method: str = "pearson", + fig_height: int = 8, + annot_fontsize: int = 10, +) -> plt.Figure: + # Compute features' correlation + df_corr = df.corr(method=method) + # Generate a mask to onlyshow the bottom triangle + mask_corr = ~np.triu(np.ones_like(df_corr, dtype=bool)).T + + with sns.axes_style("whitegrid"): + fig = plt.figure(figsize=(fig_height, fig_height)) + plt.title(title) + + # generate heatmap + sns.heatmap( + df_corr, + cmap="YlGnBu", + annot=True, + mask=mask_corr, + vmin=-1, + vmax=1, + square=True, + annot_kws=dict(fontsize=annot_fontsize), + fmt=".2f", + ) + plt.grid(False) + plt.xticks(rotation=45, ha="right") + + return fig diff --git a/src/utils/feature_selection.py b/src/utils/feature_selection.py new file mode 100644 index 0000000..cc44230 --- /dev/null +++ b/src/utils/feature_selection.py @@ -0,0 +1,294 @@ +import logging +import pandas as pd +import numpy as np +import warnings +from statsmodels.stats.outliers_influence import variance_inflation_factor + +from sklearn.metrics import root_mean_squared_error, f1_score +from 
sklearn.linear_model import Lasso +from sklearn.svm import LinearSVC +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split + +logger = logging.getLogger(__name__) + + +def _remove_features_with_l1_regularization( + df_input: pd.DataFrame, + target_col: str, + l1_params: dict, +) -> list[str]: + + # carregar parametros + problem = l1_params["problem"] + train_test_split_params = l1_params["train_test_split_params"] + logspace_search = l1_params["logspace_search"] + error_tolerance_pct = l1_params["error_tolerance_pct"] + min_feats_to_keep = l1_params["min_feats_to_keep"] + random_seed = l1_params["random_seed"] + + # split data + X = df_input.drop(columns=[target_col]) + y = df_input[target_col] + X_train, X_test, y_train, y_test = train_test_split( + X, + y, + **train_test_split_params, + random_state=random_seed, + ) + + # Standardize X_train + stdscaler = StandardScaler() + X_train_std = pd.DataFrame( + stdscaler.fit_transform(X_train), columns=X.columns, index=X_train.index + ) + X_test_std = pd.DataFrame(stdscaler.transform(X_test), columns=X.columns, index=X_test.index) + + # define search space + logspace_values = np.logspace(**logspace_search) + coef_lst = [] + metrics_dict = dict() + + # define L1-based linear model ad its evaluation metric + if problem.lower() == "classification": + LinearModel = LinearSVC + model_params = dict(penalty="l1") + search_arg = "C" + eval_metric_fn = f1_score + eval_metric_greater_is_better = True + elif problem.lower() == "regression": + LinearModel = Lasso + model_params = dict() + search_arg = "alpha" + eval_metric_fn = root_mean_squared_error + eval_metric_greater_is_better = False + else: + raise ValueError( + "Argument 'problem' must be either 'classification' or 'regression'. " + f"Got {problem} instead." 
+ ) + + for i, search_val in enumerate(logspace_values, start=1): + # Fit model and make predictions + model_params[search_arg] = search_val + model = LinearModel(**model_params, random_state=random_seed) + model.fit(X_train_std, y_train) + y_pred = model.predict(X_test_std) + eval_metric = eval_metric_fn(y_test, y_pred) + + s_coef = pd.Series(data=np.mean(model.coef_, axis=0), index=model.feature_names_in_, name=i) + coef_lst.append(s_coef) + metrics_dict[i] = dict( + search_val=search_val, n_zero_coefs=len(s_coef[s_coef == 0]), eval_metric=eval_metric + ) + + df_coef = pd.concat(coef_lst, axis=1) + df_coef.columns.name = "iteration" + df_coef.index.name = "feature" + df_iter_metrics = pd.DataFrame.from_dict(metrics_dict, orient="index") + df_iter_metrics.index.name = "iteration" + + # select the model that removes the most features while satisfying the following conditions: + # - the selected model's metric score must be within the specified tolerance with respect to + # the best score among all models + # - the number of removed features must not exceed the specified number + if eval_metric_greater_is_better is True: + best_metric_score = df_iter_metrics["eval_metric"].max() + eval_metric_filter = df_iter_metrics["eval_metric"] > ( + best_metric_score * (1 - error_tolerance_pct) + ) + else: + best_metric_score = df_iter_metrics["eval_metric"].min() + eval_metric_filter = df_iter_metrics["eval_metric"] < ( + best_metric_score * (1 + error_tolerance_pct) + ) + min_feats_filter = (X.shape[1] - df_iter_metrics["n_zero_coefs"]) >= min_feats_to_keep + df_iter_best = df_iter_metrics[eval_metric_filter & min_feats_filter] + + if len(df_iter_best) > 0: + best_iter = df_iter_best["n_zero_coefs"].idxmax() + s_coef_best_iter = df_coef[best_iter] + l1_feats_to_drop = s_coef_best_iter[s_coef_best_iter == 0].index.tolist() + else: + l1_feats_to_drop = [] + + return l1_feats_to_drop + + +def _get_high_vif_features( + X: pd.DataFrame, threshold: int, break_threshold: int = 1e6 
+) -> list[str]: + features = list(X.columns) + max_len = max([len(f) for f in features]) + high_vif_feats = [] + + logger.info(f"Computing the Variance Inflation Factor (VIF) for {len(features)} features...") + + count = 1 + max_vif = threshold + while max_vif >= threshold: + max_vif = 0 + for i, feat in enumerate(features): + with warnings.catch_warnings(action="ignore"): + vif_feat = variance_inflation_factor(X[features].values, i) + if vif_feat > max_vif: + max_vif = vif_feat + max_vif_idx = i + max_vif_feat = feat + # break for loop before checking all features to save time + if vif_feat > break_threshold: + break + + if max_vif > threshold: + high_vif_feat = features.pop(max_vif_idx) + assert high_vif_feat == max_vif_feat + high_vif_feats.append(high_vif_feat) + logger.info( + f'{(str(count) + ".").rjust(4)} Removing feature: ' + f'{(high_vif_feat + " ").ljust(max_len+2, ".")} VIF: {max_vif:,.2f}' + ) + count += 1 + else: + max_vif_col = features.pop(max_vif_idx) + logger.info( + f' >> Stopping at feat: {(max_vif_col + " ").ljust(max_len+2, ".")} ' + f"VIF: {max_vif:,.2f} (threshold: {threshold:,})" + ) + + return high_vif_feats + + +def _run_manual_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: + orig_shp = df.shape + cols_to_exclude = params["cols_to_exclude"] + + if not isinstance(cols_to_exclude, list): + cols_to_exclude = [] + elif len(cols_to_exclude) > 0: + # cannot remove target + if target_col in cols_to_exclude: + cols_to_exclude = [col for col in cols_to_exclude if col != target_col] + + logger.info( + f" - Removing {len(cols_to_exclude)} " + f"({100 * len(cols_to_exclude) / (orig_shp[1] - 1):.2f}%) feature(s) manually..." 
+ ) + return cols_to_exclude + + +def _run_variance_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: + orig_shp = df.shape + var_threshold = params["threshold"] + + s_var = df.drop(columns=[target_col]).var(axis=0) + low_var_cols = s_var[s_var <= var_threshold].index.tolist() + + logger.info( + f" - Removing {len(low_var_cols):,} ({100 * len(low_var_cols) / (orig_shp[1] - 1):.1f}%)" + f" feature(s) with variance <= {var_threshold} ..." + ) + + return low_var_cols + + +def _run_correlation_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: + orig_shp = df.shape + corr_threshold = params["threshold"] + + # compute pearson correlation with target + df_feats = df.drop(columns=[target_col]) + s_corr_target = df_feats.corrwith(df[target_col]) + # rank features based on correlation with target (from worst to best) + ranked_feats = s_corr_target.dropna().abs().sort_values(ascending=True).index.tolist() + + logger.info(f" Running Correlation filter with threshold of {corr_threshold}") + high_corr_cols = [] + for feat in ranked_feats: + s_corr_feat = df_feats.drop(columns=[feat]).corrwith(df_feats[feat]).dropna() + feat_max_corr = s_corr_feat.abs().max() + feat_idxmax_corr = s_corr_feat.abs().idxmax() + + if feat_max_corr > corr_threshold: + logger.info( + f" - Removing feature '{feat}' with correlation " + f"{s_corr_feat.loc[feat_idxmax_corr]:+.4f} to '{feat_idxmax_corr}'" + ) + high_corr_cols.append(feat) + df_feats = df_feats.drop(columns=[feat]) + + logger.info( + f" - Removing {len(high_corr_cols):,} " + f"({100 * len(high_corr_cols) / (orig_shp[1] - 1):.1f}%) feature(s) with " + f"abs(correlation) > {corr_threshold} ..." 
+ ) + + return high_corr_cols + + +def _run_l1_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: + orig_shp = df.shape + l1_feats_to_drop = _remove_features_with_l1_regularization(df, target_col, params) + logger.info( + f" - Removing {len(l1_feats_to_drop):,} " + f"({100 * len(l1_feats_to_drop) / orig_shp[1]:.1f}%)" + f" feature(s) with null coefficient after L1 regularization ...\n" + ) + + return l1_feats_to_drop + + +def _run_vif_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: + orig_shp = df.shape + + vif_threshold = params["threshold"] + high_vif_feats = _get_high_vif_features(df.drop(columns=[target_col]), threshold=vif_threshold) + logger.info( + f" - Removing {len(high_vif_feats):,} " + f"({100 * len(high_vif_feats) / (orig_shp[1] - 1):.1f}%)" + f" feature(s) with VIF >= {vif_threshold:,.0f} ..." + ) + + return high_vif_feats + + +def run_feature_selection_steps( + df_input: pd.DataFrame, target_col: str, fs_steps: dict +) -> tuple[list[str], pd.DataFrame]: + # define available filter functions + fs_functions = { + "manual": _run_manual_filter, + "variance": _run_variance_filter, + "correlation": _run_correlation_filter, + "l1_regularization": _run_l1_filter, + "vif": _run_vif_filter, + } + # check if provided steps are valid + for filter_name, filter_params in fs_steps.items(): + if filter_name not in fs_functions.keys(): + raise ValueError( + f"Filter name must be one of {list(fs_functions.keys())}. " + f"Got {filter_name} instead" + ) + + # run feature selection steps + logger.info( + "--> Starting the Feature Selection process with " + f"{df_input.drop(columns=[target_col]).shape[1]:,} features" + ) + # build feature selection log table + df_fs = pd.DataFrame(index=df_input.columns).assign(filter="", step=0) + df = df_input.copy() + for step, (filter_name, filter_params) in enumerate(fs_steps.items(), start=1): + logger.info(f"{step}. 
{filter_name.upper()} FILTER") + removed_feats = fs_functions[filter_name](df, target_col, params=filter_params) + df = df.drop(columns=removed_feats) + df_fs.loc[removed_feats, ["filter", "step"]] = (filter_name, step) + + selected_feats = df.drop(columns=[target_col]).columns.tolist() + logger.info( + "--> Completed the Feature Selection process with " + f"{len(selected_feats):,} selected features" + ) + df_fs.loc[selected_feats, ["filter", "step"]] = ("Selected feature", -1) + + return selected_feats, df_fs From 352bfbe319ff1b7ae99e588930f80cb54c26b956 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Sun, 30 Mar 2025 18:06:19 -0300 Subject: [PATCH 05/10] feat: Multiclass Classification use case (#5) * chore: replacing single quotes with double quotes * minor adjustments to binary classification * refactor feature selection so that it receives scaled tables X, y * rename classification utils to evals * wip multiclass classification notebook * multiclass classification use case --- Makefile | 1 - README.md | 2 +- src/classification-binary.ipynb | 488 ++++++------ src/classification-multiclass.ipynb | 897 ++++++++++++++++++++++ src/clustering.ipynb | 44 +- src/utils/clustering.py | 2 +- src/utils/common.py | 23 +- src/utils/constants.py | 3 + src/utils/{classification.py => evals.py} | 167 ++-- src/utils/feature_selection.py | 145 ++-- 10 files changed, 1376 insertions(+), 396 deletions(-) create mode 100644 src/classification-multiclass.ipynb rename src/utils/{classification.py => evals.py} (79%) diff --git a/Makefile b/Makefile index bf94dda..4e05f33 100644 --- a/Makefile +++ b/Makefile @@ -4,4 +4,3 @@ install-pre-commit: lint: pre-commit run -a - diff --git a/README.md b/README.md index 864237a..40ade90 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository compiles simple and practical examples for common data science tasks using tabular data: 1. Binary Classification -2. 
(WIP) Multiclass Classification +2. Multiclass Classification 3. (WIP) Regression 4. Clustering 5. (WIP) Dimensionality Reduction diff --git a/src/classification-binary.ipynb b/src/classification-binary.ipynb index d1cb1d8..9a287d9 100644 --- a/src/classification-binary.ipynb +++ b/src/classification-binary.ipynb @@ -48,16 +48,16 @@ "from utils.constants import RANDOM_SEED\n", "from utils.common import (\n", " get_data_folder_path,\n", - " set_plot_font_sizes,\n", + " set_plotting_config,\n", " plot_boxplot_by_class,\n", " plot_correlation_matrix,\n", ")\n", - "from utils.classification import (\n", + "from utils.evals import (\n", " describe_input_features,\n", " plot_confusion_matrix,\n", " plot_target_rate,\n", - " compute_classification_metrics,\n", - " build_coefficients_table,\n", + " compute_binary_classification_metrics,\n", + " build_logit_coefficients_table,\n", " plot_coefficients_values,\n", " plot_coefficients_significance,\n", " plot_eval_metrics_xgb,\n", @@ -82,20 +82,20 @@ "logging.basicConfig(\n", " level=logging.INFO,\n", " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", - " datefmt='%H:%M:%S',\n", + " datefmt=\"%H:%M:%S\",\n", ")\n", "logger = logging.getLogger(__name__)\n", "\n", - "pd.set_option('display.max_columns', None)\n", + "pd.set_option(\"display.max_columns\", None)\n", "pd.options.display.float_format = \"{:.2f}\".format\n", "\n", - "mpl.rcParams['font.sans-serif'] = \"Arial\"\n", - "plt.set_loglevel('WARNING')\n", + "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", + "plt.set_loglevel(\"WARNING\")\n", "\n", "# plots configuration\n", "sns.set_style(\"darkgrid\")\n", "sns.set_palette(\"colorblind\")\n", - "set_plot_font_sizes()\n", + "set_plotting_config()\n", "%matplotlib inline" ] }, @@ -104,7 +104,13 @@ "id": "4", "metadata": {}, "source": [ - "## 1. Define Parameters" + "## 1. Load Data\n", + "\n", + "In this notebook we will use the Fetal Health Dataset.\n", + "\n", + "Sources:\n", + "1. 
Fetal Health Classification Dataset: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", + "2. Original article: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC68223152" ] }, { @@ -114,12 +120,10 @@ "metadata": {}, "outputs": [], "source": [ - "target_col = \"is_normal\"\n", - "target_classes_dict = {\n", - " 0: 'Not Normal',\n", - " 1: 'Normal'\n", - "}\n", - "test_size = 0.20" + "data_path = get_data_folder_path()\n", + "\n", + "df_input = pd.read_csv(os.path.join(data_path, \"fetal_health.csv\"))\n", + "df_input.columns = [col.replace(\" \", \"_\") for col in df_input.columns]" ] }, { @@ -129,41 +133,37 @@ "tags": [] }, "source": [ - "## 2. Load Data\n", - "\n", - "In this notebook we will use the Fetal Health Dataset.\n", - "\n", - "Sources:\n", - "1. Fetal Health Classification Dataset: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", - "2. Original article: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC68223152" + "## 2. Process Data" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "7", "metadata": {}, - "outputs": [], "source": [ - "data_path = get_data_folder_path()\n", + "### Target column\n", "\n", - "df_input = pd.read_csv(os.path.join(data_path, 'fetal_health.csv'))\n", - "df_input.columns = [col.replace(' ', '_') for col in df_input.columns]" + "Fetal health (target column) can have the following values:\n", + "- 1: Normal\n", + "- 2: Suspect\n", + "- 3: Pathological\n", + "\n", + "For this notebook, we will consider the Normal/not Normal distinction for binary classification" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "8", "metadata": {}, + "outputs": [], "source": [ - "### Target column\n", - "\n", - "Fetal health (target column) can have the following values:\n", - "- 1 - Normal\n", - "- 2 - Suspect\n", - "- 3 - Pathological\n", - "\n", - "For this notebook, we will consider the Normal/not Normal distinction for binary 
classification" + "target_col = \"is_normal\"\n", + "target_classes_dict = {\n", + " 0: \"Not Normal\",\n", + " 1: \"Normal\"\n", + "}\n", + "test_size = 0.20" ] }, { @@ -173,7 +173,8 @@ "metadata": {}, "outputs": [], "source": [ - "df_input[target_col] = (df_input['fetal_health'] == 1).astype(np.int8)\n", + "# create a new binary target column from the original multi-class target column\n", + "df_input[target_col] = (df_input[\"fetal_health\"] == 1).astype(np.int8)\n", "df_input.drop(columns=[\"fetal_health\"], inplace=True)" ] }, @@ -208,6 +209,7 @@ "outputs": [], "source": [ "pd.concat([\n", + " pd.Series(target_classes_dict, name=\"label\"),\n", " df_input_train[target_col].value_counts(dropna=False, normalize=False).rename(\"train_target_count\"),\n", " df_input_train[target_col].value_counts(dropna=False, normalize=True).rename(\"train_target_pct\"),\n", " df_input_test[target_col].value_counts(dropna=False, normalize=False).rename(\"test_target_count\"),\n", @@ -230,30 +232,52 @@ "id": "14", "metadata": {}, "source": [ - "## 3. 
Exploratory Data Analysis (EDA)" + "### Scaling (Standardization)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "15", "metadata": {}, + "outputs": [], "source": [ - "### Boxplots by Target Class" + "# Standardize training and test data\n", + "stdscaler = StandardScaler()\n", + "\n", + "# training data\n", + "y_train = df_input_train[target_col]\n", + "X_train_all = (\n", + " pd.DataFrame(\n", + " # fit scaler on training data (and then transform training data)\n", + " data=stdscaler.fit_transform(df_input_train),\n", + " columns=df_input_train.columns,\n", + " index=df_input_train.index\n", + " )\n", + " # remove target from the model input features table\n", + " .drop(columns=[target_col])\n", + ")\n", + "\n", + "# test data\n", + "y_test = df_input_test[target_col]\n", + "X_test_all = (\n", + " pd.DataFrame(\n", + " # use scaler fitted on training data to transform test data\n", + " data=stdscaler.transform(df_input_test),\n", + " columns=df_input_test.columns,\n", + " index=df_input_test.index\n", + " )\n", + " # remove target from the model input features table\n", + " .drop(columns=[target_col])\n", + ")" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "16", "metadata": {}, - "outputs": [], "source": [ - "fig = plot_boxplot_by_class(\n", - " df=df_input_train, # use only training data to avoid bias in test results\n", - " class_col=target_col,\n", - " plots_per_line=6,\n", - " title=\"Features in input dataset\",\n", - ")" + "## 3. 
Exploratory Data Analysis (EDA)" ] }, { @@ -261,7 +285,7 @@ "id": "17", "metadata": {}, "source": [ - "### Pearson's Correlation" + "### Boxplots by Target Class" ] }, { @@ -271,10 +295,14 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_correlation_matrix(\n", - " df=df_input_train, # use only training data to avoid bias in test results\n", - " method=\"pearson\",\n", - " fig_height=10\n", + "display(\n", + " plot_boxplot_by_class(\n", + " df_input=df_input_train, # use only training data to avoid bias in test results\n", + " class_col=target_col,\n", + " class_mapping=target_classes_dict,\n", + " plots_per_line=6,\n", + " title=\"Features in input dataset\",\n", + " )\n", ")" ] }, @@ -283,7 +311,7 @@ "id": "19", "metadata": {}, "source": [ - "## 4. Feature Selection" + "### Pearson's Correlation" ] }, { @@ -293,49 +321,49 @@ "metadata": {}, "outputs": [], "source": [ - "fs_steps = {\n", - " \"manual\": dict(\n", - " cols_to_exclude=[\n", - " \"histogram_variance\",\n", - " \"severe_decelerations\",\n", - " ]\n", - " ),\n", - " \"variance\": dict(threshold=0),\n", - " \"correlation\": dict(threshold=0.9),\n", - " \"l1_regularization\": dict(\n", - " problem=\"classification\",\n", - " train_test_split_params=dict(test_size=test_size),\n", - " logspace_search=dict(start=-5, stop=1, num=20, base=10),\n", - " # tolerance over minimum error with which to search for the best model\n", - " error_tolerance_pct=0.02,\n", - " # minimum features to keep in final selection\n", - " min_feats_to_keep=4,\n", - " random_seed=RANDOM_SEED,\n", - " ),\n", - " \"vif\": dict(threshold=5),\n", - "}" + "display(\n", + " plot_correlation_matrix(\n", + " # use only training data to avoid bias in test results\n", + " df=df_input_train, method=\"pearson\", fig_height=10\n", + " )\n", + ")" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "21", "metadata": {}, - "outputs": [], "source": [ - "selected_feats, df_fs = 
run_feature_selection_steps(\n", - " df_input=df_input_train, # use only training data to avoid bias in test results\n", - " target_col=target_col,\n", - " fs_steps=fs_steps\n", - ")" + "## 4. Feature Selection" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "22", "metadata": {}, + "outputs": [], "source": [ - "### Correlation check\n" + "fs_steps = {\n", + " \"manual\": {\n", + " \"cols_to_exclude\": [\n", + " \"severe_decelerations\",\n", + " ]\n", + " },\n", + " \"null_variance\": None,\n", + " \"correlation\": {\"threshold\": 0.8},\n", + " \"vif\": {\"threshold\": 2},\n", + " \"l1_regularization\": {\n", + " \"problem\": \"classification\",\n", + " \"train_test_split_params\": {\"test_size\": test_size},\n", + " \"logspace_search\": {\"start\": -5, \"stop\": 1, \"num\": 20, \"base\": 10},\n", + " # tolerance over minimum error with which to search for the best model\n", + " \"error_tolerance_pct\": 0.05,\n", + " # minimum features to keep in final selection\n", + " \"min_feats_to_keep\": 4,\n", + " \"random_seed\": RANDOM_SEED,\n", + " },\n", + "}" ] }, { @@ -345,24 +373,24 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_correlation_matrix(\n", - " df=df_input_train[selected_feats + [target_col]], # use only training data to avoid bias in test results\n", - " method=\"pearson\",\n", - " fig_height=5\n", + "selected_feats, df_fs = run_feature_selection_steps(\n", + " # use only training data to avoid bias in test results\n", + " X=X_train_all,\n", + " y=y_train,\n", + " fs_steps=fs_steps\n", ")" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "24", "metadata": {}, + "outputs": [], "source": [ - "### Multicollinearity check\n", - "\n", - "Multicollinearity is a problem because it undermines the statistical significance of an independent variable. 
[\\[source\\]](https://link.springer.com/chapter/10.1007/978-0-585-25657-3_37)\n", - "\n", - "Multicollinearity does not affect the accuracy of predictive models, including regression models. \\[...\\] Now, where multicollinearity becomes 'an issue' is when you want to 'interpret' the parameters learned by your model. In other words, you cannot say that the feature with the 'biggest weight' is 'the most important' when the features are correlated. Note that this is independent on the accuracy of the model, this is only the interpretation part [\\[source\\]](https://www.researchgate.net/post/Are-Random-Forests-affected-by-multi-collinearity-between-features)\n", - "\n" + "# build model input datasets\n", + "X_train = X_train_all[selected_feats]\n", + "X_test = X_test_all[selected_feats]" ] }, { @@ -370,16 +398,7 @@ "id": "25", "metadata": {}, "source": [ - "**Variance Inflation Factor (VIF)**\n", - "\n", - "The variance inflation factor (VIF) is a statistical tool that measures the amount of multicollinearity in a regression model. 
As a general rule of thumb, \"VIF > 5 is cause for concern and VIF > 10 indicates a serious collinearity problem.\"\n", - "\n", - "The higher the VIF:\n", - "- The more correlated a predictor is with the other predictors\n", - "- The more the standard error is inflated\n", - "- The larger the confidence interval\n", - "- The less likely it is that a coefficient will be evaluated as statistically significant\n", - "[\\[source\\]](https://towardsdatascience.com/everything-you-need-to-know-about-multicollinearity-2f21f082d6dc)" + "### Correlation check\n" ] }, { @@ -389,13 +408,12 @@ "metadata": {}, "outputs": [], "source": [ - "df_vif = pd.DataFrame(\n", - " data=[variance_inflation_factor(df_input_train[selected_feats].values, i) for i in range(len(selected_feats))],\n", - " index=selected_feats,\n", - " columns=['VIF']\n", - ").sort_values('VIF', ascending=False)\n", - "\n", - "df_vif" + "display(\n", + " plot_correlation_matrix(\n", + " # use only training data to avoid bias in test results\n", + " df=df_input_train[selected_feats + [target_col]], method=\"pearson\", fig_height=5\n", + " )\n", + ")" ] }, { @@ -403,7 +421,7 @@ "id": "27", "metadata": {}, "source": [ - "### Model input datasets" + "### Multicollinearity check" ] }, { @@ -413,46 +431,27 @@ "metadata": {}, "outputs": [], "source": [ - "# train datasets\n", - "X_train = df_input_train[selected_feats]\n", - "y_train = df_input_train[target_col]\n", - "# test datatsets\n", - "X_test = df_input_test[selected_feats]\n", - "y_test = df_input_test[target_col]" + "# compute the Variance Inflation Factor (VIF) for each feature\n", + "df_vif = pd.DataFrame(\n", + " data=[variance_inflation_factor(X_train.values, i) for i in range(len(selected_feats))],\n", + " index=selected_feats,\n", + " columns=[\"VIF\"]\n", + ").sort_values(\"VIF\", ascending=False)\n", + "\n", + "df_vif" ] }, { "cell_type": "markdown", "id": "29", "metadata": {}, - "source": [ - "### Scaling" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "30", - "metadata": {}, - "outputs": [], - "source": [ - "# Standardize X_train and X_test\n", - "stdscaler = StandardScaler()\n", - "X_train_std = pd.DataFrame(stdscaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)\n", - "X_test_std = pd.DataFrame(stdscaler.transform(X_test), columns=X_test.columns, index=X_test.index)" - ] - }, - { - "cell_type": "markdown", - "id": "31", - "metadata": {}, "source": [ "## 5. Classifier Model" ] }, { "cell_type": "markdown", - "id": "32", + "id": "30", "metadata": {}, "source": [ "### Select classifier: Logistic Regression or XGBoost" @@ -461,65 +460,69 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "31", "metadata": {}, "outputs": [], "source": [ "MODEL_SELECTION = \"logistic_regression\"\n", - "# MODEL_SELECTION = \"xgboost\"" + "# MODEL_SELECTION = \"xgboost\"\n", + "\n", + "model_selection_error = ValueError(\n", + " \"'MODEL_SELECTION' must be either 'logistic_regression' or 'xgboost'. \"\n", + " f\"Got {MODEL_SELECTION} instead.\"\n", + ")" ] }, { "cell_type": "markdown", - "id": "34", + "id": "32", "metadata": {}, "source": [ "### Hyperparameter tuning with K-Fold Cross Validation\n", "\n", - "For detailed explanation on XGBoost's parameters: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" + "- **Logistic Regression**: In binary classification with imbalanced classes, avoid setting `class_weight=\"balanced\"` if you want to use the model's predicted probabilities as proxies for the real probability distributions of the target classes, that is, if you want to interpret the predicted probability as \"the actual probability that the sample belongs to the class\". 
In this case, you should not use 50% as the threshold for the binary classification; you should find the optimal threshold using the ROC Curve (detailed below) to maximize the model's performance.\n", + "\n", + "- **XGBoost**: For detailed explanation on XGBoost's parameters: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" ] }, { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "33", "metadata": {}, "outputs": [], "source": [ "if MODEL_SELECTION == \"logistic_regression\":\n", " Estimator = LogisticRegression\n", " cv_search_space = {\n", - " 'penalty': ['l1', 'l2', 'elasticnet'],\n", - " 'C': np.logspace(-3, 1, num=9, base=10.0),\n", - " 'class_weight': [None],\n", + " \"penalty\": [\"l1\", \"l2\", \"elasticnet\"],\n", + " \"C\": np.logspace(-3, 1, num=9, base=10.0),\n", + " \"class_weight\": [None],\n", " }\n", "elif MODEL_SELECTION == \"xgboost\":\n", " Estimator = XGBClassifier\n", " cv_search_space = {\n", - " 'objective': ['binary:logistic'],\n", - " 'n_estimators': [30, 40, 50],\n", - " 'learning_rate': [0.1],\n", - " 'max_depth': [3, 4, 6],\n", - " 'min_child_weight': [2, 4],\n", - " 'gamma': [0, 0.5],\n", - " 'alpha':[0, 0.3],\n", - " 'scale_pos_weight': [1],\n", - " 'lambda':[1],\n", - " ## 'subsample': [0.8, 1.0],\n", - " ## 'colsample_bytree': [0.8, 1.0],\n", - " 'verbosity': [0],\n", + " \"objective\": [\"binary:logistic\"],\n", + " \"n_estimators\": [30, 40, 50],\n", + " \"learning_rate\": [0.1],\n", + " \"max_depth\": [3, 4, 6],\n", + " \"min_child_weight\": [2, 4],\n", + " \"gamma\": [0, 0.5],\n", + " \"alpha\":[0, 0.3],\n", + " \"scale_pos_weight\": [1],\n", + " \"lambda\":[1],\n", + " ## \"subsample\": [0.8, 1.0],\n", + " ## \"colsample_bytree\": [0.8, 1.0],\n", + " \"verbosity\": [0],\n", " }\n", "else:\n", - " raise ValueError(\n", - " \"'MODEL_SELECTION' must be either 'logistic_regression' or 'xgboost'. 
\"\n", - " f\"Got {MODEL_SELECTION} instead.\"\n", - " )" + " raise model_selection_error" ] }, { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -529,13 +532,14 @@ " \"precision\": \"Precision\",\n", " \"recall\": \"Recall\",\n", " \"f1\": \"F1 Score\",\n", - "}" + "}\n", + "refit_metric = \"f1\" # metric to optimize for the final model" ] }, { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -548,7 +552,7 @@ " param_grid=cv_search_space,\n", " scoring=list(cv_scoring_metrics.keys()),\n", " cv=kfold_cv,\n", - " refit=\"f1\",\n", + " refit=refit_metric,\n", " verbose=1,\n", ")\n", "# execute search\n", @@ -559,14 +563,14 @@ { "cell_type": "code", "execution_count": null, - "id": "38", + "id": "36", "metadata": {}, "outputs": [], "source": [ "print(\"Grid Search CV Best Model - Scoring Metrics:\")\n", "for i, (metric_key, metric_name) in enumerate(cv_scoring_metrics.items(), start=1):\n", " print(\n", - " f\" {i}. 
{(metric_name + \":\").ljust(10)} \"\n", + " f\" {str(i) + \".\":>2} {metric_name:.<10} \"\n", " f\"{result_cv.cv_results_[f\"mean_test_{metric_key}\"][result_cv.best_index_]:.3f}\"\n", " )\n", "print(f\"\\nBest Hyperparameters: {result_cv.best_params_}\")" @@ -574,7 +578,7 @@ }, { "cell_type": "markdown", - "id": "39", + "id": "37", "metadata": {}, "source": [ "### Final Model" @@ -583,7 +587,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -599,9 +603,11 @@ " )\n", " model_kwargs = dict(eval_metric=list(eval_metrics.keys()))\n", " model_fit_kwargs = dict(\n", - " eval_set=[(X_train_std, y_train), (X_test_std, y_test)],\n", + " eval_set=[(X_train, y_train), (X_test, y_test)],\n", " verbose=False\n", " )\n", + "else:\n", + " raise model_selection_error\n", " \n", "model = Estimator(**result_cv.best_params_, **model_kwargs, random_state=RANDOM_SEED)" ] @@ -609,31 +615,31 @@ { "cell_type": "code", "execution_count": null, - "id": "41", + "id": "39", "metadata": {}, "outputs": [], "source": [ "# Fit model and make predictions\n", - "model.fit(X_train_std, y_train, **model_fit_kwargs)\n", - "# Make predictions\n", - "y_pred_proba_train = pd.Series(data=model.predict_proba(X_train_std)[:, 1], index=X_train_std.index)\n", - "y_pred_proba = pd.Series(data=model.predict_proba(X_test_std)[:, 1], index=X_test_std.index)" + "model.fit(X_train, y_train, **model_fit_kwargs)\n", + "# Make predictions ([:, 1] returns the probability of the positive class)\n", + "y_pred_proba_train = pd.Series(data=model.predict_proba(X_train)[:, 1], index=X_train.index)\n", + "y_pred_proba = pd.Series(data=model.predict_proba(X_test)[:, 1], index=X_test.index)" ] }, { "cell_type": "code", "execution_count": null, - "id": "42", + "id": "40", "metadata": {}, "outputs": [], "source": [ "if MODEL_SELECTION == \"xgboost\":\n", - " fig = plot_eval_metrics_xgb(model.evals_result(), eval_metrics)" + " 
display(plot_eval_metrics_xgb(model.evals_result(), eval_metrics))" ] }, { "cell_type": "markdown", - "id": "43", + "id": "41", "metadata": {}, "source": [ "**Plot target rate per group of predicted probability**\n", @@ -644,16 +650,16 @@ { "cell_type": "code", "execution_count": null, - "id": "44", + "id": "42", "metadata": {}, "outputs": [], "source": [ - "fig = plot_target_rate(y_test, y_pred_proba)" + "display(plot_target_rate(y_test, y_pred_proba))" ] }, { "cell_type": "markdown", - "id": "45", + "id": "43", "metadata": {}, "source": [ "**Define optimal threshold for separating classes using the ROC Curve**\n", @@ -664,22 +670,24 @@ { "cell_type": "code", "execution_count": null, - "id": "46", + "id": "44", "metadata": {}, "outputs": [], "source": [ - "fig, optimal_thresh = plot_roc_curve(\n", - " y_true=y_train, # use only training data to avoid bias in test results\n", + "# use only training data to get optimal threshold to avoid bias in test results\n", + "fig_roc_curve, optimal_thresh = plot_roc_curve(\n", + " y_true=y_train,\n", " y_pred_proba=y_pred_proba_train,\n", - " title=\"ROC Curve on Training Data\",\n", - " ret_optimal_thresh=True\n", - ")" + " title=\"ROC Curve on Training Data (for finding the Optimal Threshold)\",\n", + " return_optimal_thresh=True\n", + ")\n", + "display(fig_roc_curve)" ] }, { "cell_type": "code", "execution_count": null, - "id": "47", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -691,7 +699,7 @@ }, { "cell_type": "markdown", - "id": "48", + "id": "46", "metadata": {}, "source": [ "### Feature Importance\n", @@ -703,28 +711,36 @@ { "cell_type": "code", "execution_count": null, - "id": "49", + "id": "47", "metadata": {}, "outputs": [], "source": [ "if MODEL_SELECTION == \"logistic_regression\":\n", - " df_coefficients = build_coefficients_table(model, X_train_std)\n", - " fig = plot_coefficients_values(df_coefficients)\n", - " fig = plot_coefficients_significance(df_coefficients, log_scale=False)\n", + " 
df_coefficients = build_logit_coefficients_table(\n", + " coefficients=model.coef_[0],\n", + " intercept=model.intercept_[0],\n", + " X_train=X_train,\n", + " y_pred_proba_train=y_pred_proba_train,\n", + " )\n", + " display(plot_coefficients_values(df_coefficients))\n", + " display(plot_coefficients_significance(df_coefficients, log_scale=False))\n", " \n", "elif MODEL_SELECTION == \"xgboost\":\n", " # compute SHAP values\n", " explainer = shap.Explainer(model)\n", - " shap_values = explainer(X_test_std)\n", + " shap_values = explainer(X_test)\n", " # shap plots\n", - " fig = plot_shap_importance(shap_values)\n", - " fig = plot_shap_beeswarm(shap_values)\n", - " fig = plot_gain_metric_xgb(model, X_test_std)" + " display(plot_shap_importance(shap_values))\n", + " display(plot_shap_beeswarm(shap_values))\n", + " display(plot_gain_metric_xgb(model, X_test))\n", + "\n", + "else:\n", + " raise model_selection_error" ] }, { "cell_type": "markdown", - "id": "50", + "id": "48", "metadata": {}, "source": [ "### Performance Metrics" @@ -733,15 +749,15 @@ { "cell_type": "code", "execution_count": null, - "id": "51", + "id": "49", "metadata": {}, "outputs": [], "source": [ "df_train_metrics = pd.Series(\n", - " compute_classification_metrics(y_train, y_pred_train, y_pred_proba_train)\n", + " compute_binary_classification_metrics(y_train, y_pred_train, y_pred_proba_train)\n", ").to_frame(name=\"Train Metrics\")\n", "df_test_metrics = pd.Series(\n", - " compute_classification_metrics(y_test, y_pred, y_pred_proba)\n", + " compute_binary_classification_metrics(y_test, y_pred, y_pred_proba)\n", ").to_frame(name=\"Test Metrics\")\n", "\n", "print(\"Final Model - Scoring Metrics on Train & Test Datasets:\")\n", @@ -751,7 +767,7 @@ }, { "cell_type": "markdown", - "id": "52", + "id": "50", "metadata": {}, "source": [ "#### Confusion Matrix" @@ -760,23 +776,25 @@ { "cell_type": "code", "execution_count": null, - "id": "53", + "id": "51", "metadata": {}, "outputs": [], "source": [ "# 
Confusion Matrix\n", - "fig = plot_confusion_matrix(\n", - " y_test,\n", - " y_pred,\n", - " estimator=model,\n", - " target_classes_dict=target_classes_dict,\n", - " normalize=\"true\",\n", + "display(\n", + " plot_confusion_matrix(\n", + " y_test,\n", + " y_pred,\n", + " estimator=model,\n", + " target_classes_dict=target_classes_dict,\n", + " normalize=\"true\",\n", + " )\n", ")" ] }, { "cell_type": "markdown", - "id": "54", + "id": "52", "metadata": {}, "source": [ "#### ROC AUC" @@ -785,16 +803,16 @@ { "cell_type": "code", "execution_count": null, - "id": "55", + "id": "53", "metadata": {}, "outputs": [], "source": [ - "fig = plot_roc_curve(y_test, y_pred_proba)" + "display(plot_roc_curve(y_test, y_pred_proba))" ] }, { "cell_type": "markdown", - "id": "56", + "id": "54", "metadata": {}, "source": [ "#### KS Gain" @@ -803,11 +821,11 @@ { "cell_type": "code", "execution_count": null, - "id": "57", + "id": "55", "metadata": {}, "outputs": [], "source": [ - "df_ks, ks_score = build_ks_table(y_test, y_pred_proba, ret_ks=True)\n", + "df_ks, ks_score = build_ks_table(y_test, y_pred_proba, return_ks=True)\n", "print(f\"KS score: {ks_score * 100:.2f} p.p.\")\n", "beautify_ks_table(df_ks)" ] @@ -815,33 +833,17 @@ { "cell_type": "code", "execution_count": null, - "id": "58", + "id": "56", "metadata": {}, "outputs": [], "source": [ - "fig = plot_ks_table(df_ks)" + "display(plot_ks_table(df_ks))" ] }, { "cell_type": "code", "execution_count": null, - "id": "59", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "60", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "61", + "id": "57", "metadata": {}, "outputs": [], "source": [] @@ -849,7 +851,7 @@ { "cell_type": "code", "execution_count": null, - "id": "62", + "id": "58", "metadata": {}, "outputs": [], "source": [] diff --git a/src/classification-multiclass.ipynb 
b/src/classification-multiclass.ipynb new file mode 100644 index 0000000..3873719 --- /dev/null +++ b/src/classification-multiclass.ipynb @@ -0,0 +1,897 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": { + "tags": [] + }, + "source": [ + "# Multiclass Classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import logging\n", + "import warnings\n", + "import itertools\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import shap\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV\n", + "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", + "from xgboost import XGBClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from utils.constants import RANDOM_SEED\n", + "from utils.common import (\n", + " get_data_folder_path,\n", + " set_plotting_config,\n", + " plot_boxplot_by_class,\n", + " plot_correlation_matrix,\n", + ")\n", + "from utils.evals import (\n", + " describe_input_features,\n", + " plot_confusion_matrix,\n", + " plot_target_rate,\n", + " compute_multiclass_classification_metrics,\n", + " build_logit_coefficients_table,\n", + " plot_coefficients_values,\n", + " plot_coefficients_significance,\n", + " plot_eval_metrics_xgb,\n", + " plot_gain_metric_xgb,\n", + " plot_shap_importance,\n", + " plot_shap_beeswarm,\n", + " plot_roc_curve,\n", + ")\n", + "from utils.feature_selection import run_feature_selection_steps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + 
"outputs": [], + "source": [ + "logging.basicConfig(\n", + " level=logging.INFO,\n", + " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", + " datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:.2f}\".format\n", + "\n", + "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", + "plt.set_loglevel(\"WARNING\")\n", + "\n", + "# plots configuration\n", + "sns.set_style(\"darkgrid\")\n", + "sns.set_palette(\"colorblind\")\n", + "set_plotting_config()\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## 1. Load Data\n", + "\n", + "In this notebook we will use the Fetal Health Dataset.\n", + "\n", + "Sources:\n", + "1. Fetal Health Classification Dataset: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", + "2. Original article: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC68223152" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = get_data_folder_path()\n", + "\n", + "df_input = pd.read_csv(os.path.join(data_path, \"fetal_health.csv\"))\n", + "df_input.columns = [col.replace(\" \", \"_\") for col in df_input.columns]" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": { + "tags": [] + }, + "source": [ + "## 2. Process Data" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "### Target column\n", + "\n", + "Fetal health (target column) can have the following values:\n", + "- 1: Normal\n", + "- 2: Suspect\n", + "- 3: Pathological\n", + "\n", + "However, XGBoost expects 0-indexed positive integers for the classes. 
Therefore, we will use the following values in this notebook:\n", + "- 0: Normal\n", + "- 1: Suspect\n", + "- 2: Pathological" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "target_col = \"fetal_health\"\n", + "target_classes_dict = {\n", + " 0: \"Normal\",\n", + " 1: \"Suspect\",\n", + " 2: \"Pathological\",\n", + "}\n", + "test_size = 0.20" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# convert target column to integer\n", + "df_input[target_col] = df_input[target_col].astype(np.int8) - np.int8(1) # subtract 1 to make it 0-indexed" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "### Train test split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "df_input_train, df_input_test = train_test_split(\n", + " df_input,\n", + " test_size=test_size,\n", + " stratify=df_input[target_col],\n", + " random_state=RANDOM_SEED,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "pd.concat([\n", + " pd.Series(target_classes_dict, name=\"label\"),\n", + " df_input_train[target_col].value_counts(dropna=False, normalize=False).rename(\"train_target_count\"),\n", + " df_input_train[target_col].value_counts(dropna=False, normalize=True).rename(\"train_target_pct\"),\n", + " df_input_test[target_col].value_counts(dropna=False, normalize=False).rename(\"test_target_count\"),\n", + " df_input_test[target_col].value_counts(dropna=False, normalize=True).rename(\"test_target_pct\"),\n", + "], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "describe_input_features(df_input, df_input_train, df_input_test)" + ] + }, + { + "cell_type": 
"markdown", + "id": "14", + "metadata": {}, + "source": [ + "### Scaling (Standardization)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "# Standardize training and test data\n", + "stdscaler = StandardScaler()\n", + "\n", + "# training data\n", + "X_train_all = (\n", + " pd.DataFrame(\n", + " # fit scaler on training data (and then transform training data)\n", + " data=stdscaler.fit_transform(df_input_train),\n", + " columns=df_input_train.columns,\n", + " index=df_input_train.index\n", + " )\n", + " # remove target from the model input features table\n", + " .drop(columns=[target_col])\n", + ")\n", + "y_train = df_input_train[target_col]\n", + "y_train_ohe = pd.get_dummies(y_train, dtype=np.int8) # one-hot encoding for plots\n", + "\n", + "# test data\n", + "y_test = df_input_test[target_col]\n", + "X_test_all = (\n", + " pd.DataFrame(\n", + " # use scaler fitted on training data to transform test data\n", + " data=stdscaler.transform(df_input_test),\n", + " columns=df_input_test.columns,\n", + " index=df_input_test.index\n", + " )\n", + " # remove target from the model input features table\n", + " .drop(columns=[target_col])\n", + ")\n", + "y_test_ohe = pd.get_dummies(y_test, dtype=np.int8) # one-hot encoding for plots" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## 3. 
Exploratory Data Analysis (EDA)" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "### Boxplots by Target Class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "display(\n", + " plot_boxplot_by_class(\n", + " df_input=df_input_train, # use only training data to avoid bias in test results\n", + " class_col=target_col,\n", + " class_mapping=target_classes_dict,\n", + " plots_per_line=6,\n", + " title=\"Features in input dataset\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "### Pearson's Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "display(\n", + " plot_correlation_matrix(\n", + " # use only training data to avoid bias in test results\n", + " df=df_input_train, method=\"pearson\", fig_height=10\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "## 4. 
Feature Selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "fs_steps = {\n", + " \"manual\": {\n", + " \"cols_to_exclude\": [\n", + " \"percentage_of_time_with_abnormal_long_term_variability\",\n", + " \"prolongued_decelerations\",\n", + " # \"histogram_variance\",\n", + " ]\n", + " },\n", + " \"null_variance\": None,\n", + " \"correlation\": {\"threshold\": 0.8},\n", + " \"vif\": {\"threshold\": 2},\n", + " \"l1_regularization\": {\n", + " \"problem\": \"classification\",\n", + " \"train_test_split_params\": {\"test_size\": test_size},\n", + " \"logspace_search\": {\"start\": -5, \"stop\": 1, \"num\": 20, \"base\": 10},\n", + " # tolerance over minimum error with which to search for the best model\n", + " \"error_tolerance_pct\": 0.02,\n", + " # minimum features to keep in final selection\n", + " \"min_feats_to_keep\": 4,\n", + " \"random_seed\": RANDOM_SEED,\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "# run Feature Selection separately for each class as binary classifications\n", + "selected_feats_ovr = {}\n", + "fs_tables_ovr = {}\n", + "\n", + "for clss, label in target_classes_dict.items():\n", + " logger.info(f\"Running Feature Selection for Class '{label}' (vs Rest)\")\n", + " selected_feats_ovr[clss], fs_tables_ovr[clss] = run_feature_selection_steps(\n", + " # use only training data to avoid bias in test results\n", + " X=X_train_all,\n", + " y=y_train_ohe[clss],\n", + " fs_steps=fs_steps\n", + " )\n", + " logger.info(\"-\" * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "# keep only the features that were selected for at least 2 classes\n", + "MIN_NUM_SELECTIONS = 2\n", + "\n", + "classes_intersections = []\n", + "for classes_group in 
itertools.combinations(selected_feats_ovr.keys(), MIN_NUM_SELECTIONS):\n", + " classes_intersections.append(\n", + " set.intersection(*[set(selected_feats_ovr[clss]) for clss in classes_group])\n", + " )\n", + " \n", + "selected_feats = list(set.union(*classes_intersections))\n", + "print(f\"Final selection ({len(selected_feats)} features selected):\")\n", + "for feat in sorted(selected_feats):\n", + " print(f\" - {feat}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "# build model input datasets\n", + "X_train = X_train_all[selected_feats]\n", + "X_test = X_test_all[selected_feats]" + ] + }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "### Correlation check\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "display(\n", + " plot_correlation_matrix(\n", + " # use only training data to avoid bias in test results\n", + " df=df_input_train[selected_feats + [target_col]], method=\"pearson\", fig_height=5\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "28", + "metadata": {}, + "source": [ + "### Multicollinearity check\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29", + "metadata": {}, + "outputs": [], + "source": [ + "# compute the Variance Inflation Factor (VIF) for each feature\n", + "df_vif = pd.DataFrame(\n", + " data=[variance_inflation_factor(X_train.values, i) for i in range(len(selected_feats))],\n", + " index=selected_feats,\n", + " columns=[\"VIF\"]\n", + ").sort_values(\"VIF\", ascending=False)\n", + "\n", + "df_vif" + ] + }, + { + "cell_type": "markdown", + "id": "30", + "metadata": {}, + "source": [ + "## 5. 
Classifier Model" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "### Select classifier: Logistic Regression or XGBoost" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "# MODEL_SELECTION = \"logistic_regression\"\n", + "MODEL_SELECTION = \"xgboost\"\n", + "\n", + "model_selection_error = ValueError(\n", + " \"'MODEL_SELECTION' must be either 'logistic_regression' or 'xgboost'. \"\n", + " f\"Got {MODEL_SELECTION} instead.\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "33", + "metadata": {}, + "source": [ + "### Hyperparameter tuning with K-Fold Cross Validation\n", + "\n", + "For a detailed explanation of XGBoost's parameters, see: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"logistic_regression\":\n", + " Estimator = LogisticRegression\n", + " cv_search_space = {\n", + " \"penalty\": [\"l1\", \"l2\", \"elasticnet\"],\n", + " \"solver\": [\"saga\"],\n", + " \"C\": np.logspace(-3, 1, num=9, base=10.0),\n", + " \"class_weight\": [None],\n", + " }\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " Estimator = XGBClassifier\n", + " cv_search_space = {\n", + " \"objective\": [\"multi:softmax\"],\n", + " 'num_class': [len(target_classes_dict)],\n", + " \"n_estimators\": [30, 40, 50],\n", + " \"learning_rate\": [0.1],\n", + " \"max_depth\": [3, 4, 6],\n", + " \"min_child_weight\": [2, 4],\n", + " \"gamma\": [0, 0.5],\n", + " \"alpha\":[0, 0.3],\n", + " \"scale_pos_weight\": [1],\n", + " \"lambda\":[1],\n", + " ## \"subsample\": [0.8, 1.0],\n", + " ## \"colsample_bytree\": [0.8, 1.0],\n", + " \"verbosity\": [0],\n", + " }\n", + "else:\n", + " raise model_selection_error" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35", + 
"metadata": {}, + "outputs": [], + "source": [ + "cv_scoring_metrics = {\n", + " \"accuracy\": \"Accuracy\",\n", + " \"precision_macro\": \"Precision (macro)\",\n", + " \"recall_macro\": \"Recall (macro)\",\n", + " \"f1_macro\": \"F1 Score (macro)\",\n", + " \"precision_weighted\": \"Precision (weighted)\",\n", + " \"recall_weighted\": \"Recall (weighted)\",\n", + " \"f1_weighted\": \"F1 Score (weighted)\",\n", + " \"roc_auc_ovr\": \"ROC AUC One-vs-Rest (macro)\",\n", + " \"roc_auc_ovo\": \"ROC AUC One-vs-One (macro)\",\n", + " \"roc_auc_ovr_weighted\": \"ROC AUC One-vs-Rest (weighted)\",\n", + " \"roc_auc_ovo_weighted\": \"ROC AUC One-vs-One (weighted)\",\n", + "}\n", + "refit_metric = \"f1_weighted\" # metric to optimize for the final model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# define evaluation\n", + "kfold_cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_SEED)\n", + "# define search\n", + "grid_search = GridSearchCV(\n", + " estimator=Estimator(),\n", + " param_grid=cv_search_space,\n", + " scoring=list(cv_scoring_metrics.keys()),\n", + " cv=kfold_cv,\n", + " refit=refit_metric,\n", + " verbose=1,\n", + ")\n", + "# execute search\n", + "with warnings.catch_warnings(action=\"ignore\"):\n", + " result_cv = grid_search.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Grid Search CV Best Model - Scoring Metrics (averaging method):\")\n", + "for i, (metric_key, metric_name) in enumerate(cv_scoring_metrics.items(), start=1):\n", + " print(\n", + " f\" {str(i) + \".\":>3} {metric_name:.<31} \"\n", + " f\"{result_cv.cv_results_[f\"mean_test_{metric_key}\"][result_cv.best_index_]:.3f}\"\n", + " )\n", + "print(f\"\\nBest Hyperparameters: {result_cv.best_params_}\")" + ] + }, + { + "cell_type": "markdown", + "id": "38", + 
"metadata": {}, + "source": [ + "### Final Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39", + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate model with best hyperparameters and additional kwargs\n", + "if MODEL_SELECTION == \"logistic_regression\":\n", + " model_kwargs = dict()\n", + " model_fit_kwargs = dict()\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " eval_metrics = dict(\n", + " mlogloss=\"Binary Cross-entropy Loss (Log-loss)\",\n", + " merror=\"Binary Classification Error Rate\",\n", + " auc=\"ROC AUC\",\n", + " )\n", + " model_kwargs = dict(eval_metric=list(eval_metrics.keys()))\n", + " model_fit_kwargs = dict(\n", + " eval_set=[(X_train, y_train), (X_test, y_test)],\n", + " verbose=False\n", + " )\n", + "else:\n", + " raise model_selection_error\n", + " \n", + "model = Estimator(**result_cv.best_params_, **model_kwargs, random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "# Fit model and make predictions\n", + "model.fit(X_train, y_train, **model_fit_kwargs)\n", + "# Make probabilities predictions\n", + "y_pred_proba_train = pd.DataFrame(\n", + " data=model.predict_proba(X_train), columns=model.classes_, index=X_train.index\n", + ")\n", + "y_pred_proba = pd.DataFrame(\n", + " data=model.predict_proba(X_test), columns=model.classes_, index=X_test.index\n", + ")\n", + "# Make class predictions\n", + "y_pred_train = pd.Series(\n", + " data=model.predict(X_train), index=X_train.index, name=target_col\n", + ")\n", + "y_pred = pd.Series(\n", + " data=model.predict(X_test), index=X_test.index, name=target_col\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"xgboost\":\n", + " display(plot_eval_metrics_xgb(model.evals_result(), eval_metrics))" + ] + }, + { + "cell_type": "markdown", + "id": 
"42", + "metadata": {}, + "source": [ + "**Plot target rate per group of predicted probability**\n", + "\n", + "A good model should have increasing target rate for each group of predicted probability (e.g. quartiles, deciles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43", + "metadata": {}, + "outputs": [], + "source": [ + "for clss, label in target_classes_dict.items():\n", + " title = f\"Class '{label}': Target rate per group of predicted probability\"\n", + " display(\n", + " plot_target_rate(y_test=y_test_ohe[clss], y_pred_proba=y_pred_proba[clss], title=title)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "44", + "metadata": {}, + "source": [ + "### Feature Importance\n", + "\n", + "- For Logistic Regression: coefficients values and statistical significance\n", + "- For XGBoost: SHAP analysis and Gain Metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"logistic_regression\":\n", + " for clss, coefficients, intercept in zip(\n", + " model.classes_, model.coef_, model.intercept_\n", + " ):\n", + " label = f\"Class '{target_classes_dict[clss]}'\"\n", + " print(label)\n", + " df_coefficients = build_logit_coefficients_table(\n", + " coefficients=coefficients,\n", + " intercept=intercept,\n", + " X_train=X_train,\n", + " y_pred_proba_train=y_pred_proba_train,\n", + " )\n", + " display(\n", + " plot_coefficients_values(\n", + " df_coefficients,\n", + " title=f\"{label}: Coefficient Values with 95% CI (±1.96 Std Error)\"\n", + " ),\n", + " plot_coefficients_significance(\n", + " df_coefficients,\n", + " log_scale=False,\n", + " title=f\"{label}: Coefficient Significance\"\n", + " ),\n", + " )\n", + " \n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " # compute SHAP values\n", + " explainer = shap.Explainer(model)\n", + " shap_values = explainer(X_test)\n", + " # shap plots\n", + " for i, clss in 
enumerate(model.classes_):\n", + " label = f\"Class '{target_classes_dict[clss]}'\"\n", + " print(label)\n", + " display(\n", + " plot_shap_importance(\n", + " shap_values[:, :, i], title=f\"{label}: SHAP Feature Importance\"\n", + " ),\n", + " plot_shap_beeswarm(\n", + " shap_values[:, :, i], title=f\"{label}: SHAP Summary Plot\"\n", + " ),\n", + " plot_gain_metric_xgb(\n", + " model, X_test, title=f\"{label}: XGBoost Feature Importance (Gain metric)\"\n", + " )\n", + " )\n", + "\n", + "else:\n", + " raise model_selection_error" + ] + }, + { + "cell_type": "markdown", + "id": "46", + "metadata": {}, + "source": [ + "### Performance Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47", + "metadata": {}, + "outputs": [], + "source": [ + "df_train_metrics = pd.Series(\n", + " compute_multiclass_classification_metrics(y_train, y_pred_train, y_pred_proba_train)\n", + ").to_frame(name=\"Train Metrics\")\n", + "df_test_metrics = pd.Series(\n", + " compute_multiclass_classification_metrics(y_test, y_pred, y_pred_proba)\n", + ").to_frame(name=\"Test Metrics\")\n", + "\n", + "print(\"Final Model - Scoring Metrics on Train & Test Datasets:\")\n", + "df_metrics = df_train_metrics.join(df_test_metrics)\n", + "display(df_metrics)" + ] + }, + { + "cell_type": "markdown", + "id": "48", + "metadata": {}, + "source": [ + "#### Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49", + "metadata": {}, + "outputs": [], + "source": [ + "# Confusion Matrix\n", + "display(\n", + " plot_confusion_matrix(\n", + " y_test,\n", + " y_pred,\n", + " estimator=model,\n", + " target_classes_dict=target_classes_dict,\n", + " normalize=\"true\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "50", + "metadata": {}, + "source": [ + "#### ROC AUC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51", + "metadata": {}, + "outputs": [], + "source": [ + "for clss, label in 
target_classes_dict.items():\n", + " title = f\"Class '{label}': ROC Curve One-vs-Rest\"\n", + " display(\n", + " plot_roc_curve(\n", + " y_true=y_test_ohe[clss], y_pred_proba=y_pred_proba[clss], title=title,\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ds", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/clustering.ipynb b/src/clustering.ipynb index a5ab1af..5b7debd 100755 --- a/src/clustering.ipynb +++ b/src/clustering.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "1", "metadata": {}, "outputs": [], @@ -29,19 +29,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "2", "metadata": {}, "outputs": [], "source": [ "from utils.constants import RANDOM_SEED\n", - "from utils.common import get_data_folder_path, set_plot_font_sizes, plot_boxplot_by_class\n", + "from utils.common import get_data_folder_path, set_plotting_config, plot_boxplot_by_class\n", "from utils.clustering import search_kmeans, plot_kmeans_search" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "3", "metadata": {}, "outputs": [], @@ -49,7 +49,7 @@ "# plots configuration\n", "sns.set_style(\"darkgrid\")\n", "sns.set_palette(\"colorblind\")\n", - "set_plot_font_sizes()\n", + "set_plotting_config()\n", "%matplotlib inline" ] }, @@ -71,7 +71,7 @@ }, { "cell_type": "code", - 
"execution_count": 4, + "execution_count": null, "id": "6", "metadata": {}, "outputs": [], @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "7", "metadata": {}, "outputs": [], @@ -113,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "9", "metadata": {}, "outputs": [], @@ -188,7 +188,7 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_kmeans_search(df_kmeans=df_kmeans, elbow=kl.elbow)" + "display(plot_kmeans_search(df_kmeans=df_kmeans, elbow=kl.elbow))" ] }, { @@ -213,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "18", "metadata": {}, "outputs": [], @@ -242,11 +242,13 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_boxplot_by_class(\n", - " df=df_cl,\n", - " class_col=\"cluster\",\n", - " plots_per_line=2,\n", - " title=\"Features used in K-means Clustering\",\n", + "display(\n", + " plot_boxplot_by_class(\n", + " df_input=df_cl,\n", + " class_col=\"cluster\",\n", + " plots_per_line=2,\n", + " title=\"Features used in K-means Clustering\",\n", + " )\n", ")" ] }, @@ -257,11 +259,13 @@ "metadata": {}, "outputs": [], "source": [ - "fig = plot_boxplot_by_class(\n", - " df=df_input,\n", - " class_col=\"cluster\",\n", - " plots_per_line=2,\n", - " title=\"All features from input dataset\",\n", + "display(\n", + " plot_boxplot_by_class(\n", + " df_input=df_input,\n", + " class_col=\"cluster\",\n", + " plots_per_line=2,\n", + " title=\"All features from input dataset\",\n", + " )\n", ")" ] }, diff --git a/src/utils/clustering.py b/src/utils/clustering.py index cc906e7..292640e 100644 --- a/src/utils/clustering.py +++ b/src/utils/clustering.py @@ -80,6 +80,6 @@ def plot_kmeans_search( axes[1].set_xlabel("Number of clusters") fig.tight_layout() + plt.close(fig) return fig - diff --git a/src/utils/common.py b/src/utils/common.py index 3eafaa6..740a0aa 100644 --- a/src/utils/common.py +++ b/src/utils/common.py 
@@ -2,8 +2,9 @@ import numpy as np import pandas as pd import seaborn as sns +import matplotlib as mpl import matplotlib.pyplot as plt -from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE +from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE, FIGURE_DPI def get_repo_root_path() -> str: @@ -20,7 +21,7 @@ def convert_to_integer(s: pd.Series) -> pd.Series: return pd.to_numeric(s, downcast="integer", errors="raise") -def set_plot_font_sizes() -> None: +def _set_plot_font_sizes() -> None: plt.rc("font", size=SMALL_FONTSIZE) # default font size plt.rc("figure", titlesize=BIG_FONTSIZE) # figure title plt.rc("legend", fontsize=SMALL_FONTSIZE) # legend @@ -30,9 +31,19 @@ def set_plot_font_sizes() -> None: plt.rc("ytick", labelsize=SMALL_FONTSIZE) # y tick labels +def _set_figure_dpi() -> None: + mpl.rcParams["figure.dpi"] = FIGURE_DPI + + +def set_plotting_config() -> None: + _set_plot_font_sizes() + _set_figure_dpi() + + def plot_boxplot_by_class( - df: pd.DataFrame, + df_input: pd.DataFrame, class_col: str, + class_mapping: dict = None, plot_cols: list[str] = None, plots_per_line: int = 2, display_order: list[str] = None, @@ -42,8 +53,11 @@ def plot_boxplot_by_class( scale_factor: float = 1.5, ) -> plt.Figure: + df = df_input.copy() n_classes = df[class_col].nunique() + if class_mapping is not None: + df[class_col] = df[class_col].map(class_mapping) if plot_cols is None: plot_cols = [col for col in df.columns if col != class_col] num_lines = int(np.ceil(len(plot_cols) / plots_per_line)) @@ -94,6 +108,7 @@ def plot_boxplot_by_class( fig.delaxes(ax=ax) fig.tight_layout() + plt.close(fig) return fig @@ -129,4 +144,6 @@ def plot_correlation_matrix( plt.grid(False) plt.xticks(rotation=45, ha="right") + plt.close(fig) + return fig diff --git a/src/utils/constants.py b/src/utils/constants.py index 08fef0a..d71ca63 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -8,3 +8,6 @@ SMALL_FONTSIZE = 12 
MEDIUM_FONTSIZE = 14 BIG_FONTSIZE = 17 + +# plots resolution +FIGURE_DPI = 150 diff --git a/src/utils/classification.py b/src/utils/evals.py similarity index 79% rename from src/utils/classification.py rename to src/utils/evals.py index 4f3601e..5cc8f4e 100644 --- a/src/utils/classification.py +++ b/src/utils/evals.py @@ -6,7 +6,7 @@ import seaborn as sns import matplotlib.pyplot as plt -from typing import Any +from typing import Any, Union from matplotlib import patches as mpatches from matplotlib import ticker as mticker @@ -20,7 +20,6 @@ f1_score, confusion_matrix, ) -from sklearn.linear_model import LogisticRegression, LinearRegression from .common import convert_to_integer @@ -54,7 +53,7 @@ def plot_roc_curve( y_pred_proba: pd.Series, title: str = "Receiver Operating Characteristic", figsize: tuple[int, int] = (8, 5), - ret_optimal_thresh: bool = False, + return_optimal_thresh: bool = False, ) -> plt.Figure | tuple[plt.Figure, np.float64]: fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba) optimal_thresh = thresholds[np.argmax(tpr - fpr)] @@ -82,7 +81,7 @@ def plot_roc_curve( ) plt.ylabel("True Positive Rate") plt.xlabel("False Positive Rate") - if ret_optimal_thresh: + if return_optimal_thresh: plt.vlines( x=100 * optimal_thresh, ymin=-margin, @@ -97,8 +96,9 @@ def plot_roc_curve( ax.set_ylim([-margin, 100 + margin]) ax.yaxis.set_major_formatter(mticker.PercentFormatter()) ax.xaxis.set_major_formatter(mticker.PercentFormatter()) + plt.close(fig) - if ret_optimal_thresh: + if return_optimal_thresh: return fig, optimal_thresh else: return fig @@ -114,7 +114,7 @@ def plot_target_rate( [ y_test.rename("true_label"), y_pred_proba.rename("pred_proba"), - # quratiles + # quartiles pd.qcut( y_pred_proba.rank(method="first"), q=4, @@ -152,48 +152,85 @@ def plot_target_rate( ax.yaxis.set_major_formatter(mticker.PercentFormatter()) ax.set_xlabel("") + plt.close(fig) + return fig -def compute_classification_metrics( +def compute_binary_classification_metrics( y_true: 
pd.Series, y_pred: pd.Series, y_pred_proba: pd.Series ) -> dict[str, float]: metrics_dict = dict() + metrics_dict["Accuracy"] = accuracy_score(y_true=y_true, y_pred=y_pred) + metrics_dict["Precision"] = precision_score(y_true=y_true, y_pred=y_pred) + metrics_dict["Recall"] = recall_score(y_true=y_true, y_pred=y_pred) + metrics_dict["F1 Score"] = f1_score(y_true=y_true, y_pred=y_pred) if len(np.unique(y_true)) > 1: - metrics_dict["KS"] = compute_ks_score(y_true=y_true, y_pred_proba=y_pred_proba) roc_auc = roc_auc_score(y_true=y_true, y_score=y_pred_proba) metrics_dict["ROC AUC"] = roc_auc metrics_dict["GINI"] = 2 * roc_auc - 1 + metrics_dict["KS Gain"] = compute_ks_gain_score(y_true=y_true, y_pred_proba=y_pred_proba) else: - metrics_dict["KS"] = np.nan metrics_dict["ROC AUC"] = np.nan metrics_dict["GINI"] = np.nan + metrics_dict["KS Gain"] = np.nan + + return metrics_dict + + +def compute_multiclass_classification_metrics( + y_true: pd.Series, y_pred: pd.Series, y_pred_proba: pd.Series +) -> dict[str, float]: + metrics_dict = dict() + metrics_dict["Accuracy"] = accuracy_score(y_true=y_true, y_pred=y_pred) - metrics_dict["Precision"] = precision_score(y_true=y_true, y_pred=y_pred) - metrics_dict["Recall"] = recall_score(y_true=y_true, y_pred=y_pred) - metrics_dict["F1 Score"] = f1_score(y_true=y_true, y_pred=y_pred) + for avg_method in ["macro", "weighted"]: + metrics_dict[f"Precision ({avg_method})"] = precision_score( + y_true=y_true, y_pred=y_pred, average=avg_method + ) + metrics_dict[f"Recall ({avg_method})"] = recall_score( + y_true=y_true, y_pred=y_pred, average=avg_method + ) + metrics_dict[f"F1 Score ({avg_method})"] = f1_score( + y_true=y_true, y_pred=y_pred, average=avg_method + ) + + for multiclass_method, multiclass_label in { + "ovr": "One-vs-Rest", + "ovo": "One-vs-One", + }.items(): + if len(np.unique(y_true)) > 1: + metrics_dict[f"ROC AUC {multiclass_label} ({avg_method})"] = roc_auc_score( + y_true=y_true, + y_score=y_pred_proba, + 
average=avg_method, + multi_class=multiclass_method, + ) + else: + metrics_dict[f"ROC AUC {multiclass_label} ({avg_method})"] = np.nan return metrics_dict -def _get_logit_stderror_pvalues( - model: LogisticRegression, x: pd.DataFrame | np.ndarray +def _compute_logit_stderror_pvalues( + coefficients: np.ndarray, + intercept: float, + X_train: pd.DataFrame, + y_pred_proba_train: pd.Series, ) -> tuple[np.ndarray, np.ndarray]: - """Calculate z-scores for scikit-learn LogisticRegression. - This function uses asymtptics for maximum likelihood estimates. + """Calculate z-scores for a Logistic Regression and returns the + standard errors and p-values for each of the model's coefficients. + Uses asymtotic approximation for maximum likelihood estimates. Source: https://stackoverflow.com/a/47079198 - - parameters: - model: fitted sklearn.linear_model.LogisticRegression with intercept and large C - x: matrix on which the model was fit """ - p = model.predict_proba(x) + p = np.vstack([y_pred_proba_train.values, (1 - y_pred_proba_train.values)]).T n = len(p) - m = len(model.coef_[0]) + 1 - coefs = np.concatenate([model.intercept_, model.coef_[0]]) - x_full = np.matrix(np.insert(np.array(x), 0, 1, axis=1)) + m = len(coefficients) + 1 + coefs = np.concatenate([[intercept], coefficients]) + # add a constant column of ones to the training data + x_full = np.matrix(np.insert(X_train.values, 0, 1, axis=1)) ans = np.zeros((m, m)) for i in range(n): ans = ans + np.dot(np.transpose(x_full[i, :]), x_full[i, :]) * p[i, 1] * p[i, 0] @@ -202,30 +239,33 @@ def _get_logit_stderror_pvalues( t = coefs / se p_values = (1 - scipy.stats.norm.cdf(abs(t))) * 2 - return se, p_values + return se[1:], p_values[1:] # [1:] to skip the added constant -def build_coefficients_table( - model: LogisticRegression | LinearRegression, - X_train_std: pd.DataFrame, +def build_logit_coefficients_table( + coefficients: np.ndarray, + intercept: float, + X_train: pd.DataFrame, + y_pred_proba_train: pd.Series, ) -> 
pd.DataFrame: - if not isinstance(model, (LogisticRegression, LinearRegression)): - raise ValueError( - "Model must be either sklearn's Linear Regression or Logistic Regression. " - f"Got {type(model)} instead." - ) - df_coef = pd.DataFrame( - np.transpose(model.coef_), columns=["Coefficients"], index=model.feature_names_in_ + # compute coefficients' Standard Error and p-values + stderr, pvalues = _compute_logit_stderror_pvalues( + coefficients=coefficients, + intercept=intercept, + X_train=X_train, + y_pred_proba_train=y_pred_proba_train, ) - df_coef["Absolute Coefficients"] = df_coef["Coefficients"].abs() - # compute Standard Error and coefficients' p-values - stderr, pvalues = _get_logit_stderror_pvalues(model, X_train_std) - df_coef["Standard Error"] = stderr[1:] # [1:] to skip constant - df_coef["95% CI"] = df_coef["Standard Error"] * 1.96 - df_coef["p-values"] = pvalues[1:] # [1:] to skip constant - - df_coef = df_coef.sort_values(by="Absolute Coefficients", ascending=False) + df_coef = pd.DataFrame( + data={ + "Coefficients": coefficients, + "Absolute Coefficients": np.abs(coefficients), + "Standard Error": stderr, + "95% CI": stderr * 1.96, + "p-values": pvalues, + }, + index=X_train.columns.tolist(), + ).sort_values(by="Absolute Coefficients", ascending=False) return df_coef @@ -236,10 +276,11 @@ def _get_order_of_magnitude(number: float | int) -> float: def plot_coefficients_values( df_coef: pd.DataFrame, + title: str = "Coefficient Values with 95% CI (±1.96 Std Error)", ) -> plt.Figure: fig, ax = plt.subplots(nrows=1, ncols=1) - fig.suptitle("Coefficient Values with 95% CI (±1.96 Std Error)") + fig.suptitle(title) max_coeff, max_ci = df_coef[["Absolute Coefficients", "95% CI"]].max().tolist() if _get_order_of_magnitude(max_ci) > _get_order_of_magnitude(max_coeff): @@ -272,6 +313,7 @@ def plot_coefficients_values( ax.get_legend_handles_labels()[0][0], # confidence interval ] plt.legend(handles=legend_patches, loc="lower right", framealpha=1) + 
plt.close(fig) return fig @@ -280,12 +322,11 @@ def plot_coefficients_significance( df_coef: pd.DataFrame, alpha: float = 0.05, log_scale: bool = False, + title: str = "Coefficients' Significance", ) -> plt.Figure: fig, ax = plt.subplots(nrows=1, ncols=1) - fig.suptitle( - "Coefficients' Statistical Significance " f"({100*(1 - alpha):.0f}% Confidence Level)" - ) + fig.suptitle(title + f" ({100*(1 - alpha):.0f}% Confidence Level)") colors_dict = {"fail": "orange", "pass": "limegreen", "threshold": "crimson"} df_plot = df_coef.sort_values(by="Absolute Coefficients", ascending=True) @@ -318,12 +359,15 @@ def plot_coefficients_significance( ax.set_axisbelow(True) legend_patches = [ ax.get_legend_handles_labels()[0][0], # confidence level line - mpatches.Patch(color=colors_dict["pass"], label="Coefficient is statistically significant"), mpatches.Patch( - color=colors_dict["fail"], label="Coefficient is not statistically significant" + color=colors_dict["pass"], label="Coefficient value is statistically significant" + ), + mpatches.Patch( + color=colors_dict["fail"], label="Coefficient value is not statistically significant" ), ] plt.legend(handles=legend_patches, framealpha=0.75) + plt.close(fig) return fig @@ -341,6 +385,7 @@ def plot_eval_metrics_xgb(eval_results: dict, eval_metrics: dict) -> plt.Figure: ax.set_xlabel("Iterations") ax.legend() plt.suptitle("Convergence during XGBoost Model Training", y=1.05) + plt.close(fig) return fig @@ -350,7 +395,8 @@ def plot_shap_importance( ) -> plt.Figure: fig, ax = plt.subplots(figsize=(5, max(shap_values.values.shape[1] / 2, 3))) ax.set_title(title, pad=15) - shap.plots.bar(shap_values, ax=ax, **kwargs) + shap.plots.bar(shap_values, show=False, ax=ax, **kwargs) + plt.close(fig) return fig @@ -363,12 +409,13 @@ def plot_shap_beeswarm( ) ax.set_title(title, pad=15) fig = plt.gcf() + plt.close(fig) return fig def plot_gain_metric_xgb( - xgb_estimator: Any, + xgb_estimator: Union["XGBClassifier", "XGBRegressor"], # noqa: F821 
X_test_: pd.DataFrame, title: str = "XGBoost Feature Importance (Gain metric)", ) -> plt.Figure: @@ -382,6 +429,7 @@ def plot_gain_metric_xgb( ax.xaxis.grid(True) ax.set_axisbelow(True) plt.title(title) + plt.close(fig) return fig @@ -437,6 +485,7 @@ def plot_confusion_matrix( plt.tight_layout() plt.ylabel("True label", labelpad=10) plt.xlabel("Predicted label", labelpad=15) + plt.close(fig) return fig @@ -445,7 +494,7 @@ def build_ks_table( y_true: pd.Series | np.ndarray, y_pred_proba: pd.Series | np.ndarray, n_bins: int = 10, - ret_ks: bool = False, + return_ks: bool = False, ) -> pd.DataFrame | tuple[pd.DataFrame, np.float64]: if isinstance(y_true, pd.Series): @@ -486,7 +535,7 @@ def build_ks_table( ks = ks_table["diff"].max() - if ret_ks: + if return_ks: return ks_table, ks else: return ks_table @@ -498,7 +547,7 @@ def beautify_ks_table(ks_table: pd.DataFrame) -> pd.DataFrame: def flag(x): return "<--" if x == ks_table["diff"].max() else "" - ks_table["KS"] = ks_table["diff"].apply(flag) + ks_table["KS Gain"] = ks_table["diff"].apply(flag) for pct_col in ["positive_rate", "negative_rate", "cumpct_positives", "cumpct_negatives"]: ks_table[pct_col] = ks_table[pct_col].apply("{0:.2%}".format) @@ -510,13 +559,13 @@ def flag(x): return ks_table -def compute_ks_score( +def compute_ks_gain_score( y_true: pd.Series | np.ndarray, y_pred_proba: pd.Series | np.ndarray, n_bins: int = 10, ) -> np.float64: - _, ks = build_ks_table(y_true=y_true, y_pred_proba=y_pred_proba, n_bins=n_bins, ret_ks=True) + _, ks = build_ks_table(y_true=y_true, y_pred_proba=y_pred_proba, n_bins=n_bins, return_ks=True) return ks @@ -561,15 +610,17 @@ def plot_ks_table(ks_table: pd.DataFrame, figsize: tuple[int, int] = (7, 5)) -> color=color_lst[2], linestyle="--", linewidth=1.5, - label=f"Max KS Statistic ({ks_max:.1f})", + label=f"Max KS Gain ({ks_max:.1f})", ) ax.xaxis.set_major_formatter(mticker.PercentFormatter()) ax.yaxis.set_major_formatter(mticker.PercentFormatter()) # Customize the plot 
ax.set_xlabel("Predicted Probability") ax.set_ylabel("Cumulative Percentage") - ax.set_title(f"KS Gain Plot (KS Statistic = {ks_max:.3f})") + ax.set_title(f"KS Gain Plot (Max Gain = {ks_max:.3f})") ax.legend() ax.grid(True) + plt.close(fig) + return fig diff --git a/src/utils/feature_selection.py b/src/utils/feature_selection.py index cc44230..ad8904f 100644 --- a/src/utils/feature_selection.py +++ b/src/utils/feature_selection.py @@ -14,8 +14,8 @@ def _remove_features_with_l1_regularization( - df_input: pd.DataFrame, - target_col: str, + X: pd.DataFrame, + y: pd.Series, l1_params: dict, ) -> list[str]: @@ -28,8 +28,6 @@ def _remove_features_with_l1_regularization( random_seed = l1_params["random_seed"] # split data - X = df_input.drop(columns=[target_col]) - y = df_input[target_col] X_train, X_test, y_train, y_test = train_test_split( X, y, @@ -50,23 +48,26 @@ def _remove_features_with_l1_regularization( metrics_dict = dict() # define L1-based linear model ad its evaluation metric - if problem.lower() == "classification": - LinearModel = LinearSVC - model_params = dict(penalty="l1") - search_arg = "C" - eval_metric_fn = f1_score - eval_metric_greater_is_better = True - elif problem.lower() == "regression": - LinearModel = Lasso - model_params = dict() - search_arg = "alpha" - eval_metric_fn = root_mean_squared_error - eval_metric_greater_is_better = False - else: - raise ValueError( - "Argument 'problem' must be either 'classification' or 'regression'. " - f"Got {problem} instead." 
- ) + match problem.lower().strip(): + case "classification": + LinearModel = LinearSVC + model_params = dict(penalty="l1") + search_arg = "C" + eval_metric_fn = f1_score + eval_metric_params = dict(average="weighted") + eval_metric_greater_is_better = True + case "regression": + LinearModel = Lasso + model_params = dict() + search_arg = "alpha" + eval_metric_fn = root_mean_squared_error + eval_metric_params = dict() + eval_metric_greater_is_better = False + case _: + raise ValueError( + "Argument 'problem' must be either 'classification' or 'regression'. " + f"Got {problem} instead." + ) for i, search_val in enumerate(logspace_values, start=1): # Fit model and make predictions @@ -74,7 +75,7 @@ def _remove_features_with_l1_regularization( model = LinearModel(**model_params, random_state=random_seed) model.fit(X_train_std, y_train) y_pred = model.predict(X_test_std) - eval_metric = eval_metric_fn(y_test, y_pred) + eval_metric = eval_metric_fn(y_test, y_pred, **eval_metric_params) s_coef = pd.Series(data=np.mean(model.coef_, axis=0), index=model.feature_names_in_, name=i) coef_lst.append(s_coef) @@ -158,53 +159,59 @@ def _get_high_vif_features( return high_vif_feats -def _run_manual_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: - orig_shp = df.shape +def _run_manual_filter(X: pd.DataFrame, y: pd.Series, params: dict) -> list[str]: + target_col = y.name + orig_shp = X.shape cols_to_exclude = params["cols_to_exclude"] if not isinstance(cols_to_exclude, list): - cols_to_exclude = [] + cols_to_exclude = [cols_to_exclude] elif len(cols_to_exclude) > 0: # cannot remove target if target_col in cols_to_exclude: cols_to_exclude = [col for col in cols_to_exclude if col != target_col] + # check if any of the features to exclude are not in the dataframe + if not set(cols_to_exclude).issubset(X.columns.tolist()): + raise ValueError( + "Some features to exclude in the 'manual_filter' are not present in the input table: " + f"{set(cols_to_exclude) - 
set(X.columns.tolist())}" + ) + logger.info( - f" - Removing {len(cols_to_exclude)} " - f"({100 * len(cols_to_exclude) / (orig_shp[1] - 1):.2f}%) feature(s) manually..." + f" - Removing {len(cols_to_exclude)} ({100 * len(cols_to_exclude) / orig_shp[1]:.1f}%) " + f"feature(s) manually: {cols_to_exclude}" ) return cols_to_exclude -def _run_variance_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: - orig_shp = df.shape - var_threshold = params["threshold"] +def _run_null_variance_filter(X: pd.DataFrame, y: pd.Series, params: dict) -> list[str]: + orig_shp = X.shape - s_var = df.drop(columns=[target_col]).var(axis=0) - low_var_cols = s_var[s_var <= var_threshold].index.tolist() + s_var = X.var(axis=0) + low_var_cols = s_var[s_var == 0].index.tolist() logger.info( - f" - Removing {len(low_var_cols):,} ({100 * len(low_var_cols) / (orig_shp[1] - 1):.1f}%)" - f" feature(s) with variance <= {var_threshold} ..." + f" - Removing {len(low_var_cols):,} ({100 * len(low_var_cols) / orig_shp[1]:.1f}%) " + f"feature(s) with null variance (var == 0): {low_var_cols}" ) return low_var_cols -def _run_correlation_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: - orig_shp = df.shape +def _run_correlation_filter(X: pd.DataFrame, y: pd.Series, params: dict) -> list[str]: + orig_shp = X.shape corr_threshold = params["threshold"] # compute pearson correlation with target - df_feats = df.drop(columns=[target_col]) - s_corr_target = df_feats.corrwith(df[target_col]) + s_corr_target = X.corrwith(y) # rank features based on correlation with target (from worst to best) ranked_feats = s_corr_target.dropna().abs().sort_values(ascending=True).index.tolist() logger.info(f" Running Correlation filter with threshold of {corr_threshold}") high_corr_cols = [] for feat in ranked_feats: - s_corr_feat = df_feats.drop(columns=[feat]).corrwith(df_feats[feat]).dropna() + s_corr_feat = X.drop(columns=[feat]).corrwith(X[feat]).dropna() feat_max_corr = 
s_corr_feat.abs().max() feat_idxmax_corr = s_corr_feat.abs().idxmax() @@ -214,50 +221,53 @@ def _run_correlation_filter(df: pd.DataFrame, target_col: str, params: dict) -> f"{s_corr_feat.loc[feat_idxmax_corr]:+.4f} to '{feat_idxmax_corr}'" ) high_corr_cols.append(feat) - df_feats = df_feats.drop(columns=[feat]) + X = X.drop(columns=[feat]) logger.info( - f" - Removing {len(high_corr_cols):,} " - f"({100 * len(high_corr_cols) / (orig_shp[1] - 1):.1f}%) feature(s) with " - f"abs(correlation) > {corr_threshold} ..." + f" - Removing {len(high_corr_cols):,} ({100 * len(high_corr_cols) / orig_shp[1]:.1f}%) " + f"feature(s) with abs(correlation) > {corr_threshold}" ) return high_corr_cols -def _run_l1_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: - orig_shp = df.shape - l1_feats_to_drop = _remove_features_with_l1_regularization(df, target_col, params) +def _run_l1_filter(X: pd.DataFrame, y: pd.Series, params: dict) -> list[str]: + orig_shp = X.shape + l1_feats_to_drop = _remove_features_with_l1_regularization(X=X, y=y, l1_params=params) logger.info( - f" - Removing {len(l1_feats_to_drop):,} " - f"({100 * len(l1_feats_to_drop) / orig_shp[1]:.1f}%)" - f" feature(s) with null coefficient after L1 regularization ...\n" + f" - Removing {len(l1_feats_to_drop):,} ({100 * len(l1_feats_to_drop) / orig_shp[1]:.1f}%)" + f" feature(s) with null coefficient after L1 regularization: {l1_feats_to_drop}" ) return l1_feats_to_drop -def _run_vif_filter(df: pd.DataFrame, target_col: str, params: dict) -> list[str]: - orig_shp = df.shape +def _run_vif_filter(X: pd.DataFrame, y: pd.Series, params: dict) -> list[str]: + orig_shp = X.shape vif_threshold = params["threshold"] - high_vif_feats = _get_high_vif_features(df.drop(columns=[target_col]), threshold=vif_threshold) + high_vif_feats = _get_high_vif_features(X=X, threshold=vif_threshold) logger.info( - f" - Removing {len(high_vif_feats):,} " - f"({100 * len(high_vif_feats) / (orig_shp[1] - 1):.1f}%)" - f" feature(s) 
with VIF >= {vif_threshold:,.0f} ..." + f" - Removing {len(high_vif_feats):,} ({100 * len(high_vif_feats) / orig_shp[1]:.1f}%) " + f"feature(s) with VIF >= {vif_threshold:,.0f}" ) return high_vif_feats def run_feature_selection_steps( - df_input: pd.DataFrame, target_col: str, fs_steps: dict + X: pd.DataFrame, y: pd.Series, fs_steps: dict ) -> tuple[list[str], pd.DataFrame]: + + # build feature selection log table + target_col = y.name + orig_shp = X.shape + df_fs = pd.DataFrame(index=X.columns.tolist() + [target_col]).assign(filter="", step=0) + # define available filter functions fs_functions = { "manual": _run_manual_filter, - "variance": _run_variance_filter, + "null_variance": _run_null_variance_filter, "correlation": _run_correlation_filter, "l1_regularization": _run_l1_filter, "vif": _run_vif_filter, @@ -271,24 +281,21 @@ def run_feature_selection_steps( ) # run feature selection steps - logger.info( - "--> Starting the Feature Selection process with " - f"{df_input.drop(columns=[target_col]).shape[1]:,} features" - ) - # build feature selection log table - df_fs = pd.DataFrame(index=df_input.columns).assign(filter="", step=0) - df = df_input.copy() + logger.info(f"--> Starting Feature Selection with {orig_shp[1]:,} features") + for step, (filter_name, filter_params) in enumerate(fs_steps.items(), start=1): logger.info(f"{step}. 
{filter_name.upper()} FILTER") - removed_feats = fs_functions[filter_name](df, target_col, params=filter_params) - df = df.drop(columns=removed_feats) + removed_feats = fs_functions[filter_name](X=X.copy(), y=y.copy(), params=filter_params) + X = X.drop(columns=removed_feats) df_fs.loc[removed_feats, ["filter", "step"]] = (filter_name, step) - selected_feats = df.drop(columns=[target_col]).columns.tolist() + selected_feats = X.columns.tolist() logger.info( - "--> Completed the Feature Selection process with " - f"{len(selected_feats):,} selected features" + f"--> Completed Feature Selection with {len(selected_feats):,} selected features " + f"({100 * len(selected_feats) / orig_shp[1]:.1f}% of the original {orig_shp[1]} features): " + f"{selected_feats}" ) df_fs.loc[selected_feats, ["filter", "step"]] = ("Selected feature", -1) + df_fs.loc[target_col, ["filter", "step"]] = ("Target Column", -1) return selected_feats, df_fs From d373f2980a1b33bdd3733b65454bf94e1b5d5c99 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Sun, 30 Mar 2025 21:51:57 -0300 Subject: [PATCH 06/10] feat: Regression use case (#6) * updating classification dataset description * regression use case * update readme --- .vscode/settings.json | 5 + README.md | 2 +- src/classification-binary.ipynb | 80 +-- src/classification-multiclass.ipynb | 80 +-- src/regression.ipynb | 744 ++++++++++++++++++++++++++++ src/utils/constants.py | 2 +- src/utils/evals.py | 96 +++- src/utils/feature_selection.py | 27 +- 8 files changed, 929 insertions(+), 107 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 src/regression.ipynb diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..64c5a51 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "cSpell.words": [ + "pvalues" + ] +} diff --git a/README.md b/README.md index 40ade90..4929c06 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This 
repository compiles simple and practical examples for common data science tasks using tabular data: 1. Binary Classification 2. Multiclass Classification -3. (WIP) Regression +3. Regression 4. Clustering 5. (WIP) Dimensionality Reduction diff --git a/src/classification-binary.ipynb b/src/classification-binary.ipynb index 9a287d9..b6cc044 100644 --- a/src/classification-binary.ipynb +++ b/src/classification-binary.ipynb @@ -57,7 +57,7 @@ " plot_confusion_matrix,\n", " plot_target_rate,\n", " compute_binary_classification_metrics,\n", - " build_logit_coefficients_table,\n", + " build_coefficients_table,\n", " plot_coefficients_values,\n", " plot_coefficients_significance,\n", " plot_eval_metrics_xgb,\n", @@ -87,7 +87,7 @@ "logger = logging.getLogger(__name__)\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", - "pd.options.display.float_format = \"{:.2f}\".format\n", + "pd.options.display.float_format = \"{:,.2f}\".format\n", "\n", "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", "plt.set_loglevel(\"WARNING\")\n", @@ -106,10 +106,10 @@ "source": [ "## 1. Load Data\n", "\n", - "In this notebook we will use the Fetal Health Dataset.\n", + "In this notebook, we will use the **Fetal Health Dataset**. This dataset comprises 2126 records of features from Cardiotocogram exams, classified by experts into Normal, Suspect, and Pathological to assess fetal health and help reduce child and maternal mortality.\n", "\n", "Sources:\n", - "1. Fetal Health Classification Dataset: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", + "1. Kaggle: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", "2. 
Original article: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC68223152" ] }, @@ -354,7 +354,7 @@ " \"correlation\": {\"threshold\": 0.8},\n", " \"vif\": {\"threshold\": 2},\n", " \"l1_regularization\": {\n", - " \"problem\": \"classification\",\n", + " \"problem_type\": \"classification\",\n", " \"train_test_split_params\": {\"test_size\": test_size},\n", " \"logspace_search\": {\"start\": -5, \"stop\": 1, \"num\": 20, \"base\": 10},\n", " # tolerance over minimum error with which to search for the best model\n", @@ -482,7 +482,7 @@ "\n", "- **Logistic Regression**: In binary classification with imbalanced classes, avoid setting `class_weight=\"balanced\"` if you want to use the model's predicted probabilities as proxies for the real probability distributions of the target classes, that is, if you want to interpret the predicted probability as \"the actual probability that the sample belongs to the class\". In this case, you should not use 50% as the threshold for the binary classification; you should find the optimal threshold using the ROC Curve (detailed below) to maximize the model's performance.\n", "\n", - "- **XGBoost**: For detailed explanation on XGBoost's parameters: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" + "- **XGBoost**: For a detailed explanation of XGBoost's parameters, refer to: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" ] }, { @@ -508,9 +508,9 @@ " \"max_depth\": [3, 4, 6],\n", " \"min_child_weight\": [2, 4],\n", " \"gamma\": [0, 0.5],\n", - " \"alpha\":[0, 0.3],\n", + " \"alpha\": [0, 0.3],\n", " \"scale_pos_weight\": [1],\n", - " \"lambda\":[1],\n", + " \"lambda\": [1],\n", " ## \"subsample\": [0.8, 1.0],\n", " ## \"colsample_bytree\": [0.8, 1.0],\n", " \"verbosity\": [0],\n", @@ -519,10 +519,18 @@ " raise model_selection_error" ] }, + { + "cell_type": "markdown", + "id": "34", + "metadata": {}, + "source": [ + "For the full list of 
scikit-learn's scoring string names, refer to: https://scikit-learn.org/stable/modules/model_evaluation.html#string-name-scorers" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "34", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -539,7 +547,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -563,7 +571,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -578,7 +586,7 @@ }, { "cell_type": "markdown", - "id": "37", + "id": "38", "metadata": {}, "source": [ "### Final Model" @@ -587,7 +595,7 @@ { "cell_type": "code", "execution_count": null, - "id": "38", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -615,7 +623,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -629,7 +637,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -639,7 +647,7 @@ }, { "cell_type": "markdown", - "id": "41", + "id": "42", "metadata": {}, "source": [ "**Plot target rate per group of predicted probability**\n", @@ -650,7 +658,7 @@ { "cell_type": "code", "execution_count": null, - "id": "42", + "id": "43", "metadata": {}, "outputs": [], "source": [ @@ -659,7 +667,7 @@ }, { "cell_type": "markdown", - "id": "43", + "id": "44", "metadata": {}, "source": [ "**Define optimal threshold for separating classes using the ROC Curve**\n", @@ -670,7 +678,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -687,7 +695,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -699,7 +707,7 @@ }, { "cell_type": "markdown", - "id": "46", + "id": "47", "metadata": {}, "source": [ "### Feature Importance\n", @@ -711,16 +719,18 @@ 
{ "cell_type": "code", "execution_count": null, - "id": "47", + "id": "48", "metadata": {}, "outputs": [], "source": [ "if MODEL_SELECTION == \"logistic_regression\":\n", - " df_coefficients = build_logit_coefficients_table(\n", + " df_coefficients = build_coefficients_table(\n", " coefficients=model.coef_[0],\n", " intercept=model.intercept_[0],\n", " X_train=X_train,\n", - " y_pred_proba_train=y_pred_proba_train,\n", + " y_pred_train=y_pred_proba_train,\n", + " y_train=y_train,\n", + " problem_type=\"classification\",\n", " )\n", " display(plot_coefficients_values(df_coefficients))\n", " display(plot_coefficients_significance(df_coefficients, log_scale=False))\n", @@ -740,7 +750,7 @@ }, { "cell_type": "markdown", - "id": "48", + "id": "49", "metadata": {}, "source": [ "### Performance Metrics" @@ -749,7 +759,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -767,7 +777,7 @@ }, { "cell_type": "markdown", - "id": "50", + "id": "51", "metadata": {}, "source": [ "#### Confusion Matrix" @@ -776,7 +786,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -794,7 +804,7 @@ }, { "cell_type": "markdown", - "id": "52", + "id": "53", "metadata": {}, "source": [ "#### ROC AUC" @@ -803,7 +813,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -812,7 +822,7 @@ }, { "cell_type": "markdown", - "id": "54", + "id": "55", "metadata": {}, "source": [ "#### KS Gain" @@ -821,7 +831,7 @@ { "cell_type": "code", "execution_count": null, - "id": "55", + "id": "56", "metadata": {}, "outputs": [], "source": [ @@ -833,7 +843,7 @@ { "cell_type": "code", "execution_count": null, - "id": "56", + "id": "57", "metadata": {}, "outputs": [], "source": [ @@ -843,7 +853,7 @@ { "cell_type": "code", "execution_count": null, - "id": "57", + "id": "58", "metadata": {}, "outputs": 
[], "source": [] @@ -851,7 +861,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58", + "id": "59", "metadata": {}, "outputs": [], "source": [] diff --git a/src/classification-multiclass.ipynb b/src/classification-multiclass.ipynb index 3873719..c66f3f1 100644 --- a/src/classification-multiclass.ipynb +++ b/src/classification-multiclass.ipynb @@ -58,7 +58,7 @@ " plot_confusion_matrix,\n", " plot_target_rate,\n", " compute_multiclass_classification_metrics,\n", - " build_logit_coefficients_table,\n", + " build_coefficients_table,\n", " plot_coefficients_values,\n", " plot_coefficients_significance,\n", " plot_eval_metrics_xgb,\n", @@ -85,7 +85,7 @@ "logger = logging.getLogger(__name__)\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", - "pd.options.display.float_format = \"{:.2f}\".format\n", + "pd.options.display.float_format = \"{:,.2f}\".format\n", "\n", "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", "plt.set_loglevel(\"WARNING\")\n", @@ -104,10 +104,10 @@ "source": [ "## 1. Load Data\n", "\n", - "In this notebook we will use the Fetal Health Dataset.\n", + "In this notebook, we will use the **Fetal Health Dataset**. This dataset comprises 2126 records of features from Cardiotocogram exams, classified by experts into Normal, Suspect, and Pathological to assess fetal health and help reduce child and maternal mortality.\n", "\n", "Sources:\n", - "1. Fetal Health Classification Dataset: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", + "1. Kaggle: https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification\n", "2. 
Original article: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC68223152" ] }, @@ -303,7 +303,7 @@ " df_input=df_input_train, # use only training data to avoid bias in test results\n", " class_col=target_col,\n", " class_mapping=target_classes_dict,\n", - " plots_per_line=6,\n", + " plots_per_line=5,\n", " title=\"Features in input dataset\",\n", " )\n", ")" @@ -350,22 +350,20 @@ "fs_steps = {\n", " \"manual\": {\n", " \"cols_to_exclude\": [\n", - " \"percentage_of_time_with_abnormal_long_term_variability\",\n", - " \"prolongued_decelerations\",\n", - " # \"histogram_variance\",\n", + " \"severe_decelerations\",\n", " ]\n", " },\n", " \"null_variance\": None,\n", - " \"correlation\": {\"threshold\": 0.8},\n", + " \"correlation\": {\"threshold\": 0.75},\n", " \"vif\": {\"threshold\": 2},\n", " \"l1_regularization\": {\n", - " \"problem\": \"classification\",\n", + " \"problem_type\": \"classification\",\n", " \"train_test_split_params\": {\"test_size\": test_size},\n", " \"logspace_search\": {\"start\": -5, \"stop\": 1, \"num\": 20, \"base\": 10},\n", " # tolerance over minimum error with which to search for the best model\n", - " \"error_tolerance_pct\": 0.02,\n", + " \"error_tolerance_pct\": 0.05,\n", " # minimum features to keep in final selection\n", - " \"min_feats_to_keep\": 4,\n", + " \"min_feats_to_keep\": 3,\n", " \"random_seed\": RANDOM_SEED,\n", " },\n", "}" @@ -514,7 +512,7 @@ "source": [ "### Hyperparameter tuning with K-Fold Cross Validation\n", "\n", - "For detailed explanation on XGBoost's parameters: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" + "For a detailed explanation of XGBoost's parameters, refer to: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" ] }, { @@ -542,9 +540,9 @@ " \"max_depth\": [3, 4, 6],\n", " \"min_child_weight\": [2, 4],\n", " \"gamma\": [0, 0.5],\n", - " \"alpha\":[0, 0.3],\n", + " \"alpha\": [0, 0.3],\n", " \"scale_pos_weight\": 
[1],\n", - " \"lambda\":[1],\n", + " \"lambda\": [1],\n", " ## \"subsample\": [0.8, 1.0],\n", " ## \"colsample_bytree\": [0.8, 1.0],\n", " \"verbosity\": [0],\n", @@ -553,10 +551,18 @@ " raise model_selection_error" ] }, + { + "cell_type": "markdown", + "id": "35", + "metadata": {}, + "source": [ + "For the full list of scikit-learn's scoring string names, refer to: https://scikit-learn.org/stable/modules/model_evaluation.html#string-name-scorers" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -579,7 +585,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -603,7 +609,7 @@ { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -618,7 +624,7 @@ }, { "cell_type": "markdown", - "id": "38", + "id": "39", "metadata": {}, "source": [ "### Final Model" @@ -627,7 +633,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -655,7 +661,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -680,7 +686,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -690,7 +696,7 @@ }, { "cell_type": "markdown", - "id": "42", + "id": "43", "metadata": {}, "source": [ "**Plot target rate per group of predicted probability**\n", @@ -701,7 +707,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -714,7 +720,7 @@ }, { "cell_type": "markdown", - "id": "44", + "id": "45", "metadata": {}, "source": [ "### Feature Importance\n", @@ -726,7 +732,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -736,11 +742,13 @@ 
" ):\n", " label = f\"Class '{target_classes_dict[clss]}'\"\n", " print(label)\n", - " df_coefficients = build_logit_coefficients_table(\n", + " df_coefficients = build_coefficients_table(\n", " coefficients=coefficients,\n", " intercept=intercept,\n", " X_train=X_train,\n", - " y_pred_proba_train=y_pred_proba_train,\n", + " y_pred_train=y_pred_proba_train[clss],\n", + " y_train=y_train_ohe[clss],\n", + " problem_type=\"classification\",\n", " )\n", " display(\n", " plot_coefficients_values(\n", @@ -780,7 +788,7 @@ }, { "cell_type": "markdown", - "id": "46", + "id": "47", "metadata": {}, "source": [ "### Performance Metrics" @@ -789,7 +797,7 @@ { "cell_type": "code", "execution_count": null, - "id": "47", + "id": "48", "metadata": {}, "outputs": [], "source": [ @@ -807,7 +815,7 @@ }, { "cell_type": "markdown", - "id": "48", + "id": "49", "metadata": {}, "source": [ "#### Confusion Matrix" @@ -816,7 +824,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -834,7 +842,7 @@ }, { "cell_type": "markdown", - "id": "50", + "id": "51", "metadata": {}, "source": [ "#### ROC AUC" @@ -843,7 +851,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -859,7 +867,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52", + "id": "53", "metadata": {}, "outputs": [], "source": [] @@ -867,7 +875,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53", + "id": "54", "metadata": {}, "outputs": [], "source": [] diff --git a/src/regression.ipynb b/src/regression.ipynb new file mode 100644 index 0000000..aa2266d --- /dev/null +++ b/src/regression.ipynb @@ -0,0 +1,744 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": { + "tags": [] + }, + "source": [ + "# Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [] + }, + "outputs": [], + 
"source": [ + "import os\n", + "import logging\n", + "import warnings\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import shap\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.linear_model import ElasticNet\n", + "from sklearn.model_selection import train_test_split, RepeatedKFold, GridSearchCV\n", + "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", + "from xgboost import XGBRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from utils.constants import RANDOM_SEED\n", + "from utils.common import (\n", + " get_data_folder_path,\n", + " set_plotting_config,\n", + " plot_boxplot_by_class,\n", + " plot_correlation_matrix,\n", + ")\n", + "from utils.evals import (\n", + " describe_input_features,\n", + " compute_regression_metrics,\n", + " build_coefficients_table,\n", + " plot_coefficients_values,\n", + " plot_coefficients_significance,\n", + " plot_eval_metrics_xgb,\n", + " plot_gain_metric_xgb,\n", + " plot_shap_importance,\n", + " plot_shap_beeswarm,\n", + ")\n", + "from utils.feature_selection import run_feature_selection_steps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "logging.basicConfig(\n", + " level=logging.INFO,\n", + " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", + " datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:,.2f}\".format\n", + "\n", + "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", + "plt.set_loglevel(\"WARNING\")\n", + "\n", + "# plots configuration\n", + "sns.set_style(\"darkgrid\")\n", + "sns.set_palette(\"colorblind\")\n", + 
"set_plotting_config()\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## 1. Load Data\n", + "\n", + "In this notebook, we will use the **Medical Insurance Payout Dataset**. This dataset contains historical data for over 1300 insurance customers (age, sex, BMI, number of children, smoking habits, and region) along with their actual medical charges. i.e., the expenditure for the customer (target variable).\n", + "\n", + "Sources:\n", + "1. Kaggle: https://www.kaggle.com/datasets/harshsingh2209/medical-insurance-payout\n", + "2. Original source: https://raw.githubusercontent.com/JovianML/opendatasets/master/data/medical-charges.csv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = get_data_folder_path()\n", + "\n", + "df_input = pd.read_csv(os.path.join(data_path, \"expenses.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "# convert categorical columns into numerical\n", + "df_input[\"is_male\"] = (df_input[\"sex\"] == \"male\").astype(np.int8)\n", + "df_input[\"is_smoker\"] = (df_input[\"smoker\"] == \"yes\").astype(np.int8)\n", + "df_input = (\n", + " pd.concat([\n", + " df_input.drop(columns=[\"sex\", \"smoker\", \"region\"]),\n", + " pd.get_dummies(df_input[\"region\"], prefix=\"region\", dtype=np.int8)\n", + " ], axis=1)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": { + "tags": [] + }, + "source": [ + "## 2. Process Data" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "### Target column\n", + "\n", + "The target column is the medical charges for each customer. We want to build a model capable of predicting medical charges for new customers in order to help the insurance company to determine their pricing strategy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "target_col = \"charges\"\n", + "test_size = 0.20" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "### Train test split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "df_input_train, df_input_test = train_test_split(\n", + " df_input,\n", + " test_size=test_size,\n", + " random_state=RANDOM_SEED,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "describe_input_features(df_input, df_input_train, df_input_test)" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "### Scaling (Standardization)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# Standardize training and test data\n", + "stdscaler = StandardScaler()\n", + "\n", + "# training data\n", + "y_train = df_input_train[target_col]\n", + "X_train_all = (\n", + " pd.DataFrame(\n", + " # fit scaler on training data (and then transform training data)\n", + " data=stdscaler.fit_transform(df_input_train),\n", + " columns=df_input_train.columns,\n", + " index=df_input_train.index\n", + " )\n", + " # remove target from the model input features table\n", + " .drop(columns=[target_col])\n", + ")\n", + "\n", + "# test data\n", + "y_test = df_input_test[target_col]\n", + "X_test_all = (\n", + " pd.DataFrame(\n", + " # use scaler fitted on training data to transform test data\n", + " data=stdscaler.transform(df_input_test),\n", + " columns=df_input_test.columns,\n", + " index=df_input_test.index\n", + " )\n", + " # remove target from the model input features table\n", + " .drop(columns=[target_col])\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": 
{}, + "source": [ + "## 3. Exploratory Data Analysis (EDA)" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "### Boxplots by Target Class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "# use only training data to avoid bias in test results\n", + "df_boxplot = df_input_train.copy()\n", + "\n", + "# get target quartiles\n", + "df_boxplot[\"charges_quartiles\"] = pd.qcut(\n", + " df_boxplot[\"charges\"], q=4, labels=[f\"Q{i}\" for i in range(1, 5)],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "display(\n", + " plot_boxplot_by_class(\n", + " df_input=df_boxplot,\n", + " class_col=\"charges_quartiles\",\n", + " plots_per_line=4,\n", + " title=\"Features in input dataset by medical charges quartiles\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "### Pearson's Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "display(\n", + " plot_correlation_matrix(\n", + " # use only training data to avoid bias in test results\n", + " df=df_input_train, method=\"pearson\", fig_height=10\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "## 4. 
Feature Selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "fs_steps = {\n", + " \"manual\": {\n", + " \"cols_to_exclude\": []\n", + " },\n", + " \"null_variance\": None,\n", + " \"correlation\": {\"threshold\": 0.8},\n", + " \"vif\": {\"threshold\": 2},\n", + " \"l1_regularization\": {\n", + " \"problem_type\": \"regression\",\n", + " \"train_test_split_params\": {\"test_size\": test_size},\n", + " \"logspace_search\": {\"start\": -3, \"stop\": 3, \"num\": 20, \"base\": 10},\n", + " # tolerance over minimum error with which to search for the best model\n", + " \"error_tolerance_pct\": 0.05,\n", + " # minimum features to keep in final selection\n", + " \"min_feats_to_keep\": 4,\n", + " \"random_seed\": RANDOM_SEED,\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "selected_feats, df_fs = run_feature_selection_steps(\n", + " # use only training data to avoid bias in test results\n", + " X=X_train_all,\n", + " y=y_train,\n", + " fs_steps=fs_steps\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "# build model input datasets\n", + "X_train = X_train_all[selected_feats]\n", + "X_test = X_test_all[selected_feats]" + ] + }, + { + "cell_type": "markdown", + "id": "25", + "metadata": {}, + "source": [ + "### Correlation check\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26", + "metadata": {}, + "outputs": [], + "source": [ + "display(\n", + " plot_correlation_matrix(\n", + " # use only training data to avoid bias in test results\n", + " df=df_input_train[selected_feats + [target_col]], method=\"pearson\", fig_height=5\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "27", + "metadata": {}, + "source": [ + "### Multicollinearity check" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "# compute the Variance Inflation Factor (VIF) for each feature\n", + "df_vif = pd.DataFrame(\n", + " data=[variance_inflation_factor(X_train.values, i) for i in range(len(selected_feats))],\n", + " index=selected_feats,\n", + " columns=[\"VIF\"]\n", + ").sort_values(\"VIF\", ascending=False)\n", + "\n", + "df_vif" + ] + }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "## 5. Regression Model" + ] + }, + { + "cell_type": "markdown", + "id": "30", + "metadata": {}, + "source": [ + "### Select regressor: Linear Regression or XGBoost" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31", + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_SELECTION = \"linear_regression\"\n", + "# MODEL_SELECTION = \"xgboost\"\n", + "\n", + "model_selection_error = ValueError(\n", + " \"'MODEL_SELECTION' must be either 'linear_regression' or 'xgboost'. \"\n", + " f\"Got {MODEL_SELECTION} instead.\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "32", + "metadata": {}, + "source": [ + "### Hyperparameter tuning with K-Fold Cross Validation\n", + "\n", + "For a detailed explanation of XGBoost's parameters, refer to: https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning/notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"linear_regression\":\n", + " # ElasticNet is a linear regression model with combined L1 (Lasso)\n", + " # and L2 (Ridge) priors as regularizer\n", + " Estimator = ElasticNet\n", + " cv_search_space = {\n", + " \"alpha\": np.logspace(-4, 1, num=11, base=10.0), # 10e-4 to 10 in 11 steps\n", + " \"l1_ratio\": np.linspace(0,1,9), # 0%, 12.5%, 25%, ... 
100%\n", + " \"fit_intercept\": [True],\n", + " \"max_iter\": [2000], # use 2000 instead of defalult 1000\n", + " }\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " Estimator = XGBRegressor\n", + " cv_search_space = {\n", + " \"objective\": [\"reg:squarederror\"],\n", + " \"n_estimators\": [20, 35, 50],\n", + " \"learning_rate\": [0.1],\n", + " \"max_depth\": [3, 4, 6],\n", + " \"min_child_weight\": [2, 4],\n", + " \"gamma\": [0, 0.5],\n", + " \"alpha\": [0, 0.3],\n", + " \"scale_pos_weight\": [1],\n", + " \"lambda\": [1],\n", + " ## \"subsample\": [0.8, 1.0],\n", + " ## \"colsample_bytree\": [0.8, 1.0],\n", + " \"verbosity\": [0],\n", + " }\n", + "else:\n", + " raise model_selection_error" + ] + }, + { + "cell_type": "markdown", + "id": "34", + "metadata": {}, + "source": [ + "For the full list of scikit-learn's scoring string names, refer to: https://scikit-learn.org/stable/modules/model_evaluation.html#string-name-scorers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35", + "metadata": {}, + "outputs": [], + "source": [ + "cv_scoring_metrics = {\n", + " \"neg_mean_absolute_error\": \"Mean Absolute Error\",\n", + " \"neg_median_absolute_error\": \"Median Absolute Error\",\n", + " \"neg_mean_squared_error\": \"Mean Squared Error\",\n", + " \"neg_root_mean_squared_error\": \"Root Mean Squared Error\",\n", + " \"max_error\": \"Maximum Residual Error\",\n", + " \"r2\": \"R-squared (Coefficient of Determination)\",\n", + "}\n", + "refit_metric = \"neg_root_mean_squared_error\" # metric to optimize for the final model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# define evaluation\n", + "kfold_cv = RepeatedKFold(n_splits=3, n_repeats=1, random_state=RANDOM_SEED)\n", + "# define search\n", + "grid_search = GridSearchCV(\n", + " estimator=Estimator(),\n", + " param_grid=cv_search_space,\n", + " scoring=list(cv_scoring_metrics.keys()),\n", + " 
cv=kfold_cv,\n", + " refit=refit_metric,\n", + " verbose=1,\n", + ")\n", + "# execute search\n", + "with warnings.catch_warnings(action=\"ignore\"):\n", + " result_cv = grid_search.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Grid Search CV Best Model - Scoring Metrics:\")\n", + "for i, (metric_key, metric_name) in enumerate(cv_scoring_metrics.items(), start=1):\n", + " print(\n", + " f\" {str(i) + \".\":>2} {metric_name:.<42} \"\n", + " f\"{result_cv.cv_results_[f\"mean_test_{metric_key}\"][result_cv.best_index_]:+,.3f}\"\n", + " )\n", + "print(f\"\\nBest Hyperparameters: {result_cv.best_params_}\")" + ] + }, + { + "cell_type": "markdown", + "id": "38", + "metadata": {}, + "source": [ + "### Final Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39", + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate model with best hyperparameters and additional kwargs\n", + "if MODEL_SELECTION == \"linear_regression\":\n", + " model_kwargs = dict()\n", + " model_fit_kwargs = dict()\n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " eval_metrics = dict(\n", + " rmse=\"Root Mean Squared Error\",\n", + " mae=\"Mean Absolute Error\",\n", + " mape=\"Mean Absolute Percentage Error\",\n", + " )\n", + " model_kwargs = dict(eval_metric=list(eval_metrics.keys()))\n", + " model_fit_kwargs = dict(\n", + " eval_set=[(X_train, y_train), (X_test, y_test)],\n", + " verbose=False\n", + " )\n", + "else:\n", + " raise model_selection_error\n", + " \n", + "model = Estimator(**result_cv.best_params_, **model_kwargs, random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "# Fit model and make predictions\n", + "model.fit(X_train, y_train, **model_fit_kwargs)\n", + "# Make predictions\n", + "y_pred_train = pd.Series(data=model.predict(X_train), 
index=X_train.index)\n", + "y_pred = pd.Series(data=model.predict(X_test), index=X_test.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"xgboost\":\n", + " display(plot_eval_metrics_xgb(model.evals_result(), eval_metrics))" + ] + }, + { + "cell_type": "markdown", + "id": "42", + "metadata": {}, + "source": [ + "### Feature Importance\n", + "\n", + "- For Linear Regression: coefficients values and statistical significance\n", + "- For XGBoost: SHAP analysis and Gain Metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43", + "metadata": {}, + "outputs": [], + "source": [ + "if MODEL_SELECTION == \"linear_regression\":\n", + " df_coefficients = build_coefficients_table(\n", + " coefficients=model.coef_,\n", + " intercept=model.intercept_,\n", + " X_train=X_train,\n", + " y_pred_train=y_pred_train,\n", + " y_train=y_train,\n", + " problem_type=\"regression\",\n", + " )\n", + " display(plot_coefficients_values(df_coefficients))\n", + " display(plot_coefficients_significance(df_coefficients, log_scale=False))\n", + " \n", + "elif MODEL_SELECTION == \"xgboost\":\n", + " # compute SHAP values\n", + " explainer = shap.Explainer(model)\n", + " shap_values = explainer(X_test)\n", + " # shap plots\n", + " display(plot_shap_importance(shap_values))\n", + " display(plot_shap_beeswarm(shap_values))\n", + " display(plot_gain_metric_xgb(model, X_test))\n", + "\n", + "else:\n", + " raise model_selection_error" + ] + }, + { + "cell_type": "markdown", + "id": "44", + "metadata": {}, + "source": [ + "### Performance Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45", + "metadata": {}, + "outputs": [], + "source": [ + "df_train_metrics = pd.Series(\n", + " compute_regression_metrics(y_train, y_pred_train)\n", + ").to_frame(name=\"Train Metrics\")\n", + "df_test_metrics = pd.Series(\n", + " 
compute_regression_metrics(y_test, y_pred)\n", + ").to_frame(name=\"Test Metrics\")\n", + "\n", + "print(\"Final Model - Scoring Metrics on Train & Test Datasets:\")\n", + "df_metrics = df_train_metrics.join(df_test_metrics)\n", + "display(df_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ds", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/utils/constants.py b/src/utils/constants.py index d71ca63..45cf5d8 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -10,4 +10,4 @@ BIG_FONTSIZE = 17 # plots resolution -FIGURE_DPI = 150 +FIGURE_DPI = 100 diff --git a/src/utils/evals.py b/src/utils/evals.py index 5cc8f4e..3fbf885 100644 --- a/src/utils/evals.py +++ b/src/utils/evals.py @@ -19,6 +19,12 @@ recall_score, f1_score, confusion_matrix, + mean_absolute_error, + median_absolute_error, + mean_squared_error, + root_mean_squared_error, + max_error, + r2_score, ) from .common import convert_to_integer @@ -213,15 +219,29 @@ def compute_multiclass_classification_metrics( return metrics_dict -def _compute_logit_stderror_pvalues( +def compute_regression_metrics(y_true: pd.Series, y_pred: pd.Series) -> dict[str, float]: + metrics_dict = dict() + + metrics_dict["Mean Absolute Error"] = mean_absolute_error(y_true=y_true, y_pred=y_pred) + metrics_dict["Median Absolute Error"] = median_absolute_error(y_true=y_true, y_pred=y_pred) + metrics_dict["Mean Squared Error"] = 
mean_squared_error(y_true=y_true, y_pred=y_pred) + metrics_dict["Root Mean Squared Error"] = root_mean_squared_error(y_true=y_true, y_pred=y_pred) + metrics_dict["Maximum Residual Error"] = max_error(y_true=y_true, y_pred=y_pred) + metrics_dict["R-squared (Coefficient of Determination)"] = r2_score( + y_true=y_true, y_pred=y_pred + ) + + return metrics_dict + + +def _compute_classifier_stderror_pvalues( coefficients: np.ndarray, intercept: float, X_train: pd.DataFrame, y_pred_proba_train: pd.Series, ) -> tuple[np.ndarray, np.ndarray]: - """Calculate z-scores for a Logistic Regression and returns the - standard errors and p-values for each of the model's coefficients. - Uses asymtotic approximation for maximum likelihood estimates. + """Computes standard errors and p-values for the coefficients of a + binary classifier logistic regression model. Source: https://stackoverflow.com/a/47079198 """ @@ -235,27 +255,73 @@ def _compute_logit_stderror_pvalues( for i in range(n): ans = ans + np.dot(np.transpose(x_full[i, :]), x_full[i, :]) * p[i, 1] * p[i, 0] vcov = np.linalg.inv(np.matrix(ans)) - se = np.sqrt(np.diag(vcov)) - t = coefs / se + stderr = np.sqrt(np.diag(vcov)) + t = coefs / stderr p_values = (1 - scipy.stats.norm.cdf(abs(t))) * 2 - return se[1:], p_values[1:] # [1:] to skip the added constant + return stderr[1:], p_values[1:] # [1:] to skip the intercept -def build_logit_coefficients_table( +def _compute_regression_stderror_pvalues( coefficients: np.ndarray, intercept: float, + y_pred_train: pd.Series, + y_train: pd.Series, X_train: pd.DataFrame, - y_pred_proba_train: pd.Series, +) -> tuple[np.ndarray, np.ndarray]: + """Computes standard errors and p-values for the coefficients of a + linear regression model. 
+ + Source: https://stackoverflow.com/a/42677750 + """ + params = np.append(intercept, coefficients) + + x_full = np.append(np.ones((len(X_train), 1)), X_train, axis=1) + mse = (np.sum((y_train - y_pred_train) ** 2)) / (len(x_full) - len(x_full[0])) + + vcov = mse * (np.linalg.inv(np.dot(x_full.T, x_full)).diagonal()) + stderr = np.sqrt(vcov) + t_values = params / stderr + + p_values = np.array( + [2 * (1 - scipy.stats.t.cdf(np.abs(i), (len(x_full) - len(x_full[0])))) for i in t_values] + ) + + return stderr[1:], p_values[1:] # [1:] to skip the intercept + + +def build_coefficients_table( + coefficients: np.ndarray, + intercept: float, + X_train: pd.DataFrame, + y_pred_train: pd.Series, + y_train: pd.Series, + problem_type: str, ) -> pd.DataFrame: # compute coefficients' Standard Error and p-values - stderr, pvalues = _compute_logit_stderror_pvalues( - coefficients=coefficients, - intercept=intercept, - X_train=X_train, - y_pred_proba_train=y_pred_proba_train, - ) + match problem_type.lower().strip(): + case "classification": + stderr, pvalues = _compute_classifier_stderror_pvalues( + coefficients=coefficients, + intercept=intercept, + X_train=X_train, + y_pred_proba_train=y_pred_train, + ) + case "regression": + stderr, pvalues = _compute_regression_stderror_pvalues( + coefficients=coefficients, + intercept=intercept, + X_train=X_train, + y_pred_train=y_pred_train, + y_train=y_train, + ) + case _: + raise ValueError( + "Argument 'problem_type' must be either 'classification' or 'regression'. " + f"Got {problem_type} instead." 
+ ) + df_coef = pd.DataFrame( data={ "Coefficients": coefficients, diff --git a/src/utils/feature_selection.py b/src/utils/feature_selection.py index ad8904f..8bb4ead 100644 --- a/src/utils/feature_selection.py +++ b/src/utils/feature_selection.py @@ -7,7 +7,6 @@ from sklearn.metrics import root_mean_squared_error, f1_score from sklearn.linear_model import Lasso from sklearn.svm import LinearSVC -from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split logger = logging.getLogger(__name__) @@ -20,7 +19,7 @@ def _remove_features_with_l1_regularization( ) -> list[str]: # carregar parametros - problem = l1_params["problem"] + problem_type = l1_params["problem_type"] train_test_split_params = l1_params["train_test_split_params"] logspace_search = l1_params["logspace_search"] error_tolerance_pct = l1_params["error_tolerance_pct"] @@ -35,49 +34,39 @@ def _remove_features_with_l1_regularization( random_state=random_seed, ) - # Standardize X_train - stdscaler = StandardScaler() - X_train_std = pd.DataFrame( - stdscaler.fit_transform(X_train), columns=X.columns, index=X_train.index - ) - X_test_std = pd.DataFrame(stdscaler.transform(X_test), columns=X.columns, index=X_test.index) - # define search space logspace_values = np.logspace(**logspace_search) coef_lst = [] metrics_dict = dict() # define L1-based linear model ad its evaluation metric - match problem.lower().strip(): + match problem_type.lower().strip(): case "classification": LinearModel = LinearSVC model_params = dict(penalty="l1") search_arg = "C" eval_metric_fn = f1_score - eval_metric_params = dict(average="weighted") eval_metric_greater_is_better = True case "regression": LinearModel = Lasso model_params = dict() search_arg = "alpha" eval_metric_fn = root_mean_squared_error - eval_metric_params = dict() eval_metric_greater_is_better = False case _: raise ValueError( - "Argument 'problem' must be either 'classification' or 'regression'. " - f"Got {problem} instead." 
+ "Argument 'problem_type' must be either 'classification' or 'regression'. " + f"Got {problem_type} instead." ) for i, search_val in enumerate(logspace_values, start=1): # Fit model and make predictions model_params[search_arg] = search_val model = LinearModel(**model_params, random_state=random_seed) - model.fit(X_train_std, y_train) - y_pred = model.predict(X_test_std) - eval_metric = eval_metric_fn(y_test, y_pred, **eval_metric_params) - - s_coef = pd.Series(data=np.mean(model.coef_, axis=0), index=model.feature_names_in_, name=i) + model.fit(X_train, y_train) + y_pred = model.predict(X_test) + eval_metric = eval_metric_fn(y_test, y_pred) + s_coef = pd.Series(data=model.coef_.flatten(), index=model.feature_names_in_, name=i) coef_lst.append(s_coef) metrics_dict[i] = dict( search_val=search_val, n_zero_coefs=len(s_coef[s_coef == 0]), eval_metric=eval_metric From 1d95fcbf387af1c249caedc009ee20eea56678c4 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Tue, 1 Apr 2025 19:08:57 -0300 Subject: [PATCH 07/10] feat: Histogram Analysis use case (#7) * add vscode folder to gitignore * update readme * update clustering to use regression data * wip histogram notebook * adjust kwargs names * histogram analysis use case * verified stderr computation function --- .gitignore | 3 + .vscode/settings.json | 5 - README.md | 2 +- src/clustering.ipynb | 73 ++++++---- src/histogram_analysis.ipynb | 263 +++++++++++++++++++++++++++++++++++ src/utils/clustering.py | 4 +- src/utils/common.py | 236 ++++++++++++++++++++++++++++++- src/utils/evals.py | 14 +- 8 files changed, 561 insertions(+), 39 deletions(-) delete mode 100644 .vscode/settings.json create mode 100755 src/histogram_analysis.ipynb diff --git a/.gitignore b/.gitignore index b6bd321..f70ec60 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,6 @@ venv.bak/ # mypy .mypy_cache/ + +# VS Code +.vscode/ diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file 
mode 100644 index 64c5a51..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "cSpell.words": [ - "pvalues" - ] -} diff --git a/README.md b/README.md index 4929c06..769695e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository compiles simple and practical examples for common data science t 2. Multiclass Classification 3. Regression 4. Clustering -5. (WIP) Dimensionality Reduction +5. Histogram Analysis ## Setup diff --git a/src/clustering.ipynb b/src/clustering.ipynb index 5b7debd..8eb0ca9 100755 --- a/src/clustering.ipynb +++ b/src/clustering.ipynb @@ -17,6 +17,7 @@ "source": [ "import os\n", "import warnings\n", + "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", @@ -58,7 +59,7 @@ "id": "4", "metadata": {}, "source": [ - "## Preprocessing" + "## 1. Preprocessing" ] }, { @@ -66,7 +67,13 @@ "id": "5", "metadata": {}, "source": [ - "### Load data" + "### Load data\n", + "\n", + "In this notebook, we will use the **Medical Insurance Payout Dataset**. This dataset contains historical data for over 1300 insurance customers (age, sex, BMI, number of children, smoking habits, and region) along with their actual medical charges. i.e., the expenditure for the customer.\n", + "\n", + "Sources:\n", + "1. Kaggle: https://www.kaggle.com/datasets/harshsingh2209/medical-insurance-payout\n", + "2. 
Original source: https://raw.githubusercontent.com/JovianML/opendatasets/master/data/medical-charges.csv" ] }, { @@ -78,8 +85,7 @@ "source": [ "data_path = get_data_folder_path()\n", "\n", - "df_input = pd.read_csv(os.path.join(data_path, 'fetal_health.csv'))\n", - "df_input.columns = [col.replace(' ', '_') for col in df_input.columns]" + "df_input = pd.read_csv(os.path.join(data_path, 'expenses.csv'))" ] }, { @@ -88,22 +94,39 @@ "id": "7", "metadata": {}, "outputs": [], + "source": [ + "# convert categorical columns into numerical\n", + "df_input[\"is_male\"] = (df_input[\"sex\"] == \"male\").astype(np.int8)\n", + "df_input[\"is_smoker\"] = (df_input[\"smoker\"] == \"yes\").astype(np.int8)\n", + "df_input = (\n", + " pd.concat([\n", + " df_input.drop(columns=[\"sex\", \"smoker\", \"region\"]),\n", + " pd.get_dummies(df_input[\"region\"], prefix=\"region\", dtype=np.int8)\n", + " ], axis=1)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], "source": [ "# define columns for clustering\n", "cluster_cols = [\n", " col for col in df_input.columns\n", " # remove the target column to simulate an unsupervised problem\n", - " if col != \"fetal_health\"\n", - " # remove all histogram-derived and variability features to simplify the clustering process\n", - " and not col.startswith(\"histogram_\")\n", - " and not col.endswith(\"_variability\")\n", + " if col != \"charges\"\n", + " # remove one-hot-encoded region columns to simplify the clustering process\n", + " and not col.startswith(\"region_\")\n", "]\n", "df_cl = df_input[cluster_cols]" ] }, { "cell_type": "markdown", - "id": "8", + "id": "9", "metadata": {}, "source": [ "### Scale data (if necessary)\n", @@ -114,7 +137,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -125,17 +148,17 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "11", "metadata": { "tags": [] }, 
"source": [ - "## K-means" + "## 2. K-means Clustering" ] }, { "cell_type": "markdown", - "id": "11", + "id": "12", "metadata": {}, "source": [ "### Find best number of clusters" @@ -144,7 +167,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "13", "metadata": { "tags": [] }, @@ -155,7 +178,7 @@ }, { "cell_type": "markdown", - "id": "13", + "id": "14", "metadata": {}, "source": [ "Elbow Method implementation:\n", @@ -166,7 +189,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -184,7 +207,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -193,7 +216,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "17", "metadata": {}, "source": [ "### Fit final model" @@ -202,7 +225,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -214,7 +237,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -229,7 +252,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "20", "metadata": {}, "source": [ "### Describe clusters" @@ -238,7 +261,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -255,7 +278,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +295,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "23", "metadata": {}, "outputs": [], "source": [] @@ -280,7 +303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": {}, "outputs": [], "source": [] diff --git a/src/histogram_analysis.ipynb b/src/histogram_analysis.ipynb new file mode 100755 index 0000000..b73e1f1 --- /dev/null +++ b/src/histogram_analysis.ipynb @@ -0,0 +1,263 @@ 
+{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Histogram Analysis" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "- Histograms are one of the most powerful ways to visualize data because they provide a clear view of the distribution of values within a dataset.\n", + "- By grouping data into bins, histograms reveal the shape of the distribution, highlight central tendencies, identify potential outliers, and illustrate variability.\n", + "- This visual summary makes it easier to compare different sets of data and spot trends or anomalies that might not be apparent through summary statistics alone." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "pd.set_option(\"display.max_columns\", None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.constants import RANDOM_SEED\n", + "from utils.common import (\n", + " get_data_folder_path,\n", + " set_plotting_config,\n", + " plot_histogram,\n", + " plot_comparison_histograms\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "# plots configuration\n", + "sns.set_style(\"darkgrid\")\n", + "sns.set_palette(\"colorblind\")\n", + "set_plotting_config()\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## 1. Load Data" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "In this notebook, we will use the **Medical Insurance Payout Dataset**. 
This dataset contains historical data for over 1300 insurance customers (age, sex, BMI, number of children, smoking habits, and region) along with their actual medical charges. i.e., the expenditure for the customer.\n", + "\n", + "Sources:\n", + "1. Kaggle: https://www.kaggle.com/datasets/harshsingh2209/medical-insurance-payout\n", + "2. Original source: https://raw.githubusercontent.com/JovianML/opendatasets/master/data/medical-charges.csv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = get_data_folder_path()\n", + "\n", + "df_input = pd.read_csv(os.path.join(data_path, \"expenses.csv\"))" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## 2. Plot Histograms" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "### Distribution of Medical Charges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "fig1 = plot_histogram(\n", + " title=\"Distribution of Medical Charges\",\n", + " histogram_title=f\"(n = {len(df_input)})\",\n", + " df=df_input,\n", + " plot_col=\"charges\",\n", + " display_name=\"Medical Charges\",\n", + " display_unit=\"USD\",\n", + " bin_size=2000,\n", + " linewidth=1.5,\n", + " show_legend=True,\n", + " show_percentage=True,\n", + " show_mean=True,\n", + " show_median=True,\n", + " show_zero_line=False,\n", + " show_kde=True,\n", + " figsize=(8, 6),\n", + ")\n", + "display(fig1)" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "### Distribution of Medical Charges by Smoking Status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "fig2 = plot_histogram(\n", + " title=\"Distribution of Medical Charges by Smoking Status\",\n", + " histogram_title=f\"(n = {len(df_input)})\",\n", + " 
df=df_input,\n", + " plot_col=\"charges\",\n", + " display_name=\"Medical Charges\",\n", + " display_unit=\"USD\",\n", + " stratify_col=\"smoker\",\n", + " bin_size=2000,\n", + " linewidth=1.5,\n", + " show_legend=True,\n", + " show_mean=True,\n", + " show_percentage=False,\n", + " show_median=True,\n", + " show_zero_line=False,\n", + " show_kde=False,\n", + ")\n", + "display(fig2)" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "### COVID impact on Medical Charges" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "Let's do a (hypothetical) simulation of the impact of an event such as COVID-19 on the distribution of medical charges. Let's assume that COVID caused the average medical charge to increase by ~10% for all insurance customers in the database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "# create a random normal distribution with mean 1.1 (+10%) and std 0.1\n", + "np.random.seed(RANDOM_SEED)\n", + "random_change = np.random.normal(loc=1.1, scale=0.1, size=len(df_input))\n", + "# apply the random change to the charges to simulate COVID's impact\n", + "df_input[\"charges_new\"] = df_input[\"charges\"] * random_change\n", + "# calculate the difference\n", + "df_input[\"charges_diff\"] = df_input[\"charges_new\"] - df_input[\"charges\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "plot_comparison_histograms(\n", + " title=\"COVID-19 impact on Medical Charges\",\n", + " left_title=\"Distribution of Medical Charges Before and After COVID-19\",\n", + " right_title=\"Distribution of Medical Charges Differences\",\n", + " df=df_input,\n", + " plot_col_before=\"charges\",\n", + " plot_col_after=\"charges_new\",\n", + " plot_col_diff=\"charges_diff\",\n", + " display_name=\"Medical Charges\",\n", + " 
display_unit=\"USD\",\n", + " bin_size=2000,\n", + " show_percentage=True,\n", + " show_mean=True,\n", + " show_median=True,\n", + " figsize=(14, 6),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ds", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/utils/clustering.py b/src/utils/clustering.py index 292640e..afb7af8 100644 --- a/src/utils/clustering.py +++ b/src/utils/clustering.py @@ -22,7 +22,9 @@ def search_kmeans(df_cl: pd.DataFrame, max_n_clusters: int) -> pd.DataFrame: kmeans_dict["wcss"] = kmeans_model.inertia_ if i > 1: # save silhouette score - kmeans_dict["silhouette_score"] = silhouette_score(df_cl, kmeans_model.labels_) + kmeans_dict["silhouette_score"] = silhouette_score( + df_cl, kmeans_model.labels_, random_state=RANDOM_SEED + ) kmeans_search_lst.append(kmeans_dict) diff --git a/src/utils/common.py b/src/utils/common.py index 740a0aa..8423601 100644 --- a/src/utils/common.py +++ b/src/utils/common.py @@ -3,7 +3,8 @@ import pandas as pd import seaborn as sns import matplotlib as mpl -import matplotlib.pyplot as plt +from matplotlib import pyplot as plt +from matplotlib import ticker as mticker from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE, FIGURE_DPI @@ -147,3 +148,236 @@ def plot_correlation_matrix( plt.close(fig) return fig + + +def _bring_last_n_items_to_front(lst: list, n: int) -> list: + return [*lst[-n:], *lst[:-n]] + + +def _build_histogram( + ax: plt.Axes, + df: pd.DataFrame, + plot_col: str, + display_name: str = None, + display_unit: str = 
None, + legend_label: str = None, + stratify_col: str = None, + bin_size: int = None, + linewidth: int = 1.5, + histogram_opacity: float = 0.75, + color: str = None, + show_legend: bool = True, + show_percentage: bool = False, + show_mean: bool = True, + show_median: bool = False, + show_zero_line: bool = False, + show_kde: bool = False, +) -> plt.Axes: + + display_name = display_name or plot_col + if stratify_col: + hist_label = None + else: + hist_label = legend_label or display_name + + ax = sns.histplot( + df, + x=plot_col, + kde=show_kde, + ax=ax, + bins=np.arange( + (df[plot_col].min() // bin_size) * bin_size, (df[plot_col].max() + bin_size), bin_size + ), + color=color, + alpha=histogram_opacity, + line_kws=dict(linewidth=linewidth), + label=hist_label, + stat="percent" if show_percentage else "count", + hue=stratify_col, + multiple="stack" if stratify_col else "layer", + zorder=1, + ) + + if show_percentage: + ax.yaxis.set_major_formatter(mticker.PercentFormatter(decimals=0)) + ax.set_ylabel("Percentage") + else: + ax.set_ylabel("Count") + # add thousands separator to x-axis + ax.xaxis.set_major_formatter(mticker.StrMethodFormatter("{x:,.0f}")) + ax.set_xlabel(display_name + (f" ({display_unit})" if display_unit else "")) + + if show_mean or show_median: + s = df[plot_col] + + if not color: + color_lst = sns.color_palette() + lines_color_idx = df[stratify_col].nunique() if stratify_col else 1 + lines_color = color_lst[lines_color_idx] + else: + lines_color = color + + def format_line_label( + name: str, + value: float, + legend_label: str = legend_label, + display_unit: str = display_unit, + ) -> str: + lines_label = f"{legend_label}: " if legend_label else "" + unit = f" {display_unit}" if display_unit else "" + return f"{lines_label}{name} ({value:,.0f}{unit})" + + if show_mean is True: + # Plot vertical line for mean + mean = s.mean() + ax.axvline( + mean, + linestyle="-", + linewidth=linewidth, + color=lines_color, + label=format_line_label("Mean", 
mean), + zorder=3, + ) + if show_median is True: + # Plot dotted vertical line for median + median = s.median() + ax.axvline( + median, + linestyle="--", + linewidth=linewidth, + color=lines_color, + label=format_line_label("Median", median), + zorder=4, + ) + + if show_zero_line is True: + # Plot vertical line in zero + ax.axvline(0, linestyle=":", linewidth=linewidth / 2, color="black", label=None, zorder=2) + + if show_legend is True: + # configure legend + legend = ax.get_legend() + handles, labels = ax.get_legend_handles_labels() + if legend: + ax.legend( + handles=list(legend.get_patches()) + handles, + labels=[ + f"{stratify_col.title()}: {txt.get_text().title()}" + for txt in legend.get_texts() + ] + + labels, + title=None, + fontsize=SMALL_FONTSIZE, + ) + else: + # bring last legend item (histogram) to the front + n = 1 + ax.legend( + handles=_bring_last_n_items_to_front(lst=handles, n=n), + labels=_bring_last_n_items_to_front(lst=labels, n=n), + title=None, + fontsize=SMALL_FONTSIZE, + ) + + return ax + + +def plot_histogram( + df: pd.DataFrame, + plot_col: str, + title: str, + histogram_title: str = None, + figsize: tuple = (8, 6), + **kwargs: dict, +) -> plt.Figure: + + fig, ax = plt.subplots(1, 1, figsize=figsize) + fig.suptitle(title) + if histogram_title: + ax.set_title(histogram_title) + + ax = _build_histogram(ax=ax, df=df, plot_col=plot_col, **kwargs) + + plt.close(fig) + + return fig + + +def plot_comparison_histograms( + title: str, + left_title: str, + right_title: str, + df: pd.DataFrame, + plot_col_before: str, + plot_col_after: str, + plot_col_diff: str, + display_name: str, + figsize: tuple = (14, 6), + **kwargs: dict, +) -> plt.Figure: + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize) + fig.suptitle(title) + + # LEFT PLOT (before and after) + # before plot + before_plot_kwargs = kwargs.copy() + before_plot_kwargs.update( + dict( + ax=ax1, + df=df, + plot_col=plot_col_before, + histogram_opacity=0.5, + color="red", + 
legend_label="Before", + display_name=display_name, + show_legend=False, + ) + ) + _build_histogram(**before_plot_kwargs) + # after plot + after_plot_kwargs = kwargs.copy() + after_plot_kwargs.update( + dict( + ax=ax1, + df=df, + plot_col=plot_col_after, + histogram_opacity=0.5, + color="blue", + legend_label="After", + display_name=display_name, + show_legend=False, + ) + ) + _build_histogram(**after_plot_kwargs) + ax1.set_title(left_title) + # configure legend + handles, labels = ax1.get_legend_handles_labels() + # bring last 2 legend items (histogram) to the front + n = 2 + ax1.legend( + handles=_bring_last_n_items_to_front(lst=handles, n=n), + labels=_bring_last_n_items_to_front(lst=labels, n=n), + title=None, + fontsize=SMALL_FONTSIZE, + ) + + # RIGHT PLOT (difference) + _build_histogram( + ax=ax2, + df=df, + plot_col=plot_col_diff, + histogram_opacity=0.5, + color="green", + legend_label="Difference", + display_name=display_name, + show_zero_line=True, + show_legend=True, + **kwargs, + ) + ax2.set_title(right_title) + + plt.tight_layout() + plt.close(fig) + + return fig diff --git a/src/utils/evals.py b/src/utils/evals.py index 3fbf885..94a0209 100644 --- a/src/utils/evals.py +++ b/src/utils/evals.py @@ -245,7 +245,7 @@ def _compute_classifier_stderror_pvalues( Source: https://stackoverflow.com/a/47079198 """ - p = np.vstack([y_pred_proba_train.values, (1 - y_pred_proba_train.values)]).T + p = np.vstack([(1 - y_pred_proba_train.values), y_pred_proba_train.values]).T n = len(p) m = len(coefficients) + 1 coefs = np.concatenate([[intercept], coefficients]) @@ -259,7 +259,7 @@ def _compute_classifier_stderror_pvalues( t = coefs / stderr p_values = (1 - scipy.stats.norm.cdf(abs(t))) * 2 - return stderr[1:], p_values[1:] # [1:] to skip the intercept + return stderr, p_values def _compute_regression_stderror_pvalues( @@ -287,7 +287,7 @@ def _compute_regression_stderror_pvalues( [2 * (1 - scipy.stats.t.cdf(np.abs(i), (len(x_full) - len(x_full[0])))) for i in 
t_values] ) - return stderr[1:], p_values[1:] # [1:] to skip the intercept + return stderr, p_values def build_coefficients_table( @@ -302,14 +302,14 @@ def build_coefficients_table( # compute coefficients' Standard Error and p-values match problem_type.lower().strip(): case "classification": - stderr, pvalues = _compute_classifier_stderror_pvalues( + stderr, p_values = _compute_classifier_stderror_pvalues( coefficients=coefficients, intercept=intercept, X_train=X_train, y_pred_proba_train=y_pred_train, ) case "regression": - stderr, pvalues = _compute_regression_stderror_pvalues( + stderr, p_values = _compute_regression_stderror_pvalues( coefficients=coefficients, intercept=intercept, X_train=X_train, @@ -322,13 +322,15 @@ def build_coefficients_table( f"Got {problem_type} instead." ) + # skip the first value as it corresponds to the intercept and thus is not a coefficient + stderr, p_values = stderr[1:], p_values[1:] df_coef = pd.DataFrame( data={ "Coefficients": coefficients, "Absolute Coefficients": np.abs(coefficients), "Standard Error": stderr, "95% CI": stderr * 1.96, - "p-values": pvalues, + "p-values": p_values, }, index=X_train.columns.tolist(), ).sort_values(by="Absolute Coefficients", ascending=False) From f3f16a9786bace785a308deea0273133b8d8d290 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Tue, 1 Apr 2025 21:30:11 -0300 Subject: [PATCH 08/10] Create LICENSE --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5df81ba --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Pedro Orii Antonacio + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From eb87c234df94b21c753cc3bcc3d9b7c426aad17d Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Wed, 2 Apr 2025 22:04:16 -0300 Subject: [PATCH 09/10] feat: final repo configuration (#8) * add uv, make commands * pre-commit install make command and update README * add readme instructions and data files * fixing figures resolution * wip make command to convert notebooks * fix xgboost version * standardize logging and warnings * removing tqdm dependincy * add rendered example HTMLs --- .gitignore | 3 + .pre-commit-config.yaml | 2 +- Makefile | 19 +- README.md | 34 +- data/expenses.csv.zip | Bin 0 -> 16023 bytes data/fetal_health.csv.zip | Bin 0 -> 44347 bytes examples/classification-binary.html | 9757 +++++++++++++++++++++ examples/classification-multiclass.html | 10036 ++++++++++++++++++++++ examples/clustering.html | 8430 ++++++++++++++++++ examples/histogram_analysis.html | 7899 +++++++++++++++++ examples/regression.html | 8972 +++++++++++++++++++ pyproject.toml | 20 + src/classification-binary.ipynb | 170 +- src/classification-multiclass.ipynb | 160 +- src/clustering.ipynb | 75 +- src/histogram_analysis.ipynb | 60 +- src/regression.ipynb | 150 +- 
src/utils/clustering.py | 3 +- src/utils/common.py | 28 +- src/utils/constants.py | 3 - uv.lock | 2077 +++++ 21 files changed, 47596 insertions(+), 302 deletions(-) create mode 100644 data/expenses.csv.zip create mode 100644 data/fetal_health.csv.zip create mode 100644 examples/classification-binary.html create mode 100644 examples/classification-multiclass.html create mode 100644 examples/clustering.html create mode 100644 examples/histogram_analysis.html create mode 100644 examples/regression.html create mode 100644 uv.lock diff --git a/.gitignore b/.gitignore index f70ec60..3d2991c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ logs/** # except their sub-folders !data/**/ !logs/**/ +# keep the (small) datasets used in the examples +!data/expenses.csv.zip +!data/fetal_health.csv.zip # also keep all .gitkeep files !.gitkeep diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 503f940..e7f530f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^data/ +exclude: ^(data/|examples/) repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 diff --git a/Makefile b/Makefile index 4e05f33..b186845 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,15 @@ -install-pre-commit: - pip install pre-commit && \ - pre-commit install - -lint: +checks: + @echo "Running checks..." pre-commit run -a + +unzip-datasets: + @echo "Unzipping datasets..." 
+ unzip -j data/expenses.csv.zip -d data/ + unzip -j data/fetal_health.csv.zip -d data/ + +convert-notebooks-to-html: + rm -rf examples/*.html + @for nb in src/*.ipynb; do \ + echo "Converting $$nb to HTML..."; \ + WARNING_FILTER_POLICY=ignore jupyter nbconvert --to html --execute "$$nb" --output-dir=examples/ --ExtractOutputPreprocessor.enabled=False; \ + done diff --git a/README.md b/README.md index 769695e..d687b2b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ # Simple Data Science -This repository compiles simple and practical examples for common data science tasks using tabular data: +This project compiles simple and practical examples for common Data Science use cases with tabular data. + +You can access complete examples using the following links: 1. Binary Classification 2. Multiclass Classification 3. Regression @@ -11,15 +13,35 @@ This repository compiles simple and practical examples for common data science t ## Setup -### Pre Commit +In this repository, we use UV—a handy Python package and project manager. To install UV, follow [these instructions](https://docs.astral.sh/uv/getting-started/installation/). 
+ +To set up the environment and install the required dependencies, run the following commands in your terminal: ```bash -pip install pre-commit -pre-commit install +cd simple-data-science # change to the project's directory +uv venv --python 3.12 # create virtual environment using UV +source .venv/bin/activate # activate virtual environment +uv sync # synchronize dependencies +pre-commit install # install pre-commit hooks ``` -or +If you want to deactivate and delete the virtual environment, run: ```bash -make install-pre-commit +deactivate # deactivate virtual environment +rm -rf .venv # delete virtual environment ``` + +## Data + +The examples in this project use the publicly available [Fetal Health Dataset](https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification) and [Medical Insurance Payout Dataset](https://www.kaggle.com/datasets/harshsingh2209/medical-insurance-payout). + +Because the datasets are small, they are available as `.zip` files in the repository's `data/` folder. You can unzip them with your preferred software or simply run `make unzip-datasets` in your terminal. + +## Contributions + +We welcome contributions of all kinds! Whether you have questions, spot a bug, or want to enhance the code, documentation, or tests, please feel free to start a discussion or open a pull request. Your feedback, ideas, and fixes are vital in making this project better for everyone! 
+ +## License + +MIT diff --git a/data/expenses.csv.zip b/data/expenses.csv.zip new file mode 100644 index 0000000000000000000000000000000000000000..9c0a717cb944f600a00c679c85b5fd42c0beae7f GIT binary patch literal 16023 zcmZ{rQ*bW~?51nCr?zd|wr$&XdusDn+qP}nwvAJ}`~7#Xc4u~Sk!0r0U1pNZx42t7Y>H(aWfJW0jM$BaJjAfaou!JuBs z2|Vgl)S@wTkt2umejl#RUJZWl&i;Pg)Cv4P?)`oc{JzW?{QZ6ZcKChU>-%|s{4aaI z68NG2`|~L9pZ<04C;4pd_v5)u;CHN!;4<$$$^YlKZtt)E@7VAA+~426KL6jjyzSBb zInq>$v3P!s$+FkHugBNF)o1?SSaWWbdD<0UFBIN+pHx3Y=D7s+Pg{A%zcqKo3JPky zYPp}E0zdD*R3}shlYA|-@r#e=Z!u2%TH-c&M0+%IC$D0cWsly@e?2Z*{IE}6w({Ej z@{%=-_zlbu%M|cba%p#H<9yBk?d@6}E6BO=b>wE{DfyrK$qO_%Srg1w{JxZ>PS@e$ z?)u77=jm=Ha}<8~ntvi@P&~6#sQjQxi|{Ay&Lvaq;Pc1FvGO+xHD*51`X)ZJ$uz%s zkM|MK>S(^gOnlCFq;i#I@bh`RJA3PAOZmIMT{7#;=IQ&aCAIpK{>{{SJgbuVRZJ|q z{A3pEHLiwywV5KG{hWcw(QvP-vpXdv$dnl{qPZ(NNfP(CMW)wnq-4-mG~N45p)N4y`no6vOEo5lW2ac7b{t_rKE$u zrZZlR>&qD%y17qAFc2&p4dl8oFU~rkT8|r-R?W_KTDO<9;5NIv@pxw=`9VHJReE~_ z8{WK*O&&jr4q|sBYnGx4gCwsHRE!tH<}+Izhhz=gm8Y`Yy`M}~qdBtMIozfr?_5a6 zD1GcE0&BzSQ5+Yh@ZJMB1O)5S=zHpBCX*f}$D1ol5Nr)Xji__Lzx)_*uqEUM!tF3G z>1bTsHM#VK$yCuQS@x6usRz~yc^V^0a)pQ8k=e<}(-3TLMbKGln?3~N_$|*jMP*c@ zi#w3pxKXcx%(6m*N=3G~Ry1cf^esa^=RYxbu~1{#$2H-&gv@@1>e_e(pGHLPkl$3=|ud9cL@+S6@~`+6B)h z9T8qL@O5f!6Qsk8XO}8Osj<0{*?!?!u;@**OajQ}Z13P-Y%Fhb=FsY>OQfgy+g@Wx z>%}iaV`P<5Urr(cYj1Az->iLPW|7U>sWP^I%F}bSf@)4<>JDQFq$av~mzt_Zah)VW z`tRvxduR8{Pl&uozUFNQlWgt(Jw`eg^F!0jUBu=aZpPVMP((}zdlCMyq zGgHiNnMZ$SiKy?~m24M~f6mHcW8Z9T*l^A_^6T-6=Z%(3M6aD_#`OHx-KX5!Qpjbd z+fUw3T0~mX)~Cx!A@D@%7=1ySm|=16d?LQ|#hS8UYmNIYeHbia;i<5~SE>8+F9?%N zIcFSc6AyeHrY(2|G%ifKXbsB23}lT3mN6~m8XLCKnxeDeS3Ld|)gGx99qZs}8n^?) 
zGNpi%YP5CF+EG`P7|)Loxg4yyUL}dN=UDUBsnlqteyy`tgoCWf&Qy1$Bz+BCj-%!` z7UO=F@a&DuvY+L(*=fsb$wPN8gA|n&zMWC}Hb*A2l+JD84d$#r<6H%fOk|V{Q$F>6 z_2EzEq@2F*b75f*Wmw2O@6*60@j9|Eoi6QJA%Al4JeIcqU@kizoqV$?6f_6vCpD(9 z)JkDRIwtYT!fa+4o5dZPczPbO9q*Whtdr05Mh>w9e| zOR0~G5_1+E@RJE!a~h`fb||J2)@@GM@Et(co$I?S42DVrGqaF2H!+QZE&3 zeVAh+pe~wAz=O-eSc~ckiCIXy9U1xDx7_+xZ^rm#U*i$exw>kCc8bhypL|v<*wV#; zrg0MG$}p0*lRPCw--Ar!5zfr{G*DK0>7tdRY}S#L)GTq_LAux?JKq$#B9tZ(b~kK) zcihMj_qr7~uQ(1HeU?A#^SuGt{XoUSX+JUKWx&3f{56Q|XkA{@Rw%-p^gLxKY-fV! z{+#J1^&4GekpB?*bBCl;TrJzNj??yz!^DK415X(qI_}@GI|lOGak@HrM};xun5Naq z4>Vt=?1pQDw#(@wgx)94swJi28*iG~lD2DbD7Rp0U~-G?G8yh0 z1jg{h5ea;Nfu4m0i%o^ zq3f@fHy82Y;3J3j#+?7&BxV*Zd2y^olKuYjQLy$;bwJv#K?cJaJrlqDoSqxwY9zI zXPmKsvWYb7b9;E9(|rc*L=$RgP7peBhmbi>in)#BOBP#T9oyV3FZRxA_yj?AbCeFx zac1(V85yhe*Mppo%}=q1BJFSqzu3>$OI!h!7eq!=;y)^Du(HJV%zR@-&L=82VEpqT_!U`hfD;Fu}6r8+D>AzgR=qG(>N%Cu!+S zG1!P`%uO-63mmw#$UD3TjXE%a>cUyl9}Xyrd5(=MvIvj9On>e(S&?1D*sImZbVuK7q$~9c%o5^K=*kTSf zLN+h_vaHEaE5|pNkP)yv2xlv*v0CQX{9Oc+f7`MN19CkNPJ!P}PyRdKgmZ;KOTdNA z<}!J#Ie(H|AkE_E&Ry7*a!0q0kv5;*)f3cRskQ3KION0o5o&sz8pp4~$G9&8Sbgd? zJ(ARjbI*m5Vjs!oJ=p4P!cD-Mh@=Q`2!7n>okMxO)$7eT(Y1$CkK4FMF!O7(H4@^? 
z^i3}7So>FDCcV$DL1w+zaJ>D^Esh-e7hBO8az7-;mJ)d86t5UF%oiU%7a2m*k&6xU zQ-1`z4t@nDFTA*pwJ90+FhPWBf{1oXC%FeL1KO=qU1_#zCSz$SRGJZtK&9E*VJee%Doy665}Xv-?g)VSVQQgr#M}sX zv}7F!&SzSPXB;4cMr%DsD&!c)O$U0UKL3nklx!$h=Bs}BS%fcm6u=;1kKA8GThV*a zlF%R;q87MCy_!67-dsvbzzIZx#mlZ&?U@q60Ssz^b2r+p?f$kD{}59r&VO4(2&;Vj zi=&#XSwI1erjGa997WNrLC&YXkSwIM4n?4q`2MmA+107|{uH5Srpk#2LOnNfnUk#w z?U8o&fox;nUbt;B!5gV}$`MpY??|7Qs*(TQJCX?L$xt%7otq1WHEMD3rR&U-+q%Nog-oYf0+=LZRP0MvcviWl1}xce zn^g+pIIrzColW89xc;qa@PRdy-GeiYzMtB|iO!j`@rZh-R;jeq3J*|KS0Fx&O^{vW zXHJzH&1^Co4>&Rnj;2L?a1Dnh?cyF+T|7x&g8L-NKr8sFU{JATbfl_BEf$(M&4HWp z&NVUz>?9t0*jLrQFE((^nN(*q%U#U&7`@4DWQ~+=Qi%A=MSHp@W=p{#qlw7r?EBIV z$^qWYfqv<>rY9y~EcMWq{vq9UC*)$dj}LI1Rnt5BQ^_&FVMIR~qLMAMhqp8``Zhp7 zx#LcE0TT|8#~6C24eMM(5;k<^h!-T-Mkv^o&u9TX6*z?uY(iDYS7rjx{vIp1GBtov zsQ5Z=HQ=ED-8Xp;8~2;OqpAPuAMy$$4kr5C_6Ln?dvX<#l{#cxftBa8jXMmWixxam zyDjIlaU8_{An-dZlIAayV8IXtt|DS}Y8HU79`Q~4c8LP2rbekqpV&4hgSd6YF)#?j zL>B&A#t1vg4j?CM6vL$0Hj>^dp^#}m+4Wk4Od6M7qrB2m#%EC{)N=$9hm|BdtMnHQ zcYF`5&A8407IqHQ7=@4$DOS|8pg{+-(Z^w{`+?+$@w)qgxLQ;av*b0*Mm8o}bed!C z6MYh9h%Kl(G(H5~y(bpl+ zU6CY`yw_zQG=iIb?LnPj?CF|9bn*ibJSH+%J!jd_?kN1#+$&X0%o7}QR`cbd#~~-C zx*B~W$x4Zt)+h$hv8Ve&VWlEF!E7sN_CC1oW6n7~h$bbaWb7oE$en9XLJF%Z_j&-gMk@XK?dK&OaR(WiY%_P4_ zN7eu33QT9LX_2gA?kR$%9dH8w^)+6*>^RzN@qMH+EI=?F3Om`|OJCP_*KE(+e1aqq ztvlITF{ESz9um(dE5ojMe0}0oywG9-ykXCdvk0-Z3-AM!#V1NFmJ{HIAp&l@6W}CL zlcTjb_F1*35&wPXai@@;1k5i7!3w?;+90TMEYYi8Kp;chUR(EEQ}gbf>kkr_Alcau zT`$n6tdfp)j@nFf-{Vpo&zWw>`%0NYO562d(ht)8&BY6qa#N~9(aR1^x?mNAv4Pp? 
zYdAmb4^Pe(IdZ#Hc8&SIrdRU~c`EA4>0G}9L?qi12P-YQK zeu|OqcKWfSUZCF9X!0Enj4K?B7s&(IM);NoVL{ASr>eqD*8_MXX*Q^BSvTfb+=9zP z(OT9-M$rs81l|8&Z!d!C713bPU$rZ3Xuih@6kohA<(=a&r_&=qDB^NXi zC>BZn{FH;hPW%}id_baPHvlzcH(y(J+6CA*aC)j>MS}M0ID!->FWLK%xktSmC08`Z zY`GZXldZl=-ZP;mv1IZ_dIq#sg?0rCgKeI`ia3d33;PwjJ8zp1nvvDgVYUuW!zG`3 zPE97@q;E{JO4Pxb7w@0h3XsW0Q4qxZAMVFsh7ol4f8aN4VI);r1A?2faIc7LZ%<+v1Q+$gE7rDBS|JT9Oc_ ze#%Wh<7w&zJQDjRI8kd1ck`UdOP9HVTP*4fuj0F`>*1r-T*@_>7%~s%1u{SB;Q&mo zzmKSBKa!iC;#wpF$gF0z{}9NOr#)J_738Qq9kiJIT=1uygGTzFtzSk|Xlc+PD(VOo z7++;zoeq*I){(@R-%Vw*33)10vQy}c&NW#KkxHo4i@`RVV`acHqeF1iigX(3kT?B2 z1jj*#KQ`)bM#73rMooo$^VZ7%4-gW4>suRwDTz(gjN7y(JO=52N!@cPqsTRb<1{Bz zFGndRK5^a7AuKKg37VxLBx%6sN-clUZS?XecwKSfJX8mHYke)Cc3dhjl41(n2=na` zpgMs(qHp=uJK@<8LSGr!D=g)Zd4@LXZh8*`Y29DSV59FDWxOe*vI48@!KtK8%OMRB z@_|YvxHBDSUjRY3(R;imO-$HTlspH25;p0X=YrCkyW7#vb7?61e4KFBLEb(FX&fV~s8R2vKJ&&S+Ig{Ll09cNAYiOWKvyb`E5k<I>U~Fj*fJRcPv4yXsAe>zZGDuE#fo=@UYXgp7XDFpNdHZG{`U$un6uaOhR!qBpNMxBjPT=m@e(4-#DS8%}V%oc&mxI1h*3LT*pas5~g7>{GkDk7gaJN05poz`B}7=8p_HnmZJF_a_Z4d64U zxBWhj(2s5s75L5|XG7R87ag-)VgXOkS?VKIG5)e~KVZHv;{?eivRD4}>++ zZ`?a940PqLc8l95x0()(*!n{!6L?geNC=9kBNt}(oX>QY2Jm2_DdNN1=XohZPYjFd9jetzgAQr9 zZ4jx?w*Xa2i)L&3ND~yAlbfO(YD)ISe7*h@gp4h-N|v0*Fc5$u3O43_Y?lb+fvf?z zMSC8d7T~nWH)!o}6xb}|_i(~JZX982A*KH|k#yqk%}jRzFe#L^Z@MXVU65uJTt@UCgh&lCE;dKQaLh5k1-L zE%x4H29{@pHUiNbLVKA@E@VPru@yYQ?gb5%6(uw0!&lS&SR14O5^k2;*n9dgl;F9; zBR*E4xpzt*wGZ%x^9NnJP3~W}0%uH{YVLf$YB=zx zK9Ao;bTKS%E`WG_6^Jw`63E-i9z|bMhxF(rZKP!sCq={dgfb>Q4~QEcv<^!L`e(n- zr878xCif=G7O?RTmR)>diY%ogHn=L@w z=Jd(#RWY7cMRS?nkZCc-ZGUx$TR4(stq)Ct%`k!V2Xl`w%A1~&W*L#x)#g2`Eu3V5 zt`@2Or!02uBRcBleD9wal!L+p;law#{47k1TEg?6w4QE7qQl%jUO8SQ-33w0v+Bdn zOnp|ah8xw&>O8UIt$-&&L9to9nO>2!P;9VI4v3H_?F`7gXW43*nM@8i_R!7z;)ic% zRLQBL;s?GblGC;iL*+J=U9-Bx!q8D@;E;W>TA#Lzi5duQSmK!Dh29js zFj3fzUSWlmmn%q2hoIQ(%!|0etsHr;Rr1}~(#L!(vqqP;j`1K0>X8;Sz~J7Q4~j=5 zHO*y7sS_PK@qoXY`SUDy?{x=BG2X){5ZC76uK!<)xR~Ez_#4Ef6d|&cbYkL4JFt47lS zQ|TwNERmgy$CUJiN73Bb9k6o}_v5$_hlM7SjwKdKE_gMnMbvgJ;KXsAq@^VOO^eHd 
za+HW^LqbGTNl%4;|Mq1fq5<`sCM9wfkb3!Ik;Z{kWtRYCNtck$ ziR2lY;I6y*y9T`)nu}&dHzHTn2PF>Te-%thRo}coHh3$Q+IkJLQ!`^QXso@GwYs>4 zzFi?$Se*brcF}B?VtO~tsASp{mC(3C73{?V^+M2dyp~Hghchc z(N@wEigKNbIH^_u@AFG7_&0J3c6JxNfGg0Tp4=DnHV@q?mJKj%YhV_@NDjk*f5s_v z^TZh1)WdYv52r?q!*o>92;l_A zw?vBF6Fc&(Wxw@wRSzM{yFKjiXJ9(>#7H(Hof`d&nM9KA+9V}ZI(u3~9lWt1w@I4? zKmG7QyIXBxk}ZUN1#eus#Jgk?u1{{nVMej{LU%bT=oZyJRs#sW54H zq8KM!d^~AIuKS2Fs$7GQ4vLGozC7M`_9NvY6v(bsU|*pL(C=Pn76rSktTIS~Uakpq zPj++%0CJa*?+y|d@w?dI%DnVqfGn`B!s{W2iV#8$p_pf?Vr5)fM6vXh+i4k1n6Q_n zl`E~Z%BL7>n=26|S3eTTgzPn_w$>duVpS_Q#h!>7D{d2;AX=l8mVJ(nE5$v>G1Hl5bXf{TiU+lqbv~!9lb<_MUvB7Bg8O$! z>_UPMr;G%H26&wu7{D1pk(+^S_5UT$Em@c zVk4B>)09~p&z9+hp0M7t8X@c$_!n~GL<^=vKLN;8`U?m#$~`vmoj8O~CB?9mH188R zifn9U@G9}@b6NS@$`2v=3~R?!8G=Oz%UAmkHW{i=|@4&^K*J!KI`50zc{Mrt=w zF|RblmYA>UgC+*10>+9$K7EDtoKK5cAa1i()9~~BF6fgG&jV)S#wxbJd6|?WhOisD z$fe@WVC_%-ZKw7*W`>@bVGfrlq9;>0&^v+iS98NKmR=GsY2Q)`=8qfb$0kYoD`i7; zz1Gy`h;&o)z6x0_WQlQ#yKLSVnNHfhIpHvU>Zj0wb*=e|Xer$HvY@;_oG`OCTY~fQ zkAi8$6xdxQD3GQ4z3r)?70kJ{vw)tL6&3TaECL@VR~HIK0Z7Wa(oKoOiZ;CrJg$Kk z%>4-Kp*vLN;4R{Ldv(_ll~S6s4V{{fcua;7W2P`|?c!kUJL2wL8ls0Kvq6`I*v!Zz zACSp-VaWxVOG=~Pi>K}Ci^WLdr4mA<_cNE2Yv~QJyyF24fk_|kb%m@@*}Zu5C9P4~I$c_w?$jbH zq4epqodV+5fba-fQ~}xEYs?fr&l{m8*t(3)(w<;R&w3p2S2BZu)42gzvHyw~+3elP zdN@_Sy%zMroeMzyV2 zSFvjd_fol}1zyu9G75uv8|SO4-V;7F1YjpXYgE6zi1r{(Zddl@m2 zF~fc;xk#J8kJbuJhHwHc={za;utCFms(@IZ?n$5ZL#e7W1wL-kOot$)Ssl+y^$-s_ zSv=CCtLOI(1eI40DF*@L+Dj*Qv?5f07Sv$1F)bZf5}%C_DNhPzAMT%q#awWzq3)*^3NcPa`%7r-?o$HYkT z1{jhmDkk$eVOh7oCtRT~ll`or{uFTc5AR`O3QHVQ*&xRMX+%M>Pjnko-WA4Gsq ze|boICKj_657{>=-A7%pNCGY6K|Y&)@@0VvzgC}E4&}4q^>@uDB5TKIzfBh;ymoVn zQ=appw4Su#+Rr7}A_Il5jalU(9u9B_VyrKS_=@~K@?%OXM#DzbYNr?1n17Yy-o3_^ z;$qYw^5pZcNeV5Zeq4069Z{MK{#FRdhi9t8`0*Kue$W{||JgLVd?An4b$Y`Sw& zR-a*E4zO;?R$`z18Dzr51NUMjZNk%S4tveMEKHd8m-M!i;Vi@5q0*q7R^$$&%bzEx z#H}Hhn<|Zlv;K25tY}*mj9YbM>*+I3d6HUjusvMUuaocytQ?0?*oBmWM^(%=hH^@+@$k~4+2VZuO^N5xilw_`FP_jP6|Ea zM7AhkCrI9SAb{6?M%0p$j5)7tmdmli2t2yAB}-a4uF1$-BXes~uWed8x=2)$-clpd 
zs1;OLI6%xepgB(jZR4YBDot8G@CVX7$qm#_SH=ZUF9Q|~p}+f_0Yo}cCSJKR$|;GJy;9T#C9c3AnZP!Dk%bnVL!hq zS%jmkyon(SmR?RwU2kyuqm2|r0Ayup$E778UqG*JxQLj;spjhn9)_%>JYr7WIt^3I z^^iTBdl0Z(5rZ zX_>4=)KBdlL+UlM2U4=Gr7&zU&3X2I56n|)p84>(P(*|Wi((j$&Pjm`)*AA_e+V5B^{HZdLD5mra zQ(3rQ%*Fi(y_a_hu{WKVfS-=iR*Z5ZP(?j9l=~xW_;Cu}V+ZdVw-S_LCl*QU2A6YA zM-{qeZxI#z(un|N}WN|4Nu}-39*w6# z?Z2-F*~E~-fPKq?7{?7zIl~n zcxWI`TS5f|18mjjauk@$^o;sRy$NxPa5>C*i89um5+BdeGxP!$LFFi;6Gi$M5Xt!n zIc)V)N4C{1Rt!~v<#c476 zxs9Ue25{9?i(l`>p2~F8vIa-R9zVr$3XmX$DIOgQ!WO+dEw*$U?~F47GHa)t&dvlZ z%HZ(i)i^Mkih1L{jXKX{CD_hKg`*};cJ2iMA`JdwZnC#mn5n`=b`OQ?0%#FilLX@?Af`Rfy`D{#QMu5PVJpVTfIh6ZC>535qJ z@VIz0cQ&Ov5auR(a8KDtPv%w3S19=weh}DQF&BLbDqv%RL9lkn94dFx=T9{#e{Yw# z@3#V8Dv)!zGK}$RApSLlqXl=l@_ugZrY;ntN~I?}+sx3$M1|zAW(QTc(4~>!+awlD zC5VvaxEo>7pw4U3V`>04W=0gv(vy0|u!>iW`ZMZmFT9-Z0d~+v>F5dN>#R&@Jb$XH zqBppSHWEF6fem>#kw4wcMLmh+X}P?xS5F%?5)247S!dex6gcom1uv_RW|?`M;V7Bx zjm;r@i|4?%rw=cV@7wpQqg<+wI1<-|Yf7X=_>4@7u?ZH7 z0n=^_m514SbysF@n2c_b7&W_3mq9W6Jf4JOUVkMJdZjKDFoQ3!eT9w@ zcD8F+pN7zjZsFx}HQAF4`|`<6PsapW2@%PbNw2g&7B4Iopm^ z*xv$ZT(RDVBEXf~>}>ctk3rB%xwnWuvZ%~!0LMn1#+8sGThjx~it01tOHA17o;_3d zVE?=*=`t1^$2~w>;x{u342e-G!EAXBdc#H^^2?6j>2n8fejhA#K0fqmS}ACtC za@-!mG9#*z20BVqIbQ=u%7@4*3TQP+7{|8F++t-29B+o2U%} zW%Vc-%ku=egt|LcCG?Gd!Q$#6){^^UD|$0h7-F^-29%f>V?!dy)FYK1rMNHRd}q#v zZ5Ia0zO(Bn*Ql^RflMr`9IBqQZJYcFh_WdGTf!~VRL!~oFlf1BWmaeWo8}q?-()5h z5i42pXl`!MyrYUxCO(PUMo!MjEE(=>LSk_RitXL6=u2{KuM9k~eQBwq;^@uhiWT@m zxC0}+d3Br>Hg(-Id8yu@A2}(;Q~Rc%ZlB@~xqQSwfj2lnPq0&QEs9D_G9FaJD0(T` z6kPs0hxOn{K7!YOVtvWZC&8$#Xj6#L2#@;cd;fL3AfC zW481H+dpk;H-`meD0hO>a%n$WreBT8j;hLD;_%ne=oCHyxTV}JZe^AQ|Mjg1wt{8- zAChp`T9uqA|6HsPRJ01RSs~0#Iu7cQMF*U8Eje{*1-x@9V{8_ID0}%A#<4_3_)Q0` zaPD^C)rNDp6{>~-^;Oj?rf^Jo8#Kaw%UXJ*$`1G?Ds`TSDvB^rmEn9!fy7o)b=9^p zQnDI^#jq*q^A?Jjs1(XXyjZ_Tq%eK8{z$h5-kbh zCw*0TMC%5~118g0sg@!pKDE|Oh(AhR%qYT4S)$&o%j~VsHpi9ZnE9D=V4rtJML(&~ zEcT$Kq}2OjTmiB^wR=4e2dDarwDmb8eQQMY3sLX*sEV%_X+!s25bVIYm-BOBZJH z|L)1HmR8GC z7MnXKW5J`4D+;d+djvm>DdFM+rBiIKXIUZQI8^nutW!d<$Q=pi&0dpq(&&9+=dIeR 
zXk;n9g%)@$Edl;DR-OhDmh5P_phK#+a6*&=8sXG)4SAf*?_Y)q?bE`m-WaN(0-O^H zo0Pac5!=f zEhUAxhW<8=dOl|rUFt@K3`mI#3}CH%XKfA(kKSXjY_olHljS7WyZnad9+?1u;? zfR>9^sT5kEC^iOy==9F8wpa!$EUR8n4gO-*P||{HU|@SUmfBDa@pREOhuxw?$J*-R zHs}@Cw{mE34m6+QO}gTu=9cClXBkJtaN51^v@U*_cu9@N{%79sPtw42V@kn=gYjmOu(_8TCQOjKItDOJte>&q~fws)ds{yEU55rjbl& zWL;bZ`3-mM6fTZG!*=%|XVg|C(wM%F>lV)`SliDEz%lTMFAK1a%&?N7Go#@j&M3kI z88$wuL>ZnbNXs)13J|TDjW)KI<#kDyUE7qjc)^5S=k_*_jbx&ZR~ikb`s8B~$-qZ%qzGf8S<}^4>6_au&N0)h&rZ3f;T7{lwLX3Wv@SaGnN$ zF60tbhQ9|X$4UsKu|UD+!`a5>8Yy%|A)SF0!;OnNoZdb}Pa>k(%7mCB?c-3a6@8Zk5K;DIQ&APilP$UcSG;g>CR4+MomIqcQEG_fVy(mS^oH*Z;nab-6 z|71DhS?Q{{BvlbzT25_PrMp0SwAm~qyl`f6hD%iYma&zlygbL$Oi;(0($2u&P@pCt zDko%!QiW57U$JL8KL!@|`L1V0AhS5ER_0y#5pF$I$(9}Q^I^@(PtW(Wrp zTs*dnY?cqSU`mwZK~v~q6)Ng^DcM?O7-2!y|D=f7$;-vxyKrlV39~5_`6}OksB%k@3uvVUkjjcfBSWtb;f`ym!mp(Q}vIl&gHb z@G{eG(Q90x4W-27r|vs6I-08tbEB!X?W4fgrjm++YSviA>)qx&2^LC<2LrucP0El5 z-KV*wC9$V98)8m(VI3o1>19@b;kO^n6sHswgKfnQc(`_T`oIUTmTS$YU8~Zbgu7Xp z>j!J!AFN^!(wI8rf`8QH-wm#hggCQ;$+sK*n3r-i-kIi5uCT` zshYg>PYw&fbQuelK!fO3az0!$bT`zxxHsY|1u(7XlzMws?a@fx{+o7C1qy6)hLzcS$6A3Ch%c=7anMMSk#Ks~( zb+Mf@thbA3BQ^~pyz#0RHT+lsmeG%RXv__hyy7#3Fb8nR>Gplg4SbBvkncma-CLa? 
z-i{;~+KfbYbV&))I6W}d;=rEBNq!~S3AiK-a?(6&4g}=22A6T9(s46s^Goy2>quHb zq1h!A9UDVfo*OXyBN$nBvZ+0981_w4buc}IR`RvD(igC^CgnwYpoMBA)onQVKFqmd zmz!Ljsx6O1ukt#3!(OK@W$;KTl)%lRW^_?SRIw|7?VMA7h|v~lu%iC5g?CbakM9l` zeJlp`{3za9Ji901#0h8NE|pd0n>)Hj=qcK%`dN zb-(ZlHi5WYucz?rH7QaWq52S}0{*EL<*hFOVfc24c5aq@m8NZ$Z<$uR;7}By&iStV zS2CYM%il$pv*s!{f(Q7bBs=H7qs6pA-nPL*%}Jru43+LD?bf#4qS5QCos$a9#1wB{ zgX0uG+>G)o94fCBsW&!*sN5yk`sTUJNZX_?pxvR~yHO(tR2{4OzJGX9SFLvkX!7QU zzu_MQ@nOYE7RI7nU_~f?VzIKuURD-AS%e^hROZ0&qr*|sTV31S`5x1$bqiy7>JuCm zv+;jlO%W+3pWIC16bfh~%uZ~WLqx3QWG2dkxTQvh6ZN4@TuPk2q!4@Cy!qZ-LN3|i zhsEzZ52F;f+y&i3lWj#il#0Zj#8;A&!xw#cDhmw3;Qh_}7v5iK-vWDG6ZTE3TyPxR zd!-2xk-^^fOvF2z<=&&*n2{tE9!{NQOJ>fBck94j~4%(C84o`E7Xtukus8W`d=Vs!7Y8dY|#06fJef4kHeMt++Fli&HAkvq6Ei!v75E?M}7wA)?zMaX7dvnaC?!yXIJSSg?X(pHaCMp5U(VBX|J&v zo;D9UrfNy3_I|(~g}U2H3D0)ZTD&X)%-#7kPR+v1f?VZsr zBN5hBEj$6cwm)IiomcoHrL6A*x!J|7VERw|J;$ETFzET6h6Glsj!vC|z zR;XIpq^ao$J5_&}B5fN95Q1WjD&edyNE33^gu}&ddy0hvIK`N2nb*Qm?#=iUA1x16 zKpJ80Cqn3-wh;f}2*%k-WA2It_D)7a!U>mOWy_gD5ItuTAG4fRypiHx4ss}P1r|-x zk)Pe#_F0p-lDuQ-Fz`(y197AxVt=o<@@ilSzyxK@Q_Uo>|9wizhJq}1uvVK`s*zL{ zSts9sL&Ts7exH9<2o9^=Ez_uagSK*FFP?>AiR@x-mETH20<(ttn2B2sk*MfF2x6Dx z_`W|!^r{btqNY+%+3Yc$bzxWa&b||7Vw%i_2dU6LSYePtAbteauH~AN(iF*}vmuqY zvz~7OEKjtcFdV)}wH(JBUFgniWjM3@EM@+gc;?{I`w)Iyl~v*CF~}D_C|<|aOR(hQ zO~+$`*ld{fo6!zpm%J8(Kq}^0eRo)=7+bj}q0) z)>SrgO=t6&c3{z_QP)$pWk&d?o~t-xv*D&uy*l|>p#cgKg1nAA@2E_keghPGaVl_*%;JD}~gLnlX8uzQ(cXKL?+}S~A2#7K?~q zs4A&?!y0BYnnY%d$r-W)uqo)m@H3aOjFj-b#b7l6@IgNfWa;5$uUNY_yP9+oipZE? 
zV}UtJ2Ca$m$vzmB?KhgP1@-!>A2iw5Pr2AXqSOWW(-p)*Mm&hkt1RZ|S$!UT?PuW+ z@~Fxo>i=zkkjj4Il$7{C#b|n_z?zFew@7?g`c=?@a=-ZKo1}>9 zB1&%$HAhJ;`=;_^lNX-e(hxj~mD#3%bOd?>sodI=wO4?+VX3lzI=F*PMDRIdCDAC# zGKQ4+W-j#Z0ra{ze7o~CW*ks8vTHTw2%;Iz zL%j5o=gaH0#lVUV=U=5AEd28PY9NSJ8VH?XXxvS%aPzm3x(=@iF+9EJ$W zj`BSvQCw&}^orMBHHK1!i{4+DD^Quo;cW!Ej?*H$-h%@)k?l zaB>OXbu_71yH8`G1t@B@+92cw(;K)IcHOIzSjnu)h}NuMgHlMKz$n_5SY*T{qGSJJ zF1M>ru>M$u8%z%fWhdx0CB>`ha8$o8$3@C$ty%mko@MbOc#Fi3^Y9Ky^mb?5zf7Eo z*vc7Rkn%HstoXPVl2+biMHBowH=TouY;4|>4C+^`B$eqVW+~hRF;Pt&c`1DGU$B;2 zr6Y|al0>Yvd}(%~?8ksEm~jB3anfH_cs+w5Z~7*LyZLxkMQ92DJq1+$2Vr>5(Lh~H zF9o1JW>TX)Wpj}9DZLG*rid0ywUjuWCyiPu@*SCzRpm!0|CIDY{#~HPftYT|LKaeA z;9%;h$_XyXb?(cTwKJ=|9r{3wFHF?ZLDRp(pybBtuqWvCd180R1d(nQCt2@wfTtoT zx(UJR$q)xkJJQULS_EtJ62!Uc0j0yW{wS9CTJe{$Aq?ZdcD5(i=8v{P(37&IO?5~6 zraHc}?__OK9E2;huQu!xy(;;miE7M%+w^g*5C98PQ9_XOvZg6)8{;d9f5gE6{ zKC}1aB$1O-j%hQN(}rtDl_H1^k%Uso?RhcLcVIl4bpDV#Wz%wuFMu1rf|{zp_&JS?rC=XHo}<~{0arGx1e`Up35vv|cz{+ogoE8y@U z!tfhIQ3ek^=$#2ZaOm2mVj=f8*oe{|6ucf6#G78E}aI(E|Tx%Kv!= I{{PAT7oj5R2mk;8 literal 0 HcmV?d00001 diff --git a/data/fetal_health.csv.zip b/data/fetal_health.csv.zip new file mode 100644 index 0000000000000000000000000000000000000000..b1a4547a7df06f21a3989758086eafe9379dca82 GIT binary patch literal 44347 zcmV(|$ey>}Pd&3jhHG^#K3?1QY-O0PVfmwj{f8CHP)n(I0SbEC=A@*KkqN zZHcQzvKB?DyZY-p9ouFYJmTcNMZNUHiHtmh?=XYGU^D*dkN@%OzyIkkzy2Tp^~Znz z&tJd(_|O0R>%af{w?F>qxP|N7(K{`ALx`qRJv>F@vR>(9Ub@h^YqAMX!; z{Xb{F^gn<7+du#KjDP&s{!4%V)1QC+!~gxKzyI4GV!Vr@z?${qg@X|NF~-{`sGN{oDQ2|M}}5|F8c@fAatS`nSLSYJc?S zzy8aw_P;+qo%`><{K>e%}AT?$@0jS=RpA`u|tp^HVxnBMf=^az?b>qO}Zd3uyiS?Zod}wDnW8 zwg0Df&UA9O{`Tmtz;&LK&B9fdne6Li z(eIymd+m@6h<&U0m8hzEu+6Ava_<9+ou`QoK-4DAT@Ix@R5o()4mBnH;B5T?1 z*Q2J}xc0y1omGF(xZfBj9=cjM(iXaj4`rl5yLU~k)hNUo(fSAa@!<9AJVdIu)3T8i z)S>Sehh|moN>r^%EpfGi86fw#)kfbtJ9g?yGyt%RMcstbW9J_YfX(8q2CrNO?|dC2 zo~T*yu|iqh-(A}jZ(WO$rUZUf2#cIA7ZE^eku|=h54!$-Jv>8s`D&-HshAjs)*ll1 zGK#om9Nhwi;OM=uEB~lE3U8EkD@I@}OKH1a#M1NYx_P`^A~Lc}4HN?P{(0M9naJGq 
ztovm+>hQ2%4-%R4$MJE0_PvvF94~WZ6W}H9t4LE{-TVx$o`Ja}+x|t=^W1J2*CUC!HwC}I2uVA+lE{|wdkdHGW@Kgec z!U)&!CzZB*|Lk21Kw)1?IiHXF2l&>~N$;xdml6p~zfQc_h^!MfLn%;K_BUbXv7m@a zG((x4ItTXp^#pVpIN_=Qt*-}i1g0|II!nqaFnYSJfgXOy$1p!k#kIqnU-TI8e|Ipu zd1Xoe<5EIn#1Kx6pMOe(4w!c&fb!qCVmY~{;jxM93idxI3kqS>6ooOAXc(9eEywpzE zrD*rt%Y#*2yKSF=fgI?kvMO5Fe58AC19bpX(#QpXklt+AXK2fL>9QHpct7>O*?#E6 zvaX8Og+eTOtLyt_Xq!c27qBz2kXDMOR+qZQ9?Y!~iA{YGPv8i7#fWA%_;SGh@y|`y zE?PhBReAAN*7i*R*;sU{w{Nxy-rflWL&bXn+R@SQ%^x4UGC%wCuZ{V+7$8HQ)Aa0R zkSS@;PGd?o!_RUb)nvMMndpa^$R@TMY0eY;w|J|?%h6WL%>5>>A2o#5jaWfDL@Me1 zl~UKOkKNb=R$BV`_ky$k8Jib~uIRYkLYAzsi~)4=eQCbVh2Aw_Xh+q4vm6w}~*`pv3+Uv*6t~G`G7V&EE*g`L=6woC*>WQVqw>a?mBs| zRl!<476Jf=J0UvMZXT<9PT{(xwUYZ(>ud)gzEJc-^!?cJ$`os~DsjHhe!CUP!w!I7 zZ(_O&|7?f@=`z_k5QJ@mO1Y0WdKeamf;_YtSD}gC(^rRDjd2}(na&@)X)Py9g>)WvNj z{5({U3$um07~qqfUoZylv+UHz?aqOsb;6nIeiGLyJ6Z|>QM7pdqO-n}xVHqT$@|3t zIfJ0T^OhrwYuZ)avcgV+M(|Ok%ZMI_EoDzJ@1hwOxp#qS_X)P*j9f=3B1(Da4zN$5 zP8?)66ufir@5nZk^3N~Sb6A=p3Z(vS!viiecCJ%1Wkq~*llMO7vqykvCE~Mf4E1BB z`XP0|kn$$}YF|slS~>n#W>}lWf(PFO3+IT^Rh6~tN^7O6HtyWjA*~CoRFD34OT(pg0xHC9+1!a)CD-jC|aQHXO6JX%W%Xw16DiO>NP(D2Se}9nx{B za=N;O*GbrYer{Je++=gVX8Ac!5m{wd|J-+e$K+GW?o>OSiMp{|RFP95i}8;`8kMpa z;wA62fg#tzJy^ABlF_S|jLlMG!(mZpP_*cdJWz&R6FZXkAcN`9_a#N!ben~ahxJnp*DCun^|>Ykx37OmS*-L65TZFgXujUam8reMafPL~Wn zuHA;hCR0)bi)_%M?|n8?J)mL?0mW-J)!xHi6d!HEM9tK~P12GXAG$ZrR*2*+yeiAB z3lZrXQ4xs6KOcHMOxfpMdsgney22 z!B=*=HdgpX5JWhzRL1sj>T&P@z*X{eiaNb@)Ge_W-yQ5K76b8RZO|m z)?TwpmgNa(q~|1<1CH88?gO~Daiz~QJ#C=Jq*aun%243aP8)?Na>m-5_{9@N6!qG^ z5EkCxMv@?SXMP~aD~U(GHIjpF+}HGWPzsNdOXT@Y%iCyC3 zi_V=LnFYiZcWd);z7oo#|J;aQ1hXfwjp2eHeEtbK}m)D0z>k=(qaFDn#x%&RPwDBjIga+#ku7 zXvAH@;7}z+tC~Y@#yci(B8OxNNTW6D*_ox6%xT@3M8aLwh)<(9ypFNijEPBf^%t^b z`k~)V8Jxd5O>vuW}t4-b)s?$JZzlb#efTSU#&J)@;A9z{Wa7^(2cSra$= zNPCyzm_2U+e^PedkrE(HO~x( zEIBfhJ!Y8lu_Ks6tqeI{-Rncu0f&t_OQ`jMyBCr6a>5?ZPc98ydC{)!z3xhzb1AW- z7kELMR_+U8;fxg@BT`hwR{8^Pb=(cZOMO+%@W%3#J){|G$2~;yn zwoak6WHc?xWy!yF;~c6O!0i!(#XV 
zOsOJs`G-M@v*yQsmP9lny_&7Ibq%f#=K)EVYL!#5*YMt_^d?mGprpzm_PHsQa=}+w ztk|JrM6j8~x~m(={{=pm(-~9Wa8Uf>r|o5}M^|fqJ4Glb#?x6ca6E%BEvA!bD58`h z{TZW_SlX35p?lSO$s@8FK7PHM^kM(I`Un)luK00-Gj!)9ixv!({hR_B1^*dE_{`M0 za_4hDBS^&m+nfVI=X-+cgrG-f62HFVOztJu-Eq(cBF$yQkdrT3ctqKVX_T54VmcF# z^0LHiLPm&VzlYhGn7$;p7gU9qowtugQI>s!n1hJvzqR)GbSLH#o<(A|2{C-g>uKTZ zSIA#ozfPiua?&tvee^N1&AIx5BxkI*$Rwt4|L5*bc>rnwsAfPjf~3eBX^V}{D8vp9 ztGIGO7`D%c6rG<^Gzxa4=$cTpcc4mbBCBt~`wm4jX%NBd+w{k|M;UZfl6Z|^M81Cp z7j7JR7l;z((K7vD7L0~npsI&Lq~TyCGC&E?o|ClnKP==JqcG)QPjL}4m@xMVY*xUMO9L7*uyMU60w21q|VJFoE3Q`r|ae?surcKig|e8x)a5 zSM!t571BJGs@6*sB2`?wVpD*K3)=?7|53Q@%UB8_iwbtvFY;zDG8`#0cQW%9l@$|T z4;d^5n`J1-%mWL%OWNB^Nzi=hk4ckBPH9BSB7z+Wzgp-M;x|N@J=mgPO69bw&3m@3MD@RZ+V{Gk+GyfJ}fwX#yUzllOcpcdR!d z6^B$FksQw`S{&>7DHnH;5^TJyAbs%3goBp|IdRbKDjyC^Rtv!!EeUI1RGZb8|GHk* zDTIluQONtJV_14}T2YXPB6D|)ctBb`62-CD{V+(pgH;AYh=f z%U+BUpE7W=SW~PL8)SK3<(e;23z!snWACRH|~_ z4~oz3D(Og8&cOtW=#QJ&s0>OuR2*5i6m|!6_t|WK#ekKoof52V#zP<#miHB&>De4o zF-V&r`JFXV6>0^Gr>R2YpJ1S~nFb_xj`)!wwgJHwX74X;+FFaYh@oKBmD)CwY47{r zcXb53*wECU+FsfLxAoIW*Q}9tpN7x*4a`~k8=vz<(R&>F2DuxCvc5Zdeh)1_ zlaF5QtJ``GYi8w=QPQ|XHb<%kqY6@cgOnlI%Hx4|Ot83x($qIq)MGTz0*ONpb<4 z4{EAhg-euCLIM+BR`v%hWmV|&k$c({Wu_@8n9ju)8)?!_$uivtpa@52|BlCE1Rcwi zbT^AE^>`rGyeQbh>q1jj3`3n1a+^R=<0(mIZjvR~lA<#{~g)7_5h9=rxJtF9ot?jq(7<)v56|u7r?p&S#rgQUuM-2MvZMTW3P&bnw|L+kUF{!za-#J8r3Ox*ML*aXP6N+>GdI^Hm>5s1FtC* zZvr7|=&=Z)EdXs0zCN@I;TJs4>3xbg9RkN6AqVnD2*uDS-rk#GjN`qx1jqz&t>_Iv zo^=vKBZ>}g-8_V%knbakY+Aj>rT{M&RvLD;%XW&NLmc+Q{3^w?Oqo9Kc?T3D`yJ(?JLR9&NZ_k}#KxKl%r^KeB5Ea92 zu#P)n--FoLRQA~6!Fh{(h?+qqio28K_0LYFnoCURE{O{+|nqsk_yQ-?Z6FW#FG?UF2#OY`+9VraleqRN?8 z14(O!1c9g{P3vKJx8D+e{{_NStBX7gKwcc^@LKF((GU{@cMyO>q@{_JuRc(9iO|t# z$`s2KwnhfpLWeth)J#5N^E>L2MWO~h$3Sx^C+)%15VGL|mc+jZG*OFz)rIvF92Pyo z+2%M@iL4x~8IJ2@7{b8JZYpi#l|p#iH%S5$nio=&E>$e-%T?c@{nhXZSh;zicV&n^=Sc_=AW-oZaD6oGot3~tD{PfQI5hl{Pe-A;l8BJLE} z42gnnisYMR1Jp$JiZ+?Bk^q$R!3K5iiQ+8ctObcSF1faMbP7>7(73#YNnA$B<^{)D zM}*nATy$_DT!|3GmKaCIXlWTx`X!~>)P#DXHPXb=0^=x@$g!4@ob@wu_Wv#9tepG* 
zy~z3Bj8dEoO#Q!|6GB`Dj*a4LK}=;q<)6%ul5?UOq`o)dmw`lAx@a4w>QLt=NO1_K zAh9_Fjj~cjAt7A}lBY?LY=2f}i&F(>gP*bSJf|mTQ?q*fllmFdK~Nhi&Zy9Z+`#IK z3o@vZLM0vaCjoWaxKXVsJO9}_ZLdgDdm_mYMzyij(SG;R_3gST`?i3kv43yqS*$c4 z&NaH|MY^M2ka(l41q!cYoTH|#73n&0u&ee-DXPGCufU&)dQuU@x((6X+vtb7y96bA z>tj9{C>#?vd5b*SPli{&NdWnM{2y@~Mr$TZLg84IKm#|sU1fEpOLtA&rGfa(?NO>F z(0%QSKDm9LNXiQj3nlxR7s1V;3K z@N$JNQ~lvZa3xt^@i8UvipM%A3WRFl)yalberk*$pFy-C;@Xw%F3X{%^uU5Y3cJQt zV)u-R-BXt?T-;e3?c9~yzz>rj(HtziH+|2hk;8hJog-mH3MJnen)~l}?OF+Tm}%b6 zuLnuL#QvB39HqHdd0Gs>R(ZXD`b&zF4snu@P8YqqeE_aGutN6TDk!cQGX-kS3oskZ z4CTS|z}8-C52ev14jogTfhMIZsfENkH%n%~zz%GNO^d6vF(QIhdB2tvEacx!*ss`( zu4*Dm=)wZ3K{?@%S^_6VA0@}NYoZU#`CE@u6kd9BnYVP|DvBMj;n<-@N1xNM3VLC|srw)e%Vp z2b*AMJ=y0-P%|=@`W%t2fV7MJ9N$8!9%=iOCa$@AKnm+rEYdwG9VZvGMhN?vfS-ip zRQh64Jk*)u7b|NI%B@lM{c^Do8k>WUu(8-KFE(m8HK27*!KATv&^=lKRf?KeokxJtB@6|K1}VxWwDx54+h6Dk$6)(@#d6BtyiF2yo5LCmbNu0jCT&3F9_qoRkoo5AOm4o^i&OObANS z$Qy}51x5QP3JSCsQ5Xx*ssS2<=(IuyDTH1X)&rX6)5pk!@@|SWD+}w_R=m1qIam|* z503daid4)i2UF#Iax_iLsA$Aa>2>CdC|(M;h=L`-8;Jsxi47-2?Hq+8_0#oh-@W~* zr}Jf8^R*3S)d6YDK^%PK;yB05+oDMB&~;jnJVe~Tp zqzypR-Bb|^y9cS^10zi3s`S2rGinE!)k=3Jo+CjT^DuDFlTB;Fxk^BNje0M|_kjlW z@Wi>!r7fG)s;t|Lln==pr-w%Ma2y1*S7sfYJpJ6dXV*`G5`qeGjwXej6iRF+?)o7? zCY_RPBi7&zX`7R3XmIqM$QwNY{ZcZdgd*k~8YU#aMjD(~+v;w>8Rsv-84JVYFemG9 z8!6HyiWHUq7K+-l{+78xRJqCbmQXm`9on{}q>aK*4A5wv91MiOZqHhGjeW`Ij?%5i zgv)n1pk!J!XFv~Zql&sLs>5>LinD`^^+*cHkeHq3P>tJ{`r@leAa9&#f(B~3ZW}w* zN|8kxZPkO*$7BhueB0F~kB$Zk#AGxfEl^@Ya*YQ+Pg z3FI}ZQnbJ3t8}tsbmD_u?EM>F2~1`Hsrvcogs5K>wB6b2hI)1dQzQvh7HUdQc8Z?_ zM3q_X2`Dp6J_cevN4oQO0!n0#57&Owt?IBSkVv<9ds)2c=(JsX1sl}? zCFOuzOr(8#kcA>Z(Wz8faQ2S{$_y=z))hqSxVOBuDbUEe8pGbBBs5x~6DLALc_S6? 
zxIHw~+dK91jKZ;ni?!TgP|n{Ah3y0hGR)0waBn}2ViZT>u^ub{D+1khNbqS8 za8YcTx26(pah_aoxDI^lP&Ely{-RBzA=G6Glz7H+Fu)zDl-O-!e14?JsnK^pOc6O) z?e_snTyS<+3k<;W#5x0Ki;zZNKUM`h-u06Kv(ODm-tB5ni{y~9syKm?yN6(r!0-uB znLI2RR8j#i78U{R6u>Z6&W9wKr)H-ri^GWn^q>u!M1EGgR3@NAhn;z9=HM~oPOgXh z3}j%5vrOu{!FP`4Tyjs_9V4SJgQ9+mNA>Dvb_^L3hyALv%Ag)w8q-zvQ z2?2!%NndS0V!Oga*7#m^Z9I%v#nClZe!8!XBU&_{b-dd`VY z%W69al{;-O+sX^?Li5UeDwZL2ot0C|jbYRVaZ6wSz9u<@D3?&?OpX#YLWQzfJcx!9 z^^KOUiM zvLL&@OIlZUB=h;+!lH^omnx$wjWU$%S|Oo|EFv0y+>F`|Nc$@iHAEUpym6{4zlD&* zD*7Fe()L3wvA^H+;4PA`r?5}!K}`I(!3CMIFOzh+@lP5Y@H}%Gy(Q{HnP6jY=Q>{+ znXn5~3s5teu$wBYvC}x?q`fG4E3a4sf7vx_SzE5VM7ysV>dw33U-+=G zPAfupvWVnrST8fX!g|Y|TQiw9;o%KG>?pXKgho}nHX(T_Xk(a9-`ck-Ps$9)eThlG!X;|o3EuNVYD!N!mF!}nILYpXN9ix)rR!!sXGj!O*KAw+29k`1YOuqu_e-QXM4!od=A8^iqzcY5QY7lY(Bg}FORPg2zO;52 z>h_?`NIlfPr~vY@>3c`dXE5bOQ>sn*=58Z$+@!5APUN3v4iGezaAaIlU>niU~P9OHo?XA8Pg zkoYA7`ejzG={*cf1eE^s-9DqfN5P67Uv%T*^%E#VZArp3%H~{ObaT~7TLY_uA;ba% zb}b@z)j3!xlAhqWt?l+gq`D1?wLc)-DD8}gE|u#$CS z4$~~moe_V>H(v;%zJ)7jN>)hbm);M(x+^HoP&s|+lznw!y0-;da;e(#vIBFelE4)h znX7L?7imsU#BoPl^NQns^t{`EXFGzx$by zcS~5O=8y)PF}+AxdT?YyDp{u-f(D2+gPEx1QzT9mpbCEg!BWwE z|L2k`XR8Lg#P4x(H&o?(hO_g$^NZ`O)dmrVWV8I+4Nq|v?GYpnrf&p=c<N$&e>;?md{LO$Z!L;8@W`G0i%LtVAHm{iKe{@E`X1|G3iUZ;aM9S&zU>~D`pfgePyURhw?(XP>91&Bjo?f6YVPZY@-{Uk-=TV}^!4h`-fbPjSS{(Rp%^K<6T-;D z4qER2Op(Y_Tm&ygdP@-+o;EVs=>CNJb@f0Nj(_4vI%ZHz5=_kNE$`G@bQHC$w}wXJJ{m+95ZNr+*Ni=3+dpaScPr@au91&qi4k ziuiEjUy%*qF1cS*tWhgk*kjntoomsb+$LU7nSaa6$l}KY29U9tFMIXaJOxS2LWn4TP$p>X6PI5WmY#)o2 z4>w$dKfo^((?}`vOgvYQb-c%Fza0xn>h0&TtZS0&u#?SOy3~88VUU>cxVxfj(#3eS zH83&49AL%7s}6-+tp!TPR#{G5yWLS^YQyjnNquyB^ER#yu1PE!x`0tG{z$sct;@na^ZQlV1fNN%_{iP)2Hl?WciRc;0qR-FN;_V3SLJq z@hv?I(E~qmqnc3Y*_0j)l)RHjIiJUCv5bUEFY+{!t7aI-J?1z1?OdXB2SmEz z0NqKOi^JD~I-_FK2YI%;3_lVdAT zg77+U4);&tD+U=1sfn?K)j{>rt-2^wO`zay_tfK_Lb;fW*}ILS(H-D+w7dNU({g)_ zw=ZsjniG{-%N=NSH0|%%H>HUl2_fH1gNbKg<*VdNChh9f@1`1{YpcVD5Io#?LXmGl zbKko-c2z)?LfCH}O9RD6>QEtdb!PV)HKbbkn-^&xy5s>x)6!Tv+D`6?tPXDc1{9u3va 
zSuloD4iJ)~mwW~^;g;zuq%w=mf^oYiNV_tn*s77pt4EnSk)3nL;skQjA{Z(sJ5sdH z!ZWRJUX74wP7#khIaOi7ih|{&-LhDc89#J;0j21WcZ-y%ZEFRx3-X$Rt%kBPr>u8l zM#`S}=)_+-Bvv6XqR|X-Y;9D2bzF!D9BzIjnKoL&*;GQ^-WC#a?6p&r z)_~SkSH5g?KXSag#B6Q4O?c7kgyAZh=uUZlok|(k9(8mN%lE0Zo(Q9&JnPkQlwg zVZt)WH!#ovVA3Db{ISccWPd!EiHqk~PVcha1~73}P|#(-)*S3IVCjwB2Z6OGFi>{9 zqfA4f4_9+&ofG8z;<6w>l>l{lsMRsCW?+`CyPgkBLeT5v3W93DR(W*=AcAGIN0bis zC_vcuWg5hQrL{rDEEuKHGq4t4RXU_?;)QHRGw#q~V;2mD_pa<}#2UT?2ZF(Or;!oj z$~brX${5B?a+<1TJ!Dee2(W?yNHhTp4KIr`K}?!3J*TNDF4MshiC0eX)BqHk2`jTWf=n@+_*m07hOve6cu?R(rHPteASfE=d zi(^myr?7brbS#Z>$Y{mLe9}(K`%Ba#pD0`@X|RBDYC0mtYMGNuA*Cy@74eSCRUQ*o zmG@T))frdEY6+@wAj|E}S%Ib~+j5aQQeMo!mtL$tvb?}l4VvHG*pV#$-_p1%QIh#bJ_qys-e zbu#@{iq)BPCSDSbJSB_fk@SxpT6_*8H<*N0YdRnLdW8pTlr%P^GdAzH%(;iP zsp%=N9~dh04>}NcdUP7)R<%{VMbR5??pPiKA(B)LNqwdoMn6%ZCz4>yzgSG*OLYPf zd5r`GeDA#!?^llNq^%zQILz{D$=sc8&mEEnhk(#^S~G%DftV1a4gHF&P=;1p9i85~3%m^~6C`*tXBxJoEo0XJd2w0JeB3n5Bw)JJxc!%MhRNo6x*?m2tk^>rszi`D_ikbKAzF5R*~tb>D{2AWmm&ryrih&IV?;nWPBFIEW_0FFt;8P+HHoj#w+%vb~rfLTQfzVWBEc zwWBQ6utdXDwPq_-Yws8B!bq`kAJKJ%_1z6>8C(F{BWVPNtl^y#zS=^EI$dbCC^cP--I8_IMiTRUl<&M;`ht&-Ry zf9HOj9PF+U?PNYK1Df6t9KC!>=W!w7F@hZ_*vfnv!SvSA7%ZXu8l8}E z3<8=gn`@^MPi{bSBB=!q3Qdn~K8fap-cV{k6nlKF-<{HDWnId!ZGbR)H2-k`-M2AY6$eZj_%b;B-m=^)pIiSs4} z(>z90`>RbXV~9*7O~UOXY3~>r`|jFqgrwjA?T4I3)S`0K@aLY>7OyooM;h`KWN4qg z`XC5NC>kNbTt1;K1~Z7ZlQ%#T=nCDl6pruCb9;k!JfWfQByuupel3WZY_}_*nTQa4 z9GWY*_d`v4CQ{m99CVOn$>2&nLyhW!DSsKAoWU95XwI9lOCns5Npsd9QYRG%=O`s1 zWdjGQO4@wnWDA@$JK(ZtJxd?a^C+0|>F!1859>Ku+vek5^&=UgUGuQzwy(xS#AO1ES z5q77W!uJskCn(PO@`RT5$yw*}(rYX)K|5Yk;h3Nm!^t>UlklX3Xd(`4xkaf8(liM(X#u#XmvPqqLCX1j{|36^(veQiq5)&L4>!i5B23&Y2+?n8{L zR+B~Z(@9&e@#&ER$OzZCEd~Xz8?Qi0SC;X0X@GL~*T8ee;O4!KP;}4g@`|M*j)hrT z)MIt2h*+z~GQD0o78a>S3#6lANqDOF&e>%xsZhy>`JoDb^#`>BqZ+U)1KU_SE;LD9 zyDXTTWhio>izxcXSW+;QNuByWf`QyBNsd#ngfPOw8QN`=EyY@=;&E8I?zM|pVa1HH z+S~@?IgwW2gM1v^kkDww=(d6WjKfv;Oq*aFmQfj=EHcDaZlz?;2~ayd$Uzl>;-Az3 zDjWMF%}Q|7`3~Gw>ur}5$prewd&P;UEoUDQRY(3sq72~6wXa#MteoOFJ=XG5SR)6s 
zI+26(MGLTw?_ez+%U6A!X4OnOxf#4W`qy*wXL3GM&I57%Az3M4Rd-hi;M)SJJ}7rn zdkJP@4Oq=#Es?NcoROsq{2)i^&K(81+LW$6rR#hGv&oi?dzGnnVwr-Ci7E!m>U{4n zx_yN$fCE5>)~CkQgtiU%xUetb%*s-TfqbHDL=KDzUzTFCVVg`OSq3mv6=%wj=rSez zpb3h~msJwlp|82>HDQ6c$C@MUdLqpbwOu2a-(qR82pCUdfAysH%KX*#6`Wn#MMuiR z#TlL4KXgJ!OUXtD71(sxTrUT^jS8t5mhI=wIw3fIT6=)KXak3DPS`AFnWODy(E=Jy zBg(*Z+PW!4d~{aFaF4QPFl>sO1Tr0me%V#)FBnIRRdn@GZq@FeyO1{z6t|l!))eED z*=T-uRjFa1i>wGFR0nKw&(MgrtX*ccdcE%2 zFaX+`W!In_!w@B=4ONzzkoc(vy*i=Gh;}EaCPu~`mWdQTDp=LW=6el%d%Jp_a};CHGQ*mh#27!=5N8+Oh`1Qy{XE0s6fMtuGTk5CJ|ouhO5<)ue`U z>=})iSq6sfm3DWmwuP{Vg_~x2PJ8V8BZewzW-QEf6>a%bI!mi*!t`SShKcq*Fd%n( zz-5UcQ&Gd56sbqQl%i7u$>!4-Xg15)t$w$kQ?$g<9td%zjR4&zUy5XzmRf>Yrq)h@!DPI&B!k&(ey*bguf?tYQ$qSjdC!8GhVV0CmMC!6LL+H=6ZG9gxNf(v^*2y7>2=%65OB3g5h? zV}VdnI{_NLWRe7Yp28FGjl~XXTAuGpET`m9~@!Wq>dRx;T-l| zW2BW%JIVmF#@h^3l|>58U*3FAWN$aJw`)m4S1>wK){>>goz4rY%|ANc5RM-=xczqo z1-cp6qBdu^yIh-<*p>l#7!X`B$Gyqj>J;U7I*(wgfGtljd10csk1MPQOlN66?X?+l zhHC70d|QccJC|`poSbb#{o^~FN;n&;C$_gZ25p9uY$GZ1-FKn~g&+$~Lg(Fm4m85B zxLx>tx>=YKFfOY)NZX?r5r_ldLFgQ10n>KtP&S48!Fh1I%0m77LTqQTn((IX^qJz&#-mm6Hgr^rjttCOH>c-I#ka>kO$8 zQqPmgX-6qeE_<~|>N@!|eAqiVO)Jme=DY~qj^QkzVUmA&1Lc(*Z8{oAce}ZCwGp74 zG2!?(x)p_YX#Y>7i{YtjX9%R<4(Qhp(FPwy9PBuG@>LfnL6O5 zXpPbMjJ=QD0Z!xFFuSM>>3GD;3Qc}cCy7jyrSYf`OpHCneK*wMZ0X2*nW2p$a%0Ko zuJYUgO+)QVK-=cDWu*k-si9_FTAz)-CgNj~g~_b9N4oR^!@_xWb5_ChQ!BP4KywC$ z(C{sc_}p~i-<}C0XJR;~ukWR)r`kS|Se<6eU2!^GU(mJL0Bzpc$97Vd2^ zW}bXT(05nK<5=%{)f;x|2Wu3%hByUc^JCtup%@LoYLkSmX?BmXD&14(Td)wC#Rr$7 z2tNC%w0$RyNmJl@|&!~}l76u`BBRxew}QY{)-Vr(h!6QW=XRyrES=Jqw)J(vw={x}Bu z`&g@9U8&ez&)vPE(bW^9a2wuetfySw7cIf27QlRKs^`uIiQG!9b?9I^@-oKaN4 z{)O_88cCj@T};J2V>w^NMFB?2HqaE?4?Sh~-j0k^%9co3%_!^4t28!gVB~4w?(^~; zA2n6xYcT}v=HY59t=(YOT8vPZJ}$D~1A%msuU}kE(ght99O$fJ-vnjJ?) 
z5&=$27>%=rqEl9DZD8lR8N1Z3o-I4=cQGOazWegbqU4dc*>$$}0d1ypT}{oO5zq<^ zRv2$3e$kXIZtbz`JOyueW}II@(?Q|a?=l2lI)4xq<%gw@8Y#5bIrLdF-rn(2Kt2@l zjk|ZX?*qCT&&A)zrmQ?~+&VXHdD3dhgVV~b#>ZFJ%H~e$&EwHb6Z28C&9|P`0}Ufz ziX=@|X#8Eb`kGQVR=LO(ETwM-TUlcht}Z5Z_6%1z5wrxKQrT5yW?{YAEaVN?(#`M# z(B1mdw8^If>flc~gDzXYfB&XAg%(5NYEE<0r(1acT|{9cRRx=tZaVZ7l6on;f~sEh z5NKV?!Ep=65w(R1 zA5y?$7=N^^d@(^I>UKDOm)3&_ZOBWQVFQvnYdJ3+Qj19ITO>_J$ehsuX&a36AF|?* z`eI{Q5ZG~@A&d4p;I*TUhLJqoC#NXqallZ%q!AKX*Ee^FN`=@bK-jeF7tQE9$Pl35 zku$yqAkjsW5T{RqiFpo}{UG;C0CnH zLGF(^obb8vfMemBALf`gA5fX0m{}_&TTC@=JjxbDTO8UTc=@PLv{aPdQ_*#*xtCPj z(Q)vhw^`XA-Q#N%#GS9tYm}*nvA8*})7>E8T8tc+t-}srmB6q(^#N;`!PQc%Hpk)~ zj;_>t^a>}FvM~~dlX9JbP1Z}a6W!j}_@X&Ag7XB7ReIG?Wp3t(+Zj8B?C<+{06sv$ zza&>>b5ud+YHXeA<1AHjHZS8y2}c$UC2D*h)~O34WR)QDjx38bbewl64ny}eZKI$q z=OWgRkXo$asngMH4O?uNsb+X6D#S$oY&gXFz%>FT#V(ql?Q7O+pjD`SqKtr`C2bVF z#yZz^oZu+C7lz5bqhKK+DCYDM(c+^?s+PEb2RW;(vBB-$Tfx(Y0{0Gwg#}Vg;j0&Y zvC!Kv+0MfVEmr#w%L<=~B#kWWes zM;Fu7=3sbpe8G>b=!l}CNLK-E>rA_in>XpB3_sIKjD-9{`36#cI4U)frbZ0B|2?%< z0IJTHvpp8J`eQgf^C1{IjBwamLe*w=H?~&JK(xxG94v;eL+;Bf+8`0@Z8mw8#4x~C zOW11W0w`tpWzK*!YL~zna>AIWkG~n!p=~_F?m|IX%W|2{mfhq+lS9G8VI?q}n-by} zz`DM8xL|?rPT5AfhN~(zeV-EsE@riP4(-g)A{P(gqHn;qfv_c^iwFLY5vz<$?*vWa!Z&(Ku4y2)%cExRtp_CSh{D7*+S53~Oi~eH zYt@iL%(t*cytHitujiIQ4Ro0V0c=h>|9i=6H+cJhFDVo-5pA+d`})>T(2QJCJdGU5 zKBzySX{yos3cW1;9oP|-*S3LSObye(0rKZIx$>@R0#jX} z&Ai%hRQ&*mt}Lxzbg%nG{fVUNJ_BkIXoP75vqR3fXNM|!yzir4qwlO4U>+G)ex7kj zWNY5QupwrBn}eI05Xc3G*f@zEJKv@w&TihMB}0(dm? 
zoTTXpismBMwD4rD-9!zhKs5uJPTQ8dHz^u+Qe+!v;^~rV!!k|1xe;ckHM;-!x~t}} z9N?TibnN03Y2*_n6bKsKZaH@$3;_rq(LsKr&=$bNQSd+^UT8%sUnivjf{KKU3<|MLIdk5qK6!h9D^=4=Ss9= zwOmHlLF}U@?22*|4C-2a`X4@=Ip^uC4mgP(=ROk}^VG;_z_H*J(NM9;4;g^w(ohlS zXta5oR_!RvYN#@sh|byV35L10qZ^N|8m4PPeL&NsmF(nSI+Pw!P7*mPdb)V*LUT(n zhgn`9o#kvYT?{PFKe8Y&QXvhYotXOrf$# zg$IX#DNVGI%LJ%>8==S>BnXx2Y%sIJun|Nngn*Ts^(-j%)x>iWg0iaCg9E_ihm|LS z`m42196=^lmnu3MS5-M3wtrdIGNd0&NPQi7_ZDg+D6)bs3Ts!5Hv}jub!O(}P}yaO z3dQEeKLD!#?^Qm8l61L1*|v4Mx2PQ;Bqk> zH{zMo5~g#;LM7r*X_dP5c=w1klce-zxDLsdLw2A-B2MF*lJao;`hiS!mp*a5gC-x^ z_>6_3NF8S~$zb7j#Kx_524@R6SVb0;F?=|^SEmvZ5?x&JMe$E;WGCS!e3%C+-y*?U zV46^G9*~ZJw53Q;LSr(I+*!RSwYy0tEb#iWZnFs+_D%al(kOY9MZtu2+~8A1(BCjG zC+UAX5>IIII>OF@*ob+36WuBQzQ1rKyuYQ&1<99J9Ltv@nVWS)Ab_+FNa8nd)gavn z*CSG#a<#JmLP!U!Kir?V?~h1fzFUL};a#aLC@+L;iU#O{4KZ-fsn-E0^qaBgOc9%f zfe=V`v4GzO#F>6^PzQ=4(51Yium5JCNedkfs*v0HRsjmqV28VY2Z%j%nqjf?_!h1F zSf)VylLsI>-(6@b#5~&nsH1f{FQsV?wdc$ux#PRU2*}wF=_X@@Aksal68eo+7at37B4fI(069bGdpfeO8Bc(dap-Z3u<Nxq1W=u<(=cD1{{YFHdsa+OWOc!l z;Xy?>#&efCqHi;cRE1Vb3FE=4JE?vc#ZPs|ZF( zPxz-xsqo7i-)T2WhUkRdYQVm?Q^Vzfjjl)OR!yCO=3ZKjCZGBW7x>Jurex8tudD?R zq`FR37ZFM;Km05|A*+4>wlmnKV9sS*wUmLtoN{t!ShBnDW<{R)lqJ9i@WUQ=v&W=} zM`ZpDklfzbSNm4CKLM0e6wDXa7dxzq!mZbrER<}Y0mxYuAF~d2((ecqY}v{bp?Hs@ zy_?Yvfpw?6Umk(BoFcJP5F_7I#(x%QvPQU4{PsNhpBv+4odPlI+9zuyKoEw53b>#_ ze-lt(9k0=b^a3Y==WAvy~CTRh@|Yp8a0vj zXPQD)=fP6mPub6RuJuBTE1A||n=v_E@tOI~y`2irkn23ylPk(NGoE_gQ?T8yl}bf{ z55vt4qQR%Q^Qx;1EsxE#1GeC6P(eX1tH3dpW zH+PEQ%%3CB8h|wK<&2{NC{z@4q;FkkKsiOuZ2yF!N)*u{RY#!ulR!z0bniNZDZyp! zRBz@wKj)<$EMkZcWCofq>u<=%8KonQgr z4s6e%s)1@BKqdFv-B1Uh&fgAICsa`>$oWN;>y65=7mfI>0jdgAx}pA&P_wmKIX-c- z?d~@;4eB9`>5yD7a6;5X^A!XPwkIfCitPgHALt1c_4|Ll@~jyk=N6_iF$YTS4e8kw z6hfU`rFA7h_O-kP3dQ+S#4cuBth0UA6Cw{ov3GJKM4wb&UYJlwYh;Ws0n+->P*U{? 
zP@^?!FMH$W?~k*xEZl$4C!lwT>OCb3w2J`NN1&u7S#D6Oa-W<566rSW8#*TZ<0S$R zwNKnpZJPSo-T+DFB?7(E<1+k#Cx}#aluCd)1Zqv8+Kei;Rw_c_7ult)B~Xhn&ZoRS zgVK0lW3pgW!WCqv!it2~Lr)A0Lc#l%lDOr|cJps&;qWSN>hKgdid@TqsLIih=8@#riMszQ;oPgg2jxXQ;M zsq*@Ju2g7L6QSsC(;cpd8G8xTI)(Bk%N_xu{ISI>7pPJUM5ziEvzhWQ^g5_>Mpb}n zIaK)ul(wo({D1;pZSSe_*3k$xn;WfERfakYly7{_jOGz)HVj+Df~evGJJNvuNE~QO zp{ACYK*3{^fwnlS9GlXCwhFbqgJS0psep}#*hc{_xK@}?;Qp99+XjuWn51L|HO`k1sM%RX6`*q1b20@v(Fdx^2T;j9 zzs`gisCs7LpaCaJDHDy2fSE`?ecR9@f6@y4VS~Wlk(8R-`##`yw&@#_YBO&kh)G7%GDB8wB7Y!N2 z{JMWhApo7}k^8JFGO%TUuhWU#n&Yvf&DH9%=`-=g9`B_kPBaNK5N zplde)yR~^atddHFpInjeDO-dD7ScVRcTpFhQ0vA?m1-7i)g^PL%TR@MdNJX`AnK~G zy*{!>Mf%gjJ<>)7Wwqu|k%1(G4bh>yuN8Axk$~k;n3Z#FrA*7X0@4LO{s|EPC8Kx?2fhc{}C?2~}pGGJ}*u-X@52OCurUJfc3z_dVe z>d%u zO24$@PxR@~Bvmq)w54`=g~RH$lQsznW2efe9)LtJ9Z)EpF#n@f;(2p)XVPhmA`eCP zBKG&sixe+NqCVa7#i{a$B()QC6|vdfQ41WnGmHza&QT(GnC#5Is-M@QRlFu0+6{ta7%$gBzWW>@b7Axek@#C zVyZzGcv*PAvb;X=FIFy6iTpu1OPbSnCQ%m}s91EtQcWR9v89dl#QH^_v!RFw4f(6n zJOs4})Se*?R1Ft^fW`Or?Pl0{2~{e9TM)opQw)tbLuI|r=)*0h8Zv4DOT{CXzz4yw zTD@PG`dbA;}EB;m72)1Plw6j&*$NwD?q>8M7<+yaVEh%dqG|0BYuu_js*Eg zaDPXa0iOvFR?B5xKHXF-ew~Ybv33#lhh5c1`5R5L!Ks32Pf20t!8P;sQT-{2O8#9+A6iX2Pc$ou*NRJbYRW7=v&PN~0 zh_e0+lt`e)J-Eq;De^#TV4W}fj6iXcGWDJo9yhU4F#uvpT*v_D5O^j9{hW&kZ3Fo6 z%0-3yI6y;adY(!2Hf}1F+ue!j&aT?vF!`?(%34YuY8HNx@tTDuL2y*5Xyvr{xD-t- z^6G5bByw6atc@=5XpArTJ=k2!DA++{7vtAlf1WoEe=n4lt9>cvS zLrry%K`r-29ru-EHAC4d@56XASI#^&DEJ6pIfGV9lT&^eufha=33d zr1!TGn5z zFW+iOs{7Aw_+0qdAx-4`FC*+Zd3UAPN)j_GOz zC_VVejA5^lX1W|2Tof)j+hY5as4XIqVRi@0y15DdbsJ4YkoZ?;{A($cnRC!zIAd@} z1N*IDbPsECI#StVZ^vC9p|G#62t{|1oF72Ja$1G-Db%3Xx0fCw-GISOaxi5M2W4{W zc)w>*!%;lQ#rj3%nkPaQyGAj^<#jfD_gzykuh%UUhobdxEF$f`WoGQ7N>^(L*_wWA zfStUJPi!rc!uB${eN{7>ieRo3EOzS;&EM3tbz9UulVsZIII86?Fa=n?V$t_D!DxIs znyP&V+`EsuYblImDps?Jp?2#Vuo!q`kMS}gjsgF=+gn}?bWuUoz3T5JFeS@9PP;rn=<2)_Afrh}3X~$L{-xAdi z>E#h=jYuq9K-v(f!}++fuq*~i)w^%IoX`{w{mMl!y__iWPgH`tG`DYol3@)IAU%z> z!3?Ij1Z7kaih`%1%B#@a)(kD|V1osg(*;m{U_CC5Qxp0KSXIP5G?X%BF)^&g*oDt7 
zNM`NVt4U5-HMQeSfm6rY(^WH^8`cd<2z+cj*yc!TD@n)i^1ePxi1)XA7O86wNTyaS zDQjWbs6@KlMZU=9^ADQTeNJA`AG(DYyg0Q;@O$JDgYD6m%W)d?c z#kxFYQSA}d?OD;_x^;@BNxLh9(vizi!&%D z-?C4Ua`7`z{t7pe2?K?P)Den?iReldXSF0%StZDOffGmZtULCN4+!J=grX-#**WL^ zL#{bmP=Q7eVeIMRmG7W!c_ElDz&RooMcXlyBVbABhc49AN~S%}u*Z2lie2Eu7iKC= zNJARh-+*UpJ07ae=7hmI-B|~>kucdsz#_uBkM9rS@ze@cT3q2qz$ZC(P~iuAp>~Ji zm1FUyIpi&tOT(->Xb^hC6@(3qJl=#JPm|DJgAdKuobsLAL{&?W-cmKa$I=(Blo2Yb zD0NOIQ2UI5Hf46_e9V0-MdBb9YJUec>v?wF={iC2Bd9fpiZNVM{Ag>Jk6xSDvp%o} zT_J}%x)3hmA33mTnX0S1(Wc3gsCFU{V{Y}~4%ZYPT!DOOl8vusjF3c^;*GTyb~GSao` z`y5N%g`wgS3j(_CNO*Vas0V?oJq%semGp5E`dSDL9)6Ywd2! zIv%orvJ6Pm=mKK=d~(s^|jm3~mRfsv+%%k+9 zLi1jfj6k12S`m&MkW1jr$xidIOe9d&3am4M z@JE_bi|~Xub2ka#GZ*8cPJxyTXg+08T1R^Lk(*&C?i4N%wopbOK3gZDsrzy)A_U~u zDP`WrKtd4*cCb4aQ|I$NpgHq|CNS^(M=@{v&CHv3LPJ;LYNtB@PVqW8-^++(UVR8- zJfLK%1?nqY&`APw5Le(F9o=qpY4@BH$_zCe`ybo(8~ z1ySo8sO5!CkwDF+N|AfOWr-J(2beS*LR&={N7tfc?3+2L&SLM@*c%{|hxQ9~M3IZa zXH{EojxuNb?hjr-Z3jDN_ z0zOX{5x-hYmx_fCd6EPc5(_SwB0I@>-6VmBaI*T)gG->Dc(A zj+Di$xng(+=Dykti;Ei46Q*mlc~hF8CG*xJT$dvU{2_FSFjE-t$}~B%BaA3 zqH2|Urgs#Ic;stg(;K092erPHKbX)b|E)lA`Cib3aj94jHtZ>y5#sg~UBr3Gt6cc& zfu?p$F-RprlE4IzsGw@qKb<2D$y-~idVlp00_w_q4uP=X^Q&z-XJ2|gVw2rv;5a%C zN0eb7dc{znn-}1Q9I?$WVgG{aTf-v|Q-!GDznNH_j2rS;A*^q)5J1vzM69UQaXR)= zu{FhtNAXa_OjEu4G}ucENDmi?L8!;%rG1v}ec3q+A$g+=1FYTEV6A!#Q?S^JA@`2N zkf1@^NjeG+!V@;ysqJRfgPAJ|L$GsyMe!I0jI}(LsN{Gc~-e`iqRgIjB3H} zka4_wF@Twp8GlXO2-dfo*~*J~THm^>BGJNIsW(G+z0#%DQ-f53)+Mw0v&*H9%=gQ(7T01LAl)4*5HJ(%+%aJOxsk@=}a_#y+c{E{&^d zZ>Vex7YSOXNb4s^y=#=BTZyepQ0E({X)&bDO5gquzJU`u42sy|YCWx!M+vE09&d!L zY3)l{pIzUl1Wikv@X6NxXLnHqjV6ZRImVJNr-BzEP9I6$_JutvA!%^V`97{zW~@=9 z|CL$4GI{#sq)MizcO%4VNbBeih30~!_F&DBpq*~S2PE6=xW3{r88)Y94f>9zU20p=?c&jv3PGxR)SBnlw!EPU%hbD-xIYLaheMo|a=m23c%%0ZP^PH&=?SajewFCv6iBMkJMq zZ>`kkR%&}%sVzvvJLV08dc#UBzkQ{q=D5>$TLu;Rr%>;l+b!duhA&rh(yq=*80@^9 zsM&1MP)pWEq%B37#vtaA73)Qxs*%(tipQVId1||+Uot!#C>xKOH4(}wO3|-0)G;Jh zoH<^2{wA_G{yK&s0+AlH(|Z4?B)IN{)OiM_6*l=Y#c?Ah+SXs#BEJnv48DQb`z}*p 
zsc|kfUNf60&11{L-{yY=6Hbw_vdF5{H2)&dwvGLznA%Ldpf=B0;C3A*2nPFL`NZmAaBR zGaO&(G8Xu+vP!)?0$XZzp$F>k(jJNM$6Po{xh%yW zQo27O4=6+7AGpbO_s{p1!G!?~ma=Ln>(up^B2cn-Ts6(kOj!mf!5ShJ?v%RE!$ZZ* zPt>8ieoT7)O0oRzonhTyhY@X|3s_hsyk7?)Oon`gc@Hp@#7?g?vkqe2%R%IbrCWvY zibcz?3=cyP#tQtM^T)J9KwT~wX`B*jd~R=`(h7X;5#yPgv|3D6G4vGWpx5kN*%dl4 z^Tm=|jCkmc**}t~)Ve?K5R6C<^IC8#afFh!a!}U{N`OL727l`N-@E6%8mBtU=8w78 z`iK+Sf$(Fr&fFuuzJZf3(sZnex6*lD1@$g$7HJ$7bbtGo0m~^01uA}x_zEMst@Rj@ z>@D=)StC|$z1;5R9c1?u-bGmeMWZjZ$LGf=nQfbaFVz&PZ!A&U=8^_1R%p?QG^JC^ zNEuZvpn*VS0l8BcnG-cleQ`h-+U`y!?+O)e#n}yMEI?2E3*mHIMRRM|-2}>Z-*0&Y z_JxD~4f{?@Q@_#_kyDf~@{Tb`#J;&InnwR@n>RpjN$w3Q0K%4O;8Dernjs8maQ3RQmxsBOZ( z%OUZFx|FwG6C_m`K9Xslx(vFU=2_y@z|}E5Fs3~!njf$c9yZ>~45#=!1l0x}U&M-) z1NTiuzypQb=spH&1O#Pw11-8nzEb8hpuuD|Un{Cs&P-UYvG{m8C1BNBl*gp zCStepI3S`)uD#Q8^ECi10`x2m9%x%UP$fBBREbs(M9f=WLFq{=#B2}?L0w;D6>td% zHG8?$XeQU(h8);isMYWqvImP8$J6dyn*l0s@R$wFp_}E8FD!8#?gi1fPBdR@3RK?# zb?=k!d}sY?>^VU)cq!5>S4l@*fyDL#oQ~O_w{xx%NG32M;|9xsG@G$Oku4cf#*+`*n45u}2g+9nnbOPT&Pr9UT$)Y8jf|b}We1Xm)T~7^?L`kro0O9lWuGj( znXILkj*bg8t2B_!i|FUw{jI{cv1Mt6&x^#cP%|8>VJ|U^sOXnJ_b7U2#NlR(I512_ zZzX|1{^CAwP3vD6xuy#QfpE-w3*|OOzbp$XuRq~PI#dkOmLg5}6CQMR|GZbEHG|Yc z=qb`5dV(}F_UIR`FY3s;c(z-YKtIZGAgZQNv&Pi*Aen$-y`h>4R~jRLSPoS#3I77r z>7iKl(ZsqnfzseyO4WpYJAO>lE_SJ>sp_glnb67ren{;bYB2WVO5qpK0HgJ!I9xZk zao!A&8y$eaz3rtZ{C-y|2Ld{j7Nul~XGgI*XGQ$#MSG4!2sfTs_A`|?g;4zR^ zcZ$Ec{qP-80=1&rpM%a6vUbbyZK!ldtzV>|&-YN}W2it?Whm&r_4lO8n_~OIYt>$8 zeEkMgaZsCitN?Lz8@6v5?^{rn)L-rFZ2g)+jgl@v#l*dQLDFhpE&CpTEKMsx z6(|Z$c^9?qs)w?J3m<8U$#HT*cQTqUp7~?!E=Hm=MFWlP5osW*5>mIoZW}4m6xe5i z%U=1tS*L|?`XAgDQZtdu&8-GOcw=k;GABoHLRp`emmPmgCNp*!BN?IVzqMvaZ|qp| z;Keb-rV3+tw236tQ8$v7VR?1+-!lqlo8vmAQ+EoO*ALzva!H)$4@s(? 
zd*k>l!YVBJ+^yu(P^PpcNd++2bSfS1L5l73)M~U({vJ@4E(YthM?VaheiUEo$4Bwo z-#|qcnt2^kF?av(zMLRpeMnXrv0_yz6=F%tK!r7f!Mdw+riRg3TZ$DX`;9$PTy}B| zWhdGPxCjWU?T!>G%y-U#?h@y$zNN{-x$J$L0tHtzYa>~(Ugr=;Qn|21)mP{@EV(^a zbE0}u_w$unGo-2ULUjd{3>9_3^N(#HfXb^ZX(?6HP!iSA$gJf$VpWd?vS1wEzpAGd zmt@YzFoLkg3S;92N9-)NS^>72S1x88v1Uk(kZ^2Ng;MW5@UUpkuclDb8@DV~U9ype z*b*d=v&-w2BZVP?3S~2*J$e96UI~=3*f3@=Un?y+*>kFTJ-y8C&sv!rRr96#>}I*+ zvA@m8!z0u*`Oj40pw|{#FBWBxKu%)*gapFydMzWAl4T{3wXiV*2Q{0`A1P9fDZ_Q4 zalICV!ca5A)fsTrK`i~W8rRn7cH+82KpJ;>*<`QLgCc_?mM>@>%EP(ny&z74xmg`Z zLifYI^3~x9Rm0inszQ3iySt8;fN5%3_f(;g-3iug2ZMXk2~oJmZ4s%6>{FnesCfJ8 zQ2t5bq(1kyAH;A@`H5B_fGeis6IisGq|#X;(7hxRMTHMwD{qfE3y0#*@-m0kU6+I! zdsTya6q|x2T(R_s6z^9GC4R>P&=!DvVF(w3K3KT2vHEB36CcBBDcG*G_4h}L^h9Gk3_#F{Ns6&hWdbOsd}xbkSOGS2;0V|r787<)RUl-8})xPf~`2)F%M(;^m- zRY6N}rnv&ID$J;XT}YCpw;Kw#i9M1Ow7RoxckNPDAPMw6cVi1p)5dS7mQtE@nHIyo zI)j=4IotN`97t)(<>4792%hT$>+5gIwtf86HG;9^|1t=W< zfDCw~S$CM-8-@j&q8B=Z9Ro)x_E@@p$W=xQkjCGGX;wCI47=YcjMx{IZBm@ z8RIqgx#t&qMX(*^=mgdhutV9IgG~)~3)LeD(^}J!t|gTXPnIGTD;IY=F=Az8Sh%>E zn<71$o9_hU$GZ8MIQO84UJQV;{>_|5V#ORPizy4uWj0>1Q-lN@?t{l73xnSs>edu# zbWh`?Rw0Z3xYr#VTE{3cG9*f$W=48boql3M~*=@Q6`1Fp5 zHKZXKV>~9hJaINAwS`rV60b1XiKrzZ3gW)7xIb}lYQumfb+FCiE>rTkFf=HKI2R!(#OFxA55-plK@avzpOFI(1ctb)k)yK%I78iE zqH3?OW9{Omu8Y3&Hhs_87sKvpj&t3^?vbb182WtwkO`{?#V@!Y%=x`GwiHYl$( z!<+Ui`av-L@|3E3;Bq?B+|q~S(C$P4~89P;$SYWHjovsWR>Apa%~@a4{%Vk=AS_MI;tUxOsdh$ zui)-Ds9Xw6(!h33+AR8fbFGvgAthMxu7@LH@ii8=1rort2j1u>6zTX}zw7U^7J=W* zRt>NHwC+j>i?%xIaHy$rJbaHKdt$3<;9=$sVz|a*ifH1HwaCjb_<_;3h&H(~D@D^2 zZB=HXiet~v?0UUtX3NPNnc07GrGRG+3;LR&Z|f&^-(_eaIY(h3wJ)bk8Q`#XH9Kb* zj&H$R-^0Nk>Hbw-hhpY}3^&y{SvfQ6t^n5r?p!mtfwm)pwK-T$+x?v@1*7>OhN|(# zGahKrMmTk6o3X{afE}Kft|`afZDFjv5-vXL*SS6K1v<8S;_*lUN5!%kKr*=2Ip7fh z!Pnkok^}C#)z<+p447}^oI%dhy{C4`v1E92arv>fL12>25T&!Iz~tm1Q+j1voozf` z`EGI)A#cNh%j~qO<^HABop^$0#{9*BVUeF#MquP;69UhapUeDUV6&3xyR-O_zH?4r z$>=lNFP!MNqkotUc#8LtEnm|(Ct2|=rEW9qjAJONs*WAig~B1Iy6xcwJ@>4pZv3~C 
z&0q~9I{?sD<=p)M-H5JeLNSUfqN)@skjXL+$t7=TS;x!I1NwjaA~yyX3vxC6w@zPK04vutmEo zhTiE-qp`0P4ZH5jRZBH!w_o{Qab6;tK5P?=z?`~f>j7G(IZqEqeFc5VbHUt)y^t)P zmX%Rhs?G^C$6jhAmCDX1j0cv(MuC2XgC}+Ik8aS zoW`jku*Z)DGl~hcG{ z3n^)df!1+B=jqU7US1sUKa3VOL#n!)N~IUp{Plyka5BD(dXcaaL&XWu{>qH|bjOv@ z{l79E0a5%zM1wBZbcX^5l?J*@dYI#5_~`eY+BZDiKg}gM2rMpU+sR=#fO)8>z!n7} z6KD}6{po35$4FRdPl|h*r@L=uvTdK5BmB83*?N3~dP@vu}Wn!eu z@UXnBGH!I&@FH|cMx7$3+D{o~L`Eq0IbOLf06e!7x;)Zkqd;Nx*)EyR@ci5{4S7nO~D>^*KP#Vj>Vh4HsvcNe92h5SA9k`e!#AlQunV>Xv|aeQZW&r2Y)I~K z(r=pxM-`s(Z0)XP_D8I^!-$D&pPm!CEY>Enu{kcT5i3q>lR*uL)bUM(5SqKsXN3gHy&S7`*&N^Mc@ZE5UUv?aZc{eO0*485jUYITMpCx7_gZcD^c3mdPSuH>F#g*u3EMzXZu zXMgvKjx~WYAW%zJ3KdFoMw{j!vS@fX-2>s`JjAvlSk)(|7%NsNnq6#b67DG7p`|NQ z2xM4DHFo_#yWU_8_gh%QaJ@(W2P~xv>K?DX;Mqq&W^@hF9dR0NE(SD_0BTh+XsK`n zt!t0yVS4zk^+w)?rdvo*CeTkBwxH@i5X*BhPOfx8PV0KDa{tL9Xc}{bWa#^vu35k` z*LT?Cf8)XgLdvQ=v0f5m86`0)4s$54Bom!~x0BZdOPmJl^b>oICj6;LseXwOg2Xk>>->Y0xf%^DTP1*zPh)23GanED&9upsV*x(3ulN zQv})W7DI2%dHDcW<2`VQG;teOF;-vb(2 zZB^^Z6lX*tdstartg||>rhkgZ&W77VF0q+5J9`yg<0Py!jFIsnaT58abmzpWgW)^ zr0-<=g>n`BB-U+Z^%I-(DtjYRdqN_WNZa(_P{K%4eZ|?ZBhs344{^X_q)7w0*UQu7 z_tO@iA5Y+o1Of{8B6FJRB)0?!^f_uLNc$XV=WF&*uH1tyb0B9t4}up?;DaR`t5gxm zp|WV_4*Fmf_z};UfMFG zX`ijkslNZykKRaAeSM3jc*ZNzNwh%nxKW;>IZg`@l3KGhMbdS;=G;4E;vNoSzfPNd zEw8AFM(%~_$JPqEi)ZQ1ETqa@SZTAX#Yp3Jn$G2P_SS7E^;*h9b)FXUoaXgW3T zMSZHx0E`puYXzDHBoy_*5*bX@pD9qR!b5>bx7K^vCe6_FBPS(Q6Tv! 
zTvX&KO|u!*5|!@%x#u!Opj`AupyQ3`$w21@vE>1etCr^~O^^uGwvMq1z zyjYbpO|w}YMAPv_*OK$)WAJdi$KlvFUnv#8H1=^|$!={9XM_4~9s35TC54(AviQu` z7u_aykr_=s$NC7BQl-A3kGoNEm|ht;0Ojm+yR8GcO5dkFg=+IkxiH`ziz8_>ulp;K zWVc@;d>0Ad{YQ7FHf0|m_5#;&rSB2ArLcB{LR=H0Frj615N%Rh6Dp9a7cYi%?3HKj7m!1s95)HKfhtf;~f%L=2t1XGO1m zCVCvy>4sMyO1K%Cd@wOWdB@syKV(XR+1DSYqk)@3McY!v+#gc^4N%kDAz1a1r&mS> z%F?@w!)c9122O;6(n@AR|7y;7Kg)+!N(-^JN1zZ-#G4I5xAJ{w6aw`rJ)%e>fXhz- zA?b8bWD%eH0mx_wFXjRiLcWE7Ha^{panwHq$|;%v5k(tR1%&GZoMGQiWI@s=B(N)` zcZncpEKh+x5jp`PyQqyo&$95eXTuOP$TzX9v3ziVz(+l-tASK{1~n_npeh&|)U)wy z6oNW4|4G%ukwVSRjK;YdJ_xA>y}$NsY9va}sOKXi@oD-oh$BgEE~1XxjD6dHR3}Gn z61S8rVZXLbn`4Tkh!+)lBYi1RbM_s?zJ-yrqQ0D7k4IzgnSH4hv7_4Y&aqO%M}L+3 z77a0r+<-MP#2(9dGA(hfC>p-J{s>| z>&{evcG13$h@&Kx3}>2Uqm`=6z5PA+pe2`AoEkQyLxanU-=sFX8xnmNhYB=5j%xWk z$GTE;6@)@ltlhTL2c4`ReSAwVJ5hzVdNdlu-@Vtob4%%Rge~k+n~?y}6=;{)`Cu;! zX12ret{vz!R1t0Xb-99K8;N6I@v^1qo!x5&ClLDxR_(IgF*&rb_y4-e1?p~vwbMys zk#7^y#-WjRrArx>$^`xR zoM4W|G($l!-7wi-&kSm&pqyv3W;BUrC7KXQbE+%X8 zl0(%D%G*xzMf^Tk2{tlhsP!GuPA`FmgZY7SPM~Z*-#s|4cT%4U6f)+B+>v?jfS7A> zj%`v2KvmouIdw&!ANmK(@FQ={==$e7ONwHoD^_UW z3f;2CL={8S%tYho&#sB3564}=7gffAd;24hH$2Csnp~Mq#p%aLlWf!BHeWC3_OZ(t z$%1rkeCIbe5KtDxrCJ6cv*A*jlJc4^d^gX&w@hBujOBDp9q!P2bQX3_MW8wbN-3HaqG&q18ptWD%gt7zycRlVUVfGBi*q`Bb8R63*9 zsS0OV&yf@d6k!Y0d(B8ynA#lyGJ^;jm!b|p!&vZP1bG23ZgqjU1w7K_WlyTiq6e=*C~_jo9Y%zr?^E_oFTfLlz`ndQJm4Qz%C}!&D`{dO74z5$ zW?fQtv?*PGyaz*lN12LxuxPEQZ$uedVpkRnIGub{loZSBBraZ?7IJfG0D?Gobr0Me zL<9L)G-}q2r>Ys$LcabQ8(Yvj+X^t6 zxGl^?8Jo7cUB2BTVtM(K*+x`BU#Y-?`pJE_f;!%1v)ihsK;9{!1oDcG-rn|I4Um&Q zSJ`juqX|%$aZm|}%1$@(J^qc%ab*d=I;+2&c`VC;vA64 zxk7JUdO)fy8l3__U(oSR?#aPALDnrMn=vpRDnvOD{sa{qVJua?v@XU}Vs@Gql+SPA0B>8&({%VE|d6Kq)q);;~y6^=bj$D8y zBni^c1LAivBTYqu4M8M8+Iu7~mug7!^JD4KFpSSk=IxF>iYG*07@LW_`}-4}YX(+i z8|ga>rP%2EfJbtb5z5F!h|CQuG#G>KlFTlItB}=?eogiv>aO)1{u-AgLh_rj)T=z4 zUP|(%!!TZ8nHyY-7Q1*RQu~4N6ufH6$H-c`aK5Vw#;ovNR)rS@W7ZMPKwK!?F`i7i zj?=Ivuh+Fbxw`n6s?#7Q;Z%geY5Ze%DV@{x;i 
z?r*qhaYf?vY_crkdAPLm@rps>e7Q_Gnk&u1}at99kf(EPS;CE)FaiEvI)5{sxT52uScQ}Mv2Dj2Ub7vjUN(r zGR?bbc9~P6!ZoniSajh=Q}F{3Ct1d&nv35PRe07$TH!?l>rwwC8DYagx=ln(!ue7S zBT#XqN{v3P)8;dcW|_>LGwn0FhN;P9IbA%ko~%CJaagU(h+q$-r1I|RiW>`ZS?{C2 z@2NwhFPf-O;R8^rabvbIj-9R?1K;~@?her+RK+>rv|Qa4tQlMYGo#k9Nh%BFMPvBw z*q8pH%z{o!LYc2*x@YF>cp35Xh&9~TBl$9Pa1^>rY8R+|4)qCH z!{s?E_u^f2_?dpF7O-@^LTy`4meKIs8bIgQg~?Q8*tXX&*xf+AA!``yigmB}@ORv^ znD!pmYrun6`%kJy+-6uwxml%Nn|kLFaJ$Ob)4(zJ)x2kNA80xwO&x7k;GlYKnmQFdKxEW^bUX=_w08)$Z3EgXOKv{hZiWQ;$xn5^00vc^qp zkl=f)37qkDq{>ais;^icy{Qn|#}Pl3EtC`B+ZR#xnEB{NZk z^ zmm5~d#v3 zMZ4<=$g8<=rxf-SWgi?vO;YwcQ^cmSGNrJ6PGJ!WuVs$6ny^;}xG7+^hDJUYtpYfG zTF=GWwuuh2m2Z8JeuNWanBdg2-dx~Ua#Db+2JZB5Y&e#TI&+)SX875IN!}uMJHjrV zImLGl$-=L^(37)kC{djg8V)4B|jn)P6%hTDc&9*3xom-s9Eat|yY@jsT zt+%DzH}w zfeI&iW@sJIqEw!F2M_i%^M*wSUQ zEe*q^j++eu_S&7>lE|h=)WrqVZ zIZd+Xa;db8+#_5yQrT&XaM7Ss*<8=Ja4{-F)Y(0E3Eb)Q^04Q;`AN+j)^g9#t0{KA<2CB#Y?O+*kt#mLARIf+Y>nuw&x2;EW$cHbDg^Vf=c zeSx9S8~E5}-U{`2tg?K?eFT0T3TFqPD+4musKa+y>IXpO5hzf!dZ0=*1rTN_e6T?4 z?*g<^?tM|o|DAvw>u$|K_iolHMQ_$jFeBA{a_2dHLMTDAPM`LGU-*%RW z*=-`Gw@D!>w+fW*DANPfiRwE@H`HPYU1EBZ@`e;HyKlR#Iw3@=D?k(AbO?)cpuYoi zhj!*L^%EE~9pnkAUlX?|(l#O$^ZsWEs(kYu!PK9JiGr|NoQ3oFEZ!c-1m7gOF>-F! zwM0+{leBZwb^kwZAq+I_D&MOUIu`e#z1D<<@d+q&dTTXfAjUOyH`JfSUg z{5W~+UTxoA2;94dpJIh_kb9xa#$;=x3;Pw{xaUas_yc0fhu4CaVWS+N2^+1CGTc26 zMFisRs>=x10^BNaZ31Wg%NSiDIKG^s4sc?tkdD`LvL68-Yygw)gd>jK-S2MgK>=hz zwOXYp9OeY{_uaC8gi8~i%*wqFM;15`CnCXcnvH4B0ZY(gaW2hG;LFC(ggAN}Nh-Ri zQPgiet zIRajj4Y5wt@`%?wo;vCYGX;+f5G~+t6UA)v6T1oweU}L4hr&eVk+qtRBS^e9W0 zZ3JIVRw?~|R0DtsqWCmDo+!JNjs4H;z!(D-%h!iB%Z->~8_P$I3$Ju0Q(1_MPLPBEZjsM4z@6{mc7oeS zxSYDaueTJ=CP&^-S97>Bz;zXuGxG!LR#K72cbK^zDV+5L)(CePxILq8&EQZGl2?C? 
z%&arGbA*#;T4uh7Gr0oz?zuc4qHR;!^4y?Mrk6C95i68-65{a+*O^FeQt4{C0qOsA zotr7j>Y875uso6meW7The}@%KvuJjY#r~UxrtYbwaKY^cUWeBD8U&nBirl@Dl2K<< z{$o~)8#fkOKWsHb)^?2lpj|ag?F{*UhO@PEzBYWbMY6YXP7?+%0gKtlW)?98J0eFt zSbfD)kHEUcD2&9eR>=B9*<`!f(&iLSvypg(1eJGiwp1h}2suJ?QyGidB&-Hb(!95o z(aHpEvb#On`3{X(*;!Ikw8vTsTeZ|Hw`XWnMQzab3@!9USSXljD@o*76Q}~InjZpL zmy|G79Wi7`SZpXXst9aTtg2WKOw3tmv3kaTx?n}bki;X$Qo4pi)CSagLI`mXcuP*d zm|3WR^s0YYu3?0aud+GcLYaw*Dbz5Otwt;^AX=gKX^;ureLiHCN2)k^XXCqrMQ$}? zA8p)JHeAMGHCh;>2fYnBM>8&)wf`lOWYDxu!=%G8=(?q}tpl2=uW?OccXR!zXvNaj z^e7{=lG0`q4u_}h$9J7kcgrLCFJK09eN9n3IzW5wW6SX5*_Ws64#e1fc2`b!^MkP3 zNL$hJTbb(I9Bpxcg{hWajPUpOC>-r?Pp?3B-2&L)OU_Auy?~7*15EQ91K8=o$oIV; zEaslaGAh61q}4fTbF@8||MLN@7}}b+PzFsS*PAg`J16ZWi3ZO2P-Wm{HPo%WEh6Au zDRsR_F|Vq(9^w!uUmV;u!bxmmQ^T0L;+D5?F~dRIjV=E=!bRg?m-j8#Cf?i^pv?f? zdIa(B{=aa??h?ofXIiEshr~^BWLu7$B+d&{Y2p*}c!POdm^9?e4dl!5W(I7SYBLOOI?dhgnWix~vHv1D}*iXQOx{ zuc^n+5Jgj^WgU!u(CjH`)+&yNw$sRWSy4Ad2+bwZ32l@2EjLJ>(A=)Rv+5CTo1(2X zMYF6tb2K&dK11sYa(zIv3UY{8tK3{#lF-rCfCd3s=V&@5>>U&iVvKz{ofWy6s8&~X zYr54mgdF==$f{fCSb>Y1W^F>a+F`kp1Vb8j%=R%H^?7#&=Ni(4e8Rhx_qK(*U(w^# zUzr3t!8dt5Lek4CWE*F)RRg5B?7XlhfZdZ~SvYNlhpNp4(VEF&`EEoA);>o(JmRUi z*Y+u*qY-Oh%VT@J`!0yAR8G^b33uz9aPHZ)L1_Idq%0}a9dR`WdM=2U# zT&<4}1A1|C;G6g07xd3}_qi^vSph?xVEZ3Zcqh7&d%Vo1jt$bJEWzXBzUr<=T8-Lp z`+}>EdP;apv6{!4gyQe}k0X?*WmJw)JylOYT%SOVWNlw81`$h4kCCi1!}@ZVwvZoc z6$<3yFdZBuxkTeeLSZmf$#mO+&+L{0by>7E{1&KB-sn!$OP+(KU>aZp*9bN!sG6KN;TgT);`0! 
zIK(+8tFPMm0aTc=bwlA%jmTODsAi#h{Yx)a3CHDIq9pE82&oK6)gehm_xgsW*zwQP zYlxW6Io#C<(xANGQ*YnYa2O8YU9&Y${>W=r)sECvwa9R%l?Q^9!i}Uqy(q$gF))0#Euy-d%(IzWl7+!0H>{m zEvL>-#Tqs^sAFBZd`Mik7oUhTg&HFoh^q;0aStrAaxLF*wm4+>x|x+1qpj9>2b@q~ z+Gqf}rMfbTNeLloKH~1cEsR2q83imd61t7gd!P+iChB2A1n^y}#J|_CVX8QDtLx5F z19rioO~BwKIkdx|;bT`p6Hl`UT0iRgySHF4OX`fY6l`-~60*L|&nUB4=R2%svAR|} zB|Dm98L6~wLX_mY=1@Dy51@9&+dWx4n5uZ%ROTT|JbI-fnn#-7)lF|UbHD;BK}H#X zWox@c?!mpqNc^oA;iJQ(@3y$;c3Zx=R2IZGZ zfuMlaFS6Sz<-+KBEa!`oWA2tKKLZ5Q^TmzgR4H~QK#;(W%p{Vh%qxs!0Z?vy*d+V%$LJw4?O&hN#y36Aj1RF6F1 zyysiMxg5?x^>ajz=iB!C;jB;e{4O|Xr@5YROf|v>I1{-h&9gl*&H^28f%Xh&<@-+> zmtNwG=smFX$PvgD2E3!lN_L;7^C9crQxwKIIHi4`L<2<<_55uVIiM={TBSgl&tb_1e6m7R`p58v+k`e9Ozsah2@t|;==NMJaX9pSeFL>W zLebR%1m%1HqH2-XQ25I{w8^t%cpwK$stNcU!b2?rgII3D$uMInLkorWkR__@TrxB* zMn40Zzo2P#zFWpdC|y*r7m|&feu^!UP!BObeju2(?V1{Ptpla-4}6;s{leT9rBn5Jo8W)7x8r@%~EDOekik zHUOh|Q1b{Tku92Oy2^WBIc;aJyLgP5OKVGUSKeWCCyDdvX(+Uw_ic`4d?-6|*B0xw z36_z-rk;Q=K2NY}q^lU2&#;a_m#W~peni&uByyW#)hD`my-o_U zHixwstfpAk6l=o6CGs%P6?tgrI=)NS;^;aCy3T<05nVN3FJNu5zYq}tnDhY|Fsu^5 z-U;Fxuq_8OD@MDyK1;`Bhgs>M$q8@J3~6nOmNsxPXLP+Vy=&vC5}d`**PF8@h%z!tw?yu9xx3 za;!MgW{P#ZrRy!0mJEkG4DhRkq_9wxHMPa}b2vbq0_EEgMx=d2S|s#}gr>;7cSv3i zg5)iP!xhRX9utaMigTnmVcgIR=@^kT)Ekj7o?y>IA|<8Nh{q^hDK& z#TTQESX;n)t7*3x2l=D1Ody|nO_7wUBLngwF9WvP%H8fP_e67t5BD$FfL^x%Qj%P) zq!)IFCB=OPD+Gwll>2Z`Gh^*xsU}!kLRV~P!}}hzNfml>^}-gH>mW3?1rLRbv6M^U za@*puJdWjW3Kv{+x}2cz@0_}ak%xe*5|m2dL_N^xfK6CBp>CW)r~*p2k1*Uu4J~`Z zOm~kWfxwr?S8-J)QxZhbt}Q0BQqrisdk>$>9F z@u;TdK{AP%1}@c)E?(3{B}{&(GGt<f0G5$@5@)}D_zT&=kz;(}eTl$}HomgT-y4$nncrM+ye5LDP9 z@4aipP+=@)gyNdzLNQG&OQ=fH9H#Gqm?3Q$k`8{Ri6DaiGb^em_Pkv`)}W zFwRmWPPXX*NN6py_&*2PC>RT9K|4DR3{&kL&7)_;+Z& z-g_zpRh7VZ{Ck5{c;;E+>WyIe_BF$U$`D+(h4Q_uu=5idkb_=8gITJ&>8L51F&JZ^ zMHt3Z!9v`^=dJEtZ5vp^$C-i+m)VY2Fl}007~!-B{4}&}yb7Pd8R8ld$KlqDxDl;K zxJ!m+FEKP-?f}DYtHCoR4aW#2g1h*n%a(N02TR!8)Wvn{fa~e800#>DsrH_eXXNB! 
zWU{OhiX}XkgWF@h#gX%JfLkYUo9s3Nrz3X=524>{Dk;Wu_ZO4Uy=4?%xo_G$%<1fB zmI7sUgV$0~EI`VLF#|NGuRi*)~^J*UfvV+jqA zh2hLt4h$meO0(d&GjL|WLzP?c5;j@`-?!BDKeeN~BHrfkPJ?%|(u@bl08g*FxQEXU zzZ?Kh2Wa>injyajxXl6G0%Ag1K_yE`0S#Poo522#J_kp4k9W5AIwjM8zz-dV%7v>Z z>@cX_21!SFR?wnRkcnpWO_RhvmYbptH_O;U z;XD70bpvg^v^4BCgH}_thk=oPuuxCzqkOdohbvc_7`UqMt9M5|akhug3?IZNK1P4L(k?IeX{-^Qu|ON&kNEnqfUn3~6O%Ix=bEtoZw|9HwqMeRIv>jip@K0GYkfvTf&U@oX?k$+;9fj)ZODJgAZ5oIV!`qvoZQ^{N z4vUN;V6E=xrigg+SOaBdUk}Gkmn^AbC|EASV&S$}tJ0X3mYX#jOcG!j9Tu=e>5?>7 zmBw|7)eH~AluyMX&*0n>tQIAIj%8(W@TWp81CF(o6XX_4c{pdGCCflT6|Dkg?rJ9O z7qB+r=lOG3{EZFsO$e)93Dzn+{7I}j$0|R8m8y>DBW_3|U9b_ige<(mReVENEW;x# zE;VTIFW?4B7gkPI=vMk*ds!?gexe$@Of;&{721_Xx`t!SJ+J3?<=fhsXTXZ;;4o@) zBy0j!iHR&11m@BdRnv8E!O~Jjp4O<>5iVTCz6hLfi$)_LNZPFu=1KbyuKF23tM&sZ zoREMc4BjrTJ1=kG;vrpGS+zNJ5w0oR@=Z8lCYP6&cg&Ha$*As?aYjDh-VxT&%8Y4f75Nvb7@v8GfxQy9N% z2C)@E@<*pZLU@ZEn~#uE{;dMFyL1Miu;=@#J@ap-I?OE8Me{woD-5yxYm7QTnZ*hA zTijfDsE9HK%AOwW6yiTb8wTdOx{z+4(am|T{npZk)9B3Lnjzze;q zDwAfWU0k;y{@7`YNak;m|1NS6$V`2ho#q8{tb0uBD98a^@9ur&WP!uPP?W;f58;D5 zf~8sWhdPvUj}b-e88Oq2CDs)xBuO<(WT!+dQvwyRI6n_o98|ys&(cGg5#|I{Qm7dR zfy5V*os{&gnkfuKncjU(SWY@OQli)hXjD*GwPZ*mwg^&bKqRePUZ$1omto&sO0gOj zZ+Fi6(2kNLO@-nzG<7pNB1M{-;N{{s?E|n#w+S^MwSa_}uxoCcAcgTli{W01lymP5 z(lGSTU@=wed#rV`QlvP_5DJc1ES?*U{D&v`z`;3I`4nsY6l>&RMwcIJ!uHTMD=Wy)o9k6cIG+^a>YmRk1V`*Bheo^4H3Dy#@s!0CvU93uUA=Xq$ z4;FJ~q*&bcIAYoT73Oc?qvM;mCq#5D2^K7wB6Q_ER83f)kgg}HEK-|tu+5NQJse6? 
zM5^CJvYCJcNk*?HOVz#!Rc26|hicTr4JbeRe}KwY%3c5yc5yLlwTpQ{Qt^ChJ?yLm z$=#lCr>9^zz(z^?uXcH#%eo?^<{8s;jYyHEcSy7I(GBjpJ4EQr>PS169b1~-=yo@( zK0(4a*E=hP5L1Sv(vV##JRoH_9LTF&HVzyY!6clIWN)@g%)7fKYSTiWC=Vs@&YPPO ziDe)tTy+&)nz?XY$FA8{^;2yuv`0x@l5K)C4CgrrsSZez&!($1fYb|kuAf1oD#E+2 zFCd*MQm1KoL^^ZsaXVf^NE1lSS>z6>mosfMBv*eh3>@Ni(0z_Y30G*V;}fha($$<) z90S%Guy&zqE)@=IDi;OC%)1umZYkFGC>O3`WQx^LSA??9j&%mCcao7*vkoi9osB0( z=mITV`e|28(9&HzlMZ||p50L(^M~5zamAD}-ym0=5_FY{lls!TGXq1tU~_UTtv}MH zUa^;tj50Srekc}MjO5=3aYWNjBt^sZHXhrRvTt=QMOnvRy(p&%fx08jV!QiV3%a+0o8=yoqKA6jh#_3Vo)E2E} zXakl3+YDGQP}~Je)JQQ{c;6#ynfwp?+Fi8455t4&0JcuS=I3*+AGI`vNuyERyPo-x zjmrmU25X;FwhvhQ6zh6~3d(VWItQpjpbo>t{Vmj#D%Mvm<>0tppF$;6aUIe24XE=8 zD$LzfBvu_{e4teVmxuE69Mbv@DeB)yR6kgHseO{^B6$E*dkd9Fg{{N7GbnSYHBgnF zEbpOKf;#3<%Mqw5PJNIlYu6O79AoWEF%Pw~N09Z?Hf?%hXb1lm%d!4uE9H>z6( zI({1HG9YHCf$c;XGP$O?89Yb4NRn;c=OrJ-o&9PMyu_AbxQ&|Ig-chUCG(Y6`dHAmY$ zno%D04O-Gx(rt8`1ntEGXe$Xq0=TBc`K0TVmv*uUwXv8U!&Qh|ot6L^j(#aId%tko z2zE}v_5_Ud{zqW$+B_Uo8oTWFueks|P^KAjF>Rig&4C>O441p=iFtT1yyZ6K?{HDD zjMnOZMt8B*C$K0X=Kyz(a8W`AIB{Eg3Yp};3+yt7vsJzhZctnq+#+zegQtj+T)u|} z;*L3*p46R}m+ZD$K0@0^+O|(Jci8qAQYzxI{h zjSXMD3Jd9)Ir*Cb?1--x%xt!2xiBk1h6$w*7=8MH zS^sY5+GeY;5Oi!t=)Kx-Q7Q~*%P#)7BP#gSM$^fvrgmkwQgJqla9CJ!_GM{vxxT9~ zB;sAzL7{BlaIuvEio}s_Ba7{IhOGKCWG!>7VY0ZWjC)6|oFSsi85aT5qRQR8a)iv- z`QJg?e=jtzraaP}_oab5+n2fAkJy$X*~bn*>9rq}qCeO}W7#00mIP{(_sf*19IG*w zj7o@Dk%5~d>k%t0J)ohHZ|GVBUGv4tu@=T!W>`vBjfg*mwSQ5YaecyS0c*~~IR`15 zh=;sXYdGDIBBeq&7mMvn&%$>=3ik{YD8eUOVb|Z$h~JGO4@6B7D<3E2%Y^(B@$#?8$mQk-_3BHc#XYN9~fnIN5lw7x@%GRXC=2<_Gb$1|3Z`fX%j75!AHMFD3!4bo*KU?XgIg{LR7hlw`xxGC(+_%Kpl;%^cq44zk#-- zaXDbI99B*mVKs{OfJh9c84p|r@jnJr{IWU7QY)e!aLT%k=pko4reE%^5#SQ!^(3S!1aPHSnfIiX`@d|*B#dV?uTdML& z+ytuytoDIK#0fSX3ddtXlCkpSWHX>gYn=$H>U;h2P*WTCj)OxZj6ho4X!M7@yKYKS za8ub|!k^H5cL4S3d#y%AEa8;*bqW-(PUt|V18NUIO-1DmP)R5v)^W0ZqiJ(sU5|PT z>zY|2Cnp`q#(IT)QIpfrzDjcexpJ;_ha5)TOUrkF@CVfBv}tq>wyTeRpXT)uC}$nh z)R2fSG_wZh;p$v#Yt)KN0h z(Xbr-xlk`dX!prPnmyW*LK4)DbL1ZS_m-;iAyr$1f^Z(NV|x;?7$~Qkh$>~Ei{9o` 
zMVczS?L(S$p~w#xyrp^O%OIE4Nne>k$uW#_ra~cVjCk_%jxm4%($|eB))F5=T@2NA z#tn5S)WKTHP(@H&9P|X0dfH@M^woMt)ge@QC}QX2mhcX?zRaoL4Hr}KE)zVB)P;S; zV)6e0P)h*<9s?8r000O8j* + + + + +classification-binary + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + diff --git a/examples/classification-multiclass.html b/examples/classification-multiclass.html new file mode 100644 index 0000000..5f73f78 --- /dev/null +++ b/examples/classification-multiclass.html @@ -0,0 +1,10036 @@ + + + + + +classification-multiclass + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + +
+ + diff --git a/examples/clustering.html b/examples/clustering.html new file mode 100644 index 0000000..dc09720 --- /dev/null +++ b/examples/clustering.html @@ -0,0 +1,8430 @@ + + + + + +clustering + + + + + + + + + + + + +
+ + + + + + + + + +
+ + diff --git a/examples/histogram_analysis.html b/examples/histogram_analysis.html new file mode 100644 index 0000000..fff86ba --- /dev/null +++ b/examples/histogram_analysis.html @@ -0,0 +1,7899 @@ + + + + + +histogram_analysis + + + + + + + + + + + + +
+ + + + + + + + + +
+ + diff --git a/examples/regression.html b/examples/regression.html new file mode 100644 index 0000000..458ea93 --- /dev/null +++ b/examples/regression.html @@ -0,0 +1,8972 @@ + + + + + +regression + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ + diff --git a/pyproject.toml b/pyproject.toml index 1463c3c..0b12bd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,23 @@ +[project] +name = "simple-data-science" +version = "0.1.0" +description = "Simple examples for common Data Science use cases with tabular data" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "jupyter>=1.1.1", + "kneed>=0.8.5", + "matplotlib>=3.10.1", + "numpy>=2.1.3", + "pandas>=2.2.3", + "pre-commit>=4.2.0", + "scikit-learn>=1.6.1", + "seaborn>=0.13.2", + "shap>=0.47.1", + "statsmodels>=0.14.4", + "xgboost~=2.1.4", +] + [tool.ruff] line-length = 100 diff --git a/src/classification-binary.ipynb b/src/classification-binary.ipynb index b6cc044..af9f07f 100644 --- a/src/classification-binary.ipynb +++ b/src/classification-binary.ipynb @@ -14,32 +14,54 @@ "cell_type": "code", "execution_count": null, "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import logging\n", + "\n", + "# configure logging\n", + "logging.basicConfig(\n", + " level=logging.INFO, format=\"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# get warning filter policy from the environment variables\n", + "# set to \"ignore\" for rendering the HTMLs, or to \"once\" otherwise\n", + "WARNING_FILTER_POLICY = os.getenv(\"WARNING_FILTER_POLICY\", \"once\")\n", + "logger.info(f\"{WARNING_FILTER_POLICY = }\")\n", + "warnings.filterwarnings(WARNING_FILTER_POLICY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", "metadata": { "tags": [] }, "outputs": [], "source": [ - "import os\n", - "import logging\n", - "import warnings\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", "import shap\n", "\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import 
LogisticRegression\n", "from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV\n", "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", - "from xgboost import XGBClassifier" + "from xgboost import XGBClassifier\n", + "\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:,.2f}\".format" ] }, { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "3", "metadata": { "tags": [] }, @@ -75,23 +97,10 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", - " datefmt=\"%H:%M:%S\",\n", - ")\n", - "logger = logging.getLogger(__name__)\n", - "\n", - "pd.set_option(\"display.max_columns\", None)\n", - "pd.options.display.float_format = \"{:,.2f}\".format\n", - "\n", - "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", - "plt.set_loglevel(\"WARNING\")\n", - "\n", "# plots configuration\n", "sns.set_style(\"darkgrid\")\n", "sns.set_palette(\"colorblind\")\n", @@ -101,7 +110,7 @@ }, { "cell_type": "markdown", - "id": "4", + "id": "5", "metadata": {}, "source": [ "## 1. 
Load Data\n", @@ -116,7 +125,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "7", "metadata": { "tags": [] }, @@ -138,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "7", + "id": "8", "metadata": {}, "source": [ "### Target column\n", @@ -154,7 +163,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -169,7 +178,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -180,7 +189,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "11", "metadata": {}, "source": [ "### Train test split" @@ -189,7 +198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -204,7 +213,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -220,7 +229,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -229,7 +238,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": {}, "source": [ "### Scaling (Standardization)" @@ -238,7 +247,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -274,7 +283,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "17", "metadata": {}, "source": [ "## 3. 
Exploratory Data Analysis (EDA)" @@ -282,7 +291,7 @@ }, { "cell_type": "markdown", - "id": "17", + "id": "18", "metadata": {}, "source": [ "### Boxplots by Target Class" @@ -291,7 +300,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +317,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "20", "metadata": {}, "source": [ "### Pearson's Correlation" @@ -317,7 +326,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -331,7 +340,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "22", "metadata": {}, "source": [ "## 4. Feature Selection" @@ -340,7 +349,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -369,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -384,7 +393,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -395,7 +404,7 @@ }, { "cell_type": "markdown", - "id": "25", + "id": "26", "metadata": {}, "source": [ "### Correlation check\n" @@ -404,7 +413,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -418,7 +427,7 @@ }, { "cell_type": "markdown", - "id": "27", + "id": "28", "metadata": {}, "source": [ "### Multicollinearity check" @@ -427,7 +436,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -443,7 +452,7 @@ }, { "cell_type": "markdown", - "id": "29", + "id": "30", "metadata": {}, "source": [ "## 5. 
Classifier Model" @@ -451,7 +460,7 @@ }, { "cell_type": "markdown", - "id": "30", + "id": "31", "metadata": {}, "source": [ "### Select classifier: Logistic Regression or XGBoost" @@ -460,7 +469,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -475,7 +484,7 @@ }, { "cell_type": "markdown", - "id": "32", + "id": "33", "metadata": {}, "source": [ "### Hyperparameter tuning with K-Fold Cross Validation\n", @@ -488,7 +497,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -521,7 +530,7 @@ }, { "cell_type": "markdown", - "id": "34", + "id": "35", "metadata": {}, "source": [ "For the full list of scikit-learn's scoring string names, refer to: https://scikit-learn.org/stable/modules/model_evaluation.html#string-name-scorers" @@ -530,7 +539,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -547,7 +556,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -564,14 +573,13 @@ " verbose=1,\n", ")\n", "# execute search\n", - "with warnings.catch_warnings(action=\"ignore\"):\n", - " result_cv = grid_search.fit(X_train, y_train)" + "result_cv = grid_search.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -586,7 +594,7 @@ }, { "cell_type": "markdown", - "id": "38", + "id": "39", "metadata": {}, "source": [ "### Final Model" @@ -595,7 +603,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -623,7 +631,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -637,7 +645,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41", + 
"id": "42", "metadata": {}, "outputs": [], "source": [ @@ -647,7 +655,7 @@ }, { "cell_type": "markdown", - "id": "42", + "id": "43", "metadata": {}, "source": [ "**Plot target rate per group of predicted probability**\n", @@ -658,7 +666,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -667,7 +675,7 @@ }, { "cell_type": "markdown", - "id": "44", + "id": "45", "metadata": {}, "source": [ "**Define optimal threshold for separating classes using the ROC Curve**\n", @@ -678,7 +686,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -695,7 +703,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -707,7 +715,7 @@ }, { "cell_type": "markdown", - "id": "47", + "id": "48", "metadata": {}, "source": [ "### Feature Importance\n", @@ -719,7 +727,7 @@ { "cell_type": "code", "execution_count": null, - "id": "48", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -750,7 +758,7 @@ }, { "cell_type": "markdown", - "id": "49", + "id": "50", "metadata": {}, "source": [ "### Performance Metrics" @@ -759,7 +767,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50", + "id": "51", "metadata": {}, "outputs": [], "source": [ @@ -777,7 +785,7 @@ }, { "cell_type": "markdown", - "id": "51", + "id": "52", "metadata": {}, "source": [ "#### Confusion Matrix" @@ -786,7 +794,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52", + "id": "53", "metadata": {}, "outputs": [], "source": [ @@ -804,7 +812,7 @@ }, { "cell_type": "markdown", - "id": "53", + "id": "54", "metadata": {}, "source": [ "#### ROC AUC" @@ -813,7 +821,7 @@ { "cell_type": "code", "execution_count": null, - "id": "54", + "id": "55", "metadata": {}, "outputs": [], "source": [ @@ -822,7 +830,7 @@ }, { "cell_type": "markdown", - "id": "55", + "id": "56", "metadata": {}, "source": 
[ "#### KS Gain" @@ -831,7 +839,7 @@ { "cell_type": "code", "execution_count": null, - "id": "56", + "id": "57", "metadata": {}, "outputs": [], "source": [ @@ -843,7 +851,7 @@ { "cell_type": "code", "execution_count": null, - "id": "57", + "id": "58", "metadata": {}, "outputs": [], "source": [ @@ -853,7 +861,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58", + "id": "59", "metadata": {}, "outputs": [], "source": [] @@ -861,7 +869,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59", + "id": "60", "metadata": {}, "outputs": [], "source": [] @@ -869,7 +877,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ds", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -883,7 +891,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/src/classification-multiclass.ipynb b/src/classification-multiclass.ipynb index c66f3f1..573ed99 100644 --- a/src/classification-multiclass.ipynb +++ b/src/classification-multiclass.ipynb @@ -14,33 +14,55 @@ "cell_type": "code", "execution_count": null, "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import logging\n", + "\n", + "# configure logging\n", + "logging.basicConfig(\n", + " level=logging.INFO, format=\"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# get warning filter policy from the environment variables\n", + "# set to \"ignore\" for rendering the HTMLs, or to \"once\" otherwise\n", + "WARNING_FILTER_POLICY = os.getenv(\"WARNING_FILTER_POLICY\", \"once\")\n", + "logger.info(f\"{WARNING_FILTER_POLICY = }\")\n", + "warnings.filterwarnings(WARNING_FILTER_POLICY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", "metadata": { "tags": [] }, "outputs": [], "source": [ - "import os\n", - "import logging\n", - 
"import warnings\n", "import itertools\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", "import shap\n", "\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV\n", "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", - "from xgboost import XGBClassifier" + "from xgboost import XGBClassifier\n", + "\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:,.2f}\".format" ] }, { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "3", "metadata": { "tags": [] }, @@ -73,23 +95,10 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", - " datefmt=\"%H:%M:%S\",\n", - ")\n", - "logger = logging.getLogger(__name__)\n", - "\n", - "pd.set_option(\"display.max_columns\", None)\n", - "pd.options.display.float_format = \"{:,.2f}\".format\n", - "\n", - "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", - "plt.set_loglevel(\"WARNING\")\n", - "\n", "# plots configuration\n", "sns.set_style(\"darkgrid\")\n", "sns.set_palette(\"colorblind\")\n", @@ -99,7 +108,7 @@ }, { "cell_type": "markdown", - "id": "4", + "id": "5", "metadata": {}, "source": [ "## 1. 
Load Data\n", @@ -114,7 +123,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -126,7 +135,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "7", "metadata": { "tags": [] }, @@ -136,7 +145,7 @@ }, { "cell_type": "markdown", - "id": "7", + "id": "8", "metadata": {}, "source": [ "### Target column\n", @@ -155,7 +164,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +180,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -181,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "11", "metadata": {}, "source": [ "### Train test split" @@ -190,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -205,7 +214,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -221,7 +230,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -230,7 +239,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": {}, "source": [ "### Scaling (Standardization)" @@ -239,7 +248,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -277,7 +286,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "17", "metadata": {}, "source": [ "## 3. 
Exploratory Data Analysis (EDA)" @@ -285,7 +294,7 @@ }, { "cell_type": "markdown", - "id": "17", + "id": "18", "metadata": {}, "source": [ "### Boxplots by Target Class" @@ -294,7 +303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -311,7 +320,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "20", "metadata": {}, "source": [ "### Pearson's Correlation" @@ -320,7 +329,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -334,7 +343,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "22", "metadata": {}, "source": [ "## 4. Feature Selection" @@ -343,7 +352,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -372,7 +381,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -394,7 +403,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -416,7 +425,7 @@ { "cell_type": "code", "execution_count": null, - "id": "25", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -427,7 +436,7 @@ }, { "cell_type": "markdown", - "id": "26", + "id": "27", "metadata": {}, "source": [ "### Correlation check\n" @@ -436,7 +445,7 @@ { "cell_type": "code", "execution_count": null, - "id": "27", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -450,7 +459,7 @@ }, { "cell_type": "markdown", - "id": "28", + "id": "29", "metadata": {}, "source": [ "### Multicollinearity check\n" @@ -459,7 +468,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -475,7 +484,7 @@ }, { "cell_type": "markdown", - "id": "30", + "id": "31", "metadata": {}, "source": [ "## 5. 
Classifier Model" @@ -483,7 +492,7 @@ }, { "cell_type": "markdown", - "id": "31", + "id": "32", "metadata": {}, "source": [ "### Select classifier: Logistic Regression or XGBoost" @@ -492,7 +501,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -507,7 +516,7 @@ }, { "cell_type": "markdown", - "id": "33", + "id": "34", "metadata": {}, "source": [ "### Hyperparameter tuning with K-Fold Cross Validation\n", @@ -518,7 +527,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -553,7 +562,7 @@ }, { "cell_type": "markdown", - "id": "35", + "id": "36", "metadata": {}, "source": [ "For the full list of scikit-learn's scoring string names, refer to: https://scikit-learn.org/stable/modules/model_evaluation.html#string-name-scorers" @@ -562,7 +571,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -585,7 +594,7 @@ { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -602,14 +611,13 @@ " verbose=1,\n", ")\n", "# execute search\n", - "with warnings.catch_warnings(action=\"ignore\"):\n", - " result_cv = grid_search.fit(X_train, y_train)" + "result_cv = grid_search.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, - "id": "38", + "id": "39", "metadata": {}, "outputs": [], "source": [ @@ -624,7 +632,7 @@ }, { "cell_type": "markdown", - "id": "39", + "id": "40", "metadata": {}, "source": [ "### Final Model" @@ -633,7 +641,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -661,7 +669,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -686,7 +694,7 @@ { "cell_type": "code", "execution_count": null, - "id": "42", + 
"id": "43", "metadata": {}, "outputs": [], "source": [ @@ -696,7 +704,7 @@ }, { "cell_type": "markdown", - "id": "43", + "id": "44", "metadata": {}, "source": [ "**Plot target rate per group of predicted probability**\n", @@ -707,7 +715,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44", + "id": "45", "metadata": {}, "outputs": [], "source": [ @@ -720,7 +728,7 @@ }, { "cell_type": "markdown", - "id": "45", + "id": "46", "metadata": {}, "source": [ "### Feature Importance\n", @@ -732,7 +740,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -788,7 +796,7 @@ }, { "cell_type": "markdown", - "id": "47", + "id": "48", "metadata": {}, "source": [ "### Performance Metrics" @@ -797,7 +805,7 @@ { "cell_type": "code", "execution_count": null, - "id": "48", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -815,7 +823,7 @@ }, { "cell_type": "markdown", - "id": "49", + "id": "50", "metadata": {}, "source": [ "#### Confusion Matrix" @@ -824,7 +832,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50", + "id": "51", "metadata": {}, "outputs": [], "source": [ @@ -842,7 +850,7 @@ }, { "cell_type": "markdown", - "id": "51", + "id": "52", "metadata": {}, "source": [ "#### ROC AUC" @@ -851,7 +859,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52", + "id": "53", "metadata": {}, "outputs": [], "source": [ @@ -867,7 +875,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53", + "id": "54", "metadata": {}, "outputs": [], "source": [] @@ -875,7 +883,7 @@ { "cell_type": "code", "execution_count": null, - "id": "54", + "id": "55", "metadata": {}, "outputs": [], "source": [] @@ -883,7 +891,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ds", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -897,7 +905,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": 
"3.12.9" } }, "nbformat": 4, diff --git a/src/clustering.ipynb b/src/clustering.ipynb index 8eb0ca9..08e9048 100755 --- a/src/clustering.ipynb +++ b/src/clustering.ipynb @@ -17,6 +17,28 @@ "source": [ "import os\n", "import warnings\n", + "import logging\n", + "\n", + "# configure logging\n", + "logging.basicConfig(\n", + " level=logging.INFO, format=\"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# get warning filter policy from the environment variables\n", + "# set to \"ignore\" for rendering the HTMLs, or to \"once\" otherwise\n", + "WARNING_FILTER_POLICY = os.getenv(\"WARNING_FILTER_POLICY\", \"once\")\n", + "logger.info(f\"{WARNING_FILTER_POLICY = }\")\n", + "warnings.filterwarnings(WARNING_FILTER_POLICY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", @@ -25,13 +47,14 @@ "from sklearn.preprocessing import StandardScaler\n", "from sklearn.cluster import KMeans\n", "\n", - "pd.set_option('display.max_columns', None)" + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:,.2f}\".format" ] }, { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -43,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -56,7 +79,7 @@ }, { "cell_type": "markdown", - "id": "4", + "id": "5", "metadata": {}, "source": [ "## 1. 
Preprocessing" @@ -64,7 +87,7 @@ }, { "cell_type": "markdown", - "id": "5", + "id": "6", "metadata": {}, "source": [ "### Load data\n", @@ -79,7 +102,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -91,7 +114,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -109,7 +132,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -126,7 +149,7 @@ }, { "cell_type": "markdown", - "id": "9", + "id": "10", "metadata": {}, "source": [ "### Scale data (if necessary)\n", @@ -137,7 +160,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -148,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "12", "metadata": { "tags": [] }, @@ -158,7 +181,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "13", "metadata": {}, "source": [ "### Find best number of clusters" @@ -167,7 +190,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": { "tags": [] }, @@ -178,7 +201,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": {}, "source": [ "Elbow Method implementation:\n", @@ -189,7 +212,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -207,7 +230,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -216,7 +239,7 @@ }, { "cell_type": "markdown", - "id": "17", + "id": "18", "metadata": {}, "source": [ "### Fit final model" @@ -225,7 +248,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -237,7 +260,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "20", "metadata": {}, 
"outputs": [], "source": [ @@ -252,7 +275,7 @@ }, { "cell_type": "markdown", - "id": "20", + "id": "21", "metadata": {}, "source": [ "### Describe clusters" @@ -261,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -278,7 +301,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -295,7 +318,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": {}, "outputs": [], "source": [] @@ -303,7 +326,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "25", "metadata": {}, "outputs": [], "source": [] @@ -311,7 +334,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ds", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -325,7 +348,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/src/histogram_analysis.ipynb b/src/histogram_analysis.ipynb index b73e1f1..e7b104a 100755 --- a/src/histogram_analysis.ipynb +++ b/src/histogram_analysis.ipynb @@ -26,17 +26,41 @@ "outputs": [], "source": [ "import os\n", + "import warnings\n", + "import logging\n", + "\n", + "# configure logging\n", + "logging.basicConfig(\n", + " level=logging.INFO, format=\"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# get warning filter policy from the environment variables\n", + "# set to \"ignore\" for rendering the HTMLs, or to \"once\" otherwise\n", + "WARNING_FILTER_POLICY = os.getenv(\"WARNING_FILTER_POLICY\", \"once\")\n", + "logger.info(f\"{WARNING_FILTER_POLICY = }\")\n", + "warnings.filterwarnings(WARNING_FILTER_POLICY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ "import numpy as 
np\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", - "pd.set_option(\"display.max_columns\", None)" + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:,.2f}\".format" ] }, { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -52,7 +76,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -65,7 +89,7 @@ }, { "cell_type": "markdown", - "id": "5", + "id": "6", "metadata": {}, "source": [ "## 1. Load Data" @@ -73,7 +97,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "7", "metadata": {}, "source": [ "In this notebook, we will use the **Medical Insurance Payout Dataset**. This dataset contains historical data for over 1300 insurance customers (age, sex, BMI, number of children, smoking habits, and region) along with their actual medical charges. i.e., the expenditure for the customer.\n", @@ -86,7 +110,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -97,7 +121,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "9", "metadata": {}, "source": [ "## 2. 
Plot Histograms" @@ -105,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "9", + "id": "10", "metadata": {}, "source": [ "### Distribution of Medical Charges" @@ -114,7 +138,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -140,7 +164,7 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "12", "metadata": {}, "source": [ "### Distribution of Medical Charges by Smoking Status" @@ -149,7 +173,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -175,7 +199,7 @@ }, { "cell_type": "markdown", - "id": "13", + "id": "14", "metadata": {}, "source": [ "### COVID impact on Medical Charges" @@ -183,7 +207,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": {}, "source": [ "Let's do a (hypothetical) simulation of the impact of an event such as COVID-19 on the distribution of medical charges. Let's assume that COVID caused the average medical charge to increase by ~10% for all insurance customers in the database" @@ -192,7 +216,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -208,7 +232,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -233,7 +257,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "18", "metadata": {}, "outputs": [], "source": [] @@ -241,7 +265,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ds", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -255,7 +279,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/src/regression.ipynb b/src/regression.ipynb index aa2266d..49d612f 100644 --- a/src/regression.ipynb +++ b/src/regression.ipynb @@ -14,32 +14,54 @@ "cell_type": 
"code", "execution_count": null, "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "import logging\n", + "\n", + "# configure logging\n", + "logging.basicConfig(\n", + " level=logging.INFO, format=\"%(asctime)s [%(levelname)s] %(message)s\", datefmt=\"%H:%M:%S\",\n", + ")\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# get warning filter policy from the environment variables\n", + "# set to \"ignore\" for rendering the HTMLs, or to \"once\" otherwise\n", + "WARNING_FILTER_POLICY = os.getenv(\"WARNING_FILTER_POLICY\", \"once\")\n", + "logger.info(f\"{WARNING_FILTER_POLICY = }\")\n", + "warnings.filterwarnings(WARNING_FILTER_POLICY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", "metadata": { "tags": [] }, "outputs": [], "source": [ - "import os\n", - "import logging\n", - "import warnings\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", "import shap\n", "\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import ElasticNet\n", "from sklearn.model_selection import train_test_split, RepeatedKFold, GridSearchCV\n", "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", - "from xgboost import XGBRegressor" + "from xgboost import XGBRegressor\n", + "\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.options.display.float_format = \"{:,.2f}\".format" ] }, { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "3", "metadata": { "tags": [] }, @@ -69,23 +91,10 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n", - " datefmt=\"%H:%M:%S\",\n", - ")\n", - "logger = logging.getLogger(__name__)\n", - "\n", - 
"pd.set_option(\"display.max_columns\", None)\n", - "pd.options.display.float_format = \"{:,.2f}\".format\n", - "\n", - "mpl.rcParams[\"font.sans-serif\"] = \"Arial\"\n", - "plt.set_loglevel(\"WARNING\")\n", - "\n", "# plots configuration\n", "sns.set_style(\"darkgrid\")\n", "sns.set_palette(\"colorblind\")\n", @@ -95,7 +104,7 @@ }, { "cell_type": "markdown", - "id": "4", + "id": "5", "metadata": {}, "source": [ "## 1. Load Data\n", @@ -110,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +131,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -139,7 +148,7 @@ }, { "cell_type": "markdown", - "id": "7", + "id": "8", "metadata": { "tags": [] }, @@ -149,7 +158,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "9", "metadata": {}, "source": [ "### Target column\n", @@ -160,7 +169,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -170,7 +179,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "11", "metadata": {}, "source": [ "### Train test split" @@ -179,7 +188,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -193,7 +202,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -202,7 +211,7 @@ }, { "cell_type": "markdown", - "id": "13", + "id": "14", "metadata": {}, "source": [ "### Scaling (Standardization)" @@ -211,7 +220,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -247,7 +256,7 @@ }, { "cell_type": "markdown", - "id": "15", + "id": "16", "metadata": {}, "source": [ "## 3. 
Exploratory Data Analysis (EDA)" @@ -255,7 +264,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "17", "metadata": {}, "source": [ "### Boxplots by Target Class" @@ -264,7 +273,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -280,7 +289,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -296,7 +305,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "20", "metadata": {}, "source": [ "### Pearson's Correlation" @@ -305,7 +314,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -319,7 +328,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "22", "metadata": {}, "source": [ "## 4. Feature Selection" @@ -328,7 +337,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -355,7 +364,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -370,7 +379,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -381,7 +390,7 @@ }, { "cell_type": "markdown", - "id": "25", + "id": "26", "metadata": {}, "source": [ "### Correlation check\n" @@ -390,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -404,7 +413,7 @@ }, { "cell_type": "markdown", - "id": "27", + "id": "28", "metadata": {}, "source": [ "### Multicollinearity check" @@ -413,7 +422,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -429,7 +438,7 @@ }, { "cell_type": "markdown", - "id": "29", + "id": "30", "metadata": {}, "source": [ "## 5. 
Regression Model" @@ -437,7 +446,7 @@ }, { "cell_type": "markdown", - "id": "30", + "id": "31", "metadata": {}, "source": [ "### Select regressor: Linear Regression or XGBoost" @@ -446,7 +455,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -461,7 +470,7 @@ }, { "cell_type": "markdown", - "id": "32", + "id": "33", "metadata": {}, "source": [ "### Hyperparameter tuning with K-Fold Cross Validation\n", @@ -472,7 +481,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -508,7 +517,7 @@ }, { "cell_type": "markdown", - "id": "34", + "id": "35", "metadata": {}, "source": [ "For the full list of scikit-learn's scoring string names, refer to: https://scikit-learn.org/stable/modules/model_evaluation.html#string-name-scorers" @@ -517,7 +526,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -526,7 +535,7 @@ " \"neg_median_absolute_error\": \"Median Absolute Error\",\n", " \"neg_mean_squared_error\": \"Mean Squared Error\",\n", " \"neg_root_mean_squared_error\": \"Root Mean Squared Error\",\n", - " \"max_error\": \"Maximum Residual Error\",\n", + " \"neg_max_error\": \"Maximum Residual Error\",\n", " \"r2\": \"R-squared (Coefficient of Determination)\",\n", "}\n", "refit_metric = \"neg_root_mean_squared_error\" # metric to optimize for the final model" @@ -535,7 +544,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -552,14 +561,13 @@ " verbose=1,\n", ")\n", "# execute search\n", - "with warnings.catch_warnings(action=\"ignore\"):\n", - " result_cv = grid_search.fit(X_train, y_train)" + "result_cv = grid_search.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -567,14 +575,14 
@@ "for i, (metric_key, metric_name) in enumerate(cv_scoring_metrics.items(), start=1):\n", " print(\n", " f\" {str(i) + \".\":>2} {metric_name:.<42} \"\n", - " f\"{result_cv.cv_results_[f\"mean_test_{metric_key}\"][result_cv.best_index_]:+,.3f}\"\n", + " f\"{abs(result_cv.cv_results_[f\"mean_test_{metric_key}\"][result_cv.best_index_]):,.3f}\"\n", " )\n", "print(f\"\\nBest Hyperparameters: {result_cv.best_params_}\")" ] }, { "cell_type": "markdown", - "id": "38", + "id": "39", "metadata": {}, "source": [ "### Final Model" @@ -583,7 +591,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -611,7 +619,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "41", "metadata": {}, "outputs": [], "source": [ @@ -625,7 +633,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41", + "id": "42", "metadata": {}, "outputs": [], "source": [ @@ -635,7 +643,7 @@ }, { "cell_type": "markdown", - "id": "42", + "id": "43", "metadata": {}, "source": [ "### Feature Importance\n", @@ -647,7 +655,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -678,7 +686,7 @@ }, { "cell_type": "markdown", - "id": "44", + "id": "45", "metadata": {}, "source": [ "### Performance Metrics" @@ -687,7 +695,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -706,7 +714,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46", + "id": "47", "metadata": {}, "outputs": [], "source": [] @@ -714,7 +722,7 @@ { "cell_type": "code", "execution_count": null, - "id": "47", + "id": "48", "metadata": {}, "outputs": [], "source": [] @@ -722,7 +730,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ds", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -736,7 +744,7 @@ "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/src/utils/clustering.py b/src/utils/clustering.py index afb7af8..0fcdf0b 100644 --- a/src/utils/clustering.py +++ b/src/utils/clustering.py @@ -2,7 +2,6 @@ import seaborn as sns import matplotlib.pyplot as plt -from tqdm import tqdm from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score @@ -13,7 +12,7 @@ def search_kmeans(df_cl: pd.DataFrame, max_n_clusters: int) -> pd.DataFrame: kmeans_search_lst = [] - for i in tqdm(range(1, max_n_clusters + 1)): + for i in range(1, max_n_clusters + 1): kmeans_dict = dict() kmeans_dict["n_clusters"] = i kmeans_model = KMeans(n_clusters=i, verbose=0, random_state=RANDOM_SEED) diff --git a/src/utils/common.py b/src/utils/common.py index 8423601..f16e5be 100644 --- a/src/utils/common.py +++ b/src/utils/common.py @@ -2,10 +2,10 @@ import numpy as np import pandas as pd import seaborn as sns -import matplotlib as mpl +import matplotlib_inline from matplotlib import pyplot as plt from matplotlib import ticker as mticker -from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE, FIGURE_DPI +from .constants import REPO_NAME, SMALL_FONTSIZE, MEDIUM_FONTSIZE, BIG_FONTSIZE def get_repo_root_path() -> str: @@ -22,23 +22,25 @@ def convert_to_integer(s: pd.Series) -> pd.Series: return pd.to_numeric(s, downcast="integer", errors="raise") -def _set_plot_font_sizes() -> None: - plt.rc("font", size=SMALL_FONTSIZE) # default font size +def set_plotting_config() -> None: + # set font sizes + plt.rc( + "font", + size=SMALL_FONTSIZE, # default font size + family="sans-serif", # font family + **{"sans-serif": ["Arial"]}, # font name + ) plt.rc("figure", titlesize=BIG_FONTSIZE) # figure title plt.rc("legend", fontsize=SMALL_FONTSIZE) # legend - plt.rc("axes", titlesize=MEDIUM_FONTSIZE) # axes title - plt.rc("axes", labelsize=SMALL_FONTSIZE) # axes labels + plt.rc("axes", titlesize=MEDIUM_FONTSIZE, 
labelsize=SMALL_FONTSIZE) # axes title and labels plt.rc("xtick", labelsize=SMALL_FONTSIZE) # x tick labels plt.rc("ytick", labelsize=SMALL_FONTSIZE) # y tick labels + # set figure resolution + matplotlib_inline.backend_inline.set_matplotlib_formats("retina") -def _set_figure_dpi() -> None: - mpl.rcParams["figure.dpi"] = FIGURE_DPI - - -def set_plotting_config() -> None: - _set_plot_font_sizes() - _set_figure_dpi() + # set log level + plt.set_loglevel("warning") def plot_boxplot_by_class( diff --git a/src/utils/constants.py b/src/utils/constants.py index 45cf5d8..08fef0a 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -8,6 +8,3 @@ SMALL_FONTSIZE = 12 MEDIUM_FONTSIZE = 14 BIG_FONTSIZE = 17 - -# plots resolution -FIGURE_DPI = 100 diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..80b7727 --- /dev/null +++ b/uv.lock @@ -0,0 +1,2077 @@ +version = 1 +revision = 1 +requires-python = ">=3.12" + +[[package]] +name = "anyio" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 }, +] + +[[package]] +name = "appnope" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = 
"sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, +] + +[[package]] +name = "argon2-cffi" +version = "23.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argon2-cffi-bindings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/fa/57ec2c6d16ecd2ba0cf15f3c7d1c3c2e7b5fcb83555ff56d7ab10888ec8f/argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", size = 42798 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea", size = 15124 }, +] + +[[package]] +name = "argon2-cffi-bindings" +version = "21.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/e9/184b8ccce6683b0aa2fbb7ba5683ea4b9c5763f1356347f1312c32e3c66e/argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3", size = 1779911 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/13/838ce2620025e9666aa8f686431f67a29052241692a3dd1ae9d3692a89d3/argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367", size = 29658 }, + { url = "https://files.pythonhosted.org/packages/b3/02/f7f7bb6b6af6031edb11037639c697b912e1dea2db94d436e681aea2f495/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d", size = 80583 }, + { url = "https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae", size = 86168 }, + { url = "https://files.pythonhosted.org/packages/74/f6/4a34a37a98311ed73bb80efe422fed95f2ac25a4cacc5ae1d7ae6a144505/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c", size = 82709 }, + { url = "https://files.pythonhosted.org/packages/74/2b/73d767bfdaab25484f7e7901379d5f8793cccbb86c6e0cbc4c1b96f63896/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86", size = 83613 }, + { url = "https://files.pythonhosted.org/packages/4f/fd/37f86deef67ff57c76f137a67181949c2d408077e2e3dd70c6c42912c9bf/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f", size = 84583 }, + { url = "https://files.pythonhosted.org/packages/6f/52/5a60085a3dae8fded8327a4f564223029f5f54b0cb0455a31131b5363a01/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e", size = 88475 }, + { url = "https://files.pythonhosted.org/packages/8b/95/143cd64feb24a15fa4b189a3e1e7efbaeeb00f39a51e99b26fc62fbacabd/argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082", size = 27698 }, + { url = 
"https://files.pythonhosted.org/packages/37/2c/e34e47c7dee97ba6f01a6203e0383e15b60fb85d78ac9a15cd066f6fe28b/argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f", size = 30817 }, + { url = "https://files.pythonhosted.org/packages/5a/e4/bf8034d25edaa495da3c8a3405627d2e35758e44ff6eaa7948092646fdcc/argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", size = 53104 }, +] + +[[package]] +name = "arrow" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "types-python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/00/0f6e8fcdb23ea632c866620cc872729ff43ed91d284c866b515c6342b173/arrow-1.3.0.tar.gz", hash = "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85", size = 131960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", size = 66419 }, +] + +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + +[[package]] +name = "async-lru" +version = "2.0.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b2/4d/71ec4d3939dc755264f680f6c2b4906423a304c3d18e96853f0a595dfe97/async_lru-2.0.5.tar.gz", hash = "sha256:481d52ccdd27275f42c43a928b4a50c3bfb2d67af4e78b170e3e0bb39c66e5bb", size = 10380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/49/d10027df9fce941cb8184e78a02857af36360d33e1721df81c5ed2179a1a/async_lru-2.0.5-py3-none-any.whl", hash = "sha256:ab95404d8d2605310d345932697371a5f40def0487c03d6d0ad9138de52c9943", size = 6069 }, +] + +[[package]] +name = "attrs" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 }, +] + +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/f0/3c/adaf39ce1fb4afdd21b611e3d530b183bb7759c9b673d60db0e347fd4439/beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b", size = 619516 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/49/6abb616eb3cbab6a7cca303dc02fdf3836de2e0b834bf966a7f5271a34d8/beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16", size = 186015 }, +] + +[[package]] +name = "bleach" +version = "6.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406 }, +] + +[package.optional-dependencies] +css = [ + { name = "tinycss2" }, +] + +[[package]] +name = "certifi" +version = "2025.1.31" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, +] + +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 
}, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424 }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736 }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, + { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989 }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802 }, + { url = 
"https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792 }, + { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893 }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810 }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200 }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447 }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358 }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", 
size = 488469 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, + { url = 
"https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, + { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, + { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, + { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, + { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, + { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, + { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", 
hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, + { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, + { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, + { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, + { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, + { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = 
"https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "cloudpickle" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/39/069100b84d7418bc358d81669d5748efb14b9cceacd2f9c75f550424132f/cloudpickle-3.1.1.tar.gz", hash = "sha256:b216fa8ae4019d5482a8ac3c95d8f6346115d8835911fd4aefd1a445e4242c64", size = 22113 } +wheels = [ 
+ { url = "https://files.pythonhosted.org/packages/7e/e8/64c37fadfc2816a7701fa8a6ed8d87327c7d54eacfbfb6edab14a2f2be75/cloudpickle-3.1.1-py3-none-any.whl", hash = "sha256:c8c5a44295039331ee9dad40ba100a9c7297b6f988e50e87ccdf3765a668350e", size = 20992 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "comm" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/a8/fb783cb0abe2b5fded9f55e5703015cdf1c9c85b3669087c538dd15a6a86/comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e", size = 6210 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 }, +] + +[[package]] +name = "contourpy" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/c2/fc7193cc5383637ff390a712e88e4ded0452c9fbcf84abe3de5ea3df1866/contourpy-1.3.1.tar.gz", hash = "sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699", size = 13465753 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/37/6b/175f60227d3e7f5f1549fcb374592be311293132207e451c3d7c654c25fb/contourpy-1.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509", size = 271494 }, + { url = "https://files.pythonhosted.org/packages/6b/6a/7833cfae2c1e63d1d8875a50fd23371394f540ce809d7383550681a1fa64/contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc", size = 255444 }, + { url = "https://files.pythonhosted.org/packages/7f/b3/7859efce66eaca5c14ba7619791b084ed02d868d76b928ff56890d2d059d/contourpy-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454", size = 307628 }, + { url = "https://files.pythonhosted.org/packages/48/b2/011415f5e3f0a50b1e285a0bf78eb5d92a4df000553570f0851b6e309076/contourpy-1.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80", size = 347271 }, + { url = "https://files.pythonhosted.org/packages/84/7d/ef19b1db0f45b151ac78c65127235239a8cf21a59d1ce8507ce03e89a30b/contourpy-1.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec", size = 318906 }, + { url = "https://files.pythonhosted.org/packages/ba/99/6794142b90b853a9155316c8f470d2e4821fe6f086b03e372aca848227dd/contourpy-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9", size = 323622 }, + { url = "https://files.pythonhosted.org/packages/3c/0f/37d2c84a900cd8eb54e105f4fa9aebd275e14e266736778bb5dccbf3bbbb/contourpy-1.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b", size = 1266699 }, + { url = 
"https://files.pythonhosted.org/packages/3a/8a/deb5e11dc7d9cc8f0f9c8b29d4f062203f3af230ba83c30a6b161a6effc9/contourpy-1.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d", size = 1326395 }, + { url = "https://files.pythonhosted.org/packages/1a/35/7e267ae7c13aaf12322ccc493531f1e7f2eb8fba2927b9d7a05ff615df7a/contourpy-1.3.1-cp312-cp312-win32.whl", hash = "sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e", size = 175354 }, + { url = "https://files.pythonhosted.org/packages/a1/35/c2de8823211d07e8a79ab018ef03960716c5dff6f4d5bff5af87fd682992/contourpy-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d", size = 220971 }, + { url = "https://files.pythonhosted.org/packages/9a/e7/de62050dce687c5e96f946a93546910bc67e483fe05324439e329ff36105/contourpy-1.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2", size = 271548 }, + { url = "https://files.pythonhosted.org/packages/78/4d/c2a09ae014ae984c6bdd29c11e74d3121b25eaa117eca0bb76340efd7e1c/contourpy-1.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5", size = 255576 }, + { url = "https://files.pythonhosted.org/packages/ab/8a/915380ee96a5638bda80cd061ccb8e666bfdccea38d5741cb69e6dbd61fc/contourpy-1.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81", size = 306635 }, + { url = "https://files.pythonhosted.org/packages/29/5c/c83ce09375428298acd4e6582aeb68b1e0d1447f877fa993d9bf6cd3b0a0/contourpy-1.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2", size = 345925 }, + { url = 
"https://files.pythonhosted.org/packages/29/63/5b52f4a15e80c66c8078a641a3bfacd6e07106835682454647aca1afc852/contourpy-1.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7", size = 318000 }, + { url = "https://files.pythonhosted.org/packages/9a/e2/30ca086c692691129849198659bf0556d72a757fe2769eb9620a27169296/contourpy-1.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c", size = 322689 }, + { url = "https://files.pythonhosted.org/packages/6b/77/f37812ef700f1f185d348394debf33f22d531e714cf6a35d13d68a7003c7/contourpy-1.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3", size = 1268413 }, + { url = "https://files.pythonhosted.org/packages/3f/6d/ce84e79cdd128542ebeb268f84abb4b093af78e7f8ec504676673d2675bc/contourpy-1.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1", size = 1326530 }, + { url = "https://files.pythonhosted.org/packages/72/22/8282f4eae20c73c89bee7a82a19c4e27af9b57bb602ecaa00713d5bdb54d/contourpy-1.3.1-cp313-cp313-win32.whl", hash = "sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82", size = 175315 }, + { url = "https://files.pythonhosted.org/packages/e3/d5/28bca491f65312b438fbf076589dcde7f6f966b196d900777f5811b9c4e2/contourpy-1.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd", size = 220987 }, + { url = "https://files.pythonhosted.org/packages/2f/24/a4b285d6adaaf9746e4700932f579f1a7b6f9681109f694cfa233ae75c4e/contourpy-1.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30", size = 285001 }, + { url = 
"https://files.pythonhosted.org/packages/48/1d/fb49a401b5ca4f06ccf467cd6c4f1fd65767e63c21322b29b04ec40b40b9/contourpy-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751", size = 268553 }, + { url = "https://files.pythonhosted.org/packages/79/1e/4aef9470d13fd029087388fae750dccb49a50c012a6c8d1d634295caa644/contourpy-1.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342", size = 310386 }, + { url = "https://files.pythonhosted.org/packages/b0/34/910dc706ed70153b60392b5305c708c9810d425bde12499c9184a1100888/contourpy-1.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c", size = 349806 }, + { url = "https://files.pythonhosted.org/packages/31/3c/faee6a40d66d7f2a87f7102236bf4780c57990dd7f98e5ff29881b1b1344/contourpy-1.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f", size = 321108 }, + { url = "https://files.pythonhosted.org/packages/17/69/390dc9b20dd4bb20585651d7316cc3054b7d4a7b4f8b710b2b698e08968d/contourpy-1.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda", size = 327291 }, + { url = "https://files.pythonhosted.org/packages/ef/74/7030b67c4e941fe1e5424a3d988080e83568030ce0355f7c9fc556455b01/contourpy-1.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242", size = 1263752 }, + { url = "https://files.pythonhosted.org/packages/f0/ed/92d86f183a8615f13f6b9cbfc5d4298a509d6ce433432e21da838b4b63f4/contourpy-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1", size = 1318403 }, + { url 
= "https://files.pythonhosted.org/packages/b3/0e/c8e4950c77dcfc897c71d61e56690a0a9df39543d2164040301b5df8e67b/contourpy-1.3.1-cp313-cp313t-win32.whl", hash = "sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1", size = 185117 }, + { url = "https://files.pythonhosted.org/packages/c1/31/1ae946f11dfbd229222e6d6ad8e7bd1891d3d48bde5fbf7a0beb9491f8e3/contourpy-1.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546", size = 236668 }, +] + +[[package]] +name = "cycler" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 }, +] + +[[package]] +name = "debugpy" +version = "1.8.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/d4/f35f539e11c9344652f362c22413ec5078f677ac71229dc9b4f6f85ccaa3/debugpy-1.8.13.tar.gz", hash = "sha256:837e7bef95bdefba426ae38b9a94821ebdc5bea55627879cd48165c90b9e50ce", size = 1641193 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/ad/dff929b6b5403feaab0af0e5bb460fd723f9c62538b718a9af819b8fff20/debugpy-1.8.13-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:2b8de94c5c78aa0d0ed79023eb27c7c56a64c68217d881bee2ffbcb13951d0c1", size = 2501004 }, + { url = "https://files.pythonhosted.org/packages/d6/4f/b7d42e6679f0bb525888c278b0c0d2b6dff26ed42795230bb46eaae4f9b3/debugpy-1.8.13-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:887d54276cefbe7290a754424b077e41efa405a3e07122d8897de54709dbe522", size = 4222346 }, + { url = "https://files.pythonhosted.org/packages/ec/18/d9b3e88e85d41f68f77235112adc31012a784e45a3fcdbb039777d570a0f/debugpy-1.8.13-cp312-cp312-win32.whl", hash = "sha256:3872ce5453b17837ef47fb9f3edc25085ff998ce63543f45ba7af41e7f7d370f", size = 5226639 }, + { url = "https://files.pythonhosted.org/packages/c9/f7/0df18a4f530ed3cc06f0060f548efe9e3316102101e311739d906f5650be/debugpy-1.8.13-cp312-cp312-win_amd64.whl", hash = "sha256:63ca7670563c320503fea26ac688988d9d6b9c6a12abc8a8cf2e7dd8e5f6b6ea", size = 5268735 }, + { url = "https://files.pythonhosted.org/packages/b1/db/ae7cd645c1826aae557cebccbc448f0cc9a818d364efb88f8d80e7a03f41/debugpy-1.8.13-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:31abc9618be4edad0b3e3a85277bc9ab51a2d9f708ead0d99ffb5bb750e18503", size = 2485416 }, + { url = "https://files.pythonhosted.org/packages/ec/ed/db4b10ff3b5bb30fe41d9e86444a08bb6448e4d8265e7768450b8408dd36/debugpy-1.8.13-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0bd87557f97bced5513a74088af0b84982b6ccb2e254b9312e29e8a5c4270eb", size = 4218784 }, + { url = "https://files.pythonhosted.org/packages/82/82/ed81852a8d94086f51664d032d83c7f87cd2b087c6ea70dabec7c1ba813d/debugpy-1.8.13-cp313-cp313-win32.whl", hash = "sha256:5268ae7fdca75f526d04465931cb0bd24577477ff50e8bb03dab90983f4ebd02", size = 5226270 }, + { url = "https://files.pythonhosted.org/packages/15/63/aa92fb341a78ec40f1c414ec7a7885c2ee17032eee00d12cee0cdc502af4/debugpy-1.8.13-cp313-cp313-win_amd64.whl", hash = "sha256:79ce4ed40966c4c1631d0131606b055a5a2f8e430e3f7bf8fd3744b09943e8e8", size = 5268621 }, + { url = "https://files.pythonhosted.org/packages/37/4f/0b65410a08b6452bfd3f7ed6f3610f1a31fb127f46836e82d31797065dcb/debugpy-1.8.13-py2.py3-none-any.whl", hash = "sha256:d4ba115cdd0e3a70942bd562adba9ec8c651fe69ddde2298a1be296fc331906f", size = 5229306 }, 
+] + +[[package]] +name = "decorator" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190 }, +] + +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, +] + +[[package]] +name = "distlib" +version = "0.3.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973 }, +] + +[[package]] +name = "executing" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 }, +] + +[[package]] +name = "fastjsonschema" +version = "2.21.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/50/4b769ce1ac4071a1ef6d86b1a3fb56cdc3a37615e8c5519e1af96cdac366/fastjsonschema-2.21.1.tar.gz", hash = "sha256:794d4f0a58f848961ba16af7b9c85a3e88cd360df008c59aac6fc5ae9323b5d4", size = 373939 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/2b/0817a2b257fe88725c25589d89aec060581aabf668707a8d03b2e9e0cb2a/fastjsonschema-2.21.1-py3-none-any.whl", hash = "sha256:c9e5b7e908310918cf494a434eeb31384dd84a98b57a30bcb1f535015b554667", size = 23924 }, +] + +[[package]] +name = "filelock" +version = "3.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 }, +] + +[[package]] +name = "fonttools" +version = "4.56.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1c/8c/9ffa2a555af0e5e5d0e2ed7fdd8c9bef474ed676995bb4c57c9cd0014248/fonttools-4.56.0.tar.gz", hash = "sha256:a114d1567e1a1586b7e9e7fc2ff686ca542a82769a296cef131e4c4af51e58f4", size = 3462892 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/32/71cfd6877999576a11824a7fe7bc0bb57c5c72b1f4536fa56a3e39552643/fonttools-4.56.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d6f195c14c01bd057bc9b4f70756b510e009c83c5ea67b25ced3e2c38e6ee6e9", size = 2747757 }, + { url = "https://files.pythonhosted.org/packages/15/52/d9f716b072c5061a0b915dd4c387f74bef44c68c069e2195c753905bd9b7/fonttools-4.56.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fa760e5fe8b50cbc2d71884a1eff2ed2b95a005f02dda2fa431560db0ddd927f", size = 2279007 }, + { url = "https://files.pythonhosted.org/packages/d1/97/f1b3a8afa9a0d814a092a25cd42f59ccb98a0bb7a295e6e02fc9ba744214/fonttools-4.56.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d54a45d30251f1d729e69e5b675f9a08b7da413391a1227781e2a297fa37f6d2", size = 4783991 }, + { url = "https://files.pythonhosted.org/packages/95/70/2a781bedc1c45a0c61d29c56425609b22ed7f971da5d7e5df2679488741b/fonttools-4.56.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:661a8995d11e6e4914a44ca7d52d1286e2d9b154f685a4d1f69add8418961563", size = 4855109 }, + { url = "https://files.pythonhosted.org/packages/0c/02/a2597858e61a5e3fb6a14d5f6be9e6eb4eaf090da56ad70cedcbdd201685/fonttools-4.56.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9d94449ad0a5f2a8bf5d2f8d71d65088aee48adbe45f3c5f8e00e3ad861ed81a", size = 4762496 }, + { url = "https://files.pythonhosted.org/packages/f2/00/aaf00100d6078fdc73f7352b44589804af9dc12b182a2540b16002152ba4/fonttools-4.56.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f59746f7953f69cc3290ce2f971ab01056e55ddd0fb8b792c31a8acd7fee2d28", size = 4990094 }, + { url = 
"https://files.pythonhosted.org/packages/bf/dc/3ff1db522460db60cf3adaf1b64e0c72b43406717d139786d3fa1eb20709/fonttools-4.56.0-cp312-cp312-win32.whl", hash = "sha256:bce60f9a977c9d3d51de475af3f3581d9b36952e1f8fc19a1f2254f1dda7ce9c", size = 2142888 }, + { url = "https://files.pythonhosted.org/packages/6f/e3/5a181a85777f7809076e51f7422e0dc77eb04676c40ec8bf6a49d390d1ff/fonttools-4.56.0-cp312-cp312-win_amd64.whl", hash = "sha256:300c310bb725b2bdb4f5fc7e148e190bd69f01925c7ab437b9c0ca3e1c7cd9ba", size = 2189734 }, + { url = "https://files.pythonhosted.org/packages/a5/55/f06b48d48e0b4ec3a3489efafe9bd4d81b6e0802ac51026e3ee4634e89ba/fonttools-4.56.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f20e2c0dfab82983a90f3d00703ac0960412036153e5023eed2b4641d7d5e692", size = 2735127 }, + { url = "https://files.pythonhosted.org/packages/59/db/d2c7c9b6dd5cbd46f183e650a47403ffb88fca17484eb7c4b1cd88f9e513/fonttools-4.56.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f36a0868f47b7566237640c026c65a86d09a3d9ca5df1cd039e30a1da73098a0", size = 2272519 }, + { url = "https://files.pythonhosted.org/packages/4d/a2/da62d779c34a0e0c06415f02eab7fa3466de5d46df459c0275a255cefc65/fonttools-4.56.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62b4c6802fa28e14dba010e75190e0e6228513573f1eeae57b11aa1a39b7e5b1", size = 4762423 }, + { url = "https://files.pythonhosted.org/packages/be/6a/fd4018e0448c8a5e12138906411282c5eab51a598493f080a9f0960e658f/fonttools-4.56.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a05d1f07eb0a7d755fbe01fee1fd255c3a4d3730130cf1bfefb682d18fd2fcea", size = 4834442 }, + { url = "https://files.pythonhosted.org/packages/6d/63/fa1dec8efb35bc11ef9c39b2d74754b45d48a3ccb2cf78c0109c0af639e8/fonttools-4.56.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0073b62c3438cf0058488c002ea90489e8801d3a7af5ce5f7c05c105bee815c3", size = 4742800 }, + { url = 
"https://files.pythonhosted.org/packages/dd/f4/963247ae8c73ccc4cf2929e7162f595c81dbe17997d1d0ea77da24a217c9/fonttools-4.56.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e2cad98c94833465bcf28f51c248aaf07ca022efc6a3eba750ad9c1e0256d278", size = 4963746 }, + { url = "https://files.pythonhosted.org/packages/ea/e0/46f9600c39c644b54e4420f941f75fa200d9288c9ae171e5d80918b8cbb9/fonttools-4.56.0-cp313-cp313-win32.whl", hash = "sha256:d0cb73ccf7f6d7ca8d0bc7ea8ac0a5b84969a41c56ac3ac3422a24df2680546f", size = 2140927 }, + { url = "https://files.pythonhosted.org/packages/27/6d/3edda54f98a550a0473f032d8050315fbc8f1b76a0d9f3879b72ebb2cdd6/fonttools-4.56.0-cp313-cp313-win_amd64.whl", hash = "sha256:62cc1253827d1e500fde9dbe981219fea4eb000fd63402283472d38e7d8aa1c6", size = 2186709 }, + { url = "https://files.pythonhosted.org/packages/bf/ff/44934a031ce5a39125415eb405b9efb76fe7f9586b75291d66ae5cbfc4e6/fonttools-4.56.0-py3-none-any.whl", hash = "sha256:1088182f68c303b50ca4dc0c82d42083d176cba37af1937e1a976a31149d4d14", size = 1089800 }, +] + +[[package]] +name = "fqdn" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/3e/a80a8c077fd798951169626cde3e239adeba7dab75deb3555716415bd9b0/fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f", size = 6015 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014", size = 9121 }, +] + +[[package]] +name = "h11" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 } +wheels = [ 
+ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, +] + +[[package]] +name = "httpcore" +version = "1.0.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "identify" +version = "2.6.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/98/a71ab060daec766acc30fb47dfca219d03de34a70d616a79a38c6066c5bf/identify-2.6.9.tar.gz", hash = "sha256:d40dfe3142a1421d8518e3d3985ef5ac42890683e32306ad614a29490abeb6bf", size = 99249 
} +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/ce/0845144ed1f0e25db5e7a79c2354c1da4b5ce392b8966449d5db8dca18f1/identify-2.6.9-py2.py3-none-any.whl", hash = "sha256:c98b4322da415a8e5a70ff6e51fbc2d2932c015532d77e9f8537b4ba7813b150", size = 99101 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "ipykernel" +version = "6.29.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "comm" }, + { name = "debugpy" }, + { name = "ipython" }, + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "matplotlib-inline" }, + { name = "nest-asyncio" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/5c/67594cb0c7055dc50814b21731c22a601101ea3b1b50a9a1b090e11f5d0f/ipykernel-6.29.5.tar.gz", hash = "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215", size = 163367 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/5c/368ae6c01c7628438358e6d337c19b05425727fbb221d2a3c4303c372f42/ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5", size = 117173 }, +] + +[[package]] +name = "ipython" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "decorator" }, + { name = "ipython-pygments-lexers" }, + { name = "jedi" }, + { name = "matplotlib-inline" }, + { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "stack-data" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/ce/012a0f40ca58a966f87a6e894d6828e2817657cbdf522b02a5d3a87d92ce/ipython-9.0.2.tar.gz", hash = "sha256:ec7b479e3e5656bf4f58c652c120494df1820f4f28f522fb7ca09e213c2aab52", size = 4366102 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/3a/917cb9e72f4e1a4ea13c862533205ae1319bd664119189ee5cc9e4e95ebf/ipython-9.0.2-py3-none-any.whl", hash = "sha256:143ef3ea6fb1e1bffb4c74b114051de653ffb7737a3f7ab1670e657ca6ae8c44", size = 600524 }, +] + +[[package]] +name = "ipython-pygments-lexers" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074 }, +] + +[[package]] +name = "ipywidgets" +version = "8.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "comm" }, + { name = "ipython" }, + { name = "jupyterlab-widgets" }, + { name = "traitlets" }, + { name = "widgetsnbextension" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c7/4c/dab2a281b07596a5fc220d49827fe6c794c66f1493d7a74f1df0640f2cc5/ipywidgets-8.1.5.tar.gz", hash = "sha256:870e43b1a35656a80c18c9503bbf2d16802db1cb487eec6fab27d683381dde17", size = 116723 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/2d/9c0b76f2f9cc0ebede1b9371b6f317243028ed60b90705863d493bae622e/ipywidgets-8.1.5-py3-none-any.whl", hash = "sha256:3290f526f87ae6e77655555baba4f36681c555b8bdbbff430b70e52c34c86245", size = 139767 }, +] + +[[package]] +name = "isoduration" +version = "20.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "arrow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7c/1a/3c8edc664e06e6bd06cce40c6b22da5f1429aa4224d0c590f3be21c91ead/isoduration-20.11.0.tar.gz", hash = "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9", size = 11649 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 11321 }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278 }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, +] + +[[package]] +name = "joblib" +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, +] + +[[package]] +name = "json5" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/3d/bbe62f3d0c05a689c711cff57b2e3ac3d3e526380adb7c781989f075115c/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559", size = 48202 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/42/797895b952b682c3dafe23b1834507ee7f02f4d6299b65aaa61425763278/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa", size = 34049 }, +] + +[[package]] +name = "jsonpointer" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/0a/eebeb1fa92507ea94016a2a790b93c2ae41a7e18778f85471dc54475ed25/jsonpointer-3.0.0.tar.gz", hash = 
"sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef", size = 9114 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595 }, +] + +[[package]] +name = "jsonschema" +version = "4.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/2e/03362ee4034a4c917f697890ccd4aec0800ccf9ded7f511971c75451deec/jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", size = 325778 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/4a/4f9dbeb84e8850557c02365a0eee0649abe5eb1d84af92a25731c6c0f922/jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566", size = 88462 }, +] + +[package.optional-dependencies] +format-nongpl = [ + { name = "fqdn" }, + { name = "idna" }, + { name = "isoduration" }, + { name = "jsonpointer" }, + { name = "rfc3339-validator" }, + { name = "rfc3986-validator" }, + { name = "uri-template" }, + { name = "webcolors" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2024.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/db/58f950c996c793472e336ff3655b13fbcf1e3b359dcf52dcf3ed3b52c352/jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272", size = 15561 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d1/0f/8910b19ac0670a0f80ce1008e5e751c4a57e14d2c4c13a482aa6079fa9d6/jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf", size = 18459 }, +] + +[[package]] +name = "jupyter" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ipykernel" }, + { name = "ipywidgets" }, + { name = "jupyter-console" }, + { name = "jupyterlab" }, + { name = "nbconvert" }, + { name = "notebook" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/f3/af28ea964ab8bc1e472dba2e82627d36d470c51f5cd38c37502eeffaa25e/jupyter-1.1.1.tar.gz", hash = "sha256:d55467bceabdea49d7e3624af7e33d59c37fff53ed3a350e1ac957bed731de7a", size = 5714959 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/64/285f20a31679bf547b75602702f7800e74dbabae36ef324f716c02804753/jupyter-1.1.1-py2.py3-none-any.whl", hash = "sha256:7a59533c22af65439b24bbe60373a4e95af8f16ac65a6c00820ad378e3f7cc83", size = 2657 }, +] + +[[package]] +name = "jupyter-client" +version = "8.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-core" }, + { name = "python-dateutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/22/bf9f12fdaeae18019a468b68952a60fe6dbab5d67cd2a103cac7659b41ca/jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419", size = 342019 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f", size = 106105 }, +] + +[[package]] +name = "jupyter-console" +version = "6.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"ipykernel" }, + { name = "ipython" }, + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "pyzmq" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/2d/e2fd31e2fc41c14e2bcb6c976ab732597e907523f6b2420305f9fc7fdbdb/jupyter_console-6.6.3.tar.gz", hash = "sha256:566a4bf31c87adbfadf22cdf846e3069b59a71ed5da71d6ba4d8aaad14a53539", size = 34363 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/77/71d78d58f15c22db16328a476426f7ac4a60d3a5a7ba3b9627ee2f7903d4/jupyter_console-6.6.3-py3-none-any.whl", hash = "sha256:309d33409fcc92ffdad25f0bcdf9a4a9daa61b6f341177570fdac03de5352485", size = 24510 }, +] + +[[package]] +name = "jupyter-core" +version = "5.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "platformdirs" }, + { name = "pywin32", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/11/b56381fa6c3f4cc5d2cf54a7dbf98ad9aa0b339ef7a601d6053538b079a7/jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9", size = 87629 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, +] + +[[package]] +name = "jupyter-events" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonschema", extra = ["format-nongpl"] }, + { name = "packaging" }, + { name = "python-json-logger" }, + { name = "pyyaml" }, + { name = "referencing" }, + { name = "rfc3339-validator" }, + { name = "rfc3986-validator" }, + { name = "traitlets" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9d/c3/306d090461e4cf3cd91eceaff84bede12a8e52cd821c2d20c9a4fd728385/jupyter_events-0.12.0.tar.gz", hash = "sha256:fc3fce98865f6784c9cd0a56a20644fc6098f21c8c33834a8d9fe383c17e554b", size = 62196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/48/577993f1f99c552f18a0428731a755e06171f9902fa118c379eb7c04ea22/jupyter_events-0.12.0-py3-none-any.whl", hash = "sha256:6464b2fa5ad10451c3d35fabc75eab39556ae1e2853ad0c0cc31b656731a97fb", size = 19430 }, +] + +[[package]] +name = "jupyter-lsp" +version = "2.2.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-server" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/b4/3200b0b09c12bc3b72d943d923323c398eff382d1dcc7c0dbc8b74630e40/jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001", size = 48741 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/e0/7bd7cff65594fd9936e2f9385701e44574fc7d721331ff676ce440b14100/jupyter_lsp-2.2.5-py3-none-any.whl", hash = "sha256:45fbddbd505f3fbfb0b6cb2f1bc5e15e83ab7c79cd6e89416b248cb3c00c11da", size = 69146 }, +] + +[[package]] +name = "jupyter-server" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "argon2-cffi" }, + { name = "jinja2" }, + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "jupyter-events" }, + { name = "jupyter-server-terminals" }, + { name = "nbconvert" }, + { name = "nbformat" }, + { name = "overrides" }, + { name = "packaging" }, + { name = "prometheus-client" }, + { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "pyzmq" }, + { name = "send2trash" }, + { name = "terminado" }, + { name = "tornado" }, + { name = "traitlets" }, + { name = "websocket-client" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/61/8c/df09d4ab646141f130f9977b32b206ba8615d1969b2eba6a2e84b7f89137/jupyter_server-2.15.0.tar.gz", hash = "sha256:9d446b8697b4f7337a1b7cdcac40778babdd93ba614b6d68ab1c0c918f1c4084", size = 725227 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/a2/89eeaf0bb954a123a909859fa507fa86f96eb61b62dc30667b60dbd5fdaf/jupyter_server-2.15.0-py3-none-any.whl", hash = "sha256:872d989becf83517012ee669f09604aa4a28097c0bd90b2f424310156c2cdae3", size = 385826 }, +] + +[[package]] +name = "jupyter-server-terminals" +version = "0.5.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "terminado" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/d5/562469734f476159e99a55426d697cbf8e7eb5efe89fb0e0b4f83a3d3459/jupyter_server_terminals-0.5.3.tar.gz", hash = "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269", size = 31430 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/2d/2b32cdbe8d2a602f697a649798554e4f072115438e92249624e532e8aca6/jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa", size = 13656 }, +] + +[[package]] +name = "jupyterlab" +version = "4.3.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-lru" }, + { name = "httpx" }, + { name = "ipykernel" }, + { name = "jinja2" }, + { name = "jupyter-core" }, + { name = "jupyter-lsp" }, + { name = "jupyter-server" }, + { name = "jupyterlab-server" }, + { name = "notebook-shim" }, + { name = "packaging" }, + { name = "setuptools" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/a5/f0cfd8d8fd521eba1a0beddc201bd0131df8d1355eb4917e92a0ffbac5d6/jupyterlab-4.3.6.tar.gz", hash = "sha256:2900ffdbfca9ed37c4ad7fdda3eb76582fd945d46962af3ac64741ae2d6b2ff4", size = 
21827019 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/be/422f69447dbd77ddd58251b0945382099fd740e99918a147142f1e852a9d/jupyterlab-4.3.6-py3-none-any.whl", hash = "sha256:fc9eb0455562a56a9bd6d2977cf090842f321fa1a298fcee9bf8c19de353d5fd", size = 11681705 }, +] + +[[package]] +name = "jupyterlab-pygments" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/90/51/9187be60d989df97f5f0aba133fa54e7300f17616e065d1ada7d7646b6d6/jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", size = 512900 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780", size = 15884 }, +] + +[[package]] +name = "jupyterlab-server" +version = "2.27.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "babel" }, + { name = "jinja2" }, + { name = "json5" }, + { name = "jsonschema" }, + { name = "jupyter-server" }, + { name = "packaging" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0a/c9/a883ce65eb27905ce77ace410d83587c82ea64dc85a48d1f7ed52bcfa68d/jupyterlab_server-2.27.3.tar.gz", hash = "sha256:eb36caca59e74471988f0ae25c77945610b887f777255aa21f8065def9e51ed4", size = 76173 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/09/2032e7d15c544a0e3cd831c51d77a8ca57f7555b2e1b2922142eddb02a84/jupyterlab_server-2.27.3-py3-none-any.whl", hash = "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4", size = 59700 }, +] + +[[package]] +name = "jupyterlab-widgets" +version = "3.0.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/59/73/fa26bbb747a9ea4fca6b01453aa22990d52ab62dd61384f1ac0dc9d4e7ba/jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed", size = 203556 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/93/858e87edc634d628e5d752ba944c2833133a28fa87bb093e6832ced36a3e/jupyterlab_widgets-3.0.13-py3-none-any.whl", hash = "sha256:e3cda2c233ce144192f1e29914ad522b2f4c40e77214b0cc97377ca3d323db54", size = 214392 }, +] + +[[package]] +name = "kiwisolver" +version = "1.4.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/59/7c91426a8ac292e1cdd53a63b6d9439abd573c875c3f92c146767dd33faf/kiwisolver-1.4.8.tar.gz", hash = "sha256:23d5f023bdc8c7e54eb65f03ca5d5bb25b601eac4d7f1a042888a1f45237987e", size = 97538 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/aa/cea685c4ab647f349c3bc92d2daf7ae34c8e8cf405a6dcd3a497f58a2ac3/kiwisolver-1.4.8-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d6af5e8815fd02997cb6ad9bbed0ee1e60014438ee1a5c2444c96f87b8843502", size = 124152 }, + { url = "https://files.pythonhosted.org/packages/c5/0b/8db6d2e2452d60d5ebc4ce4b204feeb16176a851fd42462f66ade6808084/kiwisolver-1.4.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bade438f86e21d91e0cf5dd7c0ed00cda0f77c8c1616bd83f9fc157fa6760d31", size = 66555 }, + { url = "https://files.pythonhosted.org/packages/60/26/d6a0db6785dd35d3ba5bf2b2df0aedc5af089962c6eb2cbf67a15b81369e/kiwisolver-1.4.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b83dc6769ddbc57613280118fb4ce3cd08899cc3369f7d0e0fab518a7cf37fdb", size = 65067 }, + { url = "https://files.pythonhosted.org/packages/c9/ed/1d97f7e3561e09757a196231edccc1bcf59d55ddccefa2afc9c615abd8e0/kiwisolver-1.4.8-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:111793b232842991be367ed828076b03d96202c19221b5ebab421ce8bcad016f", size = 1378443 }, + { url = "https://files.pythonhosted.org/packages/29/61/39d30b99954e6b46f760e6289c12fede2ab96a254c443639052d1b573fbc/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:257af1622860e51b1a9d0ce387bf5c2c4f36a90594cb9514f55b074bcc787cfc", size = 1472728 }, + { url = "https://files.pythonhosted.org/packages/0c/3e/804163b932f7603ef256e4a715e5843a9600802bb23a68b4e08c8c0ff61d/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69b5637c3f316cab1ec1c9a12b8c5f4750a4c4b71af9157645bf32830e39c03a", size = 1478388 }, + { url = "https://files.pythonhosted.org/packages/8a/9e/60eaa75169a154700be74f875a4d9961b11ba048bef315fbe89cb6999056/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:782bb86f245ec18009890e7cb8d13a5ef54dcf2ebe18ed65f795e635a96a1c6a", size = 1413849 }, + { url = "https://files.pythonhosted.org/packages/bc/b3/9458adb9472e61a998c8c4d95cfdfec91c73c53a375b30b1428310f923e4/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc978a80a0db3a66d25767b03688f1147a69e6237175c0f4ffffaaedf744055a", size = 1475533 }, + { url = "https://files.pythonhosted.org/packages/e4/7a/0a42d9571e35798de80aef4bb43a9b672aa7f8e58643d7bd1950398ffb0a/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:36dbbfd34838500a31f52c9786990d00150860e46cd5041386f217101350f0d3", size = 2268898 }, + { url = "https://files.pythonhosted.org/packages/d9/07/1255dc8d80271400126ed8db35a1795b1a2c098ac3a72645075d06fe5c5d/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:eaa973f1e05131de5ff3569bbba7f5fd07ea0595d3870ed4a526d486fe57fa1b", size = 2425605 }, + { url = 
"https://files.pythonhosted.org/packages/84/df/5a3b4cf13780ef6f6942df67b138b03b7e79e9f1f08f57c49957d5867f6e/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a66f60f8d0c87ab7f59b6fb80e642ebb29fec354a4dfad687ca4092ae69d04f4", size = 2375801 }, + { url = "https://files.pythonhosted.org/packages/8f/10/2348d068e8b0f635c8c86892788dac7a6b5c0cb12356620ab575775aad89/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858416b7fb777a53f0c59ca08190ce24e9abbd3cffa18886a5781b8e3e26f65d", size = 2520077 }, + { url = "https://files.pythonhosted.org/packages/32/d8/014b89fee5d4dce157d814303b0fce4d31385a2af4c41fed194b173b81ac/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:085940635c62697391baafaaeabdf3dd7a6c3643577dde337f4d66eba021b2b8", size = 2338410 }, + { url = "https://files.pythonhosted.org/packages/bd/72/dfff0cc97f2a0776e1c9eb5bef1ddfd45f46246c6533b0191887a427bca5/kiwisolver-1.4.8-cp312-cp312-win_amd64.whl", hash = "sha256:01c3d31902c7db5fb6182832713d3b4122ad9317c2c5877d0539227d96bb2e50", size = 71853 }, + { url = "https://files.pythonhosted.org/packages/dc/85/220d13d914485c0948a00f0b9eb419efaf6da81b7d72e88ce2391f7aed8d/kiwisolver-1.4.8-cp312-cp312-win_arm64.whl", hash = "sha256:a3c44cb68861de93f0c4a8175fbaa691f0aa22550c331fefef02b618a9dcb476", size = 65424 }, + { url = "https://files.pythonhosted.org/packages/79/b3/e62464a652f4f8cd9006e13d07abad844a47df1e6537f73ddfbf1bc997ec/kiwisolver-1.4.8-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1c8ceb754339793c24aee1c9fb2485b5b1f5bb1c2c214ff13368431e51fc9a09", size = 124156 }, + { url = "https://files.pythonhosted.org/packages/8d/2d/f13d06998b546a2ad4f48607a146e045bbe48030774de29f90bdc573df15/kiwisolver-1.4.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a62808ac74b5e55a04a408cda6156f986cefbcf0ada13572696b507cc92fa1", size = 66555 }, + { url = 
"https://files.pythonhosted.org/packages/59/e3/b8bd14b0a54998a9fd1e8da591c60998dc003618cb19a3f94cb233ec1511/kiwisolver-1.4.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68269e60ee4929893aad82666821aaacbd455284124817af45c11e50a4b42e3c", size = 65071 }, + { url = "https://files.pythonhosted.org/packages/f0/1c/6c86f6d85ffe4d0ce04228d976f00674f1df5dc893bf2dd4f1928748f187/kiwisolver-1.4.8-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34d142fba9c464bc3bbfeff15c96eab0e7310343d6aefb62a79d51421fcc5f1b", size = 1378053 }, + { url = "https://files.pythonhosted.org/packages/4e/b9/1c6e9f6dcb103ac5cf87cb695845f5fa71379021500153566d8a8a9fc291/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc373e0eef45b59197de815b1b28ef89ae3955e7722cc9710fb91cd77b7f47", size = 1472278 }, + { url = "https://files.pythonhosted.org/packages/ee/81/aca1eb176de671f8bda479b11acdc42c132b61a2ac861c883907dde6debb/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:77e6f57a20b9bd4e1e2cedda4d0b986ebd0216236f0106e55c28aea3d3d69b16", size = 1478139 }, + { url = "https://files.pythonhosted.org/packages/49/f4/e081522473671c97b2687d380e9e4c26f748a86363ce5af48b4a28e48d06/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08e77738ed7538f036cd1170cbed942ef749137b1311fa2bbe2a7fda2f6bf3cc", size = 1413517 }, + { url = "https://files.pythonhosted.org/packages/8f/e9/6a7d025d8da8c4931522922cd706105aa32b3291d1add8c5427cdcd66e63/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5ce1e481a74b44dd5e92ff03ea0cb371ae7a0268318e202be06c8f04f4f1246", size = 1474952 }, + { url = "https://files.pythonhosted.org/packages/82/13/13fa685ae167bee5d94b415991c4fc7bb0a1b6ebea6e753a87044b209678/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:fc2ace710ba7c1dfd1a3b42530b62b9ceed115f19a1656adefce7b1782a37794", size = 2269132 }, + { url = "https://files.pythonhosted.org/packages/ef/92/bb7c9395489b99a6cb41d502d3686bac692586db2045adc19e45ee64ed23/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3452046c37c7692bd52b0e752b87954ef86ee2224e624ef7ce6cb21e8c41cc1b", size = 2425997 }, + { url = "https://files.pythonhosted.org/packages/ed/12/87f0e9271e2b63d35d0d8524954145837dd1a6c15b62a2d8c1ebe0f182b4/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7e9a60b50fe8b2ec6f448fe8d81b07e40141bfced7f896309df271a0b92f80f3", size = 2376060 }, + { url = "https://files.pythonhosted.org/packages/02/6e/c8af39288edbce8bf0fa35dee427b082758a4b71e9c91ef18fa667782138/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:918139571133f366e8362fa4a297aeba86c7816b7ecf0bc79168080e2bd79957", size = 2520471 }, + { url = "https://files.pythonhosted.org/packages/13/78/df381bc7b26e535c91469f77f16adcd073beb3e2dd25042efd064af82323/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e063ef9f89885a1d68dd8b2e18f5ead48653176d10a0e324e3b0030e3a69adeb", size = 2338793 }, + { url = "https://files.pythonhosted.org/packages/d0/dc/c1abe38c37c071d0fc71c9a474fd0b9ede05d42f5a458d584619cfd2371a/kiwisolver-1.4.8-cp313-cp313-win_amd64.whl", hash = "sha256:a17b7c4f5b2c51bb68ed379defd608a03954a1845dfed7cc0117f1cc8a9b7fd2", size = 71855 }, + { url = "https://files.pythonhosted.org/packages/a0/b6/21529d595b126ac298fdd90b705d87d4c5693de60023e0efcb4f387ed99e/kiwisolver-1.4.8-cp313-cp313-win_arm64.whl", hash = "sha256:3cd3bc628b25f74aedc6d374d5babf0166a92ff1317f46267f12d2ed54bc1d30", size = 65430 }, + { url = "https://files.pythonhosted.org/packages/34/bd/b89380b7298e3af9b39f49334e3e2a4af0e04819789f04b43d560516c0c8/kiwisolver-1.4.8-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:370fd2df41660ed4e26b8c9d6bbcad668fbe2560462cba151a721d49e5b6628c", size = 126294 }, + { url = 
"https://files.pythonhosted.org/packages/83/41/5857dc72e5e4148eaac5aa76e0703e594e4465f8ab7ec0fc60e3a9bb8fea/kiwisolver-1.4.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:84a2f830d42707de1d191b9490ac186bf7997a9495d4e9072210a1296345f7dc", size = 67736 }, + { url = "https://files.pythonhosted.org/packages/e1/d1/be059b8db56ac270489fb0b3297fd1e53d195ba76e9bbb30e5401fa6b759/kiwisolver-1.4.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7a3ad337add5148cf51ce0b55642dc551c0b9d6248458a757f98796ca7348712", size = 66194 }, + { url = "https://files.pythonhosted.org/packages/e1/83/4b73975f149819eb7dcf9299ed467eba068ecb16439a98990dcb12e63fdd/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7506488470f41169b86d8c9aeff587293f530a23a23a49d6bc64dab66bedc71e", size = 1465942 }, + { url = "https://files.pythonhosted.org/packages/c7/2c/30a5cdde5102958e602c07466bce058b9d7cb48734aa7a4327261ac8e002/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f0121b07b356a22fb0414cec4666bbe36fd6d0d759db3d37228f496ed67c880", size = 1595341 }, + { url = "https://files.pythonhosted.org/packages/ff/9b/1e71db1c000385aa069704f5990574b8244cce854ecd83119c19e83c9586/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6d6bd87df62c27d4185de7c511c6248040afae67028a8a22012b010bc7ad062", size = 1598455 }, + { url = "https://files.pythonhosted.org/packages/85/92/c8fec52ddf06231b31cbb779af77e99b8253cd96bd135250b9498144c78b/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:291331973c64bb9cce50bbe871fb2e675c4331dab4f31abe89f175ad7679a4d7", size = 1522138 }, + { url = "https://files.pythonhosted.org/packages/0b/51/9eb7e2cd07a15d8bdd976f6190c0164f92ce1904e5c0c79198c4972926b7/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:893f5525bb92d3d735878ec00f781b2de998333659507d29ea4466208df37bed", size = 1582857 }, + { url = "https://files.pythonhosted.org/packages/0f/95/c5a00387a5405e68ba32cc64af65ce881a39b98d73cc394b24143bebc5b8/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b47a465040146981dc9db8647981b8cb96366fbc8d452b031e4f8fdffec3f26d", size = 2293129 }, + { url = "https://files.pythonhosted.org/packages/44/83/eeb7af7d706b8347548313fa3a3a15931f404533cc54fe01f39e830dd231/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:99cea8b9dd34ff80c521aef46a1dddb0dcc0283cf18bde6d756f1e6f31772165", size = 2421538 }, + { url = "https://files.pythonhosted.org/packages/05/f9/27e94c1b3eb29e6933b6986ffc5fa1177d2cd1f0c8efc5f02c91c9ac61de/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:151dffc4865e5fe6dafce5480fab84f950d14566c480c08a53c663a0020504b6", size = 2390661 }, + { url = "https://files.pythonhosted.org/packages/d9/d4/3c9735faa36ac591a4afcc2980d2691000506050b7a7e80bcfe44048daa7/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:577facaa411c10421314598b50413aa1ebcf5126f704f1e5d72d7e4e9f020d90", size = 2546710 }, + { url = "https://files.pythonhosted.org/packages/4c/fa/be89a49c640930180657482a74970cdcf6f7072c8d2471e1babe17a222dc/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:be4816dc51c8a471749d664161b434912eee82f2ea66bd7628bd14583a833e85", size = 2349213 }, +] + +[[package]] +name = "kneed" +version = "0.8.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/0f/958e27a378042e0366dfea8baab4a53121cb37c114117666051390cd7bb8/kneed-0.8.5.tar.gz", hash = "sha256:a4847ac4f1d04852fea278d5de7aa8bfdc3beb7fbca4a182fec0f0efee43f4b1", size = 12783 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9a/1b/7e726d8616e813007874468c61790099ba21493e0ea07561b7d9fc53151c/kneed-0.8.5-py3-none-any.whl", hash = "sha256:2f3fbd4e9bd808e65052841448702c41ea64d5fc78735cbfc97ab25f08bd9815", size = 10290 }, +] + +[[package]] +name = "llvmlite" +version = "0.44.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297 }, + { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105 }, + { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901 }, + { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247 }, + { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 
30332380 }, + { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306 }, + { url = "https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090 }, + { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904 }, + { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245 }, + { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 }, + { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352 }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122 }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085 }, + { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978 }, + { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208 }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357 }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344 }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101 }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603 }, + { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510 }, + { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486 }, + { url = 
"https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480 }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914 }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796 }, + { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473 }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114 }, + { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098 }, + { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208 }, + { url = 
"https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 }, +] + +[[package]] +name = "matplotlib" +version = "3.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "contourpy" }, + { name = "cycler" }, + { name = "fonttools" }, + { name = "kiwisolver" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "pyparsing" }, + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/08/b89867ecea2e305f408fbb417139a8dd941ecf7b23a2e02157c36da546f0/matplotlib-3.10.1.tar.gz", hash = "sha256:e8d2d0e3881b129268585bf4765ad3ee73a4591d77b9a18c214ac7e3a79fb2ba", size = 36743335 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/1d/5e0dc3b59c034e43de16f94deb68f4ad8a96b3ea00f4b37c160b7474928e/matplotlib-3.10.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:66e907a06e68cb6cfd652c193311d61a12b54f56809cafbed9736ce5ad92f107", size = 8175488 }, + { url = "https://files.pythonhosted.org/packages/7a/81/dae7e14042e74da658c3336ab9799128e09a1ee03964f2d89630b5d12106/matplotlib-3.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b4bb156abb8fa5e5b2b460196f7db7264fc6d62678c03457979e7d5254b7be", size = 8046264 }, + { url = "https://files.pythonhosted.org/packages/21/c4/22516775dcde10fc9c9571d155f90710761b028fc44f660508106c363c97/matplotlib-3.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1985ad3d97f51307a2cbfc801a930f120def19ba22864182dacef55277102ba6", size = 8452048 }, + { url = "https://files.pythonhosted.org/packages/63/23/c0615001f67ce7c96b3051d856baedc0c818a2ed84570b9bf9bde200f85d/matplotlib-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c96f2c2f825d1257e437a1482c5a2cf4fee15db4261bd6fc0750f81ba2b4ba3d", size = 8597111 }, + { url = "https://files.pythonhosted.org/packages/ca/c0/a07939a82aed77770514348f4568177d7dadab9787ebc618a616fe3d665e/matplotlib-3.10.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35e87384ee9e488d8dd5a2dd7baf471178d38b90618d8ea147aced4ab59c9bea", size = 9402771 }, + { url = "https://files.pythonhosted.org/packages/a6/b6/a9405484fb40746fdc6ae4502b16a9d6e53282ba5baaf9ebe2da579f68c4/matplotlib-3.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:cfd414bce89cc78a7e1d25202e979b3f1af799e416010a20ab2b5ebb3a02425c", size = 8063742 }, + { url = "https://files.pythonhosted.org/packages/60/73/6770ff5e5523d00f3bc584acb6031e29ee5c8adc2336b16cd1d003675fe0/matplotlib-3.10.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c42eee41e1b60fd83ee3292ed83a97a5f2a8239b10c26715d8a6172226988d7b", size = 8176112 }, + { url = "https://files.pythonhosted.org/packages/08/97/b0ca5da0ed54a3f6599c3ab568bdda65269bc27c21a2c97868c1625e4554/matplotlib-3.10.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4f0647b17b667ae745c13721602b540f7aadb2a32c5b96e924cd4fea5dcb90f1", size = 8046931 }, + { url = "https://files.pythonhosted.org/packages/df/9a/1acbdc3b165d4ce2dcd2b1a6d4ffb46a7220ceee960c922c3d50d8514067/matplotlib-3.10.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa3854b5f9473564ef40a41bc922be978fab217776e9ae1545c9b3a5cf2092a3", size = 8453422 }, + { url = "https://files.pythonhosted.org/packages/51/d0/2bc4368abf766203e548dc7ab57cf7e9c621f1a3c72b516cc7715347b179/matplotlib-3.10.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e496c01441be4c7d5f96d4e40f7fca06e20dcb40e44c8daa2e740e1757ad9e6", size = 8596819 }, + { url = "https://files.pythonhosted.org/packages/ab/1b/8b350f8a1746c37ab69dda7d7528d1fc696efb06db6ade9727b7887be16d/matplotlib-3.10.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:5d45d3f5245be5b469843450617dcad9af75ca50568acf59997bed9311131a0b", size = 9402782 }, + { url = "https://files.pythonhosted.org/packages/89/06/f570373d24d93503988ba8d04f213a372fa1ce48381c5eb15da985728498/matplotlib-3.10.1-cp313-cp313-win_amd64.whl", hash = "sha256:8e8e25b1209161d20dfe93037c8a7f7ca796ec9aa326e6e4588d8c4a5dd1e473", size = 8063812 }, + { url = "https://files.pythonhosted.org/packages/fc/e0/8c811a925b5a7ad75135f0e5af46408b78af88bbb02a1df775100ef9bfef/matplotlib-3.10.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:19b06241ad89c3ae9469e07d77efa87041eac65d78df4fcf9cac318028009b01", size = 8214021 }, + { url = "https://files.pythonhosted.org/packages/4a/34/319ec2139f68ba26da9d00fce2ff9f27679fb799a6c8e7358539801fd629/matplotlib-3.10.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01e63101ebb3014e6e9f80d9cf9ee361a8599ddca2c3e166c563628b39305dbb", size = 8090782 }, + { url = "https://files.pythonhosted.org/packages/77/ea/9812124ab9a99df5b2eec1110e9b2edc0b8f77039abf4c56e0a376e84a29/matplotlib-3.10.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f06bad951eea6422ac4e8bdebcf3a70c59ea0a03338c5d2b109f57b64eb3972", size = 8478901 }, + { url = "https://files.pythonhosted.org/packages/c9/db/b05bf463689134789b06dea85828f8ebe506fa1e37593f723b65b86c9582/matplotlib-3.10.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3dfb036f34873b46978f55e240cff7a239f6c4409eac62d8145bad3fc6ba5a3", size = 8613864 }, + { url = "https://files.pythonhosted.org/packages/c2/04/41ccec4409f3023a7576df3b5c025f1a8c8b81fbfe922ecfd837ac36e081/matplotlib-3.10.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dc6ab14a7ab3b4d813b88ba957fc05c79493a037f54e246162033591e770de6f", size = 9409487 }, + { url = "https://files.pythonhosted.org/packages/ac/c2/0d5aae823bdcc42cc99327ecdd4d28585e15ccd5218c453b7bcd827f3421/matplotlib-3.10.1-cp313-cp313t-win_amd64.whl", hash = 
"sha256:bc411ebd5889a78dabbc457b3fa153203e22248bfa6eedc6797be5df0164dbf9", size = 8134832 }, +] + +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, +] + +[[package]] +name = "mistune" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/79/bda47f7dd7c3c55770478d6d02c9960c430b0cf1773b72366ff89126ea31/mistune-3.1.3.tar.gz", hash = "sha256:a7035c21782b2becb6be62f8f25d3df81ccb4d6fa477a6525b15af06539f02a0", size = 94347 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410 }, +] + +[[package]] +name = "nbclient" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "nbformat" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/87/66/7ffd18d58eae90d5721f9f39212327695b749e23ad44b3881744eaf4d9e8/nbclient-0.10.2.tar.gz", hash = "sha256:90b7fc6b810630db87a6d0c2250b1f0ab4cf4d3c27a299b0cde78a4ed3fd9193", size = 62424 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/34/6d/e7fa07f03a4a7b221d94b4d586edb754a9b0dc3c9e2c93353e9fa4e0d117/nbclient-0.10.2-py3-none-any.whl", hash = "sha256:4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d", size = 25434 }, +] + +[[package]] +name = "nbconvert" +version = "7.16.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "bleach", extra = ["css"] }, + { name = "defusedxml" }, + { name = "jinja2" }, + { name = "jupyter-core" }, + { name = "jupyterlab-pygments" }, + { name = "markupsafe" }, + { name = "mistune" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "packaging" }, + { name = "pandocfilters" }, + { name = "pygments" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/59/f28e15fc47ffb73af68a8d9b47367a8630d76e97ae85ad18271b9db96fdf/nbconvert-7.16.6.tar.gz", hash = "sha256:576a7e37c6480da7b8465eefa66c17844243816ce1ccc372633c6b71c3c0f582", size = 857715 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/9a/cd673b2f773a12c992f41309ef81b99da1690426bd2f96957a7ade0d3ed7/nbconvert-7.16.6-py3-none-any.whl", hash = "sha256:1375a7b67e0c2883678c48e506dc320febb57685e5ee67faa51b18a90f3a712b", size = 258525 }, +] + +[[package]] +name = "nbformat" +version = "5.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastjsonschema" }, + { name = "jsonschema" }, + { name = "jupyter-core" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/fd/91545e604bc3dad7dca9ed03284086039b294c6b3d75c0d2fa45f9e9caf3/nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", size = 142749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = 
"sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454 }, +] + +[[package]] +name = "nest-asyncio" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 }, +] + +[[package]] +name = "notebook" +version = "7.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-server" }, + { name = "jupyterlab" }, + { name = "jupyterlab-server" }, + { name = "notebook-shim" }, + { name = "tornado" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/0f/7781fed05f79d1047c039dfd17fbd6e6670bcf5ad330baa997bcc62525b5/notebook-7.3.3.tar.gz", hash = "sha256:707a313fb882d35f921989eb3d204de942ed5132a44e4aa1fe0e8f24bb9dc25d", size = 12758099 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f2/bf/5e5fcf79c559600b738d7577c8360bfd4cfa705400af06f23b3a049e44b6/notebook-7.3.3-py3-none-any.whl", hash = "sha256:b193df0878956562d5171c8e25c9252b8e86c9fcc16163b8ee3fe6c5e3f422f7", size = 13142886 }, +] + +[[package]] +name = "notebook-shim" +version = "0.2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-server" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/54/d2/92fa3243712b9a3e8bafaf60aac366da1cada3639ca767ff4b5b3654ec28/notebook_shim-0.2.4.tar.gz", hash = "sha256:b4b2cfa1b65d98307ca24361f5b30fe785b53c3fd07b7a47e89acb5e6ac638cb", size = 13167 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/33/bd5b9137445ea4b680023eb0469b2bb969d61303dedb2aac6560ff3d14a1/notebook_shim-0.2.4-py3-none-any.whl", hash = "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef", size = 13307 }, +] + +[[package]] +name = "numba" +version = "0.61.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "llvmlite" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3c/88/c13a935f200fda51384411e49840a8e7f70c9cb1ee8d809dd0f2477cf7ef/numba-0.61.0.tar.gz", hash = "sha256:888d2e89b8160899e19591467e8fdd4970e07606e1fbc248f239c89818d5f925", size = 2816484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/c9/c61881e7f2e253e745209f078bbd428ce23b6cf901f7d93afe166720ff95/numba-0.61.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:152146ecdbb8d8176f294e9f755411e6f270103a11c3ff50cecc413f794e52c8", size = 2769758 }, + { url = "https://files.pythonhosted.org/packages/e1/28/ddec0147a4933f86ceaca580aa9bb767d5632ecdb1ece6cfb3eab4ac78e5/numba-0.61.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5cafa6095716fcb081618c28a8d27bf7c001e09696f595b41836dec114be2905", size = 2772445 }, + { url = 
"https://files.pythonhosted.org/packages/18/74/6a9f0e6c76c088f8a6aa702eab31734068061dca5cc0f34e8bc1eb447de1/numba-0.61.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ffe9fe373ed30638d6e20a0269f817b2c75d447141f55a675bfcf2d1fe2e87fb", size = 3882115 }, + { url = "https://files.pythonhosted.org/packages/53/68/d7c31e53f08e6b4669c9b5a3cd7c5fb9097220c5ef388bc099ca8ab9749f/numba-0.61.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9f25f7fef0206d55c1cfb796ad833cbbc044e2884751e56e798351280038484c", size = 3573296 }, + { url = "https://files.pythonhosted.org/packages/94/4f/8357a99a14f331b865a42cb4756ae37da85599b9c95e01277ea10361e91a/numba-0.61.0-cp312-cp312-win_amd64.whl", hash = "sha256:550d389573bc3b895e1ccb18289feea11d937011de4d278b09dc7ed585d1cdcb", size = 2828077 }, + { url = "https://files.pythonhosted.org/packages/3b/54/71fba18e4af5619f1ea8175ee92e82dd8e220bd6feb8c0153c6b814c8a60/numba-0.61.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:b96fafbdcf6f69b69855273e988696aae4974115a815f6818fef4af7afa1f6b8", size = 2768024 }, + { url = "https://files.pythonhosted.org/packages/39/76/2448b43d08e904aad1b1b9cd12835b19411e84a81aa9192f83642a5e0afd/numba-0.61.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f6c452dca1de8e60e593f7066df052dd8da09b243566ecd26d2b796e5d3087d", size = 2769541 }, + { url = "https://files.pythonhosted.org/packages/32/8f/4bb2374247ab988c9eac587b304b2947a36d605b9bb9ba4bf06e955c17d3/numba-0.61.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44240e694d4aa321430c97b21453e46014fe6c7b8b7d932afa7f6a88cc5d7e5e", size = 3890102 }, + { url = "https://files.pythonhosted.org/packages/ab/bc/dc2d03555289ae5263f65c01d45eb186ce347585c191daf0e60021d5ed39/numba-0.61.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:764f0e47004f126f58c3b28e0a02374c420a9d15157b90806d68590f5c20cc89", size = 3580239 }, + { url = 
"https://files.pythonhosted.org/packages/61/08/71247ce560d2c222d9ca705c7d3547fc4069b96fc85d71aabeb890befe9f/numba-0.61.0-cp313-cp313-win_amd64.whl", hash = "sha256:074cd38c5b1f9c65a4319d1f3928165f48975ef0537ad43385b2bd908e6e2e35", size = 2828035 }, +] + +[[package]] +name = "numpy" +version = "2.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/ca/1166b75c21abd1da445b97bf1fa2f14f423c6cfb4fc7c4ef31dccf9f6a94/numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761", size = 20166090 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/f0/385eb9970309643cbca4fc6eebc8bb16e560de129c91258dfaa18498da8b/numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e", size = 20849658 }, + { url = "https://files.pythonhosted.org/packages/54/4a/765b4607f0fecbb239638d610d04ec0a0ded9b4951c56dc68cef79026abf/numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958", size = 13492258 }, + { url = "https://files.pythonhosted.org/packages/bd/a7/2332679479c70b68dccbf4a8eb9c9b5ee383164b161bee9284ac141fbd33/numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8", size = 5090249 }, + { url = "https://files.pythonhosted.org/packages/c1/67/4aa00316b3b981a822c7a239d3a8135be2a6945d1fd11d0efb25d361711a/numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564", size = 6621704 }, + { url = "https://files.pythonhosted.org/packages/5e/da/1a429ae58b3b6c364eeec93bf044c532f2ff7b48a52e41050896cf15d5b1/numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512", size = 13606089 }, + { url = 
"https://files.pythonhosted.org/packages/9e/3e/3757f304c704f2f0294a6b8340fcf2be244038be07da4cccf390fa678a9f/numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b", size = 16043185 }, + { url = "https://files.pythonhosted.org/packages/43/97/75329c28fea3113d00c8d2daf9bc5828d58d78ed661d8e05e234f86f0f6d/numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc", size = 16410751 }, + { url = "https://files.pythonhosted.org/packages/ad/7a/442965e98b34e0ae9da319f075b387bcb9a1e0658276cc63adb8c9686f7b/numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0", size = 14082705 }, + { url = "https://files.pythonhosted.org/packages/ac/b6/26108cf2cfa5c7e03fb969b595c93131eab4a399762b51ce9ebec2332e80/numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9", size = 6239077 }, + { url = "https://files.pythonhosted.org/packages/a6/84/fa11dad3404b7634aaab50733581ce11e5350383311ea7a7010f464c0170/numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a", size = 12566858 }, + { url = "https://files.pythonhosted.org/packages/4d/0b/620591441457e25f3404c8057eb924d04f161244cb8a3680d529419aa86e/numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f", size = 20836263 }, + { url = "https://files.pythonhosted.org/packages/45/e1/210b2d8b31ce9119145433e6ea78046e30771de3fe353f313b2778142f34/numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598", size = 13507771 }, + { url = 
"https://files.pythonhosted.org/packages/55/44/aa9ee3caee02fa5a45f2c3b95cafe59c44e4b278fbbf895a93e88b308555/numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57", size = 5075805 }, + { url = "https://files.pythonhosted.org/packages/78/d6/61de6e7e31915ba4d87bbe1ae859e83e6582ea14c6add07c8f7eefd8488f/numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe", size = 6608380 }, + { url = "https://files.pythonhosted.org/packages/3e/46/48bdf9b7241e317e6cf94276fe11ba673c06d1fdf115d8b4ebf616affd1a/numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43", size = 13602451 }, + { url = "https://files.pythonhosted.org/packages/70/50/73f9a5aa0810cdccda9c1d20be3cbe4a4d6ea6bfd6931464a44c95eef731/numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56", size = 16039822 }, + { url = "https://files.pythonhosted.org/packages/ad/cd/098bc1d5a5bc5307cfc65ee9369d0ca658ed88fbd7307b0d49fab6ca5fa5/numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a", size = 16411822 }, + { url = "https://files.pythonhosted.org/packages/83/a2/7d4467a2a6d984549053b37945620209e702cf96a8bc658bc04bba13c9e2/numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef", size = 14079598 }, + { url = "https://files.pythonhosted.org/packages/e9/6a/d64514dcecb2ee70bfdfad10c42b76cab657e7ee31944ff7a600f141d9e9/numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f", size = 6236021 }, + { url = 
"https://files.pythonhosted.org/packages/bb/f9/12297ed8d8301a401e7d8eb6b418d32547f1d700ed3c038d325a605421a4/numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed", size = 12560405 }, + { url = "https://files.pythonhosted.org/packages/a7/45/7f9244cd792e163b334e3a7f02dff1239d2890b6f37ebf9e82cbe17debc0/numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f", size = 20859062 }, + { url = "https://files.pythonhosted.org/packages/b1/b4/a084218e7e92b506d634105b13e27a3a6645312b93e1c699cc9025adb0e1/numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4", size = 13515839 }, + { url = "https://files.pythonhosted.org/packages/27/45/58ed3f88028dcf80e6ea580311dc3edefdd94248f5770deb980500ef85dd/numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e", size = 5116031 }, + { url = "https://files.pythonhosted.org/packages/37/a8/eb689432eb977d83229094b58b0f53249d2209742f7de529c49d61a124a0/numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0", size = 6629977 }, + { url = "https://files.pythonhosted.org/packages/42/a3/5355ad51ac73c23334c7caaed01adadfda49544f646fcbfbb4331deb267b/numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408", size = 13575951 }, + { url = "https://files.pythonhosted.org/packages/c4/70/ea9646d203104e647988cb7d7279f135257a6b7e3354ea6c56f8bafdb095/numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6", size = 16022655 }, + { url = 
"https://files.pythonhosted.org/packages/14/ce/7fc0612903e91ff9d0b3f2eda4e18ef9904814afcae5b0f08edb7f637883/numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f", size = 16399902 }, + { url = "https://files.pythonhosted.org/packages/ef/62/1d3204313357591c913c32132a28f09a26357e33ea3c4e2fe81269e0dca1/numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17", size = 14067180 }, + { url = "https://files.pythonhosted.org/packages/24/d7/78a40ed1d80e23a774cb8a34ae8a9493ba1b4271dde96e56ccdbab1620ef/numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48", size = 6291907 }, + { url = "https://files.pythonhosted.org/packages/86/09/a5ab407bd7f5f5599e6a9261f964ace03a73e7c6928de906981c31c38082/numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4", size = 12644098 }, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.26.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/5b/ca2f213f637305633814ae8c36b153220e40a07ea001966dcd87391f3acb/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522", size = 291671495 }, + { url = "https://files.pythonhosted.org/packages/67/ca/f42388aed0fddd64ade7493dbba36e1f534d4e6fdbdd355c6a90030ae028/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6", size = 201319755 }, +] + +[[package]] +name = "overrides" +version = "7.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832 }, +] + +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + +[[package]] +name = "pandas" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, + { url = 
"https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445 }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235 }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756 }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643 }, + { url = 
"https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573 }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085 }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809 }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316 }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055 }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175 }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650 }, + { url = 
"https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526 }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013 }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620 }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 }, +] + +[[package]] +name = "pandocfilters" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/70/6f/3dd4940bbe001c06a65f88e36bad298bc7a0de5036115639926b0c5c0458/pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", size = 8454 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl", hash = 
"sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc", size = 8663 }, +] + +[[package]] +name = "parso" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 }, +] + +[[package]] +name = "patsy" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/81/74f6a65b848ffd16c18f920620ce999fe45fe27f01ab3911260ce4ed85e4/patsy-1.0.1.tar.gz", hash = "sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4", size = 396010 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c", size = 232923 }, +] + +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = 
"sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, +] + +[[package]] +name = "pillow" +version = "11.2.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/d8/cd5483af4982faa07e33b8d4e0be9212df666cabf86ce4ee2d631c80c4c7/pillow-11.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ffcfac194064f4a5bc389b9ef910a60b9bc8573a7e59f081fbc71a59794140f2", size = 11417787 }, + { url = "https://files.pythonhosted.org/packages/dc/9f/1e5f59ce9a3aab74a5c4429be91efffea5624335335072ba9eddada31148/pillow-11.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40f972b6e2a97c633fa25c3c8405d45942747eaf838805d76f7da0363a7696ec", size = 8922770 }, + { url = "https://files.pythonhosted.org/packages/e5/b2/bf7b89d9561dfd1d8cafba9da689ee44f82298575b3682a3568f0e558193/pillow-11.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b46bd331193a7b223864010e751a30ec48245e5106e00eda94b435e37109de1", size = 17098029 }, + { url = "https://files.pythonhosted.org/packages/97/7e/71db1559bd5d3e42210f3c7dc0e43b15bd9d2c3eb1d69ad239108745bd92/pillow-11.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6581ee02eec144cbcdeb6bef055142129cfda0f52939480d81806ee8d61ab4f3", size = 18940242 }, + { url = "https://files.pythonhosted.org/packages/11/3e/886a818c1ec342c750830636ff58214d192b111881f8fbc3bd5c2449d204/pillow-11.2.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c2a6700be18335d341ddd12facbc49dcf0febbfeefdda4990e017698a8e66f59", size = 17630373 }, + { url = "https://files.pythonhosted.org/packages/21/95/7b33e3cd5210c9e5028e915aaac7a97c27f7e01726c36ac1e2cf6805bfd0/pillow-11.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b228ae10b25650539d37c056063c8f34c0da4f69419ce03249dfd0adc322d46b", size = 19631958 }, + { url = 
"https://files.pythonhosted.org/packages/82/18/00315a811f2d850456ece317c6acaf2c4535ee699b31219a5683a37d8d14/pillow-11.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:36de59d15a95584fad3758c4bcb992c3bf279ddd86236d3f096dbf696efc2b48", size = 18041279 }, + { url = "https://files.pythonhosted.org/packages/72/9c/f20eee2e239e6ec17f24448a92dbea69a2e5ce23d1fabf4cdb6cebf8f306/pillow-11.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:23fedf0def4193848ce5be2f685e3606c1ebf42a033cfa092b729e6cf844d337", size = 19809904 }, + { url = "https://files.pythonhosted.org/packages/cf/b2/a2c03b90be1916e712227dd012dfc4ae56a7a035709f60a6d993c27b6b6e/pillow-11.2.0-cp312-cp312-win32.whl", hash = "sha256:290fd44a0b517a48af7660b7586538f9db1fe0656d7d6c97b0aefd2a4ad2b14d", size = 2332129 }, + { url = "https://files.pythonhosted.org/packages/ec/fb/d878e8bfedfd6c959b408088cb98c337f143b0db9758c07bcc57c1192f98/pillow-11.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:728f2381d178722d008772e57b3252b8c6c0c284cee76e798ffb392ca71e3fd9", size = 13807352 }, + { url = "https://files.pythonhosted.org/packages/9c/61/95c09187bdd6af8069ef300c23f0704b22324ee1acb5796cc1b919d36d8f/pillow-11.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:65dfab8348a42ba9883141e4fa09a7db6ff2a203d17450717b998fb27f4ba0b4", size = 2414775 }, + { url = "https://files.pythonhosted.org/packages/96/36/04fcdfbd2969480e83006da7c111eb1387e0920434fdfb1b40bdff79c2eb/pillow-11.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b46b5ee8443213da50b08caa1b2f7c407233873f6153c6fcf558fab57ac658b", size = 11417676 }, + { url = "https://files.pythonhosted.org/packages/3d/99/12b26ba105f6abf85186831cf76ae1cd75c35d98cfb87c75614e3f049119/pillow-11.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:70daf0b69bc9017796cb74a38a597a34e311e4e3e5254d7e4aa42aab3d1d3eac", size = 8922706 }, + { url = 
"https://files.pythonhosted.org/packages/77/e8/a160d7a7a7e16a5c76a713538e99b748de6596ed77506c42d2f8d26b7cd3/pillow-11.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00f8830ac57dc38bfacfb8bb1b7987da7ccbf99131dae0e826bfbaa2c8dfc990", size = 17089927 }, + { url = "https://files.pythonhosted.org/packages/84/7d/cc2ee517c7b379b78e4ede359c8ef52bde35e977fb316c3b0b41aaab7f5a/pillow-11.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:597b629e4d135ebb904f58d0b3328c58dba3ac7bd1f9585e0dddcb250d3f955c", size = 18934553 }, + { url = "https://files.pythonhosted.org/packages/a6/bf/ad1ce340d20e9a7a80e67f74d9b58bf6192a42564d4a3178513774183581/pillow-11.2.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:106fce029c62ddfed767e4f8cc3396e90ba45320d87df58bf83a73fdbe73f09b", size = 17624994 }, + { url = "https://files.pythonhosted.org/packages/b4/d8/20a183f52b2703afb1243aa1cb80b3bbcfe32f75507615ca93889de24e71/pillow-11.2.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:676461578f605c8e56ea108c371632e4bf40697996d80b5899c592043432e5f1", size = 19629070 }, + { url = "https://files.pythonhosted.org/packages/67/f8/533090a933cebffaa8c80c883748be029fdf604e87e5e54c9cc0d10a1c3f/pillow-11.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0f19332b8de08305db8e003272ba86f5a65231b848a44ceef2e9593101637208", size = 18038064 }, + { url = "https://files.pythonhosted.org/packages/3e/16/927e75ec70a940f8982ab457f8da3f244ff48081bf1e20bc99dd53a9f072/pillow-11.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe61cadf4f0449de64fb6911bca381f1a4c1f04cce3027a0d12ebaba626f97cd", size = 19807725 }, + { url = "https://files.pythonhosted.org/packages/57/51/61e667252e9d9ea08d28f44b122fd625d5392b6897f35287acc10f2b117b/pillow-11.2.0-cp313-cp313-win32.whl", hash = "sha256:25e533264bec3ca5dc6e1587810dce1c2dda0b9d63ed4e27fa72092cb351cd55", size = 2332160 }, + { url = 
"https://files.pythonhosted.org/packages/e1/c2/1b4b80569fc8d8ce96fe536990c05ddff463220b24a2b15cb1a0f1ede740/pillow-11.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:9565b70771a38b096b0f78d830be5338ca9a0b810ea6cbfab54c64f0266f9c72", size = 13807002 }, + { url = "https://files.pythonhosted.org/packages/77/4a/887e419baa6ca3f7b1a783d0d63a6c619eb397104251e48fd68bb08c585f/pillow-11.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:6fa29d8fcaf8a830ced88f390baffe32ae1ba41873a3f6c8946213d067db7ae0", size = 2414656 }, + { url = "https://files.pythonhosted.org/packages/96/27/2b8e9214a5823dacb3449f7eae04b90357573c0f874bf845cc342dacd0df/pillow-11.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cd635650ddd10be04148e7f2e895afa240d0ea5e810cd10f650adba13f5f93", size = 11420605 }, + { url = "https://files.pythonhosted.org/packages/85/ff/e659ec2ac852204af59c3acd3bf934fd90288d46f26cbab5fe3b8d52f0dc/pillow-11.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b653372f6b3a7df74cd52abc8c400670ab08dd4473317508ed45668e87df0284", size = 8926094 }, + { url = "https://files.pythonhosted.org/packages/54/5f/69a1f4c324ec2129d72cd8d993b38751247ebf64d0f4bab1eb102469c891/pillow-11.2.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e0395313bc32de78309e24a298dc4986f8a93fc917032412f510a272ee9f25", size = 17130485 }, + { url = "https://files.pythonhosted.org/packages/f2/e3/5a068cd8b36a947657eef37f95a7c30fc3fa25c5bd37d0636ae8e3ce0cf2/pillow-11.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47dcb3289613e256c2419242f6d92b1c9ce28368bd337f6a0cf50cddfb8cc69a", size = 18967506 }, + { url = "https://files.pythonhosted.org/packages/5e/b0/ee2756e831b2e748de9d842c955515ac1c2f3eb2a0f459f900aed392e278/pillow-11.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a32b243b37c060884bd04f6bb709a22d86ec05662374076e6a35c07ac0361477", size = 17659106 }, + { url = 
"https://files.pythonhosted.org/packages/84/62/a3c8363ddf75d30a092b298be4d31adaabdf37e29ef9cb7d66b044925605/pillow-11.2.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4122bb11e4d238ed28ecff24863c620aac22732bc102ab9550e99f6fd6eaf869", size = 19656789 }, + { url = "https://files.pythonhosted.org/packages/6f/d1/bed738848132ddd1ca190044003adc73c2ae29542934dba2fa78a7a840ea/pillow-11.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:be7a73fb992a9cb8e4362220037ea10d3eb52d0804220ca98de4f7d24c1f36c9", size = 18069453 }, + { url = "https://files.pythonhosted.org/packages/52/88/435ac4b0014631a06766f01a6b9d3c912d8340185787451e12b9e20966b3/pillow-11.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0d3408815fc6ab70283916e0b058e7a4bbacdd7b5bb71ef81a9a507df4580b9a", size = 19834284 }, + { url = "https://files.pythonhosted.org/packages/9d/92/6b497fefb3d976560af2fb935546428bd3bd159ec51c3e0ab7085dd51865/pillow-11.2.0-cp313-cp313t-win32.whl", hash = "sha256:491e11b37e322fc6e80b485f99203e6c4ed69ea170eb6d25e9cb9eb0b92db7e5", size = 2335712 }, + { url = "https://files.pythonhosted.org/packages/78/aa/b26ac0fa7e7445a80c19f1141999236cbcdd7b2fccf13e086610165891fa/pillow-11.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4202372ccb549a3f12de2cebbafa82b0a3d8e2cb5569fa4b698e7da6b6521687", size = 13812562 }, + { url = "https://files.pythonhosted.org/packages/f4/77/5db48884e61bd14398a8a3d9e4c254e1e1714aff58bdabe1275f393daef9/pillow-11.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:a9cd300d223efadd1e540521bae1fcdab406ef6c5f2ca6e46370f5671b607f26", size = 2416602 }, +] + +[[package]] +name = "platformdirs" +version = "4.3.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/2d/7d512a3913d60623e7eb945c6d1b4f0bddf1d0b7ada5225274c87e5b53d1/platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351", size = 21291 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/6d/45/59578566b3275b8fd9157885918fcd0c4d74162928a5310926887b856a51/platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94", size = 18499 }, +] + +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707 }, +] + +[[package]] +name = "prometheus-client" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/62/14/7d0f567991f3a9af8d1cd4f619040c93b68f09a02b6d0b6ab1b2d1ded5fe/prometheus_client-0.21.1.tar.gz", hash = "sha256:252505a722ac04b0456be05c05f75f45d760c2911ffc45f2a06bcaed9f3ae3fb", size = 78551 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/c2/ab7d37426c179ceb9aeb109a85cda8948bb269b7561a0be870cc656eefe4/prometheus_client-0.21.1-py3-none-any.whl", hash = "sha256:594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301", size = 54682 }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.50" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/e1/bd15cb8ffdcfeeb2bdc215de3c3cffca11408d829e4b8416dcfe71ba8854/prompt_toolkit-3.0.50.tar.gz", hash = 
"sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab", size = 429087 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/ea/d836f008d33151c7a1f62caf3d8dd782e4d15f6a43897f64480c2b8de2ad/prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198", size = 387816 }, +] + +[[package]] +name = "psutil" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051 }, + { url = "https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535 }, + { url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004 }, + { url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986 }, + { url = 
"https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544 }, + { url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053 }, + { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 }, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = 
"sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, +] + +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "pyparsing" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/22/f1129e69d94ffff626bdb5c835506b3a5b4f3d070f17ea295e12c2c6f60f/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be", size = 1088608 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 }, +] + +[[package]] +name = "python-dateutil" 
+version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "python-json-logger" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/de/d3144a0bceede957f961e975f3752760fbe390d57fbe194baf709d8f1f7b/python_json_logger-3.3.0.tar.gz", hash = "sha256:12b7e74b17775e7d565129296105bbe3910842d9d0eb083fc83a6a617aa8df84", size = 16642 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 }, +] + +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, +] + +[[package]] +name = "pywin32" +version = "310" +source = { registry = 
"https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/ec/4fdbe47932f671d6e348474ea35ed94227fb5df56a7c30cbbb42cd396ed0/pywin32-310-cp312-cp312-win32.whl", hash = "sha256:8a75a5cc3893e83a108c05d82198880704c44bbaee4d06e442e471d3c9ea4f3d", size = 8796239 }, + { url = "https://files.pythonhosted.org/packages/e3/e5/b0627f8bb84e06991bea89ad8153a9e50ace40b2e1195d68e9dff6b03d0f/pywin32-310-cp312-cp312-win_amd64.whl", hash = "sha256:bf5c397c9a9a19a6f62f3fb821fbf36cac08f03770056711f765ec1503972060", size = 9503839 }, + { url = "https://files.pythonhosted.org/packages/1f/32/9ccf53748df72301a89713936645a664ec001abd35ecc8578beda593d37d/pywin32-310-cp312-cp312-win_arm64.whl", hash = "sha256:2349cc906eae872d0663d4d6290d13b90621eaf78964bb1578632ff20e152966", size = 8459470 }, + { url = "https://files.pythonhosted.org/packages/1c/09/9c1b978ffc4ae53999e89c19c77ba882d9fce476729f23ef55211ea1c034/pywin32-310-cp313-cp313-win32.whl", hash = "sha256:5d241a659c496ada3253cd01cfaa779b048e90ce4b2b38cd44168ad555ce74ab", size = 8794384 }, + { url = "https://files.pythonhosted.org/packages/45/3c/b4640f740ffebadd5d34df35fecba0e1cfef8fde9f3e594df91c28ad9b50/pywin32-310-cp313-cp313-win_amd64.whl", hash = "sha256:667827eb3a90208ddbdcc9e860c81bde63a135710e21e4cb3348968e4bd5249e", size = 9503039 }, + { url = "https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152 }, +] + +[[package]] +name = "pywinpty" +version = "2.0.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/7c/917f9c4681bb8d34bfbe0b79d36bbcd902651aeab48790df3d30ba0202fb/pywinpty-2.0.15.tar.gz", hash = "sha256:312cf39153a8736c617d45ce8b6ad6cd2107de121df91c455b10ce6bba7a39b2", size = 29017 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/88/e5/9714def18c3a411809771a3fbcec70bffa764b9675afb00048a620fca604/pywinpty-2.0.15-cp312-cp312-win_amd64.whl", hash = "sha256:83a8f20b430bbc5d8957249f875341a60219a4e971580f2ba694fbfb54a45ebc", size = 1405243 }, + { url = "https://files.pythonhosted.org/packages/fb/16/2ab7b3b7f55f3c6929e5f629e1a68362981e4e5fed592a2ed1cb4b4914a5/pywinpty-2.0.15-cp313-cp313-win_amd64.whl", hash = "sha256:ab5920877dd632c124b4ed17bc6dd6ef3b9f86cd492b963ffdb1a67b85b0f408", size = 1405020 }, + { url = "https://files.pythonhosted.org/packages/7c/16/edef3515dd2030db2795dbfbe392232c7a0f3dc41b98e92b38b42ba497c7/pywinpty-2.0.15-cp313-cp313t-win_amd64.whl", hash = "sha256:a4560ad8c01e537708d2790dbe7da7d986791de805d89dd0d3697ca59e9e4901", size = 1404151 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = 
"https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 }, + { url = 
"https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 }, + { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, + { url = 
"https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, +] + +[[package]] +name = "pyzmq" +version = "26.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "implementation_name == 'pypy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/ed/c3876f3b3e8beba336214ce44e1efa1792dd537027cef24192ac2b077d7c/pyzmq-26.3.0.tar.gz", hash = "sha256:f1cd68b8236faab78138a8fc703f7ca0ad431b17a3fcac696358600d4e6243b3", size = 276733 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/03/7170c3814bb9106c1bca67700c731aaf1cd990fd2f0097c754acb600330e/pyzmq-26.3.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:c80653332c6136da7f4d4e143975e74ac0fa14f851f716d90583bc19e8945cea", size = 1348354 }, + { url = "https://files.pythonhosted.org/packages/74/f3/908b17f9111cdc764aef1de3d36026a2984c46ed90c3c2c85f28b66142f0/pyzmq-26.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e317ee1d4528a03506cb1c282cd9db73660a35b3564096de37de7350e7d87a7", size = 671056 }, + { url = "https://files.pythonhosted.org/packages/02/ad/afcb8484b65ceacd1609f709c2caeed31bd6c49261a7507cd5c175cc105f/pyzmq-26.3.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:943a22ebb3daacb45f76a9bcca9a7b74e7d94608c0c0505da30af900b998ca8d", size = 908597 }, + { url = "https://files.pythonhosted.org/packages/a1/b5/4eeeae0aaaa6ef0c74cfa8b2273b53382bd858df6d99485f2fc8211e7002/pyzmq-26.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fc9e71490d989144981ea21ef4fdfaa7b6aa84aff9632d91c736441ce2f6b00", size = 865260 }, + { url = 
"https://files.pythonhosted.org/packages/74/6a/63db856e93e3a3c3dc98a1de28a902cf1b21c7b0d3856cd5931d7cfd30af/pyzmq-26.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e281a8071a06888575a4eb523c4deeefdcd2f5fe4a2d47e02ac8bf3a5b49f695", size = 859916 }, + { url = "https://files.pythonhosted.org/packages/e1/ce/d522c9b46ee3746d4b98c81969c568c2c6296e931a65f2c87104b645654c/pyzmq-26.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:be77efd735bb1064605be8dec6e721141c1421ef0b115ef54e493a64e50e9a52", size = 1201368 }, + { url = "https://files.pythonhosted.org/packages/5a/56/29dcd3647a39e933eb489fda261a1e2700a59d4a9432889a85166e15651c/pyzmq-26.3.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a4ac2ffa34f1212dd586af90f4ba894e424f0cabb3a49cdcff944925640f6ac", size = 1512663 }, + { url = "https://files.pythonhosted.org/packages/6b/36/7c570698127a43398ed1b1832dada59496e633115016addbce5eda9938a6/pyzmq-26.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ba698c7c252af83b6bba9775035263f0df5f807f0404019916d4b71af8161f66", size = 1411693 }, + { url = "https://files.pythonhosted.org/packages/de/54/51d39bef85a7cdbca36227f7defdbfcdc5011b8361a3bfc0e8df431f5a5d/pyzmq-26.3.0-cp312-cp312-win32.whl", hash = "sha256:214038aaa88e801e54c2ef0cfdb2e6df27eb05f67b477380a452b595c5ecfa37", size = 581244 }, + { url = "https://files.pythonhosted.org/packages/f2/6a/9512b11a1d0c5648534f03d5ab0c3222f55dc9c192029c1cb00a0ca044e2/pyzmq-26.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:bad7fe0372e505442482ca3ccbc0d6f38dae81b1650f57a0aa6bbee18e7df495", size = 643559 }, + { url = "https://files.pythonhosted.org/packages/27/9f/faf5c9cf91b61eeb82a5e919d024d3ac28a795c92cce817be264ccd757d3/pyzmq-26.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:b7b578d604e79e99aa39495becea013fd043fa9f36e4b490efa951f3d847a24d", size = 557664 }, + { url = 
"https://files.pythonhosted.org/packages/37/16/97b8c5107bfccb39120e611671a452c9ff6e8626fb3f8d4c15afd652b6ae/pyzmq-26.3.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:fa85953df84beb7b8b73cb3ec3f5d92b62687a09a8e71525c6734e020edf56fd", size = 1345691 }, + { url = "https://files.pythonhosted.org/packages/a5/61/d5572d95040c0bb5b31eed5b23f3f0f992d94e4e0de0cea62e3c7f3a85c1/pyzmq-26.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:209d09f0ab6ddbcebe64630d1e6ca940687e736f443c265ae15bc4bfad833597", size = 670622 }, + { url = "https://files.pythonhosted.org/packages/1c/0c/f0235d27388aacf4ed8bcc1d574f6f2f629da0a20610faa0a8e9d363c2b0/pyzmq-26.3.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d35cc1086f1d4f907df85c6cceb2245cb39a04f69c3f375993363216134d76d4", size = 908683 }, + { url = "https://files.pythonhosted.org/packages/cb/52/664828f9586c396b857eec088d208230463e3dc991a24df6adbad98fbaa3/pyzmq-26.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b380e9087078ba91e45fb18cdd0c25275ffaa045cf63c947be0ddae6186bc9d9", size = 865212 }, + { url = "https://files.pythonhosted.org/packages/2b/14/213b2967030b7d7aecc32dd453830f98799b3cbf2b10a40232e9f22a6520/pyzmq-26.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6d64e74143587efe7c9522bb74d1448128fdf9897cc9b6d8b9927490922fd558", size = 860068 }, + { url = "https://files.pythonhosted.org/packages/aa/e5/ff50c8fade69d1c0469652832c626d1910668697642c10cb0e1b6183ef9a/pyzmq-26.3.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:efba4f53ac7752eea6d8ca38a4ddac579e6e742fba78d1e99c12c95cd2acfc64", size = 1201303 }, + { url = "https://files.pythonhosted.org/packages/9a/e2/fff5e483be95ccc11a05781323e001e63ec15daec1d0f6f08de72ca534db/pyzmq-26.3.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:9b0137a1c40da3b7989839f9b78a44de642cdd1ce20dcef341de174c8d04aa53", size = 1512892 }, + { url = 
"https://files.pythonhosted.org/packages/21/75/cc44d276e43136e5692e487c3c019f816e11ed445261e434217c28cc98c4/pyzmq-26.3.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a995404bd3982c089e57b428c74edd5bfc3b0616b3dbcd6a8e270f1ee2110f36", size = 1411736 }, + { url = "https://files.pythonhosted.org/packages/ee/1c/d070cbc9a7961fe772641c51bb3798d88cb1f8e20ca718407363462624cf/pyzmq-26.3.0-cp313-cp313-win32.whl", hash = "sha256:240b1634b9e530ef6a277d95cbca1a6922f44dfddc5f0a3cd6c722a8de867f14", size = 581214 }, + { url = "https://files.pythonhosted.org/packages/38/d3/91082f1151ff5b54e0bed40eb1a26f418530ab07ecaec4dbb83e3d9fa9a9/pyzmq-26.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:fe67291775ea4c2883764ba467eb389c29c308c56b86c1e19e49c9e1ed0cbeca", size = 643412 }, + { url = "https://files.pythonhosted.org/packages/e0/cf/dabe68dfdf3e67bea6152eeec4b251cf899ee5b853cfb5c97e4719f9e6e9/pyzmq-26.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:73ca9ae9a9011b714cf7650450cd9c8b61a135180b708904f1f0a05004543dce", size = 557444 }, + { url = "https://files.pythonhosted.org/packages/c0/56/e7576ac71c1566da4f4ec586351462a2bb202143fb074bf56df8fe85dcc3/pyzmq-26.3.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:fea7efbd7e49af9d7e5ed6c506dfc7de3d1a628790bd3a35fd0e3c904dc7d464", size = 1340288 }, + { url = "https://files.pythonhosted.org/packages/f1/ab/0bca97e94d420b5908968bc479e51c3686a9f80d8893450eefcd673b1b1d/pyzmq-26.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4430c7cba23bb0e2ee203eee7851c1654167d956fc6d4b3a87909ccaf3c5825", size = 662462 }, + { url = "https://files.pythonhosted.org/packages/ee/be/99e89b55863808da322ac3ab52d8e135dcf2241094aaa468bfe2923d5194/pyzmq-26.3.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:016d89bee8c7d566fad75516b4e53ec7c81018c062d4c51cd061badf9539be52", size = 896464 }, + { url = 
"https://files.pythonhosted.org/packages/38/d4/a4be06a313c8d6a5fe1d92975db30aca85f502e867fca392532e06a28c3c/pyzmq-26.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04bfe59852d76d56736bfd10ac1d49d421ab8ed11030b4a0332900691507f557", size = 853432 }, + { url = "https://files.pythonhosted.org/packages/12/e6/e608b4c34106bbf5b3b382662ea90a43b2e23df0aa9c1f0fd4e21168d523/pyzmq-26.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:1fe05bd0d633a0f672bb28cb8b4743358d196792e1caf04973b7898a0d70b046", size = 845884 }, + { url = "https://files.pythonhosted.org/packages/c3/a9/d5e6355308ba529d9cd3576ee8bb3b2e2b726571748f515fbb8559401f5b/pyzmq-26.3.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:2aa1a9f236d5b835fb8642f27de95f9edcfd276c4bc1b6ffc84f27c6fb2e2981", size = 1191454 }, + { url = "https://files.pythonhosted.org/packages/6a/9a/a21dc6c73ac242e425709c1e0049368d8f5db5de7c1102a45f93f5c492b3/pyzmq-26.3.0-cp313-cp313t-musllinux_1_1_i686.whl", hash = "sha256:21399b31753bf321043ea60c360ed5052cc7be20739785b1dff1820f819e35b3", size = 1500397 }, + { url = "https://files.pythonhosted.org/packages/87/88/0236056156da0278c9ca2e2562463643597808b5bbd6c34009ba217e7e92/pyzmq-26.3.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:d015efcd96aca8882057e7e6f06224f79eecd22cad193d3e6a0a91ec67590d1f", size = 1398401 }, +] + +[[package]] +name = "referencing" +version = "0.36.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "rfc3339-validator" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/ea/a9387748e2d111c3c2b275ba970b735e04e15cdb1eb30693b6b5708c4dbd/rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", size = 5513 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa", size = 3490 }, +] + +[[package]] +name = "rfc3986-validator" +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/88/f270de456dd7d11dcc808abfa291ecdd3f45ff44e3b549ffa01b126464d0/rfc3986_validator-0.1.1.tar.gz", hash = 
"sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055", size = 6760 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9", size = 4242 }, +] + +[[package]] +name = "rpds-py" +version = "0.24.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/b3/52b213298a0ba7097c7ea96bee95e1947aa84cc816d48cebb539770cdf41/rpds_py-0.24.0.tar.gz", hash = "sha256:772cc1b2cd963e7e17e6cc55fe0371fb9c704d63e44cacec7b9b7f523b78919e", size = 26863 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/e0/1c55f4a3be5f1ca1a4fd1f3ff1504a1478c1ed48d84de24574c4fa87e921/rpds_py-0.24.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d8551e733626afec514b5d15befabea0dd70a343a9f23322860c4f16a9430205", size = 366945 }, + { url = "https://files.pythonhosted.org/packages/39/1b/a3501574fbf29118164314dbc800d568b8c1c7b3258b505360e8abb3902c/rpds_py-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e374c0ce0ca82e5b67cd61fb964077d40ec177dd2c4eda67dba130de09085c7", size = 351935 }, + { url = "https://files.pythonhosted.org/packages/dc/47/77d3d71c55f6a374edde29f1aca0b2e547325ed00a9da820cabbc9497d2b/rpds_py-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d69d003296df4840bd445a5d15fa5b6ff6ac40496f956a221c4d1f6f7b4bc4d9", size = 390817 }, + { url = "https://files.pythonhosted.org/packages/4e/ec/1e336ee27484379e19c7f9cc170f4217c608aee406d3ae3a2e45336bff36/rpds_py-0.24.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8212ff58ac6dfde49946bea57474a386cca3f7706fc72c25b772b9ca4af6b79e", size = 401983 }, + { url = 
"https://files.pythonhosted.org/packages/07/f8/39b65cbc272c635eaea6d393c2ad1ccc81c39eca2db6723a0ca4b2108fce/rpds_py-0.24.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:528927e63a70b4d5f3f5ccc1fa988a35456eb5d15f804d276709c33fc2f19bda", size = 451719 }, + { url = "https://files.pythonhosted.org/packages/32/05/05c2b27dd9c30432f31738afed0300659cb9415db0ff7429b05dfb09bbde/rpds_py-0.24.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a824d2c7a703ba6daaca848f9c3d5cb93af0505be505de70e7e66829affd676e", size = 442546 }, + { url = "https://files.pythonhosted.org/packages/7d/e0/19383c8b5d509bd741532a47821c3e96acf4543d0832beba41b4434bcc49/rpds_py-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44d51febb7a114293ffd56c6cf4736cb31cd68c0fddd6aa303ed09ea5a48e029", size = 393695 }, + { url = "https://files.pythonhosted.org/packages/9d/15/39f14e96d94981d0275715ae8ea564772237f3fa89bc3c21e24de934f2c7/rpds_py-0.24.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3fab5f4a2c64a8fb64fc13b3d139848817a64d467dd6ed60dcdd6b479e7febc9", size = 427218 }, + { url = "https://files.pythonhosted.org/packages/22/b9/12da7124905a680f690da7a9de6f11de770b5e359f5649972f7181c8bf51/rpds_py-0.24.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9be4f99bee42ac107870c61dfdb294d912bf81c3c6d45538aad7aecab468b6b7", size = 568062 }, + { url = "https://files.pythonhosted.org/packages/88/17/75229017a2143d915f6f803721a6d721eca24f2659c5718a538afa276b4f/rpds_py-0.24.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:564c96b6076a98215af52f55efa90d8419cc2ef45d99e314fddefe816bc24f91", size = 596262 }, + { url = "https://files.pythonhosted.org/packages/aa/64/8e8a1d8bd1b6b638d6acb6d41ab2cec7f2067a5b8b4c9175703875159a7c/rpds_py-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:75a810b7664c17f24bf2ffd7f92416c00ec84b49bb68e6a0d93e542406336b56", size = 564306 }, + { url = 
"https://files.pythonhosted.org/packages/68/1c/a7eac8d8ed8cb234a9b1064647824c387753343c3fab6ed7c83481ed0be7/rpds_py-0.24.0-cp312-cp312-win32.whl", hash = "sha256:f6016bd950be4dcd047b7475fdf55fb1e1f59fc7403f387be0e8123e4a576d30", size = 224281 }, + { url = "https://files.pythonhosted.org/packages/bb/46/b8b5424d1d21f2f2f3f2d468660085318d4f74a8df8289e3dd6ad224d488/rpds_py-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:998c01b8e71cf051c28f5d6f1187abbdf5cf45fc0efce5da6c06447cba997034", size = 239719 }, + { url = "https://files.pythonhosted.org/packages/9d/c3/3607abc770395bc6d5a00cb66385a5479fb8cd7416ddef90393b17ef4340/rpds_py-0.24.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:3d2d8e4508e15fc05b31285c4b00ddf2e0eb94259c2dc896771966a163122a0c", size = 367072 }, + { url = "https://files.pythonhosted.org/packages/d8/35/8c7ee0fe465793e3af3298dc5a9f3013bd63e7a69df04ccfded8293a4982/rpds_py-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0f00c16e089282ad68a3820fd0c831c35d3194b7cdc31d6e469511d9bffc535c", size = 351919 }, + { url = "https://files.pythonhosted.org/packages/91/d3/7e1b972501eb5466b9aca46a9c31bcbbdc3ea5a076e9ab33f4438c1d069d/rpds_py-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951cc481c0c395c4a08639a469d53b7d4afa252529a085418b82a6b43c45c240", size = 390360 }, + { url = "https://files.pythonhosted.org/packages/a2/a8/ccabb50d3c91c26ad01f9b09a6a3b03e4502ce51a33867c38446df9f896b/rpds_py-0.24.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9ca89938dff18828a328af41ffdf3902405a19f4131c88e22e776a8e228c5a8", size = 400704 }, + { url = "https://files.pythonhosted.org/packages/53/ae/5fa5bf0f3bc6ce21b5ea88fc0ecd3a439e7cb09dd5f9ffb3dbe1b6894fc5/rpds_py-0.24.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed0ef550042a8dbcd657dfb284a8ee00f0ba269d3f2286b0493b15a5694f9fe8", size = 450839 }, + { url = 
"https://files.pythonhosted.org/packages/e3/ac/c4e18b36d9938247e2b54f6a03746f3183ca20e1edd7d3654796867f5100/rpds_py-0.24.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b2356688e5d958c4d5cb964af865bea84db29971d3e563fb78e46e20fe1848b", size = 441494 }, + { url = "https://files.pythonhosted.org/packages/bf/08/b543969c12a8f44db6c0f08ced009abf8f519191ca6985509e7c44102e3c/rpds_py-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78884d155fd15d9f64f5d6124b486f3d3f7fd7cd71a78e9670a0f6f6ca06fb2d", size = 393185 }, + { url = "https://files.pythonhosted.org/packages/da/7e/f6eb6a7042ce708f9dfc781832a86063cea8a125bbe451d663697b51944f/rpds_py-0.24.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6a4a535013aeeef13c5532f802708cecae8d66c282babb5cd916379b72110cf7", size = 426168 }, + { url = "https://files.pythonhosted.org/packages/38/b0/6cd2bb0509ac0b51af4bb138e145b7c4c902bb4b724d6fd143689d6e0383/rpds_py-0.24.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:84e0566f15cf4d769dade9b366b7b87c959be472c92dffb70462dd0844d7cbad", size = 567622 }, + { url = "https://files.pythonhosted.org/packages/64/b0/c401f4f077547d98e8b4c2ec6526a80e7cb04f519d416430ec1421ee9e0b/rpds_py-0.24.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:823e74ab6fbaa028ec89615ff6acb409e90ff45580c45920d4dfdddb069f2120", size = 595435 }, + { url = "https://files.pythonhosted.org/packages/9f/ec/7993b6e803294c87b61c85bd63e11142ccfb2373cf88a61ec602abcbf9d6/rpds_py-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c61a2cb0085c8783906b2f8b1f16a7e65777823c7f4d0a6aaffe26dc0d358dd9", size = 563762 }, + { url = "https://files.pythonhosted.org/packages/1f/29/4508003204cb2f461dc2b83dd85f8aa2b915bc98fe6046b9d50d4aa05401/rpds_py-0.24.0-cp313-cp313-win32.whl", hash = "sha256:60d9b630c8025b9458a9d114e3af579a2c54bd32df601c4581bd054e85258143", size = 223510 }, + { url = 
"https://files.pythonhosted.org/packages/f9/12/09e048d1814195e01f354155fb772fb0854bd3450b5f5a82224b3a319f0e/rpds_py-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:6eea559077d29486c68218178ea946263b87f1c41ae7f996b1f30a983c476a5a", size = 239075 }, + { url = "https://files.pythonhosted.org/packages/d2/03/5027cde39bb2408d61e4dd0cf81f815949bb629932a6c8df1701d0257fc4/rpds_py-0.24.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:d09dc82af2d3c17e7dd17120b202a79b578d79f2b5424bda209d9966efeed114", size = 362974 }, + { url = "https://files.pythonhosted.org/packages/bf/10/24d374a2131b1ffafb783e436e770e42dfdb74b69a2cd25eba8c8b29d861/rpds_py-0.24.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5fc13b44de6419d1e7a7e592a4885b323fbc2f46e1f22151e3a8ed3b8b920405", size = 348730 }, + { url = "https://files.pythonhosted.org/packages/7a/d1/1ef88d0516d46cd8df12e5916966dbf716d5ec79b265eda56ba1b173398c/rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c347a20d79cedc0a7bd51c4d4b7dbc613ca4e65a756b5c3e57ec84bd43505b47", size = 387627 }, + { url = "https://files.pythonhosted.org/packages/4e/35/07339051b8b901ecefd449ebf8e5522e92bcb95e1078818cbfd9db8e573c/rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20f2712bd1cc26a3cc16c5a1bfee9ed1abc33d4cdf1aabd297fe0eb724df4272", size = 394094 }, + { url = "https://files.pythonhosted.org/packages/dc/62/ee89ece19e0ba322b08734e95441952062391065c157bbd4f8802316b4f1/rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aad911555286884be1e427ef0dc0ba3929e6821cbeca2194b13dc415a462c7fd", size = 449639 }, + { url = "https://files.pythonhosted.org/packages/15/24/b30e9f9e71baa0b9dada3a4ab43d567c6b04a36d1cb531045f7a8a0a7439/rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0aeb3329c1721c43c58cae274d7d2ca85c1690d89485d9c63a006cb79a85771a", size = 438584 }, + { url = 
"https://files.pythonhosted.org/packages/28/d9/49f7b8f3b4147db13961e19d5e30077cd0854ccc08487026d2cb2142aa4a/rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a0f156e9509cee987283abd2296ec816225145a13ed0391df8f71bf1d789e2d", size = 391047 }, + { url = "https://files.pythonhosted.org/packages/49/b0/e66918d0972c33a259ba3cd7b7ff10ed8bd91dbcfcbec6367b21f026db75/rpds_py-0.24.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa6800adc8204ce898c8a424303969b7aa6a5e4ad2789c13f8648739830323b7", size = 418085 }, + { url = "https://files.pythonhosted.org/packages/e1/6b/99ed7ea0a94c7ae5520a21be77a82306aac9e4e715d4435076ead07d05c6/rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a18fc371e900a21d7392517c6f60fe859e802547309e94313cd8181ad9db004d", size = 564498 }, + { url = "https://files.pythonhosted.org/packages/28/26/1cacfee6b800e6fb5f91acecc2e52f17dbf8b0796a7c984b4568b6d70e38/rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:9168764133fd919f8dcca2ead66de0105f4ef5659cbb4fa044f7014bed9a1797", size = 590202 }, + { url = "https://files.pythonhosted.org/packages/a9/9e/57bd2f9fba04a37cef673f9a66b11ca8c43ccdd50d386c455cd4380fe461/rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f6e3cec44ba05ee5cbdebe92d052f69b63ae792e7d05f1020ac5e964394080c", size = 561771 }, + { url = "https://files.pythonhosted.org/packages/9f/cf/b719120f375ab970d1c297dbf8de1e3c9edd26fe92c0ed7178dd94b45992/rpds_py-0.24.0-cp313-cp313t-win32.whl", hash = "sha256:8ebc7e65ca4b111d928b669713865f021b7773350eeac4a31d3e70144297baba", size = 221195 }, + { url = "https://files.pythonhosted.org/packages/2d/e5/22865285789f3412ad0c3d7ec4dc0a3e86483b794be8a5d9ed5a19390900/rpds_py-0.24.0-cp313-cp313t-win_amd64.whl", hash = "sha256:675269d407a257b8c00a6b58205b72eec8231656506c56fd429d924ca00bb350", size = 237354 }, +] + +[[package]] +name = "scikit-learn" +version = "1.6.1" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516 }, + { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837 }, + { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728 }, + { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700 }, + { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613 }, + { url = 
"https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001 }, + { url = "https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360 }, + { url = "https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004 }, + { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776 }, + { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865 }, + { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804 }, + { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530 }, + { url = 
"https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852 }, + { url = "https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256 }, +] + +[[package]] +name = "scipy" +version = "1.15.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/b9/31ba9cd990e626574baf93fbc1ac61cf9ed54faafd04c479117517661637/scipy-1.15.2.tar.gz", hash = "sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec", size = 59417316 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/5d/3c78815cbab499610f26b5bae6aed33e227225a9fa5290008a733a64f6fc/scipy-1.15.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd", size = 38756184 }, + { url = "https://files.pythonhosted.org/packages/37/20/3d04eb066b471b6e171827548b9ddb3c21c6bbea72a4d84fc5989933910b/scipy-1.15.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301", size = 30163558 }, + { url = "https://files.pythonhosted.org/packages/a4/98/e5c964526c929ef1f795d4c343b2ff98634ad2051bd2bbadfef9e772e413/scipy-1.15.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93", size = 22437211 }, + { url = "https://files.pythonhosted.org/packages/1d/cd/1dc7371e29195ecbf5222f9afeedb210e0a75057d8afbd942aa6cf8c8eca/scipy-1.15.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = 
"sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20", size = 25232260 }, + { url = "https://files.pythonhosted.org/packages/f0/24/1a181a9e5050090e0b5138c5f496fee33293c342b788d02586bc410c6477/scipy-1.15.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e", size = 35198095 }, + { url = "https://files.pythonhosted.org/packages/c0/53/eaada1a414c026673eb983f8b4a55fe5eb172725d33d62c1b21f63ff6ca4/scipy-1.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8", size = 37297371 }, + { url = "https://files.pythonhosted.org/packages/e9/06/0449b744892ed22b7e7b9a1994a866e64895363572677a316a9042af1fe5/scipy-1.15.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11", size = 36872390 }, + { url = "https://files.pythonhosted.org/packages/6a/6f/a8ac3cfd9505ec695c1bc35edc034d13afbd2fc1882a7c6b473e280397bb/scipy-1.15.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53", size = 39700276 }, + { url = "https://files.pythonhosted.org/packages/f5/6f/e6e5aff77ea2a48dd96808bb51d7450875af154ee7cbe72188afb0b37929/scipy-1.15.2-cp312-cp312-win_amd64.whl", hash = "sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded", size = 40942317 }, + { url = "https://files.pythonhosted.org/packages/53/40/09319f6e0f276ea2754196185f95cd191cb852288440ce035d5c3a931ea2/scipy-1.15.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf", size = 38717587 }, + { url = "https://files.pythonhosted.org/packages/fe/c3/2854f40ecd19585d65afaef601e5e1f8dbf6758b2f95b5ea93d38655a2c6/scipy-1.15.2-cp313-cp313-macosx_12_0_arm64.whl", hash = 
"sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37", size = 30100266 }, + { url = "https://files.pythonhosted.org/packages/dd/b1/f9fe6e3c828cb5930b5fe74cb479de5f3d66d682fa8adb77249acaf545b8/scipy-1.15.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d", size = 22373768 }, + { url = "https://files.pythonhosted.org/packages/15/9d/a60db8c795700414c3f681908a2b911e031e024d93214f2d23c6dae174ab/scipy-1.15.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb", size = 25154719 }, + { url = "https://files.pythonhosted.org/packages/37/3b/9bda92a85cd93f19f9ed90ade84aa1e51657e29988317fabdd44544f1dd4/scipy-1.15.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27", size = 35163195 }, + { url = "https://files.pythonhosted.org/packages/03/5a/fc34bf1aa14dc7c0e701691fa8685f3faec80e57d816615e3625f28feb43/scipy-1.15.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0", size = 37255404 }, + { url = "https://files.pythonhosted.org/packages/4a/71/472eac45440cee134c8a180dbe4c01b3ec247e0338b7c759e6cd71f199a7/scipy-1.15.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32", size = 36860011 }, + { url = "https://files.pythonhosted.org/packages/01/b3/21f890f4f42daf20e4d3aaa18182dddb9192771cd47445aaae2e318f6738/scipy-1.15.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d", size = 39657406 }, + { url = "https://files.pythonhosted.org/packages/0d/76/77cf2ac1f2a9cc00c073d49e1e16244e389dd88e2490c91d84e1e3e4d126/scipy-1.15.2-cp313-cp313-win_amd64.whl", hash = 
"sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f", size = 40961243 }, + { url = "https://files.pythonhosted.org/packages/4c/4b/a57f8ddcf48e129e6054fa9899a2a86d1fc6b07a0e15c7eebff7ca94533f/scipy-1.15.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9", size = 38870286 }, + { url = "https://files.pythonhosted.org/packages/0c/43/c304d69a56c91ad5f188c0714f6a97b9c1fed93128c691148621274a3a68/scipy-1.15.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f", size = 30141634 }, + { url = "https://files.pythonhosted.org/packages/44/1a/6c21b45d2548eb73be9b9bff421aaaa7e85e22c1f9b3bc44b23485dfce0a/scipy-1.15.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6", size = 22415179 }, + { url = "https://files.pythonhosted.org/packages/74/4b/aefac4bba80ef815b64f55da06f62f92be5d03b467f2ce3668071799429a/scipy-1.15.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af", size = 25126412 }, + { url = "https://files.pythonhosted.org/packages/b1/53/1cbb148e6e8f1660aacd9f0a9dfa2b05e9ff1cb54b4386fe868477972ac2/scipy-1.15.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274", size = 34952867 }, + { url = "https://files.pythonhosted.org/packages/2c/23/e0eb7f31a9c13cf2dca083828b97992dd22f8184c6ce4fec5deec0c81fcf/scipy-1.15.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776", size = 36890009 }, + { url = "https://files.pythonhosted.org/packages/03/f3/e699e19cabe96bbac5189c04aaa970718f0105cff03d458dc5e2b6bd1e8c/scipy-1.15.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828", size = 36545159 }, + { url = "https://files.pythonhosted.org/packages/af/f5/ab3838e56fe5cc22383d6fcf2336e48c8fe33e944b9037fbf6cbdf5a11f8/scipy-1.15.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28", size = 39136566 }, + { url = "https://files.pythonhosted.org/packages/0a/c8/b3f566db71461cabd4b2d5b39bcc24a7e1c119535c8361f81426be39bb47/scipy-1.15.2-cp313-cp313t-win_amd64.whl", hash = "sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db", size = 40477705 }, +] + +[[package]] +name = "seaborn" +version = "0.13.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "matplotlib" }, + { name = "numpy" }, + { name = "pandas" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914 }, +] + +[[package]] +name = "send2trash" +version = "1.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3a/aec9b02217bb79b87bbc1a21bc6abc51e3d5dcf65c30487ac96c0908c722/Send2Trash-1.8.3.tar.gz", hash = "sha256:b18e7a3966d99871aefeb00cfbcfdced55ce4871194810fc71f4aa484b953abf", size = 17394 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9", size = 18072 }, +] + +[[package]] +name = "setuptools" 
+version = "78.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/5a/0db4da3bc908df06e5efae42b44e75c81dd52716e10192ff36d0c1c8e379/setuptools-78.1.0.tar.gz", hash = "sha256:18fd474d4a82a5f83dac888df697af65afa82dec7323d09c3e37d1f14288da54", size = 1367827 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/21/f43f0a1fa8b06b32812e0975981f4677d28e0f3271601dc88ac5a5b83220/setuptools-78.1.0-py3-none-any.whl", hash = "sha256:3e386e96793c8702ae83d17b853fb93d3e09ef82ec62722e61da5cd22376dcd8", size = 1256108 }, +] + +[[package]] +name = "shap" +version = "0.47.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cloudpickle" }, + { name = "numba" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "slicer" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/7d/ffcdb3f245dba816e3c1de87549f1fc3067346c95219e4fcd91fc4bc7962/shap-0.47.1.tar.gz", hash = "sha256:292dc548c53fb0438fddacea793eb3482936621ad50a65529630684155625dce", size = 2501647 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/38/2122dc1ea73b8e152d5547f547a9574f76dc298351c06360d1e0da97f5f3/shap-0.47.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:961fd025aa5d53b6df947ab0e2d6b1f98d206bac36ed65c4c51f74921b7dadbc", size = 500293 }, + { url = "https://files.pythonhosted.org/packages/8a/4f/071cc7b54566765269d4c73a2de8b64b43a1b05f978b66d9a04a8b73ace0/shap-0.47.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0f876a4eb06666555f5b83edca3c900ca7c3a8d5ebf2deef330b41a62f7dda7f", size = 492315 }, + { url = "https://files.pythonhosted.org/packages/01/d8/eec1414b04a21cec3b27834c505de8be861a49eb583550f653d5a4848f26/shap-0.47.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3f67da47d287dcd861dd664d92d595c8a0d5c89cba895e37c21b76ca905edc72", size = 957359 }, + { url = "https://files.pythonhosted.org/packages/41/33/ca5f9dd2e2b6ddf88892f787cfb8fb3ab539488901428a189c14d1ef03b7/shap-0.47.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a36cdc8c681901491714d9cea9f98af4c2da6328394382195386f99d520b5e", size = 967307 }, + { url = "https://files.pythonhosted.org/packages/43/fe/be106ba63beea861faee8a2ae7a6c18704a44784e0f6466a9ef18d7cf0b0/shap-0.47.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55709c08d163b74e9de4b201e8fe37362270914cef43e89c75d58728f1c5f0b6", size = 1985261 }, + { url = "https://files.pythonhosted.org/packages/1c/d8/83094be435583d2ed8d50fe2280497883b767fe19350bfc0165f4a43a053/shap-0.47.1-cp312-cp312-win_amd64.whl", hash = "sha256:4dd5a497dba744e1190fbc1631603f5d8441eabde0bf882b9e6aefb9f083812d", size = 490601 }, +] + +[[package]] +name = "simple-data-science" +version = "0.1.0" +source = { virtual = "." } +dependencies = [ + { name = "jupyter" }, + { name = "kneed" }, + { name = "matplotlib" }, + { name = "numpy" }, + { name = "pandas" }, + { name = "pre-commit" }, + { name = "scikit-learn" }, + { name = "seaborn" }, + { name = "shap" }, + { name = "statsmodels" }, + { name = "xgboost" }, +] + +[package.metadata] +requires-dist = [ + { name = "jupyter", specifier = ">=1.1.1" }, + { name = "kneed", specifier = ">=0.8.5" }, + { name = "matplotlib", specifier = ">=3.10.1" }, + { name = "numpy", specifier = ">=2.1.3" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "scikit-learn", specifier = ">=1.6.1" }, + { name = "seaborn", specifier = ">=0.13.2" }, + { name = "shap", specifier = ">=0.47.1" }, + { name = "statsmodels", specifier = ">=0.14.4" }, + { name = "xgboost", specifier = "~=2.1.4" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "slicer" +version = "0.0.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/f9/b4bce2825b39b57760b361e6131a3dacee3d8951c58cb97ad120abb90317/slicer-0.0.8.tar.gz", hash = "sha256:2e7553af73f0c0c2d355f4afcc3ecf97c6f2156fcf4593955c3f56cf6c4d6eb7", size = 14894 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/81/9ef641ff4e12cbcca30e54e72fb0951a2ba195d0cda0ba4100e532d929db/slicer-0.0.8-py3-none-any.whl", hash = "sha256:6c206258543aecd010d497dc2eca9d2805860a0b3758673903456b7df7934dc3", size = 15251 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "soupsieve" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = 
"sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, +] + +[[package]] +name = "statsmodels" +version = "0.14.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "patsy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/3b/963a015dd8ea17e10c7b0e2f14d7c4daec903baf60a017e756b57953a4bf/statsmodels-0.14.4.tar.gz", hash = "sha256:5d69e0f39060dc72c067f9bb6e8033b6dccdb0bae101d76a7ef0bcc94e898b67", size = 20354802 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/99/654fd41a9024643ee70b239e5ebc987bf98ce9fc2693bd550bee58136564/statsmodels-0.14.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5221dba7424cf4f2561b22e9081de85f5bb871228581124a0d1b572708545199", size = 10220508 }, + { url = 
"https://files.pythonhosted.org/packages/67/d8/ac30cf4cf97adaa48548be57e7cf02e894f31b45fd55bf9213358d9781c9/statsmodels-0.14.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:17672b30c6b98afe2b095591e32d1d66d4372f2651428e433f16a3667f19eabb", size = 9912317 }, + { url = "https://files.pythonhosted.org/packages/e0/77/2440d551eaf27f9c1d3650e13b3821a35ad5b21d3a19f62fb302af9203e8/statsmodels-0.14.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab5e6312213b8cfb9dca93dd46a0f4dccb856541f91d3306227c3d92f7659245", size = 10301662 }, + { url = "https://files.pythonhosted.org/packages/fa/e1/60a652f18996a40a7410aeb7eb476c18da8a39792c7effe67f06883e9852/statsmodels-0.14.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bbb150620b53133d6cd1c5d14c28a4f85701e6c781d9b689b53681effaa655f", size = 10741763 }, + { url = "https://files.pythonhosted.org/packages/81/0c/2453eec3ac25e300847d9ed97f41156de145e507391ecb5ac989e111e525/statsmodels-0.14.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb695c2025d122a101c2aca66d2b78813c321b60d3a7c86bb8ec4467bb53b0f9", size = 10879534 }, + { url = "https://files.pythonhosted.org/packages/59/9a/e466a1b887a1441141e52dbcc98152f013d85076576da6eed2357f2016ae/statsmodels-0.14.4-cp312-cp312-win_amd64.whl", hash = "sha256:7f7917a51766b4e074da283c507a25048ad29a18e527207883d73535e0dc6184", size = 9823866 }, + { url = "https://files.pythonhosted.org/packages/31/f8/2662e6a101315ad336f75168fa9bac71f913ebcb92a6be84031d84a0f21f/statsmodels-0.14.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5a24f5d2c22852d807d2b42daf3a61740820b28d8381daaf59dcb7055bf1a79", size = 10186886 }, + { url = "https://files.pythonhosted.org/packages/fa/c0/ee6e8ed35fc1ca9c7538c592f4974547bf72274bc98db1ae4a6e87481a83/statsmodels-0.14.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df4f7864606fa843d7e7c0e6af288f034a2160dba14e6ccc09020a3cf67cb092", size = 9880066 }, + { url = 
"https://files.pythonhosted.org/packages/d1/97/3380ca6d8fd66cfb3d12941e472642f26e781a311c355a4e97aab2ed0216/statsmodels-0.14.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91341cbde9e8bea5fb419a76e09114e221567d03f34ca26e6d67ae2c27d8fe3c", size = 10283521 }, + { url = "https://files.pythonhosted.org/packages/fe/2a/55c5b5c5e5124a202ea3fe0bcdbdeceaf91b4ec6164b8434acb9dd97409c/statsmodels-0.14.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1322286a7bfdde2790bf72d29698a1b76c20b8423a55bdcd0d457969d0041f72", size = 10723228 }, + { url = "https://files.pythonhosted.org/packages/4f/76/67747e49dc758daae06f33aad8247b718cd7d224f091d2cd552681215bb2/statsmodels-0.14.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e31b95ac603415887c9f0d344cb523889cf779bc52d68e27e2d23c358958fec7", size = 10859503 }, + { url = "https://files.pythonhosted.org/packages/1d/eb/cb8b01f5edf8f135eb3d0553d159db113a35b2948d0e51eeb735e7ae09ea/statsmodels-0.14.4-cp313-cp313-win_amd64.whl", hash = "sha256:81030108d27aecc7995cac05aa280cf8c6025f6a6119894eef648997936c2dd0", size = 9817574 }, +] + +[[package]] +name = "terminado" +version = "0.18.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess", marker = "os_name != 'nt'" }, + { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "tornado" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8a/11/965c6fd8e5cc254f1fe142d547387da17a8ebfd75a3455f637c663fb38a0/terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e", size = 32701 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154 }, +] + +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 }, +] + +[[package]] +name = "tinycss2" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610 }, +] + +[[package]] +name = "tornado" +version = "6.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/59/45/a0daf161f7d6f36c3ea5fc0c2de619746cc3dd4c76402e9db545bd920f63/tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b", size = 501135 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/7e/71f604d8cea1b58f82ba3590290b66da1e72d840aeb37e0d5f7291bd30db/tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1", size = 436299 }, + { url = 
"https://files.pythonhosted.org/packages/96/44/87543a3b99016d0bf54fdaab30d24bf0af2e848f1d13d34a3a5380aabe16/tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803", size = 434253 }, + { url = "https://files.pythonhosted.org/packages/cb/fb/fdf679b4ce51bcb7210801ef4f11fdac96e9885daa402861751353beea6e/tornado-6.4.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a017d239bd1bb0919f72af256a970624241f070496635784d9bf0db640d3fec", size = 437602 }, + { url = "https://files.pythonhosted.org/packages/4f/3b/e31aeffffc22b475a64dbeb273026a21b5b566f74dee48742817626c47dc/tornado-6.4.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c36e62ce8f63409301537222faffcef7dfc5284f27eec227389f2ad11b09d946", size = 436972 }, + { url = "https://files.pythonhosted.org/packages/22/55/b78a464de78051a30599ceb6983b01d8f732e6f69bf37b4ed07f642ac0fc/tornado-6.4.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca9eb02196e789c9cb5c3c7c0f04fb447dc2adffd95265b2c7223a8a615ccbf", size = 437173 }, + { url = "https://files.pythonhosted.org/packages/79/5e/be4fb0d1684eb822c9a62fb18a3e44a06188f78aa466b2ad991d2ee31104/tornado-6.4.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:304463bd0772442ff4d0f5149c6f1c2135a1fae045adf070821c6cdc76980634", size = 437892 }, + { url = "https://files.pythonhosted.org/packages/f5/33/4f91fdd94ea36e1d796147003b490fe60a0215ac5737b6f9c65e160d4fe0/tornado-6.4.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:c82c46813ba483a385ab2a99caeaedf92585a1f90defb5693351fa7e4ea0bf73", size = 437334 }, + { url = "https://files.pythonhosted.org/packages/2b/ae/c1b22d4524b0e10da2f29a176fb2890386f7bd1f63aacf186444873a88a0/tornado-6.4.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:932d195ca9015956fa502c6b56af9eb06106140d844a335590c1ec7f5277d10c", size = 437261 
}, + { url = "https://files.pythonhosted.org/packages/b5/25/36dbd49ab6d179bcfc4c6c093a51795a4f3bed380543a8242ac3517a1751/tornado-6.4.2-cp38-abi3-win32.whl", hash = "sha256:2876cef82e6c5978fde1e0d5b1f919d756968d5b4282418f3146b79b58556482", size = 438463 }, + { url = "https://files.pythonhosted.org/packages/61/cc/58b1adeb1bb46228442081e746fcdbc4540905c87e8add7c277540934edb/tornado-6.4.2-cp38-abi3-win_amd64.whl", hash = "sha256:908b71bf3ff37d81073356a5fadcc660eb10c1476ee6e2725588626ce7e5ca38", size = 438907 }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, +] + +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20241206" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a9/60/47d92293d9bc521cd2301e423a358abfac0ad409b3a1606d8fbae1321961/types_python_dateutil-2.9.0.20241206.tar.gz", hash = "sha256:18f493414c26ffba692a72369fea7a154c502646301ebfe3d56a04b3767284cb", size = 13802 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/b3/ca41df24db5eb99b00d97f89d7674a90cb6b3134c52fb8121b6d8d30f15c/types_python_dateutil-2.9.0.20241206-py3-none-any.whl", hash = "sha256:e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53", size = 14384 }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0e/3e/b00a62db91a83fff600de219b6ea9908e6918664899a2d85db222f4fbf19/typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b", size = 106520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/86/39b65d676ec5732de17b7e3c476e45bb80ec64eb50737a8dce1a4178aba1/typing_extensions-4.13.0-py3-none-any.whl", hash = "sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5", size = 45683 }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] + +[[package]] +name = "uri-template" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/31/c7/0336f2bd0bcbada6ccef7aaa25e443c118a704f828a0620c6fa0207c1b64/uri-template-1.3.0.tar.gz", hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7", size = 21678 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl", hash = "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363", size = 11140 }, +] + +[[package]] +name = "urllib3" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, +] + +[[package]] +name = "virtualenv" +version = "20.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/e0/633e369b91bbc664df47dcb5454b6c7cf441e8f5b9d0c250ce9f0546401e/virtualenv-20.30.0.tar.gz", hash = "sha256:800863162bcaa5450a6e4d721049730e7f2dae07720e0902b0e4040bd6f9ada8", size = 4346945 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/ed/3cfeb48175f0671ec430ede81f628f9fb2b1084c9064ca67ebe8c0ed6a05/virtualenv-20.30.0-py3-none-any.whl", hash = "sha256:e34302959180fca3af42d1800df014b35019490b119eba981af27f2fa486e5d6", size = 4329461 }, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, +] + +[[package]] +name = "webcolors" +version = "24.11.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/29/061ec845fb58521848f3739e466efd8250b4b7b98c1b6c5bf4d40b419b7e/webcolors-24.11.1.tar.gz", hash = "sha256:ecb3d768f32202af770477b8b65f318fa4f566c22948673a977b00d589dd80f6", size = 45064 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/e8/c0e05e4684d13459f93d312077a9a2efbe04d59c393bc2b8802248c908d4/webcolors-24.11.1-py3-none-any.whl", hash = "sha256:515291393b4cdf0eb19c155749a096f779f7d909f7cceea072791cb9095b92e9", size = 14934 }, +] + +[[package]] +name = "webencodings" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774 }, +] + +[[package]] +name = "websocket-client" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, +] + +[[package]] +name = "widgetsnbextension" +version = "4.0.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/fc/238c424fd7f4ebb25f8b1da9a934a3ad7c848286732ae04263661eb0fc03/widgetsnbextension-4.0.13.tar.gz", hash = "sha256:ffcb67bc9febd10234a362795f643927f4e0c05d9342c727b65d2384f8feacb6", size = 1164730 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl", hash = "sha256:74b2692e8500525cc38c2b877236ba51d34541e6385eeed5aec15a70f88a6c71", size = 2335872 }, +] + +[[package]] +name = "xgboost" +version = "2.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e2/5e/860a1ef13ce38db8c257c83e138be64bcffde8f401e84bf1e2e91838afa3/xgboost-2.1.4.tar.gz", hash = "sha256:ab84c4bbedd7fae1a26f61e9dd7897421d5b08454b51c6eb072abc1d346d08d7", size = 1091127 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/fe/7a1d2342c2e93f22b41515e02b73504c7809247b16ae395bd2ee7ef11e19/xgboost-2.1.4-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl", hash = "sha256:78d88da184562deff25c820d943420342014dd55e0f4c017cc4563c2148df5ee", size = 2140692 }, + 
{ url = "https://files.pythonhosted.org/packages/f5/b6/653a70910739f127adffbefb688ebc22b51139292757de7c22b1e04ce792/xgboost-2.1.4-py3-none-macosx_12_0_arm64.whl", hash = "sha256:523db01d4e74b05c61a985028bde88a4dd380eadc97209310621996d7d5d14a7", size = 1939418 }, + { url = "https://files.pythonhosted.org/packages/43/06/905fee34c10fb0d0c3baa15106413b76f360d8e958765ec57c9eddf762fa/xgboost-2.1.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:57c7e98111aceef4b689d7d2ce738564a1f7fe44237136837a47847b8b33bade", size = 4442052 }, + { url = "https://files.pythonhosted.org/packages/f8/6a/41956f91ab984f2fa44529b2551d825a20d33807eba051a60d06ede2a87c/xgboost-2.1.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1343a512e634822eab30d300bfc00bf777dc869d881cc74854b42173cfcdb14", size = 4533170 }, + { url = "https://files.pythonhosted.org/packages/b1/53/37032dca20dae7a88ad1907f817a81f232ca6e935f0c28c98db3c0a0bd22/xgboost-2.1.4-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:d366097d0db047315736f46af852feaa907f6d7371716af741cdce488ae36d20", size = 4206715 }, + { url = "https://files.pythonhosted.org/packages/e4/3c/e3a93bfa7e8693c825df5ec02a40f7ff5f0950e02198b1e85da9315a8d47/xgboost-2.1.4-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8df6da72963969ab2bf49a520c3e147b1e15cbeddd3aa0e3e039b3532c739339", size = 223642416 }, + { url = "https://files.pythonhosted.org/packages/43/80/0b5a2dfcf5b4da27b0b68d2833f05d77e1a374d43db951fca200a1f12a52/xgboost-2.1.4-py3-none-win_amd64.whl", hash = "sha256:8bbfe4fedc151b83a52edbf0de945fd94358b09a81998f2945ad330fd5f20cd6", size = 124910381 }, +] From 8d6a8c735472f696cc33ad2537663e99ef3e7bb2 Mon Sep 17 00:00:00 2001 From: Pedro Antonacio <30991781+antonacio@users.noreply.github.com> Date: Wed, 2 Apr 2025 22:14:35 -0300 Subject: [PATCH 10/10] renaming examples/ folder to docs/ to allow GH pages deployment (#9) --- .pre-commit-config.yaml | 2 +- Makefile | 4 ++-- README.md | 10 +++++----- {examples => docs}/classification-binary.html | 0 
{examples => docs}/classification-multiclass.html | 0 {examples => docs}/clustering.html | 0 {examples => docs}/histogram_analysis.html | 0 {examples => docs}/regression.html | 0 8 files changed, 8 insertions(+), 8 deletions(-) rename {examples => docs}/classification-binary.html (100%) rename {examples => docs}/classification-multiclass.html (100%) rename {examples => docs}/clustering.html (100%) rename {examples => docs}/histogram_analysis.html (100%) rename {examples => docs}/regression.html (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e7f530f..b070009 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^(data/|examples/) +exclude: ^(data/|docs/) repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 diff --git a/Makefile b/Makefile index b186845..5e27e08 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,8 @@ unzip-datasets: unzip -j data/fetal_health.csv.zip -d data/ convert-notebooks-to-html: - rm -rf examples/*.html + rm -rf docs/*.html @for nb in src/*.ipynb; do \ echo "Converting $$nb to HTML..."; \ - WARNING_FILTER_POLICY=ignore jupyter nbconvert --to html --execute "$$nb" --output-dir=examples/ --ExtractOutputPreprocessor.enabled=False; \ + WARNING_FILTER_POLICY=ignore jupyter nbconvert --to html --execute "$$nb" --output-dir=docs/ --ExtractOutputPreprocessor.enabled=False; \ done diff --git a/README.md b/README.md index d687b2b..7565d03 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ This project compiles simple and practical examples for common Data Science use cases with tabular data. You can access complete examples using the following links: -1. Binary Classification -2. Multiclass Classification -3. Regression -4. Clustering -5. Histogram Analysis +1. [Binary Classification](https://antonacio.github.io/simple-data-science/classification-binary.html) +2. 
[Multiclass Classification](https://antonacio.github.io/simple-data-science/classification-multiclass.html) +3. [Regression](https://antonacio.github.io/simple-data-science/regression.html) +4. [Clustering](https://antonacio.github.io/simple-data-science/clustering.html) +5. [Histogram Analysis](https://antonacio.github.io/simple-data-science/histogram_analysis.html) ## Setup diff --git a/examples/classification-binary.html b/docs/classification-binary.html similarity index 100% rename from examples/classification-binary.html rename to docs/classification-binary.html diff --git a/examples/classification-multiclass.html b/docs/classification-multiclass.html similarity index 100% rename from examples/classification-multiclass.html rename to docs/classification-multiclass.html diff --git a/examples/clustering.html b/docs/clustering.html similarity index 100% rename from examples/clustering.html rename to docs/clustering.html diff --git a/examples/histogram_analysis.html b/docs/histogram_analysis.html similarity index 100% rename from examples/histogram_analysis.html rename to docs/histogram_analysis.html diff --git a/examples/regression.html b/docs/regression.html similarity index 100% rename from examples/regression.html rename to docs/regression.html