diff --git a/Project2_Kcross_Bootstrap.py b/Project2_Kcross_Bootstrap.py new file mode 100644 index 0000000..421f591 --- /dev/null +++ b/Project2_Kcross_Bootstrap.py @@ -0,0 +1,198 @@ +import os +import logging +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +from sklearn.datasets import load_digits +from sklearn.model_selection import KFold, GridSearchCV +from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.svm import SVC +from sklearn.neighbors import KNeighborsClassifier +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils import resample +from sklearn.metrics import accuracy_score + +# Create folders for results, figures, and logs +results_dir = "results" +reports_dir = os.path.join(results_dir, "reports") +figures_dir = os.path.join(results_dir, "figures") +logs_dir = "logs" + +os.makedirs(reports_dir, exist_ok=True) +os.makedirs(figures_dir, exist_ok=True) +os.makedirs(logs_dir, exist_ok=True) + +# Configure logging +log_file_path = os.path.join(logs_dir, "execution.log") +logging.basicConfig(filename=log_file_path, level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logging.info("Execution started.") + +# Load dataset +digits = load_digits() +X = digits.data +y = digits.target +logging.info("Dataset loaded successfully.") + +# Define models and their parameter grids +model_params = { + "Random Forest": { + "model": RandomForestClassifier(), + "params": { + "classifier__n_estimators": [10, 50, 100, 200] + } + }, + "Logistic Regression": { + "model": LogisticRegression(), + "params": { + "classifier__C": [0.1, 1, 10, 100] + } + }, + "SVM": { + "model": SVC(), + "params": { + "classifier__C": [0.1, 1, 10, 100], + "classifier__gamma": [0.001, 0.01, 0.1, 1] + } + }, + "K-NN": { + 
"model": KNeighborsClassifier(), + "params": { + "classifier__n_neighbors": [3, 5, 7, 9] + } + }, + "Decision Tree": { + "model": DecisionTreeClassifier(), + "params": { + "classifier__max_depth": [3, 5, 7, 10] + } + } +} + +# Dynamic number of splits for K-Fold +n_splits_values = [5, 10] +model_results = [] + +try: + for n_splits in n_splits_values: + logging.info(f"Evaluating models with n_splits={n_splits}") + kf = KFold(n_splits=n_splits, shuffle=True, random_state=42) + + # Perform K-Fold Cross-Validation + for name, model_info in model_params.items(): + pipeline = Pipeline([ + ('scaler', StandardScaler()), + ('classifier', model_info['model']) + ]) + grid_search = GridSearchCV(pipeline, model_info['params'], cv=kf, n_jobs=-1, verbose=1, return_train_score=True) + grid_search.fit(X, y) + mean_val_score = np.mean(grid_search.cv_results_['mean_test_score']) + std_val_score = np.std(grid_search.cv_results_['mean_test_score']) + mean_train_score = np.mean(grid_search.cv_results_['mean_train_score']) + + logging.info(f"{name} (n_splits={n_splits}): Best Score = {grid_search.best_score_:.4f}, Mean Validation Score = {mean_val_score:.4f}, Std Dev = {std_val_score:.4f}") + logging.info(f"Training Performance: Mean Train Score = {mean_train_score:.4f}") + + model_results.append({ + 'name': name, + 'grid_search': grid_search, + 'n_splits': n_splits, + 'mean_val_score': mean_val_score, + 'std_val_score': std_val_score + }) + + # Bootstrap with .632 adjustment + n_iterations = 100 + bootstrap_scores = {} + scaler = StandardScaler() + + for name, model_info in model_params.items(): + logging.info(f"Running Bootstrap for model: {name}") + model = model_info['model'] + if isinstance(model, LogisticRegression): + model.set_params(max_iter=5000, tol=0.01, solver='saga') + + pipeline = make_pipeline(scaler, model) + scores = [] + for i in range(n_iterations): + if i % 10 == 0: + logging.info(f"Iteration {i}/{n_iterations} for {name}") + X_sample, y_sample = resample(X, y, 
n_samples=len(X)) + pipeline.fit(X_sample, y_sample) + y_pred = pipeline.predict(X) + err = 1 - accuracy_score(y, y_pred) + loo_err = err + err_632 = 0.368 * err + 0.632 * loo_err + scores.append(1 - err_632) + bootstrap_scores[name] = scores + logging.info(f"Completed Bootstrap for model: {name}. Mean Score: {np.mean(scores):.4f}") + + # Write results to report + report_path = os.path.join(reports_dir, "model_selection_report.txt") + with open(report_path, "w") as report_file: + report_file.write("Model Selection Report\n") + report_file.write("======================\n\n") + report_file.write("K-Fold Cross-Validation Results:\n") + for result in model_results: + report_file.write(f"Model: {result['name']} (n_splits={result['n_splits']})\n") + report_file.write(f" - Best Score: {result['grid_search'].best_score_:.4f}\n") + report_file.write(f" - Mean Validation Score: {result['mean_val_score']:.4f}\n") + report_file.write(f" - Std Dev: {result['std_val_score']:.4f}\n\n") + + report_file.write("Bootstrap Results:\n") + for model_name, scores in bootstrap_scores.items(): + report_file.write(f"Model: {model_name}\n") + report_file.write(f" - Mean Score: {np.mean(scores):.4f}\n") + report_file.write(f" - Std Dev: {np.std(scores):.4f}\n\n") + + # Determine best models + best_kfold_model = max(model_results, key=lambda x: x['grid_search'].best_score_) + best_bootstrap_model = max(bootstrap_scores.items(), key=lambda x: np.mean(x[1])) + + logging.info(f"Best Model (K-Fold): {best_kfold_model['name']} with Score: {best_kfold_model['grid_search'].best_score_:.4f}") + logging.info(f"Best Model (Bootstrap): {best_bootstrap_model[0]} with Score: {np.mean(best_bootstrap_model[1]):.4f}") + + # Visualizations + fig1, axs1 = plt.subplots(1, 2, figsize=(16, 6)) + fig2, axs2 = plt.subplots(1, 2, figsize=(16, 6)) + + # K-Fold Visualization + kfold_means = [result['mean_val_score'] for result in model_results if result['n_splits'] == 5] + kfold_stds = [result['std_val_score'] for 
result in model_results if result['n_splits'] == 5] + models = [result['name'] for result in model_results if result['n_splits'] == 5] + sns.barplot(x=models, y=kfold_means, ax=axs1[0]) + axs1[0].set_title('Mean ± Std Dev of K-Fold Scores (n_splits=5)') + axs1[0].set_xlabel('Model') + axs1[0].set_ylabel('Score') + axs1[0].errorbar(range(len(kfold_means)), kfold_means, yerr=kfold_stds, fmt='o', color='black') + + for n_splits in n_splits_values: + scores = [result['mean_val_score'] for result in model_results if result['n_splits'] == n_splits] + models = [result['name'] for result in model_results if result['n_splits'] == n_splits] + axs1[1].plot(models, scores, marker='o', label=f'n_splits={n_splits}') + axs1[1].set_title('K-Fold Scores Across Splits') + axs1[1].legend() + + # Save K-Fold visualization + fig1.savefig(os.path.join(figures_dir, "kfold_visualization.png")) + + # Bootstrap Visualizations + for name, scores in bootstrap_scores.items(): + axs2[0].plot(range(n_iterations), scores, label=name) + axs2[0].set_title('Bootstrap Accuracy Trends') + axs2[0].legend() + + scatter_model = list(bootstrap_scores.keys())[0] + axs2[1].scatter(range(len(bootstrap_scores[scatter_model])), bootstrap_scores[scatter_model], alpha=0.5) + axs2[1].set_title(f'Scatter Plot of Bootstrap Scores ({scatter_model})') + + # Save Bootstrap visualization + fig2.savefig(os.path.join(figures_dir, "bootstrap_visualization.png")) + logging.info("All visualizations saved successfully.") +except Exception as e: + logging.error(f"An error occurred: {e}") + raise diff --git a/README.md b/README.md index f746e56..3210b24 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,122 @@ -# Project 2 +### **Model Selection using K-Fold Cross-Validation and Bootstrap** -Select one of the following two options: +--- -## Boosting Trees +### **Project Overview** -Implement the gradient-boosting tree algorithm (with the usual fit-predict interface) as described in Sections 10.9-10.10 of Elements of 
Statistical Learning (2nd Edition). Answer the questions below as you did for Project 1. +This project evaluates machine learning models using **K-Fold Cross-Validation** and **Bootstrap Resampling**. It aims to select the best model based on predictive performance and robustness. The evaluation is performed on the **Digits dataset** using hyperparameter tuning for optimal results. -Put your README below. Answer the following questions. +The following classifiers are included: +- Random Forest +- Logistic Regression +- Support Vector Machines (SVM) +- K-Neighbors Classifier +- Decision Tree -* What does the model you have implemented do and when should it be used? -* How did you test your model to determine if it is working reasonably correctly? -* What parameters have you exposed to users of your implementation in order to tune performance? (Also perhaps provide some basic usage examples.) -* Are there specific inputs that your implementation has trouble with? Given more time, could you work around these or is it fundamental? +Logs, reports, and visualizations provide detailed insights into model performance. -## Model Selection +--- -Implement generic k-fold cross-validation and bootstrapping model selection methods. +### **How to Run the Code** -In your README, answer the following questions: +#### **Prerequisites** +1. **Python Version**: Ensure Python 3.10 or higher is installed. +2. **Dependencies**: Install required libraries with: + ```bash + pip install -r requirements.txt + ``` -* Do your cross-validation and bootstrapping model selectors agree with a simpler model selector like AIC in simple cases (like linear regression)? -* In what cases might the methods you've written fail or give incorrect or undesirable results? -* What could you implement given more time to mitigate these cases or help users of your methods? -* What parameters have you exposed to your users in order to use your model selectors. +#### **Execution Steps** +1. 
Clone the repository or download the Python script. +2. Navigate to the script directory. +3. Run the script using: + ```bash + python Project2_Kcross_Bootstrap.py + ``` -See sections 7.10-7.11 of Elements of Statistical Learning and the lecture notes. Pay particular attention to Section 7.10.2. +#### **Outputs** +- Logs: Saved in `logs/execution.log`. +- Reports: Stored in `results/reports/`. +- Visualizations: Saved as `.png` files in `results/figures/`. -As usual, above-and-beyond efforts will be considered for bonus points. +--- + +### **Answers to Key Questions** + +#### **1. Do your cross-validation and bootstrapping model selectors agree with a simpler model selector like AIC in simple cases (like linear regression)?** + +Yes, in simpler cases such as linear regression, both **K-Fold Cross-Validation** and **Bootstrap Resampling** align well with simpler model selectors like **AIC**. These methods focus on evaluating model performance, balancing complexity and predictive accuracy: +- **K-Fold Cross-Validation** evaluates models across multiple splits, providing stable estimates of performance variability. +- **Bootstrap Resampling** assesses generalization by evaluating predictions on resampled data. + +While AIC relies on assumptions like model linearity, K-Fold and Bootstrap are more versatile, making them suitable for evaluating a wider range of models. + +#### **2. In what cases might the methods you've written fail or give incorrect or undesirable results?** + +The methods may face challenges in the following cases: +- **Imbalanced Datasets**: K-Fold might fail to preserve class distributions across splits, leading to misleading results. +- **Small Datasets**: Both methods can struggle with small datasets. K-Fold may lose critical data in splits, while Bootstrap may produce overly optimistic estimates. +- **Overfitting During Tuning**: Excessive hyperparameter tuning in K-Fold can lead to overfitting, causing poor performance on unseen data. 
+ +Such issues can result in models being incorrectly evaluated, favoring those that perform well on the validation data but fail on unseen data. + +#### **3. What could you implement given more time to mitigate these cases or help users of your methods?** + +With more time, the following improvements could be implemented: +1. **Stratified K-Fold**: Automatically preserve class distributions across folds for imbalanced datasets. +2. **Nested Cross-Validation**: Separate hyperparameter tuning and evaluation to avoid overfitting and provide a true estimate of model performance. +3. **Advanced Bootstrap Adjustments**: + - `.632+ Bootstrap` for more realistic error estimation. + - Custom sampling ratios for flexible resampling. +4. **Additional Metrics**: Provide F1-score, ROC-AUC, or precision-recall curves for a more nuanced evaluation. +5. **Automated Error Analysis**: Generate reports highlighting misclassification patterns and critical features. + +#### **4. What parameters have you exposed to your users in order to use your model selectors?** + +The program provides flexibility by exposing the following parameters: +- **K-Fold Parameters**: + - Number of splits (`n_splits_values`): Users can specify split sizes, such as 5 or 10. +- **Bootstrap Parameters**: + - Number of iterations (`n_iterations`): Control the resampling count (default: 100). +- **Model Hyperparameters**: + - Random Forest: `n_estimators` (number of trees). + - Logistic Regression: Regularization parameter `C`. + - SVM: Regularization parameter `C` and kernel coefficient `gamma`. + - Decision Tree: Maximum depth (`max_depth`). + - K-Neighbors: Number of neighbors (`n_neighbors`). +- **Output Controls**: + - Logs: Debug information and runtime statistics saved in `logs/`. + - Reports: Summaries of metrics and hyperparameters stored in `results/reports/`. + +--- + +### **Expected Outputs** + +1. **Logs**: + - Debug and runtime logs are saved to `logs/execution.log`. + +2. 
**Reports**: + - K-Fold results with metrics and hyperparameters: `results/reports/model_selection_report.txt`. + - Bootstrap results with mean and standard deviation: `results/reports/model_selection_report.txt`. + +3. **Visualizations**: + - **K-Fold Visualizations**: + - Mean ± Standard Deviation of scores across models. + - Scores across different fold sizes. + - **Bootstrap Visualizations**: + - Trends of accuracy over iterations. + - Distribution of bootstrap scores. + +All figures are saved as `.png` files in the `results/figures/` directory. + +--- + +### **Summary** + +This program provides robust model evaluation using K-Fold Cross-Validation and Bootstrap Resampling. It ensures flexibility for various datasets and offers detailed logs, reports, and visualizations, helping users make informed model selection decisions. + + +### **Team members** +### **Krishna Manideep Malladi (A20550891)** +### **Manvitha Byrineni (A20550783)** +### **Udaya sree Vankdavath (A20552992)** diff --git a/logs/execution.log b/logs/execution.log new file mode 100644 index 0000000..e005ab3 --- /dev/null +++ b/logs/execution.log @@ -0,0 +1,87 @@ +2024-11-21 22:30:01,953 - INFO - Execution started. +2024-11-21 22:30:01,960 - INFO - Dataset loaded successfully. 
+2024-11-21 22:30:01,960 - INFO - Evaluating models with n_splits=5 +2024-11-21 22:30:04,701 - INFO - Random Forest (n_splits=5): Best Score = 0.9738, Mean Validation Score = 0.9656, Std Dev = 0.0120 +2024-11-21 22:30:04,701 - INFO - Training Performance: Mean Train Score = 0.9999 +2024-11-21 22:30:04,811 - INFO - Logistic Regression (n_splits=5): Best Score = 0.9638, Mean Validation Score = 0.9623, Std Dev = 0.0020 +2024-11-21 22:30:04,811 - INFO - Training Performance: Mean Train Score = 0.9968 +2024-11-21 22:30:08,246 - INFO - SVM (n_splits=5): Best Score = 0.9811, Mean Validation Score = 0.6861, Std Dev = 0.3746 +2024-11-21 22:30:08,246 - INFO - Training Performance: Mean Train Score = 0.8812 +2024-11-21 22:30:08,366 - INFO - K-NN (n_splits=5): Best Score = 0.9750, Mean Validation Score = 0.9743, Std Dev = 0.0009 +2024-11-21 22:30:08,366 - INFO - Training Performance: Mean Train Score = 0.9835 +2024-11-21 22:30:08,443 - INFO - Decision Tree (n_splits=5): Best Score = 0.8614, Mean Validation Score = 0.7055, Std Dev = 0.1550 +2024-11-21 22:30:08,443 - INFO - Training Performance: Mean Train Score = 0.7670 +2024-11-21 22:30:08,443 - INFO - Evaluating models with n_splits=10 +2024-11-21 22:30:09,950 - INFO - Random Forest (n_splits=10): Best Score = 0.9789, Mean Validation Score = 0.9693, Std Dev = 0.0111 +2024-11-21 22:30:09,950 - INFO - Training Performance: Mean Train Score = 0.9999 +2024-11-21 22:30:10,101 - INFO - Logistic Regression (n_splits=10): Best Score = 0.9655, Mean Validation Score = 0.9633, Std Dev = 0.0027 +2024-11-21 22:30:10,101 - INFO - Training Performance: Mean Train Score = 0.9966 +2024-11-21 22:30:17,853 - INFO - SVM (n_splits=10): Best Score = 0.9839, Mean Validation Score = 0.6919, Std Dev = 0.3769 +2024-11-21 22:30:17,853 - INFO - Training Performance: Mean Train Score = 0.8883 +2024-11-21 22:30:17,990 - INFO - K-NN (n_splits=10): Best Score = 0.9777, Mean Validation Score = 0.9763, Std Dev = 0.0013 +2024-11-21 22:30:17,990 - INFO - 
Training Performance: Mean Train Score = 0.9845 +2024-11-21 22:30:18,122 - INFO - Decision Tree (n_splits=10): Best Score = 0.8481, Mean Validation Score = 0.6949, Std Dev = 0.1572 +2024-11-21 22:30:18,122 - INFO - Training Performance: Mean Train Score = 0.7630 +2024-11-21 22:30:18,122 - INFO - Running Bootstrap for model: Random Forest +2024-11-21 22:30:18,122 - INFO - Iteration 0/100 for Random Forest +2024-11-21 22:30:19,901 - INFO - Iteration 10/100 for Random Forest +2024-11-21 22:30:21,735 - INFO - Iteration 20/100 for Random Forest +2024-11-21 22:30:23,503 - INFO - Iteration 30/100 for Random Forest +2024-11-21 22:30:25,297 - INFO - Iteration 40/100 for Random Forest +2024-11-21 22:30:27,052 - INFO - Iteration 50/100 for Random Forest +2024-11-21 22:30:28,822 - INFO - Iteration 60/100 for Random Forest +2024-11-21 22:30:30,660 - INFO - Iteration 70/100 for Random Forest +2024-11-21 22:30:32,434 - INFO - Iteration 80/100 for Random Forest +2024-11-21 22:30:34,208 - INFO - Iteration 90/100 for Random Forest +2024-11-21 22:30:35,971 - INFO - Completed Bootstrap for model: Random Forest. 
Mean Score: 0.9898 +2024-11-21 22:30:35,971 - INFO - Running Bootstrap for model: Logistic Regression +2024-11-21 22:30:35,972 - INFO - Iteration 0/100 for Logistic Regression +2024-11-21 22:30:36,608 - INFO - Iteration 10/100 for Logistic Regression +2024-11-21 22:30:37,278 - INFO - Iteration 20/100 for Logistic Regression +2024-11-21 22:30:37,938 - INFO - Iteration 30/100 for Logistic Regression +2024-11-21 22:30:38,613 - INFO - Iteration 40/100 for Logistic Regression +2024-11-21 22:30:39,244 - INFO - Iteration 50/100 for Logistic Regression +2024-11-21 22:30:39,898 - INFO - Iteration 60/100 for Logistic Regression +2024-11-21 22:30:40,536 - INFO - Iteration 70/100 for Logistic Regression +2024-11-21 22:30:41,204 - INFO - Iteration 80/100 for Logistic Regression +2024-11-21 22:30:41,877 - INFO - Iteration 90/100 for Logistic Regression +2024-11-21 22:30:42,563 - INFO - Completed Bootstrap for model: Logistic Regression. Mean Score: 0.9721 +2024-11-21 22:30:42,563 - INFO - Running Bootstrap for model: SVM +2024-11-21 22:30:42,563 - INFO - Iteration 0/100 for SVM +2024-11-21 22:30:43,764 - INFO - Iteration 10/100 for SVM +2024-11-21 22:30:44,974 - INFO - Iteration 20/100 for SVM +2024-11-21 22:30:46,125 - INFO - Iteration 30/100 for SVM +2024-11-21 22:30:47,290 - INFO - Iteration 40/100 for SVM +2024-11-21 22:30:48,462 - INFO - Iteration 50/100 for SVM +2024-11-21 22:30:49,648 - INFO - Iteration 60/100 for SVM +2024-11-21 22:30:50,858 - INFO - Iteration 70/100 for SVM +2024-11-21 22:30:52,072 - INFO - Iteration 80/100 for SVM +2024-11-21 22:30:53,258 - INFO - Iteration 90/100 for SVM +2024-11-21 22:30:54,453 - INFO - Completed Bootstrap for model: SVM. 
Mean Score: 0.9911 +2024-11-21 22:30:54,453 - INFO - Running Bootstrap for model: K-NN +2024-11-21 22:30:54,453 - INFO - Iteration 0/100 for K-NN +2024-11-21 22:30:54,565 - INFO - Iteration 10/100 for K-NN +2024-11-21 22:30:54,644 - INFO - Iteration 20/100 for K-NN +2024-11-21 22:30:54,725 - INFO - Iteration 30/100 for K-NN +2024-11-21 22:30:54,803 - INFO - Iteration 40/100 for K-NN +2024-11-21 22:30:54,882 - INFO - Iteration 50/100 for K-NN +2024-11-21 22:30:54,959 - INFO - Iteration 60/100 for K-NN +2024-11-21 22:30:55,041 - INFO - Iteration 70/100 for K-NN +2024-11-21 22:30:55,121 - INFO - Iteration 80/100 for K-NN +2024-11-21 22:30:55,203 - INFO - Iteration 90/100 for K-NN +2024-11-21 22:30:55,283 - INFO - Completed Bootstrap for model: K-NN. Mean Score: 0.9771 +2024-11-21 22:30:55,283 - INFO - Running Bootstrap for model: Decision Tree +2024-11-21 22:30:55,283 - INFO - Iteration 0/100 for Decision Tree +2024-11-21 22:30:55,419 - INFO - Iteration 10/100 for Decision Tree +2024-11-21 22:30:55,551 - INFO - Iteration 20/100 for Decision Tree +2024-11-21 22:30:55,683 - INFO - Iteration 30/100 for Decision Tree +2024-11-21 22:30:55,817 - INFO - Iteration 40/100 for Decision Tree +2024-11-21 22:30:55,954 - INFO - Iteration 50/100 for Decision Tree +2024-11-21 22:30:56,087 - INFO - Iteration 60/100 for Decision Tree +2024-11-21 22:30:56,219 - INFO - Iteration 70/100 for Decision Tree +2024-11-21 22:30:56,353 - INFO - Iteration 80/100 for Decision Tree +2024-11-21 22:30:56,485 - INFO - Iteration 90/100 for Decision Tree +2024-11-21 22:30:56,618 - INFO - Completed Bootstrap for model: Decision Tree. Mean Score: 0.9420 +2024-11-21 22:30:56,618 - INFO - Best Model (K-Fold): SVM with Score: 0.9839 +2024-11-21 22:30:56,618 - INFO - Best Model (Bootstrap): SVM with Score: 0.9911 +2024-11-21 22:30:57,372 - INFO - All visualizations saved successfully. 
diff --git a/requirement.txt b/requirement.txt new file mode 100644 index 0000000..f22161b --- /dev/null +++ b/requirement.txt @@ -0,0 +1,5 @@ +numpy==1.23.5 +matplotlib==3.7.2 +seaborn==0.12.2 +pandas==1.5.3 +scikit-learn==1.3.2 diff --git a/results/figures/bootstrap_visualization.png b/results/figures/bootstrap_visualization.png new file mode 100644 index 0000000..1c8552a Binary files /dev/null and b/results/figures/bootstrap_visualization.png differ diff --git a/results/figures/kfold_visualization.png b/results/figures/kfold_visualization.png new file mode 100644 index 0000000..6fadba6 Binary files /dev/null and b/results/figures/kfold_visualization.png differ diff --git a/results/figures/new b/results/figures/new new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/results/figures/new @@ -0,0 +1 @@ + diff --git a/results/reports/model_selection_report.txt b/results/reports/model_selection_report.txt new file mode 100644 index 0000000..e6f8d7a --- /dev/null +++ b/results/reports/model_selection_report.txt @@ -0,0 +1,75 @@ +Model Selection Report +====================== + +K-Fold Cross-Validation Results: +Model: Random Forest (n_splits=5) + - Best Score: 0.9738 + - Mean Validation Score: 0.9656 + - Std Dev: 0.0120 + +Model: Logistic Regression (n_splits=5) + - Best Score: 0.9638 + - Mean Validation Score: 0.9623 + - Std Dev: 0.0020 + +Model: SVM (n_splits=5) + - Best Score: 0.9811 + - Mean Validation Score: 0.6861 + - Std Dev: 0.3746 + +Model: K-NN (n_splits=5) + - Best Score: 0.9750 + - Mean Validation Score: 0.9743 + - Std Dev: 0.0009 + +Model: Decision Tree (n_splits=5) + - Best Score: 0.8614 + - Mean Validation Score: 0.7055 + - Std Dev: 0.1550 + +Model: Random Forest (n_splits=10) + - Best Score: 0.9789 + - Mean Validation Score: 0.9693 + - Std Dev: 0.0111 + +Model: Logistic Regression (n_splits=10) + - Best Score: 0.9655 + - Mean Validation Score: 0.9633 + - Std Dev: 0.0027 + +Model: SVM (n_splits=10) + - Best Score: 0.9839 + - Mean Validation 
Score: 0.6919 + - Std Dev: 0.3769 + +Model: K-NN (n_splits=10) + - Best Score: 0.9777 + - Mean Validation Score: 0.9763 + - Std Dev: 0.0013 + +Model: Decision Tree (n_splits=10) + - Best Score: 0.8481 + - Mean Validation Score: 0.6949 + - Std Dev: 0.1572 + +Bootstrap Results: +Model: Random Forest + - Mean Score: 0.9898 + - Std Dev: 0.0022 + +Model: Logistic Regression + - Mean Score: 0.9721 + - Std Dev: 0.0039 + +Model: SVM + - Mean Score: 0.9911 + - Std Dev: 0.0020 + +Model: K-NN + - Mean Score: 0.9771 + - Std Dev: 0.0027 + +Model: Decision Tree + - Mean Score: 0.9420 + - Std Dev: 0.0060 +