diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb
index 847d487..56486fa 100644
--- a/lab-hyper-tuning.ipynb
+++ b/lab-hyper-tuning.ipynb
@@ -1,343 +1,78 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# LAB | Hyperparameter Tuning"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Load the data**\n",
- "\n",
- "Finally step in order to maximize the performance on your Spaceship Titanic model.\n",
- "\n",
- "The data can be found here:\n",
- "\n",
- "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\n",
- "\n",
- "Metadata\n",
- "\n",
- "https://github.com/data-bootcamp-v4/data/blob/main/spaceship_titanic.md"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "So far we've been training and evaluating models with default values for hyperparameters.\n",
- "\n",
- "Today we will perform the same feature engineering as before, and then compare the best working models you got so far, but now fine tuning it's hyperparameters."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Libraries\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from sklearn.model_selection import train_test_split"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " PassengerId | \n",
- " HomePlanet | \n",
- " CryoSleep | \n",
- " Cabin | \n",
- " Destination | \n",
- " Age | \n",
- " VIP | \n",
- " RoomService | \n",
- " FoodCourt | \n",
- " ShoppingMall | \n",
- " Spa | \n",
- " VRDeck | \n",
- " Name | \n",
- " Transported | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 0001_01 | \n",
- " Europa | \n",
- " False | \n",
- " B/0/P | \n",
- " TRAPPIST-1e | \n",
- " 39.0 | \n",
- " False | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " Maham Ofracculy | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 0002_01 | \n",
- " Earth | \n",
- " False | \n",
- " F/0/S | \n",
- " TRAPPIST-1e | \n",
- " 24.0 | \n",
- " False | \n",
- " 109.0 | \n",
- " 9.0 | \n",
- " 25.0 | \n",
- " 549.0 | \n",
- " 44.0 | \n",
- " Juanna Vines | \n",
- " True | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 0003_01 | \n",
- " Europa | \n",
- " False | \n",
- " A/0/S | \n",
- " TRAPPIST-1e | \n",
- " 58.0 | \n",
- " True | \n",
- " 43.0 | \n",
- " 3576.0 | \n",
- " 0.0 | \n",
- " 6715.0 | \n",
- " 49.0 | \n",
- " Altark Susent | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 0003_02 | \n",
- " Europa | \n",
- " False | \n",
- " A/0/S | \n",
- " TRAPPIST-1e | \n",
- " 33.0 | \n",
- " False | \n",
- " 0.0 | \n",
- " 1283.0 | \n",
- " 371.0 | \n",
- " 3329.0 | \n",
- " 193.0 | \n",
- " Solam Susent | \n",
- " False | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 0004_01 | \n",
- " Earth | \n",
- " False | \n",
- " F/1/S | \n",
- " TRAPPIST-1e | \n",
- " 16.0 | \n",
- " False | \n",
- " 303.0 | \n",
- " 70.0 | \n",
- " 151.0 | \n",
- " 565.0 | \n",
- " 2.0 | \n",
- " Willy Santantines | \n",
- " True | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " PassengerId HomePlanet CryoSleep Cabin Destination Age VIP \\\n",
- "0 0001_01 Europa False B/0/P TRAPPIST-1e 39.0 False \n",
- "1 0002_01 Earth False F/0/S TRAPPIST-1e 24.0 False \n",
- "2 0003_01 Europa False A/0/S TRAPPIST-1e 58.0 True \n",
- "3 0003_02 Europa False A/0/S TRAPPIST-1e 33.0 False \n",
- "4 0004_01 Earth False F/1/S TRAPPIST-1e 16.0 False \n",
- "\n",
- " RoomService FoodCourt ShoppingMall Spa VRDeck Name \\\n",
- "0 0.0 0.0 0.0 0.0 0.0 Maham Ofracculy \n",
- "1 109.0 9.0 25.0 549.0 44.0 Juanna Vines \n",
- "2 43.0 3576.0 0.0 6715.0 49.0 Altark Susent \n",
- "3 0.0 1283.0 371.0 3329.0 193.0 Solam Susent \n",
- "4 303.0 70.0 151.0 565.0 2.0 Willy Santantines \n",
- "\n",
- " Transported \n",
- "0 False \n",
- "1 True \n",
- "2 False \n",
- "3 False \n",
- "4 True "
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "spaceship = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\")\n",
- "spaceship.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now perform the same as before:\n",
- "- Feature Scaling\n",
- "- Feature Selection\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Evaluate your model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Grid/Random Search**"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For this lab we will use Grid Search."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Define hyperparameters to fine tune."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Run Grid Search"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Evaluate your model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
+# Libraries
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.feature_selection import SelectKBest, f_classif
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
+
+# 1️⃣ Load data and prepare it in a single chained expression, so that
+# re-running this code always starts again from the raw CSV
+spaceship = (
+    pd.read_csv("https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv")
+    # Identifier-like columns are dropped rather than encoded
+    .drop(columns=["PassengerId", "Name", "Cabin"])
+    # The two flag features are cast to float and the target to int
+    # (float presumably because the flags can still hold NaN here -- confirm)
+    .astype({"CryoSleep": float, "VIP": float, "Transported": int})
+    # NOTE: this fills every column, text ones included, so a missing
+    # HomePlanet/Destination becomes the value 0 and is one more category below
+    .fillna(0)
+    # One-hot encode the two text columns; drop_first=True leaves out the
+    # first level of each
+    .pipe(pd.get_dummies, columns=["HomePlanet", "Destination"], drop_first=True)
+)
+
+# 2️⃣ Separate the target column from the feature columns
+y = spaceship["Transported"]
+X = spaceship.drop(columns=["Transported"])
+
+# Hold out 20% of the rows for the final evaluation; the fixed seed makes
+# the same rows land in the test set on every run
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
+
+# 3️⃣ Feature Scaling: the scaler is fitted on the training split only
+scaler = StandardScaler().fit(X_train)
+X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))
+
+# 4️⃣ Feature Selection: keep the 10 best-scoring columns under f_classif
+selector = SelectKBest(f_classif, k=10).fit(X_train_scaled, y_train)
+X_train_selected, X_test_selected = (
+    selector.transform(part) for part in (X_train_scaled, X_test_scaled)
+)
+
+# 5️⃣ Baseline: a random forest left at its default hyperparameters (seeded)
+model = RandomForestClassifier(random_state=42).fit(X_train_selected, y_train)
+
+# 6️⃣ Score the baseline on the held-out rows; this is the reference number
+# for any tuned model
+y_pred = model.predict(X_test_selected)
+accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
+print("Baseline accuracy:", accuracy)
+
+# 7️⃣ Hyperparameter grid for a scale -> select -> forest pipeline. The
+# transformers sit inside the pipeline so every CV fold refits them on its
+# own training part; reusing ones fitted on all of X_train leaks fold data.
+from sklearn.pipeline import Pipeline
+param_grid = {
+    "rf__n_estimators": [100, 200, 300],  # "rf__" routes to the forest step
+    "rf__max_depth": [None, 10, 20],
+    "rf__min_samples_split": [2, 5, 10],
+    "rf__min_samples_leaf": [1, 2, 4],
 }
+# 8️⃣ Grid Search: 81 candidates x 5 folds, fitted on the raw training split
+grid_search = GridSearchCV(
+    estimator=Pipeline([
+        ("scale", StandardScaler()),
+        ("select", SelectKBest(score_func=f_classif, k=10)),
+        ("rf", RandomForestClassifier(random_state=42)),
+    ]),
+    param_grid=param_grid, cv=5, scoring="accuracy", n_jobs=-1,
+).fit(X_train, y_train)
+print("Best parameters:", grid_search.best_params_)
+# 9️⃣ Evaluate the refitted best pipeline on the untouched test split
+best_model = grid_search.best_estimator_  # a Pipeline, so it takes raw X_test
+y_pred = best_model.predict(X_test)
+accuracy = accuracy_score(y_test, y_pred)
+print("Best model accuracy:", accuracy)