From 397d7e4ca63d139eadadd01f70263aa4ca7f0a8b Mon Sep 17 00:00:00 2001 From: ceciliabetek Date: Wed, 11 Mar 2026 20:05:30 +0100 Subject: [PATCH] Update lab-hyper-tuning.ipynb --- lab-hyper-tuning.ipynb | 419 ++++++++--------------------------------- 1 file changed, 77 insertions(+), 342 deletions(-) diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..56486fa 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -1,343 +1,78 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# LAB | Hyperparameter Tuning" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Load the data**\n", - "\n", - "Finally step in order to maximize the performance on your Spaceship Titanic model.\n", - "\n", - "The data can be found here:\n", - "\n", - "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\n", - "\n", - "Metadata\n", - "\n", - "https://github.com/data-bootcamp-v4/data/blob/main/spaceship_titanic.md" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So far we've been training and evaluating models with default values for hyperparameters.\n", - "\n", - "Today we will perform the same feature engineering as before, and then compare the best working models you got so far, but now fine tuning it's hyperparameters." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#Libraries\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PassengerIdHomePlanetCryoSleepCabinDestinationAgeVIPRoomServiceFoodCourtShoppingMallSpaVRDeckNameTransported
00001_01EuropaFalseB/0/PTRAPPIST-1e39.0False0.00.00.00.00.0Maham OfracculyFalse
10002_01EarthFalseF/0/STRAPPIST-1e24.0False109.09.025.0549.044.0Juanna VinesTrue
20003_01EuropaFalseA/0/STRAPPIST-1e58.0True43.03576.00.06715.049.0Altark SusentFalse
30003_02EuropaFalseA/0/STRAPPIST-1e33.0False0.01283.0371.03329.0193.0Solam SusentFalse
40004_01EarthFalseF/1/STRAPPIST-1e16.0False303.070.0151.0565.02.0Willy SantantinesTrue
\n", - "
" - ], - "text/plain": [ - " PassengerId HomePlanet CryoSleep Cabin Destination Age VIP \\\n", - "0 0001_01 Europa False B/0/P TRAPPIST-1e 39.0 False \n", - "1 0002_01 Earth False F/0/S TRAPPIST-1e 24.0 False \n", - "2 0003_01 Europa False A/0/S TRAPPIST-1e 58.0 True \n", - "3 0003_02 Europa False A/0/S TRAPPIST-1e 33.0 False \n", - "4 0004_01 Earth False F/1/S TRAPPIST-1e 16.0 False \n", - "\n", - " RoomService FoodCourt ShoppingMall Spa VRDeck Name \\\n", - "0 0.0 0.0 0.0 0.0 0.0 Maham Ofracculy \n", - "1 109.0 9.0 25.0 549.0 44.0 Juanna Vines \n", - "2 43.0 3576.0 0.0 6715.0 49.0 Altark Susent \n", - "3 0.0 1283.0 371.0 3329.0 193.0 Solam Susent \n", - "4 303.0 70.0 151.0 565.0 2.0 Willy Santantines \n", - "\n", - " Transported \n", - "0 False \n", - "1 True \n", - "2 False \n", - "3 False \n", - "4 True " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "spaceship = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv\")\n", - "spaceship.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now perform the same as before:\n", - "- Feature Scaling\n", - "- Feature Selection\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Now let's use the best model we got so far in order to see how it can improve when we fine tune it's hyperparameters." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Evaluate your model" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Grid/Random Search**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For this lab we will use Grid Search." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Define hyperparameters to fine tune." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Run Grid Search" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Evaluate your model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 +# Libraries +import pandas as pd +import numpy as np +from sklearn.model_selection import train_test_split, GridSearchCV +from sklearn.preprocessing import StandardScaler +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import accuracy_score + +# 1️⃣ Load 
# Load data — Spaceship Titanic dataset (fetched over HTTP)
spaceship = pd.read_csv("https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv")

# Drop identifier-like columns that carry no predictive signal
spaceship = spaceship.drop(["PassengerId", "Name", "Cabin"], axis=1)

# Convert boolean-ish columns to numeric; astype(float) keeps NaN so the
# missing values can be imputed together with the other numeric columns below
spaceship["CryoSleep"] = spaceship["CryoSleep"].astype(float)
spaceship["VIP"] = spaceship["VIP"].astype(float)
spaceship["Transported"] = spaceship["Transported"].astype(int)

# Handle missing values per dtype.
# BUG FIX: a blanket fillna(0) wrote the integer 0 into the string columns
# HomePlanet / Destination, which get_dummies then turned into a spurious
# mixed-type "0" category. Fill numeric columns with 0 (as before) and
# categorical columns with an explicit "Unknown" label instead.
numeric_cols = spaceship.select_dtypes(include="number").columns
spaceship[numeric_cols] = spaceship[numeric_cols].fillna(0)
categorical_cols = ["HomePlanet", "Destination"]
spaceship[categorical_cols] = spaceship[categorical_cols].fillna("Unknown")

# One-hot encoding for categorical variables (drop_first avoids collinearity)
spaceship = pd.get_dummies(spaceship, columns=["HomePlanet", "Destination"], drop_first=True)

# Split features / target
X = spaceship.drop("Transported", axis=1)
y = spaceship["Transported"]

# FIX: stratify on y so the ~balanced class ratio is preserved in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature scaling — fit on the training split only to avoid data leakage
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature selection: keep the 10 features most associated with the target
# (ANOVA F-test); fit on train only, then apply the same mask to test
selector = SelectKBest(score_func=f_classif, k=10)
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)

# Baseline model with default hyperparameters
model = RandomForestClassifier(random_state=42)
model.fit(X_train_selected, y_train)

# Baseline evaluation on the held-out test split
y_pred = model.predict(X_test_selected)
accuracy = accuracy_score(y_test, y_pred)
print("Baseline accuracy:", accuracy)

# Hyperparameter grid for the random forest
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

# Grid Search: 5-fold CV on the training split, all CPU cores
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
)
grid_search.fit(X_train_selected, y_train)

print("Best parameters:", grid_search.best_params_)

# Evaluate the tuned model (refit on the full training split by GridSearchCV)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_selected)
accuracy = accuracy_score(y_test, y_pred)
print("Best model accuracy:", accuracy)