diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..8bb6a63 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,19 +35,23 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "#Libraries\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import classification_report, accuracy_score\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.model_selection import GridSearchCV" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -200,7 +204,7 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -221,11 +225,95 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId 0\n", + "HomePlanet 201\n", + "CryoSleep 217\n", + "Cabin 199\n", + "Destination 182\n", + "Age 179\n", + "VIP 203\n", + "RoomService 181\n", + "FoodCourt 183\n", + "ShoppingMall 208\n", + "Spa 183\n", + "VRDeck 188\n", + "Name 200\n", + "Transported 0\n", + "dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spaceship.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "spaceship = spaceship.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "spaceship[['Deck','CabinNum','Side']] = spaceship['Cabin'].str.split('/', expand=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "spaceship = spaceship.drop([\"Cabin\", \"PassengerId\", \"Name\"], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "features = pd.get_dummies(spaceship.drop(\"Transported\", axis=1), drop_first=True)\n", + "target = spaceship[\"Transported\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(\n", + " features, target, test_size=0.2, random_state=42, stratify=target\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "scaler = StandardScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" ] }, { @@ -237,11 +325,756 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
GradientBoostingClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GradientBoostingClassifier(random_state=42)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "#your code here\n", + "gb = GradientBoostingClassifier(random_state=42)\n", + "gb.fit(X_train_scaled, y_train)" ] }, { @@ -253,11 +1086,32 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy base: 0.791981845688351\n", + " precision recall f1-score support\n", + "\n", + " False 0.83 0.73 0.78 656\n", + " True 0.76 0.85 0.80 666\n", + "\n", + " accuracy 0.79 1322\n", + " macro avg 0.80 0.79 0.79 1322\n", + "weighted avg 0.80 0.79 0.79 1322\n", + "\n" + ] + } + ], "source": [ - "#your code here" + "#your code here\n", + "y_pred = gb.predict(X_test_scaled)\n", + "\n", + "print(\"Accuracy base:\", accuracy_score(y_test, y_pred))\n", + "print(classification_report(y_test, y_pred))" ] }, { @@ -283,11 +1137,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nparam_grid = {\\n \"n_estimators\": [100, 200, 300],\\n \"learning_rate\": [0.01, 0.05, 0.1],\\n \"max_depth\": [3, 4, 5],\\n \"min_samples_split\": [2, 5, 10]\\n}\\n'" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#your code here\n", + "\"\"\"\n", + "param_grid = {\n", + " \"n_estimators\": [100, 200, 300],\n", + " \"learning_rate\": [0.01, 0.05, 0.1],\n", + " \"max_depth\": [3, 4, 5],\n", + " \"min_samples_split\": [2, 5, 10]\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "param_grid = {\n", + " \"n_estimators\": [10, 20],\n", + " \"learning_rate\": [0.05, 0.1],\n", + " \"max_depth\": [3, 4]\n", + "}" ] }, { @@ -299,10 +1185,907 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "grid_search = GridSearchCV(\n", + " estimator=GradientBoostingClassifier(random_state=42),\n", + " param_grid=param_grid,\n", + " cv=5,\n", + " scoring=\"accuracy\",\n", + " n_jobs=-1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=5, estimator=GradientBoostingClassifier(random_state=42),\n",
+       "             n_jobs=-1,\n",
+       "             param_grid={'learning_rate': [0.05, 0.1], 'max_depth': [3, 4],\n",
+       "                         'n_estimators': [10, 20]},\n",
+       "             scoring='accuracy')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=5, estimator=GradientBoostingClassifier(random_state=42),\n", + " n_jobs=-1,\n", + " param_grid={'learning_rate': [0.05, 0.1], 'max_depth': [3, 4],\n", + " 'n_estimators': [10, 20]},\n", + " scoring='accuracy')" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grid_search.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mejores parámetros:\n", + "{'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 20}\n" + ] + } + ], + "source": [ + "print(\"Mejores parámetros:\")\n", + "print(grid_search.best_params_)" + ] }, { "cell_type": "markdown", @@ -311,6 +2094,37 @@ "- Evaluate your model" ] }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy optimizado: 0.783661119515885\n", + " precision recall f1-score support\n", + "\n", + " False 0.80 0.76 0.78 656\n", + " True 0.77 0.81 0.79 666\n", + "\n", + " accuracy 0.78 1322\n", + " macro avg 0.78 0.78 0.78 1322\n", + "weighted avg 0.78 0.78 0.78 1322\n", + "\n" + ] + } + ], + "source": [ + "best_model = grid_search.best_estimator_\n", + "\n", + "y_pred_best = best_model.predict(X_test_scaled)\n", + "\n", + "print(\"Accuracy optimizado:\", accuracy_score(y_test, y_pred_best))\n", + "print(classification_report(y_test, y_pred_best))" + ] + }, { "cell_type": "code", "execution_count": null, @@ -335,7 +2149,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.2" } }, "nbformat": 4,