diff --git a/lab-hyper-tuning.ipynb b/lab-hyper-tuning.ipynb index 847d487..29f3228 100644 --- a/lab-hyper-tuning.ipynb +++ b/lab-hyper-tuning.ipynb @@ -35,19 +35,22 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "#Libraries\n", "import pandas as pd\n", "import numpy as np\n", - "from sklearn.model_selection import train_test_split" + "from sklearn.model_selection import train_test_split\n", + "from xgboost import XGBClassifier\n", + "from sklearn.model_selection import GridSearchCV, StratifiedKFold\n", + "from sklearn.metrics import roc_auc_score, accuracy_score, classification_report, f1_score, precision_score, recall_score" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -200,7 +203,7 @@ "4 True " ] }, - "execution_count": 2, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -221,11 +224,210 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6606, 14)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = spaceship.dropna()\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | PassengerId | \n", + "HomePlanet | \n", + "CryoSleep | \n", + "Cabin | \n", + "Destination | \n", + "Age | \n", + "VIP | \n", + "RoomService | \n", + "FoodCourt | \n", + "ShoppingMall | \n", + "Spa | \n", + "VRDeck | \n", + "Name | \n", + "Transported | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0001_01 | \n", + "Europa | \n", + "False | \n", + "B | \n", + "TRAPPIST-1e | \n", + "39.0 | \n", + "False | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "Maham Ofracculy | \n", + "False | \n", + "
| 1 | \n", + "0002_01 | \n", + "Earth | \n", + "False | \n", + "F | \n", + "TRAPPIST-1e | \n", + "24.0 | \n", + "False | \n", + "109.0 | \n", + "9.0 | \n", + "25.0 | \n", + "549.0 | \n", + "44.0 | \n", + "Juanna Vines | \n", + "True | \n", + "
| 2 | \n", + "0003_01 | \n", + "Europa | \n", + "False | \n", + "A | \n", + "TRAPPIST-1e | \n", + "58.0 | \n", + "True | \n", + "43.0 | \n", + "3576.0 | \n", + "0.0 | \n", + "6715.0 | \n", + "49.0 | \n", + "Altark Susent | \n", + "False | \n", + "
| 3 | \n", + "0003_02 | \n", + "Europa | \n", + "False | \n", + "A | \n", + "TRAPPIST-1e | \n", + "33.0 | \n", + "False | \n", + "0.0 | \n", + "1283.0 | \n", + "371.0 | \n", + "3329.0 | \n", + "193.0 | \n", + "Solam Susent | \n", + "False | \n", + "
| 4 | \n", + "0004_01 | \n", + "Earth | \n", + "False | \n", + "F | \n", + "TRAPPIST-1e | \n", + "16.0 | \n", + "False | \n", + "303.0 | \n", + "70.0 | \n", + "151.0 | \n", + "565.0 | \n", + "2.0 | \n", + "Willy Santantines | \n", + "True | \n", + "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " feature_weights=None, gamma=None, grow_policy=None,\n", + " importance_type=None, interaction_constraints=None,\n", + " learning_rate=0.05, max_bin=None, max_cat_threshold=None,\n", + " max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None, missing=nan,\n", + " monotone_constraints=None, multi_strategy=None, n_estimators=300,\n", + " n_jobs=None, num_parallel_tree=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " feature_weights=None, gamma=None, grow_policy=None,\n", + " importance_type=None, interaction_constraints=None,\n", + " learning_rate=0.05, max_bin=None, max_cat_threshold=None,\n", + " max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None, missing=nan,\n", + " monotone_constraints=None, multi_strategy=None, n_estimators=300,\n", + " n_jobs=None, num_parallel_tree=None, ...)
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.8, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric='logloss',\n", + " feature_types=None, feature_weights=None, gamma=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.05, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=4, max_leaves=None,\n", + " min_child_weight=3, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=1000, n_jobs=None,\n", + " num_parallel_tree=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.8, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric='logloss',\n", + " feature_types=None, feature_weights=None, gamma=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.05, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=4, max_leaves=None,\n", + " min_child_weight=3, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=1000, n_jobs=None,\n", + " num_parallel_tree=None, ...)
GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),\n",
+ " estimator=XGBClassifier(base_score=None, booster=None,\n",
+ " callbacks=None, colsample_bylevel=None,\n",
+ " colsample_bynode=None,\n",
+ " colsample_bytree=0.8, device=None,\n",
+ " early_stopping_rounds=None,\n",
+ " enable_categorical=False,\n",
+ " eval_metric='logloss', feature_types=None,\n",
+ " feature_weights=None, gamma=None,\n",
+ " grow_p...\n",
+ " max_delta_step=None, max_depth=4,\n",
+ " max_leaves=None, min_child_weight=3,\n",
+ " missing=nan, monotone_constraints=None,\n",
+ " multi_strategy=None, n_estimators=1000,\n",
+ " n_jobs=None, num_parallel_tree=None, ...),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'colsample_bytree': [0.5, 0.8], 'max_depth': [3, 4, 5],\n",
+ " 'min_child_weight': [1, 3, 5], 'reg_lambda': [1, 3, 5],\n",
+ " 'subsample': [0.7, 0.8, 0.9]},\n",
+ " scoring='roc_auc', verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=42, shuffle=True),\n",
+ " estimator=XGBClassifier(base_score=None, booster=None,\n",
+ " callbacks=None, colsample_bylevel=None,\n",
+ " colsample_bynode=None,\n",
+ " colsample_bytree=0.8, device=None,\n",
+ " early_stopping_rounds=None,\n",
+ " enable_categorical=False,\n",
+ " eval_metric='logloss', feature_types=None,\n",
+ " feature_weights=None, gamma=None,\n",
+ " grow_p...\n",
+ " max_delta_step=None, max_depth=4,\n",
+ " max_leaves=None, min_child_weight=3,\n",
+ " missing=nan, monotone_constraints=None,\n",
+ " multi_strategy=None, n_estimators=1000,\n",
+ " n_jobs=None, num_parallel_tree=None, ...),\n",
+ " n_jobs=-1,\n",
+ " param_grid={'colsample_bytree': [0.5, 0.8], 'max_depth': [3, 4, 5],\n",
+ " 'min_child_weight': [1, 3, 5], 'reg_lambda': [1, 3, 5],\n",
+ " 'subsample': [0.7, 0.8, 0.9]},\n",
+ " scoring='roc_auc', verbose=2)XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.8, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric='logloss',\n", + " feature_types=None, feature_weights=None, gamma=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.05, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=3, max_leaves=None,\n", + " min_child_weight=1, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=1000, n_jobs=None,\n", + " num_parallel_tree=None, ...)
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.8, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric='logloss',\n", + " feature_types=None, feature_weights=None, gamma=None,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.05, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=3, max_leaves=None,\n", + " min_child_weight=1, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=1000, n_jobs=None,\n", + " num_parallel_tree=None, ...)