diff --git a/DM_FinalProject.ipynb b/DM_FinalProject.ipynb
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/DM_FinalProject.ipynb
@@ -0,0 +1 @@
+
diff --git a/preprocessing.ipynb b/preprocessing.ipynb
index 1dc4dac..6fdf93d 100644
--- a/preprocessing.ipynb
+++ b/preprocessing.ipynb
@@ -1,1058 +1 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "e4e0cc2a",
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "import os\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.impute import SimpleImputer\n",
- "from sklearn.ensemble import RandomForestClassifier\n",
- "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
- "from concurrent.futures import ThreadPoolExecutor\n",
- "from tqdm import tqdm"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "5b0aa12f",
- "metadata": {},
- "outputs": [],
- "source": [
- "train_df = pd.read_csv(\"child-mind-institute-problematic-internet-use/train.csv\")\n",
- "test_df = pd.read_csv(\"child-mind-institute-problematic-internet-use/test.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "eab58219",
- "metadata": {},
- "outputs": [],
- "source": [
- "conflict_rows = train_df[(train_df['PAQ_A-PAQ_A_Total'].notna()) & (train_df['PAQ_C-PAQ_C_Total'].notna())]\n",
- "\n",
- "# 判斷是否存在衝突行\n",
- "if not conflict_rows.empty:\n",
- " train_df = train_df.drop(conflict_rows.index)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "e4d6f5f0",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 將合併結果存回 column1\n",
- "train_df['PAQ_A-PAQ_A_Total'] = train_df['PAQ_A-PAQ_A_Total'].fillna(train_df['PAQ_C-PAQ_C_Total'])\n",
- "train_df['PAQ_A-Season'] = train_df['PAQ_A-Season'].fillna(train_df['PAQ_C-Season'])\n",
- "test_df['PAQ_A-PAQ_A_Total'] = test_df['PAQ_A-PAQ_A_Total'].fillna(test_df['PAQ_C-PAQ_C_Total'])\n",
- "test_df['PAQ_A-Season'] = test_df['PAQ_A-Season'].fillna(test_df['PAQ_C-Season'])\n",
- "\n",
- "# 刪除 column2\n",
- "train_df = train_df.drop(columns=['PAQ_C-PAQ_C_Total', 'PAQ_C-Season'])\n",
- "test_df = test_df.drop(columns=['PAQ_C-PAQ_C_Total', 'PAQ_C-Season'])\n",
- "\n",
- "train_df = train_df.rename(columns={'PAQ_A-Season': 'PAQ-Season'})\n",
- "train_df = train_df.rename(columns={'PAQ_A-PAQ_A_Total': 'PAQ-PAQ_Total'})\n",
- "test_df = test_df.rename(columns={'PAQ_A-Season': 'PAQ-Season'})\n",
- "test_df = test_df.rename(columns={'PAQ_A-PAQ_A_Total': 'PAQ-PAQ_Total'})\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "f178f013",
- "metadata": {},
- "outputs": [],
- "source": [
- "df = train_df.dropna(axis=1, thresh=len(train_df) - 3000)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "dd247f1d",
- "metadata": {},
- "outputs": [],
- "source": [
- "def process_file(filename, dirname):\n",
- " df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))\n",
- " df.drop('step', axis=1, inplace=True)\n",
- " return df.describe().values.reshape(-1), filename.split('=')[1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "28ab11be",
- "metadata": {},
- "outputs": [],
- "source": [
- "def load_time_series(dirname) -> pd.DataFrame:\n",
- " ids = os.listdir(dirname)\n",
- " \n",
- " with ThreadPoolExecutor() as executor:\n",
- " results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))\n",
- " \n",
- " stats, indexes = zip(*results)\n",
- " \n",
- " df = pd.DataFrame(stats, columns=[f\"Stat_{i}\" for i in range(len(stats[0]))])\n",
- " df['id'] = indexes\n",
- " \n",
- " return df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "7f168bce",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 把SII是空的column刪除\n",
- "train_df = train_df.dropna(subset=['sii'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "484016f4",
- "metadata": {},
- "outputs": [],
- "source": [
- "# PCIAT 有些欄位是空的,會影響最後SII結果,把若填滿PCIAT有可能改變SII的column刪除\n",
- "PCIAT_cols = [f'PCIAT-PCIAT_{i+1:02d}' for i in range(20)]\n",
- "def IncorrectRows(row):\n",
- " if pd.isna(row['PCIAT-PCIAT_Total']):\n",
- " return np.nan\n",
- " max_possible = row['PCIAT-PCIAT_Total'] + row[PCIAT_cols].isna().sum() * 5\n",
- " if row['PCIAT-PCIAT_Total'] <= 30 and max_possible <= 30:\n",
- " return 0\n",
- " elif 31 <= row['PCIAT-PCIAT_Total'] <= 49 and max_possible <= 49:\n",
- " return 1\n",
- " elif 50 <= row['PCIAT-PCIAT_Total'] <= 79 and max_possible <= 79:\n",
- " return 2\n",
- " elif row['PCIAT-PCIAT_Total'] >= 80 and max_possible >= 80:\n",
- " return 3\n",
- " return np.nan\n",
- "\n",
- "train_df['recal_sii'] = train_df.apply(IncorrectRows, axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "a0858571",
- "metadata": {},
- "outputs": [],
- "source": [
- "mismatch_rows = train_df[\n",
- " (train_df['recal_sii'] != train_df['sii']) & train_df['sii'].notna()\n",
- "]\n",
- "mismatch_indexes = mismatch_rows.index\n",
- "train_df = train_df.drop(mismatch_indexes)\n",
- "train_df = train_df.drop(['recal_sii'], axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "bc52dc24",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 把有關Season的column做mapping \n",
- "SEASON_COLS = [\n",
- " \"Basic_Demos-Enroll_Season\", \n",
- " \"CGAS-Season\", \n",
- " \"Physical-Season\", \n",
- " \"Fitness_Endurance-Season\", \n",
- " \"FGC-Season\", \n",
- " \"BIA-Season\", \n",
- " \"PAQ-Season\", \n",
- " \"SDS-Season\",\n",
- " \"PreInt_EduHx-Season\", \n",
- " ]\n",
- "def update(df):\n",
- " for c in SEASON_COLS: \n",
- " df[c] = df[c].fillna('Missing')\n",
- " df[c] = df[c].astype('category')\n",
- " return df\n",
- "train_df = update(train_df)\n",
- "test_df = update(test_df)\n",
- "season_mapping = {'Spring': 0, 'Summer': 1, 'Fall': 2, 'Winter': 3, 'Missing': 4}\n",
- "for col in SEASON_COLS:\n",
- " train_df[col] = train_df[col].map(season_mapping)\n",
- " test_df[col] = test_df[col].map(season_mapping)\n",
- "train_df['PCIAT-Season'] = train_df['PCIAT-Season'].map(season_mapping)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "05c95e2d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# 做Imputer\n",
- "train_id = train_df['id']\n",
- "test_id = test_df['id']\n",
- "train_features = train_df.drop(columns=['id'])\n",
- "test_features = test_df.drop(columns=['id'])\n",
- "\n",
- "imputer = SimpleImputer(strategy='median')\n",
- "train_features_imputed = pd.DataFrame(imputer.fit_transform(train_features), columns=train_features.columns, index=train_features.index)\n",
- "test_features_imputed = pd.DataFrame(imputer.fit_transform(test_features), columns=test_features.columns, index=test_features.index)\n",
- "\n",
- "train_df = pd.concat([train_id, train_features_imputed], axis=1)\n",
- "test_df = pd.concat([test_id, test_features_imputed], axis=1)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f4ab2aab",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Basic_Demos-Sex -0.094312\n",
- "Physical-Diastolic_BP 0.066374\n",
- "Physical-HeartRate -0.035771\n",
- "PAQ-PAQ_Total -0.042217\n",
- "Fitness_Endurance-Max_Stage -0.020330\n",
- "Fitness_Endurance-Time_Mins -0.038346\n",
- "Fitness_Endurance-Time_Sec 0.001800\n",
- "FGC-FGC_SRL -0.073663\n",
- "FGC-FGC_SRR -0.064219\n",
- "BIA-BIA_Activity_Level_num 0.075633\n",
- "BIA-BIA_BMC -0.007859\n",
- "BIA-BIA_BMR 0.028779\n",
- "BIA-BIA_DEE 0.041886\n",
- "BIA-BIA_ECW 0.027491\n",
- "BIA-BIA_FFM 0.028779\n",
- "BIA-BIA_FFMI 0.085982\n",
- "BIA-BIA_FMI 0.066753\n",
- "BIA-BIA_Fat 0.031164\n",
- "BIA-BIA_ICW 0.041286\n",
- "BIA-BIA_LDM 0.019975\n",
- "BIA-BIA_LST 0.059496\n",
- "BIA-BIA_SMM 0.041344\n",
- "BIA-BIA_TBW 0.033559\n",
- "Name: PCIAT-PCIAT_Total, dtype: float64\n"
- ]
- }
- ],
- "source": [
- "# 尋找和PCIAT_Total相關性低的column並刪除 \n",
- "train_cor = train_df.drop('id', axis=1)\n",
- "test_cor = test_df.drop('id', axis=1)\n",
- "corr_matrix = train_cor[['PCIAT-PCIAT_Total', 'Basic_Demos-Age', 'Basic_Demos-Sex', 'Physical-BMI', \n",
- " 'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',\n",
- " 'Physical-Diastolic_BP', 'Physical-Systolic_BP', 'Physical-HeartRate',\n",
- " 'PreInt_EduHx-computerinternet_hoursday', 'SDS-SDS_Total_T',\n",
- " 'PAQ-PAQ_Total', 'Fitness_Endurance-Max_Stage', 'Fitness_Endurance-Time_Mins', \n",
- " 'Fitness_Endurance-Time_Sec', 'FGC-FGC_CU', 'FGC-FGC_GSND', 'FGC-FGC_GSD', \n",
- " 'FGC-FGC_PU', 'FGC-FGC_SRL', 'FGC-FGC_SRR', 'FGC-FGC_TL', 'BIA-BIA_Activity_Level_num', \n",
- " 'BIA-BIA_BMC', 'BIA-BIA_BMI', 'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',\n",
- " 'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num', 'BIA-BIA_ICW', \n",
- " 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM', 'BIA-BIA_TBW']].corr()\n",
- "sii_corr = corr_matrix['PCIAT-PCIAT_Total'].drop('PCIAT-PCIAT_Total')\n",
- "filtered_corr = sii_corr[(sii_corr > 0.1) | (sii_corr < -0.1)]\n",
- "other_corr = sii_corr[(sii_corr <= 0.1) & (sii_corr >= -0.1)]\n",
- "other_corr_columns = other_corr.index.tolist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "68ee6785",
- "metadata": {},
- "outputs": [],
- "source": [
- "train_df = train_df.drop(columns=other_corr_columns)\n",
- "test_df = test_df.drop(columns=other_corr_columns)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "b652c300",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " 0%| | 0/996 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 996/996 [01:21<00:00, 12.22it/s]\n",
- "100%|██████████| 2/2 [00:00<00:00, 11.38it/s]\n"
- ]
- }
- ],
- "source": [
- "# 把parquet data加進去 \n",
- "train_ts = load_time_series(\"child-mind-institute-problematic-internet-use/series_train.parquet\")\n",
- "test_ts = load_time_series(\"child-mind-institute-problematic-internet-use/series_test.parquet\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "c8676ea4",
- "metadata": {},
- "outputs": [],
- "source": [
- "TARGET_COLS = [\n",
- " \"PCIAT-Season\",\n",
- " \"PCIAT-PCIAT_01\",\n",
- " \"PCIAT-PCIAT_02\",\n",
- " \"PCIAT-PCIAT_03\",\n",
- " \"PCIAT-PCIAT_04\",\n",
- " \"PCIAT-PCIAT_05\",\n",
- " \"PCIAT-PCIAT_06\",\n",
- " \"PCIAT-PCIAT_07\",\n",
- " \"PCIAT-PCIAT_08\",\n",
- " \"PCIAT-PCIAT_09\",\n",
- " \"PCIAT-PCIAT_10\",\n",
- " \"PCIAT-PCIAT_11\",\n",
- " \"PCIAT-PCIAT_12\",\n",
- " \"PCIAT-PCIAT_13\",\n",
- " \"PCIAT-PCIAT_14\",\n",
- " \"PCIAT-PCIAT_15\",\n",
- " \"PCIAT-PCIAT_16\", \n",
- " \"PCIAT-PCIAT_17\",\n",
- " \"PCIAT-PCIAT_18\",\n",
- " \"PCIAT-PCIAT_19\",\n",
- " \"PCIAT-PCIAT_20\",\n",
- " \"PCIAT-PCIAT_Total\"\n",
- "]\n",
- "train_df = train_df.drop(TARGET_COLS,axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "71707ed9",
- "metadata": {},
- "outputs": [],
- "source": [
- "train_df = pd.merge(train_df, train_ts, how=\"left\", on='id')\n",
- "test_df = pd.merge(test_df, test_ts, how=\"left\", on='id')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "e2c466b0",
- "metadata": {},
- "outputs": [],
- "source": [
- "train_df = train_df.drop('id', axis=1)\n",
- "test_df = test_df.drop('id', axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "a6aa61e8",
- "metadata": {},
- "outputs": [],
- "source": [
- "train_df = train_df.fillna(0)\n",
- "test_df = test_df.fillna(0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "709996b3",
- "metadata": {},
- "outputs": [],
- "source": [
- "from imblearn.over_sampling import SMOTE\n",
- "X_train = train_df.drop(columns=['sii']) # 假設 'sii' 是目標欄位\n",
- "y_train = train_df['sii']\n",
- "\n",
- "# 使用 SMOTE 進行過採樣\n",
- "smote = SMOTE(random_state=42)\n",
- "X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "id": "48d5c74d",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Basic_Demos-Enroll_Season | \n",
- " Basic_Demos-Age | \n",
- " CGAS-Season | \n",
- " CGAS-CGAS_Score | \n",
- " Physical-Season | \n",
- " Physical-BMI | \n",
- " Physical-Height | \n",
- " Physical-Weight | \n",
- " Physical-Waist_Circumference | \n",
- " Physical-Systolic_BP | \n",
- " ... | \n",
- " Stat_86 | \n",
- " Stat_87 | \n",
- " Stat_88 | \n",
- " Stat_89 | \n",
- " Stat_90 | \n",
- " Stat_91 | \n",
- " Stat_92 | \n",
- " Stat_93 | \n",
- " Stat_94 | \n",
- " Stat_95 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 2.000000 | \n",
- " 5.000000 | \n",
- " 3.000000 | \n",
- " 51.000000 | \n",
- " 2.000000 | \n",
- " 16.877316 | \n",
- " 46.000000 | \n",
- " 50.800000 | \n",
- " 26.000000 | \n",
- " 114.000000 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1.000000 | \n",
- " 9.000000 | \n",
- " 4.000000 | \n",
- " 65.000000 | \n",
- " 2.000000 | \n",
- " 14.035590 | \n",
- " 48.000000 | \n",
- " 46.000000 | \n",
- " 22.000000 | \n",
- " 122.000000 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1.000000 | \n",
- " 10.000000 | \n",
- " 2.000000 | \n",
- " 71.000000 | \n",
- " 2.000000 | \n",
- " 16.648696 | \n",
- " 56.500000 | \n",
- " 75.600000 | \n",
- " 26.000000 | \n",
- " 117.000000 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 3.000000 | \n",
- " 9.000000 | \n",
- " 2.000000 | \n",
- " 71.000000 | \n",
- " 1.000000 | \n",
- " 18.292347 | \n",
- " 56.000000 | \n",
- " 81.600000 | \n",
- " 26.000000 | \n",
- " 117.000000 | \n",
- " ... | \n",
- " 1.546979 | \n",
- " 4.004276 | \n",
- " 89.751656 | \n",
- " 0.0 | \n",
- " 2633.250000 | \n",
- " 4188.500000 | \n",
- " 8.611000e+13 | \n",
- " 7.0 | \n",
- " 3.0 | \n",
- " 85.000000 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 0.000000 | \n",
- " 13.000000 | \n",
- " 3.000000 | \n",
- " 50.000000 | \n",
- " 1.000000 | \n",
- " 22.279952 | \n",
- " 59.500000 | \n",
- " 112.200000 | \n",
- " 26.000000 | \n",
- " 102.000000 | \n",
- " ... | \n",
- " 1.146284 | \n",
- " 2.952888 | \n",
- " 89.476036 | \n",
- " 1.0 | \n",
- " 2597.800049 | \n",
- " 4175.000000 | \n",
- " 8.639500e+13 | \n",
- " 7.0 | \n",
- " 3.0 | \n",
- " 91.000000 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 6339 | \n",
- " 1.299801 | \n",
- " 12.299801 | \n",
- " 0.899402 | \n",
- " 77.001995 | \n",
- " 2.299801 | \n",
- " 27.994593 | \n",
- " 65.599601 | \n",
- " 171.102593 | \n",
- " 26.000000 | \n",
- " 130.303790 | \n",
- " ... | \n",
- " 1.030342 | \n",
- " 1.382063 | \n",
- " 88.721179 | \n",
- " 1.0 | \n",
- " 2209.489687 | \n",
- " 4190.601596 | \n",
- " 8.639500e+13 | \n",
- " 7.0 | \n",
- " 1.0 | \n",
- " 70.213165 | \n",
- "
\n",
- " \n",
- " | 6340 | \n",
- " 0.353541 | \n",
- " 14.292917 | \n",
- " 1.000000 | \n",
- " 45.000000 | \n",
- " 1.707083 | \n",
- " 28.676977 | \n",
- " 66.918187 | \n",
- " 182.725328 | \n",
- " 26.000000 | \n",
- " 144.495192 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " | 6341 | \n",
- " 0.974194 | \n",
- " 14.000000 | \n",
- " 3.948389 | \n",
- " 65.077417 | \n",
- " 1.077417 | \n",
- " 20.305357 | \n",
- " 61.332264 | \n",
- " 108.727772 | \n",
- " 26.000000 | \n",
- " 127.638722 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " | 6342 | \n",
- " 1.847061 | \n",
- " 17.000000 | \n",
- " 3.000000 | \n",
- " 53.058783 | \n",
- " 2.000000 | \n",
- " 22.445850 | \n",
- " 66.464713 | \n",
- " 141.155331 | \n",
- " 26.000000 | \n",
- " 128.847061 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " | 6343 | \n",
- " 2.023595 | \n",
- " 15.905620 | \n",
- " 3.905620 | \n",
- " 64.764050 | \n",
- " 1.952810 | \n",
- " 45.737848 | \n",
- " 67.558987 | \n",
- " 296.787350 | \n",
- " 48.457316 | \n",
- " 156.150581 | \n",
- " ... | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
6344 rows × 129 columns
\n",
- "
"
- ],
- "text/plain": [
- " Basic_Demos-Enroll_Season Basic_Demos-Age CGAS-Season \\\n",
- "0 2.000000 5.000000 3.000000 \n",
- "1 1.000000 9.000000 4.000000 \n",
- "2 1.000000 10.000000 2.000000 \n",
- "3 3.000000 9.000000 2.000000 \n",
- "4 0.000000 13.000000 3.000000 \n",
- "... ... ... ... \n",
- "6339 1.299801 12.299801 0.899402 \n",
- "6340 0.353541 14.292917 1.000000 \n",
- "6341 0.974194 14.000000 3.948389 \n",
- "6342 1.847061 17.000000 3.000000 \n",
- "6343 2.023595 15.905620 3.905620 \n",
- "\n",
- " CGAS-CGAS_Score Physical-Season Physical-BMI Physical-Height \\\n",
- "0 51.000000 2.000000 16.877316 46.000000 \n",
- "1 65.000000 2.000000 14.035590 48.000000 \n",
- "2 71.000000 2.000000 16.648696 56.500000 \n",
- "3 71.000000 1.000000 18.292347 56.000000 \n",
- "4 50.000000 1.000000 22.279952 59.500000 \n",
- "... ... ... ... ... \n",
- "6339 77.001995 2.299801 27.994593 65.599601 \n",
- "6340 45.000000 1.707083 28.676977 66.918187 \n",
- "6341 65.077417 1.077417 20.305357 61.332264 \n",
- "6342 53.058783 2.000000 22.445850 66.464713 \n",
- "6343 64.764050 1.952810 45.737848 67.558987 \n",
- "\n",
- " Physical-Weight Physical-Waist_Circumference Physical-Systolic_BP \\\n",
- "0 50.800000 26.000000 114.000000 \n",
- "1 46.000000 22.000000 122.000000 \n",
- "2 75.600000 26.000000 117.000000 \n",
- "3 81.600000 26.000000 117.000000 \n",
- "4 112.200000 26.000000 102.000000 \n",
- "... ... ... ... \n",
- "6339 171.102593 26.000000 130.303790 \n",
- "6340 182.725328 26.000000 144.495192 \n",
- "6341 108.727772 26.000000 127.638722 \n",
- "6342 141.155331 26.000000 128.847061 \n",
- "6343 296.787350 48.457316 156.150581 \n",
- "\n",
- " ... Stat_86 Stat_87 Stat_88 Stat_89 Stat_90 Stat_91 \\\n",
- "0 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "1 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "2 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "3 ... 1.546979 4.004276 89.751656 0.0 2633.250000 4188.500000 \n",
- "4 ... 1.146284 2.952888 89.476036 1.0 2597.800049 4175.000000 \n",
- "... ... ... ... ... ... ... ... \n",
- "6339 ... 1.030342 1.382063 88.721179 1.0 2209.489687 4190.601596 \n",
- "6340 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "6341 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "6342 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "6343 ... 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 \n",
- "\n",
- " Stat_92 Stat_93 Stat_94 Stat_95 \n",
- "0 0.000000e+00 0.0 0.0 0.000000 \n",
- "1 0.000000e+00 0.0 0.0 0.000000 \n",
- "2 0.000000e+00 0.0 0.0 0.000000 \n",
- "3 8.611000e+13 7.0 3.0 85.000000 \n",
- "4 8.639500e+13 7.0 3.0 91.000000 \n",
- "... ... ... ... ... \n",
- "6339 8.639500e+13 7.0 1.0 70.213165 \n",
- "6340 0.000000e+00 0.0 0.0 0.000000 \n",
- "6341 0.000000e+00 0.0 0.0 0.000000 \n",
- "6342 0.000000e+00 0.0 0.0 0.000000 \n",
- "6343 0.000000e+00 0.0 0.0 0.000000 \n",
- "\n",
- "[6344 rows x 129 columns]"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "X_train_resampled"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "id": "7a006722",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 2.0\n",
- "1 0.0\n",
- "2 0.0\n",
- "3 1.0\n",
- "4 1.0\n",
- " ... \n",
- "6339 3.0\n",
- "6340 3.0\n",
- "6341 3.0\n",
- "6342 3.0\n",
- "6343 3.0\n",
- "Name: sii, Length: 6344, dtype: float64"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "y_train_resampled"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "id": "ac081d5c",
- "metadata": {},
- "outputs": [],
- "source": [
- "X_train = X_train_resampled\n",
- "y_train = y_train_resampled\n",
- "X_test = test_df\n",
- "\n",
- "model = RandomForestClassifier(random_state=0)\n",
- "model.fit(X_train, y_train)\n",
- "\n",
- "test_df['sii'] = model.predict(X_test)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "id": "f77e9cff",
- "metadata": {},
- "outputs": [],
- "source": [
- "submit_df = pd.concat([test_id, test_df['sii']], axis=1)\n",
- "submit_df['sii'] = submit_df['sii'].astype(int)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "id": "fb35c244",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " sii | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 00008ff9 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 000fd460 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 00105258 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 00115b9f | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 0016bb22 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " 001f3379 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " 0038ba98 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " 0068a485 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " 0069fbed | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " 0083e397 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " 0087dd65 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " 00abe655 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " 00ae59c9 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " 00af6387 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " 00bd4359 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " 00c0cd71 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " 00d56d4b | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " 00d9913d | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " 00e6167c | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " 00ebc35d | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id sii\n",
- "0 00008ff9 2\n",
- "1 000fd460 0\n",
- "2 00105258 0\n",
- "3 00115b9f 1\n",
- "4 0016bb22 2\n",
- "5 001f3379 1\n",
- "6 0038ba98 0\n",
- "7 0068a485 0\n",
- "8 0069fbed 0\n",
- "9 0083e397 0\n",
- "10 0087dd65 0\n",
- "11 00abe655 0\n",
- "12 00ae59c9 1\n",
- "13 00af6387 0\n",
- "14 00bd4359 0\n",
- "15 00c0cd71 2\n",
- "16 00d56d4b 0\n",
- "17 00d9913d 0\n",
- "18 00e6167c 0\n",
- "19 00ebc35d 0"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "submit_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "id": "57edb940",
- "metadata": {},
- "outputs": [],
- "source": [
- "submit_df.to_csv('submission.csv', index=False)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "base",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
+{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":81933,"databundleVersionId":9643020,"sourceType":"competition"},{"sourceId":7453542,"sourceType":"datasetVersion","datasetId":921302}],"dockerImageVersionId":30786,"isInternetEnabled":false,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip -q install /kaggle/input/pytorchtabnet/pytorch_tabnet-4.1.0-py3-none-any.whl","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:46:49.144324Z","iopub.execute_input":"2024-11-13T05:46:49.144713Z","iopub.status.idle":"2024-11-13T05:47:21.520893Z","shell.execute_reply.started":"2024-11-13T05:46:49.144668Z","shell.execute_reply":"2024-11-13T05:47:21.519659Z"}},"outputs":[],"execution_count":1},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport os\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.impute import SimpleImputer, KNNImputer\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score, confusion_matrix, classification_report\nfrom concurrent.futures import ThreadPoolExecutor\nfrom tqdm import tqdm\nfrom pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor\nfrom xgboost import XGBRegressor\nfrom pytorch_tabnet.callbacks import Callback\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom sklearn.preprocessing import StandardScaler","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:21.523344Z","iopub.execute_input":"2024-11-13T05:47:21.523650Z","iopub.status.idle":"2024-11-13T05:47:26.360140Z","shell.execute_reply.started":"2024-11-13T05:47:21.523617Z","shell.execute_reply":"2024-11-13T05:47:26.359348Z"}},"outputs":[],"execution_count":2},{"cell_type":"code","source":"train_df = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')\ntest_df = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.361302Z","iopub.execute_input":"2024-11-13T05:47:26.361700Z","iopub.status.idle":"2024-11-13T05:47:26.436436Z","shell.execute_reply.started":"2024-11-13T05:47:26.361667Z","shell.execute_reply":"2024-11-13T05:47:26.435657Z"}},"outputs":[],"execution_count":3},{"cell_type":"code","source":"conflict_rows = train_df[(train_df['PAQ_A-PAQ_A_Total'].notna()) & (train_df['PAQ_C-PAQ_C_Total'].notna())]\n\n# 判斷是否存在衝突行\nif not conflict_rows.empty:\n train_df = train_df.drop(conflict_rows.index)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.438470Z","iopub.execute_input":"2024-11-13T05:47:26.438816Z","iopub.status.idle":"2024-11-13T05:47:26.456364Z","shell.execute_reply.started":"2024-11-13T05:47:26.438782Z","shell.execute_reply":"2024-11-13T05:47:26.455598Z"}},"outputs":[],"execution_count":4},{"cell_type":"code","source":"# 將合併結果存回 column1\ntrain_df['PAQ_A-PAQ_A_Total'] = train_df['PAQ_A-PAQ_A_Total'].fillna(train_df['PAQ_C-PAQ_C_Total'])\ntrain_df['PAQ_A-Season'] = train_df['PAQ_A-Season'].fillna(train_df['PAQ_C-Season'])\ntest_df['PAQ_A-PAQ_A_Total'] = test_df['PAQ_A-PAQ_A_Total'].fillna(test_df['PAQ_C-PAQ_C_Total'])\ntest_df['PAQ_A-Season'] = test_df['PAQ_A-Season'].fillna(test_df['PAQ_C-Season'])\n\n# 刪除 column2\ntrain_df = train_df.drop(columns=['PAQ_C-PAQ_C_Total', 'PAQ_C-Season'])\ntest_df = test_df.drop(columns=['PAQ_C-PAQ_C_Total', 'PAQ_C-Season'])\n\ntrain_df = train_df.rename(columns={'PAQ_A-Season': 'PAQ-Season'})\ntrain_df = train_df.rename(columns={'PAQ_A-PAQ_A_Total': 'PAQ-PAQ_Total'})\ntest_df = test_df.rename(columns={'PAQ_A-Season': 'PAQ-Season'})\ntest_df = test_df.rename(columns={'PAQ_A-PAQ_A_Total': 'PAQ-PAQ_Total'})","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.457373Z","iopub.execute_input":"2024-11-13T05:47:26.457643Z","iopub.status.idle":"2024-11-13T05:47:26.586129Z","shell.execute_reply.started":"2024-11-13T05:47:26.457614Z","shell.execute_reply":"2024-11-13T05:47:26.585245Z"}},"outputs":[],"execution_count":5},{"cell_type":"code","source":"df = train_df.dropna(axis=1, thresh=len(train_df) - 3000)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.587254Z","iopub.execute_input":"2024-11-13T05:47:26.587547Z","iopub.status.idle":"2024-11-13T05:47:26.599714Z","shell.execute_reply.started":"2024-11-13T05:47:26.587515Z","shell.execute_reply":"2024-11-13T05:47:26.598996Z"}},"outputs":[],"execution_count":6},{"cell_type":"code","source":"def process_file(filename, dirname):\n df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))\n df.drop('step', axis=1, inplace=True)\n return df.describe().values.reshape(-1), filename.split('=')[1]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.600700Z","iopub.execute_input":"2024-11-13T05:47:26.600978Z","iopub.status.idle":"2024-11-13T05:47:26.606093Z","shell.execute_reply.started":"2024-11-13T05:47:26.600948Z","shell.execute_reply":"2024-11-13T05:47:26.605265Z"}},"outputs":[],"execution_count":7},{"cell_type":"code","source":"def load_time_series(dirname) -> pd.DataFrame:\n ids = os.listdir(dirname)\n \n with ThreadPoolExecutor() as executor:\n results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))\n \n stats, indexes = zip(*results)\n \n df = pd.DataFrame(stats, columns=[f\"Stat_{i}\" for i in range(len(stats[0]))])\n df['id'] = indexes\n \n return df\n\nclass AutoEncoder(nn.Module):\n def __init__(self, input_dim, encoding_dim):\n super(AutoEncoder, self).__init__()\n self.encoder = nn.Sequential(\n nn.Linear(input_dim, encoding_dim*3),\n nn.ReLU(),\n nn.Linear(encoding_dim*3, encoding_dim*2),\n nn.ReLU(),\n nn.Linear(encoding_dim*2, encoding_dim),\n nn.ReLU()\n )\n self.decoder = nn.Sequential(\n nn.Linear(encoding_dim, input_dim*2),\n nn.ReLU(),\n nn.Linear(input_dim*2, input_dim*3),\n nn.ReLU(),\n nn.Linear(input_dim*3, input_dim),\n nn.Sigmoid()\n )\n \n def forward(self, x):\n encoded = self.encoder(x)\n decoded = self.decoder(encoded)\n return decoded\n\ndef perform_autoencoder(df, encoding_dim=50, epochs=50, batch_size=32):\n scaler = StandardScaler()\n df_scaled = scaler.fit_transform(df)\n \n data_tensor = torch.FloatTensor(df_scaled)\n \n input_dim = data_tensor.shape[1]\n autoencoder = AutoEncoder(input_dim, encoding_dim)\n \n criterion = nn.MSELoss()\n optimizer = optim.Adam(autoencoder.parameters())\n \n for epoch in range(epochs):\n for i in range(0, len(data_tensor), batch_size):\n batch = data_tensor[i : i + batch_size]\n optimizer.zero_grad()\n reconstructed = autoencoder(batch)\n loss = criterion(reconstructed, batch)\n loss.backward()\n optimizer.step()\n \n if (epoch + 1) % 10 == 0:\n print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}]')\n \n with torch.no_grad():\n encoded_data = autoencoder.encoder(data_tensor).numpy()\n \n df_encoded = pd.DataFrame(encoded_data, columns=[f'Enc_{i + 1}' for i in range(encoded_data.shape[1])])\n \n return df_encoded\n\ndef feature_engineering(df):\n season_cols = [col for col in df.columns if 'Season' in col]\n df = df.drop(season_cols, axis=1) \n df['BMI_Age'] = df['Physical-BMI'] * df['Basic_Demos-Age']\n df['Internet_Hours_Age'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['Basic_Demos-Age']\n df['BMI_Internet_Hours'] = df['Physical-BMI'] * df['PreInt_EduHx-computerinternet_hoursday']\n df['BFP_BMI'] = df['BIA-BIA_Fat'] / df['BIA-BIA_BMI']\n df['FFMI_BFP'] = df['BIA-BIA_FFMI'] / df['BIA-BIA_Fat']\n df['FMI_BFP'] = df['BIA-BIA_FMI'] / df['BIA-BIA_Fat']\n df['LST_TBW'] = df['BIA-BIA_LST'] / df['BIA-BIA_TBW']\n df['BFP_BMR'] = df['BIA-BIA_Fat'] * df['BIA-BIA_BMR']\n df['BFP_DEE'] = df['BIA-BIA_Fat'] * df['BIA-BIA_DEE']\n # df['BMR_Weight'] = df['BIA-BIA_BMR'] / df['Physical-Weight']\n # df['DEE_Weight'] = df['BIA-BIA_DEE'] / df['Physical-Weight']\n df['SMM_Height'] = df['BIA-BIA_SMM'] / df['Physical-Height']\n df['Muscle_to_Fat'] = df['BIA-BIA_SMM'] / df['BIA-BIA_FMI']\n # df['Hydration_Status'] = df['BIA-BIA_TBW'] / df['Physical-Weight']\n df['ICW_TBW'] = df['BIA-BIA_ICW'] / df['BIA-BIA_TBW']\n \n return df","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.607577Z","iopub.execute_input":"2024-11-13T05:47:26.607928Z","iopub.status.idle":"2024-11-13T05:47:26.627323Z","shell.execute_reply.started":"2024-11-13T05:47:26.607889Z","shell.execute_reply":"2024-11-13T05:47:26.626499Z"}},"outputs":[],"execution_count":8},{"cell_type":"code","source":"# 把SII是空的column刪除\ntrain_df = train_df.dropna(subset=['sii'])","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.628552Z","iopub.execute_input":"2024-11-13T05:47:26.628895Z","iopub.status.idle":"2024-11-13T05:47:26.643958Z","shell.execute_reply.started":"2024-11-13T05:47:26.628855Z","shell.execute_reply":"2024-11-13T05:47:26.643000Z"}},"outputs":[],"execution_count":9},{"cell_type":"code","source":"# PCIAT 有些欄位是空的,會影響最後SII結果,把若填滿PCIAT有可能改變SII的column刪除\nPCIAT_cols = [f'PCIAT-PCIAT_{i+1:02d}' for i in range(20)]\ndef IncorrectRows(row):\n if pd.isna(row['PCIAT-PCIAT_Total']):\n return np.nan\n max_possible = row['PCIAT-PCIAT_Total'] + row[PCIAT_cols].isna().sum() * 5\n if row['PCIAT-PCIAT_Total'] <= 30 and max_possible <= 30:\n return 0\n elif 31 <= row['PCIAT-PCIAT_Total'] <= 49 and max_possible <= 49:\n return 1\n elif 50 <= row['PCIAT-PCIAT_Total'] <= 79 and max_possible <= 79:\n return 2\n elif row['PCIAT-PCIAT_Total'] >= 80 and max_possible >= 80:\n return 3\n return np.nan\n\ntrain_df['recal_sii'] = train_df.apply(IncorrectRows, axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:26.647652Z","iopub.execute_input":"2024-11-13T05:47:26.648313Z","iopub.status.idle":"2024-11-13T05:47:28.056316Z","shell.execute_reply.started":"2024-11-13T05:47:26.648280Z","shell.execute_reply":"2024-11-13T05:47:28.055050Z"}},"outputs":[],"execution_count":10},{"cell_type":"code","source":"mismatch_rows = train_df[\n (train_df['recal_sii'] != train_df['sii']) & train_df['sii'].notna()\n]\nmismatch_indexes = mismatch_rows.index\ntrain_df = train_df.drop(mismatch_indexes)\ntrain_df = train_df.drop(['recal_sii'], axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.057357Z","iopub.execute_input":"2024-11-13T05:47:28.057658Z","iopub.status.idle":"2024-11-13T05:47:28.069454Z","shell.execute_reply.started":"2024-11-13T05:47:28.057620Z","shell.execute_reply":"2024-11-13T05:47:28.068574Z"}},"outputs":[],"execution_count":11},{"cell_type":"code","source":"# 把有關Season的column做mapping \nSEASON_COLS = [\n \"Basic_Demos-Enroll_Season\", \n \"CGAS-Season\", \n \"Physical-Season\", \n \"Fitness_Endurance-Season\", \n \"FGC-Season\", \n \"BIA-Season\", \n \"PAQ-Season\",\n \"SDS-Season\",\n \"PreInt_EduHx-Season\", \n ]\ndef update(df):\n for c in SEASON_COLS: \n df[c] = df[c].fillna('Missing')\n df[c] = df[c].astype('category')\n return df\ntrain_df = update(train_df)\ntest_df = update(test_df)\nseason_mapping = {'Spring': 0, 'Summer': 1, 'Fall': 2, 'Winter': 3, 'Missing': 4}\nfor col in SEASON_COLS:\n train_df[col] = train_df[col].map(season_mapping)\n test_df[col] = test_df[col].map(season_mapping)\ntrain_df['PCIAT-Season'] = train_df['PCIAT-Season'].map(season_mapping)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.070494Z","iopub.execute_input":"2024-11-13T05:47:28.070811Z","iopub.status.idle":"2024-11-13T05:47:28.112556Z","shell.execute_reply.started":"2024-11-13T05:47:28.070781Z","shell.execute_reply":"2024-11-13T05:47:28.111870Z"}},"outputs":[],"execution_count":12},{"cell_type":"code","source":"# 做Imputer\ntrain_id = train_df['id']\ntest_id = test_df['id']\ntrain_features = train_df.drop(columns=['id'])\ntest_features = test_df.drop(columns=['id'])\n\nimputer = SimpleImputer(strategy='median')\ntrain_features_imputed = pd.DataFrame(imputer.fit_transform(train_features), columns=train_features.columns, index=train_features.index)\ntest_features_imputed = pd.DataFrame(imputer.fit_transform(test_features), columns=test_features.columns, index=test_features.index)\n\ntrain_df = pd.concat([train_id, train_features_imputed], axis=1)\ntest_df = pd.concat([test_id, test_features_imputed], axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.113660Z","iopub.execute_input":"2024-11-13T05:47:28.114017Z","iopub.status.idle":"2024-11-13T05:47:28.161128Z","shell.execute_reply.started":"2024-11-13T05:47:28.113975Z","shell.execute_reply":"2024-11-13T05:47:28.160436Z"}},"outputs":[],"execution_count":13},{"cell_type":"code","source":"train_cor = train_df.drop('id', axis=1)\ntest_cor = test_df.drop('id', axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.162106Z","iopub.execute_input":"2024-11-13T05:47:28.162408Z","iopub.status.idle":"2024-11-13T05:47:28.167753Z","shell.execute_reply.started":"2024-11-13T05:47:28.162368Z","shell.execute_reply":"2024-11-13T05:47:28.166856Z"}},"outputs":[],"execution_count":14},{"cell_type":"code","source":"# # 尋找和PCIAT_Total相關性低的column並刪除 \n# corr_matrix = train_cor[['PCIAT-PCIAT_Total', 'Basic_Demos-Age', 'Basic_Demos-Sex', 'Physical-BMI', \n# 'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',\n# 'Physical-Diastolic_BP', 'Physical-Systolic_BP', 'Physical-HeartRate',\n# 'PreInt_EduHx-computerinternet_hoursday', 'SDS-SDS_Total_T', 'PAQ-PAQ_Total',\n# 'Fitness_Endurance-Max_Stage', 'Fitness_Endurance-Time_Mins', \n# 'Fitness_Endurance-Time_Sec', 'FGC-FGC_CU', 'FGC-FGC_GSND', 'FGC-FGC_GSD', \n# 'FGC-FGC_PU', 'FGC-FGC_SRL', 'FGC-FGC_SRR', 'FGC-FGC_TL', 'BIA-BIA_Activity_Level_num', \n# 'BIA-BIA_BMC', 'BIA-BIA_BMI', 'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',\n# 'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num', 'BIA-BIA_ICW', \n# 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM', 'BIA-BIA_TBW']].corr()\n# sii_corr = corr_matrix['PCIAT-PCIAT_Total'].drop('PCIAT-PCIAT_Total')\n# filtered_corr = sii_corr[(sii_corr > 0.1) | (sii_corr < -0.1)]\n# other_corr = sii_corr[(sii_corr <= 0.1) & (sii_corr >= -0.1)]\n# other_corr_columns = other_corr.index.tolist()\n# print(other_corr)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.168816Z","iopub.execute_input":"2024-11-13T05:47:28.169127Z","iopub.status.idle":"2024-11-13T05:47:28.177548Z","shell.execute_reply.started":"2024-11-13T05:47:28.169095Z","shell.execute_reply":"2024-11-13T05:47:28.176522Z"}},"outputs":[],"execution_count":15},{"cell_type":"code","source":"# plt.figure(figsize=(8, 6))\n# filtered_corr.sort_values().plot(kind='barh', color='coral')\n# plt.title('Features with Correlation > 0.1 or < -0.1 with PCIAT-PCIAT_Total')\n# plt.xlabel('Correlation coefficient')\n# plt.ylabel('Features')\n# plt.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.178568Z","iopub.execute_input":"2024-11-13T05:47:28.178889Z","iopub.status.idle":"2024-11-13T05:47:28.186015Z","shell.execute_reply.started":"2024-11-13T05:47:28.178859Z","shell.execute_reply":"2024-11-13T05:47:28.185274Z"}},"outputs":[],"execution_count":16},{"cell_type":"code","source":"# 把parquet data加進去 \ntrain_ts = load_time_series(\"/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet\")\ntest_ts = load_time_series(\"/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet\")\n\ntrain_ts_noID = train_ts.drop('id', axis=1)\ntest_ts_noID = test_ts.drop('id', axis=1)\n\ntrain_ts_encoded = perform_autoencoder(train_ts_noID, encoding_dim=60, epochs=100, batch_size=32)\ntest_ts_encoded = perform_autoencoder(test_ts_noID, encoding_dim=60, epochs=100, batch_size=32)\n\ntime_series_cols = train_ts_encoded.columns.tolist()\ntrain_ts_encoded[\"id\"]=train_ts[\"id\"]\ntest_ts_encoded['id']=test_ts[\"id\"]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:47:28.187115Z","iopub.execute_input":"2024-11-13T05:47:28.188948Z","iopub.status.idle":"2024-11-13T05:49:01.937083Z","shell.execute_reply.started":"2024-11-13T05:47:28.188917Z","shell.execute_reply":"2024-11-13T05:49:01.936140Z"}},"outputs":[{"name":"stderr","text":"100%|██████████| 996/996 [01:21<00:00, 12.24it/s]\n100%|██████████| 2/2 [00:00<00:00, 10.34it/s]\n","output_type":"stream"},{"name":"stdout","text":"Epoch [10/100], Loss: 1.6444]\nEpoch [20/100], Loss: 1.5530]\nEpoch [30/100], Loss: 1.5247]\nEpoch [40/100], Loss: 1.5126]\nEpoch [50/100], Loss: 1.5120]\nEpoch [60/100], Loss: 1.5064]\nEpoch [70/100], Loss: 1.5068]\nEpoch [80/100], Loss: 1.5042]\nEpoch [90/100], Loss: 1.4803]\nEpoch [100/100], Loss: 1.4700]\nEpoch [10/100], Loss: 0.9721]\nEpoch [20/100], Loss: 0.4846]\nEpoch [30/100], Loss: 0.4271]\nEpoch [40/100], Loss: 0.4271]\nEpoch [50/100], Loss: 0.4271]\nEpoch [60/100], Loss: 0.4271]\nEpoch [70/100], Loss: 0.4271]\nEpoch [80/100], Loss: 0.4271]\nEpoch [90/100], Loss: 0.4271]\nEpoch [100/100], Loss: 0.4271]\n","output_type":"stream"}],"execution_count":17},{"cell_type":"code","source":"TARGET_COLS = [\n \"PCIAT-Season\",\n \"PCIAT-PCIAT_01\",\n \"PCIAT-PCIAT_02\",\n \"PCIAT-PCIAT_03\",\n \"PCIAT-PCIAT_04\",\n \"PCIAT-PCIAT_05\",\n \"PCIAT-PCIAT_06\",\n \"PCIAT-PCIAT_07\",\n \"PCIAT-PCIAT_08\",\n \"PCIAT-PCIAT_09\",\n \"PCIAT-PCIAT_10\",\n \"PCIAT-PCIAT_11\",\n \"PCIAT-PCIAT_12\",\n \"PCIAT-PCIAT_13\",\n \"PCIAT-PCIAT_14\",\n \"PCIAT-PCIAT_15\",\n \"PCIAT-PCIAT_16\", \n \"PCIAT-PCIAT_17\",\n \"PCIAT-PCIAT_18\",\n \"PCIAT-PCIAT_19\",\n \"PCIAT-PCIAT_20\",\n \"PCIAT-PCIAT_Total\"\n]\ntrain_df = train_df.drop(TARGET_COLS,axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:01.938394Z","iopub.execute_input":"2024-11-13T05:49:01.938891Z","iopub.status.idle":"2024-11-13T05:49:01.945395Z","shell.execute_reply.started":"2024-11-13T05:49:01.938856Z","shell.execute_reply":"2024-11-13T05:49:01.944474Z"}},"outputs":[],"execution_count":18},{"cell_type":"code","source":"train_df = pd.merge(train_df, train_ts_encoded, how=\"left\", on='id')\ntest_df = pd.merge(test_df, test_ts_encoded, how=\"left\", on='id')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:01.946900Z","iopub.execute_input":"2024-11-13T05:49:01.947195Z","iopub.status.idle":"2024-11-13T05:49:01.980265Z","shell.execute_reply.started":"2024-11-13T05:49:01.947164Z","shell.execute_reply":"2024-11-13T05:49:01.979392Z"}},"outputs":[],"execution_count":19},{"cell_type":"code","source":"imputer = KNNImputer(n_neighbors=5)\nnumeric_cols = train_df.select_dtypes(include=['float64', 'int64','float32', 'int32']).columns\nimputed_data = imputer.fit_transform(train_df[numeric_cols])\ntrain_imputed = pd.DataFrame(imputed_data, columns=numeric_cols)\ntrain_imputed['sii'] = train_imputed['sii'].round().astype(int)\nfor col in train_df.columns:\n if col not in numeric_cols:\n train_imputed[col] = train_df[col]\n \ntrain_df = train_imputed\n\ntrain_df = feature_engineering(train_df)\ntrain_df = train_df.dropna(thresh=10, axis=0)\ntest_df = feature_engineering(test_df)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:01.981365Z","iopub.execute_input":"2024-11-13T05:49:01.981932Z","iopub.status.idle":"2024-11-13T05:49:05.215521Z","shell.execute_reply.started":"2024-11-13T05:49:01.981898Z","shell.execute_reply":"2024-11-13T05:49:05.214702Z"}},"outputs":[],"execution_count":20},{"cell_type":"code","source":"# train_df = train_df.drop(columns=other_corr_columns)\n# test_df = test_df.drop(columns=other_corr_columns)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.216628Z","iopub.execute_input":"2024-11-13T05:49:05.216938Z","iopub.status.idle":"2024-11-13T05:49:05.221191Z","shell.execute_reply.started":"2024-11-13T05:49:05.216906Z","shell.execute_reply":"2024-11-13T05:49:05.220002Z"}},"outputs":[],"execution_count":21},{"cell_type":"code","source":"featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',\n 'CGAS-CGAS_Score', 'Physical-BMI',\n 'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',\n 'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',\n 'Fitness_Endurance-Max_Stage',\n 'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',\n 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',\n 'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',\n 'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',\n 'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',\n 'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',\n 'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',\n 'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',\n 'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',\n 'BIA-BIA_TBW', 'PAQ-PAQ_Total',\n 'SDS-SDS_Total_Raw','SDS-SDS_Total_T',\n 'PreInt_EduHx-computerinternet_hoursday', 'sii', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',\n 'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE','SMM_Height', 'Muscle_to_Fat', 'ICW_TBW']\n\nfeaturesCols += time_series_cols\n\ntrain_df = train_df[featuresCols]\ntrain_df = train_df.dropna(subset='sii')\n\nfeaturesCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',\n 'CGAS-CGAS_Score', 'Physical-BMI',\n 'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',\n 'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',\n 'Fitness_Endurance-Max_Stage',\n 'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec',\n 'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_GSND',\n 'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU',\n 'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',\n 'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',\n 'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',\n 'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',\n 'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',\n 'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',\n 'BIA-BIA_TBW', 'PAQ-PAQ_Total',\n 'SDS-SDS_Total_Raw',\n 'SDS-SDS_Total_T',\n 'PreInt_EduHx-computerinternet_hoursday', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',\n 'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE',\n 'SMM_Height', 'Muscle_to_Fat', 'ICW_TBW']\n\nfeaturesCols += time_series_cols\ntest_df = test_df[featuresCols]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.222607Z","iopub.execute_input":"2024-11-13T05:49:05.222907Z","iopub.status.idle":"2024-11-13T05:49:05.237028Z","shell.execute_reply.started":"2024-11-13T05:49:05.222876Z","shell.execute_reply":"2024-11-13T05:49:05.235986Z"}},"outputs":[],"execution_count":22},{"cell_type":"code","source":"# train_df = train_df.drop('id', axis=1)\n# test_df = test_df.drop('id', axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.238314Z","iopub.execute_input":"2024-11-13T05:49:05.238602Z","iopub.status.idle":"2024-11-13T05:49:05.245472Z","shell.execute_reply.started":"2024-11-13T05:49:05.238571Z","shell.execute_reply":"2024-11-13T05:49:05.244643Z"}},"outputs":[],"execution_count":23},{"cell_type":"code","source":"print(f'Train Shape : {train_df.shape} || Test Shape : {test_df.shape}')","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.246391Z","iopub.execute_input":"2024-11-13T05:49:05.246936Z","iopub.status.idle":"2024-11-13T05:49:05.254552Z","shell.execute_reply.started":"2024-11-13T05:49:05.246905Z","shell.execute_reply":"2024-11-13T05:49:05.253669Z"}},"outputs":[{"name":"stdout","text":"Train Shape : (2718, 120) || Test Shape : (20, 119)\n","output_type":"stream"}],"execution_count":24},{"cell_type":"code","source":"train_df = train_df.fillna(0)\ntest_df = test_df.fillna(0)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.255633Z","iopub.execute_input":"2024-11-13T05:49:05.255975Z","iopub.status.idle":"2024-11-13T05:49:05.262661Z","shell.execute_reply.started":"2024-11-13T05:49:05.255931Z","shell.execute_reply":"2024-11-13T05:49:05.261725Z"}},"outputs":[],"execution_count":25},{"cell_type":"code","source":"from imblearn.over_sampling import SMOTE\nX_train = train_df.drop(columns=['sii']) # 假設 'sii' 是目標欄位\ny_train = train_df['sii']\n\n# 使用 SMOTE 進行過採樣\nsmote = SMOTE(random_state=42)\nX_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.263929Z","iopub.execute_input":"2024-11-13T05:49:05.264314Z","iopub.status.idle":"2024-11-13T05:49:05.491792Z","shell.execute_reply.started":"2024-11-13T05:49:05.264275Z","shell.execute_reply":"2024-11-13T05:49:05.490934Z"}},"outputs":[],"execution_count":26},{"cell_type":"code","source":"X_train_resampled","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.493068Z","iopub.execute_input":"2024-11-13T05:49:05.493638Z","iopub.status.idle":"2024-11-13T05:49:05.530858Z","shell.execute_reply.started":"2024-11-13T05:49:05.493589Z","shell.execute_reply":"2024-11-13T05:49:05.529929Z"}},"outputs":[{"execution_count":27,"output_type":"execute_result","data":{"text/plain":" Basic_Demos-Age Basic_Demos-Sex CGAS-CGAS_Score Physical-BMI \\\n0 5.000000 0.000000 51.000000 16.877316 \n1 9.000000 0.000000 65.000000 14.035590 \n2 10.000000 1.000000 71.000000 16.648696 \n3 9.000000 0.000000 71.000000 18.292347 \n4 13.000000 1.000000 50.000000 22.279952 \n... ... ... ... ... \n6339 12.599601 0.000000 71.005984 28.680067 \n6340 13.646459 0.646459 57.929175 22.659126 \n6341 14.000000 0.974194 65.077417 18.905577 \n6342 17.000000 0.152939 53.058783 22.445850 \n6343 15.905620 0.000000 64.764050 45.737848 \n\n Physical-Height Physical-Weight Physical-Waist_Circumference \\\n0 46.000000 50.800000 26.000000 \n1 48.000000 46.000000 22.000000 \n2 56.500000 75.600000 26.000000 \n3 56.000000 81.600000 26.000000 \n4 59.500000 112.200000 26.000000 \n... ... ... ... \n6339 65.599601 175.479681 26.000000 \n6340 63.136403 131.784379 26.000000 \n6341 60.601618 98.790989 26.000000 \n6342 66.464713 141.155331 26.000000 \n6343 67.558987 296.787350 48.457316 \n\n Physical-Diastolic_BP Physical-HeartRate Physical-Systolic_BP ... \\\n0 68.000000 81.000000 114.000000 ... \n1 75.000000 70.000000 122.000000 ... \n2 65.000000 94.000000 117.000000 ... \n3 60.000000 97.000000 117.000000 ... \n4 60.000000 73.000000 102.000000 ... \n... ... ... ... ... \n6339 74.897407 86.599601 134.201197 ... \n6340 97.929175 87.363743 154.192073 ... \n6341 104.045194 81.000000 144.200028 ... \n6342 79.694122 79.529392 128.847061 ... \n6343 71.480911 92.669670 156.150581 ... \n\n Enc_51 Enc_52 Enc_53 Enc_54 Enc_55 Enc_56 Enc_57 \\\n0 1.261670 3.364906 1.822499 3.199418 1.867914 0.601543 1.124913 \n1 0.877661 2.240514 2.114751 1.985586 2.209620 0.819748 2.199565 \n2 1.659091 0.704123 1.858778 1.130553 1.789839 1.242414 6.432002 \n3 1.258142 2.517515 0.000000 2.423617 2.779365 0.000000 0.000000 \n4 2.321296 0.205333 3.107339 0.000000 3.114447 1.274335 3.261590 \n... ... ... ... ... ... ... ... \n6339 0.525903 0.324441 3.958662 0.538026 1.577378 1.804936 2.348030 \n6340 0.722748 0.678223 3.279521 0.606808 2.857975 2.727269 1.942552 \n6341 0.100740 0.387067 3.715885 0.029209 3.591292 3.906484 1.904498 \n6342 1.780591 0.936604 2.920789 0.641967 2.601398 1.152416 3.379448 \n6343 2.631158 0.894702 2.344437 2.090473 2.944876 1.378858 1.562434 \n\n Enc_58 Enc_59 Enc_60 \n0 1.397312 2.965155 1.810345 \n1 1.322297 1.787001 1.673745 \n2 1.905194 0.867131 2.600431 \n3 0.000000 2.142169 0.294263 \n4 2.929590 1.146064 0.000000 \n... ... ... ... \n6339 3.283607 2.516716 2.757017 \n6340 1.797713 1.541758 3.451328 \n6341 1.364211 1.504025 4.319537 \n6342 2.633135 0.877951 1.667096 \n6343 2.606388 2.460010 1.192702 \n\n[6344 rows x 119 columns]","text/html":"\n\n
\n \n \n | \n Basic_Demos-Age | \n Basic_Demos-Sex | \n CGAS-CGAS_Score | \n Physical-BMI | \n Physical-Height | \n Physical-Weight | \n Physical-Waist_Circumference | \n Physical-Diastolic_BP | \n Physical-HeartRate | \n Physical-Systolic_BP | \n ... | \n Enc_51 | \n Enc_52 | \n Enc_53 | \n Enc_54 | \n Enc_55 | \n Enc_56 | \n Enc_57 | \n Enc_58 | \n Enc_59 | \n Enc_60 | \n
\n \n \n \n | 0 | \n 5.000000 | \n 0.000000 | \n 51.000000 | \n 16.877316 | \n 46.000000 | \n 50.800000 | \n 26.000000 | \n 68.000000 | \n 81.000000 | \n 114.000000 | \n ... | \n 1.261670 | \n 3.364906 | \n 1.822499 | \n 3.199418 | \n 1.867914 | \n 0.601543 | \n 1.124913 | \n 1.397312 | \n 2.965155 | \n 1.810345 | \n
\n \n | 1 | \n 9.000000 | \n 0.000000 | \n 65.000000 | \n 14.035590 | \n 48.000000 | \n 46.000000 | \n 22.000000 | \n 75.000000 | \n 70.000000 | \n 122.000000 | \n ... | \n 0.877661 | \n 2.240514 | \n 2.114751 | \n 1.985586 | \n 2.209620 | \n 0.819748 | \n 2.199565 | \n 1.322297 | \n 1.787001 | \n 1.673745 | \n
\n \n | 2 | \n 10.000000 | \n 1.000000 | \n 71.000000 | \n 16.648696 | \n 56.500000 | \n 75.600000 | \n 26.000000 | \n 65.000000 | \n 94.000000 | \n 117.000000 | \n ... | \n 1.659091 | \n 0.704123 | \n 1.858778 | \n 1.130553 | \n 1.789839 | \n 1.242414 | \n 6.432002 | \n 1.905194 | \n 0.867131 | \n 2.600431 | \n
\n \n | 3 | \n 9.000000 | \n 0.000000 | \n 71.000000 | \n 18.292347 | \n 56.000000 | \n 81.600000 | \n 26.000000 | \n 60.000000 | \n 97.000000 | \n 117.000000 | \n ... | \n 1.258142 | \n 2.517515 | \n 0.000000 | \n 2.423617 | \n 2.779365 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 2.142169 | \n 0.294263 | \n
\n \n | 4 | \n 13.000000 | \n 1.000000 | \n 50.000000 | \n 22.279952 | \n 59.500000 | \n 112.200000 | \n 26.000000 | \n 60.000000 | \n 73.000000 | \n 102.000000 | \n ... | \n 2.321296 | \n 0.205333 | \n 3.107339 | \n 0.000000 | \n 3.114447 | \n 1.274335 | \n 3.261590 | \n 2.929590 | \n 1.146064 | \n 0.000000 | \n
\n \n | ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n | 6339 | \n 12.599601 | \n 0.000000 | \n 71.005984 | \n 28.680067 | \n 65.599601 | \n 175.479681 | \n 26.000000 | \n 74.897407 | \n 86.599601 | \n 134.201197 | \n ... | \n 0.525903 | \n 0.324441 | \n 3.958662 | \n 0.538026 | \n 1.577378 | \n 1.804936 | \n 2.348030 | \n 3.283607 | \n 2.516716 | \n 2.757017 | \n
\n \n | 6340 | \n 13.646459 | \n 0.646459 | \n 57.929175 | \n 22.659126 | \n 63.136403 | \n 131.784379 | \n 26.000000 | \n 97.929175 | \n 87.363743 | \n 154.192073 | \n ... | \n 0.722748 | \n 0.678223 | \n 3.279521 | \n 0.606808 | \n 2.857975 | \n 2.727269 | \n 1.942552 | \n 1.797713 | \n 1.541758 | \n 3.451328 | \n
\n \n | 6341 | \n 14.000000 | \n 0.974194 | \n 65.077417 | \n 18.905577 | \n 60.601618 | \n 98.790989 | \n 26.000000 | \n 104.045194 | \n 81.000000 | \n 144.200028 | \n ... | \n 0.100740 | \n 0.387067 | \n 3.715885 | \n 0.029209 | \n 3.591292 | \n 3.906484 | \n 1.904498 | \n 1.364211 | \n 1.504025 | \n 4.319537 | \n
\n \n | 6342 | \n 17.000000 | \n 0.152939 | \n 53.058783 | \n 22.445850 | \n 66.464713 | \n 141.155331 | \n 26.000000 | \n 79.694122 | \n 79.529392 | \n 128.847061 | \n ... | \n 1.780591 | \n 0.936604 | \n 2.920789 | \n 0.641967 | \n 2.601398 | \n 1.152416 | \n 3.379448 | \n 2.633135 | \n 0.877951 | \n 1.667096 | \n
\n \n | 6343 | \n 15.905620 | \n 0.000000 | \n 64.764050 | \n 45.737848 | \n 67.558987 | \n 296.787350 | \n 48.457316 | \n 71.480911 | \n 92.669670 | \n 156.150581 | \n ... | \n 2.631158 | \n 0.894702 | \n 2.344437 | \n 2.090473 | \n 2.944876 | \n 1.378858 | \n 1.562434 | \n 2.606388 | \n 2.460010 | \n 1.192702 | \n
\n \n
\n
6344 rows × 119 columns
\n
"},"metadata":{}}],"execution_count":27},{"cell_type":"code","source":"y_train_resampled","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.535501Z","iopub.execute_input":"2024-11-13T05:49:05.535824Z","iopub.status.idle":"2024-11-13T05:49:05.543279Z","shell.execute_reply.started":"2024-11-13T05:49:05.535790Z","shell.execute_reply":"2024-11-13T05:49:05.542154Z"}},"outputs":[{"execution_count":28,"output_type":"execute_result","data":{"text/plain":"0 2\n1 0\n2 0\n3 1\n4 1\n ..\n6339 3\n6340 3\n6341 3\n6342 3\n6343 3\nName: sii, Length: 6344, dtype: int64"},"metadata":{}}],"execution_count":28},{"cell_type":"code","source":"X_train = X_train_resampled.values\ny_train = y_train_resampled.values.reshape(-1, 1)\nX_test = test_df.values\n\n# model = MLPClassifier(hidden_layer_sizes=(128, 64, 32), max_iter=500, random_state=42)\n# model.fit(X_train, y_train)\n\n# model = RandomForestClassifier(random_state=0)\n# model.fit(X_train, y_train)\n\nX_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)\n\nmodel = TabNetRegressor(\n n_d=64, # Width of the decision prediction layer\n n_a=64, # Width of the attention embedding for each step\n n_steps=5, # Number of steps in the architecture\n gamma=1.5, # Coefficient for feature selection regularization\n n_independent=2, # Number of independent GLU layer in each GLU block\n n_shared=2, # Number of shared GLU layer in each GLU block\n lambda_sparse=1e-4, # Sparsity regularization\n optimizer_fn=torch.optim.Adam,\n optimizer_params=dict(lr=2e-2, weight_decay=1e-5),\n mask_type='entmax',\n scheduler_params=dict(mode=\"min\", patience=10, min_lr=1e-5, factor=0.5),\n scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,\n verbose=1,\n device_name='cuda' if torch.cuda.is_available() else 'cpu'\n)\n\n\n# 訓練 TabNet 模型\nmodel.fit(\n X_train, y_train,\n eval_set=[(X_val, y_val)], # 指定驗證集\n eval_name=['val'], # 命名驗證集\n eval_metric=['mae'], # 設定評估指標,例如 MAE\n max_epochs=500, \n patience=50, \n batch_size=1024,\n virtual_batch_size=128,\n num_workers=0,\n drop_last=False,\n)\n\n# model = XGBRegressor(\n# n_estimators=200, # 設置樹的數量\n# learning_rate=0.05, # 設置學習率\n# max_depth=6, # 最大樹深\n# subsample=0.8, # 隨機採樣比例\n# colsample_bytree=0.8, # 每棵樹的列采樣率\n# reg_alpha=1, # Increased from 0.1\n# reg_lambda=5, # Increased from 1\n# random_state=42\n# )\n\n# # 訓練 XGBRegressor,並設置早停\n# model.fit(\n# X_train, y_train,\n# eval_set=[(X_val, y_val)], # 指定驗證集\n# early_stopping_rounds=50, # 如果在 50 個 rounds 中,驗證集上的結果不再改善則停止訓練\n# verbose=True # 顯示訓練過程\n# )\n\n\ntest_df['sii'] = model.predict(X_test)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:49:05.544770Z","iopub.execute_input":"2024-11-13T05:49:05.545171Z","iopub.status.idle":"2024-11-13T05:52:23.712861Z","shell.execute_reply.started":"2024-11-13T05:49:05.545137Z","shell.execute_reply":"2024-11-13T05:52:23.711861Z"}},"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/pytorch_tabnet/abstract_model.py:82: UserWarning: Device used : cuda\n warnings.warn(f\"Device used : {self.device}\")\n","output_type":"stream"},{"name":"stdout","text":"epoch 0 | loss: 6.48088 | val_mae: 11.4224 | 0:00:01s\nepoch 1 | loss: 3.0014 | val_mae: 5.97052 | 0:00:01s\nepoch 2 | loss: 2.18413 | val_mae: 4.40949 | 0:00:02s\nepoch 3 | loss: 1.87663 | val_mae: 3.81592 | 0:00:02s\nepoch 4 | loss: 1.22423 | val_mae: 3.40053 | 0:00:02s\nepoch 5 | loss: 1.08828 | val_mae: 2.61743 | 0:00:03s\nepoch 6 | loss: 0.97629 | val_mae: 3.78685 | 0:00:03s\nepoch 7 | loss: 0.89271 | val_mae: 5.3196 | 0:00:04s\nepoch 8 | loss: 0.85532 | val_mae: 5.82879 | 0:00:04s\nepoch 9 | loss: 0.74888 | val_mae: 2.86027 | 0:00:04s\nepoch 10 | loss: 0.6972 | val_mae: 1.41034 | 0:00:05s\nepoch 11 | loss: 0.69719 | val_mae: 1.28311 | 0:00:05s\nepoch 12 | loss: 0.69969 | val_mae: 1.14508 | 0:00:06s\nepoch 13 | loss: 0.59078 | val_mae: 1.08213 | 0:00:06s\nepoch 14 | loss: 0.60524 | val_mae: 1.15833 | 0:00:07s\nepoch 15 | loss: 0.61787 | val_mae: 0.88686 | 0:00:07s\nepoch 16 | loss: 0.62558 | val_mae: 0.93153 | 0:00:07s\nepoch 17 | loss: 0.57334 | val_mae: 0.93411 | 0:00:08s\nepoch 18 | loss: 0.58509 | val_mae: 0.93036 | 0:00:08s\nepoch 19 | loss: 0.52418 | val_mae: 0.91495 | 0:00:08s\nepoch 20 | loss: 0.51392 | val_mae: 0.86802 | 0:00:09s\nepoch 21 | loss: 0.52913 | val_mae: 0.87921 | 0:00:09s\nepoch 22 | loss: 0.54695 | val_mae: 0.79027 | 0:00:10s\nepoch 23 | loss: 0.52668 | val_mae: 0.73901 | 0:00:10s\nepoch 24 | loss: 0.5011 | val_mae: 0.75841 | 0:00:10s\nepoch 25 | loss: 0.47266 | val_mae: 0.7547 | 0:00:11s\nepoch 26 | loss: 0.45865 | val_mae: 0.76172 | 0:00:11s\nepoch 27 | loss: 0.46872 | val_mae: 0.79685 | 0:00:12s\nepoch 28 | loss: 0.44856 | val_mae: 0.76449 | 0:00:12s\nepoch 29 | loss: 0.46872 | val_mae: 0.71646 | 0:00:12s\nepoch 30 | loss: 0.44068 | val_mae: 0.72265 | 0:00:13s\nepoch 31 | loss: 0.42659 | val_mae: 0.69917 | 0:00:13s\nepoch 32 | loss: 0.4546 | val_mae: 0.6914 | 0:00:14s\nepoch 33 | loss: 0.44429 | val_mae: 0.70442 | 0:00:14s\nepoch 34 | loss: 0.4369 | val_mae: 0.70166 | 0:00:14s\nepoch 35 | loss: 0.40845 | val_mae: 0.67789 | 0:00:15s\nepoch 36 | loss: 0.3948 | val_mae: 0.66101 | 0:00:15s\nepoch 37 | loss: 0.38659 | val_mae: 0.63114 | 0:00:15s\nepoch 38 | loss: 0.39296 | val_mae: 0.62454 | 0:00:16s\nepoch 39 | loss: 0.39577 | val_mae: 0.57916 | 0:00:16s\nepoch 40 | loss: 0.39311 | val_mae: 0.57471 | 0:00:17s\nepoch 41 | loss: 0.38526 | val_mae: 0.56525 | 0:00:17s\nepoch 42 | loss: 0.37192 | val_mae: 0.54737 | 0:00:18s\nepoch 43 | loss: 0.3588 | val_mae: 0.55591 | 0:00:18s\nepoch 44 | loss: 0.35507 | val_mae: 0.54695 | 0:00:18s\nepoch 45 | loss: 0.35904 | val_mae: 0.56725 | 0:00:19s\nepoch 46 | loss: 0.353 | val_mae: 0.56555 | 0:00:19s\nepoch 47 | loss: 0.33853 | val_mae: 0.55043 | 0:00:19s\nepoch 48 | loss: 0.33528 | val_mae: 0.53158 | 0:00:20s\nepoch 49 | loss: 0.33009 | val_mae: 0.53298 | 0:00:20s\nepoch 50 | loss: 0.322 | val_mae: 0.53689 | 0:00:21s\nepoch 51 | loss: 0.31595 | val_mae: 0.51777 | 0:00:21s\nepoch 52 | loss: 0.3147 | val_mae: 0.52906 | 0:00:22s\nepoch 53 | loss: 0.30779 | val_mae: 0.51697 | 0:00:22s\nepoch 54 | loss: 0.30978 | val_mae: 0.5383 | 0:00:22s\nepoch 55 | loss: 0.31289 | val_mae: 0.52325 | 0:00:23s\nepoch 56 | loss: 0.32303 | val_mae: 0.5209 | 0:00:23s\nepoch 57 | loss: 0.31037 | val_mae: 0.50058 | 0:00:24s\nepoch 58 | loss: 0.29908 | val_mae: 0.49511 | 0:00:24s\nepoch 59 | loss: 0.30051 | val_mae: 0.4947 | 0:00:24s\nepoch 60 | loss: 0.30248 | val_mae: 0.49454 | 0:00:25s\nepoch 61 | loss: 0.28474 | val_mae: 0.47484 | 0:00:25s\nepoch 62 | loss: 0.27101 | val_mae: 0.46047 | 0:00:26s\nepoch 63 | loss: 0.26703 | val_mae: 0.48885 | 0:00:26s\nepoch 64 | loss: 0.27625 | val_mae: 0.46967 | 0:00:27s\nepoch 65 | loss: 0.26972 | val_mae: 0.45922 | 0:00:27s\nepoch 66 | loss: 0.27568 | val_mae: 0.4646 | 0:00:27s\nepoch 67 | loss: 0.28902 | val_mae: 0.45984 | 0:00:28s\nepoch 68 | loss: 0.28289 | val_mae: 0.47522 | 0:00:28s\nepoch 69 | loss: 0.29235 | val_mae: 0.45521 | 0:00:28s\nepoch 70 | loss: 0.28482 | val_mae: 0.47567 | 0:00:29s\nepoch 71 | loss: 0.28862 | val_mae: 0.45885 | 0:00:29s\nepoch 72 | loss: 0.27421 | val_mae: 0.44701 | 0:00:30s\nepoch 73 | loss: 0.2607 | val_mae: 0.43948 | 0:00:30s\nepoch 74 | loss: 0.25798 | val_mae: 0.45778 | 0:00:30s\nepoch 75 | loss: 0.25439 | val_mae: 0.43334 | 0:00:31s\nepoch 76 | loss: 0.24477 | val_mae: 0.43692 | 0:00:31s\nepoch 77 | loss: 0.23369 | val_mae: 0.41563 | 0:00:32s\nepoch 78 | loss: 0.22097 | val_mae: 0.42804 | 0:00:32s\nepoch 79 | loss: 0.22463 | val_mae: 0.41656 | 0:00:32s\nepoch 80 | loss: 0.21311 | val_mae: 0.43018 | 0:00:33s\nepoch 81 | loss: 0.22251 | val_mae: 0.42096 | 0:00:33s\nepoch 82 | loss: 0.23856 | val_mae: 0.4346 | 0:00:34s\nepoch 83 | loss: 0.22806 | val_mae: 0.45499 | 0:00:34s\nepoch 84 | loss: 0.2249 | val_mae: 0.41676 | 0:00:34s\nepoch 85 | loss: 0.20735 | val_mae: 0.42195 | 0:00:35s\nepoch 86 | loss: 0.19801 | val_mae: 0.40718 | 0:00:35s\nepoch 87 | loss: 0.18962 | val_mae: 0.41033 | 0:00:36s\nepoch 88 | loss: 0.18901 | val_mae: 0.38627 | 0:00:36s\nepoch 89 | loss: 0.18984 | val_mae: 0.39282 | 0:00:36s\nepoch 90 | loss: 0.18969 | val_mae: 0.3895 | 0:00:37s\nepoch 91 | loss: 0.18186 | val_mae: 0.40511 | 0:00:37s\nepoch 92 | loss: 0.19037 | val_mae: 0.40054 | 0:00:38s\nepoch 93 | loss: 0.18903 | val_mae: 0.39453 | 0:00:38s\nepoch 94 | loss: 0.18476 | val_mae: 0.39046 | 0:00:38s\nepoch 95 | loss: 0.17165 | val_mae: 0.38591 | 0:00:39s\nepoch 96 | loss: 0.16755 | val_mae: 0.38735 | 0:00:39s\nepoch 97 | loss: 0.16491 | val_mae: 0.3851 | 0:00:39s\nepoch 98 | loss: 0.1659 | val_mae: 0.37832 | 0:00:40s\nepoch 99 | loss: 0.15661 | val_mae: 0.37687 | 0:00:40s\nepoch 100| loss: 0.15818 | val_mae: 0.38168 | 0:00:41s\nepoch 101| loss: 0.15682 | val_mae: 0.38221 | 0:00:41s\nepoch 102| loss: 0.15139 | val_mae: 0.36793 | 0:00:41s\nepoch 103| loss: 0.14553 | val_mae: 0.37494 | 0:00:42s\nepoch 104| loss: 0.14443 | val_mae: 0.37052 | 0:00:42s\nepoch 105| loss: 0.13828 | val_mae: 0.37553 | 0:00:43s\nepoch 106| loss: 0.14546 | val_mae: 0.3655 | 0:00:43s\nepoch 107| loss: 0.13861 | val_mae: 0.36527 | 0:00:43s\nepoch 108| loss: 0.13595 | val_mae: 0.37287 | 0:00:44s\nepoch 109| loss: 0.13018 | val_mae: 0.37096 | 0:00:44s\nepoch 110| loss: 0.12787 | val_mae: 0.36449 | 0:00:45s\nepoch 111| loss: 0.13244 | val_mae: 0.37788 | 0:00:45s\nepoch 112| loss: 0.14916 | val_mae: 0.38248 | 0:00:45s\nepoch 113| loss: 0.16093 | val_mae: 0.3897 | 0:00:46s\nepoch 114| loss: 0.16199 | val_mae: 0.39899 | 0:00:46s\nepoch 115| loss: 0.16366 | val_mae: 0.38225 | 0:00:47s\nepoch 116| loss: 0.16124 | val_mae: 0.3761 | 0:00:47s\nepoch 117| loss: 0.15309 | val_mae: 0.37511 | 0:00:47s\nepoch 118| loss: 0.15655 | val_mae: 0.38381 | 0:00:48s\nepoch 119| loss: 0.15274 | val_mae: 0.38961 | 0:00:48s\nepoch 120| loss: 0.15683 | val_mae: 0.369 | 0:00:48s\nepoch 121| loss: 0.1511 | val_mae: 0.39378 | 0:00:49s\nepoch 122| loss: 0.14418 | val_mae: 0.39161 | 0:00:49s\nepoch 123| loss: 0.13844 | val_mae: 0.37324 | 0:00:50s\nepoch 124| loss: 0.13275 | val_mae: 0.35898 | 0:00:50s\nepoch 125| loss: 0.12284 | val_mae: 0.35898 | 0:00:50s\nepoch 126| loss: 0.11983 | val_mae: 0.35964 | 0:00:51s\nepoch 127| loss: 0.11664 | val_mae: 0.34983 | 0:00:51s\nepoch 128| loss: 0.11573 | val_mae: 0.34319 | 0:00:52s\nepoch 129| loss: 0.11149 | val_mae: 0.34795 | 0:00:52s\nepoch 130| loss: 0.11477 | val_mae: 0.34277 | 0:00:52s\nepoch 131| loss: 0.10572 | val_mae: 0.3419 | 0:00:53s\nepoch 132| loss: 0.10359 | val_mae: 0.33799 | 0:00:53s\nepoch 133| loss: 0.09949 | val_mae: 0.34125 | 0:00:54s\nepoch 134| loss: 0.10497 | val_mae: 0.33852 | 0:00:54s\nepoch 135| loss: 0.0997 | val_mae: 0.34089 | 0:00:55s\nepoch 136| loss: 0.09739 | val_mae: 0.33332 | 0:00:55s\nepoch 137| loss: 0.09458 | val_mae: 0.33695 | 0:00:55s\nepoch 138| loss: 0.09471 | val_mae: 0.33592 | 0:00:56s\nepoch 139| loss: 0.09454 | val_mae: 0.33795 | 0:00:56s\nepoch 140| loss: 0.09362 | val_mae: 0.33736 | 0:00:57s\nepoch 141| loss: 0.09201 | val_mae: 0.34156 | 0:00:57s\nepoch 142| loss: 0.10164 | val_mae: 0.33414 | 0:00:57s\nepoch 143| loss: 0.09523 | val_mae: 0.33537 | 0:00:58s\nepoch 144| loss: 0.0903 | val_mae: 0.3345 | 0:00:58s\nepoch 145| loss: 0.09123 | val_mae: 0.329 | 0:00:59s\nepoch 146| loss: 0.09036 | val_mae: 0.33208 | 0:00:59s\nepoch 147| loss: 0.08732 | val_mae: 0.32531 | 0:00:59s\nepoch 148| loss: 0.08969 | val_mae: 0.32206 | 0:01:00s\nepoch 149| loss: 0.08871 | val_mae: 0.32377 | 0:01:00s\nepoch 150| loss: 0.08823 | val_mae: 0.31975 | 0:01:00s\nepoch 151| loss: 0.08112 | val_mae: 0.31621 | 0:01:01s\nepoch 152| loss: 0.08145 | val_mae: 0.3287 | 0:01:01s\nepoch 153| loss: 0.08241 | val_mae: 0.32463 | 0:01:02s\nepoch 154| loss: 0.07735 | val_mae: 0.32352 | 0:01:02s\nepoch 155| loss: 0.07764 | val_mae: 0.32413 | 0:01:03s\nepoch 156| loss: 0.08354 | val_mae: 0.32483 | 0:01:03s\nepoch 157| loss: 0.08031 | val_mae: 0.32112 | 0:01:03s\nepoch 158| loss: 0.07694 | val_mae: 0.32268 | 0:01:04s\nepoch 159| loss: 0.07843 | val_mae: 0.32354 | 0:01:04s\nepoch 160| loss: 0.07475 | val_mae: 0.31299 | 0:01:04s\nepoch 161| loss: 0.07048 | val_mae: 0.31245 | 0:01:05s\nepoch 162| loss: 0.07815 | val_mae: 0.31995 | 0:01:05s\nepoch 163| loss: 0.08051 | val_mae: 0.32467 | 0:01:06s\nepoch 164| loss: 0.08253 | val_mae: 0.31931 | 0:01:06s\nepoch 165| loss: 0.07915 | val_mae: 0.31159 | 0:01:07s\nepoch 166| loss: 0.07401 | val_mae: 0.31821 | 0:01:07s\nepoch 167| loss: 0.0715 | val_mae: 0.3107 | 0:01:07s\nepoch 168| loss: 0.07332 | val_mae: 0.31703 | 0:01:08s\nepoch 169| loss: 0.07948 | val_mae: 0.3217 | 0:01:08s\nepoch 170| loss: 0.08399 | val_mae: 0.32094 | 0:01:08s\nepoch 171| loss: 0.07646 | val_mae: 0.33298 | 0:01:09s\nepoch 172| loss: 0.07377 | val_mae: 0.31219 | 0:01:09s\nepoch 173| loss: 0.06844 | val_mae: 0.31301 | 0:01:10s\nepoch 174| loss: 0.07071 | val_mae: 0.3111 | 0:01:10s\nepoch 175| loss: 0.06722 | val_mae: 0.31464 | 0:01:10s\nepoch 176| loss: 0.0657 | val_mae: 0.30792 | 0:01:11s\nepoch 177| loss: 0.06514 | val_mae: 0.3219 | 0:01:11s\nepoch 178| loss: 0.0656 | val_mae: 0.3148 | 0:01:12s\nepoch 179| loss: 0.06385 | val_mae: 0.30718 | 0:01:12s\nepoch 180| loss: 0.06551 | val_mae: 0.31059 | 0:01:12s\nepoch 181| loss: 0.06243 | val_mae: 0.30552 | 0:01:13s\nepoch 182| loss: 0.05909 | val_mae: 0.31088 | 0:01:13s\nepoch 183| loss: 0.05926 | val_mae: 0.30634 | 0:01:14s\nepoch 184| loss: 0.05498 | val_mae: 0.30431 | 0:01:14s\nepoch 185| loss: 0.0567 | val_mae: 0.30047 | 0:01:14s\nepoch 186| loss: 0.05317 | val_mae: 0.29815 | 0:01:15s\nepoch 187| loss: 0.05156 | val_mae: 0.30129 | 0:01:15s\nepoch 188| loss: 0.05506 | val_mae: 0.29738 | 0:01:15s\nepoch 189| loss: 0.0545 | val_mae: 0.29256 | 0:01:16s\nepoch 190| loss: 0.0492 | val_mae: 0.29233 | 0:01:16s\nepoch 191| loss: 0.04835 | val_mae: 0.31109 | 0:01:17s\nepoch 192| loss: 0.05484 | val_mae: 0.2929 | 0:01:17s\nepoch 193| loss: 0.04864 | val_mae: 0.29071 | 0:01:17s\nepoch 194| loss: 0.04966 | val_mae: 0.29721 | 0:01:18s\nepoch 195| loss: 0.04741 | val_mae: 0.29565 | 0:01:18s\nepoch 196| loss: 0.04409 | val_mae: 0.29069 | 0:01:19s\nepoch 197| loss: 0.04661 | val_mae: 0.29113 | 0:01:19s\nepoch 198| loss: 0.04292 | val_mae: 0.29111 | 0:01:19s\nepoch 199| loss: 0.04267 | val_mae: 0.29373 | 0:01:20s\nepoch 200| loss: 0.04457 | val_mae: 0.28697 | 0:01:20s\nepoch 201| loss: 0.04033 | val_mae: 0.28897 | 0:01:21s\nepoch 202| loss: 0.04443 | val_mae: 0.2995 | 0:01:21s\nepoch 203| loss: 0.04422 | val_mae: 0.29514 | 0:01:21s\nepoch 204| loss: 0.04595 | val_mae: 0.29696 | 0:01:22s\nepoch 205| loss: 0.04591 | val_mae: 0.29156 | 0:01:22s\nepoch 206| loss: 0.04603 | val_mae: 0.29039 | 0:01:23s\nepoch 207| loss: 0.0478 | val_mae: 0.28577 | 0:01:23s\nepoch 208| loss: 0.04331 | val_mae: 0.28226 | 0:01:23s\nepoch 209| loss: 0.04061 | val_mae: 0.2838 | 0:01:24s\nepoch 210| loss: 0.03885 | val_mae: 0.27524 | 0:01:24s\nepoch 211| loss: 0.04033 | val_mae: 0.27871 | 0:01:25s\nepoch 212| loss: 0.03702 | val_mae: 0.27508 | 0:01:25s\nepoch 213| loss: 0.03761 | val_mae: 0.27015 | 0:01:26s\nepoch 214| loss: 0.03627 | val_mae: 0.276 | 0:01:26s\nepoch 215| loss: 0.03595 | val_mae: 0.27179 | 0:01:26s\nepoch 216| loss: 0.03348 | val_mae: 0.26921 | 0:01:27s\nepoch 217| loss: 0.03641 | val_mae: 0.27974 | 0:01:27s\nepoch 218| loss: 0.03771 | val_mae: 0.2777 | 0:01:28s\nepoch 219| loss: 0.03705 | val_mae: 0.26746 | 0:01:28s\nepoch 220| loss: 0.03151 | val_mae: 0.27308 | 0:01:28s\nepoch 221| loss: 0.03456 | val_mae: 0.27159 | 0:01:29s\nepoch 222| loss: 0.03258 | val_mae: 0.27376 | 0:01:29s\nepoch 223| loss: 0.03368 | val_mae: 0.27243 | 0:01:30s\nepoch 224| loss: 0.03005 | val_mae: 0.26911 | 0:01:30s\nepoch 225| loss: 0.03335 | val_mae: 0.26408 | 0:01:30s\nepoch 226| loss: 0.03271 | val_mae: 0.27387 | 0:01:31s\nepoch 227| loss: 0.03436 | val_mae: 0.26946 | 0:01:31s\nepoch 228| loss: 0.03145 | val_mae: 0.26807 | 0:01:32s\nepoch 229| loss: 0.02826 | val_mae: 0.26407 | 0:01:32s\nepoch 230| loss: 0.02924 | val_mae: 0.26663 | 0:01:33s\nepoch 231| loss: 0.02924 | val_mae: 0.26137 | 0:01:33s\nepoch 232| loss: 0.02976 | val_mae: 0.26726 | 0:01:34s\nepoch 233| loss: 0.02795 | val_mae: 0.26878 | 0:01:34s\nepoch 234| loss: 0.02723 | val_mae: 0.25983 | 0:01:34s\nepoch 235| loss: 0.02601 | val_mae: 0.26364 | 0:01:35s\nepoch 236| loss: 0.02506 | val_mae: 0.27109 | 0:01:35s\nepoch 237| loss: 0.02352 | val_mae: 0.25968 | 0:01:36s\nepoch 238| loss: 0.02833 | val_mae: 0.26282 | 0:01:36s\nepoch 239| loss: 0.02536 | val_mae: 0.26077 | 0:01:36s\nepoch 240| loss: 0.0264 | val_mae: 0.26619 | 0:01:37s\nepoch 241| loss: 0.03178 | val_mae: 0.28542 | 0:01:37s\nepoch 242| loss: 0.02852 | val_mae: 0.27186 | 0:01:38s\nepoch 243| loss: 0.0332 | val_mae: 0.27562 | 0:01:38s\nepoch 244| loss: 0.02824 | val_mae: 0.27803 | 0:01:38s\nepoch 245| loss: 0.02788 | val_mae: 0.26828 | 0:01:39s\nepoch 246| loss: 0.02456 | val_mae: 0.26329 | 0:01:39s\nepoch 247| loss: 0.02478 | val_mae: 0.26422 | 0:01:40s\nepoch 248| loss: 0.02591 | val_mae: 0.2629 | 0:01:40s\nepoch 249| loss: 0.0236 | val_mae: 0.26416 | 0:01:40s\nepoch 250| loss: 0.02225 | val_mae: 0.25688 | 0:01:41s\nepoch 251| loss: 0.02363 | val_mae: 0.26074 | 0:01:41s\nepoch 252| loss: 0.02326 | val_mae: 0.25867 | 0:01:41s\nepoch 253| loss: 0.02177 | val_mae: 0.25618 | 0:01:42s\nepoch 254| loss: 0.02011 | val_mae: 0.26251 | 0:01:42s\nepoch 255| loss: 0.02092 | val_mae: 0.25696 | 0:01:43s\nepoch 256| loss: 0.02187 | val_mae: 0.26532 | 0:01:43s\nepoch 257| loss: 0.02092 | val_mae: 0.2562 | 0:01:43s\nepoch 258| loss: 0.0212 | val_mae: 0.26377 | 0:01:44s\nepoch 259| loss: 0.02127 | val_mae: 0.25465 | 0:01:44s\nepoch 260| loss: 0.01991 | val_mae: 0.26485 | 0:01:45s\nepoch 261| loss: 0.01881 | val_mae: 0.25565 | 0:01:45s\nepoch 262| loss: 0.0201 | val_mae: 0.26203 | 0:01:45s\nepoch 263| loss: 0.01757 | val_mae: 0.25431 | 0:01:46s\nepoch 264| loss: 0.01781 | val_mae: 0.25519 | 0:01:46s\nepoch 265| loss: 0.02158 | val_mae: 0.25456 | 0:01:47s\nepoch 266| loss: 0.01944 | val_mae: 0.25477 | 0:01:47s\nepoch 267| loss: 0.01776 | val_mae: 0.25359 | 0:01:47s\nepoch 268| loss: 0.01894 | val_mae: 0.25991 | 0:01:48s\nepoch 269| loss: 0.01676 | val_mae: 0.25183 | 0:01:48s\nepoch 270| loss: 0.01739 | val_mae: 0.25645 | 0:01:48s\nepoch 271| loss: 0.01729 | val_mae: 0.25526 | 0:01:49s\nepoch 272| loss: 0.01606 | val_mae: 0.25172 | 0:01:49s\nepoch 273| loss: 0.0174 | val_mae: 0.25336 | 0:01:50s\nepoch 274| loss: 0.01596 | val_mae: 0.2507 | 0:01:50s\nepoch 275| loss: 0.01818 | val_mae: 0.25323 | 0:01:50s\nepoch 276| loss: 0.01563 | val_mae: 0.25342 | 0:01:51s\nepoch 277| loss: 0.01736 | val_mae: 0.25319 | 0:01:51s\nepoch 278| loss: 0.0165 | val_mae: 0.25224 | 0:01:52s\nepoch 279| loss: 0.01725 | val_mae: 0.25216 | 0:01:52s\nepoch 280| loss: 0.01883 | val_mae: 0.25905 | 0:01:52s\nepoch 281| loss: 0.01695 | val_mae: 0.24956 | 0:01:53s\nepoch 282| loss: 0.0155 | val_mae: 0.25363 | 0:01:53s\nepoch 283| loss: 0.01619 | val_mae: 0.24633 | 0:01:53s\nepoch 284| loss: 0.01479 | val_mae: 0.24688 | 0:01:54s\nepoch 285| loss: 0.016 | val_mae: 0.25018 | 0:01:54s\nepoch 286| loss: 0.01815 | val_mae: 0.25087 | 0:01:55s\nepoch 287| loss: 0.01778 | val_mae: 0.25068 | 0:01:55s\nepoch 288| loss: 0.01582 | val_mae: 0.24758 | 0:01:55s\nepoch 289| loss: 0.01415 | val_mae: 0.24894 | 0:01:56s\nepoch 290| loss: 0.01544 | val_mae: 0.25109 | 0:01:56s\nepoch 291| loss: 0.01563 | val_mae: 0.25023 | 0:01:57s\nepoch 292| loss: 0.0141 | val_mae: 0.25286 | 0:01:57s\nepoch 293| loss: 0.01345 | val_mae: 0.24635 | 0:01:58s\nepoch 294| loss: 0.01345 | val_mae: 0.24745 | 0:01:58s\nepoch 295| loss: 0.01286 | val_mae: 0.24596 | 0:01:59s\nepoch 296| loss: 0.01353 | val_mae: 0.24833 | 0:01:59s\nepoch 297| loss: 0.01323 | val_mae: 0.24714 | 0:01:59s\nepoch 298| loss: 0.01406 | val_mae: 0.24753 | 0:02:00s\nepoch 299| loss: 0.01252 | val_mae: 0.24481 | 0:02:00s\nepoch 300| loss: 0.01258 | val_mae: 0.24544 | 0:02:01s\nepoch 301| loss: 0.01243 | val_mae: 0.24218 | 0:02:01s\nepoch 302| loss: 0.01355 | val_mae: 0.24593 | 0:02:02s\nepoch 303| loss: 0.01468 | val_mae: 0.24579 | 0:02:02s\nepoch 304| loss: 0.01237 | val_mae: 0.24665 | 0:02:02s\nepoch 305| loss: 0.01206 | val_mae: 0.24304 | 0:02:03s\nepoch 306| loss: 0.01203 | val_mae: 0.2462 | 0:02:03s\nepoch 307| loss: 0.01281 | val_mae: 0.24459 | 0:02:04s\nepoch 308| loss: 0.01251 | val_mae: 0.24603 | 0:02:04s\nepoch 309| loss: 0.01191 | val_mae: 0.24828 | 0:02:04s\nepoch 310| loss: 0.01292 | val_mae: 0.2472 | 0:02:05s\nepoch 311| loss: 0.01109 | val_mae: 0.24642 | 0:02:05s\nepoch 312| loss: 0.01257 | val_mae: 0.24212 | 0:02:06s\nepoch 313| loss: 0.01379 | val_mae: 0.24634 | 0:02:06s\nepoch 314| loss: 0.0113 | val_mae: 0.24361 | 0:02:06s\nepoch 315| loss: 0.01379 | val_mae: 0.24842 | 0:02:07s\nepoch 316| loss: 0.0128 | val_mae: 0.24189 | 0:02:07s\nepoch 317| loss: 0.01164 | val_mae: 0.24365 | 0:02:08s\nepoch 318| loss: 0.01258 | val_mae: 0.24117 | 0:02:08s\nepoch 319| loss: 0.01191 | val_mae: 0.24089 | 0:02:08s\nepoch 320| loss: 0.01174 | val_mae: 0.24633 | 0:02:09s\nepoch 321| loss: 0.01162 | val_mae: 0.24518 | 0:02:09s\nepoch 322| loss: 0.01091 | val_mae: 0.24653 | 0:02:10s\nepoch 323| loss: 0.01207 | val_mae: 0.24608 | 0:02:10s\nepoch 324| loss: 0.01232 | val_mae: 0.24668 | 0:02:10s\nepoch 325| loss: 0.01416 | val_mae: 0.24462 | 0:02:11s\nepoch 326| loss: 0.01051 | val_mae: 0.24604 | 0:02:11s\nepoch 327| loss: 0.01187 | val_mae: 0.24369 | 0:02:12s\nepoch 328| loss: 0.01014 | val_mae: 0.24478 | 0:02:12s\nepoch 329| loss: 0.01284 | val_mae: 0.24228 | 0:02:12s\nepoch 330| loss: 0.01135 | val_mae: 0.24266 | 0:02:13s\nepoch 331| loss: 0.01078 | val_mae: 0.23864 | 0:02:13s\nepoch 332| loss: 0.01019 | val_mae: 0.23905 | 0:02:13s\nepoch 333| loss: 0.01029 | val_mae: 0.23965 | 0:02:14s\nepoch 334| loss: 0.00989 | val_mae: 0.23992 | 0:02:14s\nepoch 335| loss: 0.01099 | val_mae: 0.23843 | 0:02:15s\nepoch 336| loss: 0.01161 | val_mae: 0.24128 | 0:02:15s\nepoch 337| loss: 0.01031 | val_mae: 0.23989 | 0:02:15s\nepoch 338| loss: 0.01056 | val_mae: 0.24146 | 0:02:16s\nepoch 339| loss: 0.01027 | val_mae: 0.23808 | 0:02:16s\nepoch 340| loss: 0.00998 | val_mae: 0.23982 | 0:02:17s\nepoch 341| loss: 0.01053 | val_mae: 0.23861 | 0:02:17s\nepoch 342| loss: 0.00998 | val_mae: 0.2386 | 0:02:18s\nepoch 343| loss: 0.00978 | val_mae: 0.23828 | 0:02:18s\nepoch 344| loss: 0.01084 | val_mae: 0.23856 | 0:02:18s\nepoch 345| loss: 0.01058 | val_mae: 0.23884 | 0:02:19s\nepoch 346| loss: 0.00977 | val_mae: 0.23747 | 0:02:19s\nepoch 347| loss: 0.00914 | val_mae: 0.23715 | 0:02:20s\nepoch 348| loss: 0.01036 | val_mae: 0.23986 | 0:02:20s\nepoch 349| loss: 0.00963 | val_mae: 0.23694 | 0:02:20s\nepoch 350| loss: 0.00912 | val_mae: 0.23927 | 0:02:21s\nepoch 351| loss: 0.01024 | val_mae: 0.23782 | 0:02:21s\nepoch 352| loss: 0.00985 | val_mae: 0.23817 | 0:02:22s\nepoch 353| loss: 0.00918 | val_mae: 0.23642 | 0:02:22s\nepoch 354| loss: 0.00921 | val_mae: 0.23894 | 0:02:23s\nepoch 355| loss: 0.00857 | val_mae: 0.23502 | 0:02:23s\nepoch 356| loss: 0.00961 | val_mae: 0.23848 | 0:02:23s\nepoch 357| loss: 0.01006 | val_mae: 0.2352 | 0:02:24s\nepoch 358| loss: 0.00912 | val_mae: 0.23588 | 0:02:24s\nepoch 359| loss: 0.00878 | val_mae: 0.2366 | 0:02:25s\nepoch 360| loss: 0.00939 | val_mae: 0.23839 | 0:02:25s\nepoch 361| loss: 0.00829 | val_mae: 0.23657 | 0:02:26s\nepoch 362| loss: 0.00841 | val_mae: 0.23641 | 0:02:26s\nepoch 363| loss: 0.01028 | val_mae: 0.23858 | 0:02:26s\nepoch 364| loss: 0.01078 | val_mae: 0.24012 | 0:02:27s\nepoch 365| loss: 0.01015 | val_mae: 0.24296 | 0:02:27s\nepoch 366| loss: 0.01027 | val_mae: 0.24033 | 0:02:27s\nepoch 367| loss: 0.01198 | val_mae: 0.23822 | 0:02:28s\nepoch 368| loss: 0.01089 | val_mae: 0.23902 | 0:02:28s\nepoch 369| loss: 0.01139 | val_mae: 0.23662 | 0:02:29s\nepoch 370| loss: 0.00922 | val_mae: 0.23744 | 0:02:29s\nepoch 371| loss: 0.0107 | val_mae: 0.23816 | 0:02:30s\nepoch 372| loss: 0.00999 | val_mae: 0.23747 | 0:02:30s\nepoch 373| loss: 0.01083 | val_mae: 0.23821 | 0:02:30s\nepoch 374| loss: 0.00971 | val_mae: 0.23634 | 0:02:31s\nepoch 375| loss: 0.00901 | val_mae: 0.23642 | 0:02:31s\nepoch 376| loss: 0.00942 | val_mae: 0.23613 | 0:02:31s\nepoch 377| loss: 0.00946 | val_mae: 0.23654 | 0:02:32s\nepoch 378| loss: 0.01004 | val_mae: 0.23643 | 0:02:32s\nepoch 379| loss: 0.0091 | val_mae: 0.23541 | 0:02:33s\nepoch 380| loss: 0.01235 | val_mae: 0.23549 | 0:02:33s\nepoch 381| loss: 0.00885 | val_mae: 0.23527 | 0:02:34s\nepoch 382| loss: 0.00853 | val_mae: 0.2347 | 0:02:34s\nepoch 383| loss: 0.00901 | val_mae: 0.23525 | 0:02:34s\nepoch 384| loss: 0.00949 | val_mae: 0.23606 | 0:02:35s\nepoch 385| loss: 0.00914 | val_mae: 0.23582 | 0:02:35s\nepoch 386| loss: 0.00822 | val_mae: 0.23477 | 0:02:35s\nepoch 387| loss: 0.00902 | val_mae: 0.2352 | 0:02:36s\nepoch 388| loss: 0.00931 | val_mae: 0.23527 | 0:02:36s\nepoch 389| loss: 0.00878 | val_mae: 0.23438 | 0:02:37s\nepoch 390| loss: 0.0097 | val_mae: 0.23414 | 0:02:37s\nepoch 391| loss: 0.00838 | val_mae: 0.23466 | 0:02:37s\nepoch 392| loss: 0.00898 | val_mae: 0.23448 | 0:02:38s\nepoch 393| loss: 0.00934 | val_mae: 0.23371 | 0:02:38s\nepoch 394| loss: 0.00853 | val_mae: 0.23357 | 0:02:39s\nepoch 395| loss: 0.00998 | val_mae: 0.23429 | 0:02:39s\nepoch 396| loss: 0.01049 | val_mae: 0.23408 | 0:02:39s\nepoch 397| loss: 0.00791 | val_mae: 0.23593 | 0:02:40s\nepoch 398| loss: 0.00916 | val_mae: 0.23495 | 0:02:40s\nepoch 399| loss: 0.00894 | val_mae: 0.23429 | 0:02:41s\nepoch 400| loss: 0.00876 | val_mae: 0.23504 | 0:02:41s\nepoch 401| loss: 0.00902 | val_mae: 0.23512 | 0:02:41s\nepoch 402| loss: 0.00911 | val_mae: 0.23465 | 0:02:42s\nepoch 403| loss: 0.00875 | val_mae: 0.23515 | 0:02:42s\nepoch 404| loss: 0.00944 | val_mae: 0.23458 | 0:02:42s\nepoch 405| loss: 0.00802 | val_mae: 0.23431 | 0:02:43s\nepoch 406| loss: 0.00833 | val_mae: 0.23471 | 0:02:43s\nepoch 407| loss: 0.00936 | val_mae: 0.23471 | 0:02:44s\nepoch 408| loss: 0.0087 | val_mae: 0.23465 | 0:02:44s\nepoch 409| loss: 0.00872 | val_mae: 0.23507 | 0:02:44s\nepoch 410| loss: 0.0084 | val_mae: 0.23488 | 0:02:45s\nepoch 411| loss: 0.00864 | val_mae: 0.23439 | 0:02:45s\nepoch 412| loss: 0.00942 | val_mae: 0.23437 | 0:02:45s\nepoch 413| loss: 0.00984 | val_mae: 0.23468 | 0:02:46s\nepoch 414| loss: 0.00963 | val_mae: 0.23472 | 0:02:46s\nepoch 415| loss: 0.00934 | val_mae: 0.23458 | 0:02:47s\nepoch 416| loss: 0.00925 | val_mae: 0.23432 | 0:02:47s\nepoch 417| loss: 0.00852 | val_mae: 0.23441 | 0:02:47s\nepoch 418| loss: 0.0091 | val_mae: 0.23418 | 0:02:48s\nepoch 419| loss: 0.00903 | val_mae: 0.23436 | 0:02:48s\nepoch 420| loss: 0.0085 | val_mae: 0.23409 | 0:02:49s\nepoch 421| loss: 0.00919 | val_mae: 0.23434 | 0:02:49s\nepoch 422| loss: 0.00875 | val_mae: 0.23446 | 0:02:49s\nepoch 423| loss: 0.00932 | val_mae: 0.23462 | 0:02:50s\nepoch 424| loss: 0.00897 | val_mae: 0.2342 | 0:02:50s\nepoch 425| loss: 0.00854 | val_mae: 0.23425 | 0:02:51s\nepoch 426| loss: 0.01033 | val_mae: 0.23436 | 0:02:51s\nepoch 427| loss: 0.00844 | val_mae: 0.2338 | 0:02:51s\nepoch 428| loss: 0.00814 | val_mae: 0.23379 | 0:02:52s\nepoch 429| loss: 0.00809 | val_mae: 0.23376 | 0:02:52s\nepoch 430| loss: 0.00881 | val_mae: 0.23365 | 0:02:52s\nepoch 431| loss: 0.00885 | val_mae: 0.23397 | 0:02:53s\nepoch 432| loss: 0.00794 | val_mae: 0.23395 | 0:02:53s\nepoch 433| loss: 0.00778 | val_mae: 0.23377 | 0:02:54s\nepoch 434| loss: 0.00896 | val_mae: 0.234 | 0:02:54s\nepoch 435| loss: 0.00811 | val_mae: 0.23425 | 0:02:54s\nepoch 436| loss: 0.0079 | val_mae: 0.23397 | 0:02:55s\nepoch 437| loss: 0.00795 | val_mae: 0.23364 | 0:02:55s\nepoch 438| loss: 0.00846 | val_mae: 0.23377 | 0:02:56s\nepoch 439| loss: 0.00898 | val_mae: 0.234 | 0:02:56s\nepoch 440| loss: 0.00786 | val_mae: 0.2339 | 0:02:56s\nepoch 441| loss: 0.00735 | val_mae: 0.23309 | 0:02:57s\nepoch 442| loss: 0.00752 | val_mae: 0.23327 | 0:02:57s\nepoch 443| loss: 0.00893 | val_mae: 0.23388 | 0:02:58s\nepoch 444| loss: 0.00921 | val_mae: 0.23382 | 0:02:58s\nepoch 445| loss: 0.00812 | val_mae: 0.23365 | 0:02:58s\nepoch 446| loss: 0.00849 | val_mae: 0.23379 | 0:02:59s\nepoch 447| loss: 0.00887 | val_mae: 0.23409 | 0:02:59s\nepoch 448| loss: 0.0091 | val_mae: 0.23326 | 0:03:00s\nepoch 449| loss: 0.00874 | val_mae: 0.23316 | 0:03:00s\nepoch 450| loss: 0.00946 | val_mae: 0.23382 | 0:03:01s\nepoch 451| loss: 0.0082 | val_mae: 0.2339 | 0:03:01s\nepoch 452| loss: 0.00902 | val_mae: 0.23358 | 0:03:02s\nepoch 453| loss: 0.00853 | val_mae: 0.23354 | 0:03:02s\nepoch 454| loss: 0.00852 | val_mae: 0.23396 | 0:03:02s\nepoch 455| loss: 0.00875 | val_mae: 0.23354 | 0:03:03s\nepoch 456| loss: 0.00936 | val_mae: 0.23343 | 0:03:03s\nepoch 457| loss: 0.00868 | val_mae: 0.23341 | 0:03:03s\nepoch 458| loss: 0.00882 | val_mae: 0.23344 | 0:03:04s\nepoch 459| loss: 0.00808 | val_mae: 0.23381 | 0:03:04s\nepoch 460| loss: 0.00913 | val_mae: 0.23353 | 0:03:05s\nepoch 461| loss: 0.0086 | val_mae: 0.23383 | 0:03:05s\nepoch 462| loss: 0.00826 | val_mae: 0.23373 | 0:03:06s\nepoch 463| loss: 0.00825 | val_mae: 0.23371 | 0:03:06s\nepoch 464| loss: 0.00837 | val_mae: 0.23389 | 0:03:06s\nepoch 465| loss: 0.00761 | val_mae: 0.2334 | 0:03:07s\nepoch 466| loss: 0.00823 | val_mae: 0.23397 | 0:03:07s\nepoch 467| loss: 0.00866 | val_mae: 0.23413 | 0:03:08s\nepoch 468| loss: 0.00806 | val_mae: 0.23432 | 0:03:08s\nepoch 469| loss: 0.01046 | val_mae: 0.23399 | 0:03:08s\nepoch 470| loss: 0.00811 | val_mae: 0.23377 | 0:03:09s\nepoch 471| loss: 0.00784 | val_mae: 0.23394 | 0:03:09s\nepoch 472| loss: 0.00817 | val_mae: 0.23416 | 0:03:10s\nepoch 473| loss: 0.00964 | val_mae: 0.23382 | 0:03:10s\nepoch 474| loss: 0.00785 | val_mae: 0.23383 | 0:03:10s\nepoch 475| loss: 0.00887 | val_mae: 0.23364 | 0:03:11s\nepoch 476| loss: 0.00937 | val_mae: 0.23392 | 0:03:11s\nepoch 477| loss: 0.00838 | val_mae: 0.23379 | 0:03:12s\nepoch 478| loss: 0.0082 | val_mae: 0.23388 | 0:03:12s\nepoch 479| loss: 0.00876 | val_mae: 0.23384 | 0:03:13s\nepoch 480| loss: 0.00892 | val_mae: 0.23384 | 0:03:13s\nepoch 481| loss: 0.00805 | val_mae: 0.23389 | 0:03:13s\nepoch 482| loss: 0.00819 | val_mae: 0.23438 | 0:03:14s\nepoch 483| loss: 0.0081 | val_mae: 0.23367 | 0:03:14s\nepoch 484| loss: 0.00798 | val_mae: 0.23421 | 0:03:14s\nepoch 485| loss: 0.00833 | val_mae: 0.23362 | 0:03:15s\nepoch 486| loss: 0.00819 | val_mae: 0.23385 | 0:03:15s\nepoch 487| loss: 0.00973 | val_mae: 0.23377 | 0:03:16s\nepoch 488| loss: 0.00804 | val_mae: 0.23398 | 0:03:16s\nepoch 489| loss: 0.00817 | val_mae: 0.23406 | 0:03:16s\nepoch 490| loss: 0.00813 | val_mae: 0.23352 | 0:03:17s\nepoch 491| loss: 0.00815 | val_mae: 0.23351 | 0:03:17s\n\nEarly stopping occurred at epoch 491 with best_epoch = 441 and best_val_mae = 0.23309\n","output_type":"stream"},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/pytorch_tabnet/callbacks.py:172: UserWarning: Best weights from best epoch are automatically used!\n warnings.warn(wrn_msg)\n","output_type":"stream"}],"execution_count":29},{"cell_type":"code","source":"submit_df = pd.concat([test_id, test_df['sii']], axis=1)\nsubmit_df['sii'] = submit_df['sii'].astype(int)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:52:23.714054Z","iopub.execute_input":"2024-11-13T05:52:23.714382Z","iopub.status.idle":"2024-11-13T05:52:23.720325Z","shell.execute_reply.started":"2024-11-13T05:52:23.714349Z","shell.execute_reply":"2024-11-13T05:52:23.719273Z"}},"outputs":[],"execution_count":30},{"cell_type":"code","source":"submit_df","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:52:23.721517Z","iopub.execute_input":"2024-11-13T05:52:23.721836Z","iopub.status.idle":"2024-11-13T05:52:23.736980Z","shell.execute_reply.started":"2024-11-13T05:52:23.721797Z","shell.execute_reply":"2024-11-13T05:52:23.736124Z"}},"outputs":[{"execution_count":31,"output_type":"execute_result","data":{"text/plain":" id sii\n0 00008ff9 0\n1 000fd460 0\n2 00105258 0\n3 00115b9f 1\n4 0016bb22 0\n5 001f3379 1\n6 0038ba98 0\n7 0068a485 0\n8 0069fbed 0\n9 0083e397 0\n10 0087dd65 0\n11 00abe655 0\n12 00ae59c9 1\n13 00af6387 0\n14 00bd4359 0\n15 00c0cd71 0\n16 00d56d4b 0\n17 00d9913d 0\n18 00e6167c 0\n19 00ebc35d 0","text/html":"\n\n
\n \n \n | \n id | \n sii | \n
\n \n \n \n | 0 | \n 00008ff9 | \n 0 | \n
\n \n | 1 | \n 000fd460 | \n 0 | \n
\n \n | 2 | \n 00105258 | \n 0 | \n
\n \n | 3 | \n 00115b9f | \n 1 | \n
\n \n | 4 | \n 0016bb22 | \n 0 | \n
\n \n | 5 | \n 001f3379 | \n 1 | \n
\n \n | 6 | \n 0038ba98 | \n 0 | \n
\n \n | 7 | \n 0068a485 | \n 0 | \n
\n \n | 8 | \n 0069fbed | \n 0 | \n
\n \n | 9 | \n 0083e397 | \n 0 | \n
\n \n | 10 | \n 0087dd65 | \n 0 | \n
\n \n | 11 | \n 00abe655 | \n 0 | \n
\n \n | 12 | \n 00ae59c9 | \n 1 | \n
\n \n | 13 | \n 00af6387 | \n 0 | \n
\n \n | 14 | \n 00bd4359 | \n 0 | \n
\n \n | 15 | \n 00c0cd71 | \n 0 | \n
\n \n | 16 | \n 00d56d4b | \n 0 | \n
\n \n | 17 | \n 00d9913d | \n 0 | \n
\n \n | 18 | \n 00e6167c | \n 0 | \n
\n \n | 19 | \n 00ebc35d | \n 0 | \n
\n \n
\n
"},"metadata":{}}],"execution_count":31},{"cell_type":"code","source":"submit_df.to_csv('submission.csv', index=False)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-11-13T05:52:23.738164Z","iopub.execute_input":"2024-11-13T05:52:23.738495Z","iopub.status.idle":"2024-11-13T05:52:23.746544Z","shell.execute_reply.started":"2024-11-13T05:52:23.738461Z","shell.execute_reply":"2024-11-13T05:52:23.745722Z"}},"outputs":[],"execution_count":32},{"cell_type":"code","source":"","metadata":{"trusted":true},"outputs":[],"execution_count":null}]}
\ No newline at end of file