From 97f9fe4b93808d2a781718399ebe70feed40e426 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Mon, 15 Apr 2019 21:47:26 +0100 Subject: [PATCH 01/24] Started optimizing SVR --- analysis.ipynb | 113 +++++++++-------- models.ipynb | 250 +++++++++++++++++++++++++++++++++++++ requirements.txt | 9 ++ utils/ContinuityImputer.py | 10 +- utils/DataFrameSelector.py | 11 ++ utils/NoiseRemover.py | 21 ++++ 6 files changed, 357 insertions(+), 57 deletions(-) create mode 100644 models.ipynb create mode 100644 utils/DataFrameSelector.py create mode 100644 utils/NoiseRemover.py diff --git a/analysis.ipynb b/analysis.ipynb index a4d93af..e70303a 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "import pandas as pd\n", @@ -21,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -247,7 +256,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 2, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -261,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -314,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -348,7 +357,7 @@ "dtype: float64" ] }, - "execution_count": 4, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -587,7 +596,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 5, + "execution_count": 40, "metadata": {}, "output_type": 
"execute_result" } @@ -598,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -607,7 +616,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -826,7 +835,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 7, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -839,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -1058,7 +1067,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 8, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1078,7 +1087,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -1304,7 +1313,7 @@ "[5 rows x 24 columns]" ] }, - "execution_count": 9, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1324,7 +1333,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -1357,7 +1366,7 @@ "dtype: float64" ] }, - "execution_count": 10, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1368,7 +1377,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -1467,7 +1476,7 @@ "[1 rows x 24 columns]" ] }, - "execution_count": 11, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -1478,7 +1487,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -1667,7 +1676,7 @@ "[4 rows x 24 columns]" ] }, - "execution_count": 12, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -1688,7 +1697,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -1720,7 +1729,7 @@ }, { "cell_type": "code", - 
"execution_count": 14, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -1744,7 +1753,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -1775,7 +1784,7 @@ "Name: total_cases, dtype: float64" ] }, - "execution_count": 15, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1797,7 +1806,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -1806,7 +1815,7 @@ "(1451, 22)" ] }, - "execution_count": 16, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -1817,7 +1826,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -1845,7 +1854,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -1879,7 +1888,7 @@ "dtype: float64" ] }, - "execution_count": 18, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -1895,7 +1904,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 54, "metadata": {}, "outputs": [ { @@ -1935,7 +1944,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -2252,7 +2261,7 @@ "max 2.228153e+00 1.063787e+01 " ] }, - "execution_count": 35, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2284,7 +2293,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -2294,7 +2303,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -2303,7 +2312,7 @@ "2" ] }, - "execution_count": 37, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2326,7 +2335,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ 
-2361,7 +2370,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -2435,7 +2444,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -2446,7 +2455,7 @@ " pooling_func='deprecated')" ] }, - "execution_count": 58, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -2460,7 +2469,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -2532,7 +2541,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -2541,7 +2550,7 @@ "('1990-04-30', '2010-06-25')" ] }, - "execution_count": 20, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -2552,7 +2561,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -2584,7 +2593,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -2647,7 +2656,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -2694,7 +2703,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -2703,7 +2712,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -2751,7 +2760,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -2761,7 +2770,7 @@ " svd_solver='auto', tol=0.0, whiten=False)" ] }, - "execution_count": 65, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2774,7 +2783,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 35, "metadata": {}, "outputs": [ { diff --git a/models.ipynb b/models.ipynb new file mode 100644 index 
0000000..ed8b50c --- /dev/null +++ b/models.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['city',\n", + " 'year',\n", + " 'weekofyear',\n", + " 'week_start_date',\n", + " 'ndvi_ne',\n", + " 'ndvi_nw',\n", + " 'ndvi_se',\n", + " 'ndvi_sw',\n", + " 'precipitation_amt_mm',\n", + " 'reanalysis_air_temp_k',\n", + " 'reanalysis_avg_temp_k',\n", + " 'reanalysis_dew_point_temp_k',\n", + " 'reanalysis_max_air_temp_k',\n", + " 'reanalysis_min_air_temp_k',\n", + " 'reanalysis_precip_amt_kg_per_m2',\n", + " 'reanalysis_relative_humidity_percent',\n", + " 'reanalysis_sat_precip_amt_mm',\n", + " 'reanalysis_specific_humidity_g_per_kg',\n", + " 'reanalysis_tdtr_k',\n", + " 'station_avg_temp_c',\n", + " 'station_diur_temp_rng_c',\n", + " 'station_max_temp_c',\n", + " 'station_min_temp_c',\n", + " 'station_precip_mm']" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_1 = pd.read_csv('data/dengue_features_train.csv')\n", + "y_train = pd.read_csv('data/dengue_labels_train.csv')['total_cases']\n", + "attr = list(X_train_1)\n", + "attr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleaning the noisy training data" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "y_train = y_train[X_train_1['weekofyear'] != 53]\n", + "X_train_1 = X_train_1[X_train_1['weekofyear'] != 53]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + 
"metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from utils.ContinuityImputer import ContinuityImputer\n", + "from utils.DataFrameSelector import DataFrameSelector\n", + "\n", + "pipeline = Pipeline([\n", + " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", + " ('dataframe_selector', DataFrameSelector(attribute_names=attr[4:])),\n", + " ('scaler', StandardScaler()),\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1451, 20)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train = pipeline.fit_transform(X_train_1)\n", + "X_train.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model Selection" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import RandomizedSearchCV\n", + "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from sklearn.svm import SVR\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from scipy.stats import randint as sp_randint\n", + "score_metric='neg_mean_absolute_error'\n", + "jobs=-1 #-1 to make it execute in parallel\n", + "k_folds=10\n", + "n_iter_search = 20\n", + "verbose_level = 1\n", + "random_n = 42" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SVR" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "C = sp_randint(0, 10000)\n", + "params = {'kernel':['rbf', 'sigmoid','linear'], 'gamma':['scale'], 'C': C}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Fitting 10 folds for each of 20 candidates, totalling 200 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n" + ] + } + ], + "source": [ + "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "SVR_optimizer.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "SVR_poly_optimizer.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.DataFrame(SVR_optimizer.cv_results_)[['mean_fit_time','param_C', 'param_kernel', 'mean_test_score', 'mean_train_score']]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index f91d9ea..ef8b002 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ appnope==0.1.0 attrs==19.1.0 backcall==0.1.0 
bleach==3.1.0 +cycler==0.10.0 decorator==4.4.0 defusedxml==0.5.0 entrypoints==0.3 @@ -15,11 +16,15 @@ jupyter-client==5.2.4 jupyter-core==4.4.0 jupyterlab==0.35.4 jupyterlab-server==0.2.0 +kiwisolver==1.0.1 MarkupSafe==1.1.1 +matplotlib==3.0.3 mistune==0.8.4 nbconvert==5.4.1 nbformat==4.4.0 notebook==5.7.8 +numpy==1.16.2 +pandas==0.24.2 pandocfilters==1.4.2 parso==0.4.0 pexpect==4.7.0 @@ -28,9 +33,13 @@ prometheus-client==0.6.0 prompt-toolkit==2.0.9 ptyprocess==0.6.0 Pygments==2.3.1 +pyparsing==2.4.0 pyrsistent==0.14.11 python-dateutil==2.8.0 +pytz==2018.9 pyzmq==18.0.1 +scikit-learn==0.20.3 +scipy==1.2.1 Send2Trash==1.5.0 six==1.12.0 terminado==0.8.2 diff --git a/utils/ContinuityImputer.py b/utils/ContinuityImputer.py index 58e498e..0c4d058 100644 --- a/utils/ContinuityImputer.py +++ b/utils/ContinuityImputer.py @@ -10,8 +10,9 @@ def fit(self, X, y=None): X_iq = X[X['city'] == 'iq'] X_sj = X[X['city'] == 'sj'] - self.medians_iq = {attr: np.nanmedian(X_iq[attr]) for attr in self.attributes} - self.medians_sj = {attr: np.nanmedian(X_sj[attr]) for attr in self.attributes} + medians_iq = {attr: np.nanmedian(X_iq[attr]) for attr in self.attributes} + medians_sj = {attr: np.nanmedian(X_sj[attr]) for attr in self.attributes} + self.last_values = {'sj': medians_sj, 'iq': medians_iq} return self @@ -20,14 +21,13 @@ def transform(self, X): X = X.copy() for attr in self.attributes: - last_values = {'sj': self.medians_sj[attr], 'iq': self.medians_iq[attr]} r = [] for _, curr in X.iterrows(): city = curr['city'] val = curr[attr] if val is not None and not np.isnan(val): - last_values[city] = val - r.append(last_values[city]) + self.last_values[city][attr] = val + r.append(self.last_values[city][attr]) X[attr] = r return X \ No newline at end of file diff --git a/utils/DataFrameSelector.py b/utils/DataFrameSelector.py new file mode 100644 index 0000000..42e46ea --- /dev/null +++ b/utils/DataFrameSelector.py @@ -0,0 +1,11 @@ +from sklearn.base import BaseEstimator, 
TransformerMixin + +class DataFrameSelector(BaseEstimator, TransformerMixin): + + def __init__(self, attribute_names): + self.attribute_names = attribute_names + + def fit(self, X, y=None): + return self + def transform(self, X): + return X[self.attribute_names] \ No newline at end of file diff --git a/utils/NoiseRemover.py b/utils/NoiseRemover.py new file mode 100644 index 0000000..6e8b0b9 --- /dev/null +++ b/utils/NoiseRemover.py @@ -0,0 +1,21 @@ +from sklearn.base import BaseEstimator, TransformerMixin + +class NoiseRemover(BaseEstimator, TransformerMixin): + def __init__(self, noisy_weeks=53, copy=True): + self.noisy_weeks = noisy_weeks + self.train_set = True + self.copy = copy + + def fit(self, X, y=None): + return self + + def transform(self, X): + if self.train_set: + if self.copy: + X = X.copy() + X = X[X['weekofyear'] != self.noisy_weeks] + self.train_set = False + + return X + + From 0f22f4238547f886ff5b0b00e42352cd214f40ea Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Tue, 16 Apr 2019 15:03:06 +0100 Subject: [PATCH 02/24] Last Weeks is working --- analysis.ipynb | 153 ++++++++++++++++++++---------- models.ipynb | 191 +++++++++++++++++++++++++++++++------- utils/DataFrameDropper.py | 19 ++++ utils/LastWeeks.py | 41 ++++++++ 4 files changed, 321 insertions(+), 83 deletions(-) create mode 100644 utils/DataFrameDropper.py create mode 100644 utils/LastWeeks.py diff --git a/analysis.ipynb b/analysis.ipynb index e70303a..587a6db 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -2,18 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 36, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "import pandas as pd\n", @@ -30,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -256,7 +247,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 37, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -270,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -323,7 +314,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -357,7 +348,7 @@ "dtype: float64" ] }, - "execution_count": 39, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -377,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -596,7 +587,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 40, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -607,7 +598,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -616,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -835,7 +826,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 42, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -848,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1067,7 +1058,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 43, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1087,7 +1078,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -1313,7 +1304,7 @@ "[5 rows x 24 columns]" ] }, - "execution_count": 44, + "execution_count": 
9, "metadata": {}, "output_type": "execute_result" } @@ -1333,7 +1324,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -1366,7 +1357,7 @@ "dtype: float64" ] }, - "execution_count": 45, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1377,7 +1368,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -1476,7 +1467,7 @@ "[1 rows x 24 columns]" ] }, - "execution_count": 46, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1487,7 +1478,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1676,7 +1667,7 @@ "[4 rows x 24 columns]" ] }, - "execution_count": 47, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1729,7 +1720,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1753,7 +1744,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1784,7 +1775,7 @@ "Name: total_cases, dtype: float64" ] }, - "execution_count": 50, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1806,7 +1797,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1815,7 +1806,7 @@ "(1451, 22)" ] }, - "execution_count": 51, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1826,7 +1817,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1854,7 +1845,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1888,7 +1879,7 @@ "dtype: float64" ] }, - "execution_count": 53, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1904,7 +1895,7 @@ 
}, { "cell_type": "code", - "execution_count": 54, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1944,7 +1935,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -2261,7 +2252,7 @@ "max 2.228153e+00 1.063787e+01 " ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2293,7 +2284,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -2303,7 +2294,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -2312,7 +2303,7 @@ "2" ] }, - "execution_count": 22, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2335,7 +2326,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -2370,7 +2361,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -2541,22 +2532,82 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('2000-07-01', '2010-06-25')" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "min(train_data[train_data['city'] == 'iq']['week_start_date']), max(train_data[train_data['city'] == 'iq']['week_start_date'])" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('2010-07-02', '2013-06-25')" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "min(test_data[test_data['city'] == 'iq']['week_start_date']), max(test_data[test_data['city'] == 'iq']['week_start_date'])" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": 
{ + "text/plain": [ + "('1990-04-30', '2008-04-22')" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "min(train_data[train_data['city'] == 'sj']['week_start_date']), max(train_data[train_data['city'] == 'sj']['week_start_date'])" + ] + }, + { + "cell_type": "code", + "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('1990-04-30', '2010-06-25')" + "('2008-04-29', '2013-04-23')" ] }, - "execution_count": 27, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "min(train_data['week_start_date']), max(train_data['week_start_date'])" + "min(test_data[test_data['city'] == 'sj']['week_start_date']), max(test_data[test_data['city'] == 'sj']['week_start_date'])" ] }, { diff --git a/models.ipynb b/models.ipynb index ed8b50c..4363ee6 100644 --- a/models.ipynb +++ b/models.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -52,7 +52,7 @@ " 'station_precip_mm']" ] }, - "execution_count": 48, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -73,12 +73,31 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(1451, 24)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "y_train = y_train[X_train_1['weekofyear'] != 53]\n", - "X_train_1 = X_train_1[X_train_1['weekofyear'] != 53]" + "def bools_to_indexes(booleans):\n", + " r = []\n", + " for idx, x in enumerate(booleans):\n", + " if x:\n", + " r.append(idx)\n", + " return r\n", + "\n", + "y_train = y_train.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n", + "X_train_1 = X_train_1.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n", + "X_train_1.shape" ] }, { @@ -90,35 +109,40 @@ }, { "cell_type": "code", - 
"execution_count": 50, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "%autoreload\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.decomposition import PCA\n", "from utils.ContinuityImputer import ContinuityImputer\n", - "from utils.DataFrameSelector import DataFrameSelector\n", + "from utils.DataFrameDropper import DataFrameDropper\n", + "from utils.LastWeeks import LastWeeks\n", + "lw = LastWeeks(attributes=['ndvi_ne', 'precipitation_amt_mm', 'reanalysis_relative_humidity_percent'], weeks=3)\n", "\n", "pipeline = Pipeline([\n", " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", - " ('dataframe_selector', DataFrameSelector(attribute_names=attr[4:])),\n", - " ('scaler', StandardScaler()),\n", + " ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n", + " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", + " #('scaler', StandardScaler()),\n", + " #('pca', PCA(n_components=9))\n", "])" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1451, 20)" + "(1456, 60)" ] }, - "execution_count": 51, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -137,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -147,10 +171,9 @@ "from sklearn.svm import SVR\n", "from sklearn.tree import DecisionTreeRegressor\n", "from scipy.stats import randint as sp_randint\n", + "from scipy.stats import uniform as sp_uniform\n", "score_metric='neg_mean_absolute_error'\n", "jobs=-1 #-1 to make it execute in parallel\n", - "k_folds=10\n", - "n_iter_search = 20\n", "verbose_level = 1\n", "random_n = 42" ] @@ -159,61 +182,167 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## SVR" + "## SVR\n", + "* The results with the kernel *sigmoid* were too bad, so we removed them." 
] }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ + "k_folds=10\n", + "n_iter_search = 100\n", "C = sp_randint(0, 10000)\n", - "params = {'kernel':['rbf', 'sigmoid','linear'], 'gamma':['scale'], 'C': C}" + "params = {'kernel':['rbf', 'linear'], 'gamma':['scale'], 'C': C}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "SVR_optimizer.fit(X_train, y_train)\n", + "SVR_optimizer.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, + "outputs": [], + "source": [ + "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "SVR_poly_optimizer.fit(X_train, y_train)\n", + "SVR_poly_optimizer.best_score_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Regression Trees" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "k_folds=10\n", + "n_iter_search = 100\n", + "min_samples = sp_uniform(0.03, 0.35)\n", + "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 5), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 10 folds for each of 20 
candidates, totalling 200 fits\n" + "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n" + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 2.4s\n", + "[Parallel(n_jobs=-1)]: Done 319 tasks | elapsed: 8.5s\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 22.8s finished\n" ] + }, + { + "data": { + "text/plain": [ + "-18.391109579600275" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "SVR_optimizer.fit(X_train, y_train)" + "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Tree_optimizer.fit(X_train, y_train)\n", + "Tree_optimizer.best_score_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forests" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ - "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}," + "k_folds=5\n", + "n_iter_search = 100\n", + "min_samples = sp_uniform(0.01, 0.35)\n", + "params = {'n_estimators': sp_randint(5,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 8), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, - "outputs": [], + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 6.2s\n", + "[Parallel(n_jobs=-1)]: Done 261 tasks | elapsed: 27.3s\n", + "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 53.8s finished\n" + ] + }, + { + "data": { + "text/plain": [ + "-18.976912474155757" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "SVR_poly_optimizer.fit(X_train, y_train)" + "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Forest_optimizer.fit(X_train, y_train)\n", + "Forest_optimizer.best_score_" ] }, { @@ -221,9 +350,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "pd.DataFrame(SVR_optimizer.cv_results_)[['mean_fit_time','param_C', 'param_kernel', 'mean_test_score', 'mean_train_score']]" - ] + "source": [] } ], "metadata": { diff --git a/utils/DataFrameDropper.py b/utils/DataFrameDropper.py new file mode 100644 index 0000000..4c8c265 --- /dev/null +++ b/utils/DataFrameDropper.py @@ -0,0 +1,19 @@ +from sklearn.base import BaseEstimator, TransformerMixin +import pandas + +class DataFrameDropper(BaseEstimator, TransformerMixin): + + def __init__(self, attribute_names, copy = True): + self.attribute_names = attribute_names + self.copy = copy + + def fit(self, X, y=None): + return 
self + + def transform(self, X): + if self.copy: + X = X.copy() + if isinstance(X, pandas.core.frame.DataFrame): + return X.drop(self.attribute_names, axis = 1) + + raise ValueError('You try to drop some columns from something which is not a DataFrame') \ No newline at end of file diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py new file mode 100644 index 0000000..8ed0aaf --- /dev/null +++ b/utils/LastWeeks.py @@ -0,0 +1,41 @@ +from sklearn.base import BaseEstimator, TransformerMixin +from collections import deque +import numpy as np +import pandas as pd + + +class LastWeeks(BaseEstimator, TransformerMixin): + def __init__(self, attributes, weeks=2, new_attributes_prefix='last_weeks_', copy=True): + self.attributes = attributes + self.weeks = weeks + self.new_attributes_prefix = new_attributes_prefix + self.copy = copy + + def fit(self, X, y=None): + attr_medians = [np.nanmedian(X[attr]) for attr in self.attributes] + dq = deque([attr_medians for _ in range(self.weeks)]) + self.last = {'sj': dq, 'iq': dq} + + return self + + def transform(self, X): + if self.copy: + X = X.copy() + + r = np.ndarray(shape=[X.shape[0], self.weeks, len(self.attributes)]) + + for idx, (_, week) in enumerate(X.iterrows()): + city = week['city'] + r[idx] = self.last[city] + self.last[city].pop() + self.last[city].appendleft(week[self.attributes]) + + r = pd.DataFrame(r.reshape([X.shape[0], self.weeks * len(self.attributes)]), + columns=[self.new_attributes_prefix + str(week) + '_' + str(attr) + for week in range(self.weeks) + for attr in self.attributes + ]) + + X = pd.concat([X, r], axis=1) + + return X From fb0c69f4dc41977e53716c8c0efe22eb2a53a347 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Tue, 16 Apr 2019 16:52:32 +0100 Subject: [PATCH 03/24] Actually working now --- models.ipynb | 562 +++++++++++++++++++++++++++++++++++++++++++-- utils/LastWeeks.py | 2 +- 2 files changed, 548 insertions(+), 16 deletions(-) diff --git a/models.ipynb b/models.ipynb index 4363ee6..71a7ba4 100644 
--- a/models.ipynb +++ b/models.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -52,7 +52,7 @@ " 'station_precip_mm']" ] }, - "execution_count": 2, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -82,7 +82,7 @@ "(1451, 24)" ] }, - "execution_count": 3, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -95,8 +95,11 @@ " r.append(idx)\n", " return r\n", "\n", - "y_train = y_train.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n", - "X_train_1 = X_train_1.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n", + "idx = bools_to_indexes(X_train_1['weekofyear'] == 53)\n", + "y_train.drop(idx, inplace=True)\n", + "y_train.reset_index(drop=True, inplace=True)\n", + "X_train_1.drop(idx, inplace=True)\n", + "X_train_1.reset_index(drop=True, inplace=True)\n", "X_train_1.shape" ] }, @@ -109,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -125,24 +128,32 @@ "pipeline = Pipeline([\n", " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", " ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n", - " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", + " #('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", " #('scaler', StandardScaler()),\n", - " #('pca', PCA(n_components=9))\n", + " #('pca', PCA(n_components=0.9))\n", "])" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X: (1451, 24)\n", + "R: (1451, 40)\n" + ] + }, { "data": { "text/plain": [ - "(1456, 60)" + "(1456, 64)" ] }, - "execution_count": 17, + "execution_count": 22, "metadata": {}, "output_type": 
"execute_result" } @@ -161,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -192,8 +203,8 @@ "metadata": {}, "outputs": [], "source": [ - "k_folds=10\n", - "n_iter_search = 100\n", + "k_folds=4\n", + "n_iter_search = 20\n", "C = sp_randint(0, 10000)\n", "params = {'kernel':['rbf', 'linear'], 'gamma':['scale'], 'C': C}" ] @@ -345,6 +356,527 @@ "Forest_optimizer.best_score_" ] }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n", + " 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n", + " 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sum(np.isnan(X_train), axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "300" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "5*60" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cityyearweekofyearweek_start_datendvi_nendvi_nwndvi_sendvi_swprecipitation_amt_mmreanalysis_air_temp_k...last_weeks_1_reanalysis_precip_amt_kg_per_m2last_weeks_1_reanalysis_relative_humidity_percentlast_weeks_1_reanalysis_sat_precip_amt_mmlast_weeks_1_reanalysis_specific_humidity_g_per_kglast_weeks_1_reanalysis_tdtr_klast_weeks_1_station_avg_temp_clast_weeks_1_station_diur_temp_rng_clast_weeks_1_station_max_temp_clast_weeks_1_station_min_temp_clast_weeks_1_station_precip_mm
139NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...24.9476.6614290.0015.2514292.64285726.6857148.38571432.221.747.5
451NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...12.8078.4185710.0016.5642862.12857125.9285716.04285730.022.229.7
763NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...21.6874.7785710.0014.2614291.95714324.9857144.90000028.321.123.8
1170NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...96.2195.53000093.7318.4085717.22857128.40000010.40000034.222.5232.1
1430NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...31.2087.64142919.0418.0014298.62857128.96666711.26666735.222.50.8
\n", + "

5 rows × 64 columns

\n", + "
" + ], + "text/plain": [ + " city year weekofyear week_start_date ndvi_ne ndvi_nw ndvi_se \\\n", + "139 NaN NaN NaN NaN NaN NaN NaN \n", + "451 NaN NaN NaN NaN NaN NaN NaN \n", + "763 NaN NaN NaN NaN NaN NaN NaN \n", + "1170 NaN NaN NaN NaN NaN NaN NaN \n", + "1430 NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " ndvi_sw precipitation_amt_mm reanalysis_air_temp_k ... \\\n", + "139 NaN NaN NaN ... \n", + "451 NaN NaN NaN ... \n", + "763 NaN NaN NaN ... \n", + "1170 NaN NaN NaN ... \n", + "1430 NaN NaN NaN ... \n", + "\n", + " last_weeks_1_reanalysis_precip_amt_kg_per_m2 \\\n", + "139 24.94 \n", + "451 12.80 \n", + "763 21.68 \n", + "1170 96.21 \n", + "1430 31.20 \n", + "\n", + " last_weeks_1_reanalysis_relative_humidity_percent \\\n", + "139 76.661429 \n", + "451 78.418571 \n", + "763 74.778571 \n", + "1170 95.530000 \n", + "1430 87.641429 \n", + "\n", + " last_weeks_1_reanalysis_sat_precip_amt_mm \\\n", + "139 0.00 \n", + "451 0.00 \n", + "763 0.00 \n", + "1170 93.73 \n", + "1430 19.04 \n", + "\n", + " last_weeks_1_reanalysis_specific_humidity_g_per_kg \\\n", + "139 15.251429 \n", + "451 16.564286 \n", + "763 14.261429 \n", + "1170 18.408571 \n", + "1430 18.001429 \n", + "\n", + " last_weeks_1_reanalysis_tdtr_k last_weeks_1_station_avg_temp_c \\\n", + "139 2.642857 26.685714 \n", + "451 2.128571 25.928571 \n", + "763 1.957143 24.985714 \n", + "1170 7.228571 28.400000 \n", + "1430 8.628571 28.966667 \n", + "\n", + " last_weeks_1_station_diur_temp_rng_c last_weeks_1_station_max_temp_c \\\n", + "139 8.385714 32.2 \n", + "451 6.042857 30.0 \n", + "763 4.900000 28.3 \n", + "1170 10.400000 34.2 \n", + "1430 11.266667 35.2 \n", + "\n", + " last_weeks_1_station_min_temp_c last_weeks_1_station_precip_mm \n", + "139 21.7 47.5 \n", + "451 22.2 29.7 \n", + "763 21.1 23.8 \n", + "1170 22.5 232.1 \n", + "1430 22.5 0.8 \n", + "\n", + "[5 rows x 64 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"X_train.loc[idx,:]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
012
134
256
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 2\n", + "1 3 4\n", + "2 5 6" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame([[1,2],[3,4],[5,6]], columns=['a','b'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
012
256
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 2\n", + "2 5 6" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop([1],inplace=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
012
156
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 2\n", + "1 5 6" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.reset_index(drop=True, inplace=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n", + "1\n" + ] + } + ], + "source": [ + "for idx, (_, x) in enumerate(df.iterrows()):\n", + " print(idx)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py index 8ed0aaf..1129f6f 100644 --- a/utils/LastWeeks.py +++ b/utils/LastWeeks.py @@ -24,7 +24,7 @@ def transform(self, X): r = np.ndarray(shape=[X.shape[0], self.weeks, len(self.attributes)]) - for idx, (_, week) in enumerate(X.iterrows()): + for idx, week in X.iterrows(): city = week['city'] r[idx] = self.last[city] self.last[city].pop() From baca2ca8091ab211a43da99bb8491e8e6c95bf3e Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Tue, 16 Apr 2019 20:35:31 +0100 Subject: [PATCH 04/24] calculated optimal parameters --- models.ipynb | 687 ++++++++++----------------------------------------- 1 file changed, 124 insertions(+), 563 deletions(-) diff --git a/models.ipynb b/models.ipynb index 71a7ba4..13271f3 100644 --- a/models.ipynb +++ b/models.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -52,7 +52,7 @@ " 'station_precip_mm']" ] }, - "execution_count": 19, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -82,7 +82,7 @@ "(1451, 24)" ] }, - "execution_count": 20, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, 
"outputs": [], "source": [ @@ -128,32 +128,24 @@ "pipeline = Pipeline([\n", " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", " ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n", - " #('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", - " #('scaler', StandardScaler()),\n", - " #('pca', PCA(n_components=0.9))\n", + " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", + " ('scaler', StandardScaler()),\n", + " ('pca', PCA(n_components=0.95))\n", "])" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 5, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X: (1451, 24)\n", - "R: (1451, 40)\n" - ] - }, { "data": { "text/plain": [ - "(1456, 64)" + "(1451, 23)" ] }, - "execution_count": 22, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -199,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -211,64 +203,14 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "SVR_optimizer.fit(X_train, y_train)\n", - "SVR_optimizer.best_score_" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, 
iid=True)\n", - "SVR_poly_optimizer.fit(X_train, y_train)\n", - "SVR_poly_optimizer.best_score_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Regression Trees" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "k_folds=10\n", - "n_iter_search = 100\n", - "min_samples = sp_uniform(0.03, 0.35)\n", - "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 5), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" - ] - }, - { - "cell_type": "code", - "execution_count": 62, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" + "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n" ] }, { @@ -276,57 +218,67 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 2.4s\n", - "[Parallel(n_jobs=-1)]: Done 319 tasks | elapsed: 8.5s\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 22.8s finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 24.6min\n", + "[Parallel(n_jobs=-1)]: Done 80 out of 80 | elapsed: 52.6min finished\n" ] }, { "data": { "text/plain": [ - "-18.391109579600275" + "-19.17685248872835" ] }, - "execution_count": 62, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "Tree_optimizer.fit(X_train, y_train)\n", - "Tree_optimizer.best_score_" + "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, 
verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "SVR_optimizer.fit(X_train, y_train)\n", + "SVR_optimizer.best_score_" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 15, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=769, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Random Forests" + "SVR_optimizer.best_estimator_" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "k_folds=5\n", - "n_iter_search = 100\n", - "min_samples = sp_uniform(0.01, 0.35)\n", - "params = {'n_estimators': sp_randint(5,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 8), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" + "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n" + "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n" ] }, { @@ -334,88 +286,53 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 6.2s\n", - "[Parallel(n_jobs=-1)]: Done 261 tasks | elapsed: 27.3s\n", - "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 53.8s finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 2.0min\n", + "[Parallel(n_jobs=-1)]: Done 80 out of 80 | elapsed: 2.8min finished\n" ] }, { "data": { "text/plain": [ - "-18.976912474155757" + "-25.45358085803704" ] }, - "execution_count": 67, + "execution_count": 10, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ - "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "Forest_optimizer.fit(X_train, y_train)\n", - "Forest_optimizer.best_score_" + "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "SVR_poly_optimizer.fit(X_train, y_train)\n", + "SVR_poly_optimizer.best_score_" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n", - " 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n", - " 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "np.sum(np.isnan(X_train), axis=0)" + "SVR_poly_optimizer.best_estimator_" ] }, { - "cell_type": "code", - "execution_count": 35, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "300" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "5*60" + "## Regression Trees" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "np.nan" + "k_folds=10\n", + "n_iter_search = 1000\n", + "min_samples = sp_uniform(0.01, 0.35)\n", + "params = {'criterion':['mae'], 'max_depth': 
sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { @@ -423,241 +340,35 @@ "execution_count": 12, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 10 folds for each of 1000 candidates, totalling 10000 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 76 tasks | elapsed: 4.4s\n", + "[Parallel(n_jobs=-1)]: Done 376 tasks | elapsed: 17.6s\n", + "[Parallel(n_jobs=-1)]: Done 876 tasks | elapsed: 43.4s\n", + "[Parallel(n_jobs=-1)]: Done 1576 tasks | elapsed: 1.3min\n", + "[Parallel(n_jobs=-1)]: Done 2476 tasks | elapsed: 2.0min\n", + "[Parallel(n_jobs=-1)]: Done 3576 tasks | elapsed: 2.9min\n", + "[Parallel(n_jobs=-1)]: Done 4876 tasks | elapsed: 3.9min\n", + "[Parallel(n_jobs=-1)]: Done 6376 tasks | elapsed: 5.2min\n", + "[Parallel(n_jobs=-1)]: Done 8076 tasks | elapsed: 6.6min\n", + "[Parallel(n_jobs=-1)]: Done 9976 tasks | elapsed: 8.2min\n", + "[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed: 8.2min finished\n" + ] + }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cityyearweekofyearweek_start_datendvi_nendvi_nwndvi_sendvi_swprecipitation_amt_mmreanalysis_air_temp_k...last_weeks_1_reanalysis_precip_amt_kg_per_m2last_weeks_1_reanalysis_relative_humidity_percentlast_weeks_1_reanalysis_sat_precip_amt_mmlast_weeks_1_reanalysis_specific_humidity_g_per_kglast_weeks_1_reanalysis_tdtr_klast_weeks_1_station_avg_temp_clast_weeks_1_station_diur_temp_rng_clast_weeks_1_station_max_temp_clast_weeks_1_station_min_temp_clast_weeks_1_station_precip_mm
139NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...24.9476.6614290.0015.2514292.64285726.6857148.38571432.221.747.5
451NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...12.8078.4185710.0016.5642862.12857125.9285716.04285730.022.229.7
763NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...21.6874.7785710.0014.2614291.95714324.9857144.90000028.321.123.8
1170NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...96.2195.53000093.7318.4085717.22857128.40000010.40000034.222.5232.1
1430NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...31.2087.64142919.0418.0014298.62857128.96666711.26666735.222.50.8
\n", - "

5 rows × 64 columns

\n", - "
" - ], "text/plain": [ - " city year weekofyear week_start_date ndvi_ne ndvi_nw ndvi_se \\\n", - "139 NaN NaN NaN NaN NaN NaN NaN \n", - "451 NaN NaN NaN NaN NaN NaN NaN \n", - "763 NaN NaN NaN NaN NaN NaN NaN \n", - "1170 NaN NaN NaN NaN NaN NaN NaN \n", - "1430 NaN NaN NaN NaN NaN NaN NaN \n", - "\n", - " ndvi_sw precipitation_amt_mm reanalysis_air_temp_k ... \\\n", - "139 NaN NaN NaN ... \n", - "451 NaN NaN NaN ... \n", - "763 NaN NaN NaN ... \n", - "1170 NaN NaN NaN ... \n", - "1430 NaN NaN NaN ... \n", - "\n", - " last_weeks_1_reanalysis_precip_amt_kg_per_m2 \\\n", - "139 24.94 \n", - "451 12.80 \n", - "763 21.68 \n", - "1170 96.21 \n", - "1430 31.20 \n", - "\n", - " last_weeks_1_reanalysis_relative_humidity_percent \\\n", - "139 76.661429 \n", - "451 78.418571 \n", - "763 74.778571 \n", - "1170 95.530000 \n", - "1430 87.641429 \n", - "\n", - " last_weeks_1_reanalysis_sat_precip_amt_mm \\\n", - "139 0.00 \n", - "451 0.00 \n", - "763 0.00 \n", - "1170 93.73 \n", - "1430 19.04 \n", - "\n", - " last_weeks_1_reanalysis_specific_humidity_g_per_kg \\\n", - "139 15.251429 \n", - "451 16.564286 \n", - "763 14.261429 \n", - "1170 18.408571 \n", - "1430 18.001429 \n", - "\n", - " last_weeks_1_reanalysis_tdtr_k last_weeks_1_station_avg_temp_c \\\n", - "139 2.642857 26.685714 \n", - "451 2.128571 25.928571 \n", - "763 1.957143 24.985714 \n", - "1170 7.228571 28.400000 \n", - "1430 8.628571 28.966667 \n", - "\n", - " last_weeks_1_station_diur_temp_rng_c last_weeks_1_station_max_temp_c \\\n", - "139 8.385714 32.2 \n", - "451 6.042857 30.0 \n", - "763 4.900000 28.3 \n", - "1170 10.400000 34.2 \n", - "1430 11.266667 35.2 \n", - "\n", - " last_weeks_1_station_min_temp_c last_weeks_1_station_precip_mm \n", - "139 21.7 47.5 \n", - "451 22.2 29.7 \n", - "763 21.1 23.8 \n", - "1170 22.5 232.1 \n", - "1430 22.5 0.8 \n", - "\n", - "[5 rows x 64 columns]" + "-18.271881461061337" ] }, "execution_count": 12, @@ -666,223 +377,73 @@ } ], "source": [ - "X_train.loc[idx,:]" + 
"Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Tree_optimizer.fit(X_train, y_train)\n", + "Tree_optimizer.best_score_" ] }, { - "cell_type": "code", - "execution_count": 28, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
012
134
256
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 2\n", - "1 3 4\n", - "2 5 6" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "df = pd.DataFrame([[1,2],[3,4],[5,6]], columns=['a','b'])\n", - "df" + "## Random Forests" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
012
256
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 2\n", - "2 5 6" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "df.drop([1],inplace=True)\n", - "df" + "k_folds=5\n", + "n_iter_search = 30\n", + "min_samples = sp_uniform(0.01, 0.35)\n", + "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 30 candidates, totalling 150 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 7.8s\n", + "[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 24.6s finished\n" + ] + }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
012
156
\n", - "
" - ], "text/plain": [ - " a b\n", - "0 1 2\n", - "1 5 6" + "-18.8661842407535" ] }, - "execution_count": 30, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.reset_index(drop=True, inplace=True)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0\n", - "1\n" - ] - } - ], - "source": [ - "for idx, (_, x) in enumerate(df.iterrows()):\n", - " print(idx)" + "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Forest_optimizer.fit(X_train, y_train)\n", + "Forest_optimizer.best_score_" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 610dac0af98f10468b78a32abfebbe7cf023b292 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Wed, 17 Apr 2019 10:46:25 +0100 Subject: [PATCH 05/24] found that the optimal number of previous weeks was 3 --- models.ipynb | 191 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 66 deletions(-) diff --git a/models.ipynb b/models.ipynb index 13271f3..74a7ebf 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -127,25 +127,25 @@ "\n", "pipeline = Pipeline([\n", " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", - " ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n", + " ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n", " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", " ('scaler', StandardScaler()),\n", - " ('pca', PCA(n_components=0.95))\n", + " #('pca', PCA(n_components=0.65))\n", "])" ] }, { 
"cell_type": "code", - "execution_count": 5, + "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1451, 23)" + "(1451, 80)" ] }, - "execution_count": 5, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -186,7 +186,7 @@ "metadata": {}, "source": [ "## SVR\n", - "* The results with the kernel *sigmoid* were too bad, so we removed them." + "* The results with the kernel *sigmoid* and *poly* were too bad, so we removed them." ] }, { @@ -260,25 +260,42 @@ "SVR_optimizer.best_estimator_" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Regression Trees\n", + "* 18.01 - with 2 previous weeks & without PCA & with (max_depth=6, min_samples_leaf=0.1611807565247405, min_samples_split=0.11193019906931466)\n", + "* 18.29 - With PCA at 0.9\n", + "* 18.27 - With PCA at 0.95\n", + "* 18.36 - With PCA at 0.65. PCA appears to be only making the model worse.\n", + "* 18.38 - Without PCA and with previous weeks. Clearly the previous weeks are useful\n", + "* **17.87** - Without PCA and with 3 previous weeks\n", + "* **17.86** - Without PCA and with 4 previous weeks" + ] + }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ - "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}" + "k_folds=8\n", + "n_iter_search = 50\n", + "min_samples = sp_uniform(0.01, 0.35)\n", + "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n" + "Fitting 8 folds for each of 50 candidates, totalling 400 fits\n" ] }, { @@ -286,65 +303,83 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 
concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 2.0min\n", - "[Parallel(n_jobs=-1)]: Done 80 out of 80 | elapsed: 2.8min finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 7.0s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 26.5s\n", + "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 55.3s finished\n" ] }, { "data": { "text/plain": [ - "-25.45358085803704" + "-17.86526533425224" ] }, - "execution_count": 10, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "SVR_poly_optimizer.fit(X_train, y_train)\n", - "SVR_poly_optimizer.best_score_" + "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Tree_optimizer.fit(X_train, y_train)\n", + "Tree_optimizer.best_score_" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.286561439185922,\n", + " min_samples_split=0.22208599117335398,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "SVR_poly_optimizer.best_estimator_" + "Tree_optimizer.best_estimator_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Regression Trees" + "## Random 
Forests\n", + "* 18.34 With 4 previous weeks and without PCA" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ - "k_folds=10\n", - "n_iter_search = 1000\n", + "k_folds=5\n", + "n_iter_search = 100\n", "min_samples = sp_uniform(0.01, 0.35)\n", - "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" + "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 10 folds for each of 1000 candidates, totalling 10000 fits\n" + "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n" ] }, { @@ -352,98 +387,122 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 76 tasks | elapsed: 4.4s\n", - "[Parallel(n_jobs=-1)]: Done 376 tasks | elapsed: 17.6s\n", - "[Parallel(n_jobs=-1)]: Done 876 tasks | elapsed: 43.4s\n", - "[Parallel(n_jobs=-1)]: Done 1576 tasks | elapsed: 1.3min\n", - "[Parallel(n_jobs=-1)]: Done 2476 tasks | elapsed: 2.0min\n", - "[Parallel(n_jobs=-1)]: Done 3576 tasks | elapsed: 2.9min\n", - "[Parallel(n_jobs=-1)]: Done 4876 tasks | elapsed: 3.9min\n", - "[Parallel(n_jobs=-1)]: Done 6376 tasks | elapsed: 5.2min\n", - "[Parallel(n_jobs=-1)]: Done 8076 tasks | elapsed: 6.6min\n", - "[Parallel(n_jobs=-1)]: Done 9976 tasks | elapsed: 8.2min\n", - "[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed: 8.2min finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 23.8s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 1.7min\n", + "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 4.0min\n", + "[Parallel(n_jobs=-1)]: Done 
500 out of 500 | elapsed: 4.4min finished\n" ] }, { "data": { "text/plain": [ - "-18.271881461061337" + "-18.3364346427751" ] }, - "execution_count": 12, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "Tree_optimizer.fit(X_train, y_train)\n", - "Tree_optimizer.best_score_" + "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Forest_optimizer.fit(X_train, y_train)\n", + "Forest_optimizer.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=8,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=0.15819051824722938,\n", + " min_samples_split=0.1482085313614494,\n", + " min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=None,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Forest_optimizer.best_estimator_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Random Forests" + "## KNN\n", + "* -21.349 - with PCA at 0.65 & 2 previous weeks\n", + "* -20.36 - without PCA" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ - "k_folds=5\n", - "n_iter_search = 30\n", - "min_samples = sp_uniform(0.01, 0.35)\n", - "params = 
{'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" + "k_folds=10\n", + "n_iter_search = 100\n", + "params = {'n_neighbors': sp_randint(3,150), 'weights': ['uniform', 'distance']}" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 36, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 5 folds for each of 30 candidates, totalling 150 fits\n" + "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 7.8s\n", - "[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 24.6s finished\n" + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.5s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 19.7s\n", + "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 45.0s\n", + "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.3min\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 1.6min finished\n" ] }, { "data": { "text/plain": [ - "-18.8661842407535" + "-20.359505759574677" ] }, - "execution_count": 14, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "Forest_optimizer.fit(X_train, y_train)\n", - "Forest_optimizer.best_score_" + "KNN_optimizer = RandomizedSearchCV(estimator=KNeighborsRegressor(n_jobs=-1), param_distributions=params, 
n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "KNN_optimizer.fit(X_train, y_train)\n", + "KNN_optimizer.best_score_" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From b338df149ccf47a2856cf63b2b6c3956d4832cb4 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Wed, 17 Apr 2019 12:04:22 +0100 Subject: [PATCH 06/24] created the LastInfected method --- models.ipynb | 1673 ++++++++++++++++++++++++++++++++++++++++- utils/LastInfected.py | 34 + utils/LastWeeks.py | 2 +- 3 files changed, 1669 insertions(+), 40 deletions(-) create mode 100644 utils/LastInfected.py diff --git a/models.ipynb b/models.ipynb index 74a7ebf..0f3dc6c 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -130,13 +130,13 @@ " ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n", " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", " ('scaler', StandardScaler()),\n", - " #('pca', PCA(n_components=0.65))\n", + " #('pca', PCA(n_components=0.95))\n", "])" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -145,7 +145,7 @@ "(1451, 80)" ] }, - "execution_count": 51, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -270,32 +270,33 @@ "* 18.27 - With PCA at 0.95\n", "* 18.36 - With PCA at 0.65. PCA appears to be only making the model worse.\n", "* 18.38 - Without PCA and with previous weeks. 
Clearly the previous weeks are useful\n", - "* **17.87** - Without PCA and with 3 previous weeks\n", - "* **17.86** - Without PCA and with 4 previous weeks" + "* 17.87 - Without PCA and with 3 previous weeks\n", + "* 17.86 - Without PCA and with 4 previous weeks\n", + "* 18.28 - Withou PCA 0.95 and 3 previous weeks fixed" ] }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "k_folds=8\n", - "n_iter_search = 50\n", + "k_folds=10\n", + "n_iter_search = 100\n", "min_samples = sp_uniform(0.01, 0.35)\n", "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 8 folds for each of 50 candidates, totalling 400 fits\n" + "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" ] }, { @@ -303,18 +304,20 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 7.0s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 26.5s\n", - "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 55.3s finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 3.3s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 12.8s\n", + "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 29.8s\n", + "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 56.1s\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 1.2min finished\n" ] }, { "data": { "text/plain": [ - "-17.86526533425224" + "-18.274293590627153" ] }, - "execution_count": 57, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -327,21 +330,21 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ - "DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n", + "DecisionTreeRegressor(criterion='mae', max_depth=2, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.286561439185922,\n", - " min_samples_split=0.22208599117335398,\n", + " min_impurity_split=None, min_samples_leaf=0.2320229706454773,\n", + " min_samples_split=0.24824690804416838,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, - "execution_count": 61, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -355,31 +358,33 @@ "metadata": {}, "source": [ "## Random Forests\n", - "* 18.34 With 4 previous weeks and without PCA" + "* 18.34 With 4 previous weeks and without PCA\n", + "* **17.79** With fixed 3 previous weeks and PCA at 0.95 (n_estimators= ?, max_depth = 2, min_samples_leaf=0.112, min_samples_split=0.224)\n", + "* **17.79** With fixed 3 previous weeks and without PCA (n_estimators= ?, max_depth = 5, min_samples_leaf=0.07, min_samples_split=0.27)" ] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ - "k_folds=5\n", - "n_iter_search = 100\n", + "k_folds=10\n", + "n_iter_search = 30\n", "min_samples = sp_uniform(0.01, 0.35)\n", "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n" + "Fitting 10 folds for each of 30 candidates, totalling 300 fits\n" ] }, { @@ -388,46 +393,45 @@ "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", "[Parallel(n_jobs=-1)]: Done 42 tasks | 
elapsed: 23.8s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 1.7min\n", - "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 4.0min\n", - "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 4.4min finished\n" + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 2.1min\n", + "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 3.1min finished\n" ] }, { "data": { "text/plain": [ - "-18.3364346427751" + "-17.740020145257915" ] }, - "execution_count": 59, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", "Forest_optimizer.fit(X_train, y_train)\n", "Forest_optimizer.best_score_" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=8,\n", + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", " max_features='auto', max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=0.15819051824722938,\n", - " min_samples_split=0.1482085313614494,\n", - " min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=None,\n", + " min_samples_leaf=0.09435891310910409,\n", + " min_samples_split=0.24914223158891036,\n", + " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", " oob_score=False, random_state=None, verbose=0, warm_start=False)" ] }, - "execution_count": 60, + "execution_count": 50, 
"metadata": {}, "output_type": "execute_result" } @@ -497,6 +501,1597 @@ "KNN_optimizer.best_score_" ] }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from utils.LastInfected import LastInfected\n", + "tmp = pd.concat([LastInfected(weeks=2).fit_transform(X_train_1, y=y_train), y_train], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cityyearweekofyearweek_start_datendvi_nendvi_nwndvi_sendvi_swprecipitation_amt_mmreanalysis_air_temp_k...reanalysis_specific_humidity_g_per_kgreanalysis_tdtr_kstation_avg_temp_cstation_diur_temp_rng_cstation_max_temp_cstation_min_temp_cstation_precip_mmlast_infected_0last_infected_1total_cases
900sj2007372007-09-10NaN0.0569000.2385430.18748610.37301.117143...17.7200003.15714328.8714296.51428633.925.010.471.092.0112
901sj2007382007-09-17-0.0134500.0749000.1525710.13192970.39301.217143...18.0371432.81428628.3000006.28571432.824.426.9112.071.0106
902sj2007392007-09-24-0.030700-0.0029400.1527290.14462994.37301.052857...17.9814293.58571428.1714296.02857132.224.421.3106.0112.0101
903sj2007402007-10-010.0960000.0247670.1853000.11772974.50301.022857...18.1185712.68571427.9857147.24285732.822.286.6101.0106.0170
904sj2007412007-10-080.0090000.1040000.1181290.126343108.26300.790000...18.3757143.00000028.1285716.91428633.323.914.5170.0101.0135
905sj2007422007-10-150.0210000.1326670.2459430.18975717.56301.492857...17.8457143.18571429.1000007.54285733.924.410.2135.0170.0106
906sj2007432007-10-22NaN-0.0091500.1911860.17640016.48301.007143...17.2757142.47142927.9571436.44285732.224.48.6106.0135.068
907sj2007442007-10-290.1243000.0543000.1568140.123529137.55299.458571...17.5028572.60000026.2000005.40000030.622.289.268.0106.048
908sj2007452007-11-05-0.251700-0.0486000.2051710.17288315.25300.604286...17.2957142.25714327.4428576.85714332.222.84.148.068.048
909sj2007462007-11-12-0.058900-0.0625500.2057430.20254342.00299.934286...17.0828573.54285726.8142866.68571431.122.865.748.048.026
910sj2007472007-11-19NaNNaN0.2044860.15628673.37299.821429...17.1871432.51428626.9000006.20000031.122.840.426.048.033
911sj2007482007-11-26-0.059500-0.0416670.0909170.12908615.95299.090000...14.7685712.07142925.4428575.38571428.922.236.433.026.029
912sj2007492007-12-03NaN-0.0423500.0956000.08900017.85299.020000...15.6757142.10000025.8428575.40000029.422.834.529.033.017
913sj2007502007-12-10-0.133050-0.0455500.1514400.14317131.30298.900000...16.1300002.48571425.7714295.08571428.922.230.217.029.012
914sj2007512007-12-17NaN-0.0390000.1734170.15017162.11298.668571...16.3442862.37142925.0714294.91428628.921.7108.212.017.013
915sj2007522007-12-240.0148000.0163000.2072670.1445780.00298.602857...15.3185712.98571425.0857146.24285728.321.116.813.012.017
916sj200812008-01-010.000600-0.3096000.2398140.1955570.00298.038571...14.9114291.84285725.4000005.30000029.422.255.517.013.015
917sj200822008-01-08NaN-0.1082500.3304860.24428637.24298.142857...14.9800002.05714324.9714295.01428628.321.164.815.017.014
918sj200832008-01-15NaN0.0383000.1250000.1088430.00297.627143...14.4885713.00000024.4285715.62857127.820.62.514.015.015
919sj200842008-01-22-0.026800-0.2153000.1126140.16021481.22297.968571...15.0657142.00000024.5285714.58571427.821.183.115.014.010
920sj200852008-01-29NaN-0.1354000.2233000.1709430.00298.021429...14.4085713.30000024.5714296.44285728.920.03.110.015.09
921sj200862008-02-05-0.111700-0.0032000.2328430.2711710.00297.237143...13.2257142.07142924.2142865.15714327.221.135.99.010.02
922sj200872008-02-120.072000-0.0631000.1502000.1492710.00297.838571...14.3671432.15714324.8000006.24285728.321.16.42.09.06
923sj200882008-02-19-0.138650-0.0950670.2460570.2281290.00297.907143...14.5385711.88571424.9000005.78571428.321.713.36.02.08
924sj200892008-02-26NaN0.0154200.2116290.1173430.00297.765714...13.9671432.28571424.7428575.50000027.821.112.98.06.05
925sj2008102008-03-04-0.088900-0.0900330.2232430.1541860.00297.878571...14.4742862.61428625.1142866.11428629.421.113.05.08.01
926sj2008112008-03-11-0.321400-0.1412000.1106430.1410140.00297.595714...13.7214292.08571425.3285715.81428628.922.24.41.05.02
927sj2008122008-03-180.0449000.0244500.1016290.0880000.00297.404286...13.7371433.87142925.2000007.04285730.020.60.52.01.03
928sj2008132008-03-250.077850-0.0399000.3104710.29624327.19296.958571...13.6442862.88571425.0428575.78571430.021.11.83.02.04
929sj2008142008-04-01-0.038000-0.0168330.1193710.0663863.82298.081429...14.6628572.71428626.2428576.81428630.622.20.54.03.03
930sj2008152008-04-08-0.155200-0.0527500.1377570.14121416.96297.460000...14.1842862.18571425.0000005.71428629.421.730.73.04.01
931sj2008162008-04-150.001800NaN0.2039000.2098430.00297.630000...13.8585712.78571425.3142866.24285729.421.711.21.03.03
932sj2008172008-04-22-0.037000-0.0103670.0773140.0905860.00298.672857...15.6714293.95714327.0428577.51428631.723.30.33.01.05
933iq2000262000-07-010.1928860.1322570.3408860.24720025.41296.740000...16.6514298.92857126.40000010.77500032.520.73.00.00.00
934iq2000272000-07-080.2168330.2761000.2894570.24165760.61296.634286...16.86285710.31428626.90000011.56666734.020.855.60.00.00
935iq2000282000-07-150.1767570.1731290.2041140.12801455.52296.415714...17.1200007.38571426.80000011.46666733.020.738.10.00.00
936iq2000292000-07-220.2277290.1454290.2542000.2003145.60295.357143...14.4314299.11428625.76666710.53333331.514.730.00.00.00
937iq2000302000-07-290.3286430.3221290.2543710.36104362.76296.432857...15.4442869.50000026.60000011.48000033.319.14.00.00.00
938iq2000312000-08-050.2055290.1907570.2316710.25531416.24297.191429...13.42142913.77142925.34000010.94000032.017.011.50.00.00
939iq2000322000-08-120.3124860.3299860.3805860.38727189.37297.320000...15.31142911.47142927.01666711.65000034.019.972.90.00.00
940iq2000332000-08-190.3841330.3922400.3417800.38275042.08297.627143...15.46571413.70000026.58333310.31666733.020.550.10.00.00
941iq2000342000-08-260.4081570.3221570.4067140.30271449.22298.238571...14.44428613.77142926.90000013.40000034.019.089.20.00.00
942iq2000352000-09-020.3320430.3210570.3146140.32425753.65299.218571...15.05714312.45714327.11666712.26666734.020.078.00.00.00
943iq2000362000-09-090.2955860.2956830.3122140.26592923.12300.802857...12.65285714.90000028.36666712.90000035.821.756.90.00.01
944iq2000372000-09-160.2846570.3097570.3878830.32815734.62299.858571...15.22714313.85714327.42500012.77500034.520.518.91.00.00
945iq2000382000-09-230.3488140.2957170.4048430.24257197.55297.435714...14.33857111.31428627.53333312.56666736.020.5104.20.01.00
946iq2000392000-09-300.1756860.0994830.2257140.18278695.89299.355714...14.79857114.94285727.15000012.17500034.020.557.90.00.00
947iq2000402000-10-070.3375400.2769430.2775000.25505046.22298.372857...16.14857111.97142926.70000011.67500034.020.063.00.00.00
948iq2000412000-10-140.2235330.1339140.3498000.10091731.10298.474286...16.07142913.48571427.65714311.30000034.021.03.00.00.01
949iq2000422000-10-210.2748000.1870570.3739430.27947125.21299.211429...15.52857114.92857127.77500012.27500036.021.045.21.00.01
\n", + "

50 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " city year weekofyear week_start_date ndvi_ne ndvi_nw ndvi_se \\\n", + "900 sj 2007 37 2007-09-10 NaN 0.056900 0.238543 \n", + "901 sj 2007 38 2007-09-17 -0.013450 0.074900 0.152571 \n", + "902 sj 2007 39 2007-09-24 -0.030700 -0.002940 0.152729 \n", + "903 sj 2007 40 2007-10-01 0.096000 0.024767 0.185300 \n", + "904 sj 2007 41 2007-10-08 0.009000 0.104000 0.118129 \n", + "905 sj 2007 42 2007-10-15 0.021000 0.132667 0.245943 \n", + "906 sj 2007 43 2007-10-22 NaN -0.009150 0.191186 \n", + "907 sj 2007 44 2007-10-29 0.124300 0.054300 0.156814 \n", + "908 sj 2007 45 2007-11-05 -0.251700 -0.048600 0.205171 \n", + "909 sj 2007 46 2007-11-12 -0.058900 -0.062550 0.205743 \n", + "910 sj 2007 47 2007-11-19 NaN NaN 0.204486 \n", + "911 sj 2007 48 2007-11-26 -0.059500 -0.041667 0.090917 \n", + "912 sj 2007 49 2007-12-03 NaN -0.042350 0.095600 \n", + "913 sj 2007 50 2007-12-10 -0.133050 -0.045550 0.151440 \n", + "914 sj 2007 51 2007-12-17 NaN -0.039000 0.173417 \n", + "915 sj 2007 52 2007-12-24 0.014800 0.016300 0.207267 \n", + "916 sj 2008 1 2008-01-01 0.000600 -0.309600 0.239814 \n", + "917 sj 2008 2 2008-01-08 NaN -0.108250 0.330486 \n", + "918 sj 2008 3 2008-01-15 NaN 0.038300 0.125000 \n", + "919 sj 2008 4 2008-01-22 -0.026800 -0.215300 0.112614 \n", + "920 sj 2008 5 2008-01-29 NaN -0.135400 0.223300 \n", + "921 sj 2008 6 2008-02-05 -0.111700 -0.003200 0.232843 \n", + "922 sj 2008 7 2008-02-12 0.072000 -0.063100 0.150200 \n", + "923 sj 2008 8 2008-02-19 -0.138650 -0.095067 0.246057 \n", + "924 sj 2008 9 2008-02-26 NaN 0.015420 0.211629 \n", + "925 sj 2008 10 2008-03-04 -0.088900 -0.090033 0.223243 \n", + "926 sj 2008 11 2008-03-11 -0.321400 -0.141200 0.110643 \n", + "927 sj 2008 12 2008-03-18 0.044900 0.024450 0.101629 \n", + "928 sj 2008 13 2008-03-25 0.077850 -0.039900 0.310471 \n", + "929 sj 2008 14 2008-04-01 -0.038000 -0.016833 0.119371 \n", + "930 sj 2008 15 2008-04-08 -0.155200 -0.052750 0.137757 \n", + "931 sj 2008 16 2008-04-15 0.001800 
NaN 0.203900 \n", + "932 sj 2008 17 2008-04-22 -0.037000 -0.010367 0.077314 \n", + "933 iq 2000 26 2000-07-01 0.192886 0.132257 0.340886 \n", + "934 iq 2000 27 2000-07-08 0.216833 0.276100 0.289457 \n", + "935 iq 2000 28 2000-07-15 0.176757 0.173129 0.204114 \n", + "936 iq 2000 29 2000-07-22 0.227729 0.145429 0.254200 \n", + "937 iq 2000 30 2000-07-29 0.328643 0.322129 0.254371 \n", + "938 iq 2000 31 2000-08-05 0.205529 0.190757 0.231671 \n", + "939 iq 2000 32 2000-08-12 0.312486 0.329986 0.380586 \n", + "940 iq 2000 33 2000-08-19 0.384133 0.392240 0.341780 \n", + "941 iq 2000 34 2000-08-26 0.408157 0.322157 0.406714 \n", + "942 iq 2000 35 2000-09-02 0.332043 0.321057 0.314614 \n", + "943 iq 2000 36 2000-09-09 0.295586 0.295683 0.312214 \n", + "944 iq 2000 37 2000-09-16 0.284657 0.309757 0.387883 \n", + "945 iq 2000 38 2000-09-23 0.348814 0.295717 0.404843 \n", + "946 iq 2000 39 2000-09-30 0.175686 0.099483 0.225714 \n", + "947 iq 2000 40 2000-10-07 0.337540 0.276943 0.277500 \n", + "948 iq 2000 41 2000-10-14 0.223533 0.133914 0.349800 \n", + "949 iq 2000 42 2000-10-21 0.274800 0.187057 0.373943 \n", + "\n", + " ndvi_sw precipitation_amt_mm reanalysis_air_temp_k ... \\\n", + "900 0.187486 10.37 301.117143 ... \n", + "901 0.131929 70.39 301.217143 ... \n", + "902 0.144629 94.37 301.052857 ... \n", + "903 0.117729 74.50 301.022857 ... \n", + "904 0.126343 108.26 300.790000 ... \n", + "905 0.189757 17.56 301.492857 ... \n", + "906 0.176400 16.48 301.007143 ... \n", + "907 0.123529 137.55 299.458571 ... \n", + "908 0.172883 15.25 300.604286 ... \n", + "909 0.202543 42.00 299.934286 ... \n", + "910 0.156286 73.37 299.821429 ... \n", + "911 0.129086 15.95 299.090000 ... \n", + "912 0.089000 17.85 299.020000 ... \n", + "913 0.143171 31.30 298.900000 ... \n", + "914 0.150171 62.11 298.668571 ... \n", + "915 0.144578 0.00 298.602857 ... \n", + "916 0.195557 0.00 298.038571 ... \n", + "917 0.244286 37.24 298.142857 ... \n", + "918 0.108843 0.00 297.627143 ... 
\n", + "919 0.160214 81.22 297.968571 ... \n", + "920 0.170943 0.00 298.021429 ... \n", + "921 0.271171 0.00 297.237143 ... \n", + "922 0.149271 0.00 297.838571 ... \n", + "923 0.228129 0.00 297.907143 ... \n", + "924 0.117343 0.00 297.765714 ... \n", + "925 0.154186 0.00 297.878571 ... \n", + "926 0.141014 0.00 297.595714 ... \n", + "927 0.088000 0.00 297.404286 ... \n", + "928 0.296243 27.19 296.958571 ... \n", + "929 0.066386 3.82 298.081429 ... \n", + "930 0.141214 16.96 297.460000 ... \n", + "931 0.209843 0.00 297.630000 ... \n", + "932 0.090586 0.00 298.672857 ... \n", + "933 0.247200 25.41 296.740000 ... \n", + "934 0.241657 60.61 296.634286 ... \n", + "935 0.128014 55.52 296.415714 ... \n", + "936 0.200314 5.60 295.357143 ... \n", + "937 0.361043 62.76 296.432857 ... \n", + "938 0.255314 16.24 297.191429 ... \n", + "939 0.387271 89.37 297.320000 ... \n", + "940 0.382750 42.08 297.627143 ... \n", + "941 0.302714 49.22 298.238571 ... \n", + "942 0.324257 53.65 299.218571 ... \n", + "943 0.265929 23.12 300.802857 ... \n", + "944 0.328157 34.62 299.858571 ... \n", + "945 0.242571 97.55 297.435714 ... \n", + "946 0.182786 95.89 299.355714 ... \n", + "947 0.255050 46.22 298.372857 ... \n", + "948 0.100917 31.10 298.474286 ... \n", + "949 0.279471 25.21 299.211429 ... 
\n", + "\n", + " reanalysis_specific_humidity_g_per_kg reanalysis_tdtr_k \\\n", + "900 17.720000 3.157143 \n", + "901 18.037143 2.814286 \n", + "902 17.981429 3.585714 \n", + "903 18.118571 2.685714 \n", + "904 18.375714 3.000000 \n", + "905 17.845714 3.185714 \n", + "906 17.275714 2.471429 \n", + "907 17.502857 2.600000 \n", + "908 17.295714 2.257143 \n", + "909 17.082857 3.542857 \n", + "910 17.187143 2.514286 \n", + "911 14.768571 2.071429 \n", + "912 15.675714 2.100000 \n", + "913 16.130000 2.485714 \n", + "914 16.344286 2.371429 \n", + "915 15.318571 2.985714 \n", + "916 14.911429 1.842857 \n", + "917 14.980000 2.057143 \n", + "918 14.488571 3.000000 \n", + "919 15.065714 2.000000 \n", + "920 14.408571 3.300000 \n", + "921 13.225714 2.071429 \n", + "922 14.367143 2.157143 \n", + "923 14.538571 1.885714 \n", + "924 13.967143 2.285714 \n", + "925 14.474286 2.614286 \n", + "926 13.721429 2.085714 \n", + "927 13.737143 3.871429 \n", + "928 13.644286 2.885714 \n", + "929 14.662857 2.714286 \n", + "930 14.184286 2.185714 \n", + "931 13.858571 2.785714 \n", + "932 15.671429 3.957143 \n", + "933 16.651429 8.928571 \n", + "934 16.862857 10.314286 \n", + "935 17.120000 7.385714 \n", + "936 14.431429 9.114286 \n", + "937 15.444286 9.500000 \n", + "938 13.421429 13.771429 \n", + "939 15.311429 11.471429 \n", + "940 15.465714 13.700000 \n", + "941 14.444286 13.771429 \n", + "942 15.057143 12.457143 \n", + "943 12.652857 14.900000 \n", + "944 15.227143 13.857143 \n", + "945 14.338571 11.314286 \n", + "946 14.798571 14.942857 \n", + "947 16.148571 11.971429 \n", + "948 16.071429 13.485714 \n", + "949 15.528571 14.928571 \n", + "\n", + " station_avg_temp_c station_diur_temp_rng_c station_max_temp_c \\\n", + "900 28.871429 6.514286 33.9 \n", + "901 28.300000 6.285714 32.8 \n", + "902 28.171429 6.028571 32.2 \n", + "903 27.985714 7.242857 32.8 \n", + "904 28.128571 6.914286 33.3 \n", + "905 29.100000 7.542857 33.9 \n", + "906 27.957143 6.442857 32.2 \n", + "907 26.200000 
5.400000 30.6 \n", + "908 27.442857 6.857143 32.2 \n", + "909 26.814286 6.685714 31.1 \n", + "910 26.900000 6.200000 31.1 \n", + "911 25.442857 5.385714 28.9 \n", + "912 25.842857 5.400000 29.4 \n", + "913 25.771429 5.085714 28.9 \n", + "914 25.071429 4.914286 28.9 \n", + "915 25.085714 6.242857 28.3 \n", + "916 25.400000 5.300000 29.4 \n", + "917 24.971429 5.014286 28.3 \n", + "918 24.428571 5.628571 27.8 \n", + "919 24.528571 4.585714 27.8 \n", + "920 24.571429 6.442857 28.9 \n", + "921 24.214286 5.157143 27.2 \n", + "922 24.800000 6.242857 28.3 \n", + "923 24.900000 5.785714 28.3 \n", + "924 24.742857 5.500000 27.8 \n", + "925 25.114286 6.114286 29.4 \n", + "926 25.328571 5.814286 28.9 \n", + "927 25.200000 7.042857 30.0 \n", + "928 25.042857 5.785714 30.0 \n", + "929 26.242857 6.814286 30.6 \n", + "930 25.000000 5.714286 29.4 \n", + "931 25.314286 6.242857 29.4 \n", + "932 27.042857 7.514286 31.7 \n", + "933 26.400000 10.775000 32.5 \n", + "934 26.900000 11.566667 34.0 \n", + "935 26.800000 11.466667 33.0 \n", + "936 25.766667 10.533333 31.5 \n", + "937 26.600000 11.480000 33.3 \n", + "938 25.340000 10.940000 32.0 \n", + "939 27.016667 11.650000 34.0 \n", + "940 26.583333 10.316667 33.0 \n", + "941 26.900000 13.400000 34.0 \n", + "942 27.116667 12.266667 34.0 \n", + "943 28.366667 12.900000 35.8 \n", + "944 27.425000 12.775000 34.5 \n", + "945 27.533333 12.566667 36.0 \n", + "946 27.150000 12.175000 34.0 \n", + "947 26.700000 11.675000 34.0 \n", + "948 27.657143 11.300000 34.0 \n", + "949 27.775000 12.275000 36.0 \n", + "\n", + " station_min_temp_c station_precip_mm last_infected_0 last_infected_1 \\\n", + "900 25.0 10.4 71.0 92.0 \n", + "901 24.4 26.9 112.0 71.0 \n", + "902 24.4 21.3 106.0 112.0 \n", + "903 22.2 86.6 101.0 106.0 \n", + "904 23.9 14.5 170.0 101.0 \n", + "905 24.4 10.2 135.0 170.0 \n", + "906 24.4 8.6 106.0 135.0 \n", + "907 22.2 89.2 68.0 106.0 \n", + "908 22.8 4.1 48.0 68.0 \n", + "909 22.8 65.7 48.0 48.0 \n", + "910 22.8 40.4 26.0 48.0 \n", + 
"911 22.2 36.4 33.0 26.0 \n", + "912 22.8 34.5 29.0 33.0 \n", + "913 22.2 30.2 17.0 29.0 \n", + "914 21.7 108.2 12.0 17.0 \n", + "915 21.1 16.8 13.0 12.0 \n", + "916 22.2 55.5 17.0 13.0 \n", + "917 21.1 64.8 15.0 17.0 \n", + "918 20.6 2.5 14.0 15.0 \n", + "919 21.1 83.1 15.0 14.0 \n", + "920 20.0 3.1 10.0 15.0 \n", + "921 21.1 35.9 9.0 10.0 \n", + "922 21.1 6.4 2.0 9.0 \n", + "923 21.7 13.3 6.0 2.0 \n", + "924 21.1 12.9 8.0 6.0 \n", + "925 21.1 13.0 5.0 8.0 \n", + "926 22.2 4.4 1.0 5.0 \n", + "927 20.6 0.5 2.0 1.0 \n", + "928 21.1 1.8 3.0 2.0 \n", + "929 22.2 0.5 4.0 3.0 \n", + "930 21.7 30.7 3.0 4.0 \n", + "931 21.7 11.2 1.0 3.0 \n", + "932 23.3 0.3 3.0 1.0 \n", + "933 20.7 3.0 0.0 0.0 \n", + "934 20.8 55.6 0.0 0.0 \n", + "935 20.7 38.1 0.0 0.0 \n", + "936 14.7 30.0 0.0 0.0 \n", + "937 19.1 4.0 0.0 0.0 \n", + "938 17.0 11.5 0.0 0.0 \n", + "939 19.9 72.9 0.0 0.0 \n", + "940 20.5 50.1 0.0 0.0 \n", + "941 19.0 89.2 0.0 0.0 \n", + "942 20.0 78.0 0.0 0.0 \n", + "943 21.7 56.9 0.0 0.0 \n", + "944 20.5 18.9 1.0 0.0 \n", + "945 20.5 104.2 0.0 1.0 \n", + "946 20.5 57.9 0.0 0.0 \n", + "947 20.0 63.0 0.0 0.0 \n", + "948 21.0 3.0 0.0 0.0 \n", + "949 21.0 45.2 1.0 0.0 \n", + "\n", + " total_cases \n", + "900 112 \n", + "901 106 \n", + "902 101 \n", + "903 170 \n", + "904 135 \n", + "905 106 \n", + "906 68 \n", + "907 48 \n", + "908 48 \n", + "909 26 \n", + "910 33 \n", + "911 29 \n", + "912 17 \n", + "913 12 \n", + "914 13 \n", + "915 17 \n", + "916 15 \n", + "917 14 \n", + "918 15 \n", + "919 10 \n", + "920 9 \n", + "921 2 \n", + "922 6 \n", + "923 8 \n", + "924 5 \n", + "925 1 \n", + "926 2 \n", + "927 3 \n", + "928 4 \n", + "929 3 \n", + "930 1 \n", + "931 3 \n", + "932 5 \n", + "933 0 \n", + "934 0 \n", + "935 0 \n", + "936 0 \n", + "937 0 \n", + "938 0 \n", + "939 0 \n", + "940 0 \n", + "941 0 \n", + "942 0 \n", + "943 1 \n", + "944 0 \n", + "945 0 \n", + "946 0 \n", + "947 0 \n", + "948 1 \n", + "949 1 \n", + "\n", + "[50 rows x 27 columns]" + ] + }, + "execution_count": 
58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmp[900:950]" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/utils/LastInfected.py b/utils/LastInfected.py new file mode 100644 index 0000000..6c1c60c --- /dev/null +++ b/utils/LastInfected.py @@ -0,0 +1,34 @@ +from sklearn.base import BaseEstimator, TransformerMixin +from collections import deque +import numpy as np +import pandas as pd + +class LastInfected(BaseEstimator, TransformerMixin): + def __init__(self, weeks=1, new_attributes_prefix='last_infected_', copy=True): + self.weeks=weeks + self.new_attributes_prefix = new_attributes_prefix + self.copy=copy + dq = deque([0 for _ in range(weeks)]) + self.last = {'sj': dq.copy(), 'iq': dq.copy()} + + def fit(self, X, y): + self.y = y + return self + + def transform(self, X, model=None): + if self.copy: + X = X.copy() + + r = np.ndarray(shape=[X.shape[0], self.weeks]) + + for idx, n_infected in enumerate(self.y): + city = X.loc[idx, 'city'] + r[idx] = self.last[city] + self.last[city].pop() + self.last[city].appendleft(n_infected) + + r = pd.DataFrame(r, columns=[self.new_attributes_prefix + str(week) for week in range(self.weeks)]) + + X = pd.concat([X, r], axis=1) + + return X \ No newline at end of file diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py index 1129f6f..abe0379 100644 --- a/utils/LastWeeks.py +++ b/utils/LastWeeks.py @@ -14,7 +14,7 @@ def __init__(self, attributes, weeks=2, new_attributes_prefix='last_weeks_', cop def fit(self, X, y=None): attr_medians = [np.nanmedian(X[attr]) for attr in self.attributes] dq = deque([attr_medians for _ in range(self.weeks)]) - self.last = {'sj': dq, 'iq': dq} + self.last = {'sj': dq.copy(), 'iq': dq.copy()} return self From 9e0bfee88afac303073e9cc11312d0b640722c17 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Wed, 17 Apr 2019 13:52:08 +0100 Subject: [PATCH 07/24] found 8.49 model with adaboost --- models.ipynb | 1735 
+++----------------------------------------------- 1 file changed, 100 insertions(+), 1635 deletions(-) diff --git a/models.ipynb b/models.ipynb index 0f3dc6c..f88b08d 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 68, "metadata": {}, "outputs": [], "source": [ @@ -123,11 +123,13 @@ "from utils.ContinuityImputer import ContinuityImputer\n", "from utils.DataFrameDropper import DataFrameDropper\n", "from utils.LastWeeks import LastWeeks\n", + "from utils.LastInfected import LastInfected\n", "lw = LastWeeks(attributes=['ndvi_ne', 'precipitation_amt_mm', 'reanalysis_relative_humidity_percent'], weeks=3)\n", "\n", "pipeline = Pipeline([\n", " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", " ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n", + " ('lf', LastInfected(weeks=3)),\n", " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", " ('scaler', StandardScaler()),\n", " #('pca', PCA(n_components=0.95))\n", @@ -136,22 +138,22 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1451, 80)" + "(1451, 83)" ] }, - "execution_count": 21, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "X_train = pipeline.fit_transform(X_train_1)\n", + "X_train = pipeline.fit_transform(X_train_1, y_train)\n", "X_train.shape" ] }, @@ -272,12 +274,14 @@ "* 18.38 - Without PCA and with previous weeks. 
Clearly the previous weeks are useful\n", "* 17.87 - Without PCA and with 3 previous weeks\n", "* 17.86 - Without PCA and with 4 previous weeks\n", - "* 18.28 - Withou PCA 0.95 and 3 previous weeks fixed" + "* 18.28 - With PCA 0.95 and 3 previous weeks fixed\n", + "* 9.16 - Without PCA, with 3 weeks and 1 last infection (max_depth=5, min_samples_leaf=0.03, min_samples_split=0.108)\n", + "* **9.04** - Without PCA, with 3 weeks and 1 last infection (max_depth=5, min_samples_leaf=0.03, min_samples_split=0.108)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ @@ -289,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -304,20 +308,20 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 3.3s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 12.8s\n", - "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 29.8s\n", - "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 56.1s\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 1.2min finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 8.0s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 32.2s\n", + "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 1.3min\n", + "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 2.3min\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 2.9min finished\n" ] }, { "data": { "text/plain": [ - "-18.274293590627153" + "-9.041006202618883" ] }, - "execution_count": 17, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -330,21 +334,21 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "DecisionTreeRegressor(criterion='mae', max_depth=2, max_features=None,\n", + 
"DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.2320229706454773,\n", - " min_samples_split=0.24824690804416838,\n", + " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", + " min_samples_split=0.107526262482814,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, - "execution_count": 18, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -359,32 +363,34 @@ "source": [ "## Random Forests\n", "* 18.34 With 4 previous weeks and without PCA\n", - "* **17.79** With fixed 3 previous weeks and PCA at 0.95 (n_estimators= ?, max_depth = 2, min_samples_leaf=0.112, min_samples_split=0.224)\n", - "* **17.79** With fixed 3 previous weeks and without PCA (n_estimators= ?, max_depth = 5, min_samples_leaf=0.07, min_samples_split=0.27)" + "* 17.79 With fixed 3 previous weeks and PCA at 0.95 (n_estimators= ?, max_depth = 2, min_samples_leaf=0.112, min_samples_split=0.224)\n", + "* 17.74 With fixed 3 previous weeks and without PCA (n_estimators= 13 max_depth = 5, min_samples_leaf=0.09, min_samples_split=0.24)\n", + "* **9.13** with 3 previous weeks and 1 last infected (n_estimators=9 max_depth = 9, min_samples_leaf=0.014, min_samples_split=0.07)\n", + "* 9.22 with 3 previous weeks and 3 last infected (n_estimators=9 max_depth = 9, min_samples_leaf=0.014, min_samples_split=0.08)" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "k_folds=10\n", - "n_iter_search = 30\n", + "n_iter_search = 40\n", "min_samples = sp_uniform(0.01, 0.35)\n", "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 74, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fitting 10 folds for each of 30 candidates, totalling 300 fits\n" + "Fitting 10 folds for each of 40 candidates, totalling 400 fits\n" ] }, { @@ -392,18 +398,18 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 23.8s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 2.1min\n", - "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 3.1min finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 24.7s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 2.4min\n", + "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 4.9min finished\n" ] }, { "data": { "text/plain": [ - "-17.740020145257915" + "-9.22168619342982" ] }, - "execution_count": 49, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } @@ -416,22 +422,22 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=9,\n", " max_features='auto', max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=0.09435891310910409,\n", - " min_samples_split=0.24914223158891036,\n", - " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", + " min_samples_leaf=0.014927937950279559,\n", + " min_samples_split=0.0795948414310818,\n", + " min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=-1,\n", " oob_score=False, random_state=None, verbose=0, warm_start=False)" ] }, - "execution_count": 50, + "execution_count": 75, "metadata": {}, "output_type": "execute_result" } @@ -440,6 +446,63 @@ "Forest_optimizer.best_estimator_" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adaboost of Trees\n", + "* 10.78 
- With 3 last weeks a 3 last infected \n", + "* **8.49** - With 3 last weeks a 3 last infected and only max_depth tuned." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "k_folds=10\n", + "n_iter_search = 10\n", + "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7)}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", + "AdaTree_optimizer.fit(X_train, y_train)\n", + "AdaTree_optimizer.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=1,\n", + " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", + " presort=False, random_state=None, splitter='best'),\n", + " learning_rate=1.0, loss='linear', n_estimators=50,\n", + " random_state=None)" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "AdaTree_optimizer.best_estimator_" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -500,1604 +563,6 @@ "KNN_optimizer.fit(X_train, y_train)\n", "KNN_optimizer.best_score_" ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "%autoreload\n", - "from utils.LastInfected import LastInfected\n", - "tmp = pd.concat([LastInfected(weeks=2).fit_transform(X_train_1, y=y_train), y_train], axis=1)" - ] - }, - { - 
"cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cityyearweekofyearweek_start_datendvi_nendvi_nwndvi_sendvi_swprecipitation_amt_mmreanalysis_air_temp_k...reanalysis_specific_humidity_g_per_kgreanalysis_tdtr_kstation_avg_temp_cstation_diur_temp_rng_cstation_max_temp_cstation_min_temp_cstation_precip_mmlast_infected_0last_infected_1total_cases
900sj2007372007-09-10NaN0.0569000.2385430.18748610.37301.117143...17.7200003.15714328.8714296.51428633.925.010.471.092.0112
901sj2007382007-09-17-0.0134500.0749000.1525710.13192970.39301.217143...18.0371432.81428628.3000006.28571432.824.426.9112.071.0106
902sj2007392007-09-24-0.030700-0.0029400.1527290.14462994.37301.052857...17.9814293.58571428.1714296.02857132.224.421.3106.0112.0101
903sj2007402007-10-010.0960000.0247670.1853000.11772974.50301.022857...18.1185712.68571427.9857147.24285732.822.286.6101.0106.0170
904sj2007412007-10-080.0090000.1040000.1181290.126343108.26300.790000...18.3757143.00000028.1285716.91428633.323.914.5170.0101.0135
905sj2007422007-10-150.0210000.1326670.2459430.18975717.56301.492857...17.8457143.18571429.1000007.54285733.924.410.2135.0170.0106
906sj2007432007-10-22NaN-0.0091500.1911860.17640016.48301.007143...17.2757142.47142927.9571436.44285732.224.48.6106.0135.068
907sj2007442007-10-290.1243000.0543000.1568140.123529137.55299.458571...17.5028572.60000026.2000005.40000030.622.289.268.0106.048
908sj2007452007-11-05-0.251700-0.0486000.2051710.17288315.25300.604286...17.2957142.25714327.4428576.85714332.222.84.148.068.048
909sj2007462007-11-12-0.058900-0.0625500.2057430.20254342.00299.934286...17.0828573.54285726.8142866.68571431.122.865.748.048.026
910sj2007472007-11-19NaNNaN0.2044860.15628673.37299.821429...17.1871432.51428626.9000006.20000031.122.840.426.048.033
911sj2007482007-11-26-0.059500-0.0416670.0909170.12908615.95299.090000...14.7685712.07142925.4428575.38571428.922.236.433.026.029
912sj2007492007-12-03NaN-0.0423500.0956000.08900017.85299.020000...15.6757142.10000025.8428575.40000029.422.834.529.033.017
913sj2007502007-12-10-0.133050-0.0455500.1514400.14317131.30298.900000...16.1300002.48571425.7714295.08571428.922.230.217.029.012
914sj2007512007-12-17NaN-0.0390000.1734170.15017162.11298.668571...16.3442862.37142925.0714294.91428628.921.7108.212.017.013
915sj2007522007-12-240.0148000.0163000.2072670.1445780.00298.602857...15.3185712.98571425.0857146.24285728.321.116.813.012.017
916sj200812008-01-010.000600-0.3096000.2398140.1955570.00298.038571...14.9114291.84285725.4000005.30000029.422.255.517.013.015
917sj200822008-01-08NaN-0.1082500.3304860.24428637.24298.142857...14.9800002.05714324.9714295.01428628.321.164.815.017.014
918sj200832008-01-15NaN0.0383000.1250000.1088430.00297.627143...14.4885713.00000024.4285715.62857127.820.62.514.015.015
919sj200842008-01-22-0.026800-0.2153000.1126140.16021481.22297.968571...15.0657142.00000024.5285714.58571427.821.183.115.014.010
920sj200852008-01-29NaN-0.1354000.2233000.1709430.00298.021429...14.4085713.30000024.5714296.44285728.920.03.110.015.09
921sj200862008-02-05-0.111700-0.0032000.2328430.2711710.00297.237143...13.2257142.07142924.2142865.15714327.221.135.99.010.02
922sj200872008-02-120.072000-0.0631000.1502000.1492710.00297.838571...14.3671432.15714324.8000006.24285728.321.16.42.09.06
923sj200882008-02-19-0.138650-0.0950670.2460570.2281290.00297.907143...14.5385711.88571424.9000005.78571428.321.713.36.02.08
924sj200892008-02-26NaN0.0154200.2116290.1173430.00297.765714...13.9671432.28571424.7428575.50000027.821.112.98.06.05
925sj2008102008-03-04-0.088900-0.0900330.2232430.1541860.00297.878571...14.4742862.61428625.1142866.11428629.421.113.05.08.01
926sj2008112008-03-11-0.321400-0.1412000.1106430.1410140.00297.595714...13.7214292.08571425.3285715.81428628.922.24.41.05.02
927sj2008122008-03-180.0449000.0244500.1016290.0880000.00297.404286...13.7371433.87142925.2000007.04285730.020.60.52.01.03
928sj2008132008-03-250.077850-0.0399000.3104710.29624327.19296.958571...13.6442862.88571425.0428575.78571430.021.11.83.02.04
929sj2008142008-04-01-0.038000-0.0168330.1193710.0663863.82298.081429...14.6628572.71428626.2428576.81428630.622.20.54.03.03
930sj2008152008-04-08-0.155200-0.0527500.1377570.14121416.96297.460000...14.1842862.18571425.0000005.71428629.421.730.73.04.01
931sj2008162008-04-150.001800NaN0.2039000.2098430.00297.630000...13.8585712.78571425.3142866.24285729.421.711.21.03.03
932sj2008172008-04-22-0.037000-0.0103670.0773140.0905860.00298.672857...15.6714293.95714327.0428577.51428631.723.30.33.01.05
933iq2000262000-07-010.1928860.1322570.3408860.24720025.41296.740000...16.6514298.92857126.40000010.77500032.520.73.00.00.00
934iq2000272000-07-080.2168330.2761000.2894570.24165760.61296.634286...16.86285710.31428626.90000011.56666734.020.855.60.00.00
935iq2000282000-07-150.1767570.1731290.2041140.12801455.52296.415714...17.1200007.38571426.80000011.46666733.020.738.10.00.00
936iq2000292000-07-220.2277290.1454290.2542000.2003145.60295.357143...14.4314299.11428625.76666710.53333331.514.730.00.00.00
937iq2000302000-07-290.3286430.3221290.2543710.36104362.76296.432857...15.4442869.50000026.60000011.48000033.319.14.00.00.00
938iq2000312000-08-050.2055290.1907570.2316710.25531416.24297.191429...13.42142913.77142925.34000010.94000032.017.011.50.00.00
939iq2000322000-08-120.3124860.3299860.3805860.38727189.37297.320000...15.31142911.47142927.01666711.65000034.019.972.90.00.00
940iq2000332000-08-190.3841330.3922400.3417800.38275042.08297.627143...15.46571413.70000026.58333310.31666733.020.550.10.00.00
941iq2000342000-08-260.4081570.3221570.4067140.30271449.22298.238571...14.44428613.77142926.90000013.40000034.019.089.20.00.00
942iq2000352000-09-020.3320430.3210570.3146140.32425753.65299.218571...15.05714312.45714327.11666712.26666734.020.078.00.00.00
943iq2000362000-09-090.2955860.2956830.3122140.26592923.12300.802857...12.65285714.90000028.36666712.90000035.821.756.90.00.01
944iq2000372000-09-160.2846570.3097570.3878830.32815734.62299.858571...15.22714313.85714327.42500012.77500034.520.518.91.00.00
945iq2000382000-09-230.3488140.2957170.4048430.24257197.55297.435714...14.33857111.31428627.53333312.56666736.020.5104.20.01.00
946iq2000392000-09-300.1756860.0994830.2257140.18278695.89299.355714...14.79857114.94285727.15000012.17500034.020.557.90.00.00
947iq2000402000-10-070.3375400.2769430.2775000.25505046.22298.372857...16.14857111.97142926.70000011.67500034.020.063.00.00.00
948iq2000412000-10-140.2235330.1339140.3498000.10091731.10298.474286...16.07142913.48571427.65714311.30000034.021.03.00.00.01
949iq2000422000-10-210.2748000.1870570.3739430.27947125.21299.211429...15.52857114.92857127.77500012.27500036.021.045.21.00.01
\n", - "

50 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " city year weekofyear week_start_date ndvi_ne ndvi_nw ndvi_se \\\n", - "900 sj 2007 37 2007-09-10 NaN 0.056900 0.238543 \n", - "901 sj 2007 38 2007-09-17 -0.013450 0.074900 0.152571 \n", - "902 sj 2007 39 2007-09-24 -0.030700 -0.002940 0.152729 \n", - "903 sj 2007 40 2007-10-01 0.096000 0.024767 0.185300 \n", - "904 sj 2007 41 2007-10-08 0.009000 0.104000 0.118129 \n", - "905 sj 2007 42 2007-10-15 0.021000 0.132667 0.245943 \n", - "906 sj 2007 43 2007-10-22 NaN -0.009150 0.191186 \n", - "907 sj 2007 44 2007-10-29 0.124300 0.054300 0.156814 \n", - "908 sj 2007 45 2007-11-05 -0.251700 -0.048600 0.205171 \n", - "909 sj 2007 46 2007-11-12 -0.058900 -0.062550 0.205743 \n", - "910 sj 2007 47 2007-11-19 NaN NaN 0.204486 \n", - "911 sj 2007 48 2007-11-26 -0.059500 -0.041667 0.090917 \n", - "912 sj 2007 49 2007-12-03 NaN -0.042350 0.095600 \n", - "913 sj 2007 50 2007-12-10 -0.133050 -0.045550 0.151440 \n", - "914 sj 2007 51 2007-12-17 NaN -0.039000 0.173417 \n", - "915 sj 2007 52 2007-12-24 0.014800 0.016300 0.207267 \n", - "916 sj 2008 1 2008-01-01 0.000600 -0.309600 0.239814 \n", - "917 sj 2008 2 2008-01-08 NaN -0.108250 0.330486 \n", - "918 sj 2008 3 2008-01-15 NaN 0.038300 0.125000 \n", - "919 sj 2008 4 2008-01-22 -0.026800 -0.215300 0.112614 \n", - "920 sj 2008 5 2008-01-29 NaN -0.135400 0.223300 \n", - "921 sj 2008 6 2008-02-05 -0.111700 -0.003200 0.232843 \n", - "922 sj 2008 7 2008-02-12 0.072000 -0.063100 0.150200 \n", - "923 sj 2008 8 2008-02-19 -0.138650 -0.095067 0.246057 \n", - "924 sj 2008 9 2008-02-26 NaN 0.015420 0.211629 \n", - "925 sj 2008 10 2008-03-04 -0.088900 -0.090033 0.223243 \n", - "926 sj 2008 11 2008-03-11 -0.321400 -0.141200 0.110643 \n", - "927 sj 2008 12 2008-03-18 0.044900 0.024450 0.101629 \n", - "928 sj 2008 13 2008-03-25 0.077850 -0.039900 0.310471 \n", - "929 sj 2008 14 2008-04-01 -0.038000 -0.016833 0.119371 \n", - "930 sj 2008 15 2008-04-08 -0.155200 -0.052750 0.137757 \n", - "931 sj 2008 16 2008-04-15 0.001800 
NaN 0.203900 \n", - "932 sj 2008 17 2008-04-22 -0.037000 -0.010367 0.077314 \n", - "933 iq 2000 26 2000-07-01 0.192886 0.132257 0.340886 \n", - "934 iq 2000 27 2000-07-08 0.216833 0.276100 0.289457 \n", - "935 iq 2000 28 2000-07-15 0.176757 0.173129 0.204114 \n", - "936 iq 2000 29 2000-07-22 0.227729 0.145429 0.254200 \n", - "937 iq 2000 30 2000-07-29 0.328643 0.322129 0.254371 \n", - "938 iq 2000 31 2000-08-05 0.205529 0.190757 0.231671 \n", - "939 iq 2000 32 2000-08-12 0.312486 0.329986 0.380586 \n", - "940 iq 2000 33 2000-08-19 0.384133 0.392240 0.341780 \n", - "941 iq 2000 34 2000-08-26 0.408157 0.322157 0.406714 \n", - "942 iq 2000 35 2000-09-02 0.332043 0.321057 0.314614 \n", - "943 iq 2000 36 2000-09-09 0.295586 0.295683 0.312214 \n", - "944 iq 2000 37 2000-09-16 0.284657 0.309757 0.387883 \n", - "945 iq 2000 38 2000-09-23 0.348814 0.295717 0.404843 \n", - "946 iq 2000 39 2000-09-30 0.175686 0.099483 0.225714 \n", - "947 iq 2000 40 2000-10-07 0.337540 0.276943 0.277500 \n", - "948 iq 2000 41 2000-10-14 0.223533 0.133914 0.349800 \n", - "949 iq 2000 42 2000-10-21 0.274800 0.187057 0.373943 \n", - "\n", - " ndvi_sw precipitation_amt_mm reanalysis_air_temp_k ... \\\n", - "900 0.187486 10.37 301.117143 ... \n", - "901 0.131929 70.39 301.217143 ... \n", - "902 0.144629 94.37 301.052857 ... \n", - "903 0.117729 74.50 301.022857 ... \n", - "904 0.126343 108.26 300.790000 ... \n", - "905 0.189757 17.56 301.492857 ... \n", - "906 0.176400 16.48 301.007143 ... \n", - "907 0.123529 137.55 299.458571 ... \n", - "908 0.172883 15.25 300.604286 ... \n", - "909 0.202543 42.00 299.934286 ... \n", - "910 0.156286 73.37 299.821429 ... \n", - "911 0.129086 15.95 299.090000 ... \n", - "912 0.089000 17.85 299.020000 ... \n", - "913 0.143171 31.30 298.900000 ... \n", - "914 0.150171 62.11 298.668571 ... \n", - "915 0.144578 0.00 298.602857 ... \n", - "916 0.195557 0.00 298.038571 ... \n", - "917 0.244286 37.24 298.142857 ... \n", - "918 0.108843 0.00 297.627143 ... 
\n", - "919 0.160214 81.22 297.968571 ... \n", - "920 0.170943 0.00 298.021429 ... \n", - "921 0.271171 0.00 297.237143 ... \n", - "922 0.149271 0.00 297.838571 ... \n", - "923 0.228129 0.00 297.907143 ... \n", - "924 0.117343 0.00 297.765714 ... \n", - "925 0.154186 0.00 297.878571 ... \n", - "926 0.141014 0.00 297.595714 ... \n", - "927 0.088000 0.00 297.404286 ... \n", - "928 0.296243 27.19 296.958571 ... \n", - "929 0.066386 3.82 298.081429 ... \n", - "930 0.141214 16.96 297.460000 ... \n", - "931 0.209843 0.00 297.630000 ... \n", - "932 0.090586 0.00 298.672857 ... \n", - "933 0.247200 25.41 296.740000 ... \n", - "934 0.241657 60.61 296.634286 ... \n", - "935 0.128014 55.52 296.415714 ... \n", - "936 0.200314 5.60 295.357143 ... \n", - "937 0.361043 62.76 296.432857 ... \n", - "938 0.255314 16.24 297.191429 ... \n", - "939 0.387271 89.37 297.320000 ... \n", - "940 0.382750 42.08 297.627143 ... \n", - "941 0.302714 49.22 298.238571 ... \n", - "942 0.324257 53.65 299.218571 ... \n", - "943 0.265929 23.12 300.802857 ... \n", - "944 0.328157 34.62 299.858571 ... \n", - "945 0.242571 97.55 297.435714 ... \n", - "946 0.182786 95.89 299.355714 ... \n", - "947 0.255050 46.22 298.372857 ... \n", - "948 0.100917 31.10 298.474286 ... \n", - "949 0.279471 25.21 299.211429 ... 
\n", - "\n", - " reanalysis_specific_humidity_g_per_kg reanalysis_tdtr_k \\\n", - "900 17.720000 3.157143 \n", - "901 18.037143 2.814286 \n", - "902 17.981429 3.585714 \n", - "903 18.118571 2.685714 \n", - "904 18.375714 3.000000 \n", - "905 17.845714 3.185714 \n", - "906 17.275714 2.471429 \n", - "907 17.502857 2.600000 \n", - "908 17.295714 2.257143 \n", - "909 17.082857 3.542857 \n", - "910 17.187143 2.514286 \n", - "911 14.768571 2.071429 \n", - "912 15.675714 2.100000 \n", - "913 16.130000 2.485714 \n", - "914 16.344286 2.371429 \n", - "915 15.318571 2.985714 \n", - "916 14.911429 1.842857 \n", - "917 14.980000 2.057143 \n", - "918 14.488571 3.000000 \n", - "919 15.065714 2.000000 \n", - "920 14.408571 3.300000 \n", - "921 13.225714 2.071429 \n", - "922 14.367143 2.157143 \n", - "923 14.538571 1.885714 \n", - "924 13.967143 2.285714 \n", - "925 14.474286 2.614286 \n", - "926 13.721429 2.085714 \n", - "927 13.737143 3.871429 \n", - "928 13.644286 2.885714 \n", - "929 14.662857 2.714286 \n", - "930 14.184286 2.185714 \n", - "931 13.858571 2.785714 \n", - "932 15.671429 3.957143 \n", - "933 16.651429 8.928571 \n", - "934 16.862857 10.314286 \n", - "935 17.120000 7.385714 \n", - "936 14.431429 9.114286 \n", - "937 15.444286 9.500000 \n", - "938 13.421429 13.771429 \n", - "939 15.311429 11.471429 \n", - "940 15.465714 13.700000 \n", - "941 14.444286 13.771429 \n", - "942 15.057143 12.457143 \n", - "943 12.652857 14.900000 \n", - "944 15.227143 13.857143 \n", - "945 14.338571 11.314286 \n", - "946 14.798571 14.942857 \n", - "947 16.148571 11.971429 \n", - "948 16.071429 13.485714 \n", - "949 15.528571 14.928571 \n", - "\n", - " station_avg_temp_c station_diur_temp_rng_c station_max_temp_c \\\n", - "900 28.871429 6.514286 33.9 \n", - "901 28.300000 6.285714 32.8 \n", - "902 28.171429 6.028571 32.2 \n", - "903 27.985714 7.242857 32.8 \n", - "904 28.128571 6.914286 33.3 \n", - "905 29.100000 7.542857 33.9 \n", - "906 27.957143 6.442857 32.2 \n", - "907 26.200000 
5.400000 30.6 \n", - "908 27.442857 6.857143 32.2 \n", - "909 26.814286 6.685714 31.1 \n", - "910 26.900000 6.200000 31.1 \n", - "911 25.442857 5.385714 28.9 \n", - "912 25.842857 5.400000 29.4 \n", - "913 25.771429 5.085714 28.9 \n", - "914 25.071429 4.914286 28.9 \n", - "915 25.085714 6.242857 28.3 \n", - "916 25.400000 5.300000 29.4 \n", - "917 24.971429 5.014286 28.3 \n", - "918 24.428571 5.628571 27.8 \n", - "919 24.528571 4.585714 27.8 \n", - "920 24.571429 6.442857 28.9 \n", - "921 24.214286 5.157143 27.2 \n", - "922 24.800000 6.242857 28.3 \n", - "923 24.900000 5.785714 28.3 \n", - "924 24.742857 5.500000 27.8 \n", - "925 25.114286 6.114286 29.4 \n", - "926 25.328571 5.814286 28.9 \n", - "927 25.200000 7.042857 30.0 \n", - "928 25.042857 5.785714 30.0 \n", - "929 26.242857 6.814286 30.6 \n", - "930 25.000000 5.714286 29.4 \n", - "931 25.314286 6.242857 29.4 \n", - "932 27.042857 7.514286 31.7 \n", - "933 26.400000 10.775000 32.5 \n", - "934 26.900000 11.566667 34.0 \n", - "935 26.800000 11.466667 33.0 \n", - "936 25.766667 10.533333 31.5 \n", - "937 26.600000 11.480000 33.3 \n", - "938 25.340000 10.940000 32.0 \n", - "939 27.016667 11.650000 34.0 \n", - "940 26.583333 10.316667 33.0 \n", - "941 26.900000 13.400000 34.0 \n", - "942 27.116667 12.266667 34.0 \n", - "943 28.366667 12.900000 35.8 \n", - "944 27.425000 12.775000 34.5 \n", - "945 27.533333 12.566667 36.0 \n", - "946 27.150000 12.175000 34.0 \n", - "947 26.700000 11.675000 34.0 \n", - "948 27.657143 11.300000 34.0 \n", - "949 27.775000 12.275000 36.0 \n", - "\n", - " station_min_temp_c station_precip_mm last_infected_0 last_infected_1 \\\n", - "900 25.0 10.4 71.0 92.0 \n", - "901 24.4 26.9 112.0 71.0 \n", - "902 24.4 21.3 106.0 112.0 \n", - "903 22.2 86.6 101.0 106.0 \n", - "904 23.9 14.5 170.0 101.0 \n", - "905 24.4 10.2 135.0 170.0 \n", - "906 24.4 8.6 106.0 135.0 \n", - "907 22.2 89.2 68.0 106.0 \n", - "908 22.8 4.1 48.0 68.0 \n", - "909 22.8 65.7 48.0 48.0 \n", - "910 22.8 40.4 26.0 48.0 \n", - 
"911 22.2 36.4 33.0 26.0 \n", - "912 22.8 34.5 29.0 33.0 \n", - "913 22.2 30.2 17.0 29.0 \n", - "914 21.7 108.2 12.0 17.0 \n", - "915 21.1 16.8 13.0 12.0 \n", - "916 22.2 55.5 17.0 13.0 \n", - "917 21.1 64.8 15.0 17.0 \n", - "918 20.6 2.5 14.0 15.0 \n", - "919 21.1 83.1 15.0 14.0 \n", - "920 20.0 3.1 10.0 15.0 \n", - "921 21.1 35.9 9.0 10.0 \n", - "922 21.1 6.4 2.0 9.0 \n", - "923 21.7 13.3 6.0 2.0 \n", - "924 21.1 12.9 8.0 6.0 \n", - "925 21.1 13.0 5.0 8.0 \n", - "926 22.2 4.4 1.0 5.0 \n", - "927 20.6 0.5 2.0 1.0 \n", - "928 21.1 1.8 3.0 2.0 \n", - "929 22.2 0.5 4.0 3.0 \n", - "930 21.7 30.7 3.0 4.0 \n", - "931 21.7 11.2 1.0 3.0 \n", - "932 23.3 0.3 3.0 1.0 \n", - "933 20.7 3.0 0.0 0.0 \n", - "934 20.8 55.6 0.0 0.0 \n", - "935 20.7 38.1 0.0 0.0 \n", - "936 14.7 30.0 0.0 0.0 \n", - "937 19.1 4.0 0.0 0.0 \n", - "938 17.0 11.5 0.0 0.0 \n", - "939 19.9 72.9 0.0 0.0 \n", - "940 20.5 50.1 0.0 0.0 \n", - "941 19.0 89.2 0.0 0.0 \n", - "942 20.0 78.0 0.0 0.0 \n", - "943 21.7 56.9 0.0 0.0 \n", - "944 20.5 18.9 1.0 0.0 \n", - "945 20.5 104.2 0.0 1.0 \n", - "946 20.5 57.9 0.0 0.0 \n", - "947 20.0 63.0 0.0 0.0 \n", - "948 21.0 3.0 0.0 0.0 \n", - "949 21.0 45.2 1.0 0.0 \n", - "\n", - " total_cases \n", - "900 112 \n", - "901 106 \n", - "902 101 \n", - "903 170 \n", - "904 135 \n", - "905 106 \n", - "906 68 \n", - "907 48 \n", - "908 48 \n", - "909 26 \n", - "910 33 \n", - "911 29 \n", - "912 17 \n", - "913 12 \n", - "914 13 \n", - "915 17 \n", - "916 15 \n", - "917 14 \n", - "918 15 \n", - "919 10 \n", - "920 9 \n", - "921 2 \n", - "922 6 \n", - "923 8 \n", - "924 5 \n", - "925 1 \n", - "926 2 \n", - "927 3 \n", - "928 4 \n", - "929 3 \n", - "930 1 \n", - "931 3 \n", - "932 5 \n", - "933 0 \n", - "934 0 \n", - "935 0 \n", - "936 0 \n", - "937 0 \n", - "938 0 \n", - "939 0 \n", - "940 0 \n", - "941 0 \n", - "942 0 \n", - "943 1 \n", - "944 0 \n", - "945 0 \n", - "946 0 \n", - "947 0 \n", - "948 1 \n", - "949 1 \n", - "\n", - "[50 rows x 27 columns]" - ] - }, - "execution_count": 
58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tmp[900:950]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From ad1e138d93888f3582aa24360e30ba42f670c4d7 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Wed, 17 Apr 2019 18:47:38 +0100 Subject: [PATCH 08/24] refactored the pipeline --- OurPipeline.py | 21 +++++++++++++++++++++ models.ipynb | 35 +++++++---------------------------- utils/__init__.py | 0 3 files changed, 28 insertions(+), 28 deletions(-) create mode 100644 OurPipeline.py create mode 100644 utils/__init__.py diff --git a/OurPipeline.py b/OurPipeline.py new file mode 100644 index 0000000..08454be --- /dev/null +++ b/OurPipeline.py @@ -0,0 +1,21 @@ +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA +from utils.ContinuityImputer import ContinuityImputer +from utils.DataFrameDropper import DataFrameDropper +from utils.LastWeeks import LastWeeks +from utils.LastInfected import LastInfected + +def create_pipeline(attr, n_weeks, pca_n_components=None, n_non_train=4): + pipelist = [ + ('imputer', ContinuityImputer(attributes=attr[n_non_train:])), + ('lw', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)), + ('lf', LastInfected(weeks=n_weeks)), + ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])), + ('scaler', StandardScaler()), + ] + + if pca_n_components is not None: + pipelist.append(('pca', PCA(n_components=pca_n_components))) + + return Pipeline(pipelist) \ No newline at end of file diff --git a/models.ipynb b/models.ipynb index f88b08d..f26a577 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,33 +112,7 @@ }, { "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "%autoreload\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import StandardScaler\n", - "from 
sklearn.decomposition import PCA\n", - "from utils.ContinuityImputer import ContinuityImputer\n", - "from utils.DataFrameDropper import DataFrameDropper\n", - "from utils.LastWeeks import LastWeeks\n", - "from utils.LastInfected import LastInfected\n", - "lw = LastWeeks(attributes=['ndvi_ne', 'precipitation_amt_mm', 'reanalysis_relative_humidity_percent'], weeks=3)\n", - "\n", - "pipeline = Pipeline([\n", - " ('imputer', ContinuityImputer(attributes=attr[4:])),\n", - " ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n", - " ('lf', LastInfected(weeks=3)),\n", - " ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n", - " ('scaler', StandardScaler()),\n", - " #('pca', PCA(n_components=0.95))\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 69, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -147,12 +121,17 @@ "(1451, 83)" ] }, - "execution_count": 69, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=None)\n", + "\n", "X_train = pipeline.fit_transform(X_train_1, y_train)\n", "X_train.shape" ] diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 From dfa75a029613ebc28b776ffcc14a6e0ca8aaaf12 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Wed, 17 Apr 2019 19:28:43 +0100 Subject: [PATCH 09/24] modeled pca --- analysis.ipynb | 103 ++++++++++++++++++++++++------------------------- models.ipynb | 4 +- 2 files changed, 53 insertions(+), 54 deletions(-) diff --git a/analysis.ipynb b/analysis.ipynb index 587a6db..76e407c 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -598,11 +598,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ - "train_data = train_data[train_data['weekofyear'] != 53]" + "train_data = train_data[train_data['weekofyear'] 
!= 53]\n", + "train_data.reset_index(drop=True, inplace=True)" ] }, { @@ -1688,7 +1689,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1720,7 +1721,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1744,7 +1745,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1775,7 +1776,7 @@ "Name: total_cases, dtype: float64" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1797,7 +1798,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1806,7 +1807,7 @@ "(1451, 22)" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1817,7 +1818,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1845,7 +1846,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1879,7 +1880,7 @@ "dtype: float64" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1895,7 +1896,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1935,7 +1936,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -2252,7 +2253,7 @@ "max 2.228153e+00 1.063787e+01 " ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2284,7 +2285,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -2294,7 +2295,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -2303,7 +2304,7 
@@ "2" ] }, - "execution_count": 41, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2326,7 +2327,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -2361,7 +2362,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -2532,7 +2533,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -2541,7 +2542,7 @@ "('2000-07-01', '2010-06-25')" ] }, - "execution_count": 47, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -2552,7 +2553,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -2561,7 +2562,7 @@ "('2010-07-02', '2013-06-25')" ] }, - "execution_count": 50, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -2572,7 +2573,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -2581,7 +2582,7 @@ "('1990-04-30', '2008-04-22')" ] }, - "execution_count": 48, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2592,7 +2593,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -2601,7 +2602,7 @@ "('2008-04-29', '2013-04-23')" ] }, - "execution_count": 52, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2612,7 +2613,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -2644,7 +2645,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -2691,7 +2692,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -2707,7 +2708,7 @@ }, { "cell_type": "code", - "execution_count": 31, + 
"execution_count": 34, "metadata": {}, "outputs": [ { @@ -2754,7 +2755,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -2763,7 +2764,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -2811,37 +2812,41 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False)" + "(1451, 65)" ] }, - "execution_count": 34, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pca = PCA()\n", - "pca.fit(X_train)\n", - "pca" + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "attr=list(train_data)[:-1]\n", + "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=0.999)\n", + "\n", + "X_train = pipeline.fit_transform(train_data.iloc[:,:-1].copy(), train_data.iloc[:,-1].copy())\n", + "X_train.shape" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 63, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA34AAAE/CAYAAAAZshH0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3X2czXX+//HH2xhmmBG5ylXGRhKGmEJXtNZFpYmimq2trBStpbaWrotq6cJX6GK7kESNKDJFmshVWxJF9SNr29qQcpGLYRgz5vX743NmzDA4w5xzPjOe99vt3M45n8/nvD/Pz1DOa97vz/vtzAwREREREREpu8pFOoCIiIiIiIiElgo/ERERERGRMk6Fn4iIiIiISBmnwk9ERERERKSMU+EnIiIiIiJSxqnwExERERERKeNU+ImISKnjnHvNOfdYkMd+4Jy7KQQZEpxz5pwrX9JtF3Gui5xza0N9HhERKbtC/o+ViIicvJxzPwK1gQMFNr9mZoPClcHMLg3XuULFzJYATSOdQ0RESi8VfiIiEmpXmNm8SIcorZxz5c0sJ9I5RESkdNNQTxERiQjn3AvOuXcKvH/COTffeTo55zY45+5zzm11zv3onLv+CO1Uc86975zb4pzbHnhdv8D+hc65WwKvb3bOfeKcezpw7A/OuUsLHHuKc26Cc26Tc26jc+4x51xUYF9U4HNbnXP/BS4/yrUNc869fci2sc65cYHXfZ1za5xzGc65/zrnbitwXN61D3PO/QJMzNtW4Jh7nHPfBz6/2jnXq8C+Y13jqc65ic65nwP73y2wr4dzbqVzbodz7lPnXOKRrlFEREoXFX4iIhIpdwEtA4XKRUA/4CYzs8D+04AaQD3gJuAl51xRwx3LAROBhsDpwF7g2aOctx2wNtD2k8AE55wL7HsNyAEaA+cAXYFbAvv6Az0C25OA3kc5x1TgMudcPHhFI3AN8GZg/+ZAW1WAvsAY51ybAp8/DTg1cE23FtH+98BFwCnAcGCKc65OkNc4GagENAdqAWMCGc8BXgVuA6oDLwJpzrmKR7lOEREpJVT4iYhIqL0b6EHKe/QHMLNM4E/A/wFTgL+a2YZDPvugmWWZ2SJgNl7xVIiZbTOzd8ws08wygMeBjkfJ8z8ze9nMDgCTgDpAbedcbeAy4A4z22Nmm/GKousCn7sGeMbM1pvZb8DII53AzP4HfAnk9cT9Hsg0s6WB/bPN7HvzLALS8Qq5PLnAw4Fr31tE+9PN7GczyzWzt4B1wHlBXGMd4FJggJltN7PswPnBKzBfNLPPzeyAmU0CsoD2R/lZiohIKaF7/EREJNR6HukePzP7PDBsshYw7ZDd281sT4H3/wPqHtqGc64SXoHWHagW2BzvnIsKFD6H+qXA+TMDHWFxeD1s0cCmg51jlAPWB17XLfA6L8/RvAmkAK8Df+Rgbx+BoZcPA2cGzlEJ+KbAZ7eY2b4jNeycuxH4G5AQ2BSH17sXzDX+Zmbbi2i2IXCTc+6vBbZVoIifuYiIlD7q8RMRkYhxzv0FqAj8DAw9ZHc151zlAu9PDxx3qLvwZrxsZ2ZVgIvzmi9mnPV4PVw1zKxq4FHFzJoH9m8CGhyS52imA50C9xv2IlD4BYZOvgM8DdQ2s6rAnEPyGkfgnGsIvAwMAqoHPv8twV3veuBU51zVI+x7vMC1VzWzSmaWGkS7IiLicyr8REQkIpxzZwKPATfgDfkc6pxrfchhw51zFQL3APbAK6YOFY93X98O59ypeD1pxWZmm/CGXI52zlVxzpVzzp3hnMsbNjoNGOycq++cqwbcc4z2tgAL8e4//MHM1gR2VcArdrcAOYHev67FiFoZrzDcAt5EMUCLYlzjB8DzgUlxop1zeYXyy8AA51y7wAQ7lZ1zl+fdpygiIqWbCj8REQm195xzuws8Zjpv0fMpwBNmtsrM1gH3AZMLTCbyC7Adr5fvDbz70r4rov1ngFhgK7A
UmHsCWW/EK8xWB879Nt79ceAVRh8Cq/Du35sRRHtvAn+gwDDPwH2Ig/EKye14w0DTgg1oZquB0cBnwK9AS+BfwX4er8jOBr7Dm2TmjkC7y/EmsHk2kOs/wM3FaFdERHzMHZw8TURExB+cc52AKWZW/1jHioiIyLGpx09ERERERKSMU+EnIiIiIiJSxmmop4iIiIiISBmnHj8REREREZEyToWfiIiIiIhIGVc+0gFORI0aNSwhISHSMUrUnj17qFy58rEPjAC/ZvNrLvBvNr/mAv9m82su8G825So+v2bzay7wbza/5gL/ZlOu4vNrNr/mAn9nO14rVqzYamY1j3mgmZXaR9u2ba2sWbBgQaQjHJFfs/k1l5l/s/k1l5l/s/k1l5l/sylX8fk1m19zmfk3m19zmfk3m3IVn1+z+TWXmb+zHS9guQVRO2mop4iIiIiISBmnwk9ERERERKSMU+EnIiIiUgqMHTuWFi1a0Lx5c5555hkAVq1aRYcOHWjZsiVXXHEFu3btUq5SkM2vuf785z9Tq1YtWrRokb/tt99+o0uXLjRp0oQuXbqwfft2wLtdbPDgwTRu3JjExES+/PLLkzZbaVGqJ3cpSnZ2Nhs2bGDfvn2RjnJcTjnlFNasWRPpGEXyaza/5oLgs8XExFC/fn2io6PDkEpEJPLGjh3Lyy+/jJnRv39/7rjjDlatWsWAAQPYvXs3CQkJvPHGG1SpUkW5gG+//ZaXX36ZZcuWUaFCBbp3706PHj245ZZbePrpp+nYsSOvvvoqTz31FI8++uhJn8vP2fyaC+Dmm29m0KBB3HjjjfnbRo0aRefOnbnnnnsYNWoUo0aN4oknnuCDDz5g3bp1rFu3js8//5yBAwfy+eefn5TZSosyV/ht2LCB+Ph4EhIScM5FOk6xZWRkEB8fH+kYRfJrNr/mguCymRnbtm1jw4YNNGrUKEzJREQix69ffP2aC2DNmjW0a9eOSpUqAdCxY0dmzJjBv//9by6++GIAunTpQrdu3cKaza+5/JzNr7kALr74Yn788cdC22bNmsXChQsBuOmmm+jUqRNPPPEEs2bN4sYbb8Q5R/v27dmxYwebNm2iTp06J1220qLMDfXct28f1atXL5VFn5ycnHNUr1691PZSi4i/FTWkbOXKlbRv357WrVuTlJTEsmXLwpqp4Bff8uXLH/GL7zvvvKNcAS1atGDJkiVs27aNzMxM5syZw/r162nevDmzZs0CYPr06axfv165fJ7Nr7mO5Ndff80vmE477TR+/fVXADZu3EiDBg3yj6tfvz4bN25UNh8rc4UfoKJPSh39nRWRUCjYg7Vq1Sref/99/vOf/zB06FAefvhhVq5cyYgRIxg6dGhYc/n1i69fcwE0a9aMYcOG0bVrV7p3707r1q2Jiori1Vdf5fnnn6dt27ZkZGRQoUIF5fJ5Nr/mCoZzzrffWfyczS/KZOEXaY8//jjNmzcnMTGR1q1bh3VM8XfffUfr1q0555xz+P7770Nyjptvvpm33377qMc89NBDzJs3r0TO16lTJ5YvX14ibRVUkhlF5OTmx141OHIPlnMuf+KInTt3Urdu3bDm8usXX7/mytOvXz9WrFjB4sWLqVatGmeeeSZnnXUW6enprFixgpSUFM444wzlKgXZ/JqrKLVr12bTpk0AbNq0iVq1agFQr169Qr8E2bBhA/Xq1VM2H1PhV8I+++wz3n//fb788ku+/vpr5s2bV6irOdTeffddevfuzVdffRXR/2GMGDGCP/zhDxE7/7EcOHDA9xlFpHTwa68aHLkH65lnnuHvf/87DRo04O6772bkyJFhz+bXL75+zQWwefNmAH766SdmzJjBH//4x/xtubm5PPbYYwwYMEC5SkE2v+YqSnJyMpMmTQJg0qRJXHnllfnbX3/9dcyMpUuXcsopp4T9Hjo/Z/OjkBV+zrlXnXObnXPfFth2qnPuI+fcusBztcB255wb55z7j3Pua+dcm1DlCrV
NmzZRo0YNKlasCECNGjXyf5OakJDA1q1bAVi+fDmdOnUC4JFHHuGmm27ioosuonnz5syYMYOhQ4fSsmVLunfvTnZ29mHnyftNcmJiIr169WL79u3MmTOHZ555hhdeeIFLLrnksM+kp6fToUMH2rRpQ58+fdi9ezc7d+6kadOmrF27FoCUlBRefvllAOLi4rjzzjtp3rw5nTt3zs9e0IgRIzj33HNp0aIFt956K2YGFO4VTEhI4OGHH6ZNmza0bNmS7777DoA9e/bw5z//mfPOO49zzjknfxjN3r17ue6662jWrBm9evVi7969h5137ty59OnTJ//9woUL6dGjBwADBw4kKSmJ5s2b8/DDD+cfk5CQwLBhw2jTpg3Tp08vlPFI19GpUyeGDRvGeeedx5lnnsmSJUsAr3C8++67adGiBYmJiYwfPx6AFStW0LFjR9q2bUu3bt345ZdfABg3bhxnn302iYmJXHfddYddj4gEp6ietWuvvZbWrVvTunVrEhISaN26dVgz+bVXDY7cg/XCCy8wZswY1q9fz5gxY+jXr1/Ys/n1i69fcwFcffXVnH322VxxxRU899xzVK1aldTU1PzitG7duvTt21e5SkE2v+ZKSUmhQ4cOrF27lvr16zNhwgTuuecePvroI5o0acK8efO45557ALjsssv43e9+R+PGjenfvz/PP//8SZut1DCzkDyAi4E2wLcFtj0J3BN4fQ/wROD1ZcAHgAPaA58Hc462bdvaoVavXn3YtnDKyMiwVq1aWZMmTWzgwIG2cOHC/H0NGza0LVu2mJnZF198YR07djQzs4cfftguuOAC279/v/3rX/+y2NhYmzNnjpmZ9ezZ02bOnHnYeVq2bJnf9oMPPmhDhgzJb+upp5467PgtW7bYRRddZLt37zYzs1GjRtnw4cPNzCw9Pd3at29vqamp1q1bt/zPADZlyhQzMxs+fLj179/fzMxuuukmmz59upmZbdu2Lf/4G264wdLS0g47pmHDhjZu3DgzM3vuueesX79+ZmZ277332uTJk83MbPv27dakSRPbvXu3jR492vr27WtmZqtWrbKoqCj74osvCl1Pdna2NWjQwHbv3m27du2yAQMG5LeVlyknJ8c6duxoq1atys/xxBNP5LcRzHV07NjR/va3v5mZ2ezZs61z585mZvb888/b1VdfbdnZ2fmf379/v3Xo0ME2b95sZmZTp061G264wczM6tSpY/v27cu/1qKE8+/uggULwnau4vJrNr/mMvNvtpLO9c0331jz5s1tz549lp2dbZ07d7Z169YVOuZvf/tb/v/bwpVr9erV1qRJE9u6davt2bPH2rdvb4MGDbLVq1dbgwYNrH79+la3bl378ccfj9lWqP8s7733XnvuueesSpUqlpuba2Zmubm5Fh8fH/ZcF154oTVr1swSExNt3rx5Zmb2zDPPWJMmTaxJkyY2bNiw/IzhzObXXCXJr9mUq/j8ms2vucz8ne14AcstiNopZMs5mNli51zCIZuvBDoFXk8CFgLDAttfDwRf6pyr6pyrY2abTijEHXfAypUn1MRhWreGwG+ZixIXF8eKFStYsmQJCxYs4Nprr2XUqFHcfPPNR2320ksvJTo6mubNm3PgwAG6d+8OQMuWLQ+bunbnzp3s2LGDjh07At70tQV7v4qydOlSVq9ezQUXXADA/v376dChA+DNUDZ9+nT+8pe/sGrVqvzPlCtXjmuvvRaAG264gZ49ex7W7oIFC3jyySfJzMzkt99+o3nz5lxxxRWHHXfVVVcB0LZtW2bMmAF4PZBpaWk8/fTTgDcj608//cTixYsZPHgwAImJiSQmJh7WXvny5enevTvvvfce3bp1Y/bs2Tz55JMATJs2jZdeeomcnBw2bdrE6tWr89vIu57iXEfB7Hl/FvPmzWPAgAGUL+/9J3Tqqafy7bff8u2339KlSxfA6xWsWbNm/nVcf/319OzZs8ifo4gc25GmQM8bQmlmTJs
2jY8//jisuQr2qlWuXPmwXrWrr76aadOm0a9fv4jcV7x582Zq1aqV34O1dOlSxo8fz6JFi+jUqRMff/wxTZo0CXuuvBEUBQ0ZMoQhQ4aEPUtBJZErLS2NiRMnsmvXLpKTk0sy3gkLWzYzyM0N+pH2wQe8PnUqu374geTAv6OYeY9Qvz7KvrRFi3gjLY1dV1xBcuB7V6FrPNr1l8T2o+xLW7yYN+bMYddll5F80UVH/nyw5ymhz6R98glv5uW68MLiny+E0j75hNcXLfLlf5vhEO51/GoXKOZ+AWoHXtcDCk6RtSGw7cQKvwiJioqiU6dOdOrUiZYtWzJp0iRuvvlmypcvT25uLsBhU/fnDQ0tV64c0dHR+bMSlStXjpycnBPOZGZ06dKF1NTUw/bl5uayZs0aKlWqxPbt26lfv36RbRw6U9K+ffu4/fbbWb58OQ0aNOCRRx454pIEedcXFRWVfz1mxjvvvEPTpk2P65quu+46nn32WWJiYkhKSiI+Pp4ffviBp59+mi+++IJq1apx8803F8pUuXLlw9o51nUUlb0oZkbz5s357LPP8rdlZGQAMHv2bBYvXsx7773H448/zjfffJNfNIr4TVELWD/yyCO8/PLL+b/M+Mc//sFll10W1lwtWrTg/vvvZ9u2bcTGxjJnzhySkpLy9y9ZsoTatWtHpIjp169f/nDJ++67j/r163PvvfcyduxYAPr06cMtt9wS9lzgDSnbtm0b0dHR+UPKXn75ZYYMGUJOTg4xMTG89NJLEcl2osJSxJhBdjZkZcH+/cd8Tlu8mJSnnyZz/37S58whdeBAkhMT4cAByMnxnkPxCKLttK1bSfn+ezLNSJ81i9QGDUiOiytWgRb0ozh/jkAKkAl88OGHpAJ++EpeMNf7ixb5Jhcckm3xYt9kK5jrvSVLfJMLDvl7tngxqampJ13xF7FvnmZmzrli/9rBOXcrcCt4M/nkLdqY55RTTsn/sk2oFr3Ma78I69atwzlH48aNAfj888+pU6cOGRkZNGjQgCVLltC1a1dSU1M5cOAAGRkZZGVlER0dTUZGBgcOHAicwjtHwX15ypUrxymnnMKHH37I+eefzyuvvEKHDh0Oa6ugFi1a8Mknn7By5UrOOOMM9uzZw88//0yTJk0YP348jRs35oEHHuCmm25i3rx5REdHk5uby+TJk+nduzcTJ06kXbt2ZGRkkJ2dzd69e9myZQtmRsWKFdm0aRPTpk3jyiuvLHRMRkYGZsbu3bupWLEie/bsyb/uSy65hNGjR/P000/jnGPVqlW0atWKdu3aMWnSJM4991xWr17N119/zZ49ew67pjZt2rBixQpyc3Pp1asXGRkZbNq0idjYWMqVK8f333/PnDlzaN++/WE5gKCu48CBA/nn3r17N2ZGRkYGF110Ec899xxJSUmUL1+e3377jbp16/Lrr78yb9482rVrR3Z2NmvXruXss89m/fr1JCUl0apVK1JTU9m0aRNVq1YtdD379u077O9zqOzevTts5youv2bzay4o2Ww//PBD/r3C0dHRDB06lJo1a/Ljjz+SnJxcqNf8WOcMxc/syiuvpEOHDsTGxpKQkMCmTZvyzzFmzBjOO++8iOTavn071apV49dff2XKlCk8//zzVK1albFjx9K6dWtWrFjBaaedFpFshy4Andf+6NGj87dlZGQc9bzh/PvvDhyg3P79x3ws+vpr/v722+zLyWHu++/zTNeu/KF+fcplZ1MuOxuX95yT430mJyd/W8HtLifnsM/kvw7sK450vC+WgFf8jR1brC++Vq5c/oNy5bCoqODeB57J+3wR79N27SIz0HuTacas7Gw61KiBOecdl/ec175zoXsucM5pixaRGRhxlAlMa92apoHJ1wwg7xfPzuW/LvHtBfblfTlNff99MgMzs2cCqe3b0+iQQuGoX2SPtLRAcbcXcZ43332XzMAvmjOBN88/n4bBjig6niUPgvzMmzNnkvmvfx3MdcE
FNOzVq/jnC4FC2TIzmThxIlWqVIlwqvAKd+H3a94QTudcHWBzYPtGoODUl/UD2w5jZi8BLwEkJSVZ3gQpedasWUN8fHxJ5w6amTFo0CB27NhB+fLlady4MS+99BLx8fGMGDGCfv36MXLkSDp16kRUVBTx8fFUrFiRihUrEh8fn1/c5F1DwX0FTZ48mQEDBpCZmcnvfvc7Jk6ceFhbBcXHxzNp0iT69+9PVlYWAI899hiVK1dm8uTJLFu2jPj4eD788EPGjh3L8OHDqVy5Mt988w2jR4+mVq1aTJgwgfj4eKKjo4mNjaVBgwbceuutdOjQgdNOO4127drlnzvvmPj4eJxzxMXFER8fT+XKlfOv+9FHH+WOO+7gggsuIDc3l0aNGvH+++9zxx130LdvX8477zyaNWtG27ZtqVy5cpF/rldccQWvvfYab775JpUqVeL888+nbdu2nHvuuTRo0IALL7yQmJiYw3IAQV1HVFRU/rmzsrJwzhEfH8+gQYP46aefuOCCC4iOjqZ///4MGjSIGTNmMHjwYHbu3ElOTg4DBgwgKSmJAQMGsHPnTsyMIUOGFDnTa0xMDOecc86J/yUMwsKFCzn0vx2/8Gs2v+aCks22ZcsWfv/73+cPN+/ZsycbN24kISGBuLi4Yp0nFD+zTp068dRTTwEHe9Y6depETk4O1157LStWrDjiqIVQ5rrooovye9UmTpxI586dqVq1KkOGDGHixInExMSQmppK27Ztw57tuB04ABkZpL3zDpNTU/lTjx4kt2sH+/Z5j717D74+1qM4xwY5yuVFIG9sxr6cHL6ZM4fb8naWKwcVK0KFCkd+jos7+v7jfO66dCkT77+fzH37qBQTQ9exY6FbN4iK8h7lyx98feijXDlvLbIQ/HECJKelkZqSQmZmJpUqVeLKf/6Tmj7o8bgmLY2ZBXJdM3w4TX2QKyUxkbQCuVLuvZeWPsgF8MezzuK9Atn+OGwYrXyQ7Y9NmhTONXSoL3LB4dn69u3rn//fhomz4xnrG2zj3j1+75tZi8D7p4BtZjbKOXcPcKqZDXXOXQ4MwpvkpR0wzszOO1b7SUlJduj6bmvWrKFZs2YleyFhlJGREdHCtaC4uDh2796d/95P2Qryay4oXrZw/t311ZfLQ/g1m19zQclmW7NmDVdeeSWfffYZsbGxdO7cmaSkJKpXr85rr71GlSpVSEpKYvTo0VSrVi1sufIUvF+ta9euLF26lKpVqzJ37lxGjhzJokWLjtlGmf6zzM72RqXs2uU9H+l1MPv37i00NKoSBDdsyzmIiTn4iI0t/L44j6N8Nu2zz0gZOtQrsGJjSX3lFZJ79vQKsKio4/8ZloC8Iah9+/b13VAyv2ZTruLzaza/5gJ/ZzsRzrkVZpZ0rONC1uPnnEvFm8ilhnNuA/AwMAqY5pzrB/wPuCZw+By8ou8/eP++RGbOXxGRMCrqXro8o0eP5u6772bLli3UqFEjbJmONFHJwIEDefDBB3HO8eCDD3LXXXfx6quvhi1XnqLuVwOYOnUqKSkpYc9TInJzYdcu0qZPZ0pqKruWLvXuCTuewu0I91kfJjYW4uOhShXvOT4e6taFpk0LbUv/6CMyP/kE8P5xTk9OJvmuu45erEVHH99QsmJKTkoitWFDX36JS05OpkqVKr78BYNfsylX8fk1m19zgb+zhUMoZ/U80r/AnYs41oC/hCqLHJ+CvX0iUrIKLvpdoUIFunfvTo8ePWjcuDHr168nPT2d008/PSLZipqopHbt2vn7+/fvn79uZrgVNeMiwGuvvRbeIAWZwZ49sH077Nhx8BHs+127SDPL71mbvWBB0T1rlSoVLtaqVIEGDQ7fllfIHbqt4L4gJ5fq2ro1E7/8Mn9oVNd+/eDii0v253cCTvYvcSIixaFpBUVEIuBoSxPceeedPPnkk1x55ZURyVbU9P+bNm2iTp06AMycOZMWLVpEJNuJOuIskPv2Fa9YO/R9YGKuI4qLg6p
VvUe1al7B1rJl/vv0+fMLTYiQfvXVJA8ffrBYi4sLulgrScnJyaSmpvqyV01ERIqnTBZ+ZnbY0gMifhbKe23Fn460NMGsWbOoV68erVq1ili2ooZT/vWvf2XlypU450hISODFF1+MWL6jys31CrGtW2HLlkKPtM8/J2X2bDIPHCA9LY3UOnVIzsnxjg9MenVEMTGFC7eaNaFJk4Pv8/YV9f6UU7zhj0fRtW1bJhaYdKDrjTdC8+Yl+IM5fupVExEpG8pc4RcTE8O2bduoXr26ij8pFcyMbdu2ERMTE+koEkZF3UuXlZXFP/7xD9LT0yOarajhlJMnT45AErwZHvOKuCKKucO2b916xN639PLlyQzsy8zNJT0mhuTOnQsXaUUVblWreoVfCKlnTUREQq3MFX7169dnw4YNbNmyJdJRjsu+fft8WwD4NZtfc0Hw2WJiYo45Bb2UPYfeS1e7dm3efffd/N6+DRs20KZNG5YtW8Zpp50WyajFdsQhlXv3Hr1wO3Tb9u1HPklez1vNmtC4MXTo4L2uUePg9rxHjRp0/eijwr1q//d/4KMCSz1rIiISSmWu8IuOjqZRo0aRjnHcFi5cGLa13IrLr9n8mgv8ne1kUdTMmX//+9957733qFChAmeccQYTJ07Mnx0ynIq6l27IkCH5+xMSEli+fHlYZ/U8Lvv3w6ZN8PPPsHEjaR9+SMprr5GZk+MNqfzd70jOzvYKuczMotuIiipcsLVuXahoO6yQq1692Pe8qVdNREROZmWu8BMRyXOkmTO7dOnCyJEjKV++PMOGDWPkyJE88cQTYc93pKUJfMPMu/9t48ajP7Zs8Y4NSMeboAQCQyqzskju2PHohVzVquFZAkC9aiIicpJS4SciZdbRZs7M0759e95+++2I5DvS0gR5fvzxx9CdPK+XLq94C/TWHfbYu/fwz1avDvXqeY+2bQ++Djy6fvMNE2+77eCQymef9dWQShERkZORCj8RKbOONHNmQa+++irXXntthBKeuMPupQu2l27z5sMbq1ChcEGXnFy4qKtb13sc477V5FatSK1SRUMqRUREfESFn4iUWUXNnBkVFZW///HHH6d8+fJcf/31EUxZTGZe0bZ3fWxDAAAgAElEQVR2LWlvvUXKSy8dvJeudm2Sd+woupeuRo2DBVxSklfAHdJTR/XqJTbcUkMqRURE/EWFn4iUaYfOnJk3e+prr73G+++/z/z58/259Mu+fbBuHaxde/hj506giHvpKlcmOSXl8IKubl2oWDFilyIiIiKRp8JPRMq0ombOnDt3Lk8++SSLFi3Kv/8vIsxgwwb4978PL+7+979CE6ZQvz40bQrXXw9nnglNm9J1/Xom3nHHwXvpRo/WvXQiIiJSJBV+IlIixowZwyuvvIJzjpYtWzJx4kQ+/fRT7r77bvbv30/btm2ZMGEC5Ys5Bf+JKmrmzEGDBpGVlUWXLl0Ab4KXf/7zn6ELkZFRdHH3738XXt6gcmWvuOvQAW6+2XvdtCk0aQJxcYc1mwyk1q6te+lERETkmFT4icgJ27hxI+PGjWP16tXExsZyzTXX8Oabb/Lwww8zf/58zjzzTB566CEmTZqUP+wyXIqaOfM///lPyZ/owAGvl66ooZk//3zwOOcgIcEr6Dp2PFjcNW3qDcks5rBT3UsnIiIiwVDhJyIlIicnh7179xIdHU1mZiaVK1emQoUKnHnmmQD5a+eFu/ArCYVmzrzooqKLu//8B7KyDn6oalWvmPvDHwoXd40bH3NWTBEREZGSpsJPRE5YvXr1uPvuuzn99NOJjY2la9euXHPNNQwdOpTly5eTlJTE22+/zfr16yMdNXi7dsHXX5M2ZQopEyZ4M2e++y6peEMsAShfHs44wyvoLrvMew7cf0fNmmFZkFxEREQkGCr8ROSEbd++nVmzZvHDDz9QtWpV+vTpwxtvvMHUqVO58847ycrKomvXroWWUvCN3Fz4/nv4+mtYtergc2Dx9EI
zZwLp559P8j33eMVdo0YQHR2h4CIiIiLBU+EnIids3rx5NGrUiJo1awJw1VVX8emnn3LDDTfk32OXnp7Ov//970jG9JZB+OabwgXeN98cnGClXDmvx65dO7j1VkhMpOuvvzLxr389OHPmsGFwxRWRvQ4RERGRYlLhJyIn7PTTT2fp0qVkZmYSGxvL/PnzSUpKyl9KISsriyeeeIL7778/PIEOHID//rdwgbdqlTf5Sp5q1aBVK+jfHxITvddnnw2xsYWaSgZSa9TQzJkiIiJSqqnwE5ET1q5dO3r37k2bNm0oX74855xzDrfeeisPPPAA77//Prm5uQwcOJDf//73JX/yHTu8XruCQzUP7cXLWyLhttu8Ai8x0VvYPMh78DRzpoiIiJR2KvxESpmi1surWLEiDzzwANOnTycqKoqBAwcyePDgsOYaPnw4w4cPL7Ttqaee4qmnniqZExw44N2LV7AX7+uvC/finXrqwV68vAKviF48ERERkZONCj+RUqSo9fKmTp2KmbF+/Xq+++47ypUrx+bNmyMd9bjkL5vw888k169fuMD79tuDvXhRUQd78QYMODhU8zjWwRMRERE5GajwEyllDl0vr27dujzwwAO8+eablCtXDoBatWpFOGUx7NgBX3xB2qRJpEydSuaBA4WXTcjrxQtMtpJ/L57WwhMREREJmgo/kVKkqPXyunbtSkpKCm+99RYzZ86kZs2ajBs3jiZNmkQ67uGys73eu2XL4PPPvcd33wFFLJvQowfJ//ynevFERERESoAKP5FSpKj18qZMmUJWVhYxMTEsX76cGTNm8Oc//zl/GYWIMfPWwitY5H35Jezb5+2vWdNbNuH666FdO7pu2cLE/v0PLpvQv783AYuIiIiInDAVfiKlyJHWy6tfvz5XXXUVAL169aJv377hDxcYsplf5C1bBnn3GsbEQJs2MHCgV+y1awcNGxbqyUsGUuPitGyCiIiISAio8BMpRY60Xl6VKlVYsGABjRo1YtGiRZx55pmhDZI3ZDOvyPv8c1i79uD+s86CSy89WOS1bAnR0cdsVssmiIiIiISGCj+RUuRI6+Xt3buX66+/njFjxhAXF8crr7xScifNG7JZsMj76quDQzZr1fKKuz/9yXs+91w45ZSSO7+IiIiInDAVfiKlTFHr5VWsWJHZs2eXzAl27Ch8X96yZbBli7cvJgbatoXbbz/Ym3f66Zp8RURERMTnVPiJnGTy18rbtYvk7t0PDtnMK/YKDtls1gwuv/xgkdeiRVBDNkVERETEX1T4iZxE0mbMIOWPfyQzK4v0tDRSy5UjOSfH21m7tlfc3XgjnHeehmyKiIiIlCEq/ETKupwcWLAApk0jffJkMrOyAMjMzSU9MZHke+/VkE0RERGRMk6Fn0hZdOAALF4Mb70F77wDW7dCXBxd27dn4tKlZGZleWvlDR8OWjZBREREpMwrF+kAIn40ZswYmjdvTosWLUhJSWHfvn08++yzNG7cGOccW7dujXTEw+XmesXeoEHewue//z1MmQJ/+APMmAGbN5O8cCGp06bRs2dPUlNTtVaeiIiIyElCPX4ih9i4cSPjxo1j9erVxMbGcs011zB16lQuuOACevTo4a815nJzYelSr2dv+nTYtAliY70JWa69Fi67DCpVKvQRrZUnIiIicvJR4SdShJycHPbu3Ut0dDSZmZnUrVuXc845J9KxPGbeDJx5xd6GDVCxolfkXXMN9OgBcXGRTikiIiIiPqLCT+QQ9erV4+677+b0008nNjaWrl270rVr18iGMoMVK2DaNO/xv/9BhQrQvTuMGgVXXAFVqkQ2o4iIiIj4lu7xEznE9u3bmTVrFj/88AM///wze/bsYcqUKeEPYgYrV8K990Ljxt7yCmPGQPPmMGkS/PorzJoF11+vok9EREREjko9fiKHmDdvHo0aNaJmzZoAXHXVVXz66afccMMNoT+5GXz7rder99ZbsG4dREV5E7Tcfz/07Am
nnhr6HCIiIiJSpqjwEznE6aefztKlS8nMzCQ2Npb58+eTlJQU2pOuXn1wGOeaNVCuHFxyCfz979CrF9SoEdrzi4iIiEiZpqGeIodo164dvXv3pk2bNrRs2ZLc3FxuvfVWxo0bR/369dmwYQOJiYnccsstJ3aif/8bHn0UWrb0hm+OGAG1a8Pzz3uzc86bB/37q+gTERERkROmHj+RIgwfPpzhw4cX2jZ48GAGDx58Yg1///3BYZyrVnnbLrwQxo+Hq6+GOnVOrH0RERERkSKo8BMJkbS0NCZOnMiudetI3r7dK/hWrPB2dujgTdTSp4+32LqIiIiISAhFpPBzzt0J3AIY8A3QF6gDTAWqAyuAP5nZ/kjkEzlRaZMnk9KvH5nZ2aS/+y6pQPJ558HTT3vF3umnRzqiiIiIiJxEwn6Pn3OuHjAYSDKzFkAUcB3wBDDGzBoD24F+4c4mckIOHIC5c6FPH9JvuonM7GwAMoH0P/0JPv8c7rpLRZ+IiIiIhF2kJncpD8Q658oDlYBNwO+BtwP7JwE9I5RNpHj+9z945BFo1AguvRQWLKDrFVdQKSYGgEqVKtG1d+/IZhQRERGRk1rYCz8z2wg8DfyEV/DtxBvaucPMcgKHbQB045P4V1YWTJ8O3bp5Bd+IEdCsmXcf38aNJM+aRepbb9GzZ09SU1NJTk6OdGIREREROYk5MwvvCZ2rBrwDXAvsAKbj9fQ9EhjmiXOuAfBBYCjooZ+/FbgVoHbt2m2nTp0aruhhsXv3buLi4iIdo0h+zRbOXJV++IE6c+ZwWno60bt2sa92bX7p3p1N3buTddppEc1WHH7NBf7N5tdc4N9sylV8fs3m11zg32x+zQX+zaZcxefXbH7NBf7OdrwuueSSFWZ27EWnzSysD6APMKHA+xuBF4CtQPnAtg7Ah8dqq23btlbWLFiwINIRjqiks3333XfWqlWr/Ed8fLyNGTPGHn74Yatbt27+9tmzZ4c112EyMsxeecWsfXszMIuONuvd22zuXLOcnMhmO05+zWXm32x+zWXm32zKVXx+zebXXGb+zebXXGb+zaZcxefXbH7NZebvbMcLWG5B1GGRmNXzJ6C9c64SsBfoDCwHFgC98Wb2vAmYFYFsEkZNmzZl5cqVABw4cIB69erRq1cvJk6cyJ133sndd98duXBm3mQsr7wCU6fCnj3eUM7Ro+FPf4KaNSOXTURERESkmMJe+JnZ5865t4EvgRzgK+AlYDYw1Tn3WGDbhHBnk8iZP38+Z5xxBg0bNoxskK1bYfJkr+BbvRoqV4Zrr4VbboH27cG5yOYTERERETkOEVnHz8weBh4+ZPN/gfMiEEd8YOrUqaSkpOS/f/bZZ3n99ddJSkpi9OjRVKtWLXQnz82FefO8Yu/ddyE7G9q1g5df9oq++PjQnVtEREREJAwitZyDSL79+/eTlpZGnz59ABg4cCDff/89K1eupE6dOtx1112hOfFPP8Hw4d6snN26wccfw1/+At98A0uXer18KvpEREREpAyISI+fSEEffPABbdq0oXbt2gD5zwD9+/enR48eJXey/fshLc3r3UtP97b94Q/w1FNw5ZVQsWLJnUtERERExCdU+EnEpaamFhrmuWnTJurUqQPAzJkzadHisFU9im/1apgwAV5/3buPr0EDePBB6NsXEhJOvH0RERERER9T4ScRtWfPHj766CNefPHF/G1Dhw5l5cqVOOdISEgotK9Ydu/2FlR/5RX47DOIjobkZG8IZ5cuEBVVQlchIiIiIuJvKvwkoipXrsy2bdsKbZs8efLxN2gGy5YdXIZh925vGYann/aWYahV6wQTi4iIiIiUPir8pFRLS0tj4sSJ7NqwgeStW72C7//9P6hU6eAyDB06aBkGERERETmpqfCTUistLY2U664jc+9e0t99l1QguV07eOklr+irUiXSEUVEREREfEGFn5Ra6W+8QebevQBkAunXXUdyampkQ4mIiIiI+JDW8ZP
Sad48uqalUSkwhLNSpUp0LTAzqIiIiIiIHKTCT0qfqVPhsstIbtKE1FdfpWfPnqSmppKcnBzpZCIiIiIivqShnlK6PPMM3HkndOwI775LctWqVElIoFOnTpFOJiIiIiLiW+rxk9IhNxeGDvWKvquvhrlzoWrVSKcSERERESkV1OMn/pedDf36weTJcPvtMG6cFl8XERERESkGFX7ib7t3Q+/e8OGH8NhjcN99WpNPRERERKSYVPiJf23eDJdfDl99BRMmwJ//HOlEIiIiIiKlkgo/8af//he6dYONG+Hdd6FHj0gnEhEREREptVT4if989RVceql3b9/8+dChQ6QTiYiIiIiUaprVU/xl/nxvqYaKFeGTT1T0iYiIiIiUABV+4h9Tp3o9fQ0bwqefQrNmkU4kIiIiIlImqPATfxg7FlJSvB6+JUugXr1IJxIRERERKTNU+ElkmcGwYXDHHXDVVd6yDVqYXURERESkRGlyF4mc7Gy45RZ4/XUYOBDGj9fC7CIiIiIiIaDCTyJj927o0wfmzoVHH4X779fC7CIiIiIiIaLCT8JvyxZvYfYVK+Dll71ePxERERERCRkVfhJeP/zgLcy+fr23MPsVV0Q6kYiIiIhImafCT8Lnq6/gsssgK8tbr+/88yOdSERERETkpKBZPSU8Pv7YW5g9Ohr+9S8VfSIiIiIiYaTCT0Lvrbege3dvYfbPPtPC7CIiIiIiYabCT0Jr3DhvYfb27WHxYi3MLiIiIiISASr8JDTM4J57YMgQ6NnTW5i9WrVIpxIREREROSlpchcpeQUXZh8wAJ59Vguzi4iIiIhEkHr8TgI7duygd+/enHXWWTRr1ozPPvuM6dOn07x5c8qVK8fy5ctL7mR79sCVV3pF34gR8PzzKvpERERERCJMPX4ngSFDhtC9e3fefvtt9u/fT2ZmJlWrVmXGjBncdtttJXciLcwuIiIiIuJLKvzKuJ07d7J48WJee+01ACpUqECFChWoWrVqyZ6o4MLsM2dCcnLJti8iIiIiIsdNQz3LuB9++IGaNWvSt29fzjnnHG655Rb27NlTsidZtcpbl2/rVpg3T0WfiIiIiIjPqPAr43Jycvjyyy8ZOHAgX331FZUrV2bUqFEld4IFC+Dii72F2T/5BC64oOTaFhERERGREqHCr4yrX78+9evXp127dgD07t2bL7/8smQanzbNW5i9QQP49FM4++ySaVdEREREREqUCr8y7rTTTqNBgwasXbsWgPnz53N2SRRo48fDddfBeefBkiVQv/6JtykiIiIiIiERdOHnnIt1zjUNZRgJjfHjx3P99deTmJjIypUrue+++5g5cyb169fns88+4/LLL6dbt27BNWYG990Hgwd7C7Onp2thdhERERERnwtqVk/n3BXA00AFoJFzrjUwwsw0i0cp0Lp168PW6uvVqxe9evUqVjsuJwf69oVJk+C22+C557RGn4iIiIhIKRDscg6PAOcBCwHMbKVzrlGIMokPpU2bRtrgwez89VeShw+HBx8E5yIdS0REREREghBs4ZdtZjtd4S/6FoI84kNps2aRkpJCZm4uqRUqkNq6Nckq+kRERERESo1g7/H7f865PwJRzrkmzrnxwKchzCU+kv7CC2Tm5gKQuX8/6enpEU4kIiIiIiLFEWzh91egOZAFpAK7gDuO96TOuarOubedc98559Y45zo45051zn3knFsXeNaMIX6QlUXXr76iUqCHr1KlSnTt2jXCoUREREREpDiCKvzMLNPM7jezc80sKfB63wmcdyww18zOAloBa4B7gPlm1gSYH3gvkTZ+PMmbN5P6yCP07NmT1NRUkpM1p4+IiIiISGkS7Kye73H4PX07geXAi8UpAp1zpwAXAzcDmNl+YL9z7kqgU+CwSXgTyQwLtl0JgS1b4NFH4bLLSH7oIaosXEinTp0inUpERERERIrJmR17jhbn3FigJt4wT4Br8YZ7GlDFzP4U9Am9pSBeAlbj9fa
tAIYAG82sauAYB2zPe3/I528FbgWoXbt226lTpwZ76lJh9+7dxMXFRToGAE3GjqVuWhpfTJhAZkKCr7IV5Ndc4N9sfs0F/s3m11zg32zKVXx+zebXXODfbH7NBf7NplzF59dsfs0F/s52vC655JIVZpZ0zAPN7JgP4IsjbQP+XzBtFPhcEpADtAu8Hws8Cuw45Ljtx2qrbdu2VtYsWLAg0hE8q1ebRUWZ3X57/ibfZDuEX3OZ+TebX3OZ+TebX3OZ+TebchWfX7P5NZeZf7P5NZeZf7MpV/H5NZtfc5n5O9vxApZbEHVYsJO7xDnnTs97E3idVyrvD7KNPBuADWb2eeD920Ab4FfnXJ1A+3WAzcVsV0rS0KFQuTI88kikk4iIiIiIyAkKdh2/u4BPnHPfAw5oBNzunKuMdz9e0MzsF+fceudcUzNbC3TGG/a5GrgJGBV4nlWcdqUEzZsH778PTz4JNWtGOo2IiIiIiJygoAo/M5vjnGsCnBXYtNYOTujyzHGc96/AG865CsB/gb54M4xOc871A/4HXHMc7cqJOnAA7roLGjWCv/410mlERERERKQEBNvjB9AEaArEAK2cc5jZ68dzUjNbiXev36E6H097UoJeew2+/hreegtiYiKdRkRERERESkCwyzk8jLfUwtnAHOBS4BPguAo/8amMDHjgAejQAfr0iXQaEREREREpIcH2+PXGW3rhKzPr65yrDUwJXSyJiCefhF9+gZkzwblIpxERERERkRIS7Kyee80sF8hxzlXBm3GzQehiSditXw+jR0NKCrRvH+k0IiIiIiJSgoLt8VvunKsKvIy34Ppu4LOQpZLwu/9+yM2FkSMjnUREREREREpYsLN63h54+U/n3Fygipl9HbpYElbLl8PkyXDvvdCwYaTTiIiIiIhICQtqqKdzbn7eazP70cy+LrhNSjEz+NvfoFYtuOeeSKcREREREZEQOGqPn3MuBqgE1HDOVcNbvB2gClAvxNkkHGbOhCVL4J//hCpVIp1GRERERERC4FhDPW8D7gDq4t3bl1f47QKeDWEuCYf9+2HoUGjeHPr1i3QaEREREREJkaMWfmY2FhjrnPurmY0PUyYJl+eeg++/h7lzoXyw8/yIiIiIiEhpE+zkLuOdc+cDCQU/Y2ZawL202rYNRoyA7t2hW7dIpxERERERkRAKqvBzzk0GzgBWAgcCmw1Q4VdajRgBu3bB009HOomIiIiIiIRYsOP7koCzzcxCGUbCZO1aeP556N/fu79PRERERETKtKCWcwC+BU4LZRAJo6FDITYWhg+PdBIREREREQmDYHv8agCrnXPLgKy8jWaWHJJUEjoLFkBaGowcCbVrRzqNiIiIiIiEQbCF3yOhDCFhcuCAt1h7w4Zwxx2RTiMiIiIiImES7Kyei5xzDYEmZjbPOVcJiAptNClxkyfDypWQmgoxMZFOIyIiIiIiYRLUPX7Ouf7A28CLgU31gHdDFUpCYM8euO8+aN8err020mlERERERCSMgh3q+RfgPOBzADNb55yrFbJUUvKeego2bYJ33gHnIp1GRERERETCKNhZPbPMbH/eG+dcebx1/KQ02LgRnnwSrrkGOnSIdBoREREREQmzYAu/Rc65+4BY51wXYDrwXuhiSYl64AFvYpdRoyKdREREREREIiDYwu8eYAvwDXAbMAd4IFShpAR9+SVMmuTN4tmoUaTTiIiIiIhIBAR7j18s8KqZvQzgnIsKbMsMVTApAWZw111Qvbo3sYuIiIiIiJyUgu3xm49X6OWJBeaVfBwpUWlpsHAhjBgBp5wS6TQiIiIiIhIhwRZ+MWa2O+9N4HWl0ESSErF/P/z979CsGfTvH+k0IiIiIiISQcEO9dzjnGtjZl8COOfaAntDF0tO2AsvwLp1MHs2lA/2j1lERERERMqiYCuCIcB059zPgANOA7QKuF/99hsMHw5dusCll0Y6jYiIiIiIRNgxCz/nXDmgAnAW0DSwea2ZZYc
ymJyAxx6DnTth9Ggt1i4iIiIiIscu/Mws1zn3nJmdA3wbhkxyItatg2efhX79oGXLSKcREREREREfCHpWT+fc1c6p+8j3hg2DihW9mTxFREREREQIvvC7DZgO7HfO7XLOZTjndoUwlxyPRYtg5ky491447bRIpxEREREREZ8IanIXM4sPdRA5Qbm58Le/QYMGcOedkU4jIiIiIiI+ElThFxjieT3QyMwedc41AOqY2bKQppPgvfEGfPklTJkCsbGRTiMiIiIiIj4S7FDP54EOwB8D73cDz4UkkRRfZqY3vPPccyElJdJpRERERETEZ4Jdx6+dmbVxzn0FYGbbnXMVQphLimP0aNi4EaZOhXLB1vIiIiIiInKyCLZKyHbORQEG4JyrCeSGLJUE7+efYdQo6N0bLrww0mlERERERMSHgi38xgEzgVrOuceBT4B/hCyVBO/BByEnxyv+REREREREihDsrJ5vOOdWAJ0BB/Q0szUhTSbHtmoVTJzozeZ5xhmRTiMiIiIiIj511MLPORcDDAAaA98AL5pZTjiCyTGYeQXfqafCAw9EOo2IiIiIiPjYsXr8JgHZwBLgUqAZcEeoQ0kQZs+Gjz+G8eOhatVIpxERERERER87VuF3tpm1BHDOTQC0bp8fZGfD3XdD06Zw222RTiMiIiIiIj53rMIvO++FmeV467hLxL34IqxdC++9B9HRkU4jIiIiIiI+d6zCr5VzblfgtQNiA+8dYGZWJaTp5HA7dsAjj0DnznD55ZFOIyIiIiIipcBRCz8ziwrViQPrAi4HNppZD+dcI2AqUB1YAfzJzPaH6vyl1uOPw2+/eYu2qwdWRERERESCEOw6fqEwBCi4JMQTwBgzawxsB/pFJJWfff89jBsHfftCq1aRTiMiIiIiIqVERAo/51x94HLglcB7B/weeDtwyCSgZySy+do993j39D36aKSTiIiIiIhIKRKpHr9ngKFAbuB9dWBHgTUCNwD1IhHMtz75BN5+G4YNg7p1I51GRERERERKEWdm4T2hcz2Ay8zsdudcJ+Bu4GZgaWCYJ865BsAHZtaiiM/fCtwKULt27bZTp04NV/Sw2L17N3FxcYU35ubS5i9/oeLWrXw+eTK5MTH+yeYDfs0F/s3m11zg32x+zQX+zaZcxefXbH7NBf7N5tdc4N9sylV8fs3m11zg72zH65JLLllhZknHPNDMwvoARuL16P0I/AJkAm8AW4HygWM6AB8eq622bduaX+zdu9fOPfdcS0xMtLPPPtseeughMzO76aabLCEhwVq1amWtWrWyr7766qjtLFiw4PCNU6aYgdnrr4cgefCKzOYDfs1l5t9sfs1l5t9sfs1l5t9sylV8fs3m11xm/s3m11xm/s2mXMXn12x+zWXm72zHC1huQdRhx1rOocSZ2b3AvQB5PX5mdr1zbjrQG29mz5uAWeHOdiIqVqzIxx9/TFxcHNnZ2Vx44YVceumlADz11FP07t37+BreuxfuvRfatoXrry/BxCIiIiIicrKI5KyehxoG/M059x+8e/4mRDhPsTjn8ruNs7Ozyc7OpkQWvB8zBtavh//7Pyjnpz8uEREREREpLSJaSZjZQjPrEXj9XzM7z8wam1kfM8uKZLbjceDAAVq3bk2tWrXo0qUL7dq1A+D+++8nMTGRO++8k6ysYlzWL7/AyJHQqxdcfHGIUouIiIiISFmnLqQSFBUVxcqVK9mwYQPLli3j22+/ZeTIkXz33Xd88cUX/PbbbzzxxBPBN/jQQ5CVBcX5jIiIiIiIyCFU+IVA1apVueSSS5g7dy516tTBOUfFihXp27cvy5YtC66Rr7+GCRNg0CBo0iS0gUVEREREpExT4VdCtmzZwo4dO/j/7d19sFx1ecDx70MCMWmKQVDkTW7VUGRIjBLFah1JGCs1FnWKGGttYHRSpIx9CWPTdqQzzjAT6/iCtZ0URcVWjagQMwbRDJiKNohBAkmIINJUAymgFWtKVBKe/nH
OlWsgL7t7ds+Pk+9n5s7d93zZvZzdZ8/ZswA7d+5kzZo1nHzyyWzfvh2o9h6C0O0AAA/vSURBVJ66cuVKTj31cd9Q8XiZcPHFMGMGvOtdw8yWJEmSdBAY+V49u2r79u0sWrSI3bt38+ijj3Luuefymte8hvnz5/Pggw+SmcyZM4fly5fv/8auuw7WrIHLLoMjjhh+vCRJkqROc/BryOzZs7n11lsfd/oNN9zQ0+3E7t2wZEm1eecFFzSVJ0mSJOkg5uBXkFWrVnHVJZfw0y1bOHvlSjjssLaTJEmSJHWAg18hVq1axZsWLuThnTu55pBD+AxwdttRkiRJkjrBwW8Ixpau7vk6P15zOQ/v3AnAw48+ypvf/RGOXNf7w7N12YKeryNJkiSp29yrZyGmjr2QmDwFgJg8haljL2y5SJIkSVJXuMavENNmns5RZ7+Tk392C9/9zdOYNvP0tpMkSZIkdYSDX0GmzTydN8w6jfdt9GGRJEmS1Bw39ZQkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI6b3HaARmts6eq+r7tk1i7O6/P6W5ct6PvflSRJkjQY1/hJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUseNfPCLiBMi4msRcUdEbI6IP69Pf1pErImI79W/jxh1myRJkiR1URtr/HYBSzLzFOAlwJ9FxCnAUuD6zJwJXF8flyRJkiQNaOSDX2Zuz8zv1Id/BmwBjgNeC1xZX+xK4HWjbpMkSZKkLorMbO8fjxgDvg6cCvwgM2fUpwfwk/Hje1xnMbAY4Oijjz5txYoVI+s9UBvv/Wnf1z16Kty/s/9/e9ZxT93n+W217a9rEDt27GD69OlDu/1BlNpWaheU21ZqF5TbZlfvSm0rtQvKbSu1C8pts6t3pbaV2gVlt/Vr3rx5t2Tm3P1drrXBLyKmA/8OXJqZV0fEQxMHvYj4SWbu83N+c+fOzfXr1w87tWdjS1f3fd0ls3bxvo2T+77+1mUL9nl+W2376xrE2rVrOeOMM4Z2+4Mota3ULii3rdQuKLfNrt6V2lZqF5TbVmoXlNtmV+9KbSu1C8pu61dEHNDg18pePSPiUOALwKcy8+r65Psj4pj6/GOAB9pokyRJkqSuaWOvngFcAWzJzPdPOGsVsKg+vAj44qjbJEmSJKmL+t+msH8vA94CbIyIDfVpfwssA66KiLcC/wWc20KbJEmSJHXOyAe/zPwGEHs5+8xRtkiSJEnSwaCVz/hJkiRJkkbHwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOm5y2wESwNjS1X1fd8msXZw3wPW3LlvQ93UlSZKkJwPX+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSx01uO0Aq2djS1QNdf8msXZzX521sXbZgoH9bkiRJGucaP0mSJEnqONf4SU9Sg6yNHGRNJLg2UpIk6cnGNX6SJEmS1HEOfpIkSZLUcQ5+kiRJktRxDn6SJEmS1HHu3EVSo/wKDEmSpPI
4+Ek6aLS1J1QHUkmS1DYHP0lqmWtJJUnSsPkZP0mSJEnquKLW+EXEWcBlwCTgo5m5rOUkSTqouXmsJEndUMzgFxGTgH8CXglsA74dEasy8452yyRJpWlrIIV9D6VutitJKlUxgx/wYuDuzLwHICJWAK8FHPwkSRpQqcOyJGk0Shr8jgN+OOH4NuD0llokSdIIlLyWtNRNnUu+zySVKzKz7QYAIuIc4KzMfFt9/C3A6Zl50R6XWwwsro/+NnDnSEOH7yjgR21H7EWpbaV2QbltpXZBuW2ldkG5bXb1rtS2Urug3LZSu6DcNrt6V2pbqV1Qdlu/TszMp+/vQiWt8bsXOGHC8ePr035NZl4OXD6qqFGLiPWZObftjidSalupXVBuW6ldUG5bqV1QbptdvSu1rdQuKLet1C4ot82u3pXaVmoXlN02bCV9ncO3gZkR8VsRcRiwEFjVcpMkSZIkPekVs8YvM3dFxEXAV6i+zuFjmbm55SxJkiRJetIrZvADyMxrgWvb7mhZyZuxltpWaheU21ZqF5TbVmoXlNtmV+9KbSu1C8ptK7ULym2zq3eltpXaBWW3DVUxO3eRJEmSJA1HSZ/xkyRJkiQNgYPfiEXE7ojYEBGbIuJzETGtPv2ZEbEiIr4fEbdExLURcVJ93nUR8VBEfKmUroiYExHrImJzRNweEW8sqO3EiPhOfZ3NEXFBCV0Trnd4RGyLiA8Po6vftgnX2RARQ9mxUp9dz4qIr0bEloi4IyLG2u6KiHkT7qsNEfHziHhd0139tNXn/UP9t78lIj4UEVFI13vqy29qepnRZ88TLluj2snYtyLi7oj4bFQ7HCuh66K6KSPiqH6bhtD1qYi4s76tj0XEoQW1XRERt0X1PPX5iJheQteE2/xQROzot2kYbRHxiYj4z3hs+TankK6IiEsj4q6olm3v6LdrCG03Tri/7ouIlYV0nRmPvR76RkQ8t5Cu+XXXpoi4MiIG+uhZr22xj9ew0eDyv0iZ6c8If4AdEw5/CvgrIIB1wAUTzns+8PL68JnAHwBfKqULOAmYWZ92LLAdmFFI22HAlPq06cBW4Ni2uyYcvwz4NPDhUh7PPa9TWNda4JUTHs9pJXRNOO1pwP8Mo6ufNuClwDepdpI1qb7cGQV0LQDWUH22/Deo9uR8eMt/W0+4bAWuAhbWh5cDby+k6wXAGNUy7aiC7q9X19cN4DOD3F9DaDt8wuH3A0tL6KrPmwv8Kw0sexu+zz4BnDNo0xC6zgc+CRxSH39GKW173O4XgD8poQu4C3heffhC4BNtd1GtdPohcFJ9/N3AW0f5WLKP17A0uPwv8cc1fu26EXguMA94JDOXj5+Rmbdl5o314euBn5XUlZl3Zeb36tPuAx4A9vvFkSNq+2Vm/qI+eQqjWbN9QI9lRJwGHA18dQRNPbW1YL9dEXEKMDkz19Sn78jMh9vu2uPy5wBfHkHXgbYl8BTqN0CAQ4H7C+g6Bfh6Zu7KzP8DbgfOarHnCZetERHAfODz9UlXAk2tzR1omZ+Zt2bm1oZamuy6NmvAzVTfw1tK2//Crx7XqVT/f7TeFRGTgPcC72yop7G2IRq06+3AuzPz0fpyDxTUBlRb9FAtP/pe49dwVwKH14efCtxXQNeRwC8z8676+BrgDxvqOqC2vb2GHfLyvwgOfi2pV2v/PrAROBW4pd2iSj9dEfFiqheZ3y+lLSJOiIjbqd5Vek/9P3arXRFxCPA+4OJhtfTbVntKRKyPiJtiSJst9tF1EvBQRFwdEbdGxHvrF0xtd020kGotx1AdaFtmrgO+RvUO5nbgK5m5pe0u4DbgrIiYFtVmivOAE1rs2ZsjgYcyc1d9fBtwXAFdQ9FkV1SbeL4FuK6ktoj4OPDfwMnAPxbSdRGwKjO3D9ozhDaAS+tN4D4QEVMK6XoO8Mb6eerLETFz0K4G28a9Drh+/A2HArreBlwbEduo/t9cVkDXj4DJETH+Bern0NBzQQOvYYey/C+
Jg9/oTY2IDcB64AfAFS33jOurKyKOodpU5fzxd+FKaMvMH2bmbKp3fRZFxNEFdF0IXJuZ24bQsqd+Hs8TM3Mu8EfAByPiOQV0TabaLONi4EXAs4HzCugCfvX3P4vq+0eHpae2+jMcz6Na63IcMD8iXt52V2Z+lerrev6DalBeB+xuq2eEDqauf6ZaqzvoVgSNtmXm+VSbc20BBvlsaSNdEXEs8AYaGEKbbqv9DdWQ/CKqTdn/upCuKcDP6+epjwAfG+C2mm4b9yYGfyOwya6/BF6dmccDH6fa3LnVrnrLgIXAByLiZqo1goM+F5T8GrYoRX2P30FiZ2b+2gelI2Iz1Tsebeq5q96kYTXwd5l5U0lt4zLzvojYRDU8fH5/lx9y1+8AL4+IC6k+q3ZYROzIzKUNd/XTRmbeW/++JyLWUn2eqOm1uL12bQM2ZOY99WVXAi+h+RfP/f6NnQtck5mPNNwzUa9trwduyswd9WW/TPW31/Qmvf38jV0KXFpf9tNUnz9prWcvfgzMiIjJ9bu+xwP3FtDVtEa7IuLvqTb3/9PS2gAyc3dErKDatPLjLXe9gOpNyburLcuYFhF3Z2bfO95osI0JayF/Ua8tHWQrlSYfy23A1fXha+j/cRxGG/WWDC+mWga33hURTween5nfqk/6LIOtjW/yb2wd1esyIuL3qLbuGURTr2GbXv4XxzV+ZbgBmBIRi8dPiIjZQ3qXvhd77YpqL0fXAJ/MzKYHqkHbjo+IqfVpRwC/C9zZdldmvjkzn5WZY1RPpJ8c0tDXc1tEHDG+OU/95PUy4I62u6h2ADKjfgKDatv7ErrGNfHubj/21fYD4BURMbne9O4VVGs6Wu2KiEkRceT4acBshv9Z156XrfW70V/jsRcMi4Avtt01In11RcTbgFcBbxriu+Y9t0XlueOHgbOB77bdlZmrM/OZmTlWPx88PODQ11hbfZlj6t9BteniphK6qD43N68+/AqafeNo0DaolhlfysyfF9L1E+Cp8dhexV9J888F/f6NPaP+PYVqjfLyfV2+6ba9vYYd0fK/XVnAHmYOph/2svcuqs1QrqJaw7KZ6l2I8T0O3Qg8COykesfrVW13AX8MPAJsmPAzp4T7jGrhdjvVZ4puBxaX0LXHZc5jRHv1PMD77KVU28TfVv8eaA9bTd5nEx7PjVR7mzuskK4xqncCDxnW49jnYzkJ+BeqJ/g7gPcX0vWUuucO4Kamlxd9PoZPuGyl2qT4ZuBu4HPUewkuoOsd9fFdVDtp+GghXbvqy48/F1xSwmNJ9eb2N+tlxyaqvf31vSfZJu+zA7ndFh/PGybcZ/8GTC+ka0Z9uY1Um4o/v5T7rD5vLXBWYY/l63nsuX0t8OxCut5L9Rx1J/AXo77P2MdrWBpc/pf4E/V/pCRJkiSpo9zUU5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjru/wGW3Wp2GDdpLwAAAABJRU5ErkJggg==\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABJUAAAE/CAYAAAD/i/LxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3X18zfX/x/HHZ+zKbK4tzEWuks21RIXhayk6IfHdVyWR7yQX35L0+36lVCjkoiKSucpcsxPKvhj5klwUJaJLF5FrtjmY7bx+f3zOzjY759gMx3jdb7fP7Vx9Xuf9PidiT6/3+2OICEoppZRSSimllFJK5YWPtyeglFJKKaWUUkoppQoeDZWUUkoppZRSSimlVJ5pqKSUUkoppZRSSiml8kxDJaWUUkoppZRSSimVZxoqKaWUUkoppZRSSqk801BJKaWUUkoppZRSSuWZhkpKKaWUuq0ZhjHTMIy3c3nuF4Zh9LgBc6hiGIYYhlH4er+3i7GaG4ax70aPo5RSSil1w/9io5RSSimVG4Zh/AGEAulZnp4pIi/erDmIyCM3a6wbRUQ2Avd4ex5KKaWUuv1pqKSUUkqpW8ljIrLG25MoqAzDKCwiad6eh1JKKaXuDLr8TSmllFK3PMMwphiGsSTL43cNw1hrmCINwzhsGMb/GYZx0jCMPwzD6O7mfUoYhrHCMIwThmGccdwPy/L6esMwejvuP2sYxv8MwxjrOPd3wzAeyXJuMcMwPjUM46hhGH8ahvG2YRiFHK8VctSdNAzjN6C9h8/2qmEYi694bqJhGJMc93sahrHXMIxkwzB+Mwzjn1nOy/jsrxqG8RcQm/FclnOGGobxq6N+j2EYnbK8drXPWNIwjFjDMI44Xl+e5bUOhmHsNAzjrGEYmw3DqOvuMyqllFLq9qShklJKKaUKgpeBOo4QpDnQC+ghIuJ4/S6gNFAB6AFMMwzD1RIwHyAWqAxUAi4AH3oY935gn+O93wM+NQzDcLw2E0gDqgMNgCigt+O154EOjucbA108jDEfeNQwjGAwAymgKzDP8fpxx3uFAD2B8YZhNMxSfxdQ0vGZ+rh4/1+B5kAx4E1grmEY5XL5GecARYBwoCww3jHHBsAM4J9AKWAqYDUMw9/D51RKKaXUbUZDJaWUUkrdSpY7Ol8yjucBRMQGPA28D8wF+ovI4Stqh4nIJRHZAKzEDGayEZFTIrJERGwikgy8A7T0MJ8DIvKJiKQDs4ByQKhhGKHAo8AgETkvIscxA5e/O+q6AhNE5JCInAZGuRtARA4A3wIZHUStAZuIbHG8vlJEfhXTBiABMyTKYAeGOz77BRfvv0hEjoiIXUQWAD8DTXLxGcsBjwAxInJGRC47xgczvJoqIt+ISLqIzAIuAU09fJdKKaWUus3onkpKKaWUupV0dLenkoh841hKVhZYeMXLZ0TkfJbHB4DyV76HYRhFMMOfdkAJx9PBhmEUcoQqV/ory/g2RwNPUczOIF/gaGZTDz7AIcf98lnuZ8zHk3lANDAb+AeZXUo4lqMNB2o6xigC/JCl9oSIXHT3xoZhPAO8BFRxPFUUsyspN5/xtIiccfG2lYEehmH0z/KcHy6+c6WUUkrdvrRTSSmllFIFgmEY/QB/4Agw5IqXSxiGEZTlcSXHeVd6GfPKaPeLSAjQIuPt8zidQ5idOaVFpLjjCBGRcMfrR4GKV8zHk0VApGN/p044QiXHcrIlwFggVESKA6uumK/ghmEYlYFPgBeBUo763eTu8x4CShqGUdzNa+9k+ezFRaSIiMTl4n2VUkopdZvQUEkppZRStzzDMGoCbwNPYS6DG2IYRv0rTnvTMAw/x55LHTCDmisFY+6jdNYwjJKYHUB5JiJHMZehjTMMI8QwDB/DMKoZhpGxlG4hMMAwjDDDMEoAQ6/yfieA9Zj7Pf0uInsdL/lhBmkngDRH11JUHqYahBk6nQBz028gIg+f8QtgsmODc1/
DMDJCuE+AGMMw7ndslh5kGEb7jH2hlFJKKXVn0FBJKaWUUreSzw3DSMlyLDMMozDmPkrvisguEfkZ+D9gTpaNof8CzmB2J32GuQ/QTy7efwIQCJwEtgBf5mOuz2CGPnscYy/G3I8IzNBlNbALc7+kpbl4v3nA38iy9M2x79MAzJDqDObSOGtuJygie4BxwNfAMaAOsCm39ZgB3mXgJ8wNwwc53nc75mbkHzrm9QvwbB7eVymllFK3ASPzoilKKaWUUgWPYRiRwFwRCfP2XJRSSiml7iTaqaSUUkoppZRSSiml8kxDJaWUUkoppZRSSimVZ7r8TSmllFJKKaWUUkrlmXYqKaWUUkoppZRSSqk801BJKaWUUkoppZRSSuVZYW9PID9Kly4tVapU8fY0bqjz588TFBTklXpvjp3fep27zr0gjZ3fep37nTd3/d507gVp7PzW69x17gVp7PzW69zvvLnr93Znzr0g2LFjx0kRKXPVE0WkwB6NGjWS211iYqLX6r05dn7rde7eqde5e6de5+6d+jt17PzW69y9U69z9069zt079Tp379Tr3Ave2Pmt17nf3oDtkotcRpe/KaWUUkoppZRSSqk801BJKaWUUkoppZRSSuWZhkpKKaWUUkoppW57EydOJCIigvDwcCZMmADArl276NevH3Xq1OGxxx4jKSnphtR7c2ydu/fmfico0Bt1u3L58mUOHz7MxYsXvT2V66JYsWLs3bvXK/XeHDu/9QVt7gEBAYSFheHr63vNYyqllFJKKXU1EydO5JNPPkFEeP755xk0aBC7du0iJiaGY8eOER4ezmeffUZISEie6vv164ePjw9VqlS5YfX5mfvu3bv55JNP2Lp1K35+frRr144OHTrQu3dv53vNmDGDMWPG8NZbb13Xem+OrXP33tzvFLddqHT48GGCg4OpUqUKhmF4ezr5lpycTHBwsFfqvTl2fusL0txFhFOnTnH48GHuvvvuax5TKaWUUkrlnquAYufOnbzwwgv4+flRuHBhJk+eTJMmTXJdGxMTw8mTJylevLjbWnf1NyOY8fRD8tixYxERfvvttwIXEORm7nv37uX++++nSJEiALRs2ZKlS5eyf/9+6tWrB0Dbtm15+OGHr3u9N8fWuXtv7neK227528WLFylVqtRtESipO4NhGJQqVeq26a5TSiml1J3D1dKQjGCmfv36NG7cmK1bt+aptmnTpvTu3dtjbX7HzhpQ7Nq1ixUrVvDLL78wZMgQevTowc6dOxkxYgRDhgzJU+3w4cOZPn2621pP9RnByg8//ECnTp0YM2bMda/P+kNy4cKFs/2Q3KJFC8D8IXnJkiUux/ZUn/WH7BtRn9+5R0REsHHjRk6dOoXNZmPVqlUcOnSI8PBwNm3aBMCiRYs4dOjQda/35tg6d+/N/U5x24VKgAZKqsDRX7NKKaWUuhYazOR9bHAfUBiGwfnz5wE4d+4c5cuXz1Ntxt4q7mo91d+MYMbTD8nx8fFAwQwIcjP3e++9l1dffZWoqCjatWtH/fr1KVSoEDNmzCA+Pp5GjRqRnJyMn5/fda/35tg6d+/N/U5xW4ZK3vbOO+8QHh5O3bp1qV+/Pt98881NG/unn36ifv36NGjQgF9//fWGjPHss8+yePFij+e8/vrrJCYmXpfxIiMj2b59+3V5r6xef/111qxZc93fVymllFJ3FlfBTLdu3ejduzf169enSpUq1K9f/7rXazBzbWOD+4BiwoQJTJ06lYoVKzJ48GBGjRqVp9pXXnmFrl27uq31VH8zghlPPyRPnjyZPn36FMiAIDdzB+jVqxc7duzgq6++okSJEtSsWZNatWoxZswYduzYQXR0NNWqVbsh9d4cW+fuvbnfCW7YnkqGYcwAOgDHRSTC8VxJYAFQBfgD6CoiZwyzTWMi8ChgA54VkW9v1NxupK+//poVK1bw7bff4u/vz8mTJ0lNTb1p4y9fvpwuXbrwn//8BzD39vGGESNGeG3
s3EhPT2fEiBHenoZSSimlrhNXe8x069aNHTt2ULRoUc6ePUvx4sXZuXNnrmv37dtHSkoKaWlpbmvd7fOyYMEC1q9fT2RkJC+//DLFihVzOe/81Lvb7yMjmAkMDMx1MHNlfW6DmX//+9+cOnWKwMBAVq1aRePGjZkwYQKRkZHExsZit9vZvHlznmoffvhhLl68iK+vr8va/I4N2QOKoKAgZ0AxZcoUXnjhBYYPH87ChQvp1atXjn+E9FQ7fvx4SpUqxfHjx13WeqqfMWMGTz/9NMuWLcNiseQqmLmW+l69etGrVy8A/u///o+wsDBq1apFQkIC69evp3z58qxcudJlraf6MWPGEBkZyf79+29YfX7nfvz4ccqWLcvBgwdZunQpW7Zs4fjx4wDY7XbefvttYmJibki9N8fWuXtv7ncEEbkhB9ACaAjszvLce8BQx/2hwLuO+48CXwAG0BT4JjdjNGrUSK60Z8+eHM/dTEuWLJEOHTq4fK1y5cpy4sQJERHZtm2btGzZUkREhg8fLs8884w89NBDUqlSJVmyZIm88sorEhERIW3atJHU1NQc7/Xdd9/J/fffL3Xq1JGOHTvK6dOnZeXKlRIaGirly5eXyMhIERFJSkpy1qxevVqaNm0qDRo0kC5dukhycrKcPXtWatasKT/99JOIiPz973+XadOmiYhIUFCQDBo0SGrXri2tW7eW48ePi4hIjx49ZNGiRSIi8uabb0rjxo0lPDxcnn/+ebHb7c5zZs+e7fzcr7/+ujRo0EAiIiJk7969IiKSkpIiPXv2lPvuu0/q168vy5cvFxERm80m3bp1k5o1a0rHjh2lSZMmsm3btmyf/4svvpAuXbo4HycmJkr79u1FRCQmJkYaNGggtWvXltdffz3b9z9kyBBp0KCBxMXFefwc586dExGRli1bypAhQ+S+++6TGjVqyFdffSUiImlpafLyyy9LeHi41KlTRyZNmiQiItu3b5cWLVpI/fr1JSoqSo4cOSIiIhMnTpR7771X6tSpI926dXP56yPj125iYqLL13PLm/U6d+/U69y9U3+njp3fep27d+pvxtgTJkyQ8PBwqV27towfP975/KRJk6RixYpSu3ZteeWVV/JU37VrV6lWrZrUq1dPKleuLPXq1XNZ+8MPP0h4eLicP39eLl++LG3atJGff/4529xfeuklefPNN/NUm1HvrlZEZOHChfLcc885H48YMULeffddZ63dbpewsDDZv3//da/fs2eP1KhRQ06ePCnnz5+Xpk2byosvvih79uyRihUrSpkyZaR8+fLyxx9/uBzbU33ZsmUlLCzMY72IyPTp06Vhw4bSvHlziYmJkYEDB0r//v3ljTfeEBGRBQsWSJs2bfJUu3jxYklMTPRYm9+xr/Taa6/JRx99JCEhIbJu3ToREbHb7RIcHJynWrvd7vzvlpvarPUimb9e9+3bJ/fdd98NqT927JiIiBw4cEDuueceOXPmjPO5tWvXytNPPy2ffvqp2/Hc1ScmJkp6evoNrc/v3B966CG59957pW7durJmzRoRMf/fExYWJjVq1JBXX33V+fPM9a735tg6d+/NvSADtktusp/cnHStB2ZHUtZQaR9QznG/HLDPcX8qEO3qPE/HrRgqJScnS7169aRGjRrSt29fWb9+vfM1T6HSgw8+KKmpqbJz504JDAyUVatWiYhIhw4dZNmyZTnGqVOnjvO9hw0bJgMHDnS+15gxY5znZYRKJ06ckObNm0tKSoqIiIwePdr5l6OEhARp2rSpxMXFycMPP+ysBWTu3LkiYoYu/fr1E5HsodKpU6ec5z/11FNitVqd52QNlTJCl48++kh69eolIuYfgHPmzBERkTNnzkiNGjUkJSVFxo0bJz179pSkpCTZtWuXFCpUKEeodPnyZalYsaLz88TExDjf69SpU5KUlCRpaWnSsmVL2bVrl3MeGX9Ju9rnWLBggYiYodJLL70kIiIrV650/sVk8uTJ8sQ
TT8jly5ed9ampqdKsWTM5fvy4JCUlyfz586Vnz54iIlKuXDm5ePGi87O6oqGSzr0gjp3fep17wRs7v/U6d+/U57bWXTDUv39/ueeee9wGQ+6CmXXr1kmbNm1k9erVIpL5A2Fu67PO/UYFO55qRUTWrVvnMRRyF8xkjL1hwwZx9XfW61Wvwcy1j+0qoKhVq5bz1/6aNWukYcOGeapNTEyUxMREj7Xu6m9WMOPuh+QaNWpIWFhYgQwIcjt3dwrC/59vxXqd++0tt6HSDVv+5kaoiBx13P8LCHXcrwBkXfR72PHcUfJj0CBw0aacL/Xrg2OtuytFixZlx44dbNy4kcTERLp168bo0aN59tlnPb7tI488gq+vL3Xq1CE9PZ127doBULt2bf74449s5547d46zZ8/SsmVLAHr06MGTTz7p8f23bNnCnj17ePDBBwFITU2lWbNmgLmR36JFi+jXrx+7du1y1vj4+NCtWzcAnnrqKTp37pzjfRMTE3nvvfew2WycPn2a8PBwHnvssRznZdQ2atSIpUuXApCQkIDVamXs2LGAeeW+gwcP8tVXXzFgwAAA6tatS926dXO8X+HChWnXrh2ff/45Xbp0YeXKlbz33nsALFy4kI8//hi73c7Ro0fZs2eP8z0yPs/VPkf16tVdzj3jv8WaNWuIiYmhcGHzt1DJkiXZvXs3u3fvpm3bttjtdkSEcuXKOT9H9+7d6dixIx07dnQ5B6WUUgpcL8UCWLp0KTExMRQqVIj27ds7/9y7XrXulmEdOnSITZs2sWvXLvz9/Z1t/1m5W0a1fft2hg4d6vzzsmzZsi4/s7v6jL18RISFCxeybt06l/XulkJl2LhxI6GhodSoUSPPtd9//73bWnC/FClDXFwc0dHRLmuvR72r5UCvvfYaEydOZMOGDTz55JP07t07z/XLly8HuGq9q6UhH3zwAbt27aJVq1asW7fO7XfnrnbDhg0AHmvzOzbAE088walTp/D19eWjjz6iePHifPLJJzz33HPExsYSEBDAtGnT8lQ7cOBAzp49S+nSpd3WuqufOHEiY8eOJTAwkM6dO9OzZ88bUr9x48Yczw0cOJCBAwc6l1x64q6+Xr16V63Nb31+5+6K1WolNjaWpKQkLBbLTa335tj5rc91rdnJAnZ7tlvr558zc/Zsko4dw/LII5nnZT3Xw3PW1auZvXAhST//jOVvf8t5vof3siYmMnf5cpJ27sTSsmXOmqzzdnFYN25k7urVJL300jV977ebmx0qOYmIGIYhea0zDKMP0AcgNDSU9evXZ3u9WLFizr18/FNT8UlPz/9ks7CnpnIpF3sFNWrUiEaNGlG9enXmzZvHE088gY+PD0lJSfj7+3P69GnS09NJTk7m0qVL+Pr6Ouft6+tLSkoKgHNNe9b9iZKTkxER53MpKSnY7XaX75Uxhs1mc64vzyo5ORm73c7u3bsJDAzk8OHD2dbsJycnU7hwYVJSUpxjXr58mQsXLnDixAn69u3Lhg0bCAsLY+TIkZw7d855TsacRITLly+TnJzMxYsXuXTpEsnJyaSnpzN79uwcf9inpaVhs9mcc7fb7Tm+AwCLxcK0adMICAhwbl75ww8/8N5777Fu3TpKlSpFTEwMZ8+edc4j6/fm6XNcuHDBOce0tDSSk5O5cOGC83NkzDHrnFJSUqhVqxZr164lPT3d+RfC5ORk5s+fz6ZNm/jiiy9466232LJli/Mv2BkuXrzI+vXrSUlJyfHrOi+8Wa9z9069zt079Xfq2PmtvxPmvnjxYlauXImI0KFDB7p06cLMmTP5/PPPKVGiBAC9e/emadOmOWp///13JkyYwJQpU/D19WXIkCGUKVOG48eP89VXXzFp0iT8/Pw4c+ZMjrnkpxZg/fr1VKpUyXmlr0qVKjFmzBj27dtHp06d+Prrr53n7tmzJ1utzWYjISGB+Ph4/P39mT9/Pvfccw/ff/89c+bMYcuWLQQEBNC3b19q1aqVY2x
39Rl/Lk6aNIkiRYrw559/8ueff7r83h9//HGaNWtGYGAgVapU4ejRo876Tz75hCZNmrj97+euFmD16tUeawGqVavGuHHjAPjkk08oU6YM69ev59y5cyxYsICpU6fesPozZ85QokQJjh07xty5c5k8ebIzYKhevTrvv/8+d911V57rt2zZgmEY7Nixw2P9gAEDSEpKolChQrzwwgvOK8dNnDiRDz74AD8/PwYNGuSy3l1tnz59uHz5MgEBAW5r8zs2wFtvvZXtccZ577//PkWLFgXMv8e5qndXO27cOFJSUihatKjbWnf19erVY8qUKc6xM8K13NanpKRQr149mjVrxoMPPuix3pVNmzbx9ddfs2nTJuc/RN+MWmf95s1s/uorHrr/fgy7HSM9HdLTnfcNuz3781me++rbb9n0/fd8FxFBZHg4ht2eWWu3Q0ZtxuP0dAwREn/6icHLl3MxLY0vV6zg/fbtaVO1qvN87HYMR5BgXPmc3c6aAwcY+NVXXEhPZ/Xnn/PBAw/Qtnx58zxHjbvb1ceO8cLOnVyw21lttfJxeDjtSpXKPA9y1GW9/+WZM/T+/XcuiLA6Pp5PK1bk0eBgl/XZxnY8XnX+PM+eOoVNhITly5lZogSPZezDdcXnvvK5FampPHXpEjYgYflyPitcmMd8fLLXZIzlghWIxtxMefWKFcQBeYlmstZ/kZCQp/qstSsTE/M1tnXLFoYNG3ZNv+ZvJzc7VDpmGEY5ETlqGEY5IOOfuv4EKmY5L8zxXA4iMg2YBtC4cWO5Mo3eu3cvwcHB5oPJk6/n3J08XTBw3759+Pj4OIOSffv2Ua1aNYKDg6latSr79u2jatWqfPHFFxQqVIjg4GD8/f3x9/fPnDc47xuG4fK1kiVLsnPnTpo3b86yZcto1aqVy/dKTk4mODiYVq1aMXjwYI4dO0b16tU5f/48f/75JzVr1mTcuHFERETw7rvv8uKLL/L111/j6+uL3W5n9erV/P3vf8dqtdKiRQuCg4Px9fUlMDAQX19fDMOgSpUqpKenO7uGMs7x8fEhODgYwzAoWrQowcHBBAUFOT/3I488wowZM/jggw8wDIPvvvuOBg0a0Lp1a5YvX07Lli05cOAAu3fvJigoKNt3AGZ314svvshnn33GU089RXBwMHa7neDgYEqUKIHNZmPNmjW0bds2xzwAj5/jscceIzg4mEKFCjnHvnTpEoZhOOc+Z84c2rdvT+HChTl9+jQNGzbk9OnT7N69m4iICAICAti/fz/33nsvBw8epH379kRFRVG5cmXn+2QVEBBAgwYNrvlfWTJ4s17n7p16nbt36u/UsfNbXxDm7qrb54033uCjjz6iQoUKAIwcOZJHH300R+3u3btZv349P/74o7PbJywsjCpVqtC1a1emTJnicewTJ07QunVrZ8dyx44d+fPPP9m+fTtPP/00UVFRN6QWzH+smzdvHnXq1CEwMJDXXnuNxo0bc/bsWX7++Wc+//xzAgICGDt2LPfdd1+O+rS0NEaMGEFQUBAtWrTA39+f/fv3ExISwscff0xQUBDdunXjt99+w7xGy9XrIyMjWb9+PT/99BN9+vTx+N8uMjKSMWPGAJkdN5GRkaxdu5YtW7awY8cOwsLC8lSblpZG586dmTZtmttayN4xs2PHDrZs2ULx4sV57733qFOnzlU7yvNT37x5c2fHSmxsLG3atKF48eLZOmbi4uJo1KhRnuqfe+455s+fT0BAgMf677//PsdzkZGR1KlT56q/19zV9u/fP1e/V/MztjsZ3Rc9e/bMV+fHtdRbrVZiZ8yg59NPY2nXDtLS4PJl8zbrfRfPWdevZ+Rbb2G7dInENWuI+9e/sDRs6P49rqzfs4eRy5ZhS0sjcfVq4qKisFSsaJ6Tnp69JuNwPG/96y9G/vgjNrudxC++IK5KFSxFi+Y4z91ja2oqI+12bEDiqlXX9EP+MMwf8r/cvDlP9ZOBi477F9PS+DE+nr55GHs
XcMFx/0J6Ot9t3kwvf3/w8bnqsf3sWS44ApsLdjtbDx7kmcuXzdcNI/v5WR8XLgw+Pmy22bjgCG0uiLApPZ3ocuVynu/qvmGw4bvvsJ08CY7vbkPZsjx5//0eazKeW7thAzbH7z8bsCYigo5RUZ7rstwmfP45ti1bnPUJDz6IpXPn7OdmPa54LmH+fGyOK43bgIQ2bbA8/fRV6zAMEj79FNvq1Zm1jzyC5Z//zFkLOZ8zDBI++gib1QrApUuXOHr0aL7+XnI7uNmhkhXoAYx23MZnef5FwzDmA/cD57IskytQUlJS6N+/P2fPnqVw4cJUr17d2fo6fPhwevXqxbBhw/L9C2/WrFnExMRgs9moWrVqjg6kK5UpU4aZM2cSHR3NpUuXAHj77bcREaZPn87WrVsJDg6mRYsWvP3227z55psEBQWxdetW3n77bcqWLcuCBQuyvWfx4sV5/vnniYiI4K677nL5F0xPhg0bxqBBg6hbty52u527776bFStW0LdvX3r27Enjxo0JDw93+5eYQoUK0aFDB2bOnMmsWbMAqFevHg0aNKBRo0ZUrlw5V6nxtXyO3r17s3//furWrYuvry/PP/88L774IosXL2bAgAGcOXMGu93OoEGDqFmzJk899RTnzp1DRBgwYADFixfP03ellFIqb641GHK3BAygS5cuVw2F3C3jyi13S7H2799PYGAg999/v9tgJz+14H4ZVkbH7pYtW9i2bRtdu3Z1GQy5Wkb1008/0blzZwzDoEmTJvj4+HDy5EnKlCmTY3xX9WB2XS9dupQdO3Z4/O5cLYUC2LFjB7Vq1cp1KJS1ds2aNVSsWNFjLbheigTm8i1PS9euR72r5UAPPfQQO3bsyFUw465+2rRp+Q5mbtqSHLsdUlMhNRWr1crszz4j6ddfzWUtly9nHqmpHh9bt20jeupUbKmpJKxcSdw//oGlRo3sNRlHRiCTtf7QIaK3b8dmt5NgtRJXsyaWkBDX51/x2HrpEtFpaeYPufHxeQ5WEjB/QAawXbpEwujR116flkbC2rVYiheHQoXMECPrccVzCSdPYnOEIza7nYT0dCxVq7o819XjhHXrsDl+f9uAhKZNzYAh6/mFCmWL7dfvAAAgAElEQVS/n+W5hBkzsK1alVn/+ONYBg7MrMk4fHxyPBeVmEjsv/6F7eJFigQGEjV5MrRrlxmMZNRlPbI8F/X558T+4x/YbDaKFClCVFwc5PLXfJTVSmx0dGbt7Nm5rnVZP3ly/urfe+/a5/7mm3kbOyIie/2QIXmrv+suYr/5JrN+wIDcz93fn9iNGzNrY2LyNvbFi8SuWZNZf5V/sLkj5GbjpWs5gDjMPZEuY+6R1AsoBawFfgbWACUd5xrAR8CvwA9A49yMcStu1H29Zb16282uDwoK8trY+a335tjXWq8bdevcC+LY+a3XuRe8sfNbn98NoxMTE2Xs2LECOC9+cSV3mz4PHz5cYmJiPI7rbtPm3NSKuN90efjw4RIaGip16tSRnj17yunTp92+h6uNk8PDw6VTp05it9vlm2++kSpVqrjcjDY/tVfK2Pj44Ycflvfff9/5fNWqVZ1XhM3K1cbBU6ZMkWHDhkliYqLs27dPwsLC3I7tql5E5N1335UWLVpcdb6uNvAVEXn44YdlypQp11Tbo0cP+de//nXVsd3x5u+1+Ph46dixo8THx1977bJlIhcuiJw7J3L8uMihQyK//iqyZ4/Izp0i33wj8tVXImvWiKxcKbJ0qcQPHixF/PwEkCJ+fhLfq5fIu++KvPWWyLBhIq++KvLSSyIvvijSp4/Is8+K/OMfIl26iFgsEt+woRTx8THrfXwkvmpVkfBwkRo1RCpXFilfXqR0aZGQEJGAAJFChZw7ncSDFAGz1vHY/a4oOY9+jtqMo1/W1wsVMscLDhYpWVIkNFSkQgWRKlXMudWuLf1KlcpeX6GCyMMPi3ToINKxo8iTT5qftUcPkd69RWJiRPr3F3n
pJenXsGH22mbNRN5/X2TSJJHJk0U++UQkNlZk7lyR+fNFFi8WiY83v/eEBIl/6y0p4u9vfvaAAIkfP15k926Rn34S+eUXkQMHRP78U+TYMZFTp0SSkkRsNpHUVBG7XeLj46VIkSJmfZEiefp1k5/aW6X+Wn+v5Lfem2Pnt17nfu31BQXe3qhbRNz9s0obF+cK0O9GzUUppZRSN5e7DaPB3HNk8ODBnDhxgtKlS+eoddctVL16dY4fP05CQgKVKlVyO3Z+uoXcdfuUKlWKZcuWsWnTJho3bsy4ceOc+yNl5a7bp2/fvjRv3pxWrVoxbNgwXn75ZWbMmOFyDu46fpo3b37Vjp/81ILrjh0fHx82b94MwP79+0lNTXX5381Vt81zzz3Hc889x5w5cyhRogSzZs1yufTNXT3kvtvHVccNwNChQ6+pWwdg5syZ17wH1zV369jtcOkS1mXLmD1nDkm//IKleXO4dMk8Ll7MfuviOeuuXUQvXmx2nKxYQVxkJJbQ0JznZz0yapOTiU5Odu6Vkq+OmdRUEj79NHu9n5/HI+HIkexdL3Y7llq1wNc3+7kuHiesXIlt0yazFkj429+w9OplnptxfsZ9F4+jNmwgtn9/bBcumF0rs2bB4487lxtdTX46R3LUDh2ap+4JS9u2xNWte81L7ywWC3FxcddUn5/aW6U+JCTkmjvz8lPvzbHzW69zv/b6243XNupWt76jRwvkCkSllFLXiatgaNiwYcybN4/g4GDKli3LzJkzKV++fLa6GxUKDRkyhI8++ohJkybx+OOPu63PTzB0o0Kh0NBQ9u7di4+PD88//7xzSZ0r+Ql28lML7oOhJUuWEBERgZ+fn9tgyFUw4+fnx9y5c695GRbkLhRy55qCHRFzSdKFC1iXL2f2vHkk/fgjlgcegAsXzPAl662L56w//kj0F1+Yoc7nnxPXuDGWEiVyFwpdvpx9E9ovv8xfsJOWRsLXX5uhkr9/5hEQAMWKZX/s70/CN99g273brMWxFKlLl8zz/Pxy3s/yXNT//kfsK69kLieaPt0MZvz8zHDGTaiYIUe4MnFi7oOZ8PDstf375y2YqV6duDJlCmQwk/Eed+oP6fpDvlLeo6GSUkopdZu61lAI3AdDr7zyCm3atCEyMpJJkyYxYsQIPv7442y1NyoUio+Pp3Tp0tSrV8/j585vMHQjQqGs/1CzbNkyIiIi3NbnJ9jJTy24D4b+/e9/e2V/nYyNi5NOnMDSurUZ3NhsmWGOh/vWXbuIXro0M9h54AEspUrlDIJc3bfbr/3qQr6+ZqiTlgaALT2dhJ9/xlKtmhncBAVBqVLZgpwrbxNWr8bmuNqeDUho2xZLTEz2cz3cj1q9mtju3TPDlXnzrr1j5rXX8hbM1K9PXKVKGsx4IZhRSilvuC1DJRFx+5clpW5F4uZym0qpO5u3QiHwHAxlOH/+vMs/b29EKHTp0iVGjhzJsGHDcvXd5ScYuhGhUP/+/dm0aRNFixalSpUqTJ061W19foKdmx4KpaWZYY7NBufPZ7+12bBu2ED0pEmZGx8/8QSWSpUyazKOjFAoy2E9ezZzGVZ+Ny5OTyfh+++xVK4MgYHmUbJk5v2AgBz3E1atyr6U6tFHzc1/M851dRsQYG7+e2UwExubt41gGzTIXv/ii3kLdjp21GBGgxmllLopbrtQKSAggFOnTlGqVCkNllSBICKcOnWKgIAAb09FKXUL8WYoBO6DIYDp06fz9NNPU6xYMRIdl/TN6kaEQqGhoSxfvpzevXsTEBDA4cOHadiwIVu3buWuu+7KUZ+fYOhGhEJz5szJ1RIwd6652yctDeuiRcyePZukffuwNG1qhj5ZA6CsxxXPWX/9NfNqVvHxxN11FxbDyAx/UlM9Dp8t2Ll8mYT587H4+0ORIq6PkiWd9xO2bcu+DKt5c/OS0UWKmCFOxm3W+1mei/ryy+xXZcrrlZWuvDrRP/8JubzKj7f3iMl4Dw1mlFJK3Wi
3XagUFhbG4cOHOXHihLencl1cvHgxX2FDfuq9OXZ+6wva3AMCAq56uWKlVMHkqtvolVdeYcGCBRQvXpxq1aoRGxvr3JQ4gzdDIXC/hAygd+/ezJ07l1GjRvHhhx/y5ptv5qi/EaHQwIEDncFMlSpV2L59e572BsptMHTTQ6G0tMwgJyUlx33rxo1ET5mS2e3z6KNYypfPGQi5OKypqXnbm6dQIXN5luNIOHMmc9NkERKCgszLtGeEQEFB2W+vuB/1zTfEDhmSufHxvHnQsWOuvrMc3T6DB+etW+fxx+/YzX+VUkqpm+W2C5V8fX25++67vT2N62b9+vU0aNDAK/XeHDu/9QV57kqpW4urUGjRokW88sorHDx4kK1btzrDmiu56zZq27Yt7dq1o02bNrz66quMGjWKd999N1utt0MhcL2ELKvu3bvz6KOPuqy/EaFQXlzvYMgZCp07h+XhhyE52fORlOS8b927l+hNm8wlWFYrcRUqmN0+GcHRpUsex87R7bNiBZaSJbOFPwQFQblyOZ5LSEzEtnWrWQsktGuHZdCgzHMyQqCMw9c320bKOYKdcePyFuzcd98176/j7W6d61GvlFJK3e5uu1BJKaWUul7chUIRERGMGDHC7dW/MnjqNsq4THnTpk1ZvHhxjlpvh0LgOhj6+eefna/Hx8dTq1Ytl7U3IhTK6o8//sjT+yGCdfFiZs+YQdLevVgaNzaDn6QkOHcu837Ww/G89dAhog8eNDt18nKJ9cBACA4m4eJFbOnpgOMS6X5+WB56CIoWNYOcq9xGbdlC7MCBZrdPkSJExcVd+6bLffvCww/n+mvzdrCjoY5SSil1a9NQSSml1G3NXafRG2+8wd69ez12GnkKhY4dO3bVsT11G2WYMWMG3bp1c1nvzVAIXAdDvXr14ttvvyU4OJjKlSu73M8JbkAoBFiXL2fm9Okk/fyzGQqdOQNnz5q3V96/4rH19Gmi09KuvgwsMBBCQrIdCT4+2BwXVHBeYr17dwgOznmEhJi3RYual0/HRbDz/vt56/apU4e40NAC2+2jlFJKqduXhkpKKaVuea6CodOnTzN48GDOnTtHlSpVWLhwISVKlMhW56nTaOnSpW7DnAy5CYU88dRtBPDOO+9QuHBhunfv7rLem6EQuA6GlixZkr8lZDNmkHTkCJZmzbIHQFc5rH/9RfT589iA1StXug6FfHygeHEoUSLzqFQJihcnYft2bN9+CziCofbtsbz6ao4ACV/fHPPO9yXWtdtHKaWUUrcpDZWUUkrd0twFQ9OmTaNhw4ZMmzaN0aNHM3r06Bz7EuVms2tPrhYK5Ya7bqMvv/ySDRs2sHbtWrebbd9qoRBk2VvozBkszZvD6dO5OqwHDxJ99OjVLw9fuHD2UKh0aahRg4Qffsh+JbB27bAMGZL93ODgbPsBZZUjGOrTB5o3z9Vn9nYopJRSSil1q9JQSSml1A03fvx4pk+fjmEY1KlTh9jYWDZv3kxMTAx+fn40atSITz/9lMKFc/6x5C4Yio+PZ+TIkQD06NGDyMjIPG12nVtXW4J2Na66jb788kvmz5/Ptm3bnJ/LlRsaCmVchUzE3DD61Kmcx8mT2R5bf/2V6F9/zd3eQsWLm5eHdxwJQUGZm00DCa1bY+nfP3soVKKEuZeQi2DI5d5ArVrl+nN7+0peSimllFK3Iw2VlFJK3VB//vknkyZNYs+ePQQGBtK1a1fmzZvH8OHDGTlyJE8//TSvv/46s2bNcoY3WbkLho4dO0apUqUAuOuuu1zucXQ9Oo1chUJ54arb6MUXX8Rms9G2bVvA3KzbU8dRVh4vTS8CNpsZBrk4rNu3E/3f/5pXIYuPJ65ECSwpKZCa6n7AYsWgVCkoVYqE1NTsews1b47ln//MFh5RsqQZKF3xPecIhQYOLFBLyJRSSimlVE4aKimllLoqd51GgwcP5syZM7Ro0cJtpxFAWloaFy5cwNfXF5vNRlBQEH5+flSsWBG
Atm3bMmrUKJehUm6CIcMw3C4hy2+nkatQaNmyZfTp04ekpCTat29P/fr1Wb16tct6V91Gv/zyS+66jUTMzaaPH4fjx7GuXEn0+PHYUlNJWLGCuGbNsPj7Zw+PLl50/V4+PiT4+WVehUyEhNBQLL16mUvMHMFRtqNkyWx7DOUIhgYPznUwpKGQUkoppdTtR0MlpZRSHnnqNFq7di1Hjhxh3bp1bjuNKlSowODBg6lUqRKBgYFERUXRtWtXhgwZwr59+4iMjGTx4sUcOnTI7RxcBUOhoaGcOnUKgKNHj1K2bFmXtfntNHIVCnXq1IkSJUpcW7hx6RLWuXP5bM4cklq0wHL33c7QyOWRluYsTYDMJWRpaSR8/z2W2rWhYkVo0MAMh9wdxYsTtWJF9lBo9Oib2i2koZBSSiml1O1FQyWllLpDuOo22rRpE3379qVIkSIULVqUmTNnUr169Ry17jqNatasyZEjRzx2Gp05c4b4+Hh+//13ihcvzpNPPslnn33G/Pnz+ec//8mnn35KVFSUx2VproKh33//ndWrV/PEE08wa9YsHn/8cZe17jqN+vfvz/Hjx6/aaeSKy32Jzp6Fv/6Co0fd3x49ivXMGaIxw6EVGzZk7ksUFARly5pHxYrQqFHmY8cRtWcPsUOHYrtwwQyFZs/WJWRKKaWUUsprNFRSSqk7gKtuo/nz5zNy5Ej+/e9/06NHDyZPnszbb7/NzJkzs9V66jTavn07gMdOozVr1nD33XdTpkwZADp37szmzZt56qmnmDRpEpGRkSQkJLB//36383cVDA0dOpS2bdtSo0YNKleuzMKFC13Wuus06tSp09WXoImYl7Q/cgT+/BOOHMG6di3RCxaYnUJWK3GlSmFJSoJLl3LWBwRAuXLmUasWtGplXtr+m28Ax75EzzyDZfJkM1S6Csvf/kZclSoaCimllFJKqVuChkpKKVVAuOo0atu2LUePHqVo0aIcP36cJk2asHz5cpf1V3YblS9fHsMwOH/+PADnzp2jfPnyOeo8dRr961//4uTJkzzxxBNuO40qVarEli1bsNlsBAYGsnbtWho3bszx48cBuHTpEu+++y7//ve/3X52V8FQqVKleP/996/9KmgLFjDnk09I+t//zCVoR45kHo4AiSNHcoRF2Zag2e0klCyJ5dlnzeDorruy34aE5LiSWY59iZ54IleBUgYNhZRSSiml1K1CQyWllCoA3HUabdy40dlt88QTT7hdAuaq2ygqKorp06fToUMHRo4cSUhIiMv9hjx1GmWMn5qa6rbT6P7776dLly40bNiQwoUL06BBA/r06cN//vMfFi5cSEBAAH379qV169Z5+k7cXgUtLQ2OHTODoYwjIyhyHNYDB4i+cAEbsGrt2uxL0CpUMI8HHoDy5TOPChWgfHmiduwgtkePzFDovfdu+hI0pZRSSimlbgUaKiml1E3iqtPI39+f//znP8yePZuiRYvSt29fBgwY4LLeVadRhqSkJNatW0dsbKzLWlfdRnPnzmXp0qWMGjWKF154gTFjxvDSSy8xffr0bLWeOo3Kli1LamrqVTuN3nzzTd58881sz40ZM4b27dvnrePGZjNDoQULiH7rrcyroDVqhMVuN0Ojv/4Cuz17XeHCZudQhQoQHm5eBe377823BBK6d8cyZQoEB191Cpa77ybOz0+XoCmllFJKqTuehkpKKXUTuOs0EhEOHTrErFmzaN26tXNJ2JXcdRplWL58OW3atCEkJMRlvatuo02bNrFr1y5niNWtWzfatWuXo9ZTp9GKFSs4f/48L730Uv47jc6fh8OHM49Dh3LeP30acHEVtP37sdx3H0REZHYaZRzly5sbXfv4OMfOsQSta9dcBUoZNBRSSimllFJKQyWllLppXHUa/ec//2HevHkcPnwYgLJly7qsdddp9NRTTwEQFxdH79693Y7trtto0aJFzg22//vf/3Lvvfe6rHfXaTRmzJirb3YNkJpqdhEdPAg
HD2JdvTpzs+v4eOKKFMHi2NspmzJlICwMKleGBx8071esSNSBA8S+8w62ixfNUGjmTF2CppRSSiml1E2moZJSSuWBqyVsMTExrF69mtDQUABmzpxJ/fr1s9W56zSKjo5mwYIFzJ49m6pVqzJp0iRq1KiRY1xP+xqdO3eOrVu3smzZMrfzdtdtFBYWxssvv8zYsWMpUaIEM2bMyNP34ew2OngQS3i4MzTi4EGzuyjj/l9/mVdSc8jWaSRCwt13Y+neHSpWNIOjsDCzyyggwOW4FiCubl1dgqaUUkoppZQXaaiklFK55G4JG0BMTAzDhw93W+uu0+jSpUsEBAQwdepUTp8+zXPPPefySmfuOo0ANmzYQIcOHQhwE8BkcNVt1KlTJ0qUKOE5WLl40Vx6dvAgHDiQ2W20fTvRP/xghkLLl2dudg0QGGgGRJUqwSOPmLdZjqidO4l99tnM5WfvvJOnTiPQUEgppZRSSilv01BJKaXywNNm2Z646zQKCwujc+fOHDhwgE6dOtGzZ0+X9e46jQDWrVvH6NGjr+nzWOPjmTVtGknffYfl7rtzBEccOGBeSe1K5cqRYLdjc3Qf2YCERx7B8vbbZnBUqhQYhttxLTVqEOfvr8vPlFJKKaWUKsA0VFJK3VFcLV/L6PAZMGAAM2bMICUlxWWtuyVs8+bN49NPP2XJkiW0adOG0aNH4+/vn63WXadRSEgIiYmJVK1alQ0bNlCzZk23c3fVaQQwYcIEz906Fy7AH3/Ab79lHr//jnXnTqIPHMAGfLlqVWanUUCAuYdRpUrQoYN5m/G4UiVzaZq/f87NrmNioGFDj99/VtpppJRSSimlVMGmoZJS6o7hbvnas88+y759+zhz5ozHendL2EaNGsVPP/3EAw88QJ8+fXj33Xd5/fXXs9W66zS6cOEC3bt3Z+/evYSGhjJ9+vQ8fSar1UrsjBkk/f47lurVs4VGzvtHj2YvKlIEqlYlwTAy9zUCErp2xfLhh1C6tMcuowy62bVSSimllFJ3Ng2VlFIFiqtOo379+rF9+3ZSUlJo0KABM2fOpGjRoi7rXS1fS09P5+OPP2bVqlUeN7v2tFn2vn378Pf3p2fPnowdO9ZlvatOI39/f1auXHn1K6iJmMvQ9u83j59/xrphA9Fbt5p7GsXHZ3YaGYbZTVS1KrRrZ95WrQp3323eli0LhpGz06h7d/Nqa3mg3UZKKaWUUkrduTRUUkoVGO46jcaPH09ISAjr16/HarXy4YcfMnTo0Bz17pavTZw4kQceeIBy5cp5HN/dErajjk4gEWH58uVERETk6XM5r6CWlISlZctswZHz/v79kJycWeTrS0LRotn3NHrsMSzjxplL1K5YfueKdhoppZRSSiml8kNDJaVUgeKq0ygkJAQwQ50LFy5guFm65Wr52uzZs1m0aJHLvYqu5G4J2yOPPMIff/xBkSJFqF+/Ph9//LHnNxKBQ4fgxx+xLlpE9OzZ2NLTc15BzTDMvYxq1oQePczbmjWhRg2oXJmolSuzdxr17m2+lgfaaaSUUkoppZS6VhoqKaUKDHedRgA9e/YkPj6eevXqMW7cOJf1rpavDR8+3LmvUUBAADabjerVq/PLL7+4fA9XS9jWrVvnevmaiHkFtT174Mcfs986NgNPgOz7GjVrhmXIEDM8qlrV3DTbDe00UkoppZRSSnmTj7cnoJS684wfP57w8HAiIiKIjo7m4sWLdO/enWeeeYaIiAiee+45Ll++nKMua6fRkSNHOH/+PHPnzgUgNjaWRYsWce+997JgwQKX42ZdviYirF27lpdeeom//vqL+fPnO7uN3AVK7ljj4/lw1Cisr78OY8dCz55w//0QEgJVqsCjj8Irr8CqVVC0KDz7LEyZAhs2EDVnDkWKFAEwu42GDoWOHaF2bY+BUgaLxcLAgQM1UFJKKaWUUkrddNqppJS6qdzti9S9e3d69+5NZGQk//jHP5g+fTp9+/bNVutpo2yAQoU
K8fe//5333nuPnj175hjb3fK1XBMxr6T244+wezfs3o31f/8jev9+bMAXCQnm8rW77jJDoZ49zdvwcPO2VKkcb2kB4kJCtNtIKaWUUkopVeBoqKSUuulc7YsUFRXF+vXrMQyDJk2acPjw4Rx17jbK/uWXX6hevToigtVqpVatWm7HdrV8LetG2SmOZWmcPJktPHLeP3Mms7BsWRJ8fbMvX+vVC8v06Xn6PnRfI6WUUkoppVRBpKGSUirPxo8fz/Tp0zEMgzp16hAbG8v06dMZNWoUR44c4cSJE5QuXdplrad9kQAuX77MnDlzmDhxYo5ad51GrVu3dgZCDzzwAFOmTMn1Z7EuWED0s89iu3iRhBUriKtdG8uxY3DsWOZJxYub3UZdu0JEhHk/PBzKliXKas2+WbZ2GimllFJKKaXuEBoqKaXyxN3ytQcffJBx48YxdOhQj/WursA2d+5c5xK2F154gRYtWtC8eXOX9a46jTZt2gTgerPsDGlp8PPP8MMP8P335u0PP5Dw+++ZnUZpaST89ReW9u3N8CgjQCpf3rwSmwu6WbZSSimllFLqTqWhklIqz1wtX2vQoAHnzp27aq2nfZFmzZrFmTNnmDp1ap7n5FzCdu4clvvuc4ZGzhBp7164dMk8uVAh8+pqTZoQ9cADxC5ejO3SJbPT6JNPII/BkC5fU0oppZRSSt2JNFRSSuXJ1ZavXY27fZGmT5/Otm3b2LZtGz4+ubwwpQj88gvWKVOI/uADs9No+XJzs+yMc8qXhzp14G9/g7p1zfu1ajmvrGYB4rp21U4jpZRSSimllMojDZWUugPt27ePbt26OR//9ttvjBgxglatWtGvXz98fHyoUqUKn332GSEhIdlqr7Z87Wrc7YsUFBREaGgozZo1A8wOptdffz178cmTsHUrfPONebt1K5w+TQJk3yy7eXMsI0aYAZKLK65dSTuNlFJKKaWUUirvNFRS6g50zz33sHPnTgDS09OpUKECnTp1okuXLjz//PMMGjSIGTNmMGbMGN56661stZ6Wr+WWq32R0tLSsu+JdPEibNliBkgZIdKvv5qv+fiYex117mwuYbtwgdjXXsvcLHvwYNCASCmllFJKKaVuKA2VlLrDrV27lmrVqlG5cmX2799PvXr1AGjbti0PP/xwjlDJ3fK1fBHB+vHHLPj0U5JCQ7EcPw67dsHly+brFSrA/fdDnz7mbaNGULSos9wCxFWpokvYlFJKKaWUUuom0lBJqTvc/PnziY6OBiA8PJxNmzbRqlUrFi1axKFDh3Kc72752qRJk3jrrbc4c+YMdevW5dFHH2X69OmuB01ONjuPtmyBr7/G+tVXRCcnYwOWA3F16mB56SUzQGrSxAyVrkKXsCmllFJKKaXUzaWhklIFkLs9kSIjI3nhhRfw8/OjcOHCTJ48mSZNmrh9n9TUVKxWK6NGjQJgxowZPP300yxbtgyLxYKfn5/LOlfL1wYMGEDdunVzhjoisH+/M0Di669h926w283X772XhLAwbHv3Ao49kVq0wDJ6dN6+FKWUUkoppZRSN5WGSkoVQO72RHr++efp0aMHr776KqtWrWLIkCGsX7/e7ft88cUXNGzYkNDQUABq1arFmDFjiIyMZP/+/axcuTLXc7JarcTGxpL0119YypTJDJC2bIHTp82TihUzu486dYJmzcwupBIliLJaiY2OztwTKQ9Xk1NKKaWUUkop5R1eCZUMw/gX0BsQ4AegJ1AOmA+UAnYAT4tIqjfmp1RBknVPJMMwOH/+PADnzp2jfPnyHmvj4uKcS98Ajh8/DoDdbuftt98mJibm6hM4cgTr++8TPXEitrQ0EpYvJw5znyNq184MkJo2hXvvNTfZvoLFYiEuLk73RFJKKaWUUkqpAuSmh0qGYVQABgC1ReSCYRgLgb8DjwLjRWS+YRgfA72AKTd7fkoVNFn3RJowYQKRkZHExsZit9vZvHmz27rz58/z3//+l6lTpzqfi4u
LY+zYsQQGBtK5c2d69uyZvUgEfv8dvvoq8/j1VxIwl63huE147DEss2dD8UPbaPkAACAASURBVOK5/hy6J5JSSimllFJKFSw5WwZujsJAoGEYhYEiwFGgNbDY8fosoKOX5qbUDbdv3z7q16/vPEJCQpgwYQIAS5cupVatWoSHhzNkyBCP75OxJ9KTTz4JwJQpU3jhhRc4dOgQ48ePp1evXm5rg4KCOHXqFMWKFXM+N3DgQObMmcP+/fsZPXo0BsCePfDxx/CPf0DFilCtGvTsCfHxUKcOvP8+UWPHUqRIEQBz+Vrv3nkKlJRSSimllFJKFTw3vVNJRP40DGMscBC4ACRgLnc7KyJpjtMOA1e/3JNSBZS7PZESExPZtGkTu3btwt/f37kczZ0r90SaNWsWy5cvB+DJJ5+kd+/eeZqXddky5k6cSFK1alhOn4b//Q9OnjRfLFcOWraEFi3MI8tSNgsQV6OGLl9TSimllFJKqTuIISI3d0DDKAEsAboBZ4FFmB1Kb4hIdcc5FYEvRCTCRX0foA9AaGhoo/nz59+sqXtFSkoKRYsW9Uq9N8fOb31Bmvu2bduYNWsWH374IW+88QZ/+9vfeOihh3JVO2LECO677z4eeeQRAHr06EFMTAzNmjVjx44dTJ06lWnTprl/A7udor/9RvFvv2XLunX8c98+bJjtgzNLluShJk04V7cuZ+vW5WL58mAY1+1z32r1Onedu459c+p17jr3gjR2fut17jr3gjR2fut17gVv7PzW69yvvb4gaNWq1Q4RaXzVE0Xkph7Ak8CnWR4/g7l30kmgsOO5ZsDqq71Xo0aN5HaXmJjotXpvjp3f+oI09549e8oHH3wgIiL16tWTZ555Rpo0aSItWrSQrVu3uq1LSUmRkiVLytmzZ53Pbdy4UWrUqCF169aVJk2ayPbt27MX2e0ie/aIfPihSOfOIiVLipg7JUm/YsUEc/N8AaRfv355+swiBet7v5XGzm+9zt079Xfq2Pmt17l7p17n7p16nbt36nXu3qnXuRe8sfNbr3O/vQHbJRcZjzeu/nYQaGoYRhHM5W9tgO1AItAF8wpwPYB4L8xNqZsqY0+kUaNGAZCWlkZycjJbtmxh27ZtdO3ald9++w3DRYdQxp5IWT300ENMmzYtc7NrEfj1V0hMhHXrzNu//jJfq1QJLBZo3RpatSLq22+JjY7GZrOZ+yJFRd3Ij66UUkoppZRSqoDzxp5K3xiGsRj4FkgDvgOmASuB+YZhvO147tObPTel8mLfvn1069bN+fi3335jxIgRDBo0CIBx48YxePBgTpw4QenSpV2+x5V7IoWFhdG8eXMMw6BJkyb4+Phw8uRJypQpk6s5Wa1WZk+eTNLy5VjOnjVDpIMHzRfvussZING6Ndx9d7blbJawMOLi4nRfJKWUUkoppZRSueKNTiVEZDgw/IqnfwOaeGE6Sl0Td5ttAxw/fpyEhAQqVark8T3i4uKIjo52Pu7YsSObN28GYP/+/aSmproNpJwuXIANG7BOnkz0ihXYRPhi9WrigoOxREXBq6+aIdI991x1TySLxUJISEhmp5NSSimllFJKKeWGV0IlpW43a9eupVq1alSuXBmAjz76iEmTJvH444+7rTl//jz//e9/mTp1qvO55557jiVLlvD/7d17nFV1ufjxz1cQZUDEBEEFRRT1mIqCRpYlZiBpjdjRzCiVIsuORaV1TLvYRaMLntSsoyWoXUDjlzJHU2dC8XIsjZuKF9ITlJiKlYA6Kox8f3+sBQyw195rzTCzZ4bP+/Xar9m3Z32fvfcze6397LW+++CDD6ZHjx5cf/31Wx76FiM89RTccQfcfjvMnQuvv059t240phPvNwL1Z5xB7Y9/vLUfqiRJkiRJgE0laauYOXPmhj2OZs+eTb9+/Rg+fHjZmFJzIvXo0YOLLrpoyz2FXn01OZTt9tuTZtJf/pJcv//+8KlPwbhxjH35ZaafdZZzIkmSJEmS2oV
NJamVmk+23djYyKWXXsrXvva1Fi2rrq6O6dOns3rVKmqHDdvYRLr3XlizBnr1Sg5lO+88GDcOhg7dEFsLzNhhB+dEkiRJkiS1C5tKUis1n2z70UcfZenSpUyaNIkdd9yR5cuXM2LECB566CEGDhxYdjl1N93E6WecQeMbb1A/ezYzYqQW4K1vhc99LmkiHX007LBD5jKcE0mSJEmS1F5sKmmblfXrbc8++yw33ngjffv2Zd9992X69On07ds3cznNJ9s+5JBDWLFiBXPnzmX06NEMGTKEefPmZU+2vWwZ3HYb3Hor9fX1NK5bB0BjjNSPHk3tDTfA4MFb7TFLkiRJkrS1bFftBKRqWf/rbYsWLWL+/PnU1NRw8sknM2bMGKZPn84jjzzC/vvvz3e/+93MZayfbPuDH/xgvkGbmuD+++GCC+Dgg2GffeDcc+Evf2HsBz5ATboXUk1NDWO/8AUbSpIkSZKkDss9lSQ2/fW2vffem7lz5wLw9re/nVmzZmXGlZpsu7lly5bBv/4FM2bArbcmcyS99BJ07w7HHAOTJsGJJ8KwYcmcSOmcSs6JJEmSJEnq6GwqSWz6623NTZs2bZND5PKomz2bX1x5Jauvu47av/wFHngA3nwT+veH2lp4//thzBjYeectYp0TSZIkSZLUWdhU0jav+a+3NXfJJZfQvXt3JkyYkG9BS5ZQ9/Wvc/pvfkNjjPwOmLHPPtR+5StJI+nII2E7jziVJEmSJHUNNpXUaWVNtH3GGWdw/vnns2rVKoYMGcJNN93ELrvskrmc5r/ett4dd9zBPffcw5w5cwghZCexYgXMnAm//CX86U/UA43pTY1A/QknUPvtb7fugUqSJEmS1AG524Q6rayJtqdMmcKIESN46qmnOO6445gyZUrZ5TT/9TZIGkozZ86krq6OmpqaLQMaG5NG0oknwh57wOTJsHYtTJ3K2OnTN8TU1NQwduzYrfqYJUmSJEnqKGwqqUtoPtH27NmzOf744wE488wzueWWWzLjSv1627nnnktjYyNjxozhsMMO49Of/nQyJ9KcOXDWWTBgAJx+Ojz6KHzpS8nfhQvhi1+k9qyzmDFjBuPHj2fGjBlOti1JkiRJ6rI8/E1dQvOJtl944QV23XVXAAYOHMgLL7yQGVfq19uefvpp5s6dm0yW/eij8ItfwN57w7PPQp8+8KEPwUc/mvx6W4k5kpxsW5IkSZK0LbCppE4va6JtgBBC+TmRSqi77jp+88MfsvqVV6j961+he3d43/vgssvgAx+Anj23VuqSJEmSJHVaNpXU6W0+0faAAQM27H303HPPsdtuu1VeSGMj3HwzdT/8IacvWkQj8NvttmPG2WdT+53vQP/+bfgIJEmSJEnqfJxTSZ3e5hNt19bWcueddwJw/fXXc9JJJ5UOXLcO7rkHPvEJGDgQPvpR6p9+euOvt61bR/3229tQkiRJkiSpBPdUUtWtXLmSSZMmsXjxYkIITJs2jR/96EfMnz+f3r17s3LlSvr27cuiRYu2iF0/0fbVV1+94boLLriAMWPGMGzYMPbee29uuummTYOefhpuuCGZK2nZMujdG049Fc44g7ErVzJ9wgQaGxv99TZJkiRJksqwqaSqmzx5MuPGjWPWrFmsWbOGxsZGbrzxxg2TZZ933nnsvPPOJWNLTbS96667ctlll206UfbKlXDjjUkz6YEHIAQYMwa+8x04+WSoqQGglmTPp+nTpzNx4kR/vU2SJEmSpAw2lVRVq1at4t577+W6664DoEePHvTo0WPD7TFGbrrpJu66667cy6yrq2P69Omsfuklanv0gOuvh7o6eOMNOOgg+N73YMIE2HPPkvH+epskSZIkSZXZVFJVLV26lP79+zNx4kQefvhhRo4cyeWXX06vXr0AuO+++xgwYADDhg3Ltby6ujpOP+00Gl9/nfpbbmEGULvrrnD22XDmmTBiRLKXkiRJkiRJahUn6lZVNTU1sWDBAs455xwWLlx
Ir169mDJlyobbN5+EO1OM0NBA/ec+R+PrrwPQCNSfcAL8/e9wxRUwcqQNJUmSJEmSthKbSqqqQYMGMWjQIEaNGgXAKaecwoIFCwB48803+e1vf8tpp52WvYDXXoOf/QwOPhjGjmXsypXUbL89QDLR9qc+Bc0Op5MkSZIkSVuHTSVV1cCBAxk8eDBLliwBYM6cORx00EEAzJ8/nwMPPJBBgwZtGfjss3DRRTB4cHJoWzp3Uu0LLzBj1izGjx/PjBkznGhbkiRJkqQ24pxKqrorr7ySCRMmsGbNGoYOHcr06dMBuOuuu7Y89O1Pf4If/QhuugnefBPGj4fPfx7e9a4Nh7Y50bYkSZIkSW3PppJabeXKlUyaNInFixcTQmDatGkcddRRAEydOpXzzz+fF198kX79+pWMP+yww5g3b94W119wwQVJY6ipCW6+OWkmPfAA7LQTfPazcO65MHRoWz40SZIkSZKUwaaSWm3y5MmMGzeOWbNmsWbNGhobGwFYsWIF9fX17LXXXoWXWVdXx/XXXMPq666j9q674JlnkgbS5ZfDWWdBnz5b+VFIkiRJkqQibCqpVVatWsW9997LddddB0CPHj3okU6MfdVVV3HFFVdw0kknFVpm3bXXcvqnP01jUxN3ADMOOYTaH/8YTjwRunXbyo9AkiRJkiS1hBN1q1WWLl1K//79mThxIocffjiTJk3i1VdfZfbs2fTr14/hw4fnX9gzz8A551D/yU/S2NQEQCNQ/+53Q22tDSVJkiRJkjoQm0pqlaamJhYsWMA555zDwoUL6dWrFxdffDGXXnopEydOzLeQZ56Bz3wG9t0Xrr2WsccfT03PngDU1NQwduzYNnwEkiRJkiSpJWwqqVUGDRrEoEGDGDVqFACnnHIKCxYsYOnSpUyaNIkhQ4awfPlyRowYwfPPP79p8PLl8B//AfvtBz//OXz84/DUU9TefjszZs5k/PjxzJgxg9ra2io8MkmSJEmSVI5zKqlVBg4cyODBg1myZAkHHHAAc+bMYcSIEcyZM4e5c+cyevRohgwZwrx58zb++tuzz8J3vws/+xmsW5c0ky68EPbee8Nya2tr6dOnT/Lrb5IkSZIkqcOxqaRWu/LKK5kwYQJr1qxh6NChTJ8+vfQdn30WpkyBa65JmkkTJybNpCFD2jVfSZIkSZLUejaVxMqVK5k0aRKLFy8mhMC0adNYvnw5X/rSl/jb3/7GQw89xBFHHJEZf9hhhzFv3rzM25c98AB861tJM+nNN+Gss+Cii2wmSZIkSZLUidlUEpMnT2bcuHHMmjWLNWvW0NjYSN++ffnWt77FtGnTWrTMuro6bvjJT1g9dSq1DQ3Q1LSxmbTPPlv3AUiSJEmSpHZnU2kbt2rVKu69916uu+46AHr06EGPHj3o27cvL7zwQouWWXfDDZz+iU/Q2NTE7cCM976X2quvhqFDt17ikiRJkiSpqvz1t23c0qVL6d+/PxMnTuTwww9n0qRJvPrqqy1b2MqV8NWvUp82lAAagfoDDrChJEmSJElSF2NTaRvX1NTEggULOOecc1i4cCG9evViypQpxRby6qvJr7ntsw9ccglj3/52anbcEYCamhrGjh3bBplLkiRJkqRqsqm0jRs0aBCDBg1i1KhRAJxyyiksWLAgX/Abb8AVVyR7IV14IRx9NCxcSO199zHjxhsZP348M2bMoLa2tg0fgSRJkiRJqobccyqFEHoCe8UYl7RhPmpnAwcOZPDgwSxZsoQDDjiAOXPmcNBBB5UPamqC66+Hb34TnnkGRo+GW26Bo47acJfa2lr69OnD6NGj2zR/SZIkSZJUHbn2VAohfABYBNyRXj4shFDXlomp/Vx55ZVMmDCBQw89lEWLFnHhhRdy8803c+qpp/KHP/yBE088keOPPx7WrYOZM+Ggg2DSJNh9d2hogLvu2qShJEmSJEmSur68eypdDLwNmAsQY1wUQvB34buIww47jHnz5m1y3cknn8wuu+y
S7GkUI9x6Kxx+ODzyCBx8MMyeDR/4AIRQnaQlSZIkSVJV5W0qrY0xrgqbNhBiG+SjDqKuro7p06ez+v77qb31VnjwQdhvP/j1r+G002A7p+OSJEmSJGlblrep9FgI4SNAtxDCMOBzwAMtHTSE0Bf4OXAwSXPq48AS4EZgCLAM+FCM8aWWjqGWq6ur4/QPf5jG116j/pZbmLHrrtT+7Gdw5pmw/fbVTk+SJEmSJHUAeXc3+SzwVuANYAawGvh8K8a9HLgjxnggMBx4ArgAmBNjHAbMSS+rCurr6mh87TUAGoH6U09N5lCyoSRJkiRJklK5mkoxxsYY40UxxiNjjEek519vyYAhhJ2BdwPXpsteE2NcCZwEXJ/e7XpgfEuWr1Z6/XXG/u//UpNerKmpYez73lfVlCRJkiRJUseT6/C3EML/sOUcSquAecDVBRtM+wAvAtNDCMOB+cBkYECM8bn0Ps8DAwosU1vDunXw0Y9S++STzPjyl5n+5z8zceJEamtrq52ZJEmSJEnqYEKMlefbDiFcDvQnOfQN4DSSQ+Ai0CfG+LHcA4ZwBPBH4J0xxgfTZa8GPhtj7Nvsfi/FGHcpEX82cDbAgAEDRs6cOTPv0J3SK6+8Qu/evdslft+rrmLwrFk8/ZnPsPzUU9t17K0db+7m3pnGbm28uW97ufu8mXtnGru18eZu7p1p7NbGm/u2l7vP27aZe2dw7LHHzo8xHlHxjjHGiifgT1nXAY/lWUazuIHAsmaX3wXcRjJR9+7pdbsDSyota+TIkbGru/vuu9sn/rLLYoQYP//59h+7DeLNvTrx5l6deHOvTvy2OnZr4829OvHmXp14c69OvLlXJ97cO9/YrY03964NmBdz9HjyTtTdO4Sw1/oL6fn1bbk1OZexvon1PPBMCOGA9KrjgMeBOuDM9LozgdlFlqtWuOkm+OIX4ZRTYOrUamcjSZIkSZI6gVxzKgHnAfeHEP4PCCTzIn0mhNCLjZNrF/FZ4FchhB7AX4CJJJOG3xRC+ATwV+BDLViuirr3XvjYx+Doo+EXv4Dt8vYZJUmSJEnStixXUynG+LsQwjDgwPSqJXHj5Nw/KjpojHERUOrYvOOKLkuJIUOGsNNOO9GtWze6d+/OvHnzNtw2depUzj//fF588UX69eu3Mejxx+Gkk2DoUJg9G3bcsQqZS5IkSZKkzijvnkoAw4ADgB2B4SEEYow3tE1aaom7775706YRsGLFCurr69lrr702vfPf/w7ve1/SSLr9dnjLW9oxU0mSJEmS1NnlOtYphPAN4Mr0dCzwfcDfme8ErrrqKr7//e8TQth45erVcMIJ8K9/we9+B0OGVC0/SZIkSZLUOeWdQOcUkkPTno8xTgSGAzu3WVYqLITA2LFjGTlyJNdccw0As2fPpl+/fgwfPnzjHdeuTSbkXrwYZs2Cww+vUsaSJEmSJKkzy3v422sxxnUhhKYQQh9gBTC4DfNSQffffz977rknK1asYMyYMRx44IFceumlfO1rX9t4pxjhk5+EhgaYPh2OP756CUuSJEmSpE4tb1NpXgihL/AzYD7wCvCHNstKhe25554A7Lbbbpx88sncc889LF26lEmTJrHjjjuyfPlyRuy3Hw+tXs3Ab30LzjqruglLkiRJkqROLdfhbzHGz8QYV8YY/xsYA5yZHganDuDVV1/l5Zdf3nC+vr6eI488khUrVjBz5kyWLVvGoL59WbB6NQMnTYKvfrXKGUuSJEmSpM4u155KIYQ5McbjAGKMyza/TtX1wgsvcPLJJwPQ1NTERz7yEcaNG7fxDrfdBv/8J7z3vfDTn0LzSbslSZIkSZJaoGxTKYSwI1AD9Ash7AKs70b0AfZs49yU09ChQ3n44YdL3rbTk0/CeeexbORIuPlm6J73iEdJkiRJkqRslToMnwI+D+xBMpfS+qbSauDHbZiXtoK6q6/mjsmTebZvX2pvuw169652SpIkSZIkqYsoO6dSjPHyGOM+wPk
xxqExxn3S0/AYo02lDqzu5ps5/Zxz+OmaNZy+ejV1Dz5Y7ZQkSZIkSVIXkutYqBjjlSGEdwBDmsfEGG9oo7zUSvW/+hWNMQLQ+Prr1NfXU1tbW+WsJEmSJElSV5F3ou5fAPsCi4A306sjYFOpgxrbowfTgUagpqaGsWPHVjslSZIkSZLUheSdtfkI4KAY011f1OHVPvMMM/bdl+mHHMLEiRPdS0mSJEmSJG1VeZtKi4GBwHNtmIu2lpdfhj/+kdrzz6fP8cczevToamckSZIkSZK6mLxNpX7A4yGEh4A31l8ZY3T3l45o7lxoaoIxY6qdiSRJkiRJ6qLyNpUubssktJU1NEDPnvDOd8If/lDtbCRJkiRJUheU99ff7gkh7A0MizH+PoRQA3Rr29TUYg0N8O53ww47VDsTSZIkSZLURW2X504hhE8Cs4Cr06v2BG5pq6TUCsuXw5NPeuibJEmSJElqU7maSsB/AO8EVgPEGJ8CdmurpNQKDQ3J37Fjq5uHJEmSJEnq0vI2ld6IMa5ZfyGE0B2IbZOSWqWhAQYOhIMPrnYmkiRJkiSpC8vbVLonhHAh0DOEMAb4DfA/bZeWWmTdOvj97+G974UQqp2NJEmSJEnqwvI2lS4AXgQeBT4F/A74alslpRZ65BF48UXnU5IkSZIkSW0u16+/AT2BaTHGnwGEELql1zW2VWJqgfXzKb33vdXNQ5IkSZIkdXl591SaQ9JEWq8n8Putn45apaEB3vpW2GOPamciSZIkSZK6uLxNpR1jjK+sv5Cer2mblNQir78O993noW+SJEmSJKld5G0qvRpCGLH+QghhJPBa26SkFrn//qSxZFNJkiRJkiS1g7xzKk0GfhNC+DsQgIHAaW2WlYpraIDtt4djjql2JpIkSZIkaRtQsakUQtgO6AEcCByQXr0kxri2LRNTQQ0N8I53QK9e1c5EkiRJkiRtAyoe/hZjXAdcFWNcG2NcnJ5sKHUkK1bAwoUe+iZJkiRJktpN7l9/CyH8ewghtGk2apk5c5K/NpUkSZIkSVI7ydtU+hTwG2BNCGF1COHlEMLqNsxLRTQ0wC67wMiR1c5EkiRJkiRtI3JN1B1j3KmtE1ELxZg0ld7zHujWrdrZSJIkSZKkbUSuPZVC4qMhhK+llweHEN7WtqkplyVLYPlyD32TJEmSJEntKu/hbz8BjgI+kl5+BbiqTTJSMQ0NyV+bSpIkSZIkqR3lOvwNGBVjHBFCWAgQY3wphNCjDfNSXg0NMHRocpIkSZIkSWonefdUWhtC6AZEgBBCf2Bdm2WlfNauhblz3UtJkiRJkiS1u7xNpSuAm4HdQgiXAPcDl7ZZVsrnwQfh5ZdtKkmSJEmSpHaX99fffhVCmA8cBwRgfIzxiTbNTJU1NMB22yW//CZJkiRJktSOyjaVQgg7Ap8G9gMeBa6OMTa1R2LKoaEBjjgCdtml2plIkiRJkqRtTKXD364HjiBpKL0P+GGbZ6R8Vq2Chx7y0DdJkiRJklQVlQ5/OyjGeAhACOFa4KG2T0m53H03vPmmTSVJkiRJklQVlfZUWrv+jIe9dTANDdCrFxx1VLUzkSRJkiRJ26BKeyoNDyGsTs8HoGd6OQAxxtinTbNTtoYGOOYY6NGj2plIkiRJkqRtUNk9lWKM3WKMfdLTTjHG7s3Ot6qhFELoFkJYGEK4Nb28TwjhwRDC0yGEG0MIdkuy/PWv8NRTHvomSZIkSZKqptLhb21pMvBEs8vfA/4rxrgf8BLwiapk1Rk0NCR/bSpJkiRJkqQqqUpTKYQwCDgR+Hl6OQDvAWald7keGF+N3DqFhgbYYw846KBqZyJJkiRJkrZR1dpT6UfAl4F16eVdgZXNJgNfDuxZjcQ6vHXrYM6cZC+lEKqdjSRJkiRJ2kaFGGP7DhjC+4ETYoyfCSGMBs4HzgL+mB76RghhMHB7jPHgEvFnA2cDDBgwYOTMmTPbK/WqeOWVV+jdu/eGy72XLOGIT3+axy+
8kBU5Dn/bPL41Y3emeHM39840dmvjzX3by93nzdw709itjTd3c+9MY7c23ty3vdx93rbN3DuDY489dn6M8YiKd4wxtusJ+C7JnkjLgOeBRuBXwD+A7ul9jgLurLSskSNHxq7u7rvv3vSK7343Rojx+edbFt+asTtRvLlXJ97cqxNv7tWJ31bHbm28uVcn3tyrE2/u1Yk39+rEm3vnG7u18ebetQHzYo4eT7sf/hZj/EqMcVCMcQjwYeCuGOME4G7glPRuZwKz2zu3TqGhAQ49FAYMqHYmkiRJkiRpG1bNX3/b3H8CXwwhPE0yx9K1Vc6n42lshPvv91ffJEmSJElS1XWv5uAxxrnA3PT8X4C3VTOfDu/ee2HNGptKkiRJkiSp6jrSnkqqpKEBevSAd72r2plIkiRJkqRtnE2lzqShAY4+Gmpqqp2JJEmSJEnaxtlU6iyefx4efdRD3yRJkiRJUodgU6mz+P3vk782lSRJkiRJUgdgU6mzaGiAXXeFww+vdiaSJEmSJEk2lTqC119/nbe97W0MHz6ct771rXzjG98A4Mc//jETJkwghMA/7rwTjjsOtvMlkyRJkiRJ1WeHogPYYYcduOuuu3j44YdZtGgRd9xxB3/84x955zvfydSpU9l7jz3ghRc89E2SJEmSJHUY3audgCCEQO/evQFYu3Yta9euJYTA4YcfzqpVq+C115I72lSSJEmSJEkdhHsqdRBvvvkmhx12GLvtthtjxoxh1KhRG298/XUYOhT23rt6CUqSJEmSJDVjU6mD6NatG4sWLWL58uU89NBDLF68GICwdm3SVDrmmCpnKEmSJEmStJFNpQ6mb9++HHvssdxxxx0A9HnsMYgRRo+ubmKSJEmSJEnN2FTqAF588UVWrlwJwGuvvUZDQwMHHnggAG+ZPz+509FHVys9SZIkSZKkLdhU6gCee+45jj32WA499FCOPPJIxowZw/vf/36uuOIK3jFzJsuBQ48+mkmTWy2M7AAAF59JREFUJlU7VUmSJEmSJMBff+sQDj30UBYuXLjF9Z/72Mf47Be+QPj61+Gb36xCZpIkSZIkSaW5p1IHVve97/HZdeuo69mz2qlIkiRJkiRtwqZSB1VXV8fpU6dyFXD6t79NXV1dtVOSJEmSJEnawMPfOoghF9y2yeV/NlxDY1MTAI2NjUz45jXs+kC3Dbcvm3Jiu+YnSZIkSZLUnHsqdVA9h4wgdN8BgNB9B3oOGVHljCRJkiRJkjZyT6UOqmbYKPrVfpkDX57PkzuNpGbYqGqnJEmSJEmStIFNpQ6sZtgoTj1kJFMf9WWSJEmSJEkdi4e/SZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwrpXOwG13pALbsu87bxDmjirxO3LppzYlilJkiRJkqQuzj2VJEmSJEmSVFi7N5VCCINDCHeHEB4PITwWQpicXv+WEEJDCOGp9O8u7Z2bJEmSJEmS8qnGnkpNwHkxxoOAtwP/EUI4CLgAmBNjHAbMSS9LkiRJkiSpA2r3plKM8bkY44L0/MvAE8CewEnA9endrgfGt3dukiRJkiRJyqeqcyqFEIYAhwMPAgNijM+lNz0PDKhSWpIkSZIkSaogxBirM3AIvYF7gEtijL8NIayMMfZtdvtLMcYt5lUKIZwNnA0wYMCAkTNnzmy3nNvSo8+uKnn9gJ7wwmtbXn/InjtXjM0bn+WVV16hd+/eFe/XEePN3dw709itjTf3bS93nzdz70xjtzbe3M29M43d2nhz3/Zy93nbNnP
vDI499tj5McYjKt4xxtjuJ2B74E7gi82uWwLsnp7fHVhSaTkjR46MXcXe/3lrydMVv7yl5PV5YvPGZ7n77rtb9ZiqGW/u1Yk39+rEm3t14rfVsVsbb+7ViTf36sSbe3Xizb068ebe+cZubby5d23AvJijv1ONX38LwLXAEzHGy5rdVAecmZ4/E5jd3rlJkiRJkiQpn+5VGPOdwMeAR0MIi9LrLgSmADeFED4B/BX4UBVykyRJkiRJUg7t3lSKMd4PhIybj2vPXCRJkiRJktQyVf31N0mSJEmSJHVONpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJh3audgKpvyAW3lbz+vEOaOCvjtmVTTmzLlCRJkiRJUgfnnkqSJEmSJEkqzKaSJEmSJEmSCrOpJEmSJEmSpMJsKkmSJEmSJKkwm0qSJEmSJEkqzKaSJEmSJEmSCrOpJEmSJEmSpMJsKkmSJEmSJKkwm0qSJEmSJEkqrHu1E1DnN+SC20pef94hTZxV4rZlU06sGJs3XpIkSZIkVYdNJXVqRRtaYFNKkiRJkqStwaaStmnuZSVJkiRJUsvYVJKqpLV7WbWmISZJkiRJUmt1qKZSCGEccDnQDfh5jHFKlVOSuqTW7mVVzYaYe4hJkiRJUsfQYZpKIYRuwFXAGGA58KcQQl2M8fHqZiapK+nMDTH3TpMkSZLUkXSYphLwNuDpGONfAEIIM4GTAJtKkrQVdOaGmLnniy01viRJktRWOlJTaU/gmWaXlwOjqpSLJEmdXmduiHWmRmJr4829ZWO3Nt7cWzZ2a+O7Su6SpESIMVY7BwBCCKcA42KMk9LLHwNGxRjP3ex+ZwNnpxcPAJa0a6Ltrx/wjyrFV3Ps1sabe3Xizb068eZenfhtdezWxpt7deLNvTrx5l6deHOvTry5d76xWxtv7l3b3jHG/hXvFWPsECfgKODOZpe/Anyl2nlV+wTMq1Z8Ncc2d3M3984xtrlve2Obu7mbe+cY29zN3dwd29w75thd7bQdHcefgGEhhH1CCD2ADwN1Vc5JkiRJkiRJJXSYOZVijE0hhHOBO4FuwLQY42NVTkuSJEmSJEkldJimEkCM8XfA76qdRwdzTRXjqzl2a+PNvTrx5l6deHOvTvy2OnZr4829OvHmXp14c69OvLlXJ97cO9/YrY03d3WcibolSZIkSZLUeXSkOZUkSZIkSZLUSdhU6qBCCNNCCCtCCItbEDs4hHB3COHxEMJjIYTJBeN3DCE8FEJ4OI3/Zgty6BZCWBhCuLUFsctCCI+GEBaFEOa1IL5vCGFWCOHJEMITIYSjCsQekI67/rQ6hPD5AvFfSJ+zxSGEGSGEHQvmPjmNfSzPuKXqJITwlhBCQwjhqfTvLgViT03HXhdCOKIFY/8gfd4fCSHcHELoWzD+22nsohBCfQhhj7yxzW47L4QQQwj9Co59cQjh2Wav/QlF4tPrP5s+/sdCCN8vMPaNzcZdFkJYVDD3w0IIf1z/PxNCeFvB+OEhhD+k/3f/E0LokxFb8r2lQM1lxVesuzKxuWquTHzemiv7vlqu7sqMnavmyo2ds+ayxq9Yd2Vic9Vcmfi8NVdyfRSSH/V4MITwdPo4ehSIPTeNq/Q+kRX/qxDCkpC8V08LIWxfMP7a9LpHQrKu6p03ttntV4QQXml
B7teFEJY2e90PKxAbQgiXhBD+HJJ16+cKjn1fs3H/HkK4pWD8cSGEBWn8/SGE/QrEvieNXRxCuD6EUHb6h7DZNkyeeisTm6veysTnqrcy8RXrLSu22fVl663M2BXrrUJ8rprLiM1Vb2XiK9ZbhfjcNRdKbPeG/OvVUrFFtuVKxRfZlisVn3e9mrm9H/Jty5Uau8i2XMnxQ771aqmxi2zLlYovsi1XKj7venWLz0l5661MfK6ay4gtUm+l4nPVW1Z8s9vK1lzG2Lnrrcur9s/PeSp9At4NjAAWtyB2d2BEen4n4M/AQQXiA9A7Pb898CDw9oI5fBH4NXBrC/JfBvRrxXN3PTApPd8D6NvC5XQDngf2znn/PYGlQM/08k3AWQXGOxhYDNSQzHf2e2C/onUCfB+4ID1/AfC9ArH/BhwAzAWOaMHYY4Hu6fnvZY1dJr5Ps/OfA/47b2x6/WCSyf7/Wq6GMsa+GDg/52tVKv7Y9DXbIb28W5Hcm90+Ffh6wbHrgfel508A5haM/xNwTHr+48C3M2JLvrcUqLms+Ip1VyY2V82Vic9bc5nvq5XqrszYuWquTHzemqu4TsiquzJj56q5MvF5a67k+ojk/fXD6fX/DZxTIPZwYAgV1jVl4k9IbwvAjFJjV4hvXnOXkf7v5IlNLx8B/AJ4pQW5XwecUqHesmInAjcA21Wot4rbEMD/A84oOP6fgX9Lr/8McF3O2HcAzwD7p9d/C/hEhedgk22YPPVWJjZXvZWJz1VvZeIr1ltWbN56KzN2xXqrEJ+r5rJyz1NvZcauWG9Z8SRf2ueuuVK1Qf71aqnYIttypeKLbMuVis+7Xi35P0H+bblSY19M/m25UvF516slc292e6VtuVJjF9mWKxWfd726xeekvPVWJj5XzWXEFqm3UvG56i0rPm/NZYydu966+sk9lTqoGOO9wL9aGPtcjHFBev5l4AmShkfe+BhjXP+N1PbpKeaNDyEMAk4Efp476a0khLAzyYfmawFijGtijCtbuLjjgP+LMf61QEx3oGdIvpGqAf5eIPbfgAdjjI0xxibgHuCD5QIy6uQkkjc+0r/j88bGGJ+IMS7Jk2xGfH2aO8AfgUEF41c3u9iLjLor8//xX8CXs+JyxOeSEX8OMCXG+EZ6nxVFxw4hBOBDJB8ciowdgfXfSO1MmbrLiN8fuDc93wD8e0Zs1ntL3porGZ+n7srE5qq5MvF5a67c+2rZutsK78lZ8Xlrruz45equTGyumisTn7fmstZH7wFmpdeXrLms2BjjwhjjslLj5Yz/XXpbBB4iu+ay4lfDhue9JyXqJis2hNAN+AFJvRXOvdJjrhB7DvCtGOO69H5Z9VZ27PTb8/cAJfccKRNfseYyYt8E1sQY/5xen1lvaX6bbMOkr1PFeisVm+aUq97KxOeqtzLxFestKzZvvWXFF5ERn6vmyo1dqd7KxOder5aI35UCNZch13q1lDzr1ArxubflMuJzrVfLyLUt10ZyrVfLybMtlyF3zWWouF4t8zkpV71lxeepuTKxueqtTHyueqvwGbFszW3lz5ddkk2lLi6EMITkW7IHC8Z1S3fbXAE0xBiLxP+I5B9zXZExm4lAfQhhfgjh7IKx+wAvAtNDshvyz0MIvVqYx4cpsEKIMT4L/BD4G/AcsCrGWF9gvMXAu0IIu4YQaki+pRhcIH69ATHG59LzzwMDWrCMreHjwO1Fg0Kyq/szwATg6wXiTgKejTE+XHTMZs5Nd6GdVm7X3wz7k7x+D4YQ7gkhHNmC8d8FvBBjfKpg3OeBH6TP2w+BrxSMf4xkgwLgVHLU3WbvLYVrrqXvTRVic9Xc5vFFa655fNG6K5F7oZrbLL5wzWU8d7nqbrPYwjW3WXzumtt8fQT8H7Cy2UbocjKadK1cl5WND8lhSB8D7igaH0KYTvK/ciBwZYHYc4G6Zv9vLcn9krTm/iuEsEOB2H2B00JyWMbtIYRhLRgbkg8rczb7IJAnfhL
wuxDCcpLnfUqeWJJGTPew8ZCMUyj/Hrf5Nsyu5Ky3ErFFZcbnqbes+Dz1lhGbu97K5F6x3srE5625cs97xXrLiM9Vbxnx/6BYzZXa7s27Xm3NNnOe+Err1ZLxOderW8QWXKdm5Z53vVoqPu96tdzzlmedWiq+yHq1VHye9WrW56S89daaz1l5YsvVW2Z8znorGZ+z5srl3prPDl1H7AC7S3kqfSLZXbrw4W/N4nsD84EPtmIZfYG7gYNz3v/9wE/S86Np2eFve6Z/dwMeBt5dIPYIoAkYlV6+nIzdPysspwfJRsGAAjG7AHcB/Um+Hb0F+GjBcT+Rvmb3Aj8FflS0Tkg2fpvf/lLRGiPHLtMV4i8CbobkFyaLxqe3fQX4Zp5Ykr3CHgR2Ti8vo8JhBiWetwEkhzxuB1wCTCsYv5hkYz0AbyM5FLLk4y/zvP0UOK8Fr/kVwL+n5z8E/L5g/IEku13PB74B/LNC/CbvLUVqrlR8kborE5u35jLfFyvV3ObxReuuxPNWtOY2j89dcxWeu4p1V2LsojW3eXyhmktj1q+Pjgaebnb94FL/TxmxBze7ruzrlSP+Z+R4jy4T3w34CTAxZ+y7gfvZeJhAxcORNh+b5HDEAOxA8m105uEZJWJfWV8naf3f18LHffv62imY+2/ZuG7/EvDzArFHAfeRNJi+AyzKiNliGwbol6feSsVudnvZessRX7becsRn1lvG494jb71ljZ233srEV6y5HI+7bL2VGTtXvZWJz1Vz6X232O4l53q1VGyz2+ZSeZ1aLr7ierVcfHp95no143EXWaeWis+9Xs2Iz7VerfC85Vmnlho793o1I77iepWMz0kF6q3s56xyNZcjtmy9VYrPUW+l4n+Qp+bKPG+FtuO68qnqCXgq8+K0oqlE0tS4E/jiVsjj6+Q/Pvm7JN/iLSPpdDcCv2zF2BfnHTu9/0BgWbPL7wJua8G4JwH1BWNOBa5tdvkM0g2NFj72S4HPFK0TYAmwe3p+d2BJ0Rort1KoFA+cBfwBqGlJfLPb9ipX/2zaVDqE5FvpZempiWSPsYEtHLvi/16J5/0O4Nhml/8P6F/geesOvAAMasFrvop0JUyyIbS6Fc/7/sBDZWK3eG8pWHOZ702V6i4rNm/NlRs7Z81tEl+k7nKMXbbmMp73IjWX9dxVrLuMsXPXXI7HXrbmNrvv10k+4P2DjR92jwLuzBl7frPLyygwf1/zeJIN9ltI53opGt/suneT48uXNPYbJOvV9fW2jmbNjhaMPbrA2OcDTwL7NHvNV7XgeesH/BPYseDz9iWSw9Gb/68+3sLHPRa4KeP+pbZhfpWn3jJif9ns9rL1Vi4+T71VGr9cvWXEvpS33nKOnVlvWfF5aq7C81ax3jLib8tbbzkfe2bNlVjexST/b7nXq5vHNrs8lxzbcqXiKbAtlzV+s+eu4meZNPZrFNyWqzD2kDxjb/a8516vZjxvubflSoxdaFuuwmMvuV4l43NS3nrLis9Tc+Vi89RbpbEr1VtG/Jw8NZdz7Nz11hVPHv7WBYUQAskxn0/EGC9rQXz/kM68H0LoCYwhWbFXFGP8SoxxUIxxCMnhY3fFGD9aYOxeIYSd1p8nWRHn/gW8GOPzwDMhhAPSq44DHs8b38zpFD8W+m/A20MINelrcBzJ/CG5hRB2S//uRfKt3K8L5gBQB5yZnj8TmN2CZbRICGEcyS7gtTHGxhbEN9+1/STy192jMcbdYoxD0tpbTjI58PMFxt692cWTKVB3qVtIJngkhLA/G/d2y+u9wJMxxuUFx4XkuPtj0vPvAQodPtes7rYDvkoyEW2p+2W9t+Squda8N2XF5q25MvG5aq5UfN66KzN2rpor87zlqrkKz3vZuisTm6vmyjz2vDVXan30BMkeKKekdytZc61Zl5WLDyFMAo4HTo/pXC8F4peE9Fek0uemtlROGbHzY4wDm9VbY4yx5C9Slcl992Zjj6dEzZV53jbUG8lr/+f
NYyvEQ/Ka3RpjfL30s1b2Nd85rXOaXZf3ca+vtx2A/ySj3jK2YSaQo95au/2TFZ+33krFAx/LU28ZY++St97K5F6x3srFk6PmKjzvFest43k7iRz1VuGx56q5Mtu9Fderrd1mzoovsF7Niq+4Xs2I/VPebbkyY+ddr2Y9dxXXqxWe94rbcmXi865Xsx57xfVqmc9JubbjWvM5Kys2b72Vic+1HZcRvyBPzZUZu7WfHbqOana0PGWfSBoazwFrSQq87C+VbBZ7NMmxto8Ai9LTCQXiDwUWpvGLqbB7fJnljKbg4W/AUJLdOB8mOTb4ohaMexgwL83/FmCXgvG9SL7Z2rkFY3+T5M1sMcmvpexQMP4+kjfnh4HjWlInJPM/zCFZGf0eeEuB2JPT82+QfNOS+e1/RvzTJL94sr7uyv0CQ6n4/5c+d48A/0O6e2/R/w8qfyNcauxfAI+mY9eRfmNTIL4HyTeri4EFwHuK5E7yKzmfbuFrfjTJ7s4Pk+zGO7Jg/GSSjfU/k8wbkbXrccn3lgI1lxVfse7KxOaquTLxeWuu4vtqVt2VGTtXzZWJz1tzmblXqrsyY+equTLxeWuu5PqIZF3xUPr6/4YS77VlYj+X1lsTyUZ81mEtWfFNJN9er388WYf0bBFPsov8/6av+2KSvWD65B17s/uUOxwpK/e7mo39S9JfSssZ25fkG+1HSb5RHl5k7PS2ucC4Cu9xWeOfnI79cLqcoQVif0DSFFgCfL7c+M2WNZqNhzJVrLcysbnqrUx8rnorFZ+33rLGzltvZXKvWG8V4nPVXFbueeqtzNgV661CfK6aI2O7lxzr1TKxubblysTnXa9mxVdcr2bFbnafZWT/ElfW2HnXq1nxFder5XInx7ZcmbHzrlez4vOuV7f4nJSn3irE5625UrFFPjuUis+1HZcVX6DmSo2d+7NDVz+t38VOkiRJkiRJys3D3yRJkiRJklSYTSVJkiRJkiQVZlNJkiRJkiRJhdlUkiRJkiRJUmE2lSRJkiRJklSYTSVJkiRJkiQVZlNJkiRJkiRJhdlUkiRJkiRJUmH/H8GgfaEKBa7NAAAAAElFTkSuQmCC\n", "text/plain": [ - "
" + "
" ] }, "metadata": { @@ -2851,10 +2856,11 @@ } ], "source": [ + "pca = pipeline.named_steps['pca']\n", "m = len(pca.explained_variance_ratio_)\n", - "plt.figure(figsize=(15,5))\n", + "plt.figure(figsize=(20,5))\n", "plt.bar(x=range(m), height=pca.explained_variance_ratio_ * 100)\n", - "plt.xticks(range(m), ['PC'+str(i) for i in range(1,m+1)])\n", + "plt.xticks(range(m), [str(i) for i in range(1,m+1)])\n", "plt.title(\"Explained variance\")\n", "plt.ylabel(\"Percentage\")\n", "\n", @@ -2871,13 +2877,6 @@ "plt.grid()\n", "plt.show()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/models.ipynb b/models.ipynb index f26a577..27db8e7 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -121,7 +121,7 @@ "(1451, 83)" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } From 2db5d1172cc075ad263887cbe16a277d3b536ee7 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Wed, 17 Apr 2019 23:13:12 +0100 Subject: [PATCH 10/24] optimizing...... 
--- OurPipeline.py | 17 +- analysis.ipynb | 3 +- models.ipynb | 545 +++++++++++++++++++++--------------------- utils/LastInfected.py | 2 +- utils/LastWeeks.py | 2 +- 5 files changed, 282 insertions(+), 287 deletions(-) diff --git a/OurPipeline.py b/OurPipeline.py index 08454be..35531c0 100644 --- a/OurPipeline.py +++ b/OurPipeline.py @@ -6,16 +6,15 @@ from utils.LastWeeks import LastWeeks from utils.LastInfected import LastInfected -def create_pipeline(attr, n_weeks, pca_n_components=None, n_non_train=4): - pipelist = [ +def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, n_non_train=4): + + return Pipeline([ ('imputer', ContinuityImputer(attributes=attr[n_non_train:])), - ('lw', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)), - ('lf', LastInfected(weeks=n_weeks)), + ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)), + ('l_infected', LastInfected(weeks=n_weeks_infected)), ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])), ('scaler', StandardScaler()), + ('pca', pca), + ('est_opt', estimator_optimizer), ] - - if pca_n_components is not None: - pipelist.append(('pca', PCA(n_components=pca_n_components))) - - return Pipeline(pipelist) \ No newline at end of file +) \ No newline at end of file diff --git a/analysis.ipynb b/analysis.ipynb index 76e407c..cea2bd7 100644 --- a/analysis.ipynb +++ b/analysis.ipynb @@ -2829,9 +2829,10 @@ "source": [ "%autoreload\n", "from OurPipeline import create_pipeline\n", + "from sklearn.decomposition import PCA\n", "\n", "attr=list(train_data)[:-1]\n", - "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=0.999)\n", + "pipeline = create_pipeline(attr, n_weeks=3, pca=PCA(0.999))\n", "\n", "X_train = pipeline.fit_transform(train_data.iloc[:,:-1].copy(), train_data.iloc[:,-1].copy())\n", "X_train.shape" diff --git a/models.ipynb b/models.ipynb index 27db8e7..cde0b58 100644 --- a/models.ipynb +++ b/models.ipynb @@ -103,39 +103,6 @@ 
"X_train_1.shape" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data Pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1451, 83)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%autoreload\n", - "from OurPipeline import create_pipeline\n", - "\n", - "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=None)\n", - "\n", - "X_train = pipeline.fit_transform(X_train_1, y_train)\n", - "X_train.shape" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -145,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 77, "metadata": {}, "outputs": [], "source": [ @@ -158,8 +125,9 @@ "from scipy.stats import uniform as sp_uniform\n", "score_metric='neg_mean_absolute_error'\n", "jobs=-1 #-1 to make it execute in parallel\n", - "verbose_level = 1\n", - "random_n = 42" + "verbose_level = 0\n", + "random_n = 42\n", + "base_args = {'estimator': None, 'param_distributions': None, 'n_iter': None, 'scoring': score_metric, 'n_jobs': jobs, 'cv': None, 'verbose': verbose_level, 'random_state': random_n, 'return_train_score': True, 'iid': True}" ] }, { @@ -172,73 +140,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ - "k_folds=4\n", + "k_folds=10\n", "n_iter_search = 20\n", "C = sp_randint(0, 10000)\n", - "params = {'kernel':['rbf', 'linear'], 'gamma':['scale'], 'C': C}" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 
24.6min\n", - "[Parallel(n_jobs=-1)]: Done 80 out of 80 | elapsed: 52.6min finished\n" - ] - }, - { - "data": { - "text/plain": [ - "-19.17685248872835" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "SVR_optimizer.fit(X_train, y_train)\n", - "SVR_optimizer.best_score_" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVR(C=769, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "SVR_optimizer.best_estimator_" + "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n", + "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" ] }, { @@ -260,80 +170,15 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "k_folds=10\n", "n_iter_search = 100\n", "min_samples = sp_uniform(0.01, 0.35)\n", - "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using 
backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 8.0s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 32.2s\n", - "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 1.3min\n", - "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 2.3min\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 2.9min finished\n" - ] - }, - { - "data": { - "text/plain": [ - "-9.041006202618883" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "Tree_optimizer.fit(X_train, y_train)\n", - "Tree_optimizer.best_score_" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", - " min_samples_split=0.107526262482814,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Tree_optimizer.best_estimator_" + "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}\n", + "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" ] }, { @@ -350,161 +195,265 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 7, 
"metadata": {}, "outputs": [], "source": [ "k_folds=10\n", "n_iter_search = 40\n", - "min_samples = sp_uniform(0.01, 0.35)\n", - "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}" + "params = {'n_estimators': sp_randint(2,50), 'criterion':['mae'], 'max_depth': sp_randint(2, 10)}\n", + "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" ] }, { - "cell_type": "code", - "execution_count": 74, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 10 folds for each of 40 candidates, totalling 400 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 24.7s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 2.4min\n", - "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 4.9min finished\n" - ] - }, - { - "data": { - "text/plain": [ - "-9.22168619342982" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "Forest_optimizer.fit(X_train, y_train)\n", - "Forest_optimizer.best_score_" + "## Adaboost of Trees\n", + "* 10.78 - With 3 last weeks a 3 last infected \n", + "* **8.49** - With 3 last weeks a 3 last infected and only max_depth tuned." 
] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 8, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=9,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=0.014927937950279559,\n", - " min_samples_split=0.0795948414310818,\n", - " min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=-1,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False)" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "Forest_optimizer.best_estimator_" + "k_folds=10\n", + "n_iter_search = 20\n", + "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7), 'base_estimator__n_estimators': sp_randint(40, 100)}\n", + "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Adaboost of Trees\n", - "* 10.78 - With 3 last weeks a 3 last infected \n", - "* **8.49** - With 3 last weeks a 3 last infected and only max_depth tuned." 
+ "## KNN\n", + "* 21.349 - with PCA at 0.65 & 2 previous weeks\n", + "* 20.36 - without PCA" ] }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "k_folds=10\n", - "n_iter_search = 10\n", - "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7)}" + "n_iter_search = 100\n", + "params = {'n_neighbors': sp_randint(3,150), 'weights': ['uniform', 'distance']}\n", + "KNN_optimizer = RandomizedSearchCV(estimator=KNeighborsRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "AdaTree_optimizer.fit(X_train, y_train)\n", - "AdaTree_optimizer.best_score_" + "# Optimization" ] }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 76, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=1,\n", - " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", - " presort=False, random_state=None, splitter='best'),\n", - " learning_rate=1.0, loss='linear', n_estimators=50,\n", - " random_state=None)" - ] - }, - "execution_count": 92, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 10 folds for each of 100 
candidates, totalling 1000 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 3.4s\n", + "[Parallel(n_jobs=-1)]: Done 239 tasks | elapsed: 11.8s\n", + "[Parallel(n_jobs=-1)]: Done 739 tasks | elapsed: 36.9s\n", + "[Parallel(n_jobs=-1)]: Done 993 out of 1000 | elapsed: 50.0s remaining: 0.4s\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 50.1s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Best score of 12.7829083390765 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]\n", + "1/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]]\n", + "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.1s\n", + "[Parallel(n_jobs=-1)]: Done 
192 tasks | elapsed: 16.8s\n", + "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 41.2s\n", + "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.3min\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 1.6min finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Best score of 8.982081323225362 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", + " min_samples_split=0.107526262482814,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, None, -8.982081323225362]\n", + "2/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", + " min_samples_split=0.107526262482814,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, None, -8.982081323225362]]\n", + "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 6.4s\n", + "[Parallel(n_jobs=-1)]: Done 420 tasks | elapsed: 18.9s\n", + 
"[Parallel(n_jobs=-1)]: Done 920 tasks | elapsed: 45.4s\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 49.9s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", + " min_samples_split=0.107526262482814,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 3, None, -8.982081323225362], [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best'), 1, 4, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False), -13.062715368711233]]\n", + "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.1s\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m 1513\u001b[0m evaluate_candidates(ParameterSampler(\n\u001b[1;32m 1514\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m random_state=self.random_state))\n\u001b[0m", + "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mevaluate_candidates\u001b[0;34m(candidate_params)\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 710\u001b[0m in product(candidate_params,\n\u001b[0;32m--> 711\u001b[0;31m cv.split(X, y, groups)))\n\u001b[0m\u001b[1;32m 712\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 713\u001b[0m 
\u001b[0mall_candidate_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcandidate_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 931\u001b[0m \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 932\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m 519\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m 520\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0;32mexcept\u001b[0m 
\u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 297\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] } ], "source": [ - "AdaTree_optimizer.best_estimator_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## KNN\n", - "* -21.349 - with PCA at 0.65 & 2 previous weeks\n", - "* -20.36 - without PCA" + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "#pipeline = create_pipeline(attr, estimator_optimizer=RandomizedSearchCV(None, None), n_weeks=0, n_weeks_infected=0)\n", + "optimizers=[Tree_optimizer]#, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", + "weeks = [1,2]\n", + "weeks_infected = [3,4]\n", + "pca = [PCA(0.95), None]\n", + "\n", + "n_total = len(optimizers) * len(weeks) * len(weeks_infected) * len(pca)\n", + "\n", + "results=[]\n", + "best_attempt = None\n", + "best_score = np.inf\n", + "idx=0\n", + "for opt in optimizers:\n", + " for w in weeks:\n", + " for wi in weeks_infected:\n", + " for p in pca:\n", + " pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, pca=p)\n", + " pipeline.fit(X_train_1, y_train)\n", + " score = pipeline.named_steps['est_opt'].best_score_\n", + " best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n", + " attempt = [best_estimator, w, wi, p, score]\n", + " if abs(score) < best_score:\n", + " best_score = abs(score)\n", + " best_estimator = attempt\n", + " print('\\nBest score of {} 
with the estimator {}'.format(best_score, best_estimator))\n", + " idx+=1\n", + " print(str(idx) + '/' + str(n_total), end='\\t')\n", + " results.append(attempt)\n", + " print(results)\n", + " " ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "k_folds=10\n", - "n_iter_search = 100\n", - "params = {'n_neighbors': sp_randint(3,150), 'weights': ['uniform', 'distance']}" + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=2, n_weeks_infected=2, estimator_optimizer=Tree_optimizer, pca=None)" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -519,29 +468,75 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.5s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 19.7s\n", - "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 45.0s\n", - "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.3min\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 1.6min finished\n" + "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 5.9s\n", + "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 23.9s\n", + "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 56.8s\n", + "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.8min\n", + "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 2.2min finished\n" ] }, { "data": { "text/plain": [ - "-20.359505759574677" + "Pipeline(memory=None,\n", + " steps=[('imputer', ContinuityImputer(attributes=['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 'precipitation_amt_mm', 'reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2', 'reanalys...t=True,\n", + " return_train_score=True, 
scoring='neg_mean_absolute_error',\n", + " verbose=1))])" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.fit(X_train_1, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-9.066505858028945" ] }, - "execution_count": 36, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "KNN_optimizer = RandomizedSearchCV(estimator=KNeighborsRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n", - "KNN_optimizer.fit(X_train, y_train)\n", - "KNN_optimizer.best_score_" + "pipeline.named_steps['est_opt'].best_score_" ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m 2.copy()\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "2.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/utils/LastInfected.py b/utils/LastInfected.py index 6c1c60c..1830dce 100644 --- a/utils/LastInfected.py +++ b/utils/LastInfected.py @@ -24,8 +24,8 @@ def transform(self, X, model=None): for idx, n_infected in enumerate(self.y): city = X.loc[idx, 'city'] r[idx] = self.last[city] - self.last[city].pop() self.last[city].appendleft(n_infected) + self.last[city].pop() r = pd.DataFrame(r, columns=[self.new_attributes_prefix + str(week) for week in range(self.weeks)]) diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py index abe0379..0de33dd 
100644 --- a/utils/LastWeeks.py +++ b/utils/LastWeeks.py @@ -27,8 +27,8 @@ def transform(self, X): for idx, week in X.iterrows(): city = week['city'] r[idx] = self.last[city] - self.last[city].pop() self.last[city].appendleft(week[self.attributes]) + self.last[city].pop() r = pd.DataFrame(r.reshape([X.shape[0], self.weeks * len(self.attributes)]), columns=[self.new_attributes_prefix + str(week) + '_' + str(attr) From 2c2127f58ae21f1ebb1b76e7b13b2d3a5cf3f475 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Thu, 18 Apr 2019 08:59:55 +0100 Subject: [PATCH 11/24] optimizing after error --- models.ipynb | 470 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 296 insertions(+), 174 deletions(-) diff --git a/models.ipynb b/models.ipynb index cde0b58..d8de1c1 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -216,13 +216,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "k_folds=10\n", "n_iter_search = 20\n", - "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7), 'base_estimator__n_estimators': sp_randint(40, 100)}\n", + "params = {'n_estimators': sp_randint(40, 100), 'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7)}\n", "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" ] }, @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ 
@@ -251,145 +251,94 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Optimization" + "# Optimization\n" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')\n", + "1/40\t\n", + "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", + " min_samples_split=0.107526262482814,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')\n", + "2/40\t3/40\t\n", + "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", + " min_samples_split=0.107526262482814,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')\n", + "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n", + "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)\n", + "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40" ] }, { "cell_type": "code", - "execution_count": 76, + 
"execution_count": 10, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 3.4s\n", - "[Parallel(n_jobs=-1)]: Done 239 tasks | elapsed: 11.8s\n", - "[Parallel(n_jobs=-1)]: Done 739 tasks | elapsed: 36.9s\n", - "[Parallel(n_jobs=-1)]: Done 993 out of 1000 | elapsed: 50.0s remaining: 0.4s\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 50.1s finished\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ "\n", - "Best score of 12.7829083390765 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]\n", - "1/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", " min_samples_split=0.08977730688967958,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]]\n", - "Fitting 10 folds for each of 100 candidates, 
totalling 1000 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.1s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 16.8s\n", - "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 41.2s\n", - "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.3min\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 1.6min finished\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Best score of 8.982081323225362 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " splitter='best')\n", + "1/40\t\n", + "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", " min_samples_split=0.107526262482814,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, None, -8.982081323225362]\n", - "2/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", + " splitter='best')\n", + "2/40\t3/40\t\n", + "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", " 
min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", " min_samples_split=0.107526262482814,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, None, -8.982081323225362]]\n", - "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" + " splitter='best')\n", + "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n", + "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)\n", + "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40\t" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 120 tasks | elapsed: 6.4s\n", - "[Parallel(n_jobs=-1)]: Done 420 tasks | elapsed: 18.9s\n", - "[Parallel(n_jobs=-1)]: Done 920 tasks | elapsed: 45.4s\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 49.9s finished\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " 
min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", - " min_samples_split=0.107526262482814,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 3, None, -8.982081323225362], [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best'), 1, 4, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False), -13.062715368711233]]\n", - "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.1s\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", + "ename": "ValueError", + "evalue": "Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n max_leaf_nodes=None, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'). 
Check the list of available parameters with `estimator.get_params().keys()`.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31m_RemoteTraceback\u001b[0m Traceback (most recent call last)", + "\u001b[0;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 418, in _process_worker\n r = call_item()\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 272, in __call__\n return self.fn(*self.args, **self.kwargs)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\", line 567, in __call__\n return self.func(*args, **kwargs)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in __call__\n for func, args, kwargs in self.items]\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in \n for func, args, kwargs in self.items]\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_validation.py\", line 514, in _fit_and_score\n estimator.set_params(**parameters)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 224, in set_params\n valid_params[key].set_params(**sub_params)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 215, in set_params\n (key, self))\nValueError: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, 
max_features=None,\n max_leaf_nodes=None, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`.\n\"\"\"", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mbest_estimator\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m 1513\u001b[0m evaluate_candidates(ParameterSampler(\n\u001b[1;32m 1514\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m random_state=self.random_state))\n\u001b[0m", @@ -397,18 +346,18 @@ "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 931\u001b[0m \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 932\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m 519\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m 520\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 297\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 
430\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 431\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mFINISHED\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 432\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 433\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36m__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 384\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 385\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n max_leaf_nodes=None, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`." ] } ], "source": [ "%autoreload\n", "from OurPipeline import create_pipeline\n", + "from sklearn.decomposition import PCA\n", "\n", - "#pipeline = create_pipeline(attr, estimator_optimizer=RandomizedSearchCV(None, None), n_weeks=0, n_weeks_infected=0)\n", - "optimizers=[Tree_optimizer]#, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", + "optimizers=[AdaTree_optimizer, KNN_optimizer, SVR_optimizer]#[Tree_optimizer, Forest_optimizer \n", "weeks = [1,2]\n", "weeks_infected = [3,4]\n", "pca = [PCA(0.95), None]\n", @@ -430,105 +379,278 @@ " attempt = [best_estimator, w, wi, p, score]\n", " if abs(score) < best_score:\n", " best_score = abs(score)\n", - " best_estimator = attempt\n", + " best_attempt = attempt\n", " print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n", " idx+=1\n", " print(str(idx) + '/' + str(n_total), end='\\t')\n", - " results.append(attempt)\n", - " print(results)\n", - " " + " results.append(attempt)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%autoreload\n", - "from OurPipeline import create_pipeline\n", - "\n", - "pipeline = create_pipeline(attr, n_weeks=2, n_weeks_infected=2, estimator_optimizer=Tree_optimizer, pca=None)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, + "execution_count": 19, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 5.9s\n", - "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 23.9s\n", - "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 56.8s\n", - "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.8min\n", - "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 2.2min finished\n" - ] - }, { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
estimatorweeksweeks_infectedPCAscore
0DecisionTreeRegressor(criterion='mae', max_dep...13PCA(copy=True, iterated_power='auto', n_compon...-12.752929
1DecisionTreeRegressor(criterion='mae', max_dep...13None-8.983460
2DecisionTreeRegressor(criterion='mae', max_dep...14PCA(copy=True, iterated_power='auto', n_compon...-13.050310
3DecisionTreeRegressor(criterion='mae', max_dep...14None-8.971744
4DecisionTreeRegressor(criterion='mae', max_dep...23PCA(copy=True, iterated_power='auto', n_compon...-13.235700
5DecisionTreeRegressor(criterion='mae', max_dep...23None-9.039628
6DecisionTreeRegressor(criterion='mae', max_dep...24PCA(copy=True, iterated_power='auto', n_compon...-12.913163
7DecisionTreeRegressor(criterion='mae', max_dep...24None-9.025844
8(DecisionTreeRegressor(criterion='mae', max_de...13PCA(copy=True, iterated_power='auto', n_compon...-9.806561
9(DecisionTreeRegressor(criterion='mae', max_de...13None-7.174190
10(DecisionTreeRegressor(criterion='mae', max_de...14PCA(copy=True, iterated_power='auto', n_compon...-10.178593
11(DecisionTreeRegressor(criterion='mae', max_de...14None-7.197634
12(DecisionTreeRegressor(criterion='mae', max_de...23PCA(copy=True, iterated_power='auto', n_compon...-10.119454
13(DecisionTreeRegressor(criterion='mae', max_de...23None-7.177250
14(DecisionTreeRegressor(criterion='mae', max_de...24PCA(copy=True, iterated_power='auto', n_compon...-10.353996
15(DecisionTreeRegressor(criterion='mae', max_de...24None-7.212562
\n", + "
" + ], "text/plain": [ - "Pipeline(memory=None,\n", - " steps=[('imputer', ContinuityImputer(attributes=['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 'precipitation_amt_mm', 'reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2', 'reanalys...t=True,\n", - " return_train_score=True, scoring='neg_mean_absolute_error',\n", - " verbose=1))])" + " estimator weeks weeks_infected \\\n", + "0 DecisionTreeRegressor(criterion='mae', max_dep... 1 3 \n", + "1 DecisionTreeRegressor(criterion='mae', max_dep... 1 3 \n", + "2 DecisionTreeRegressor(criterion='mae', max_dep... 1 4 \n", + "3 DecisionTreeRegressor(criterion='mae', max_dep... 1 4 \n", + "4 DecisionTreeRegressor(criterion='mae', max_dep... 2 3 \n", + "5 DecisionTreeRegressor(criterion='mae', max_dep... 2 3 \n", + "6 DecisionTreeRegressor(criterion='mae', max_dep... 2 4 \n", + "7 DecisionTreeRegressor(criterion='mae', max_dep... 2 4 \n", + "8 (DecisionTreeRegressor(criterion='mae', max_de... 1 3 \n", + "9 (DecisionTreeRegressor(criterion='mae', max_de... 1 3 \n", + "10 (DecisionTreeRegressor(criterion='mae', max_de... 1 4 \n", + "11 (DecisionTreeRegressor(criterion='mae', max_de... 1 4 \n", + "12 (DecisionTreeRegressor(criterion='mae', max_de... 2 3 \n", + "13 (DecisionTreeRegressor(criterion='mae', max_de... 2 3 \n", + "14 (DecisionTreeRegressor(criterion='mae', max_de... 2 4 \n", + "15 (DecisionTreeRegressor(criterion='mae', max_de... 2 4 \n", + "\n", + " PCA score \n", + "0 PCA(copy=True, iterated_power='auto', n_compon... -12.752929 \n", + "1 None -8.983460 \n", + "2 PCA(copy=True, iterated_power='auto', n_compon... -13.050310 \n", + "3 None -8.971744 \n", + "4 PCA(copy=True, iterated_power='auto', n_compon... -13.235700 \n", + "5 None -9.039628 \n", + "6 PCA(copy=True, iterated_power='auto', n_compon... -12.913163 \n", + "7 None -9.025844 \n", + "8 PCA(copy=True, iterated_power='auto', n_compon... 
-9.806561 \n", + "9 None -7.174190 \n", + "10 PCA(copy=True, iterated_power='auto', n_compon... -10.178593 \n", + "11 None -7.197634 \n", + "12 PCA(copy=True, iterated_power='auto', n_compon... -10.119454 \n", + "13 None -7.177250 \n", + "14 PCA(copy=True, iterated_power='auto', n_compon... -10.353996 \n", + "15 None -7.212562 " ] }, - "execution_count": 69, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipeline.fit(X_train_1, y_train)" + "pd.DataFrame(results, columns=['estimator', 'weeks', 'weeks_infected', 'PCA', 'score'])" ] }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "-9.066505858028945" + "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False),\n", + " 1,\n", + " 3,\n", + " None,\n", + " -7.174190213645762]" ] }, - "execution_count": 70, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipeline.named_steps['est_opt'].best_score_" + "best_attempt" ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 25, "metadata": {}, "outputs": [ { - "ename": "SyntaxError", - "evalue": "invalid syntax (, line 1)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m 2.copy()\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" - ] + "data": { + "text/plain": [ + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, 
min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=41, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "2.copy()" + "best_estimator" ] }, { From 38184b47a96340910370706ab85325049ee27100 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Thu, 18 Apr 2019 21:23:33 +0100 Subject: [PATCH 12/24] ALL THE MODELS DATA --- models.ipynb | 177 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 48 deletions(-) diff --git a/models.ipynb b/models.ipynb index d8de1c1..8640b57 100644 --- a/models.ipynb +++ b/models.ipynb @@ -140,12 +140,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "k_folds=10\n", - "n_iter_search = 20\n", + "n_iter_search = 5\n", "C = sp_randint(0, 10000)\n", "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n", "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -251,7 +251,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Optimization\n" + "# Optimization\n", + "* Interestingly, PCA mas all the 
models worst in this case." ] }, { @@ -290,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -298,47 +299,22 @@ "output_type": "stream", "text": [ "\n", - "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')\n", - "1/40\t\n", - "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", - " min_samples_split=0.107526262482814,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')\n", - "2/40\t3/40\t\n", - "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", - " min_samples_split=0.107526262482814,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')\n", - "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n", - "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False)\n", - "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40\t" + "Best score of 
7.332691708334687 with the estimator SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n", + "1/8\t\n", + "Best score of 6.522347109745663 with the estimator SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n", + "2/8\t3/8\t4/8\t" ] }, { - "ename": "ValueError", - "evalue": "Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n max_leaf_nodes=None, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`.", + "ename": "KeyboardInterrupt", + "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31m_RemoteTraceback\u001b[0m Traceback (most recent call last)", - "\u001b[0;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 418, in _process_worker\n r = call_item()\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 272, in __call__\n return self.fn(*self.args, **self.kwargs)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\", line 567, in __call__\n return self.func(*args, **kwargs)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in __call__\n for func, args, 
kwargs in self.items]\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in \n for func, args, kwargs in self.items]\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_validation.py\", line 514, in _fit_and_score\n estimator.set_params(**parameters)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 224, in set_params\n valid_params[key].set_params(**sub_params)\n File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 215, in set_params\n (key, self))\nValueError: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n max_leaf_nodes=None, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'). 
Check the list of available parameters with `estimator.get_params().keys()`.\n\"\"\"", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m 1513\u001b[0m evaluate_candidates(ParameterSampler(\n\u001b[1;32m 1514\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m random_state=self.random_state))\n\u001b[0m", @@ -346,9 +322,9 @@ "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 931\u001b[0m \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 932\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m 519\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m 520\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 430\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 431\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0mFINISHED\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 432\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 433\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36m__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 384\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 385\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n max_leaf_nodes=None, min_impurity_decrease=0.0,\n min_impurity_split=None, 
min_samples_leaf=1,\n min_samples_split=2, min_weight_fraction_leaf=0.0,\n presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`." + "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m 
\u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 297\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -357,7 +333,7 @@ "from OurPipeline import create_pipeline\n", "from sklearn.decomposition import PCA\n", "\n", - "optimizers=[AdaTree_optimizer, KNN_optimizer, SVR_optimizer]#[Tree_optimizer, Forest_optimizer \n", + "optimizers=[SVR_optimizer]#[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, \n", "weeks = [1,2]\n", "weeks_infected = [3,4]\n", "pca = [PCA(0.95), None]\n", @@ -653,12 +629,117 @@ "best_estimator" ] }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=6, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=1,\n", + " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", + " presort=False, random_state=None, splitter='best'),\n", + " learning_rate=1.0, loss='linear', n_estimators=41,\n", + " random_state=None), 2, 4, None, -7.412474155754652]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_attempt" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", + " 1,\n", + " 3,\n", + " None,\n", + " 
-6.522347109745663]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_attempt" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", + " 1,\n", + " 3,\n", + " PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False),\n", + " -7.332691708334687],\n", + " [SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", + " 1,\n", + " 3,\n", + " None,\n", + " -6.522347109745663],\n", + " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", + " 1,\n", + " 4,\n", + " PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False),\n", + " -7.8651335759783985],\n", + " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", + " 1,\n", + " 4,\n", + " None,\n", + " -6.53493877991638]]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predict" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "model = SVR()" + ] } ], "metadata": { From 1deaa13eb8a1ab7619ffb728831e757a8a4ac6c4 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 00:47:18 +0100 Subject: [PATCH 13/24] sequential predictions and with noise --- 
OurPipeline.py | 4 +- models.ipynb | 541 +++++++++++++++--------------------------- utils/LastInfected.py | 33 ++- utils/LastWeeks.py | 2 + 4 files changed, 219 insertions(+), 361 deletions(-) diff --git a/OurPipeline.py b/OurPipeline.py index 35531c0..40654cd 100644 --- a/OurPipeline.py +++ b/OurPipeline.py @@ -6,12 +6,12 @@ from utils.LastWeeks import LastWeeks from utils.LastInfected import LastInfected -def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, n_non_train=4): +def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, add_noise=False, noise_mean=None, noise_std=None, n_non_train=4): return Pipeline([ ('imputer', ContinuityImputer(attributes=attr[n_non_train:])), ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)), - ('l_infected', LastInfected(weeks=n_weeks_infected)), + ('l_infected', LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std)), ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])), ('scaler', StandardScaler()), ('pca', pca), diff --git a/models.ipynb b/models.ipynb index 8640b57..0f971f8 100644 --- a/models.ipynb +++ b/models.ipynb @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -252,88 +252,21 @@ "metadata": {}, "source": [ "# Optimization\n", - "* 
Interestingly, PCA mas all the models worst in this case." - ] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')\n", - "1/40\t\n", - "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", - " min_samples_split=0.107526262482814,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')\n", - "2/40\t3/40\t\n", - "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n", - " min_samples_split=0.107526262482814,\n", - " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", - " splitter='best')\n", - "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n", - "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False)\n", - "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40" + "* Interestingly, PCA makes all the models worst in this case.\n", + "* After the exaustive search, the best model was 
the SVR which obtained an MAE of 6.52." ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Best score of 7.332691708334687 with the estimator SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n", - "1/8\t\n", - "Best score of 6.522347109745663 with the estimator SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n", - "2/8\t3/8\t4/8\t" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 22\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m 1513\u001b[0m evaluate_candidates(ParameterSampler(\n\u001b[1;32m 1514\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m random_state=self.random_state))\n\u001b[0m", - 
"\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mevaluate_candidates\u001b[0;34m(candidate_params)\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 710\u001b[0m in product(candidate_params,\n\u001b[0;32m--> 711\u001b[0;31m cv.split(X, y, groups)))\n\u001b[0m\u001b[1;32m 712\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 713\u001b[0m \u001b[0mall_candidate_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcandidate_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 931\u001b[0m \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 932\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m 
519\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m 520\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 297\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "%autoreload\n", "from OurPipeline import create_pipeline\n", "from sklearn.decomposition import PCA\n", "\n", - "optimizers=[SVR_optimizer]#[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, \n", + "optimizers=[SVR_optimizer, Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]\n", "weeks = [1,2]\n", "weeks_infected = [3,4]\n", "pca = [PCA(0.95), None]\n", @@ -364,238 +297,30 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
estimatorweeksweeks_infectedPCAscore
0DecisionTreeRegressor(criterion='mae', max_dep...13PCA(copy=True, iterated_power='auto', n_compon...-12.752929
1DecisionTreeRegressor(criterion='mae', max_dep...13None-8.983460
2DecisionTreeRegressor(criterion='mae', max_dep...14PCA(copy=True, iterated_power='auto', n_compon...-13.050310
3DecisionTreeRegressor(criterion='mae', max_dep...14None-8.971744
4DecisionTreeRegressor(criterion='mae', max_dep...23PCA(copy=True, iterated_power='auto', n_compon...-13.235700
5DecisionTreeRegressor(criterion='mae', max_dep...23None-9.039628
6DecisionTreeRegressor(criterion='mae', max_dep...24PCA(copy=True, iterated_power='auto', n_compon...-12.913163
7DecisionTreeRegressor(criterion='mae', max_dep...24None-9.025844
8(DecisionTreeRegressor(criterion='mae', max_de...13PCA(copy=True, iterated_power='auto', n_compon...-9.806561
9(DecisionTreeRegressor(criterion='mae', max_de...13None-7.174190
10(DecisionTreeRegressor(criterion='mae', max_de...14PCA(copy=True, iterated_power='auto', n_compon...-10.178593
11(DecisionTreeRegressor(criterion='mae', max_de...14None-7.197634
12(DecisionTreeRegressor(criterion='mae', max_de...23PCA(copy=True, iterated_power='auto', n_compon...-10.119454
13(DecisionTreeRegressor(criterion='mae', max_de...23None-7.177250
14(DecisionTreeRegressor(criterion='mae', max_de...24PCA(copy=True, iterated_power='auto', n_compon...-10.353996
15(DecisionTreeRegressor(criterion='mae', max_de...24None-7.212562
\n", - "
" - ], - "text/plain": [ - " estimator weeks weeks_infected \\\n", - "0 DecisionTreeRegressor(criterion='mae', max_dep... 1 3 \n", - "1 DecisionTreeRegressor(criterion='mae', max_dep... 1 3 \n", - "2 DecisionTreeRegressor(criterion='mae', max_dep... 1 4 \n", - "3 DecisionTreeRegressor(criterion='mae', max_dep... 1 4 \n", - "4 DecisionTreeRegressor(criterion='mae', max_dep... 2 3 \n", - "5 DecisionTreeRegressor(criterion='mae', max_dep... 2 3 \n", - "6 DecisionTreeRegressor(criterion='mae', max_dep... 2 4 \n", - "7 DecisionTreeRegressor(criterion='mae', max_dep... 2 4 \n", - "8 (DecisionTreeRegressor(criterion='mae', max_de... 1 3 \n", - "9 (DecisionTreeRegressor(criterion='mae', max_de... 1 3 \n", - "10 (DecisionTreeRegressor(criterion='mae', max_de... 1 4 \n", - "11 (DecisionTreeRegressor(criterion='mae', max_de... 1 4 \n", - "12 (DecisionTreeRegressor(criterion='mae', max_de... 2 3 \n", - "13 (DecisionTreeRegressor(criterion='mae', max_de... 2 3 \n", - "14 (DecisionTreeRegressor(criterion='mae', max_de... 2 4 \n", - "15 (DecisionTreeRegressor(criterion='mae', max_de... 2 4 \n", - "\n", - " PCA score \n", - "0 PCA(copy=True, iterated_power='auto', n_compon... -12.752929 \n", - "1 None -8.983460 \n", - "2 PCA(copy=True, iterated_power='auto', n_compon... -13.050310 \n", - "3 None -8.971744 \n", - "4 PCA(copy=True, iterated_power='auto', n_compon... -13.235700 \n", - "5 None -9.039628 \n", - "6 PCA(copy=True, iterated_power='auto', n_compon... -12.913163 \n", - "7 None -9.025844 \n", - "8 PCA(copy=True, iterated_power='auto', n_compon... -9.806561 \n", - "9 None -7.174190 \n", - "10 PCA(copy=True, iterated_power='auto', n_compon... -10.178593 \n", - "11 None -7.197634 \n", - "12 PCA(copy=True, iterated_power='auto', n_compon... -10.119454 \n", - "13 None -7.177250 \n", - "14 PCA(copy=True, iterated_power='auto', n_compon... 
-10.353996 \n", - "15 None -7.212562 " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pd.DataFrame(results, columns=['estimator', 'weeks', 'weeks_infected', 'PCA', 'score'])" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False),\n", + "[SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", " 1,\n", " 3,\n", " None,\n", - " -7.174190213645762]" + " -6.522347109745663]" ] }, - "execution_count": 21, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -604,132 +329,243 @@ "best_attempt" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predict" + ] + }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 171, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=41, n_jobs=-1,\n", - " 
oob_score=False, random_state=None, verbose=0, warm_start=False)" + "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" ] }, - "execution_count": 25, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "best_estimator" + "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading test data" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(416, 24)\n" + ] + } + ], + "source": [ + "X_test_1 = pd.read_csv('data/dengue_features_test.csv')\n", + "print(X_test_1.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## One by one prediction\n", + "* Given that we are making sequential predictions, i.e.: the prediction from a week relies on the prediction from the previous weeks, we must make the transformations and predictions one by one.\n", + "* Given that this kind of prediction is very prone to a snowball effect on errors our first solution had an error of 26. To solve this we came up with the idea of adding noise to the train data. However for this solution we need to know both: the mean of the error and its standard deviation (*std*). 
We already know the mean (MAE), we just need to know the *std*" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 172, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=6, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=1,\n", - " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", - " presort=False, random_state=None, splitter='best'),\n", - " learning_rate=1.0, loss='linear', n_estimators=41,\n", - " random_state=None), 2, 4, None, -7.412474155754652]" + "416" ] }, - "execution_count": 32, + "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "best_attempt" + "predictions=[]\n", + "for idx in range(X_test_1.shape[0]):\n", + " x = pipeline.transform(X_test_1.loc[idx:idx,:])\n", + " pred = model.predict(x)\n", + " pred = int(np.round(pred))\n", + " pipeline.named_steps['l_infected'].append_y(pred)\n", + " predictions.append(pred)\n", + "len(predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculating an approximation of the *std*\n", + "* It is approximately 10.9. We can see that the MAE is close to the one calculated in the cross-validation." 
] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 216, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "from sklearn.model_selection import ShuffleSplit\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [], + "source": [ + "sp = ShuffleSplit(n_splits=1, train_size=1000, test_size=None, random_state=random_n)\n", + "for train, test in sp.split(X_train, y_train):\n", + " X_train_std = X_train[train]\n", + " y_train_std = y_train[train]\n", + " X_test_std = X_train[test]\n", + " y_test_std = y_train[test]\n", + "X_train_std.shape, y_train_std.shape\n", + "X_test_std.shape, y_test_std.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 222, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", - " 1,\n", - " 3,\n", - " None,\n", - " -6.522347109745663]" + "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" ] }, - "execution_count": 37, + "execution_count": 222, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "best_attempt" + "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n", + "model.fit(X_train_std, y_train_std)" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 234, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[[SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", - " 1,\n", - " 3,\n", - " PCA(copy=True, iterated_power='auto', 
n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False),\n", - " -7.332691708334687],\n", - " [SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", - " 1,\n", - " 3,\n", - " None,\n", - " -6.522347109745663],\n", - " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", - " 1,\n", - " 4,\n", - " PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", - " svd_solver='auto', tol=0.0, whiten=False),\n", - " -7.8651335759783985],\n", - " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n", - " 1,\n", - " 4,\n", - " None,\n", - " -6.53493877991638]]" + "(6.7785087719298245, 10.959317651673116)" ] }, - "execution_count": 38, + "execution_count": 234, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "results" + "predictions = model.predict(X_test_std)\n", + "predictions = list(map(lambda x: int(np.round(x)), predictions))\n", + "errors = list(map(abs, predictions - y_test_std))\n", + "np.mean(errors), np.std(errors)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Predict" + "# One by one prediction with noise" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n", + "model.fit(X_train, 
y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "predictions=[]\n", + "for idx in range(X_test_1.shape[0]):\n", + " x = pipeline.transform(X_test_1.loc[idx:idx,:])\n", + " pred = model.predict(x)\n", + " pred = int(np.round(pred))\n", + " pipeline.named_steps['l_infected'].append_y(pred)\n", + " predictions.append(pred)\n", + "len(predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submission" ] }, { @@ -738,7 +574,10 @@ "metadata": {}, "outputs": [], "source": [ - "model = SVR()" + "submit = pd.DataFrame(predictions, columns=['total_cases'])\n", + "x_3 = X_test_1.iloc[:,:3].copy()\n", + "submit = pd.concat([x_3, submit], axis=1)\n", + "submit.to_csv('data/submit.csv', index=False)" ] } ], diff --git a/utils/LastInfected.py b/utils/LastInfected.py index 1830dce..9854129 100644 --- a/utils/LastInfected.py +++ b/utils/LastInfected.py @@ -2,33 +2,50 @@ from collections import deque import numpy as np import pandas as pd +from random import gauss, choice class LastInfected(BaseEstimator, TransformerMixin): - def __init__(self, weeks=1, new_attributes_prefix='last_infected_', copy=True): + def __init__(self, weeks=1, new_attributes_prefix='last_infected_', add_noise=False, noise_mean=None, noise_std=None, copy=True): self.weeks=weeks self.new_attributes_prefix = new_attributes_prefix self.copy=copy dq = deque([0 for _ in range(weeks)]) self.last = {'sj': dq.copy(), 'iq': dq.copy()} + self.add_noise = add_noise + self.noise_mean = noise_mean + self.noise_std = noise_std + + self.first = True def fit(self, X, y): - self.y = y + self.y = y.to_list() return self def transform(self, X, model=None): if self.copy: X = X.copy() + X.reset_index(drop=True, inplace=True) + r = np.ndarray(shape=[X.shape[0], self.weeks]) - for idx, n_infected in enumerate(self.y): - city = X.loc[idx, 'city'] - r[idx] = self.last[city] - self.last[city].appendleft(n_infected) - 
self.last[city].pop() + for idx, x in X.iterrows(): + self.city = x['city'] + r[idx] = self.last[self.city] + if self.first: + self.append_y(self.y[idx]) r = pd.DataFrame(r, columns=[self.new_attributes_prefix + str(week) for week in range(self.weeks)]) X = pd.concat([X, r], axis=1) - return X \ No newline at end of file + self.first=False + + return X + + def append_y(self, new_y): + if self.add_noise: + noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std))) + new_y += noise + self.last[self.city].appendleft(new_y) + self.last[self.city].pop() \ No newline at end of file diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py index 0de33dd..8591f95 100644 --- a/utils/LastWeeks.py +++ b/utils/LastWeeks.py @@ -22,6 +22,8 @@ def transform(self, X): if self.copy: X = X.copy() + X.reset_index(drop=True, inplace=True) + r = np.ndarray(shape=[X.shape[0], self.weeks, len(self.attributes)]) for idx, week in X.iterrows(): From 04d191308ce84c9580792442988cdf4dfa16b1ae Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 00:56:46 +0100 Subject: [PATCH 14/24] finding optimal SVR --- models.ipynb | 70 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/models.ipynb b/models.ipynb index 0f971f8..cafc207 100644 --- a/models.ipynb +++ b/models.ipynb @@ -145,7 +145,7 @@ "outputs": [], "source": [ "k_folds=10\n", - "n_iter_search = 5\n", + "n_iter_search = 20\n", "C = sp_randint(0, 10000)\n", "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n", "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" @@ -380,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -524,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 10, + 
"execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -537,19 +537,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "SVR_optimizer.fit(X_train, y_train)\n", + "model=SVR_optimizer.best_estimator_\n", + "SVR_optimizer.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n", "model.fit(X_train, y_train)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "416" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "predictions=[]\n", "for idx in range(X_test_1.shape[0]):\n", @@ -570,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -579,6 +624,13 @@ "submit = pd.concat([x_3, submit], axis=1)\n", "submit.to_csv('data/submit.csv', index=False)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 31306cd5c4cc16bf7e853d36d718c4b84c1c8c95 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 00:59:54 +0100 Subject: [PATCH 15/24] tiny 
fix so that noise doesn't include negatives --- models.ipynb | 55 ++++++++----------------------------------- utils/LastInfected.py | 2 ++ 2 files changed, 12 insertions(+), 45 deletions(-) diff --git a/models.ipynb b/models.ipynb index cafc207..7644fbc 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -380,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -524,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -537,21 +537,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "SVR_optimizer.fit(X_train, y_train)\n", "model=SVR_optimizer.best_estimator_\n", @@ -560,41 +548,18 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model.fit(X_train, y_train)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"416" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "predictions=[]\n", "for idx in range(X_test_1.shape[0]):\n", diff --git a/utils/LastInfected.py b/utils/LastInfected.py index 9854129..332c59c 100644 --- a/utils/LastInfected.py +++ b/utils/LastInfected.py @@ -47,5 +47,7 @@ def append_y(self, new_y): if self.add_noise: noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std))) new_y += noise + if new_y < 0: + new_y = 0 self.last[self.city].appendleft(new_y) self.last[self.city].pop() \ No newline at end of file From 3d7fb4344fa9059408cdaa3da67a598090cda6a3 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 10:11:11 +0100 Subject: [PATCH 16/24] after optimizing, again --- models.ipynb | 94 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 19 deletions(-) diff --git a/models.ipynb b/models.ipynb index 7644fbc..5e774b7 100644 --- a/models.ipynb +++ b/models.ipynb @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -258,18 +258,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Best score of 10.757898351648352 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " 
min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", + " min_samples_split=0.08977730688967958,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')\n", + "1/4\t\n", + "Best score of 8.57545699492815 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)\n", + "2/4\t3/4\t4/4\t" + ] + } + ], "source": [ "%autoreload\n", "from OurPipeline import create_pipeline\n", "from sklearn.decomposition import PCA\n", "\n", - "optimizers=[SVR_optimizer, Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]\n", - "weeks = [1,2]\n", - "weeks_infected = [3,4]\n", - "pca = [PCA(0.95), None]\n", + "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]#, SVR_optimizer]\n", + "weeks = [1]\n", + "weeks_infected = [3]\n", + "pca = [None]\n", "\n", "n_total = len(optimizers) * len(weeks) * len(weeks_infected) * len(pca)\n", "\n", @@ -281,7 +303,7 @@ " for w in weeks:\n", " for wi in weeks_infected:\n", " for p in pca:\n", - " pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, pca=p)\n", + " pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n", " pipeline.fit(X_train_1, y_train)\n", " score = pipeline.named_steps['est_opt'].best_score_\n", " best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n", @@ -380,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -524,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, 
"metadata": {}, "outputs": [], "source": [ @@ -537,9 +559,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "-8.436222184081323" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "SVR_optimizer.fit(X_train, y_train)\n", "model=SVR_optimizer.best_estimator_\n", @@ -548,18 +581,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.fit(X_train, y_train)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "416" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "predictions=[]\n", "for idx in range(X_test_1.shape[0]):\n", From 9e33ca23526a897651aa95fd8065ae03725ce17b Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 10:44:26 +0100 Subject: [PATCH 17/24] tried disabling the double layer of noise --- models.ipynb | 1150 ++++++++++++++++++++++++++++++++++++++++- utils/LastInfected.py | 1 + 2 files changed, 1146 insertions(+), 5 deletions(-) diff --git a/models.ipynb b/models.ipynb index 5e774b7..4326211 100644 --- a/models.ipynb +++ b/models.ipynb @@ -541,12 +541,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# One by one prediction with noise" + "# One by one prediction with noise\n", + "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 
layers of noise: our synthetic noise and the one created by the predictive model." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -602,7 +603,20 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -611,7 +625,7 @@ "416" ] }, - "execution_count": 14, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -636,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -646,6 +660,1132 @@ "submit.to_csv('data/submit.csv', index=False)" ] }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cityyearweekofyeartotal_cases
0sj2008183
1sj2008192
2sj2008202
3sj2008213
4sj2008228
5sj2008238
6sj20082410
7sj20082518
8sj20082620
9sj20082725
10sj20082826
11sj20082931
12sj20083036
13sj20083138
14sj20083239
15sj20083335
16sj20083441
17sj20083546
18sj20083646
19sj20083747
20sj20083846
21sj20083942
22sj20084052
23sj20084152
24sj20084248
25sj20084349
26sj20084445
27sj20084544
28sj20084644
29sj20084746
...............
386iq2012486
387iq2012490
388iq2012500
389iq201251-2
390iq201311
391iq20132-2
392iq20133-2
393iq20134-1
394iq201351
395iq201364
396iq201370
397iq201384
398iq201391
399iq201310-1
400iq201311-1
401iq201312-2
402iq2013132
403iq2013147
404iq2013152
405iq2013163
406iq2013176
407iq2013182
408iq2013190
409iq2013203
410iq2013211
411iq2013223
412iq2013230
413iq201324-2
414iq201325-1
415iq201326-2
\n", + "

416 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " city year weekofyear total_cases\n", + "0 sj 2008 18 3\n", + "1 sj 2008 19 2\n", + "2 sj 2008 20 2\n", + "3 sj 2008 21 3\n", + "4 sj 2008 22 8\n", + "5 sj 2008 23 8\n", + "6 sj 2008 24 10\n", + "7 sj 2008 25 18\n", + "8 sj 2008 26 20\n", + "9 sj 2008 27 25\n", + "10 sj 2008 28 26\n", + "11 sj 2008 29 31\n", + "12 sj 2008 30 36\n", + "13 sj 2008 31 38\n", + "14 sj 2008 32 39\n", + "15 sj 2008 33 35\n", + "16 sj 2008 34 41\n", + "17 sj 2008 35 46\n", + "18 sj 2008 36 46\n", + "19 sj 2008 37 47\n", + "20 sj 2008 38 46\n", + "21 sj 2008 39 42\n", + "22 sj 2008 40 52\n", + "23 sj 2008 41 52\n", + "24 sj 2008 42 48\n", + "25 sj 2008 43 49\n", + "26 sj 2008 44 45\n", + "27 sj 2008 45 44\n", + "28 sj 2008 46 44\n", + "29 sj 2008 47 46\n", + ".. ... ... ... ...\n", + "386 iq 2012 48 6\n", + "387 iq 2012 49 0\n", + "388 iq 2012 50 0\n", + "389 iq 2012 51 -2\n", + "390 iq 2013 1 1\n", + "391 iq 2013 2 -2\n", + "392 iq 2013 3 -2\n", + "393 iq 2013 4 -1\n", + "394 iq 2013 5 1\n", + "395 iq 2013 6 4\n", + "396 iq 2013 7 0\n", + "397 iq 2013 8 4\n", + "398 iq 2013 9 1\n", + "399 iq 2013 10 -1\n", + "400 iq 2013 11 -1\n", + "401 iq 2013 12 -2\n", + "402 iq 2013 13 2\n", + "403 iq 2013 14 7\n", + "404 iq 2013 15 2\n", + "405 iq 2013 16 3\n", + "406 iq 2013 17 6\n", + "407 iq 2013 18 2\n", + "408 iq 2013 19 0\n", + "409 iq 2013 20 3\n", + "410 iq 2013 21 1\n", + "411 iq 2013 22 3\n", + "412 iq 2013 23 0\n", + "413 iq 2013 24 -2\n", + "414 iq 2013 25 -1\n", + "415 iq 2013 26 -2\n", + "\n", + "[416 rows x 4 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "submit" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ndvi_nendvi_nwndvi_sendvi_swprecipitation_amt_mmreanalysis_air_temp_kreanalysis_avg_temp_kreanalysis_dew_point_temp_kreanalysis_max_air_temp_kreanalysis_min_air_temp_k...last_weeks_0_reanalysis_specific_humidity_g_per_kglast_weeks_0_reanalysis_tdtr_klast_weeks_0_station_avg_temp_clast_weeks_0_station_diur_temp_rng_clast_weeks_0_station_max_temp_clast_weeks_0_station_min_temp_clast_weeks_0_station_precip_mmlast_infected_0last_infected_1last_infected_2
00.1226000.1037250.1984830.17761712.42297.572857297.742857292.414286299.8295.9...17.0871432.85714327.4000007.36428632.822.223.80.00.00.0
10.1699000.1421750.1623570.15548622.82298.211429298.442857293.951429300.9296.4...14.0128572.62857125.4428576.90000029.420.016.06.00.00.0
20.0322500.1729670.1572000.17084334.54298.781429298.878571295.434286300.5297.3...15.3728572.37142926.7142866.37142931.722.28.69.06.00.0
30.1286330.2450670.2275570.23588615.36298.987143299.228571295.310000301.4297.0...16.8485712.30000026.7142866.48571432.222.841.45.09.06.0
40.1962000.2622000.2512000.2473407.52299.518571299.664286295.821429301.9297.5...16.6728572.42857127.4714296.77142933.323.34.00.05.09.0
50.1962000.1748500.2543140.1817439.58299.630000299.764286295.851429302.4298.1...17.2100003.01428628.9428579.37142935.023.95.87.00.05.0
60.1129000.0928000.2050710.2102713.48299.207143299.221429295.865714301.3297.7...17.2128572.10000028.1142866.94285734.423.939.13.07.00.0
70.0725000.0725000.1514710.133029151.12299.591429299.528571296.531429300.6298.4...17.2342862.04285727.4142866.77142932.223.329.70.03.07.0
80.1024500.1461750.1255710.12360019.32299.578571299.557143296.378571302.1297.7...17.9771431.57142928.3714297.68571433.922.821.11.00.03.0
90.1024500.1215500.1606830.20256714.41300.154286300.278571296.651429302.3298.7...17.7900001.88571428.3285717.38571433.922.821.12.01.00.0
100.1928750.0823500.1919430.15292922.27299.512857299.592857296.041429301.8298.0...18.0714292.01428628.3285716.51428633.924.41.17.02.01.0
110.2916000.2118000.3012000.28066759.17299.667143299.750000296.334286302.0297.3...17.4185712.15714327.5571437.15714331.721.763.70.07.02.0
120.1505670.1717000.2269000.21455716.48299.558571299.635714295.960000301.8297.1...17.7371432.41428628.1285716.90000032.823.912.27.00.07.0
130.1505670.2471500.3797000.38135732.66299.862857299.950000296.172857303.0298.3...17.3414292.07142928.1142866.35714331.722.832.60.07.00.0
140.1505670.0643330.1644430.13885728.80300.391429300.478571296.532857302.5298.8...17.5942862.58571428.2428578.08571434.422.837.611.00.07.0
\n", + "

15 rows × 43 columns

\n", + "
" + ], + "text/plain": [ + " ndvi_ne ndvi_nw ndvi_se ndvi_sw precipitation_amt_mm \\\n", + "0 0.122600 0.103725 0.198483 0.177617 12.42 \n", + "1 0.169900 0.142175 0.162357 0.155486 22.82 \n", + "2 0.032250 0.172967 0.157200 0.170843 34.54 \n", + "3 0.128633 0.245067 0.227557 0.235886 15.36 \n", + "4 0.196200 0.262200 0.251200 0.247340 7.52 \n", + "5 0.196200 0.174850 0.254314 0.181743 9.58 \n", + "6 0.112900 0.092800 0.205071 0.210271 3.48 \n", + "7 0.072500 0.072500 0.151471 0.133029 151.12 \n", + "8 0.102450 0.146175 0.125571 0.123600 19.32 \n", + "9 0.102450 0.121550 0.160683 0.202567 14.41 \n", + "10 0.192875 0.082350 0.191943 0.152929 22.27 \n", + "11 0.291600 0.211800 0.301200 0.280667 59.17 \n", + "12 0.150567 0.171700 0.226900 0.214557 16.48 \n", + "13 0.150567 0.247150 0.379700 0.381357 32.66 \n", + "14 0.150567 0.064333 0.164443 0.138857 28.80 \n", + "\n", + " reanalysis_air_temp_k reanalysis_avg_temp_k reanalysis_dew_point_temp_k \\\n", + "0 297.572857 297.742857 292.414286 \n", + "1 298.211429 298.442857 293.951429 \n", + "2 298.781429 298.878571 295.434286 \n", + "3 298.987143 299.228571 295.310000 \n", + "4 299.518571 299.664286 295.821429 \n", + "5 299.630000 299.764286 295.851429 \n", + "6 299.207143 299.221429 295.865714 \n", + "7 299.591429 299.528571 296.531429 \n", + "8 299.578571 299.557143 296.378571 \n", + "9 300.154286 300.278571 296.651429 \n", + "10 299.512857 299.592857 296.041429 \n", + "11 299.667143 299.750000 296.334286 \n", + "12 299.558571 299.635714 295.960000 \n", + "13 299.862857 299.950000 296.172857 \n", + "14 300.391429 300.478571 296.532857 \n", + "\n", + " reanalysis_max_air_temp_k reanalysis_min_air_temp_k ... \\\n", + "0 299.8 295.9 ... \n", + "1 300.9 296.4 ... \n", + "2 300.5 297.3 ... \n", + "3 301.4 297.0 ... \n", + "4 301.9 297.5 ... \n", + "5 302.4 298.1 ... \n", + "6 301.3 297.7 ... \n", + "7 300.6 298.4 ... \n", + "8 302.1 297.7 ... \n", + "9 302.3 298.7 ... \n", + "10 301.8 298.0 ... \n", + "11 302.0 297.3 ... 
\n", + "12 301.8 297.1 ... \n", + "13 303.0 298.3 ... \n", + "14 302.5 298.8 ... \n", + "\n", + " last_weeks_0_reanalysis_specific_humidity_g_per_kg \\\n", + "0 17.087143 \n", + "1 14.012857 \n", + "2 15.372857 \n", + "3 16.848571 \n", + "4 16.672857 \n", + "5 17.210000 \n", + "6 17.212857 \n", + "7 17.234286 \n", + "8 17.977143 \n", + "9 17.790000 \n", + "10 18.071429 \n", + "11 17.418571 \n", + "12 17.737143 \n", + "13 17.341429 \n", + "14 17.594286 \n", + "\n", + " last_weeks_0_reanalysis_tdtr_k last_weeks_0_station_avg_temp_c \\\n", + "0 2.857143 27.400000 \n", + "1 2.628571 25.442857 \n", + "2 2.371429 26.714286 \n", + "3 2.300000 26.714286 \n", + "4 2.428571 27.471429 \n", + "5 3.014286 28.942857 \n", + "6 2.100000 28.114286 \n", + "7 2.042857 27.414286 \n", + "8 1.571429 28.371429 \n", + "9 1.885714 28.328571 \n", + "10 2.014286 28.328571 \n", + "11 2.157143 27.557143 \n", + "12 2.414286 28.128571 \n", + "13 2.071429 28.114286 \n", + "14 2.585714 28.242857 \n", + "\n", + " last_weeks_0_station_diur_temp_rng_c last_weeks_0_station_max_temp_c \\\n", + "0 7.364286 32.8 \n", + "1 6.900000 29.4 \n", + "2 6.371429 31.7 \n", + "3 6.485714 32.2 \n", + "4 6.771429 33.3 \n", + "5 9.371429 35.0 \n", + "6 6.942857 34.4 \n", + "7 6.771429 32.2 \n", + "8 7.685714 33.9 \n", + "9 7.385714 33.9 \n", + "10 6.514286 33.9 \n", + "11 7.157143 31.7 \n", + "12 6.900000 32.8 \n", + "13 6.357143 31.7 \n", + "14 8.085714 34.4 \n", + "\n", + " last_weeks_0_station_min_temp_c last_weeks_0_station_precip_mm \\\n", + "0 22.2 23.8 \n", + "1 20.0 16.0 \n", + "2 22.2 8.6 \n", + "3 22.8 41.4 \n", + "4 23.3 4.0 \n", + "5 23.9 5.8 \n", + "6 23.9 39.1 \n", + "7 23.3 29.7 \n", + "8 22.8 21.1 \n", + "9 22.8 21.1 \n", + "10 24.4 1.1 \n", + "11 21.7 63.7 \n", + "12 23.9 12.2 \n", + "13 22.8 32.6 \n", + "14 22.8 37.6 \n", + "\n", + " last_infected_0 last_infected_1 last_infected_2 \n", + "0 0.0 0.0 0.0 \n", + "1 6.0 0.0 0.0 \n", + "2 9.0 6.0 0.0 \n", + "3 5.0 9.0 6.0 \n", + "4 0.0 5.0 9.0 \n", + "5 
7.0 0.0 5.0 \n", + "6 3.0 7.0 0.0 \n", + "7 0.0 3.0 7.0 \n", + "8 1.0 0.0 3.0 \n", + "9 2.0 1.0 0.0 \n", + "10 7.0 2.0 1.0 \n", + "11 0.0 7.0 2.0 \n", + "12 7.0 0.0 7.0 \n", + "13 0.0 7.0 0.0 \n", + "14 11.0 0.0 7.0 \n", + "\n", + "[15 rows x 43 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.head(15)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", + " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/utils/LastInfected.py b/utils/LastInfected.py index 332c59c..41ab05f 100644 --- a/utils/LastInfected.py +++ b/utils/LastInfected.py @@ -47,6 +47,7 @@ def append_y(self, new_y): if self.add_noise: noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std))) new_y += noise + print('With noise {}!!'.format(new_y)) if new_y < 0: new_y = 0 self.last[self.city].appendleft(new_y) From 75624bfa59f6275881593316027189e522d2c366 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 13:03:03 +0100 Subject: [PATCH 18/24] made the noise less agressive --- OurPipeline.py | 2 +- models.ipynb | 1355 ++++++++++------------------------------- utils/LastInfected.py | 5 +- 3 files changed, 326 insertions(+), 1036 deletions(-) diff --git a/OurPipeline.py b/OurPipeline.py index 40654cd..8a1d1dd 100644 --- a/OurPipeline.py +++ b/OurPipeline.py @@ -13,7 +13,7 @@ def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, p ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)), ('l_infected', LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, 
noise_std=noise_std)), ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])), - ('scaler', StandardScaler()), + #('scaler', StandardScaler()), ('pca', pca), ('est_opt', estimator_optimizer), ] diff --git a/models.ipynb b/models.ipynb index 4326211..011b2e3 100644 --- a/models.ipynb +++ b/models.ipynb @@ -537,62 +537,92 @@ "np.mean(errors), np.std(errors)" ] }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6.49747, 4.943615438027113)" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from random import choice, gauss\n", + "r=[]\n", + "for _ in range(100000):\n", + " r.append(int(np.round(choice([-1,1]) * gauss(mu=0, sigma=8.2))))\n", + "r=np.abs(r)\n", + "np.mean(r), np.std(r)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ "# One by one prediction with noise\n", - "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model." + "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n", + "* A very likely guess is that the errors when y is low is much smaller than when y is high." 
] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ "%autoreload\n", "from OurPipeline import create_pipeline\n", "\n", - "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=0, noise_std=8.2, pca=None)\n", "X_train = pipeline.fit_transform(X_train_1, y_train)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 121, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "-8.436222184081323" + "-8.57545699492815" ] }, - "execution_count": 12, + "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "SVR_optimizer.fit(X_train, y_train)\n", - "model=SVR_optimizer.best_estimator_\n", - "SVR_optimizer.best_score_" + "#Forest_optimizer.fit(X_train, y_train)\n", + "model=Forest_optimizer.best_estimator_\n", + "Forest_optimizer.best_score_" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 127, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)" ] }, - "execution_count": 13, + "execution_count": 127, "metadata": {}, "output_type": "execute_result" } @@ -603,20 +633,7 @@ }, { "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "%autoreload\n", - "from OurPipeline import 
create_pipeline\n", - "\n", - "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", - "X_train = pipeline.fit_transform(X_train_1, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, + "execution_count": 141, "metadata": {}, "outputs": [ { @@ -625,15 +642,23 @@ "416" ] }, - "execution_count": 31, + "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", + "pipeline.fit_transform(X_train_1, y_train)\n", + "\n", + "X_test_f = pd.DataFrame([], columns=attr[4:])\n", "predictions=[]\n", "for idx in range(X_test_1.shape[0]):\n", " x = pipeline.transform(X_test_1.loc[idx:idx,:])\n", + " X_test_f = X_test_f.append(x, sort=False, ignore_index=True)\n", " pred = model.predict(x)\n", " pred = int(np.round(pred))\n", " pipeline.named_steps['l_infected'].append_y(pred)\n", @@ -650,7 +675,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ @@ -662,548 +687,7 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cityyearweekofyeartotal_cases
0sj2008183
1sj2008192
2sj2008202
3sj2008213
4sj2008228
5sj2008238
6sj20082410
7sj20082518
8sj20082620
9sj20082725
10sj20082826
11sj20082931
12sj20083036
13sj20083138
14sj20083239
15sj20083335
16sj20083441
17sj20083546
18sj20083646
19sj20083747
20sj20083846
21sj20083942
22sj20084052
23sj20084152
24sj20084248
25sj20084349
26sj20084445
27sj20084544
28sj20084644
29sj20084746
...............
386iq2012486
387iq2012490
388iq2012500
389iq201251-2
390iq201311
391iq20132-2
392iq20133-2
393iq20134-1
394iq201351
395iq201364
396iq201370
397iq201384
398iq201391
399iq201310-1
400iq201311-1
401iq201312-2
402iq2013132
403iq2013147
404iq2013152
405iq2013163
406iq2013176
407iq2013182
408iq2013190
409iq2013203
410iq2013211
411iq2013223
412iq2013230
413iq201324-2
414iq201325-1
415iq201326-2
\n", - "

416 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " city year weekofyear total_cases\n", - "0 sj 2008 18 3\n", - "1 sj 2008 19 2\n", - "2 sj 2008 20 2\n", - "3 sj 2008 21 3\n", - "4 sj 2008 22 8\n", - "5 sj 2008 23 8\n", - "6 sj 2008 24 10\n", - "7 sj 2008 25 18\n", - "8 sj 2008 26 20\n", - "9 sj 2008 27 25\n", - "10 sj 2008 28 26\n", - "11 sj 2008 29 31\n", - "12 sj 2008 30 36\n", - "13 sj 2008 31 38\n", - "14 sj 2008 32 39\n", - "15 sj 2008 33 35\n", - "16 sj 2008 34 41\n", - "17 sj 2008 35 46\n", - "18 sj 2008 36 46\n", - "19 sj 2008 37 47\n", - "20 sj 2008 38 46\n", - "21 sj 2008 39 42\n", - "22 sj 2008 40 52\n", - "23 sj 2008 41 52\n", - "24 sj 2008 42 48\n", - "25 sj 2008 43 49\n", - "26 sj 2008 44 45\n", - "27 sj 2008 45 44\n", - "28 sj 2008 46 44\n", - "29 sj 2008 47 46\n", - ".. ... ... ... ...\n", - "386 iq 2012 48 6\n", - "387 iq 2012 49 0\n", - "388 iq 2012 50 0\n", - "389 iq 2012 51 -2\n", - "390 iq 2013 1 1\n", - "391 iq 2013 2 -2\n", - "392 iq 2013 3 -2\n", - "393 iq 2013 4 -1\n", - "394 iq 2013 5 1\n", - "395 iq 2013 6 4\n", - "396 iq 2013 7 0\n", - "397 iq 2013 8 4\n", - "398 iq 2013 9 1\n", - "399 iq 2013 10 -1\n", - "400 iq 2013 11 -1\n", - "401 iq 2013 12 -2\n", - "402 iq 2013 13 2\n", - "403 iq 2013 14 7\n", - "404 iq 2013 15 2\n", - "405 iq 2013 16 3\n", - "406 iq 2013 17 6\n", - "407 iq 2013 18 2\n", - "408 iq 2013 19 0\n", - "409 iq 2013 20 3\n", - "410 iq 2013 21 1\n", - "411 iq 2013 22 3\n", - "412 iq 2013 23 0\n", - "413 iq 2013 24 -2\n", - "414 iq 2013 25 -1\n", - "415 iq 2013 26 -2\n", - "\n", - "[416 rows x 4 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "submit" - ] - }, - { - "cell_type": "code", - "execution_count": 19, + "execution_count": 149, "metadata": {}, "outputs": [ { @@ -1238,7 +722,6 @@ " reanalysis_max_air_temp_k\n", " reanalysis_min_air_temp_k\n", " ...\n", - " last_weeks_0_reanalysis_specific_humidity_g_per_kg\n", " last_weeks_0_reanalysis_tdtr_k\n", " 
last_weeks_0_station_avg_temp_c\n", " last_weeks_0_station_diur_temp_rng_c\n", @@ -1248,542 +731,350 @@ " last_infected_0\n", " last_infected_1\n", " last_infected_2\n", + " pred\n", " \n", " \n", " \n", " \n", " 0\n", - " 0.122600\n", - " 0.103725\n", - " 0.198483\n", - " 0.177617\n", - " 12.42\n", - " 297.572857\n", - " 297.742857\n", - " 292.414286\n", - " 299.8\n", - " 295.9\n", + " -0.018900\n", + " -0.018900\n", + " 0.102729\n", + " 0.091200\n", + " 78.60\n", + " 298.492857\n", + " 298.550000\n", + " 294.527143\n", + " 301.1\n", + " 296.4\n", " ...\n", - " 17.087143\n", - " 2.857143\n", - " 27.400000\n", - " 7.364286\n", - " 32.8\n", - " 22.2\n", - " 23.8\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 3.957143\n", + " 27.042857\n", + " 7.514286\n", + " 31.7\n", + " 23.3\n", + " 0.3\n", + " 5.0\n", + " 3.0\n", + " 1.0\n", + " 6\n", " \n", " \n", " 1\n", - " 0.169900\n", - " 0.142175\n", - " 0.162357\n", - " 0.155486\n", - " 22.82\n", - " 298.211429\n", - " 298.442857\n", - " 293.951429\n", - " 300.9\n", - " 296.4\n", + " -0.018000\n", + " -0.012400\n", + " 0.082043\n", + " 0.072314\n", + " 12.56\n", + " 298.475714\n", + " 298.557143\n", + " 294.395714\n", + " 300.8\n", + " 296.7\n", " ...\n", - " 14.012857\n", - " 2.628571\n", - " 25.442857\n", - " 6.900000\n", - " 29.4\n", - " 20.0\n", - " 16.0\n", + " 3.128571\n", + " 26.528571\n", + " 7.057143\n", + " 33.3\n", + " 21.7\n", + " 75.2\n", " 6.0\n", - " 0.0\n", - " 0.0\n", + " 5.0\n", + " 3.0\n", + " 7\n", " \n", " \n", " 2\n", - " 0.032250\n", - " 0.172967\n", - " 0.157200\n", - " 0.170843\n", - " 34.54\n", - " 298.781429\n", - " 298.878571\n", - " 295.434286\n", - " 300.5\n", - " 297.3\n", + " -0.001500\n", + " -0.012400\n", + " 0.151083\n", + " 0.091529\n", + " 3.66\n", + " 299.455714\n", + " 299.357143\n", + " 295.308571\n", + " 302.2\n", + " 296.4\n", " ...\n", - " 15.372857\n", - " 2.371429\n", - " 26.714286\n", - " 6.371429\n", - " 31.7\n", + " 2.571429\n", + " 26.071429\n", + " 5.557143\n", + " 30.0\n", " 
22.2\n", - " 8.6\n", - " 9.0\n", + " 34.3\n", + " 7.0\n", " 6.0\n", - " 0.0\n", + " 5.0\n", + " 6\n", " \n", " \n", " 3\n", - " 0.128633\n", - " 0.245067\n", - " 0.227557\n", - " 0.235886\n", - " 15.36\n", - " 298.987143\n", - " 299.228571\n", - " 295.310000\n", - " 301.4\n", - " 297.0\n", + " -0.001500\n", + " -0.019867\n", + " 0.124329\n", + " 0.125686\n", + " 0.00\n", + " 299.690000\n", + " 299.728571\n", + " 294.402857\n", + " 303.0\n", + " 296.9\n", " ...\n", - " 16.848571\n", - " 2.300000\n", - " 26.714286\n", - " 6.485714\n", - " 32.2\n", + " 4.428571\n", + " 27.928571\n", + " 7.785714\n", + " 32.8\n", " 22.8\n", - " 41.4\n", - " 5.0\n", - " 9.0\n", + " 3.0\n", + " 6.0\n", + " 7.0\n", " 6.0\n", + " 6\n", " \n", " \n", " 4\n", - " 0.196200\n", - " 0.262200\n", - " 0.251200\n", - " 0.247340\n", - " 7.52\n", - " 299.518571\n", - " 299.664286\n", - " 295.821429\n", - " 301.9\n", - " 297.5\n", + " 0.056800\n", + " 0.039833\n", + " 0.062267\n", + " 0.075914\n", + " 0.76\n", + " 299.780000\n", + " 299.671429\n", + " 294.760000\n", + " 302.3\n", + " 297.3\n", " ...\n", - " 16.672857\n", - " 2.428571\n", - " 27.471429\n", - " 6.771429\n", + " 4.342857\n", + " 28.057143\n", + " 6.271429\n", " 33.3\n", - " 23.3\n", - " 4.0\n", - " 0.0\n", - " 5.0\n", - " 9.0\n", + " 24.4\n", + " 0.3\n", + " 6.0\n", + " 6.0\n", + " 7.0\n", + " 6\n", " \n", " \n", " 5\n", - " 0.196200\n", - " 0.174850\n", - " 0.254314\n", - " 0.181743\n", - " 9.58\n", - " 299.630000\n", - " 299.764286\n", - " 295.851429\n", - " 302.4\n", - " 298.1\n", + " -0.044000\n", + " -0.030467\n", + " 0.132000\n", + " 0.083529\n", + " 71.17\n", + " 299.768571\n", + " 299.728571\n", + " 295.314286\n", + " 301.9\n", + " 297.6\n", " ...\n", - " 17.210000\n", - " 3.014286\n", - " 28.942857\n", - " 9.371429\n", - " 35.0\n", - " 23.9\n", - " 5.8\n", - " 7.0\n", - " 0.0\n", - " 5.0\n", + " 3.542857\n", + " 27.614286\n", + " 7.085714\n", + " 33.3\n", + " 23.3\n", + " 84.1\n", + " 6.0\n", + " 6.0\n", + " 6.0\n", + " 7\n", " 
\n", " \n", " 6\n", - " 0.112900\n", - " 0.092800\n", - " 0.205071\n", - " 0.210271\n", - " 3.48\n", - " 299.207143\n", - " 299.221429\n", - " 295.865714\n", - " 301.3\n", - " 297.7\n", + " -0.044300\n", + " -0.024925\n", + " 0.132271\n", + " 0.159157\n", + " 48.99\n", + " 300.062857\n", + " 300.007143\n", + " 295.650000\n", + " 302.4\n", + " 297.5\n", " ...\n", - " 17.212857\n", - " 2.100000\n", - " 28.114286\n", - " 6.942857\n", - " 34.4\n", - " 23.9\n", - " 39.1\n", - " 3.0\n", + " 2.857143\n", + " 28.000000\n", + " 5.171429\n", + " 32.8\n", + " 25.0\n", + " 27.7\n", " 7.0\n", - " 0.0\n", + " 6.0\n", + " 6.0\n", + " 7\n", " \n", " \n", " 7\n", - " 0.072500\n", - " 0.072500\n", - " 0.151471\n", - " 0.133029\n", - " 151.12\n", - " 299.591429\n", - " 299.528571\n", - " 296.531429\n", - " 300.6\n", - " 298.4\n", + " -0.044300\n", + " 0.082150\n", + " 0.144371\n", + " 0.116729\n", + " 30.81\n", + " 300.484286\n", + " 300.578571\n", + " 295.997143\n", + " 303.5\n", + " 297.5\n", " ...\n", - " 17.234286\n", - " 2.042857\n", - " 27.414286\n", - " 6.771429\n", - " 32.2\n", + " 3.157143\n", + " 27.400000\n", + " 6.042857\n", + " 31.1\n", " 23.3\n", - " 29.7\n", - " 0.0\n", - " 3.0\n", + " 91.7\n", " 7.0\n", + " 7.0\n", + " 6.0\n", + " 6\n", " \n", " \n", " 8\n", - " 0.102450\n", - " 0.146175\n", - " 0.125571\n", - " 0.123600\n", - " 19.32\n", - " 299.578571\n", - " 299.557143\n", - " 296.378571\n", - " 302.1\n", - " 297.7\n", - " ...\n", - " 17.977143\n", - " 1.571429\n", - " 28.371429\n", - " 7.685714\n", - " 33.9\n", - " 22.8\n", - " 21.1\n", - " 1.0\n", - " 0.0\n", - " 3.0\n", - " \n", - " \n", - " 9\n", - " 0.102450\n", - " 0.121550\n", - " 0.160683\n", - " 0.202567\n", - " 14.41\n", - " 300.154286\n", - " 300.278571\n", - " 296.651429\n", - " 302.3\n", - " 298.7\n", - " ...\n", - " 17.790000\n", - " 1.885714\n", - " 28.328571\n", - " 7.385714\n", - " 33.9\n", - " 22.8\n", - " 21.1\n", - " 2.0\n", - " 1.0\n", - " 0.0\n", - " \n", - " \n", - " 10\n", - " 0.192875\n", - 
" 0.082350\n", - " 0.191943\n", - " 0.152929\n", - " 22.27\n", - " 299.512857\n", - " 299.592857\n", - " 296.041429\n", - " 301.8\n", - " 298.0\n", + " 0.010800\n", + " 0.049900\n", + " 0.100571\n", + " 0.117329\n", + " 8.02\n", + " 300.601429\n", + " 300.621429\n", + " 296.268571\n", + " 302.5\n", + " 298.5\n", " ...\n", - " 18.071429\n", - " 2.014286\n", - " 28.328571\n", - " 6.514286\n", - " 33.9\n", + " 3.900000\n", + " 28.757143\n", + " 6.985714\n", + " 34.4\n", " 24.4\n", - " 1.1\n", + " 0.3\n", + " 6.0\n", " 7.0\n", - " 2.0\n", - " 1.0\n", - " \n", - " \n", - " 11\n", - " 0.291600\n", - " 0.211800\n", - " 0.301200\n", - " 0.280667\n", - " 59.17\n", - " 299.667143\n", - " 299.750000\n", - " 296.334286\n", - " 302.0\n", - " 297.3\n", - " ...\n", - " 17.418571\n", - " 2.157143\n", - " 27.557143\n", - " 7.157143\n", - " 31.7\n", - " 21.7\n", - " 63.7\n", - " 0.0\n", " 7.0\n", - " 2.0\n", + " 6\n", " \n", " \n", - " 12\n", - " 0.150567\n", - " 0.171700\n", - " 0.226900\n", - " 0.214557\n", - " 16.48\n", - " 299.558571\n", - " 299.635714\n", - " 295.960000\n", - " 301.8\n", - " 297.1\n", + " 9\n", + " 0.072667\n", + " 0.106660\n", + " 0.155429\n", + " 0.164900\n", + " 17.52\n", + " 300.497143\n", + " 300.528571\n", + " 296.411429\n", + " 302.3\n", + " 298.7\n", " ...\n", - " 17.737143\n", - " 2.414286\n", - " 28.128571\n", - " 6.900000\n", + " 2.785714\n", + " 28.657143\n", + " 6.242857\n", " 32.8\n", " 23.9\n", - " 12.2\n", - " 7.0\n", - " 0.0\n", - " 7.0\n", - " \n", - " \n", - " 13\n", - " 0.150567\n", - " 0.247150\n", - " 0.379700\n", - " 0.381357\n", - " 32.66\n", - " 299.862857\n", - " 299.950000\n", - " 296.172857\n", - " 303.0\n", - " 298.3\n", - " ...\n", - " 17.341429\n", - " 2.071429\n", - " 28.114286\n", - " 6.357143\n", - " 31.7\n", - " 22.8\n", - " 32.6\n", - " 0.0\n", - " 7.0\n", - " 0.0\n", - " \n", - " \n", - " 14\n", - " 0.150567\n", - " 0.064333\n", - " 0.164443\n", - " 0.138857\n", - " 28.80\n", - " 300.391429\n", - " 300.478571\n", - " 
296.532857\n", - " 302.5\n", - " 298.8\n", - " ...\n", - " 17.594286\n", - " 2.585714\n", - " 28.242857\n", - " 8.085714\n", - " 34.4\n", - " 22.8\n", - " 37.6\n", - " 11.0\n", - " 0.0\n", + " 28.7\n", + " 6.0\n", + " 6.0\n", " 7.0\n", + " 7\n", " \n", " \n", "\n", - "

15 rows × 43 columns

\n", + "

10 rows × 44 columns

\n", "" ], "text/plain": [ - " ndvi_ne ndvi_nw ndvi_se ndvi_sw precipitation_amt_mm \\\n", - "0 0.122600 0.103725 0.198483 0.177617 12.42 \n", - "1 0.169900 0.142175 0.162357 0.155486 22.82 \n", - "2 0.032250 0.172967 0.157200 0.170843 34.54 \n", - "3 0.128633 0.245067 0.227557 0.235886 15.36 \n", - "4 0.196200 0.262200 0.251200 0.247340 7.52 \n", - "5 0.196200 0.174850 0.254314 0.181743 9.58 \n", - "6 0.112900 0.092800 0.205071 0.210271 3.48 \n", - "7 0.072500 0.072500 0.151471 0.133029 151.12 \n", - "8 0.102450 0.146175 0.125571 0.123600 19.32 \n", - "9 0.102450 0.121550 0.160683 0.202567 14.41 \n", - "10 0.192875 0.082350 0.191943 0.152929 22.27 \n", - "11 0.291600 0.211800 0.301200 0.280667 59.17 \n", - "12 0.150567 0.171700 0.226900 0.214557 16.48 \n", - "13 0.150567 0.247150 0.379700 0.381357 32.66 \n", - "14 0.150567 0.064333 0.164443 0.138857 28.80 \n", + " ndvi_ne ndvi_nw ndvi_se ndvi_sw precipitation_amt_mm \\\n", + "0 -0.018900 -0.018900 0.102729 0.091200 78.60 \n", + "1 -0.018000 -0.012400 0.082043 0.072314 12.56 \n", + "2 -0.001500 -0.012400 0.151083 0.091529 3.66 \n", + "3 -0.001500 -0.019867 0.124329 0.125686 0.00 \n", + "4 0.056800 0.039833 0.062267 0.075914 0.76 \n", + "5 -0.044000 -0.030467 0.132000 0.083529 71.17 \n", + "6 -0.044300 -0.024925 0.132271 0.159157 48.99 \n", + "7 -0.044300 0.082150 0.144371 0.116729 30.81 \n", + "8 0.010800 0.049900 0.100571 0.117329 8.02 \n", + "9 0.072667 0.106660 0.155429 0.164900 17.52 \n", "\n", - " reanalysis_air_temp_k reanalysis_avg_temp_k reanalysis_dew_point_temp_k \\\n", - "0 297.572857 297.742857 292.414286 \n", - "1 298.211429 298.442857 293.951429 \n", - "2 298.781429 298.878571 295.434286 \n", - "3 298.987143 299.228571 295.310000 \n", - "4 299.518571 299.664286 295.821429 \n", - "5 299.630000 299.764286 295.851429 \n", - "6 299.207143 299.221429 295.865714 \n", - "7 299.591429 299.528571 296.531429 \n", - "8 299.578571 299.557143 296.378571 \n", - "9 300.154286 300.278571 296.651429 \n", - "10 
299.512857 299.592857 296.041429 \n", - "11 299.667143 299.750000 296.334286 \n", - "12 299.558571 299.635714 295.960000 \n", - "13 299.862857 299.950000 296.172857 \n", - "14 300.391429 300.478571 296.532857 \n", + " reanalysis_air_temp_k reanalysis_avg_temp_k reanalysis_dew_point_temp_k \\\n", + "0 298.492857 298.550000 294.527143 \n", + "1 298.475714 298.557143 294.395714 \n", + "2 299.455714 299.357143 295.308571 \n", + "3 299.690000 299.728571 294.402857 \n", + "4 299.780000 299.671429 294.760000 \n", + "5 299.768571 299.728571 295.314286 \n", + "6 300.062857 300.007143 295.650000 \n", + "7 300.484286 300.578571 295.997143 \n", + "8 300.601429 300.621429 296.268571 \n", + "9 300.497143 300.528571 296.411429 \n", "\n", - " reanalysis_max_air_temp_k reanalysis_min_air_temp_k ... \\\n", - "0 299.8 295.9 ... \n", - "1 300.9 296.4 ... \n", - "2 300.5 297.3 ... \n", - "3 301.4 297.0 ... \n", - "4 301.9 297.5 ... \n", - "5 302.4 298.1 ... \n", - "6 301.3 297.7 ... \n", - "7 300.6 298.4 ... \n", - "8 302.1 297.7 ... \n", - "9 302.3 298.7 ... \n", - "10 301.8 298.0 ... \n", - "11 302.0 297.3 ... \n", - "12 301.8 297.1 ... \n", - "13 303.0 298.3 ... \n", - "14 302.5 298.8 ... \n", + " reanalysis_max_air_temp_k reanalysis_min_air_temp_k ... \\\n", + "0 301.1 296.4 ... \n", + "1 300.8 296.7 ... \n", + "2 302.2 296.4 ... \n", + "3 303.0 296.9 ... \n", + "4 302.3 297.3 ... \n", + "5 301.9 297.6 ... \n", + "6 302.4 297.5 ... \n", + "7 303.5 297.5 ... \n", + "8 302.5 298.5 ... \n", + "9 302.3 298.7 ... 
\n", "\n", - " last_weeks_0_reanalysis_specific_humidity_g_per_kg \\\n", - "0 17.087143 \n", - "1 14.012857 \n", - "2 15.372857 \n", - "3 16.848571 \n", - "4 16.672857 \n", - "5 17.210000 \n", - "6 17.212857 \n", - "7 17.234286 \n", - "8 17.977143 \n", - "9 17.790000 \n", - "10 18.071429 \n", - "11 17.418571 \n", - "12 17.737143 \n", - "13 17.341429 \n", - "14 17.594286 \n", + " last_weeks_0_reanalysis_tdtr_k last_weeks_0_station_avg_temp_c \\\n", + "0 3.957143 27.042857 \n", + "1 3.128571 26.528571 \n", + "2 2.571429 26.071429 \n", + "3 4.428571 27.928571 \n", + "4 4.342857 28.057143 \n", + "5 3.542857 27.614286 \n", + "6 2.857143 28.000000 \n", + "7 3.157143 27.400000 \n", + "8 3.900000 28.757143 \n", + "9 2.785714 28.657143 \n", "\n", - " last_weeks_0_reanalysis_tdtr_k last_weeks_0_station_avg_temp_c \\\n", - "0 2.857143 27.400000 \n", - "1 2.628571 25.442857 \n", - "2 2.371429 26.714286 \n", - "3 2.300000 26.714286 \n", - "4 2.428571 27.471429 \n", - "5 3.014286 28.942857 \n", - "6 2.100000 28.114286 \n", - "7 2.042857 27.414286 \n", - "8 1.571429 28.371429 \n", - "9 1.885714 28.328571 \n", - "10 2.014286 28.328571 \n", - "11 2.157143 27.557143 \n", - "12 2.414286 28.128571 \n", - "13 2.071429 28.114286 \n", - "14 2.585714 28.242857 \n", + " last_weeks_0_station_diur_temp_rng_c last_weeks_0_station_max_temp_c \\\n", + "0 7.514286 31.7 \n", + "1 7.057143 33.3 \n", + "2 5.557143 30.0 \n", + "3 7.785714 32.8 \n", + "4 6.271429 33.3 \n", + "5 7.085714 33.3 \n", + "6 5.171429 32.8 \n", + "7 6.042857 31.1 \n", + "8 6.985714 34.4 \n", + "9 6.242857 32.8 \n", "\n", - " last_weeks_0_station_diur_temp_rng_c last_weeks_0_station_max_temp_c \\\n", - "0 7.364286 32.8 \n", - "1 6.900000 29.4 \n", - "2 6.371429 31.7 \n", - "3 6.485714 32.2 \n", - "4 6.771429 33.3 \n", - "5 9.371429 35.0 \n", - "6 6.942857 34.4 \n", - "7 6.771429 32.2 \n", - "8 7.685714 33.9 \n", - "9 7.385714 33.9 \n", - "10 6.514286 33.9 \n", - "11 7.157143 31.7 \n", - "12 6.900000 32.8 \n", - "13 6.357143 
31.7 \n", - "14 8.085714 34.4 \n", + " last_weeks_0_station_min_temp_c last_weeks_0_station_precip_mm \\\n", + "0 23.3 0.3 \n", + "1 21.7 75.2 \n", + "2 22.2 34.3 \n", + "3 22.8 3.0 \n", + "4 24.4 0.3 \n", + "5 23.3 84.1 \n", + "6 25.0 27.7 \n", + "7 23.3 91.7 \n", + "8 24.4 0.3 \n", + "9 23.9 28.7 \n", "\n", - " last_weeks_0_station_min_temp_c last_weeks_0_station_precip_mm \\\n", - "0 22.2 23.8 \n", - "1 20.0 16.0 \n", - "2 22.2 8.6 \n", - "3 22.8 41.4 \n", - "4 23.3 4.0 \n", - "5 23.9 5.8 \n", - "6 23.9 39.1 \n", - "7 23.3 29.7 \n", - "8 22.8 21.1 \n", - "9 22.8 21.1 \n", - "10 24.4 1.1 \n", - "11 21.7 63.7 \n", - "12 23.9 12.2 \n", - "13 22.8 32.6 \n", - "14 22.8 37.6 \n", + " last_infected_0 last_infected_1 last_infected_2 pred \n", + "0 5.0 3.0 1.0 6 \n", + "1 6.0 5.0 3.0 7 \n", + "2 7.0 6.0 5.0 6 \n", + "3 6.0 7.0 6.0 6 \n", + "4 6.0 6.0 7.0 6 \n", + "5 6.0 6.0 6.0 7 \n", + "6 7.0 6.0 6.0 7 \n", + "7 7.0 7.0 6.0 6 \n", + "8 6.0 7.0 7.0 6 \n", + "9 6.0 6.0 7.0 7 \n", "\n", - " last_infected_0 last_infected_1 last_infected_2 \n", - "0 0.0 0.0 0.0 \n", - "1 6.0 0.0 0.0 \n", - "2 9.0 6.0 0.0 \n", - "3 5.0 9.0 6.0 \n", - "4 0.0 5.0 9.0 \n", - "5 7.0 0.0 5.0 \n", - "6 3.0 7.0 0.0 \n", - "7 0.0 3.0 7.0 \n", - "8 1.0 0.0 3.0 \n", - "9 2.0 1.0 0.0 \n", - "10 7.0 2.0 1.0 \n", - "11 0.0 7.0 2.0 \n", - "12 7.0 0.0 7.0 \n", - "13 0.0 7.0 0.0 \n", - "14 11.0 0.0 7.0 \n", - "\n", - "[15 rows x 43 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_train.head(15)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n", - " kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)" + "[10 rows x 44 columns]" ] }, - "execution_count": 24, + "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "model" 
+ "pd.concat([X_test_f, pd.DataFrame(predictions, columns=['pred'])], axis=1).head(10)" ] }, { diff --git a/utils/LastInfected.py b/utils/LastInfected.py index 41ab05f..f181aa9 100644 --- a/utils/LastInfected.py +++ b/utils/LastInfected.py @@ -47,8 +47,7 @@ def append_y(self, new_y): if self.add_noise: noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std))) new_y += noise - print('With noise {}!!'.format(new_y)) - if new_y < 0: - new_y = 0 + #if new_y < 0: + #new_y = 0 self.last[self.city].appendleft(new_y) self.last[self.city].pop() \ No newline at end of file From eda28aba74c6a42dfbded2810af5b6f40cbb5451 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 13:36:04 +0100 Subject: [PATCH 19/24] :( --- models.ipynb | 149 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 54 deletions(-) diff --git a/models.ipynb b/models.ipynb index 011b2e3..489409f 100644 --- a/models.ipynb +++ b/models.ipynb @@ -199,7 +199,7 @@ "metadata": {}, "outputs": [], "source": [ - "k_folds=10\n", + "k_folds=\n", "n_iter_search = 40\n", "params = {'n_estimators': sp_randint(2,50), 'criterion':['mae'], 'max_depth': sp_randint(2, 10)}\n", "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" @@ -539,16 +539,16 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 152, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(6.49747, 4.943615438027113)" + "(6.53353, 4.950353092366241)" ] }, - "execution_count": 124, + "execution_count": 152, "metadata": {}, "output_type": "execute_result" } @@ -573,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ @@ -586,43 +586,30 @@ }, { "cell_type": "code", - 
"execution_count": 121, + "execution_count": 155, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "-8.57545699492815" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "#Forest_optimizer.fit(X_train, y_train)\n", - "model=Forest_optimizer.best_estimator_\n", - "Forest_optimizer.best_score_" + "model = RandomForestRegressor(criterion='mae', n_estimators=100, max_depth=3)" ] }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 156, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n", " max_features='auto', max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n", + " min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,\n", " oob_score=False, random_state=None, verbose=0, warm_start=False)" ] }, - "execution_count": 127, + "execution_count": 156, "metadata": {}, "output_type": "execute_result" } @@ -633,7 +620,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 161, "metadata": {}, "outputs": [ { @@ -642,7 +629,7 @@ "416" ] }, - "execution_count": 141, + "execution_count": 161, "metadata": {}, "output_type": "execute_result" } @@ -687,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 162, "metadata": {}, "outputs": [ { @@ -781,7 +768,7 @@ " 6.0\n", " 5.0\n", " 3.0\n", - " 7\n", + " 6\n", " \n", " \n", " 2\n", @@ -802,7 +789,7 @@ " 30.0\n", " 22.2\n", " 34.3\n", - " 7.0\n", + " 6.0\n", " 6.0\n", " 5.0\n", " 6\n", @@ -827,7 +814,7 @@ " 22.8\n", " 3.0\n", " 6.0\n", - " 7.0\n", + " 6.0\n", " 6.0\n", " 6\n", " \n", @@ -852,7 +839,7 @@ " 0.3\n", " 6.0\n", " 6.0\n", - " 7.0\n", + " 6.0\n", " 6\n", " \n", " 
\n", @@ -877,7 +864,7 @@ " 6.0\n", " 6.0\n", " 6.0\n", - " 7\n", + " 6\n", " \n", " \n", " 6\n", @@ -898,10 +885,10 @@ " 32.8\n", " 25.0\n", " 27.7\n", - " 7.0\n", " 6.0\n", " 6.0\n", - " 7\n", + " 6.0\n", + " 6\n", " \n", " \n", " 7\n", @@ -922,8 +909,8 @@ " 31.1\n", " 23.3\n", " 91.7\n", - " 7.0\n", - " 7.0\n", + " 6.0\n", + " 6.0\n", " 6.0\n", " 6\n", " \n", @@ -947,8 +934,8 @@ " 24.4\n", " 0.3\n", " 6.0\n", - " 7.0\n", - " 7.0\n", + " 6.0\n", + " 6.0\n", " 6\n", " \n", " \n", @@ -972,8 +959,8 @@ " 28.7\n", " 6.0\n", " 6.0\n", - " 7.0\n", - " 7\n", + " 6.0\n", + " 6\n", " \n", " \n", "\n", @@ -1055,20 +1042,20 @@ "\n", " last_infected_0 last_infected_1 last_infected_2 pred \n", "0 5.0 3.0 1.0 6 \n", - "1 6.0 5.0 3.0 7 \n", - "2 7.0 6.0 5.0 6 \n", - "3 6.0 7.0 6.0 6 \n", - "4 6.0 6.0 7.0 6 \n", - "5 6.0 6.0 6.0 7 \n", - "6 7.0 6.0 6.0 7 \n", - "7 7.0 7.0 6.0 6 \n", - "8 6.0 7.0 7.0 6 \n", - "9 6.0 6.0 7.0 7 \n", + "1 6.0 5.0 3.0 6 \n", + "2 6.0 6.0 5.0 6 \n", + "3 6.0 6.0 6.0 6 \n", + "4 6.0 6.0 6.0 6 \n", + "5 6.0 6.0 6.0 6 \n", + "6 6.0 6.0 6.0 6 \n", + "7 6.0 6.0 6.0 6 \n", + "8 6.0 6.0 6.0 6 \n", + "9 6.0 6.0 6.0 6 \n", "\n", "[10 rows x 44 columns]" ] }, - "execution_count": 149, + "execution_count": 162, "metadata": {}, "output_type": "execute_result" } @@ -1079,10 +1066,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 158, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "7.860576923076923" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import mean_absolute_error\n", + "mean_absolute_error(model.predict(X_train), y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "371.265" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"max(model.predict(X_train))" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max(predictions)" + ] } ], "metadata": { From 54f8548ad07fbf537ec25cf64988b4b34fde187f Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 16:53:55 +0100 Subject: [PATCH 20/24] made new train_test split --- models.ipynb | 118 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 110 insertions(+), 8 deletions(-) diff --git a/models.ipynb b/models.ipynb index 489409f..5968e0c 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -195,11 +195,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "k_folds=\n", + "k_folds=10\n", "n_iter_search = 40\n", "params = {'n_estimators': sp_randint(2,50), 'criterion':['mae'], 'max_depth': sp_randint(2, 10)}\n", "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ }, { 
"cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -1124,6 +1124,108 @@ "source": [ "max(predictions)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test split of tail\n", + "* To simulate what we are doing with the test data, we are going to split the train data, for each city, by sampling N entries from the tail of each city for testing." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((933, 24), (933,), (518, 24), (518,))" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx_sj = X_train_1['city'] == 'sj'\n", + "X_sj = X_train_1[idx_sj]\n", + "y_sj = y_train[idx_sj]\n", + "\n", + "idx_iq = X_train_1['city'] == 'iq'\n", + "X_iq = X_train_1[idx_iq]\n", + "y_iq = y_train[idx_iq]\n", + "\n", + "X_sj.shape, y_sj.shape, X_iq.shape, y_iq.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((653, 24), (280, 24), (653,), (280,), (362, 24), (156, 24), (362,), (156,))" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "l = train_test_split(X_sj, y_sj, train_size=0.7, test_size=None, shuffle=False)\n", + "X_train_sj = l[0]\n", + "X_test_sj = l[1]\n", + "y_train_sj = l[2]\n", + "y_test_sj = l[3]\n", + "\n", + "l = train_test_split(X_iq, y_iq, train_size=0.7, test_size=None, shuffle=False)\n", + "X_train_iq = l[0]\n", + "X_test_iq = l[1]\n", + "y_train_iq = l[2]\n", + "y_test_iq = l[3]\n", + "\n", + "X_train_sj.shape, X_test_sj.shape, y_train_sj.shape, y_test_sj.shape, X_train_iq.shape, X_test_iq.shape, y_train_iq.shape, y_test_iq.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "((1015, 24), (1015,), (436, 24), (436,))" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_2 = pd.concat([X_train_sj, X_train_iq])\n", + "y_train_2 = pd.concat([y_train_sj, y_train_iq])\n", + "X_test_2 = pd.concat([X_test_sj, X_test_iq])\n", + "y_test_2 = pd.concat([y_test_sj, y_test_iq])\n", + "\n", + "X_train_2.shape, y_train_2.shape, X_test_2.shape, y_test_2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 3a053a57f31e36624d657b17fc88de3c585904d0 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 18:48:31 +0100 Subject: [PATCH 21/24] refactored predict_in_order and made the split --- models.ipynb | 168 +++++++++++++++++++++---- utils/LastInfected.py | 2 +- OurPipeline.py => utils/OurPipeline.py | 0 utils/predict_in_order.py | 14 +++ 4 files changed, 157 insertions(+), 27 deletions(-) rename OurPipeline.py => utils/OurPipeline.py (100%) create mode 100644 utils/predict_in_order.py diff --git a/models.ipynb b/models.ipynb index 5968e0c..199dc28 100644 --- a/models.ipynb +++ b/models.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 156, "metadata": {}, "outputs": [ { @@ -52,7 +52,7 @@ " 'station_precip_mm']" ] }, - "execution_count": 2, + "execution_count": 156, "metadata": {}, "output_type": "execute_result" } @@ -402,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 157, "metadata": {}, "outputs": [ { @@ -636,21 +636,13 @@ ], "source": [ "%autoreload\n", - "from OurPipeline import create_pipeline\n", + "from utils.OurPipeline import create_pipeline\n", + "from utils.predict_in_order import predict_in_order\n", "\n", "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", "pipeline.fit_transform(X_train_1, y_train)\n", "\n", - "X_test_f = 
pd.DataFrame([], columns=attr[4:])\n", - "predictions=[]\n", - "for idx in range(X_test_1.shape[0]):\n", - " x = pipeline.transform(X_test_1.loc[idx:idx,:])\n", - " X_test_f = X_test_f.append(x, sort=False, ignore_index=True)\n", - " pred = model.predict(x)\n", - " pred = int(np.round(pred))\n", - " pipeline.named_steps['l_infected'].append_y(pred)\n", - " predictions.append(pred)\n", - "len(predictions)" + "predict_in_order(X_test_1, model, pipeline)" ] }, { @@ -1135,7 +1127,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 101, "metadata": {}, "outputs": [ { @@ -1144,7 +1136,7 @@ "((933, 24), (933,), (518, 24), (518,))" ] }, - "execution_count": 16, + "execution_count": 101, "metadata": {}, "output_type": "execute_result" } @@ -1163,16 +1155,16 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 102, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "((653, 24), (280, 24), (653,), (280,), (362, 24), (156, 24), (362,), (156,))" + "((466, 24), (467, 24), (466,), (467,), (259, 24), (259, 24), (259,), (259,))" ] }, - "execution_count": 41, + "execution_count": 102, "metadata": {}, "output_type": "execute_result" } @@ -1180,13 +1172,13 @@ "source": [ "from sklearn.model_selection import train_test_split\n", "\n", - "l = train_test_split(X_sj, y_sj, train_size=0.7, test_size=None, shuffle=False)\n", + "l = train_test_split(X_sj, y_sj, train_size=0.5, test_size=None, shuffle=False)\n", "X_train_sj = l[0]\n", "X_test_sj = l[1]\n", "y_train_sj = l[2]\n", "y_test_sj = l[3]\n", "\n", - "l = train_test_split(X_iq, y_iq, train_size=0.7, test_size=None, shuffle=False)\n", + "l = train_test_split(X_iq, y_iq, train_size=0.5, test_size=None, shuffle=False)\n", "X_train_iq = l[0]\n", "X_test_iq = l[1]\n", "y_train_iq = l[2]\n", @@ -1197,16 +1189,16 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "((1015, 24), (1015,), (436, 24), 
(436,))" + "((725, 24), (725,), (726, 24), (726,))" ] }, - "execution_count": 49, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } @@ -1217,15 +1209,139 @@ "X_test_2 = pd.concat([X_test_sj, X_test_iq])\n", "y_test_2 = pd.concat([y_test_sj, y_test_iq])\n", "\n", + "X_train_2.reset_index(drop=True, inplace=True)\n", + "X_test_2.reset_index(drop=True, inplace=True)\n", + "y_train_2.reset_index(drop=True, inplace=True)\n", + "y_test_2.reset_index(drop=True, inplace=True)\n", "X_train_2.shape, y_train_2.shape, X_test_2.shape, y_test_2.shape" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pipeline" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 151, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from OurPipeline import create_pipeline\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_2, y_train_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=150, n_jobs=None,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False)" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = RandomForestRegressor(criterion='mae', n_estimators=150, max_depth=3)\n", + "model.fit(X_train, y_train_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from utils.OurPipeline import 
create_pipeline\n", + "from utils.predict_in_order import predict_in_order\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", + "pipeline.fit_transform(X_train_2, y_train_2)\n", + "\n", + "pred = predict_in_order(X_test_2, model=model, pipeline=pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11.414600550964188" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import mean_absolute_error\n", + "mean_absolute_error(pred, y_test_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from utils.OurPipeline import create_pipeline\n", + "from utils.predict_in_order import predict_in_order\n", + "\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1, y_train)\n", + "\n", + "model.fit(X_train, y_train)\n", + "\n", + "pred = predict_in_order(X_test_1, model=model, pipeline=pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "submit = pd.DataFrame(pred, columns=['total_cases'])\n", + "x_3 = X_test_1.iloc[:,:3].copy()\n", + "submit = pd.concat([x_3, submit], axis=1)\n", + "submit.to_csv('data/submit.csv', index=False)" + ] } ], "metadata": { diff --git a/utils/LastInfected.py b/utils/LastInfected.py index f181aa9..de8dc36 100644 --- a/utils/LastInfected.py +++ b/utils/LastInfected.py @@ -15,9 +15,9 @@ def __init__(self, weeks=1, new_attributes_prefix='last_infected_', add_noise=Fa self.noise_mean = noise_mean self.noise_std = noise_std - self.first = True def fit(self, X, y): + self.first = True self.y 
= y.to_list() return self diff --git a/OurPipeline.py b/utils/OurPipeline.py similarity index 100% rename from OurPipeline.py rename to utils/OurPipeline.py diff --git a/utils/predict_in_order.py b/utils/predict_in_order.py new file mode 100644 index 0000000..e5dc45a --- /dev/null +++ b/utils/predict_in_order.py @@ -0,0 +1,14 @@ +import numpy as np + +def predict_in_order(X, model, pipeline): + #X_test_f = pd.DataFrame([], columns=attr[4:]) + predictions=[] + for idx in range(X.shape[0]): + x = pipeline.transform(X.loc[idx:idx,:]) + #X_test_f = X_test_f.append(x, sort=False, ignore_index=True) + pred = model.predict(x) + pred = int(np.round(pred)) + pipeline.named_steps['l_infected'].append_y(pred) + predictions.append(pred) + + return predictions \ No newline at end of file From 9cd1ec1de416e081f37eb40ef9d537e62c242f3a Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Fri, 19 Apr 2019 20:03:38 +0100 Subject: [PATCH 22/24] :( --- models.ipynb | 598 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 573 insertions(+), 25 deletions(-) diff --git a/models.ipynb b/models.ipynb index 199dc28..c45d042 100644 --- a/models.ipynb +++ b/models.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -52,7 +52,7 @@ " 'station_precip_mm']" ] }, - "execution_count": 156, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -568,7 +568,7 @@ "source": [ "# One by one prediction with noise\n", "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive 
model.\n", - "* A very likely guess is that the errors when y is low is much smaller than when y is high." + "* A very likely guess is that the error when y is low is much smaller than when y is high." ] }, { @@ -1127,7 +1127,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1136,7 +1136,7 @@ "((933, 24), (933,), (518, 24), (518,))" ] }, - "execution_count": 101, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1155,16 +1155,16 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "((466, 24), (467, 24), (466,), (467,), (259, 24), (259, 24), (259,), (259,))" + "((186, 24), (747, 24), (186,), (747,), (103, 24), (415, 24), (103,), (415,))" ] }, - "execution_count": 102, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1172,13 +1172,13 @@ "source": [ "from sklearn.model_selection import train_test_split\n", "\n", - "l = train_test_split(X_sj, y_sj, train_size=0.5, test_size=None, shuffle=False)\n", + "l = train_test_split(X_sj, y_sj, train_size=0.2, test_size=None, shuffle=False)\n", "X_train_sj = l[0]\n", "X_test_sj = l[1]\n", "y_train_sj = l[2]\n", "y_test_sj = l[3]\n", "\n", - "l = train_test_split(X_iq, y_iq, train_size=0.5, test_size=None, shuffle=False)\n", + "l = train_test_split(X_iq, y_iq, train_size=0.2, test_size=None, shuffle=False)\n", "X_train_iq = l[0]\n", "X_test_iq = l[1]\n", "y_train_iq = l[2]\n", @@ -1189,16 +1189,16 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "((725, 24), (725,), (726, 24), (726,))" + "((289, 24), (289,), (1162, 24), (1162,))" ] }, - "execution_count": 103, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1225,12 +1225,12 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 27, 
"metadata": {}, "outputs": [], "source": [ "%autoreload\n", - "from OurPipeline import create_pipeline\n", + "from utils.OurPipeline import create_pipeline\n", "\n", "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", "X_train = pipeline.fit_transform(X_train_2, y_train_2)" @@ -1245,7 +1245,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1259,7 +1259,7 @@ " oob_score=False, random_state=None, verbose=0, warm_start=False)" ] }, - "execution_count": 152, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1271,7 +1271,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -1287,16 +1287,16 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "11.414600550964188" + "22.689328743545612" ] }, - "execution_count": 154, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1315,7 +1315,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -1333,7 +1333,7 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -1342,6 +1342,554 @@ "submit = pd.concat([x_3, submit], axis=1)\n", "submit.to_csv('data/submit.csv', index=False)" ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cityyearweekofyeartotal_cases
0sj2008186
1sj2008196
2sj2008206
3sj2008216
4sj2008226
5sj2008236
6sj2008246
7sj2008256
8sj2008266
9sj2008276
10sj2008286
11sj2008296
12sj2008306
13sj2008316
14sj2008326
15sj2008336
16sj2008346
17sj2008356
18sj2008366
19sj2008376
20sj2008386
21sj2008396
22sj2008406
23sj2008416
24sj2008426
25sj2008436
26sj2008446
27sj2008456
28sj2008466
29sj2008476
...............
386iq2012486
387iq2012496
388iq2012506
389iq2012516
390iq201316
391iq201326
392iq201336
393iq201346
394iq201356
395iq201366
396iq201376
397iq201386
398iq201396
399iq2013106
400iq2013116
401iq2013126
402iq2013136
403iq2013146
404iq2013156
405iq2013166
406iq2013176
407iq2013186
408iq2013196
409iq2013206
410iq2013216
411iq2013226
412iq2013236
413iq2013246
414iq2013256
415iq2013266
\n", + "

416 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " city year weekofyear total_cases\n", + "0 sj 2008 18 6\n", + "1 sj 2008 19 6\n", + "2 sj 2008 20 6\n", + "3 sj 2008 21 6\n", + "4 sj 2008 22 6\n", + "5 sj 2008 23 6\n", + "6 sj 2008 24 6\n", + "7 sj 2008 25 6\n", + "8 sj 2008 26 6\n", + "9 sj 2008 27 6\n", + "10 sj 2008 28 6\n", + "11 sj 2008 29 6\n", + "12 sj 2008 30 6\n", + "13 sj 2008 31 6\n", + "14 sj 2008 32 6\n", + "15 sj 2008 33 6\n", + "16 sj 2008 34 6\n", + "17 sj 2008 35 6\n", + "18 sj 2008 36 6\n", + "19 sj 2008 37 6\n", + "20 sj 2008 38 6\n", + "21 sj 2008 39 6\n", + "22 sj 2008 40 6\n", + "23 sj 2008 41 6\n", + "24 sj 2008 42 6\n", + "25 sj 2008 43 6\n", + "26 sj 2008 44 6\n", + "27 sj 2008 45 6\n", + "28 sj 2008 46 6\n", + "29 sj 2008 47 6\n", + ".. ... ... ... ...\n", + "386 iq 2012 48 6\n", + "387 iq 2012 49 6\n", + "388 iq 2012 50 6\n", + "389 iq 2012 51 6\n", + "390 iq 2013 1 6\n", + "391 iq 2013 2 6\n", + "392 iq 2013 3 6\n", + "393 iq 2013 4 6\n", + "394 iq 2013 5 6\n", + "395 iq 2013 6 6\n", + "396 iq 2013 7 6\n", + "397 iq 2013 8 6\n", + "398 iq 2013 9 6\n", + "399 iq 2013 10 6\n", + "400 iq 2013 11 6\n", + "401 iq 2013 12 6\n", + "402 iq 2013 13 6\n", + "403 iq 2013 14 6\n", + "404 iq 2013 15 6\n", + "405 iq 2013 16 6\n", + "406 iq 2013 17 6\n", + "407 iq 2013 18 6\n", + "408 iq 2013 19 6\n", + "409 iq 2013 20 6\n", + "410 iq 2013 21 6\n", + "411 iq 2013 22 6\n", + "412 iq 2013 23 6\n", + "413 iq 2013 24 6\n", + "414 iq 2013 25 6\n", + "415 iq 2013 26 6\n", + "\n", + "[416 rows x 4 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "submit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From ed24cc387af782fc3cf9c17d64e23414aff83653 Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Mon, 22 Apr 2019 18:56:25 +0100 Subject: [PATCH 23/24] yo --- models.ipynb | 1018 ++++++++++++++++++------------------------ 
utils/OurPipeline.py | 10 +- 2 files changed, 435 insertions(+), 593 deletions(-) diff --git a/models.ipynb b/models.ipynb index c45d042..c9ab18c 100644 --- a/models.ipynb +++ b/models.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -140,12 +140,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ - "k_folds=10\n", - "n_iter_search = 20\n", + "k_folds=5\n", + "n_iter_search = 10\n", "C = sp_randint(0, 10000)\n", "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n", "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)" @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -258,37 +258,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Best score of 10.757898351648352 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n", - " min_samples_split=0.08977730688967958,\n", - " min_weight_fraction_leaf=0.0, presort=False, 
random_state=None,\n", - " splitter='best')\n", - "1/4\t\n", - "Best score of 8.57545699492815 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False)\n", - "2/4\t3/4\t4/4\t" - ] - } - ], + "outputs": [], "source": [ "%autoreload\n", "from OurPipeline import create_pipeline\n", "from sklearn.decomposition import PCA\n", "\n", - "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]#, SVR_optimizer]\n", + "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", "weeks = [1]\n", "weeks_infected = [3]\n", "pca = [None]\n", @@ -402,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -568,7 +546,7 @@ "source": [ "# One by one prediction with noise\n", "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n", - "* A very likely guess is that the error when y is low is much smaller than when y is high." + "* A very likely guess for why it isn't working is that the error when y is low is much smaller than when y is high." ] }, { @@ -664,398 +642,6 @@ "submit.to_csv('data/submit.csv', index=False)" ] }, - { - "cell_type": "code", - "execution_count": 162, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ndvi_nendvi_nwndvi_sendvi_swprecipitation_amt_mmreanalysis_air_temp_kreanalysis_avg_temp_kreanalysis_dew_point_temp_kreanalysis_max_air_temp_kreanalysis_min_air_temp_k...last_weeks_0_reanalysis_tdtr_klast_weeks_0_station_avg_temp_clast_weeks_0_station_diur_temp_rng_clast_weeks_0_station_max_temp_clast_weeks_0_station_min_temp_clast_weeks_0_station_precip_mmlast_infected_0last_infected_1last_infected_2pred
0-0.018900-0.0189000.1027290.09120078.60298.492857298.550000294.527143301.1296.4...3.95714327.0428577.51428631.723.30.35.03.01.06
1-0.018000-0.0124000.0820430.07231412.56298.475714298.557143294.395714300.8296.7...3.12857126.5285717.05714333.321.775.26.05.03.06
2-0.001500-0.0124000.1510830.0915293.66299.455714299.357143295.308571302.2296.4...2.57142926.0714295.55714330.022.234.36.06.05.06
3-0.001500-0.0198670.1243290.1256860.00299.690000299.728571294.402857303.0296.9...4.42857127.9285717.78571432.822.83.06.06.06.06
40.0568000.0398330.0622670.0759140.76299.780000299.671429294.760000302.3297.3...4.34285728.0571436.27142933.324.40.36.06.06.06
5-0.044000-0.0304670.1320000.08352971.17299.768571299.728571295.314286301.9297.6...3.54285727.6142867.08571433.323.384.16.06.06.06
6-0.044300-0.0249250.1322710.15915748.99300.062857300.007143295.650000302.4297.5...2.85714328.0000005.17142932.825.027.76.06.06.06
7-0.0443000.0821500.1443710.11672930.81300.484286300.578571295.997143303.5297.5...3.15714327.4000006.04285731.123.391.76.06.06.06
80.0108000.0499000.1005710.1173298.02300.601429300.621429296.268571302.5298.5...3.90000028.7571436.98571434.424.40.36.06.06.06
90.0726670.1066600.1554290.16490017.52300.497143300.528571296.411429302.3298.7...2.78571428.6571436.24285732.823.928.76.06.06.06
\n", - "

10 rows × 44 columns

\n", - "
" - ], - "text/plain": [ - " ndvi_ne ndvi_nw ndvi_se ndvi_sw precipitation_amt_mm \\\n", - "0 -0.018900 -0.018900 0.102729 0.091200 78.60 \n", - "1 -0.018000 -0.012400 0.082043 0.072314 12.56 \n", - "2 -0.001500 -0.012400 0.151083 0.091529 3.66 \n", - "3 -0.001500 -0.019867 0.124329 0.125686 0.00 \n", - "4 0.056800 0.039833 0.062267 0.075914 0.76 \n", - "5 -0.044000 -0.030467 0.132000 0.083529 71.17 \n", - "6 -0.044300 -0.024925 0.132271 0.159157 48.99 \n", - "7 -0.044300 0.082150 0.144371 0.116729 30.81 \n", - "8 0.010800 0.049900 0.100571 0.117329 8.02 \n", - "9 0.072667 0.106660 0.155429 0.164900 17.52 \n", - "\n", - " reanalysis_air_temp_k reanalysis_avg_temp_k reanalysis_dew_point_temp_k \\\n", - "0 298.492857 298.550000 294.527143 \n", - "1 298.475714 298.557143 294.395714 \n", - "2 299.455714 299.357143 295.308571 \n", - "3 299.690000 299.728571 294.402857 \n", - "4 299.780000 299.671429 294.760000 \n", - "5 299.768571 299.728571 295.314286 \n", - "6 300.062857 300.007143 295.650000 \n", - "7 300.484286 300.578571 295.997143 \n", - "8 300.601429 300.621429 296.268571 \n", - "9 300.497143 300.528571 296.411429 \n", - "\n", - " reanalysis_max_air_temp_k reanalysis_min_air_temp_k ... \\\n", - "0 301.1 296.4 ... \n", - "1 300.8 296.7 ... \n", - "2 302.2 296.4 ... \n", - "3 303.0 296.9 ... \n", - "4 302.3 297.3 ... \n", - "5 301.9 297.6 ... \n", - "6 302.4 297.5 ... \n", - "7 303.5 297.5 ... \n", - "8 302.5 298.5 ... \n", - "9 302.3 298.7 ... 
\n", - "\n", - " last_weeks_0_reanalysis_tdtr_k last_weeks_0_station_avg_temp_c \\\n", - "0 3.957143 27.042857 \n", - "1 3.128571 26.528571 \n", - "2 2.571429 26.071429 \n", - "3 4.428571 27.928571 \n", - "4 4.342857 28.057143 \n", - "5 3.542857 27.614286 \n", - "6 2.857143 28.000000 \n", - "7 3.157143 27.400000 \n", - "8 3.900000 28.757143 \n", - "9 2.785714 28.657143 \n", - "\n", - " last_weeks_0_station_diur_temp_rng_c last_weeks_0_station_max_temp_c \\\n", - "0 7.514286 31.7 \n", - "1 7.057143 33.3 \n", - "2 5.557143 30.0 \n", - "3 7.785714 32.8 \n", - "4 6.271429 33.3 \n", - "5 7.085714 33.3 \n", - "6 5.171429 32.8 \n", - "7 6.042857 31.1 \n", - "8 6.985714 34.4 \n", - "9 6.242857 32.8 \n", - "\n", - " last_weeks_0_station_min_temp_c last_weeks_0_station_precip_mm \\\n", - "0 23.3 0.3 \n", - "1 21.7 75.2 \n", - "2 22.2 34.3 \n", - "3 22.8 3.0 \n", - "4 24.4 0.3 \n", - "5 23.3 84.1 \n", - "6 25.0 27.7 \n", - "7 23.3 91.7 \n", - "8 24.4 0.3 \n", - "9 23.9 28.7 \n", - "\n", - " last_infected_0 last_infected_1 last_infected_2 pred \n", - "0 5.0 3.0 1.0 6 \n", - "1 6.0 5.0 3.0 6 \n", - "2 6.0 6.0 5.0 6 \n", - "3 6.0 6.0 6.0 6 \n", - "4 6.0 6.0 6.0 6 \n", - "5 6.0 6.0 6.0 6 \n", - "6 6.0 6.0 6.0 6 \n", - "7 6.0 6.0 6.0 6 \n", - "8 6.0 6.0 6.0 6 \n", - "9 6.0 6.0 6.0 6 \n", - "\n", - "[10 rows x 44 columns]" - ] - }, - "execution_count": 162, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.concat([X_test_f, pd.DataFrame(predictions, columns=['pred'])], axis=1).head(10)" - ] - }, { "cell_type": "code", "execution_count": 158, @@ -1127,7 +713,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1136,7 +722,7 @@ "((933, 24), (933,), (518, 24), (518,))" ] }, - "execution_count": 24, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1155,16 +741,16 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 18, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ - "((186, 24), (747, 24), (186,), (747,), (103, 24), (415, 24), (103,), (415,))" + "((373, 24), (560, 24), (373,), (560,), (207, 24), (311, 24), (207,), (311,))" ] }, - "execution_count": 25, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1172,13 +758,13 @@ "source": [ "from sklearn.model_selection import train_test_split\n", "\n", - "l = train_test_split(X_sj, y_sj, train_size=0.2, test_size=None, shuffle=False)\n", + "l = train_test_split(X_sj, y_sj, train_size=0.4, test_size=None, shuffle=False)\n", "X_train_sj = l[0]\n", "X_test_sj = l[1]\n", "y_train_sj = l[2]\n", "y_test_sj = l[3]\n", "\n", - "l = train_test_split(X_iq, y_iq, train_size=0.2, test_size=None, shuffle=False)\n", + "l = train_test_split(X_iq, y_iq, train_size=0.4, test_size=None, shuffle=False)\n", "X_train_iq = l[0]\n", "X_test_iq = l[1]\n", "y_train_iq = l[2]\n", @@ -1189,16 +775,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "((289, 24), (289,), (1162, 24), (1162,))" + "((580, 24), (580,), (871, 24), (871,))" ] }, - "execution_count": 26, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1225,7 +811,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -1240,38 +826,68 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Train" + "### Train\n", + "* Since we can't use `RandomizedSearchCV` with this prediction mode, we opted to implement our own exhaustive search tool.\n", + "* `RandomForestRegressor`, the best combination was with 50 estimators and a maximum depth of 5." 
] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=150, n_jobs=None,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False)" + "(17.044776119402986, (50, 5))" ] }, - "execution_count": 28, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "model = RandomForestRegressor(criterion='mae', n_estimators=150, max_depth=3)\n", - "model.fit(X_train, y_train_2)" + "%autoreload\n", + "from utils.OurPipeline import create_pipeline\n", + "from utils.predict_in_order import predict_in_order\n", + "from sklearn.metrics import mean_absolute_error\n", + "\n", + "estimators = [25, 50, 75]\n", + "depth = [2,3,4,5]\n", + "\n", + "best_mae=np.inf\n", + "best=None\n", + "for est in estimators:\n", + " for d in depth:\n", + " model = RandomForestRegressor(criterion='mae', n_estimators=est, max_depth=d)\n", + " model.fit(X_train, y_train_2)\n", + "\n", + "\n", + " #pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", + " #pipeline.fit_transform(X_train_2, y_train_2)\n", + "\n", + " pred = predict_in_order(X_test_2, model=model, pipeline=pipeline)\n", + "\n", + " mae = mean_absolute_error(pred, y_test_2)\n", + " if mae < best_mae:\n", + " best_mae = mae\n", + " best = (est, d)\n", + "\n", + "best_mae, best" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit\n", + "* Even though we obtain a MAE of approximately 17 on our custom test set (which has twice as many entries as the one from the competition), when we submit the data with that model we obtain a MAE of approximately 30." 
] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -1280,60 +896,282 @@ "from utils.predict_in_order import predict_in_order\n", "\n", "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", - "pipeline.fit_transform(X_train_2, y_train_2)\n", + "X_train = pipeline.fit_transform(X_train_2, y_train_2)\n", + "\n", + "model = RandomForestRegressor(criterion='mae', n_estimators=50, max_depth=5)\n", + "model.fit(X_train, y_train_2)\n", "\n", - "pred = predict_in_order(X_test_2, model=model, pipeline=pipeline)" + "pred = predict_in_order(X_test_1, model=model, pipeline=pipeline)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "submit = pd.DataFrame(pred, columns=['total_cases'])\n", + "x_3 = X_test_1.iloc[:,:3].copy()\n", + "submit = pd.concat([x_3, submit], axis=1)\n", + "submit.to_csv('data/submit.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# A more simple prediction\n", + "* Given that we are not being able to make a very accurate prediction, perhaps the problem is the fact that we are trying to use the previous infected attribute, which clearly has potential, however we are not being able to harness it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from utils.OurPipeline import create_pipeline\n", + "from sklearn.decomposition import PCA\n", + "\n", + "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", + "weeks = [3]\n", + "\n", + "n_total = len(optimizers) * len(weeks) \n", + "\n", + "\n", + "results=[]\n", + "best_attempt = None\n", + "best_score = np.inf\n", + "idx=0\n", + "for opt in optimizers:\n", + " for w in weeks:\n", + " pipeline = create_pipeline(attr, n_weeks=w, estimator_optimizer=opt, pca=None)\n", + " pipeline.fit(X_train_1, y_train)\n", + " score = pipeline.named_steps['est_opt'].best_score_\n", + " best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n", + " attempt = [best_estimator, w, score]\n", + " if abs(score) < best_score:\n", + " best_score = abs(score)\n", + " best_attempt = attempt\n", + " print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n", + " idx+=1\n", + " print(str(idx) + '/' + str(n_total), end='\\t')\n", + " results.append(attempt)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
0DecisionTreeRegressor(criterion='mae', max_dep...1-18.341489
1DecisionTreeRegressor(criterion='mae', max_dep...2-18.084080
2DecisionTreeRegressor(criterion='mae', max_dep...3-17.886975
3(DecisionTreeRegressor(criterion='mae', max_de...1-18.338104
4(DecisionTreeRegressor(criterion='mae', max_de...2-18.133689
5(DecisionTreeRegressor(criterion='mae', max_de...3-17.874649
6(DecisionTreeRegressor(criterion='mae', max_de...1-20.234666
7(DecisionTreeRegressor(criterion='mae', max_de...2-20.272226
8(DecisionTreeRegressor(criterion='mae', max_de...3-19.484149
9KNeighborsRegressor(algorithm='auto', leaf_siz...1-20.432433
\n", + "
" + ], "text/plain": [ - "22.689328743545612" + " 0 1 2\n", + "0 DecisionTreeRegressor(criterion='mae', max_dep... 1 -18.341489\n", + "1 DecisionTreeRegressor(criterion='mae', max_dep... 2 -18.084080\n", + "2 DecisionTreeRegressor(criterion='mae', max_dep... 3 -17.886975\n", + "3 (DecisionTreeRegressor(criterion='mae', max_de... 1 -18.338104\n", + "4 (DecisionTreeRegressor(criterion='mae', max_de... 2 -18.133689\n", + "5 (DecisionTreeRegressor(criterion='mae', max_de... 3 -17.874649\n", + "6 (DecisionTreeRegressor(criterion='mae', max_de... 1 -20.234666\n", + "7 (DecisionTreeRegressor(criterion='mae', max_de... 2 -20.272226\n", + "8 (DecisionTreeRegressor(criterion='mae', max_de... 3 -19.484149\n", + "9 KNeighborsRegressor(algorithm='auto', leaf_siz... 1 -20.432433" ] }, - "execution_count": 30, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from sklearn.metrics import mean_absolute_error\n", - "mean_absolute_error(pred, y_test_2)" + "pd.DataFrame(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", + " oob_score=False, random_state=None, verbose=0, warm_start=False),\n", + " 3,\n", + " -17.87464878333245]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_attempt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Submit" + "### Train" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 69, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(bootstrap=True, criterion='mae', 
max_depth=2,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False)" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%autoreload\n", "from utils.OurPipeline import create_pipeline\n", - "from utils.predict_in_order import predict_in_order\n", + "pipeline = create_pipeline(attr, n_weeks=3, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1)\n", "\n", - "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", - "X_train = pipeline.fit_transform(X_train_1, y_train)\n", - "\n", - "model.fit(X_train, y_train)\n", - "\n", - "pred = predict_in_order(X_test_1, model=model, pipeline=pipeline)" + "model = RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2, n_estimators=13, n_jobs=-1, random_state=random_n)\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predict" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "X_test = pipeline.transform(X_test_1)\n", + "pred = model.predict(X_test)\n", + "pred = list(map(lambda x: int(np.round(x)), pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -1345,7 +1183,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -1381,210 +1219,210 @@ " sj\n", " 2008\n", " 18\n", - " 6\n", + " 19\n", " \n", " \n", " 1\n", " sj\n", " 2008\n", " 19\n", - " 6\n", + " 15\n", " \n", " \n", " 2\n", " sj\n", " 2008\n", " 20\n", - " 6\n", + " 12\n", " 
\n", " \n", " 3\n", " sj\n", " 2008\n", " 21\n", - " 6\n", + " 21\n", " \n", " \n", " 4\n", " sj\n", " 2008\n", " 22\n", - " 6\n", + " 12\n", " \n", " \n", " 5\n", " sj\n", " 2008\n", " 23\n", - " 6\n", + " 10\n", " \n", " \n", " 6\n", " sj\n", " 2008\n", " 24\n", - " 6\n", + " 10\n", " \n", " \n", " 7\n", " sj\n", " 2008\n", " 25\n", - " 6\n", + " 24\n", " \n", " \n", " 8\n", " sj\n", " 2008\n", " 26\n", - " 6\n", + " 24\n", " \n", " \n", " 9\n", " sj\n", " 2008\n", " 27\n", - " 6\n", + " 20\n", " \n", " \n", " 10\n", " sj\n", " 2008\n", " 28\n", - " 6\n", + " 23\n", " \n", " \n", " 11\n", " sj\n", " 2008\n", " 29\n", - " 6\n", + " 26\n", " \n", " \n", " 12\n", " sj\n", " 2008\n", " 30\n", - " 6\n", + " 38\n", " \n", " \n", " 13\n", " sj\n", " 2008\n", " 31\n", - " 6\n", + " 27\n", " \n", " \n", " 14\n", " sj\n", " 2008\n", " 32\n", - " 6\n", + " 26\n", " \n", " \n", " 15\n", " sj\n", " 2008\n", " 33\n", - " 6\n", + " 28\n", " \n", " \n", " 16\n", " sj\n", " 2008\n", " 34\n", - " 6\n", + " 27\n", " \n", " \n", " 17\n", " sj\n", " 2008\n", " 35\n", - " 6\n", + " 29\n", " \n", " \n", " 18\n", " sj\n", " 2008\n", " 36\n", - " 6\n", + " 64\n", " \n", " \n", " 19\n", " sj\n", " 2008\n", " 37\n", - " 6\n", + " 29\n", " \n", " \n", " 20\n", " sj\n", " 2008\n", " 38\n", - " 6\n", + " 79\n", " \n", " \n", " 21\n", " sj\n", " 2008\n", " 39\n", - " 6\n", + " 32\n", " \n", " \n", " 22\n", " sj\n", " 2008\n", " 40\n", - " 6\n", + " 31\n", " \n", " \n", " 23\n", " sj\n", " 2008\n", " 41\n", - " 6\n", + " 32\n", " \n", " \n", " 24\n", " sj\n", " 2008\n", " 42\n", - " 6\n", + " 30\n", " \n", " \n", " 25\n", " sj\n", " 2008\n", " 43\n", - " 6\n", + " 24\n", " \n", " \n", " 26\n", " sj\n", " 2008\n", " 44\n", - " 6\n", + " 32\n", " \n", " \n", " 27\n", " sj\n", " 2008\n", " 45\n", - " 6\n", + " 29\n", " \n", " \n", " 28\n", " sj\n", " 2008\n", " 46\n", - " 6\n", + " 26\n", " \n", " \n", " 29\n", " sj\n", " 2008\n", " 47\n", - " 6\n", + " 26\n", " \n", " \n", " ...\n", @@ -1598,210 
+1436,210 @@ " iq\n", " 2012\n", " 48\n", - " 6\n", + " 5\n", " \n", " \n", " 387\n", " iq\n", " 2012\n", " 49\n", - " 6\n", + " 5\n", " \n", " \n", " 388\n", " iq\n", " 2012\n", " 50\n", - " 6\n", + " 7\n", " \n", " \n", " 389\n", " iq\n", " 2012\n", " 51\n", - " 6\n", + " 8\n", " \n", " \n", " 390\n", " iq\n", " 2013\n", " 1\n", - " 6\n", + " 5\n", " \n", " \n", " 391\n", " iq\n", " 2013\n", " 2\n", - " 6\n", + " 5\n", " \n", " \n", " 392\n", " iq\n", " 2013\n", " 3\n", - " 6\n", + " 5\n", " \n", " \n", " 393\n", " iq\n", " 2013\n", " 4\n", - " 6\n", + " 5\n", " \n", " \n", " 394\n", " iq\n", " 2013\n", " 5\n", - " 6\n", + " 5\n", " \n", " \n", " 395\n", " iq\n", " 2013\n", " 6\n", - " 6\n", + " 5\n", " \n", " \n", " 396\n", " iq\n", " 2013\n", " 7\n", - " 6\n", + " 5\n", " \n", " \n", " 397\n", " iq\n", " 2013\n", " 8\n", - " 6\n", + " 5\n", " \n", " \n", " 398\n", " iq\n", " 2013\n", " 9\n", - " 6\n", + " 5\n", " \n", " \n", " 399\n", " iq\n", " 2013\n", " 10\n", - " 6\n", + " 5\n", " \n", " \n", " 400\n", " iq\n", " 2013\n", " 11\n", - " 6\n", + " 5\n", " \n", " \n", " 401\n", " iq\n", " 2013\n", " 12\n", - " 6\n", + " 5\n", " \n", " \n", " 402\n", " iq\n", " 2013\n", " 13\n", - " 6\n", + " 5\n", " \n", " \n", " 403\n", " iq\n", " 2013\n", " 14\n", - " 6\n", + " 5\n", " \n", " \n", " 404\n", " iq\n", " 2013\n", " 15\n", - " 6\n", + " 5\n", " \n", " \n", " 405\n", " iq\n", " 2013\n", " 16\n", - " 6\n", + " 5\n", " \n", " \n", " 406\n", " iq\n", " 2013\n", " 17\n", - " 6\n", + " 5\n", " \n", " \n", " 407\n", " iq\n", " 2013\n", " 18\n", - " 6\n", + " 5\n", " \n", " \n", " 408\n", " iq\n", " 2013\n", " 19\n", - " 6\n", + " 5\n", " \n", " \n", " 409\n", " iq\n", " 2013\n", " 20\n", - " 6\n", + " 5\n", " \n", " \n", " 410\n", " iq\n", " 2013\n", " 21\n", - " 6\n", + " 5\n", " \n", " \n", " 411\n", " iq\n", " 2013\n", " 22\n", - " 6\n", + " 5\n", " \n", " \n", " 412\n", " iq\n", " 2013\n", " 23\n", - " 6\n", + " 5\n", " \n", " \n", " 413\n", " iq\n", " 2013\n", " 
24\n", - " 6\n", + " 5\n", " \n", " \n", " 414\n", " iq\n", " 2013\n", " 25\n", - " 6\n", + " 5\n", " \n", " \n", " 415\n", " iq\n", " 2013\n", " 26\n", - " 6\n", + " 5\n", " \n", " \n", "\n", @@ -1810,72 +1648,72 @@ ], "text/plain": [ " city year weekofyear total_cases\n", - "0 sj 2008 18 6\n", - "1 sj 2008 19 6\n", - "2 sj 2008 20 6\n", - "3 sj 2008 21 6\n", - "4 sj 2008 22 6\n", - "5 sj 2008 23 6\n", - "6 sj 2008 24 6\n", - "7 sj 2008 25 6\n", - "8 sj 2008 26 6\n", - "9 sj 2008 27 6\n", - "10 sj 2008 28 6\n", - "11 sj 2008 29 6\n", - "12 sj 2008 30 6\n", - "13 sj 2008 31 6\n", - "14 sj 2008 32 6\n", - "15 sj 2008 33 6\n", - "16 sj 2008 34 6\n", - "17 sj 2008 35 6\n", - "18 sj 2008 36 6\n", - "19 sj 2008 37 6\n", - "20 sj 2008 38 6\n", - "21 sj 2008 39 6\n", - "22 sj 2008 40 6\n", - "23 sj 2008 41 6\n", - "24 sj 2008 42 6\n", - "25 sj 2008 43 6\n", - "26 sj 2008 44 6\n", - "27 sj 2008 45 6\n", - "28 sj 2008 46 6\n", - "29 sj 2008 47 6\n", + "0 sj 2008 18 19\n", + "1 sj 2008 19 15\n", + "2 sj 2008 20 12\n", + "3 sj 2008 21 21\n", + "4 sj 2008 22 12\n", + "5 sj 2008 23 10\n", + "6 sj 2008 24 10\n", + "7 sj 2008 25 24\n", + "8 sj 2008 26 24\n", + "9 sj 2008 27 20\n", + "10 sj 2008 28 23\n", + "11 sj 2008 29 26\n", + "12 sj 2008 30 38\n", + "13 sj 2008 31 27\n", + "14 sj 2008 32 26\n", + "15 sj 2008 33 28\n", + "16 sj 2008 34 27\n", + "17 sj 2008 35 29\n", + "18 sj 2008 36 64\n", + "19 sj 2008 37 29\n", + "20 sj 2008 38 79\n", + "21 sj 2008 39 32\n", + "22 sj 2008 40 31\n", + "23 sj 2008 41 32\n", + "24 sj 2008 42 30\n", + "25 sj 2008 43 24\n", + "26 sj 2008 44 32\n", + "27 sj 2008 45 29\n", + "28 sj 2008 46 26\n", + "29 sj 2008 47 26\n", ".. ... ... ... 
...\n", - "386 iq 2012 48 6\n", - "387 iq 2012 49 6\n", - "388 iq 2012 50 6\n", - "389 iq 2012 51 6\n", - "390 iq 2013 1 6\n", - "391 iq 2013 2 6\n", - "392 iq 2013 3 6\n", - "393 iq 2013 4 6\n", - "394 iq 2013 5 6\n", - "395 iq 2013 6 6\n", - "396 iq 2013 7 6\n", - "397 iq 2013 8 6\n", - "398 iq 2013 9 6\n", - "399 iq 2013 10 6\n", - "400 iq 2013 11 6\n", - "401 iq 2013 12 6\n", - "402 iq 2013 13 6\n", - "403 iq 2013 14 6\n", - "404 iq 2013 15 6\n", - "405 iq 2013 16 6\n", - "406 iq 2013 17 6\n", - "407 iq 2013 18 6\n", - "408 iq 2013 19 6\n", - "409 iq 2013 20 6\n", - "410 iq 2013 21 6\n", - "411 iq 2013 22 6\n", - "412 iq 2013 23 6\n", - "413 iq 2013 24 6\n", - "414 iq 2013 25 6\n", - "415 iq 2013 26 6\n", + "386 iq 2012 48 5\n", + "387 iq 2012 49 5\n", + "388 iq 2012 50 7\n", + "389 iq 2012 51 8\n", + "390 iq 2013 1 5\n", + "391 iq 2013 2 5\n", + "392 iq 2013 3 5\n", + "393 iq 2013 4 5\n", + "394 iq 2013 5 5\n", + "395 iq 2013 6 5\n", + "396 iq 2013 7 5\n", + "397 iq 2013 8 5\n", + "398 iq 2013 9 5\n", + "399 iq 2013 10 5\n", + "400 iq 2013 11 5\n", + "401 iq 2013 12 5\n", + "402 iq 2013 13 5\n", + "403 iq 2013 14 5\n", + "404 iq 2013 15 5\n", + "405 iq 2013 16 5\n", + "406 iq 2013 17 5\n", + "407 iq 2013 18 5\n", + "408 iq 2013 19 5\n", + "409 iq 2013 20 5\n", + "410 iq 2013 21 5\n", + "411 iq 2013 22 5\n", + "412 iq 2013 23 5\n", + "413 iq 2013 24 5\n", + "414 iq 2013 25 5\n", + "415 iq 2013 26 5\n", "\n", "[416 rows x 4 columns]" ] }, - "execution_count": 17, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } diff --git a/utils/OurPipeline.py b/utils/OurPipeline.py index 8a1d1dd..d880f4b 100644 --- a/utils/OurPipeline.py +++ b/utils/OurPipeline.py @@ -6,14 +6,18 @@ from utils.LastWeeks import LastWeeks from utils.LastInfected import LastInfected -def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, add_noise=False, noise_mean=None, noise_std=None, n_non_train=4): +def create_pipeline(attr, n_weeks, 
n_weeks_infected=None, estimator_optimizer=None, pca=None, add_noise=False, noise_mean=None, noise_std=None, n_non_train=4): + + l_infected = None + if n_weeks_infected is not None and n_weeks_infected > 0: + l_infected = LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std) return Pipeline([ ('imputer', ContinuityImputer(attributes=attr[n_non_train:])), ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)), - ('l_infected', LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std)), + ('l_infected', l_infected), ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])), - #('scaler', StandardScaler()), + ('scaler', StandardScaler()), ('pca', pca), ('est_opt', estimator_optimizer), ] From 0fdd729218382501114b4570e78ae8c5d14cc88f Mon Sep 17 00:00:00 2001 From: MLobo1997 Date: Tue, 23 Apr 2019 11:37:45 +0100 Subject: [PATCH 24/24] Ready to deliver phase 1 --- models.ipynb | 1160 +++++++++----------------------------------------- 1 file changed, 201 insertions(+), 959 deletions(-) diff --git a/models.ipynb b/models.ipynb index c9ab18c..2db22e8 100644 --- a/models.ipynb +++ b/models.ipynb @@ -251,9 +251,163 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Optimization\n", + "# The most simple prediction\n", + "* Our first attempt consists of simply adding weather information from the previous weeks and finding the optimal the optimal parameter through exaustive search (coded by us) and find its optimal hyper-parameters (using `RandomSearchCV`).\n", "* Interestingly, PCA makes all the models worst in this case.\n", - "* After the exaustive search, the best model was the SVR which obtained an MAE of 6.52." + "* It turned out to be a `RandomForestRegressor` as you can see in the `best_attempt` variable. 
By using this model and adding the 3 previous weeks of weather to each entry, we obtained a MAE of approximately 17 by 10-folded cross validation.\n", + "* Unfortunatly, this model (when trained with all the train data) resulted in an 27 MAE when submitted to the platform. This indicates overfitting and that there must be considerable differences between the train and test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%autoreload\n", + "from utils.OurPipeline import create_pipeline\n", + "from sklearn.decomposition import PCA\n", + "\n", + "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", + "weeks = [1,2,3,4]\n", + "\n", + "n_total = len(optimizers) * len(weeks) \n", + "\n", + "\n", + "results=[]\n", + "best_attempt = None\n", + "best_score = np.inf\n", + "idx=0\n", + "for opt in optimizers:\n", + " for w in weeks:\n", + " pipeline = create_pipeline(attr, n_weeks=w, estimator_optimizer=opt, pca=None)\n", + " pipeline.fit(X_train_1, y_train)\n", + " score = pipeline.named_steps['est_opt'].best_score_\n", + " best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n", + " attempt = [best_estimator, w, score]\n", + " if abs(score) < best_score:\n", + " best_score = abs(score)\n", + " best_attempt = attempt\n", + " print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n", + " idx+=1\n", + " print(str(idx) + '/' + str(n_total), end='\\t')\n", + " results.append(attempt)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", + " 
oob_score=False, random_state=None, verbose=0, warm_start=False),\n", + " 3,\n", + " -17.87464878333245]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_attempt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n", + " max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False)" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%autoreload\n", + "from utils.OurPipeline import create_pipeline\n", + "pipeline = create_pipeline(attr, n_weeks=3, pca=None)\n", + "X_train = pipeline.fit_transform(X_train_1)\n", + "\n", + "model = RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2, n_estimators=13, n_jobs=-1, random_state=random_n)\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predict" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "X_test = pipeline.transform(X_test_1)\n", + "pred = model.predict(X_test)\n", + "pred = list(map(lambda x: int(np.round(x)), pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "submit = pd.DataFrame(pred, columns=['total_cases'])\n", + "x_3 = X_test_1.iloc[:,:3].copy()\n", + "submit = pd.concat([x_3, submit], axis=1)\n", + "submit.to_csv('data/submit.csv', 
index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prediction with the last infected\n", + "* As we could see on the analysis notebook, the number of infected on any week is highly linked to the number of infected in the previous weeks. Including the number of infected (or at least an approximation) on the previous weeks should be key to very accurate predictions.\n", + "* For this purpose, we created the `LastInfected` module which is included in the pipeline.\n", + "* After the exhaustive search, the best model was the SVR which obtained an MAE of 6.52 on the training dataset, which is a great improvement.\n", + "* Given that we are making sequential predictions, i.e.: the prediction from one week relies on the prediction from the previous weeks, we must make the transformations and predictions one by one.\n", + "* The submission MAE was approximately 26, which is an improvement and is not bad given that the `total_cases` feature on the training set ranges from 0 to 400. 
However, we were expecting a much smaller result.\n", + "\n", + "### Optimization" ] }, { @@ -267,9 +421,9 @@ "from sklearn.decomposition import PCA\n", "\n", "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", - "weeks = [1]\n", - "weeks_infected = [3]\n", - "pca = [None]\n", + "weeks = [1, 2, 3]\n", + "weeks_infected = [2, 3, 4]\n", + "pca = [PCA(0.95), None]\n", "\n", "n_total = len(optimizers) * len(weeks) * len(weeks_infected) * len(pca)\n", "\n", @@ -281,7 +435,7 @@ " for w in weeks:\n", " for wi in weeks_infected:\n", " for p in pca:\n", - " pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n", + " pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, pca=None)\n", " pipeline.fit(X_train_1, y_train)\n", " score = pipeline.named_steps['est_opt'].best_score_\n", " best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n", @@ -295,15 +449,6 @@ " results.append(attempt)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pd.DataFrame(results, columns=['estimator', 'weeks', 'weeks_infected', 'PCA', 'score'])" - ] - }, { "cell_type": "code", "execution_count": 37, @@ -400,9 +545,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## One by one prediction\n", - "* Given that we are making sequential predictions, i.e.: the prediction from a week relies on the prediction from the previous weeks, we must make the transformations and predictions one by one.\n", - "* Given that this kind of prediction is very prone to a snowball effect on errors our first solution had an error of 26. To solve this we came up with the idea of adding noise to the train data. However for this solution we need to know both: the mean of the error and its standard deviation (*std*). 
We already know the mean (MAE), we just need to know the *std*" + "## One by one prediction" ] }, { @@ -422,22 +565,36 @@ } ], "source": [ - "predictions=[]\n", - "for idx in range(X_test_1.shape[0]):\n", - " x = pipeline.transform(X_test_1.loc[idx:idx,:])\n", - " pred = model.predict(x)\n", - " pred = int(np.round(pred))\n", - " pipeline.named_steps['l_infected'].append_y(pred)\n", - " predictions.append(pred)\n", + "from utils.predict_in_order import predict_in_order\n", + "predictions = predict_in_order(X_test_1, model, pipeline)\n", "len(predictions)" ] }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [], + "source": [ + "submit = pd.DataFrame(predictions, columns=['total_cases'])\n", + "x_3 = X_test_1.iloc[:,:3].copy()\n", + "submit = pd.concat([x_3, submit], axis=1)\n", + "submit.to_csv('data/submit.csv', index=False)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Calculating an approximation of the *std*\n", - "* It is approximately 10.9. We can see that the MAE is close to the one calculated in the cross-validation." + "# One by one prediction with noise\n", + "* We believe the reason why our predictions were not so great, was because this kind of prediction is very prone to a snowball effect on errors.\n", + "* To solve this we came up with an idea: Our problem was currently being trained on data which has all `last_infected` columns with the exact correct values. However, when we are predicting with the test set, the values we use on `last_infected` are mere predictions. By adding random noise to the `last_infected` columns on the training data we would make our model more \"prepared\" to accept entries in which the `last_infected` columns are not so accurate.\n", + "* However for this solution we need to know both: the mean of the error and its standard deviation (*std*), so that we can reproduce the error by a gaussian distribution. 
We already know the mean (MAE), we just need to know the *std*\n", + "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our \"synthetic\" noise and the one created by the predictive model.\n", + "* The submission's MAE increased again to approximately 27. \n", + "* A very likely guess for why it isn't working is that the error when y is low is much smaller than when y is high.\n", + "\n", + "### Calculating an approximation of the *std*" ] }, { @@ -515,38 +672,11 @@ "np.mean(errors), np.std(errors)" ] }, - { - "cell_type": "code", - "execution_count": 152, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(6.53353, 4.950353092366241)" - ] - }, - "execution_count": 152, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from random import choice, gauss\n", - "r=[]\n", - "for _ in range(100000):\n", - " r.append(int(np.round(choice([-1,1]) * gauss(mu=0, sigma=8.2))))\n", - "r=np.abs(r)\n", - "np.mean(r), np.std(r)" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# One by one prediction with noise\n", - "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n", - "* A very likely guess for why it isn't working is that the error when y is low is much smaller than when y is high." 
+ "### Adding the noise and training" ] }, { @@ -558,42 +688,24 @@ "%autoreload\n", "from OurPipeline import create_pipeline\n", "\n", - "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=0, noise_std=8.2, pca=None)\n", + "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=6.78, noise_std=10.96, pca=None)\n", "X_train = pipeline.fit_transform(X_train_1, y_train)" ] }, { "cell_type": "code", - "execution_count": 155, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "model = RandomForestRegressor(criterion='mae', n_estimators=100, max_depth=3)" + "model.fit(X_train, y_train)" ] }, { - "cell_type": "code", - "execution_count": 156, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False)" - ] - }, - "execution_count": 156, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "model.fit(X_train, y_train)" + "### Disabling the noise and predicting" ] }, { @@ -620,7 +732,8 @@ "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n", "pipeline.fit_transform(X_train_1, y_train)\n", "\n", - "predict_in_order(X_test_1, model, pipeline)" + "predictions = predict_in_order(X_test_1, model, pipeline)\n", + "len(predictions)" ] }, { @@ -642,73 +755,18 @@ "submit.to_csv('data/submit.csv', index=False)" ] }, - { - "cell_type": "code", - "execution_count": 158, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "7.860576923076923" - ] - }, - "execution_count": 158, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "from sklearn.metrics import mean_absolute_error\n", - "mean_absolute_error(model.predict(X_train), y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "371.265" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "max(model.predict(X_train))" - ] - }, - { - "cell_type": "code", - "execution_count": 163, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6" - ] - }, - "execution_count": 163, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "max(predictions)" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test split of tail\n", - "* To simulate what we are doing with the test data, we are going to split the train data, for each city, by sampling N entries from the tail of each city for testing." + "* To simulate what we are doing with the test data, we are going to split the train data, for each city, by sampling N entries from the tail of each city for testing.\n", + "* We now have 580 entries of train data and 871 entres of test data, to figure out what is wrong.\n", + "* Since we can't use `RandomizedSearchCV` with this prediction mode (the one-by-one explained before), we opted to implement our own exhaustive search tool.\n", + "* Here we only worked with the `RandomForestRegressor` because it brought results almost as good as the `SVR` model and took far less time training.\n", + "* The optimal model turned out to be `RandomForestRegressor` with with 50 estimators and a maximum depth of 5.\n", + "* Even though we obtain a MAE of approximately 17 on our custom test set (which has twice as many entries as the one from the competition), when we submit the data with that model we obtain a MAE of approximately 30.\n", + "* We are hoping to be able to improve this result on the phase 2 of the project." 
] }, { @@ -826,9 +884,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Train\n", - "* Since we can't use `RandomizedSearchCV` with this prediction mode, we opted to implement our own exhaustive search tool.\n", - "* `RandomForestRegressor`, the best combination was with 50 estimators and a maximum depth of 5." + "### Train" ] }, { @@ -881,8 +937,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Submit\n", - "* Even though we obtain a MAE of approximately 17 on our custom test set (which has twice as many entries as the one from the competition), when we submit the data with that model we obtain a MAE of approximately 30." + "### Submit" ] }, { @@ -915,819 +970,6 @@ "submit = pd.concat([x_3, submit], axis=1)\n", "submit.to_csv('data/submit.csv', index=False)" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A more simple prediction\n", - "* Given that we are not being able to make a very accurate prediction, perhaps the problem is the fact that we are trying to use the previous infected attribute, which clearly has potential, however we are not being able to harness it." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%autoreload\n", - "from utils.OurPipeline import create_pipeline\n", - "from sklearn.decomposition import PCA\n", - "\n", - "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n", - "weeks = [3]\n", - "\n", - "n_total = len(optimizers) * len(weeks) \n", - "\n", - "\n", - "results=[]\n", - "best_attempt = None\n", - "best_score = np.inf\n", - "idx=0\n", - "for opt in optimizers:\n", - " for w in weeks:\n", - " pipeline = create_pipeline(attr, n_weeks=w, estimator_optimizer=opt, pca=None)\n", - " pipeline.fit(X_train_1, y_train)\n", - " score = pipeline.named_steps['est_opt'].best_score_\n", - " best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n", - " attempt = [best_estimator, w, score]\n", - " if abs(score) < best_score:\n", - " best_score = abs(score)\n", - " best_attempt = attempt\n", - " print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n", - " idx+=1\n", - " print(str(idx) + '/' + str(n_total), end='\\t')\n", - " results.append(attempt)" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
012
0DecisionTreeRegressor(criterion='mae', max_dep...1-18.341489
1DecisionTreeRegressor(criterion='mae', max_dep...2-18.084080
2DecisionTreeRegressor(criterion='mae', max_dep...3-17.886975
3(DecisionTreeRegressor(criterion='mae', max_de...1-18.338104
4(DecisionTreeRegressor(criterion='mae', max_de...2-18.133689
5(DecisionTreeRegressor(criterion='mae', max_de...3-17.874649
6(DecisionTreeRegressor(criterion='mae', max_de...1-20.234666
7(DecisionTreeRegressor(criterion='mae', max_de...2-20.272226
8(DecisionTreeRegressor(criterion='mae', max_de...3-19.484149
9KNeighborsRegressor(algorithm='auto', leaf_siz...1-20.432433
\n", - "
" - ], - "text/plain": [ - " 0 1 2\n", - "0 DecisionTreeRegressor(criterion='mae', max_dep... 1 -18.341489\n", - "1 DecisionTreeRegressor(criterion='mae', max_dep... 2 -18.084080\n", - "2 DecisionTreeRegressor(criterion='mae', max_dep... 3 -17.886975\n", - "3 (DecisionTreeRegressor(criterion='mae', max_de... 1 -18.338104\n", - "4 (DecisionTreeRegressor(criterion='mae', max_de... 2 -18.133689\n", - "5 (DecisionTreeRegressor(criterion='mae', max_de... 3 -17.874649\n", - "6 (DecisionTreeRegressor(criterion='mae', max_de... 1 -20.234666\n", - "7 (DecisionTreeRegressor(criterion='mae', max_de... 2 -20.272226\n", - "8 (DecisionTreeRegressor(criterion='mae', max_de... 3 -19.484149\n", - "9 KNeighborsRegressor(algorithm='auto', leaf_siz... 1 -20.432433" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame(results)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", - " oob_score=False, random_state=None, verbose=0, warm_start=False),\n", - " 3,\n", - " -17.87464878333245]" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "best_attempt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n", - " max_features='auto', max_leaf_nodes=None,\n", - " min_impurity_decrease=0.0, min_impurity_split=None,\n", - " min_samples_leaf=1, 
min_samples_split=2,\n", - " min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n", - " oob_score=False, random_state=42, verbose=0, warm_start=False)" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%autoreload\n", - "from utils.OurPipeline import create_pipeline\n", - "pipeline = create_pipeline(attr, n_weeks=3, pca=None)\n", - "X_train = pipeline.fit_transform(X_train_1)\n", - "\n", - "model = RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2, n_estimators=13, n_jobs=-1, random_state=random_n)\n", - "model.fit(X_train, y_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predict" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [], - "source": [ - "X_test = pipeline.transform(X_test_1)\n", - "pred = model.predict(X_test)\n", - "pred = list(map(lambda x: int(np.round(x)), pred))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Submit" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [], - "source": [ - "submit = pd.DataFrame(pred, columns=['total_cases'])\n", - "x_3 = X_test_1.iloc[:,:3].copy()\n", - "submit = pd.concat([x_3, submit], axis=1)\n", - "submit.to_csv('data/submit.csv', index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cityyearweekofyeartotal_cases
0sj20081819
1sj20081915
2sj20082012
3sj20082121
4sj20082212
5sj20082310
6sj20082410
7sj20082524
8sj20082624
9sj20082720
10sj20082823
11sj20082926
12sj20083038
13sj20083127
14sj20083226
15sj20083328
16sj20083427
17sj20083529
18sj20083664
19sj20083729
20sj20083879
21sj20083932
22sj20084031
23sj20084132
24sj20084230
25sj20084324
26sj20084432
27sj20084529
28sj20084626
29sj20084726
...............
386iq2012485
387iq2012495
388iq2012507
389iq2012518
390iq201315
391iq201325
392iq201335
393iq201345
394iq201355
395iq201365
396iq201375
397iq201385
398iq201395
399iq2013105
400iq2013115
401iq2013125
402iq2013135
403iq2013145
404iq2013155
405iq2013165
406iq2013175
407iq2013185
408iq2013195
409iq2013205
410iq2013215
411iq2013225
412iq2013235
413iq2013245
414iq2013255
415iq2013265
\n", - "

416 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " city year weekofyear total_cases\n", - "0 sj 2008 18 19\n", - "1 sj 2008 19 15\n", - "2 sj 2008 20 12\n", - "3 sj 2008 21 21\n", - "4 sj 2008 22 12\n", - "5 sj 2008 23 10\n", - "6 sj 2008 24 10\n", - "7 sj 2008 25 24\n", - "8 sj 2008 26 24\n", - "9 sj 2008 27 20\n", - "10 sj 2008 28 23\n", - "11 sj 2008 29 26\n", - "12 sj 2008 30 38\n", - "13 sj 2008 31 27\n", - "14 sj 2008 32 26\n", - "15 sj 2008 33 28\n", - "16 sj 2008 34 27\n", - "17 sj 2008 35 29\n", - "18 sj 2008 36 64\n", - "19 sj 2008 37 29\n", - "20 sj 2008 38 79\n", - "21 sj 2008 39 32\n", - "22 sj 2008 40 31\n", - "23 sj 2008 41 32\n", - "24 sj 2008 42 30\n", - "25 sj 2008 43 24\n", - "26 sj 2008 44 32\n", - "27 sj 2008 45 29\n", - "28 sj 2008 46 26\n", - "29 sj 2008 47 26\n", - ".. ... ... ... ...\n", - "386 iq 2012 48 5\n", - "387 iq 2012 49 5\n", - "388 iq 2012 50 7\n", - "389 iq 2012 51 8\n", - "390 iq 2013 1 5\n", - "391 iq 2013 2 5\n", - "392 iq 2013 3 5\n", - "393 iq 2013 4 5\n", - "394 iq 2013 5 5\n", - "395 iq 2013 6 5\n", - "396 iq 2013 7 5\n", - "397 iq 2013 8 5\n", - "398 iq 2013 9 5\n", - "399 iq 2013 10 5\n", - "400 iq 2013 11 5\n", - "401 iq 2013 12 5\n", - "402 iq 2013 13 5\n", - "403 iq 2013 14 5\n", - "404 iq 2013 15 5\n", - "405 iq 2013 16 5\n", - "406 iq 2013 17 5\n", - "407 iq 2013 18 5\n", - "408 iq 2013 19 5\n", - "409 iq 2013 20 5\n", - "410 iq 2013 21 5\n", - "411 iq 2013 22 5\n", - "412 iq 2013 23 5\n", - "413 iq 2013 24 5\n", - "414 iq 2013 25 5\n", - "415 iq 2013 26 5\n", - "\n", - "[416 rows x 4 columns]" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "submit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {